diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2022-03-05 10:59:03 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2022-03-05 10:59:04 +0000 |
commit | d7e2fe4aac8b74bbfe82b2309536528b4dbe0d34 (patch) | |
tree | 8599bc6aee66060dbfd9d2cd1955bd1eea3af236 | |
parent | 5c8463886d50eeb0337bd121ab877cf692731e36 (diff) | |
parent | 78fa41fc671eae51fd3390a12a041d1c4a241c66 (diff) |
Merge remote-tracking branch 'remotes/kwolf-gitlab/tags/for-upstream' into staging
Block layer patches
- qemu-storage-daemon: Add --daemonize
- Fix x-blockdev-amend and block node activation code which incorrectly
executed code in the iothread that must run in the main thread.
- Add macros for coroutine-safe TLS variables (required for correctness
with LTO)
- Fix crashes with concurrent I/O and bdrv_refresh_limits()
- Split block APIs in global state and I/O
- iotests: Don't refuse to run at all without GNU sed, just skip tests
that need it
# gpg: Signature made Fri 04 Mar 2022 17:18:31 GMT
# gpg: using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg: issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6
* remotes/kwolf-gitlab/tags/for-upstream: (50 commits)
block/amend: Keep strong reference to BDS
block/amend: Always call .bdrv_amend_clean()
tests/qemu-iotests: Rework the checks and spots using GNU sed
iotests/graph-changes-while-io: New test
iotests: Allow using QMP with the QSD
block: Make bdrv_refresh_limits() non-recursive
job.h: assertions in the callers of JobDriver function pointers
job.h: split function pointers in JobDriver
block-backend-common.h: split function pointers in BlockDevOps
block_int-common.h: assertions in the callers of BdrvChildClass function pointers
block_int-common.h: split function pointers in BdrvChildClass
block_int-common.h: assertions in the callers of BlockDriver function pointers
block_int-common.h: split function pointers in BlockDriver
block/coroutines: I/O and "I/O or GS" API
block/copy-before-write.h: global state API + assertions
include/block/snapshot: global state API + assertions
assertions for blockdev.h global state API
include/sysemu/blockdev.h: global state API
assertions for blockjob.h global state API
include/block/blockjob.h: global state API
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
75 files changed, 4860 insertions, 2823 deletions
@@ -67,12 +67,15 @@ #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ +/* Protected by BQL */ static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); +/* Protected by BQL */ static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states = QTAILQ_HEAD_INITIALIZER(all_bdrv_states); +/* Protected by BQL */ static QLIST_HEAD(, BlockDriver) bdrv_drivers = QLIST_HEAD_INITIALIZER(bdrv_drivers); @@ -134,6 +137,7 @@ size_t bdrv_opt_mem_align(BlockDriverState *bs) /* page size or 4k (hdd sector size) should be on the safe side */ return MAX(4096, qemu_real_host_page_size); } + IO_CODE(); return bs->bl.opt_mem_alignment; } @@ -144,6 +148,7 @@ size_t bdrv_min_mem_align(BlockDriverState *bs) /* page size or 4k (hdd sector size) should be on the safe side */ return MAX(4096, qemu_real_host_page_size); } + IO_CODE(); return bs->bl.min_mem_alignment; } @@ -269,12 +274,15 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, * image is inactivated. */ bool bdrv_is_read_only(BlockDriverState *bs) { + IO_CODE(); return !(bs->open_flags & BDRV_O_RDWR); } int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, bool ignore_allow_rdw, Error **errp) { + IO_CODE(); + /* Do not set read_only if copy_on_read is enabled */ if (bs->copy_on_read && read_only) { error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled", @@ -308,6 +316,7 @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, Error **errp) { int ret = 0; + IO_CODE(); if (!(bs->open_flags & BDRV_O_RDWR)) { return 0; @@ -384,12 +393,14 @@ static char *bdrv_make_absolute_filename(BlockDriverState *relative_to, char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp) { + GLOBAL_STATE_CODE(); return bdrv_make_absolute_filename(bs, bs->backing_file, errp); } void bdrv_register(BlockDriver *bdrv) { assert(bdrv->format_name); + GLOBAL_STATE_CODE(); QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); } @@ -398,6 +409,8 @@ BlockDriverState *bdrv_new(void) BlockDriverState *bs; int i; + GLOBAL_STATE_CODE(); + bs = g_new0(BlockDriverState, 1); QLIST_INIT(&bs->dirty_bitmaps); for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { @@ -425,6 +438,7 @@ BlockDriverState *bdrv_new(void) static BlockDriver *bdrv_do_find_format(const char *format_name) { BlockDriver *drv1; + GLOBAL_STATE_CODE(); QLIST_FOREACH(drv1, &bdrv_drivers, list) { if (!strcmp(drv1->format_name, format_name)) { @@ -440,6 +454,8 @@ BlockDriver *bdrv_find_format(const char *format_name) BlockDriver *drv1; int i; + GLOBAL_STATE_CODE(); + drv1 = bdrv_do_find_format(format_name); if (drv1) { return drv1; @@ -489,6 +505,7 @@ static int bdrv_format_is_whitelisted(const char *format_name, bool read_only) int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) { + GLOBAL_STATE_CODE(); return bdrv_format_is_whitelisted(drv->format_name, read_only); } @@ -512,6 +529,7 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque) CreateCo *cco = opaque; assert(cco->drv); + GLOBAL_STATE_CODE(); ret = cco->drv->bdrv_co_create_opts(cco->drv, cco->filename, cco->opts, &local_err); @@ -524,6 +542,8 @@ int bdrv_create(BlockDriver *drv, const char* filename, { int ret; + GLOBAL_STATE_CODE(); + Coroutine *co; CreateCo cco = { .drv = drv, @@ -578,6 +598,8 @@ static int64_t create_file_fallback_truncate(BlockBackend *blk, int64_t size; int ret; + GLOBAL_STATE_CODE(); + ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, &local_err); if (ret < 0 && ret != -ENOTSUP) { @@ -616,6 +638,8 @@ static int create_file_fallback_zero_first_sector(BlockBackend *blk, int64_t bytes_to_clear; int ret; + GLOBAL_STATE_CODE(); + bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); if (bytes_to_clear) { ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); @@ -647,6 +671,8 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, Error *local_err = NULL; int ret; + GLOBAL_STATE_CODE(); + size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, @@ -699,6 +725,8 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) QDict *qdict; int ret; + GLOBAL_STATE_CODE(); + drv = bdrv_find_protocol(filename, true, errp); if (drv == NULL) { return -ENOENT; @@ -743,6 +771,7 @@ int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp) Error *local_err = NULL; int ret; + IO_CODE(); assert(bs != NULL); if (!bs->drv) { @@ -768,6 +797,7 @@ void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs) { Error *local_err = NULL; int ret; + IO_CODE(); if (!bs) { return; @@ -796,6 +826,7 @@ int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) { BlockDriver *drv = bs->drv; BlockDriverState *filtered = bdrv_filter_bs(bs); + GLOBAL_STATE_CODE(); if (drv && drv->bdrv_probe_blocksizes) { return drv->bdrv_probe_blocksizes(bs, bsz); @@ -816,6 +847,7 @@ int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) { BlockDriver *drv = bs->drv; BlockDriverState *filtered = bdrv_filter_bs(bs); + GLOBAL_STATE_CODE(); if (drv && drv->bdrv_probe_geometry) { return drv->bdrv_probe_geometry(bs, geo); @@ -870,6 +902,7 @@ static BlockDriver *find_hdev_driver(const char *filename) { int score_max = 0, score; BlockDriver *drv = NULL, *d; + GLOBAL_STATE_CODE(); QLIST_FOREACH(d, &bdrv_drivers, list) { if (d->bdrv_probe_device) { @@ -887,6 +920,7 @@ static BlockDriver *find_hdev_driver(const char *filename) static BlockDriver *bdrv_do_find_protocol(const char *protocol) { BlockDriver *drv1; + GLOBAL_STATE_CODE(); QLIST_FOREACH(drv1, &bdrv_drivers, list) { if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) { @@ -907,6 +941,7 @@ BlockDriver *bdrv_find_protocol(const char *filename, const char *p; int i; + GLOBAL_STATE_CODE(); /* TODO Drivers without bdrv_file_open must be specified explicitly */ /* @@ -972,6 +1007,7 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, { int score_max = 0, score; BlockDriver *drv = NULL, *d; + IO_CODE(); QLIST_FOREACH(d, &bdrv_drivers, list) { if (d->bdrv_probe) { @@ -993,6 +1029,8 @@ static int find_image_format(BlockBackend *file, const char *filename, uint8_t buf[BLOCK_PROBE_BUF_SIZE]; int ret = 0; + GLOBAL_STATE_CODE(); + /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) { *pdrv = &bdrv_raw; @@ -1024,6 +1062,7 @@ static int find_image_format(BlockBackend *file, const char *filename, int refresh_total_sectors(BlockDriverState *bs, int64_t hint) { BlockDriver *drv = bs->drv; + IO_CODE(); if (!drv) { return -ENOMEDIUM; @@ -1058,6 +1097,7 @@ int refresh_total_sectors(BlockDriverState *bs, int64_t hint) static void bdrv_join_options(BlockDriverState *bs, QDict *options, QDict *old_options) { + GLOBAL_STATE_CODE(); if (bs->drv && bs->drv->bdrv_join_options) { bs->drv->bdrv_join_options(options, old_options); } else { @@ -1074,6 +1114,7 @@ static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts, BlockdevDetectZeroesOptions detect_zeroes = qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value, BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err); + GLOBAL_STATE_CODE(); g_free(value); if (local_err) { error_propagate(errp, local_err); @@ -1189,6 +1230,7 @@ static void bdrv_child_cb_drained_end(BdrvChild *child, static int bdrv_child_cb_inactivate(BdrvChild *child) { BlockDriverState *bs = child->opaque; + GLOBAL_STATE_CODE(); assert(bs->open_flags & BDRV_O_INACTIVE); return 0; } @@ -1215,6 +1257,7 @@ static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx, static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options) { + GLOBAL_STATE_CODE(); *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; /* For temporary files, unconditional cache=unsafe is fine */ @@ -1235,6 +1278,7 @@ static void bdrv_backing_attach(BdrvChild *c) BlockDriverState *parent = c->opaque; BlockDriverState *backing_hd = c->bs; + GLOBAL_STATE_CODE(); assert(!parent->backing_blocker); error_setg(&parent->backing_blocker, "node is used as backing hd of '%s'", @@ -1273,6 +1317,7 @@ static void bdrv_backing_detach(BdrvChild *c) { BlockDriverState *parent = c->opaque; + GLOBAL_STATE_CODE(); assert(parent->backing_blocker); bdrv_op_unblock_all(c->bs, parent->backing_blocker); error_free(parent->backing_blocker); @@ -1285,6 +1330,7 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, BlockDriverState *parent = c->opaque; bool read_only = bdrv_is_read_only(parent); int ret; + GLOBAL_STATE_CODE(); if (read_only) { ret = bdrv_reopen_set_read_only(parent, false, errp); @@ -1316,6 +1362,7 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, int parent_flags, QDict *parent_options) { int flags = parent_flags; + GLOBAL_STATE_CODE(); /* * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL. @@ -1391,6 +1438,7 @@ static void bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; + assert_bdrv_graph_writable(bs); QLIST_INSERT_HEAD(&bs->children, child, next); if (child->role & BDRV_CHILD_COW) { @@ -1410,6 +1458,7 @@ static void bdrv_child_cb_detach(BdrvChild *child) bdrv_unapply_subtree_drain(child, bs); + assert_bdrv_graph_writable(bs); QLIST_REMOVE(child, next); } @@ -1425,6 +1474,7 @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c) { BlockDriverState *bs = c->opaque; + IO_CODE(); return bdrv_get_aio_context(bs); } @@ -1447,12 +1497,14 @@ const BdrvChildClass child_of_bds = { AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) { + GLOBAL_STATE_CODE(); return c->klass->get_parent_aio_context(c); } static int bdrv_open_flags(BlockDriverState *bs, int flags) { int open_flags = flags; + GLOBAL_STATE_CODE(); /* * Clear flags that are internal to the block layer before opening the @@ -1465,6 +1517,8 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags) static void update_flags_from_options(int *flags, QemuOpts *opts) { + GLOBAL_STATE_CODE(); + *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY); if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { @@ -1486,6 +1540,7 @@ static void update_flags_from_options(int *flags, QemuOpts *opts) static void update_options_from_flags(QDict *options, int flags) { + GLOBAL_STATE_CODE(); if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) { qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE); } @@ -1507,6 +1562,7 @@ static void bdrv_assign_node_name(BlockDriverState *bs, Error **errp) { char *gen_node_name = NULL; + GLOBAL_STATE_CODE(); if (!node_name) { node_name = gen_node_name = id_generate(ID_BLOCK); @@ -1551,6 +1607,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, { Error *local_err = NULL; int i, ret; + GLOBAL_STATE_CODE(); bdrv_assign_node_name(bs, node_name, &local_err); if (local_err) { @@ -1631,6 +1688,8 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, BlockDriverState *bs; int ret; + GLOBAL_STATE_CODE(); + bs = bdrv_new(); bs->open_flags = flags; bs->options = options ?: qdict_new(); @@ -1656,6 +1715,7 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, int flags, Error **errp) { + GLOBAL_STATE_CODE(); return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp); } @@ -1750,6 +1810,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, assert(bs->file == NULL); assert(options != NULL && bs->options != options); + GLOBAL_STATE_CODE(); opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); if (!qemu_opts_absorb_qdict(opts, options, errp)) { @@ -1875,6 +1936,7 @@ static QDict *parse_json_filename(const char *filename, Error **errp) QObject *options_obj; QDict *options; int ret; + GLOBAL_STATE_CODE(); ret = strstart(filename, "json:", &filename); assert(ret); @@ -1902,6 +1964,7 @@ static void parse_json_protocol(QDict *options, const char **pfilename, { QDict *json_options; Error *local_err = NULL; + GLOBAL_STATE_CODE(); /* Parse json: pseudo-protocol */ if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) { @@ -1936,6 +1999,8 @@ static int bdrv_fill_options(QDict **options, const char *filename, BlockDriver *drv = NULL; Error *local_err = NULL; + GLOBAL_STATE_CODE(); + /* * Caution: while qdict_get_try_str() is fine, getting non-string * types would require more care. When @options come from @@ -2057,11 +2122,13 @@ static bool bdrv_is_writable_after_reopen(BlockDriverState *bs, */ bool bdrv_is_writable(BlockDriverState *bs) { + IO_CODE(); return bdrv_is_writable_after_reopen(bs, NULL); } static char *bdrv_child_user_desc(BdrvChild *c) { + GLOBAL_STATE_CODE(); return c->klass->get_parent_desc(c); } @@ -2078,6 +2145,7 @@ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) assert(a->bs); assert(a->bs == b->bs); + GLOBAL_STATE_CODE(); if ((b->perm & a->shared_perm) == b->perm) { return true; @@ -2101,6 +2169,7 @@ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp) { BdrvChild *a, *b; + GLOBAL_STATE_CODE(); /* * During the loop we'll look at each pair twice. That's correct because @@ -2129,6 +2198,7 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, uint64_t *nperm, uint64_t *nshared) { assert(bs->drv && bs->drv->bdrv_child_perm); + GLOBAL_STATE_CODE(); bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, parent_perm, parent_shared, nperm, nshared); @@ -2155,6 +2225,8 @@ static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found, BdrvChild *child; g_autoptr(GHashTable) local_found = NULL; + GLOBAL_STATE_CODE(); + if (!found) { assert(!list); found = local_found = g_hash_table_new(NULL, NULL); @@ -2182,6 +2254,8 @@ static void bdrv_child_set_perm_abort(void *opaque) { BdrvChildSetPermState *s = opaque; + GLOBAL_STATE_CODE(); + s->child->perm = s->old_perm; s->child->shared_perm = s->old_shared_perm; } @@ -2195,6 +2269,7 @@ static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, Transaction *tran) { BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1); + GLOBAL_STATE_CODE(); *s = (BdrvChildSetPermState) { .child = c, @@ -2212,6 +2287,7 @@ static void bdrv_drv_set_perm_commit(void *opaque) { BlockDriverState *bs = opaque; uint64_t cumulative_perms, cumulative_shared_perms; + GLOBAL_STATE_CODE(); if (bs->drv->bdrv_set_perm) { bdrv_get_cumulative_perm(bs, &cumulative_perms, @@ -2223,6 +2299,7 @@ static void bdrv_drv_set_perm_commit(void *opaque) static void bdrv_drv_set_perm_abort(void *opaque) { BlockDriverState *bs = opaque; + GLOBAL_STATE_CODE(); if (bs->drv->bdrv_abort_perm_update) { bs->drv->bdrv_abort_perm_update(bs); @@ -2238,6 +2315,7 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm, Transaction *tran, Error **errp) { + GLOBAL_STATE_CODE(); if (!bs->drv) { return 0; } @@ -2266,6 +2344,7 @@ typedef struct BdrvReplaceChildState { static void bdrv_replace_child_commit(void *opaque) { BdrvReplaceChildState *s = opaque; + GLOBAL_STATE_CODE(); if (s->free_empty_child && !s->child->bs) { bdrv_child_free(s->child); @@ -2278,6 +2357,7 @@ static void bdrv_replace_child_abort(void *opaque) BdrvReplaceChildState *s = opaque; BlockDriverState *new_bs = s->child->bs; + GLOBAL_STATE_CODE(); /* * old_bs reference is transparently moved from @s to s->child. * @@ -2374,6 +2454,7 @@ static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q, BdrvChild *c; int ret; uint64_t cumulative_perms, cumulative_shared_perms; + GLOBAL_STATE_CODE(); bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms); @@ -2442,6 +2523,7 @@ static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, { int ret; BlockDriverState *bs; + GLOBAL_STATE_CODE(); for ( ; list; list = list->next) { bs = list->data; @@ -2466,6 +2548,8 @@ void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, uint64_t cumulative_perms = 0; uint64_t cumulative_shared_perms = BLK_PERM_ALL; + GLOBAL_STATE_CODE(); + QLIST_FOREACH(c, &bs->parents, next_parent) { cumulative_perms |= c->perm; cumulative_shared_perms &= c->shared_perm; @@ -2509,6 +2593,7 @@ static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp) int ret; Transaction *tran = tran_new(); g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); + GLOBAL_STATE_CODE(); ret = bdrv_list_refresh_perms(list, NULL, tran, errp); tran_finalize(tran, ret); @@ -2523,6 +2608,8 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, Transaction *tran = tran_new(); int ret; + GLOBAL_STATE_CODE(); + bdrv_child_set_perm(c, perm, shared, tran); ret = bdrv_refresh_perms(c->bs, &local_err); @@ -2553,6 +2640,8 @@ int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) uint64_t parent_perms, parent_shared; uint64_t perms, shared; + GLOBAL_STATE_CODE(); + bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared); bdrv_child_perm(bs, c->bs, c, c->role, NULL, parent_perms, parent_shared, &perms, &shared); @@ -2571,6 +2660,7 @@ static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { + GLOBAL_STATE_CODE(); *nperm = perm & DEFAULT_PERM_PASSTHROUGH; *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; } @@ -2582,6 +2672,7 @@ static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c, uint64_t *nperm, uint64_t *nshared) { assert(role & BDRV_CHILD_COW); + GLOBAL_STATE_CODE(); /* * We want consistent read from backing files if the parent needs it. @@ -2618,6 +2709,7 @@ static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c, { int flags; + GLOBAL_STATE_CODE(); assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)); flags = bdrv_reopen_get_flags(reopen_queue, bs); @@ -2694,6 +2786,7 @@ void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { + GLOBAL_STATE_CODE(); if (role & BDRV_CHILD_FILTERED) { assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | BDRV_CHILD_COW))); @@ -2752,6 +2845,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, assert(!child->frozen); assert(old_bs != new_bs); + GLOBAL_STATE_CODE(); if (old_bs && new_bs) { assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); @@ -2776,6 +2870,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, if (child->klass->detach) { child->klass->detach(child); } + assert_bdrv_graph_writable(old_bs); QLIST_REMOVE(child, next_parent); } @@ -2785,6 +2880,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, } if (new_bs) { + assert_bdrv_graph_writable(new_bs); QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); /* @@ -2827,6 +2923,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, static void bdrv_child_free(BdrvChild *child) { assert(!child->bs); + GLOBAL_STATE_CODE(); assert(!child->next.le_prev); /* not in children list */ g_free(child->name); @@ -2845,6 +2942,7 @@ static void bdrv_attach_child_common_abort(void *opaque) BdrvChild *child = *s->child; BlockDriverState *bs = child->bs; + GLOBAL_STATE_CODE(); /* * Pass free_empty_child=false, because we still need the child * for the AioContext operations on the parent below; those @@ -2907,6 +3005,7 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, assert(child); assert(*child == NULL); assert(child_class->get_parent_desc); + GLOBAL_STATE_CODE(); new_child = g_new(BdrvChild, 1); *new_child = (BdrvChild) { @@ -2987,6 +3086,7 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, uint64_t perm, shared_perm; assert(parent_bs->drv); + GLOBAL_STATE_CODE(); if (bdrv_recurse_has_child(child_bs, parent_bs)) { error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle", @@ -3012,6 +3112,7 @@ static void bdrv_detach_child(BdrvChild **childp) { BlockDriverState *old_bs = (*childp)->bs; + GLOBAL_STATE_CODE(); bdrv_replace_child_noperm(childp, NULL, true); if (old_bs) { @@ -3051,6 +3152,8 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, BdrvChild *child = NULL; Transaction *tran = tran_new(); + GLOBAL_STATE_CODE(); + ret = bdrv_attach_child_common(child_bs, child_name, child_class, child_role, perm, shared_perm, opaque, &child, tran, errp); @@ -3091,6 +3194,8 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BdrvChild *child = NULL; Transaction *tran = tran_new(); + GLOBAL_STATE_CODE(); + ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class, child_role, &child, tran, errp); if (ret < 0) { @@ -3117,6 +3222,8 @@ void bdrv_root_unref_child(BdrvChild *child) { BlockDriverState *child_bs; + GLOBAL_STATE_CODE(); + child_bs = child->bs; bdrv_detach_child(&child); bdrv_unref(child_bs); @@ -3191,6 +3298,7 @@ static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child, /* Callers must ensure that child->frozen is false. */ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) { + GLOBAL_STATE_CODE(); if (child == NULL) { return; } @@ -3203,6 +3311,7 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) { BdrvChild *c; + GLOBAL_STATE_CODE(); QLIST_FOREACH(c, &bs->parents, next_parent) { if (c->klass->change_media) { c->klass->change_media(c, load); @@ -3253,6 +3362,8 @@ static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file; BdrvChildRole role; + GLOBAL_STATE_CODE(); + if (!parent_bs->drv) { /* * Node without drv is an object without a class :/. TODO: finally fix @@ -3332,6 +3443,7 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs, BlockDriverState *backing_hd, Transaction *tran, Error **errp) { + GLOBAL_STATE_CODE(); return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); } @@ -3341,6 +3453,7 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, int ret; Transaction *tran = tran_new(); + GLOBAL_STATE_CODE(); bdrv_drained_begin(bs); ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); @@ -3380,6 +3493,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, QDict *tmp_parent_options = NULL; Error *local_err = NULL; + GLOBAL_STATE_CODE(); + if (bs->backing != NULL) { goto free_exit; } @@ -3539,6 +3654,8 @@ BdrvChild *bdrv_open_child(const char *filename, { BlockDriverState *bs; + GLOBAL_STATE_CODE(); + bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, child_role, allow_none, errp); if (bs == NULL) { @@ -3561,6 +3678,8 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) const char *reference = NULL; Visitor *v = NULL; + GLOBAL_STATE_CODE(); + if (ref->type == QTYPE_QSTRING) { reference = ref->u.reference; } else { @@ -3603,6 +3722,8 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, BlockDriverState *bs_snapshot = NULL; int ret; + GLOBAL_STATE_CODE(); + /* if snapshot, we create a temporary backing file and open it instead of opening 'filename' directly */ @@ -3690,6 +3811,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, assert(!child_class || !flags); assert(!child_class == !parent); + GLOBAL_STATE_CODE(); if (reference) { bool options_non_empty = options ? qdict_size(options) : false; @@ -3958,6 +4080,8 @@ close_and_fail: BlockDriverState *bdrv_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp) { + GLOBAL_STATE_CODE(); + return bdrv_open_inherit(filename, reference, options, flags, NULL, NULL, 0, errp); } @@ -4074,6 +4198,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, * important to avoid graph changes between the recursive queuing here and * bdrv_reopen_multiple(). */ assert(bs->quiesce_counter > 0); + GLOBAL_STATE_CODE(); if (bs_queue == NULL) { bs_queue = g_new0(BlockReopenQueue, 1); @@ -4212,12 +4337,15 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, QDict *options, bool keep_old_opts) { + GLOBAL_STATE_CODE(); + return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false, NULL, 0, keep_old_opts); } void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) { + GLOBAL_STATE_CODE(); if (bs_queue) { BlockReopenQueueEntry *bs_entry, *next; QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { @@ -4259,6 +4387,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) assert(qemu_get_current_aio_context() == qemu_get_aio_context()); assert(bs_queue != NULL); + GLOBAL_STATE_CODE(); QTAILQ_FOREACH(bs_entry, bs_queue, entry) { ctx = bdrv_get_aio_context(bs_entry->state.bs); @@ -4365,6 +4494,8 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, BlockReopenQueue *queue; int ret; + GLOBAL_STATE_CODE(); + bdrv_subtree_drained_begin(bs); if (ctx != qemu_get_aio_context()) { aio_context_release(ctx); @@ -4386,6 +4517,8 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, { QDict *opts = qdict_new(); + GLOBAL_STATE_CODE(); + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); return bdrv_reopen(bs, opts, true, errp); @@ -4420,6 +4553,8 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, QObject *value; const char *str; + GLOBAL_STATE_CODE(); + value = qdict_get(reopen_state->options, child_name); if (value == NULL) { return 0; @@ -4518,6 +4653,7 @@ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, assert(reopen_state != NULL); assert(reopen_state->bs->drv != NULL); + GLOBAL_STATE_CODE(); drv = reopen_state->bs->drv; /* This function and each driver's bdrv_reopen_prepare() remove @@ -4728,6 +4864,7 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state) bs = reopen_state->bs; drv = bs->drv; assert(drv != NULL); + GLOBAL_STATE_CODE(); /* If there are any driver level actions to take */ if (drv->bdrv_reopen_commit) { @@ -4769,6 +4906,7 @@ static void bdrv_reopen_abort(BDRVReopenState *reopen_state) assert(reopen_state != NULL); drv = reopen_state->bs->drv; assert(drv != NULL); + GLOBAL_STATE_CODE(); if (drv->bdrv_reopen_abort) { drv->bdrv_reopen_abort(reopen_state); @@ -4781,6 +4919,7 @@ static void bdrv_close(BlockDriverState *bs) BdrvAioNotifier *ban, *ban_next; BdrvChild *child, *next; + GLOBAL_STATE_CODE(); assert(!bs->refcnt); bdrv_drained_begin(bs); /* complete I/O */ @@ -4840,6 +4979,7 @@ static void bdrv_close(BlockDriverState *bs) void bdrv_close_all(void) { assert(job_next(NULL) == NULL); + GLOBAL_STATE_CODE(); /* Drop references from requests still in flight, such as canceled block * jobs whose AIO context has not been polled yet */ @@ -4958,7 +5098,7 @@ static void bdrv_remove_filter_or_cow_child_abort(void *opaque) static void bdrv_remove_filter_or_cow_child_commit(void *opaque) { BdrvRemoveFilterOrCowChild *s = opaque; - + GLOBAL_STATE_CODE(); bdrv_child_free(s->child); } @@ -5041,6 +5181,7 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, BdrvChild *c, *next; assert(to != NULL); + GLOBAL_STATE_CODE(); QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); @@ -5091,6 +5232,7 @@ static int bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to_cow_parent = NULL; int ret; + GLOBAL_STATE_CODE(); assert(to != NULL); if (detach_subchain) { @@ -5154,11 +5296,15 @@ out: int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp) { + GLOBAL_STATE_CODE(); + return bdrv_replace_node_common(from, to, true, false, errp); } int bdrv_drop_filter(BlockDriverState *bs, Error **errp) { + GLOBAL_STATE_CODE(); + return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true, errp); } @@ -5181,6 +5327,8 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, int ret; Transaction *tran = tran_new(); + GLOBAL_STATE_CODE(); + assert(!bs_new->backing); ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing", @@ -5214,6 +5362,8 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, g_autoptr(GSList) refresh_list = NULL; BlockDriverState *old_bs = child->bs; + GLOBAL_STATE_CODE(); + bdrv_ref(old_bs); bdrv_drained_begin(old_bs); bdrv_drained_begin(new_bs); @@ -5241,6 +5391,7 @@ static void bdrv_delete(BlockDriverState *bs) { assert(bdrv_op_blocker_is_empty(bs)); assert(!bs->refcnt); + GLOBAL_STATE_CODE(); /* remove from list, if necessary */ if (bs->node_name[0] != '\0') { @@ -5285,6 +5436,8 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, node_name = qdict_get_try_str(options, "node-name"); + GLOBAL_STATE_CODE(); + new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags, errp); options = NULL; /* bdrv_new_open_driver() eats options */ @@ -5320,6 +5473,7 @@ fail: int coroutine_fn bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { + IO_CODE(); if (bs->drv == NULL) { return -ENOMEDIUM; } @@ -5345,6 +5499,8 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, BlockDriver *drv = bs->drv; int ret; + GLOBAL_STATE_CODE(); + if (!drv) { return -ENOMEDIUM; } @@ -5386,6 +5542,9 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, BlockDriverState *bdrv_find_overlay(BlockDriverState *active, BlockDriverState *bs) { + + GLOBAL_STATE_CODE(); + bs = bdrv_skip_filters(bs); active = bdrv_skip_filters(active); @@ -5403,6 +5562,8 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState *active, /* Given a BDS, searches for the base layer. */ BlockDriverState *bdrv_find_base(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); + return bdrv_find_overlay(bs, NULL); } @@ -5417,6 +5578,8 @@ bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, BlockDriverState *i; BdrvChild *child; + GLOBAL_STATE_CODE(); + for (i = bs; i != base; i = child_bs(child)) { child = bdrv_filter_or_cow_child(i); @@ -5443,6 +5606,8 @@ int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, BlockDriverState *i; BdrvChild *child; + GLOBAL_STATE_CODE(); + if (bdrv_is_backing_chain_frozen(bs, base, errp)) { return -EPERM; } @@ -5477,6 +5642,8 @@ void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base) BlockDriverState *i; BdrvChild *child; + GLOBAL_STATE_CODE(); + for (i = bs; i != base; i = child_bs(child)) { child = bdrv_filter_or_cow_child(i); if (child) { @@ -5526,6 +5693,8 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, g_autoptr(GSList) updated_children = NULL; GSList *p; + GLOBAL_STATE_CODE(); + bdrv_ref(top); bdrv_subtree_drained_begin(top); @@ -5637,6 +5806,8 @@ static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs) int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) { BlockDriver *drv = bs->drv; + IO_CODE(); + if (!drv) { return -ENOMEDIUM; } @@ -5686,6 +5857,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, BlockDriverState *in_bs, Error **errp) { + IO_CODE(); if (!drv->bdrv_measure) { error_setg(errp, "Block driver '%s' does not support size measurement", drv->format_name); @@ -5701,6 +5873,7 @@ BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, int64_t bdrv_nb_sectors(BlockDriverState *bs) { BlockDriver *drv = bs->drv; + IO_CODE(); if (!drv) return -ENOMEDIUM; @@ -5721,6 +5894,7 @@ int64_t bdrv_nb_sectors(BlockDriverState *bs) int64_t bdrv_getlength(BlockDriverState *bs) { int64_t ret = bdrv_nb_sectors(bs); + IO_CODE(); if (ret < 0) { return ret; @@ -5735,12 +5909,14 @@ int64_t bdrv_getlength(BlockDriverState *bs) void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) { int64_t nb_sectors = bdrv_nb_sectors(bs); + IO_CODE(); *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; } bool bdrv_is_sg(BlockDriverState *bs) { + IO_CODE(); return bs->sg; } @@ -5750,6 +5926,7 @@ bool bdrv_is_sg(BlockDriverState *bs) bool bdrv_supports_compressed_writes(BlockDriverState *bs) { BlockDriverState *filtered; + IO_CODE(); if (!bs->drv || !block_driver_can_compress(bs->drv)) { return false; @@ -5769,6 +5946,7 @@ bool bdrv_supports_compressed_writes(BlockDriverState *bs) const char *bdrv_get_format_name(BlockDriverState *bs) { + IO_CODE(); return bs->drv ? bs->drv->format_name : NULL; } @@ -5785,6 +5963,8 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name), int i; const char **formats = NULL; + GLOBAL_STATE_CODE(); + QLIST_FOREACH(drv, &bdrv_drivers, list) { if (drv->format_name) { bool found = false; @@ -5843,6 +6023,7 @@ BlockDriverState *bdrv_find_node(const char *node_name) BlockDriverState *bs; assert(node_name); + GLOBAL_STATE_CODE(); QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { if (!strcmp(node_name, bs->node_name)) { @@ -5859,6 +6040,8 @@ BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, BlockDeviceInfoList *list; BlockDriverState *bs; + GLOBAL_STATE_CODE(); + list = NULL; QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp); @@ -5934,6 +6117,7 @@ static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent, { BlockPermission qapi_perm; XDbgBlockGraphEdge *edge; + GLOBAL_STATE_CODE(); edge = g_new0(XDbgBlockGraphEdge, 1); @@ -5964,6 +6148,8 @@ XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp) BdrvChild *child; XDbgBlockGraphConstructor *gr = xdbg_graph_new(); + GLOBAL_STATE_CODE(); + for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { char *allocated_name = NULL; const char *name = blk_name(blk); @@ -6007,6 +6193,8 @@ BlockDriverState *bdrv_lookup_bs(const char *device, BlockBackend *blk; BlockDriverState *bs; + GLOBAL_STATE_CODE(); + if (device) { blk = blk_by_name(device); @@ -6038,6 +6226,9 @@ BlockDriverState *bdrv_lookup_bs(const char *device, * return false. If either argument is NULL, return false. */ bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) { + + GLOBAL_STATE_CODE(); + while (top && top != base) { top = bdrv_filter_or_cow_bs(top); } @@ -6047,6 +6238,7 @@ bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) BlockDriverState *bdrv_next_node(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); if (!bs) { return QTAILQ_FIRST(&graph_bdrv_states); } @@ -6055,6 +6247,7 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs) BlockDriverState *bdrv_next_all_states(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); if (!bs) { return QTAILQ_FIRST(&all_bdrv_states); } @@ -6063,6 +6256,7 @@ BlockDriverState *bdrv_next_all_states(BlockDriverState *bs) const char *bdrv_get_node_name(const BlockDriverState *bs) { + IO_CODE(); return bs->node_name; } @@ -6070,6 +6264,7 @@ const char *bdrv_get_parent_name(const BlockDriverState *bs) { BdrvChild *c; const char *name; + IO_CODE(); /* If multiple parents have a name, just pick the first one. */ QLIST_FOREACH(c, &bs->parents, next_parent) { @@ -6087,6 +6282,7 @@ const char *bdrv_get_parent_name(const BlockDriverState *bs) /* TODO check what callers really want: bs->node_name or blk_name() */ const char *bdrv_get_device_name(const BlockDriverState *bs) { + IO_CODE(); return bdrv_get_parent_name(bs) ?: ""; } @@ -6096,22 +6292,26 @@ const char *bdrv_get_device_name(const BlockDriverState *bs) * absent, then this returns an empty (non-null) string. */ const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) { + IO_CODE(); return bdrv_get_parent_name(bs) ?: bs->node_name; } int bdrv_get_flags(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); return bs->open_flags; } int bdrv_has_zero_init_1(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); return 1; } int bdrv_has_zero_init(BlockDriverState *bs) { BlockDriverState *filtered; + GLOBAL_STATE_CODE(); if (!bs->drv) { return 0; @@ -6137,6 +6337,7 @@ int bdrv_has_zero_init(BlockDriverState *bs) bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) { + IO_CODE(); if (!(bs->open_flags & BDRV_O_UNMAP)) { return false; } @@ -6147,6 +6348,7 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) void bdrv_get_backing_filename(BlockDriverState *bs, char *filename, int filename_size) { + IO_CODE(); pstrcpy(filename, filename_size, bs->backing_file); } @@ -6154,6 +6356,7 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) { int ret; BlockDriver *drv = bs->drv; + IO_CODE(); /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ if (!drv) { return -ENOMEDIUM; @@ -6182,6 +6385,7 @@ ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, Error **errp) { BlockDriver *drv = bs->drv; + IO_CODE(); if (drv && drv->bdrv_get_specific_info) { return drv->bdrv_get_specific_info(bs, errp); } @@ -6191,6 +6395,7 @@ ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs) { BlockDriver *drv = bs->drv; + IO_CODE(); if (!drv || !drv->bdrv_get_specific_stats) { return NULL; } @@ -6199,6 +6404,7 @@ BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs) void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event) { + IO_CODE(); if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { return; } @@ -6208,6 +6414,7 @@ void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event) static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { bs = bdrv_primary_bs(bs); } @@ -6223,6 +6430,7 @@ static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs) int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, const char *tag) { + GLOBAL_STATE_CODE(); bs = bdrv_find_debug_node(bs); if (bs) { return bs->drv->bdrv_debug_breakpoint(bs, event, tag); @@ -6233,6 +6441,7 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) { + GLOBAL_STATE_CODE(); bs = bdrv_find_debug_node(bs); if (bs) { return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); @@ -6243,6 +6452,7 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) int bdrv_debug_resume(BlockDriverState *bs, const char *tag) { + GLOBAL_STATE_CODE(); while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { bs = bdrv_primary_bs(bs); } @@ -6256,6 +6466,7 @@ int bdrv_debug_resume(BlockDriverState *bs, const char *tag) bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) { + GLOBAL_STATE_CODE(); while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { bs = bdrv_primary_bs(bs); } @@ -6283,6 +6494,8 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, BlockDriverState *retval = NULL; BlockDriverState *bs_below; + GLOBAL_STATE_CODE(); + if (!bs || !bs->drv || !backing_file) { return NULL; } @@ -6393,19 +6606,21 @@ void bdrv_init_with_whitelist(void) bdrv_init(); } -int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) +int bdrv_activate(BlockDriverState *bs, Error **errp) { BdrvChild *child, *parent; Error *local_err = NULL; int ret; BdrvDirtyBitmap *bm; + GLOBAL_STATE_CODE(); + if (!bs->drv) { return -ENOMEDIUM; } QLIST_FOREACH(child, &bs->children, next) { - bdrv_co_invalidate_cache(child->bs, &local_err); + bdrv_activate(child->bs, &local_err); if (local_err) { error_propagate(errp, local_err); return -EINVAL; @@ -6418,7 +6633,7 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) * Note that the required permissions of inactive images are always a * subset of the permissions required after activating the image. This * allows us to just get the permissions upfront without restricting - * drv->bdrv_invalidate_cache(). + * bdrv_co_invalidate_cache(). * * It also means that in error cases, we don't have to try and revert to * the old permissions (which is an operation that could fail, too). We can @@ -6433,13 +6648,10 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) return ret; } - if (bs->drv->bdrv_co_invalidate_cache) { - bs->drv->bdrv_co_invalidate_cache(bs, &local_err); - if (local_err) { - bs->open_flags |= BDRV_O_INACTIVE; - error_propagate(errp, local_err); - return -EINVAL; - } + ret = bdrv_invalidate_cache(bs, errp); + if (ret < 0) { + bs->open_flags |= BDRV_O_INACTIVE; + return ret; } FOR_EACH_DIRTY_BITMAP(bs, bm) { @@ -6468,17 +6680,37 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) return 0; } -void bdrv_invalidate_cache_all(Error **errp) +int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) +{ + Error *local_err = NULL; + IO_CODE(); + + assert(!(bs->open_flags & BDRV_O_INACTIVE)); + + if (bs->drv->bdrv_co_invalidate_cache) { + bs->drv->bdrv_co_invalidate_cache(bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + } + + return 0; +} + +void bdrv_activate_all(Error **errp) { BlockDriverState *bs; BdrvNextIterator it; + GLOBAL_STATE_CODE(); + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); int ret; aio_context_acquire(aio_context); - ret = bdrv_invalidate_cache(bs, errp); + ret = bdrv_activate(bs, errp); aio_context_release(aio_context); if (ret < 0) { bdrv_next_cleanup(&it); @@ -6490,6 +6722,7 @@ void bdrv_invalidate_cache_all(Error **errp) static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active) { BdrvChild *parent; + GLOBAL_STATE_CODE(); QLIST_FOREACH(parent, &bs->parents, next_parent) { if (parent->klass->parent_is_bds) { @@ -6509,6 +6742,8 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) int ret; uint64_t cumulative_perms, cumulative_shared_perms; + GLOBAL_STATE_CODE(); + if (!bs->drv) { return -ENOMEDIUM; } @@ -6572,6 +6807,8 @@ int bdrv_inactivate_all(void) int ret = 0; GSList *aio_ctxs = NULL, *ctx; + GLOBAL_STATE_CODE(); + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); @@ -6615,6 +6852,7 @@ bool bdrv_is_inserted(BlockDriverState *bs) { BlockDriver *drv = bs->drv; BdrvChild *child; + IO_CODE(); if (!drv) { return false; @@ -6636,6 +6874,7 @@ bool bdrv_is_inserted(BlockDriverState *bs) void bdrv_eject(BlockDriverState *bs, bool eject_flag) { BlockDriver *drv = bs->drv; + IO_CODE(); if (drv && drv->bdrv_eject) { drv->bdrv_eject(bs, eject_flag); @@ -6649,7 +6888,7 @@ void bdrv_eject(BlockDriverState *bs, bool eject_flag) void bdrv_lock_medium(BlockDriverState *bs, bool locked) { BlockDriver *drv = bs->drv; - + IO_CODE(); trace_bdrv_lock_medium(bs, locked); if (drv && drv->bdrv_lock_medium) { @@ -6660,6 +6899,7 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked) /* Get a reference to bs */ void bdrv_ref(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); bs->refcnt++; } @@ -6668,6 +6908,7 @@ void bdrv_ref(BlockDriverState *bs) * deleted. */ void bdrv_unref(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); if (!bs) { return; } @@ -6685,6 +6926,7 @@ struct BdrvOpBlocker { bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) { BdrvOpBlocker *blocker; + GLOBAL_STATE_CODE(); assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); if (!QLIST_EMPTY(&bs->op_blockers[op])) { blocker = QLIST_FIRST(&bs->op_blockers[op]); @@ -6699,6 +6941,7 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) { BdrvOpBlocker *blocker; + GLOBAL_STATE_CODE(); assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); blocker = g_new0(BdrvOpBlocker, 1); @@ -6709,6 +6952,7 @@ void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) { BdrvOpBlocker *blocker, *next; + GLOBAL_STATE_CODE(); assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { if (blocker->reason == reason) { @@ -6721,6 +6965,7 @@ void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) void bdrv_op_block_all(BlockDriverState *bs, Error *reason) { int i; + GLOBAL_STATE_CODE(); for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { bdrv_op_block(bs, i, reason); } @@ -6729,6 +6974,7 @@ void bdrv_op_block_all(BlockDriverState *bs, Error *reason) void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) { int i; + GLOBAL_STATE_CODE(); for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { bdrv_op_unblock(bs, i, reason); } @@ -6737,7 +6983,7 @@ void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) bool bdrv_op_blocker_is_empty(BlockDriverState *bs) { int i; - + GLOBAL_STATE_CODE(); for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { if (!QLIST_EMPTY(&bs->op_blockers[i])) { return false; @@ -6759,6 +7005,8 @@ void bdrv_img_create(const char *filename, const char *fmt, Error *local_err = NULL; int ret = 0; + GLOBAL_STATE_CODE(); + /* Find driver and parse its options */ drv = bdrv_find_format(fmt); if (!drv) { @@ -6936,6 +7184,7 @@ out: AioContext *bdrv_get_aio_context(BlockDriverState *bs) { + IO_CODE(); return bs ? bs->aio_context : qemu_get_aio_context(); } @@ -6944,6 +7193,7 @@ AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs) Coroutine *self = qemu_coroutine_self(); AioContext *old_ctx = qemu_coroutine_get_aio_context(self); AioContext *new_ctx; + IO_CODE(); /* * Increase bs->in_flight to ensure that this operation is completed before @@ -6958,6 +7208,7 @@ AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs) void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) { + IO_CODE(); aio_co_reschedule_self(old_ctx); bdrv_dec_in_flight(bs); } @@ -6991,11 +7242,13 @@ void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co) { + IO_CODE(); aio_co_enter(bdrv_get_aio_context(bs), co); } static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) { + GLOBAL_STATE_CODE(); QLIST_REMOVE(ban, list); g_free(ban); } @@ -7005,6 +7258,7 @@ static void bdrv_detach_aio_context(BlockDriverState *bs) BdrvAioNotifier *baf, *baf_tmp; assert(!bs->walking_aio_notifiers); + GLOBAL_STATE_CODE(); bs->walking_aio_notifiers = true; QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) { if (baf->deleted) { @@ -7032,6 +7286,7 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, AioContext *new_context) { BdrvAioNotifier *ban, *ban_tmp; + GLOBAL_STATE_CODE(); if (bs->quiesce_counter) { aio_disable_external(new_context); @@ -7078,6 +7333,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, BdrvChild *child, *parent; g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + GLOBAL_STATE_CODE(); if (old_context == new_context) { return; @@ -7150,6 +7406,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, GSList **ignore, Error **errp) { + GLOBAL_STATE_CODE(); if (g_slist_find(*ignore, c)) { return true; } @@ -7175,6 +7432,7 @@ static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, GSList **ignore, Error **errp) { + GLOBAL_STATE_CODE(); if (g_slist_find(*ignore, c)) { return true; } @@ -7193,6 +7451,8 @@ bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, return true; } + GLOBAL_STATE_CODE(); + QLIST_FOREACH(c, &bs->parents, next_parent) { if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) { return false; @@ -7213,6 +7473,8 @@ int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, GSList *ignore; bool ret; + GLOBAL_STATE_CODE(); + ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp); g_slist_free(ignore); @@ -7231,6 +7493,7 @@ int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, Error **errp) { + GLOBAL_STATE_CODE(); return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp); } @@ -7244,6 +7507,7 @@ void bdrv_add_aio_context_notifier(BlockDriverState *bs, .detach_aio_context = detach_aio_context, .opaque = opaque }; + GLOBAL_STATE_CODE(); QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); } @@ -7255,6 +7519,7 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs, void *opaque) { BdrvAioNotifier *ban, *ban_next; + GLOBAL_STATE_CODE(); QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { if (ban->attached_aio_context == attached_aio_context && @@ -7279,6 +7544,7 @@ int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, bool force, Error **errp) { + GLOBAL_STATE_CODE(); if (!bs->drv) { error_setg(errp, "Node is ejected"); return -ENOMEDIUM; @@ -7309,6 +7575,8 @@ bool bdrv_recurse_can_replace(BlockDriverState *bs, { BlockDriverState *filtered; + GLOBAL_STATE_CODE(); + if (!bs || !bs->drv) { return false; } @@ -7349,6 +7617,8 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, BlockDriverState *to_replace_bs = bdrv_find_node(node_name); AioContext *aio_context; + GLOBAL_STATE_CODE(); + if (!to_replace_bs) { error_setg(errp, "Failed to find node with node-name='%s'", node_name); return NULL; @@ -7478,6 +7748,7 @@ static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs) * would result in exactly bs->backing. */ static bool bdrv_backing_overridden(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); if (bs->backing) { return strcmp(bs->auto_backing_file, bs->backing->bs->filename); @@ -7510,6 +7781,8 @@ void bdrv_refresh_filename(BlockDriverState *bs) bool generate_json_filename; /* Whether our default implementation should fill exact_filename (false) or not (true) */ + GLOBAL_STATE_CODE(); + if (!drv) { return; } @@ -7632,6 +7905,8 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp) BlockDriver *drv = bs->drv; BlockDriverState *child_bs; + GLOBAL_STATE_CODE(); + if (!drv) { error_setg(errp, "Node '%s' is ejected", bs->node_name); return NULL; @@ -7663,7 +7938,7 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp) void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, Error **errp) { - + GLOBAL_STATE_CODE(); if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) { error_setg(errp, "The node %s does not support adding a child", bdrv_get_device_or_node_name(parent_bs)); @@ -7683,6 +7958,7 @@ void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) { BdrvChild *tmp; + GLOBAL_STATE_CODE(); if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) { error_setg(errp, "The node %s does not support removing a child", bdrv_get_device_or_node_name(parent_bs)); @@ -7710,6 +7986,7 @@ int bdrv_make_empty(BdrvChild *c, Error **errp) BlockDriver *drv = c->bs->drv; int ret; + GLOBAL_STATE_CODE(); assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)); if (!drv->bdrv_make_empty) { @@ -7734,6 +8011,8 @@ int bdrv_make_empty(BdrvChild *c, Error **errp) */ BdrvChild *bdrv_cow_child(BlockDriverState *bs) { + IO_CODE(); + if (!bs || !bs->drv) { return NULL; } @@ -7757,6 +8036,7 @@ BdrvChild *bdrv_cow_child(BlockDriverState *bs) BdrvChild *bdrv_filter_child(BlockDriverState *bs) { BdrvChild *c; + IO_CODE(); if (!bs || !bs->drv) { return NULL; @@ -7788,6 +8068,7 @@ BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs) { BdrvChild *cow_child = bdrv_cow_child(bs); BdrvChild *filter_child = bdrv_filter_child(bs); + IO_CODE(); /* Filter nodes cannot have COW backing files */ assert(!(cow_child && filter_child)); @@ -7808,6 +8089,7 @@ BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs) BdrvChild *bdrv_primary_child(BlockDriverState *bs) { BdrvChild *c, *found = NULL; + IO_CODE(); QLIST_FOREACH(c, &bs->children, next) { if (c->role & BDRV_CHILD_PRIMARY) { @@ -7860,6 +8142,7 @@ static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs, */ BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); return bdrv_do_skip_filters(bs, true); } @@ -7869,6 +8152,7 @@ BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs) */ BlockDriverState *bdrv_skip_filters(BlockDriverState *bs) { + IO_CODE(); return bdrv_do_skip_filters(bs, false); } @@ -7878,6 +8162,7 @@ BlockDriverState *bdrv_skip_filters(BlockDriverState *bs) */ BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs) { + IO_CODE(); return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs))); } @@ -7913,8 +8198,8 @@ static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs, */ bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum) { + IO_CODE(); RCU_READ_LOCK_GUARD(); - return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum); } @@ -7924,6 +8209,7 @@ bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum) void bdrv_bsc_invalidate_range(BlockDriverState *bs, int64_t offset, int64_t bytes) { + IO_CODE(); RCU_READ_LOCK_GUARD(); if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) { @@ -7938,6 +8224,7 @@ void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes) { BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1); BdrvBlockStatusCache *old_bsc; + IO_CODE(); *new_bsc = (BdrvBlockStatusCache) { .valid = true, diff --git a/block/amend.c b/block/amend.c index 392df9ef83..f696a006e3 100644 --- a/block/amend.c +++ b/block/amend.c @@ -53,10 +53,31 @@ static int coroutine_fn blockdev_amend_run(Job *job, Error **errp) return ret; } +static int blockdev_amend_pre_run(BlockdevAmendJob *s, Error **errp) +{ + if (s->bs->drv->bdrv_amend_pre_run) { + return s->bs->drv->bdrv_amend_pre_run(s->bs, errp); + } + + return 0; +} + +static void blockdev_amend_free(Job *job) +{ + BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common); + + if (s->bs->drv->bdrv_amend_clean) { + s->bs->drv->bdrv_amend_clean(s->bs); + } + + bdrv_unref(s->bs); +} + static const JobDriver blockdev_amend_job_driver = { .instance_size = sizeof(BlockdevAmendJob), .job_type = JOB_TYPE_AMEND, .run = blockdev_amend_run, + .free = blockdev_amend_free, }; void qmp_x_blockdev_amend(const char *job_id, @@ -110,8 +131,15 @@ void qmp_x_blockdev_amend(const char *job_id, return; } + bdrv_ref(bs); s->bs = bs, s->opts = QAPI_CLONE(BlockdevAmendOptions, options), s->force = has_force ? force : false; + + if (blockdev_amend_pre_run(s, errp)) { + job_early_fail(&s->common); + return; + } + job_start(&s->common); } diff --git a/block/backup.c b/block/backup.c index 21d5983779..5cfd0b999c 100644 --- a/block/backup.c +++ b/block/backup.c @@ -372,6 +372,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, assert(bs); assert(target); + GLOBAL_STATE_CODE(); /* QMP interface protects us from these cases */ assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL); diff --git a/block/block-backend.c b/block/block-backend.c index 4ff6b4d785..e0e1aff4b1 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -79,6 +79,7 @@ struct BlockBackend { bool allow_aio_context_change; bool allow_write_beyond_eof; + /* Protected by BQL */ NotifierList remove_bs_notifiers, insert_bs_notifiers; QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers; @@ -111,12 +112,14 @@ static const AIOCBInfo block_backend_aiocb_info = { static void drive_info_del(DriveInfo *dinfo); static BlockBackend *bdrv_first_blk(BlockDriverState *bs); -/* All BlockBackends */ +/* All BlockBackends. Protected by BQL. */ static QTAILQ_HEAD(, BlockBackend) block_backends = QTAILQ_HEAD_INITIALIZER(block_backends); -/* All BlockBackends referenced by the monitor and which are iterated through by - * blk_next() */ +/* + * All BlockBackends referenced by the monitor and which are iterated through by + * blk_next(). Protected by BQL. + */ static QTAILQ_HEAD(, BlockBackend) monitor_block_backends = QTAILQ_HEAD_INITIALIZER(monitor_block_backends); @@ -236,6 +239,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp) void blk_set_force_allow_inactivate(BlockBackend *blk) { + GLOBAL_STATE_CODE(); blk->force_allow_inactivate = true; } @@ -354,6 +358,8 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) { BlockBackend *blk; + GLOBAL_STATE_CODE(); + blk = g_new0(BlockBackend, 1); blk->refcnt = 1; blk->ctx = ctx; @@ -391,6 +397,8 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, { BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm); + GLOBAL_STATE_CODE(); + if (blk_insert_bs(blk, bs, errp) < 0) { blk_unref(blk); return NULL; @@ -419,6 +427,8 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, uint64_t perm = 0; uint64_t shared = BLK_PERM_ALL; + GLOBAL_STATE_CODE(); + /* * blk_new_open() is mainly used in .bdrv_create implementations and the * tools where sharing isn't a major concern because the BDS stays private @@ -496,6 +506,7 @@ static void drive_info_del(DriveInfo *dinfo) int blk_get_refcnt(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk ? blk->refcnt : 0; } @@ -506,6 +517,7 @@ int blk_get_refcnt(BlockBackend *blk) void blk_ref(BlockBackend *blk) { assert(blk->refcnt > 0); + GLOBAL_STATE_CODE(); blk->refcnt++; } @@ -516,6 +528,7 @@ void blk_ref(BlockBackend *blk) */ void blk_unref(BlockBackend *blk) { + GLOBAL_STATE_CODE(); if (blk) { assert(blk->refcnt > 0); if (blk->refcnt > 1) { @@ -536,6 +549,7 @@ void blk_unref(BlockBackend *blk) */ BlockBackend *blk_all_next(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&block_backends); } @@ -544,6 +558,8 @@ void blk_remove_all_bs(void) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); + while ((blk = blk_all_next(blk)) != NULL) { AioContext *ctx = blk_get_aio_context(blk); @@ -567,6 +583,7 @@ void blk_remove_all_bs(void) */ BlockBackend *blk_next(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk ? QTAILQ_NEXT(blk, monitor_link) : QTAILQ_FIRST(&monitor_block_backends); } @@ -633,6 +650,7 @@ static void bdrv_next_reset(BdrvNextIterator *it) BlockDriverState *bdrv_first(BdrvNextIterator *it) { + GLOBAL_STATE_CODE(); bdrv_next_reset(it); return bdrv_next(it); } @@ -670,6 +688,7 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp) { assert(!blk->name); assert(name && name[0]); + GLOBAL_STATE_CODE(); if (!id_wellformed(name)) { error_setg(errp, "Invalid device name"); @@ -697,6 +716,8 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp) */ void monitor_remove_blk(BlockBackend *blk) { + GLOBAL_STATE_CODE(); + if (!blk->name) { return; } @@ -712,6 +733,7 @@ void monitor_remove_blk(BlockBackend *blk) */ const char *blk_name(const BlockBackend *blk) { + IO_CODE(); return blk->name ?: ""; } @@ -723,6 +745,7 @@ BlockBackend *blk_by_name(const char *name) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); assert(name); while ((blk = blk_next(blk)) != NULL) { if (!strcmp(name, blk->name)) { @@ -737,12 +760,16 @@ BlockBackend *blk_by_name(const char *name) */ BlockDriverState *blk_bs(BlockBackend *blk) { + IO_CODE(); return blk->root ? blk->root->bs : NULL; } static BlockBackend *bdrv_first_blk(BlockDriverState *bs) { BdrvChild *child; + + GLOBAL_STATE_CODE(); + QLIST_FOREACH(child, &bs->parents, next_parent) { if (child->klass == &child_root) { return child->opaque; @@ -757,6 +784,7 @@ static BlockBackend *bdrv_first_blk(BlockDriverState *bs) */ bool bdrv_has_blk(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); return bdrv_first_blk(bs) != NULL; } @@ -767,6 +795,7 @@ bool bdrv_is_root_node(BlockDriverState *bs) { BdrvChild *c; + GLOBAL_STATE_CODE(); QLIST_FOREACH(c, &bs->parents, next_parent) { if (c->klass != &child_root) { return false; @@ -781,6 +810,7 @@ bool bdrv_is_root_node(BlockDriverState *bs) */ DriveInfo *blk_legacy_dinfo(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->legacy_dinfo; } @@ -792,6 +822,7 @@ DriveInfo *blk_legacy_dinfo(BlockBackend *blk) DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo) { assert(!blk->legacy_dinfo); + GLOBAL_STATE_CODE(); return blk->legacy_dinfo = dinfo; } @@ -802,6 +833,7 @@ DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo) BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); while ((blk = blk_next(blk)) != NULL) { if (blk->legacy_dinfo == dinfo) { @@ -816,6 +848,7 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo) */ BlockBackendPublic *blk_get_public(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return &blk->public; } @@ -824,6 +857,7 @@ BlockBackendPublic *blk_get_public(BlockBackend *blk) */ BlockBackend *blk_by_public(BlockBackendPublic *public) { + GLOBAL_STATE_CODE(); return container_of(public, BlockBackend, public); } @@ -835,6 +869,8 @@ void blk_remove_bs(BlockBackend *blk) ThrottleGroupMember *tgm = &blk->public.throttle_group_member; BdrvChild *root; + GLOBAL_STATE_CODE(); + notifier_list_notify(&blk->remove_bs_notifiers, blk); if (tgm->throttle_state) { BlockDriverState *bs = blk_bs(blk); @@ -869,6 +905,7 @@ void blk_remove_bs(BlockBackend *blk) int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) { ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + GLOBAL_STATE_CODE(); bdrv_ref(bs); blk->root = bdrv_root_attach_child(bs, "root", &child_root, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, @@ -892,6 +929,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) */ int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp) { + GLOBAL_STATE_CODE(); return bdrv_replace_child_bs(blk->root, new_bs, errp); } @@ -902,6 +940,7 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, Error **errp) { int ret; + GLOBAL_STATE_CODE(); if (blk->root && !blk->disable_perm) { ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp); @@ -918,6 +957,7 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) { + GLOBAL_STATE_CODE(); *perm = blk->perm; *shared_perm = blk->shared_perm; } @@ -928,6 +968,7 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) */ int blk_attach_dev(BlockBackend *blk, DeviceState *dev) { + GLOBAL_STATE_CODE(); if (blk->dev) { return -EBUSY; } @@ -953,6 +994,7 @@ int blk_attach_dev(BlockBackend *blk, DeviceState *dev) void blk_detach_dev(BlockBackend *blk, DeviceState *dev) { assert(blk->dev == dev); + GLOBAL_STATE_CODE(); blk->dev = NULL; blk->dev_ops = NULL; blk->dev_opaque = NULL; @@ -966,6 +1008,7 @@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev) */ DeviceState *blk_get_attached_dev(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->dev; } @@ -974,6 +1017,7 @@ DeviceState *blk_get_attached_dev(BlockBackend *blk) char *blk_get_attached_dev_id(BlockBackend *blk) { DeviceState *dev = blk->dev; + IO_CODE(); if (!dev) { return g_strdup(""); @@ -994,6 +1038,8 @@ BlockBackend *blk_by_dev(void *dev) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); + assert(dev != NULL); while ((blk = blk_all_next(blk)) != NULL) { if (blk->dev == dev) { @@ -1011,6 +1057,7 @@ BlockBackend *blk_by_dev(void *dev) void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque) { + GLOBAL_STATE_CODE(); blk->dev_ops = ops; blk->dev_opaque = opaque; @@ -1032,6 +1079,7 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, */ void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp) { + GLOBAL_STATE_CODE(); if (blk->dev_ops && blk->dev_ops->change_media_cb) { bool tray_was_open, tray_is_open; Error *local_err = NULL; @@ -1064,6 +1112,7 @@ static void blk_root_change_media(BdrvChild *child, bool load) */ bool blk_dev_has_removable_media(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb); } @@ -1072,6 +1121,7 @@ bool blk_dev_has_removable_media(BlockBackend *blk) */ bool blk_dev_has_tray(BlockBackend *blk) { + IO_CODE(); return blk->dev_ops && blk->dev_ops->is_tray_open; } @@ -1081,6 +1131,7 @@ bool blk_dev_has_tray(BlockBackend *blk) */ void blk_dev_eject_request(BlockBackend *blk, bool force) { + GLOBAL_STATE_CODE(); if (blk->dev_ops && blk->dev_ops->eject_request_cb) { blk->dev_ops->eject_request_cb(blk->dev_opaque, force); } @@ -1091,6 +1142,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force) */ bool blk_dev_is_tray_open(BlockBackend *blk) { + IO_CODE(); if (blk_dev_has_tray(blk)) { return blk->dev_ops->is_tray_open(blk->dev_opaque); } @@ -1103,6 +1155,7 @@ bool blk_dev_is_tray_open(BlockBackend *blk) */ bool blk_dev_is_medium_locked(BlockBackend *blk) { + GLOBAL_STATE_CODE(); if (blk->dev_ops && blk->dev_ops->is_medium_locked) { return blk->dev_ops->is_medium_locked(blk->dev_opaque); } @@ -1123,6 +1176,7 @@ static void blk_root_resize(BdrvChild *child) void blk_iostatus_enable(BlockBackend *blk) { + GLOBAL_STATE_CODE(); blk->iostatus_enabled = true; blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK; } @@ -1131,6 +1185,7 @@ void blk_iostatus_enable(BlockBackend *blk) * enables it _and_ the VM is configured to stop on errors */ bool blk_iostatus_is_enabled(const BlockBackend *blk) { + IO_CODE(); return (blk->iostatus_enabled && (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || blk->on_write_error == BLOCKDEV_ON_ERROR_STOP || @@ -1139,16 +1194,19 @@ bool blk_iostatus_is_enabled(const BlockBackend *blk) BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->iostatus; } void blk_iostatus_disable(BlockBackend *blk) { + GLOBAL_STATE_CODE(); blk->iostatus_enabled = false; } void blk_iostatus_reset(BlockBackend *blk) { + GLOBAL_STATE_CODE(); if (blk_iostatus_is_enabled(blk)) { blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK; } @@ -1156,6 +1214,7 @@ void blk_iostatus_reset(BlockBackend *blk) void blk_iostatus_set_err(BlockBackend *blk, int error) { + IO_CODE(); assert(blk_iostatus_is_enabled(blk)); if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : @@ -1165,16 +1224,19 @@ void blk_iostatus_set_err(BlockBackend *blk, int error) void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow) { + IO_CODE(); blk->allow_write_beyond_eof = allow; } void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow) { + IO_CODE(); blk->allow_aio_context_change = allow; } void blk_set_disable_request_queuing(BlockBackend *blk, bool disable) { + IO_CODE(); blk->disable_request_queuing = disable; } @@ -1228,6 +1290,7 @@ blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, { int ret; BlockDriverState *bs; + IO_CODE(); blk_wait_while_drained(blk); @@ -1258,6 +1321,7 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, BdrvRequestFlags flags) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); ret = blk_co_do_preadv(blk, offset, bytes, qiov, flags); @@ -1274,6 +1338,7 @@ blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, { int ret; BlockDriverState *bs; + IO_CODE(); blk_wait_while_drained(blk); @@ -1309,6 +1374,7 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, BdrvRequestFlags flags) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); @@ -1321,6 +1387,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { + IO_OR_GS_CODE(); return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags); } @@ -1349,22 +1416,26 @@ typedef struct BlkRwCo { int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, int64_t bytes, BdrvRequestFlags flags) { + IO_OR_GS_CODE(); return blk_pwritev_part(blk, offset, bytes, NULL, 0, flags | BDRV_REQ_ZERO_WRITE); } int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags) { + GLOBAL_STATE_CODE(); return bdrv_make_zero(blk->root, flags); } void blk_inc_in_flight(BlockBackend *blk) { + IO_CODE(); qatomic_inc(&blk->in_flight); } void blk_dec_in_flight(BlockBackend *blk) { + IO_CODE(); qatomic_dec(&blk->in_flight); aio_wait_kick(); } @@ -1383,6 +1454,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, void *opaque, int ret) { struct BlockBackendAIOCB *acb; + IO_CODE(); blk_inc_in_flight(blk); acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque); @@ -1490,6 +1562,7 @@ BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, int64_t bytes, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE, cb, opaque); } @@ -1498,6 +1571,7 @@ int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes) { int ret; QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_OR_GS_CODE(); blk_inc_in_flight(blk); ret = blk_do_preadv(blk, offset, bytes, &qiov, 0); @@ -1511,6 +1585,7 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes, { int ret; QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_OR_GS_CODE(); ret = blk_pwritev_part(blk, offset, bytes, &qiov, 0, flags); @@ -1519,6 +1594,7 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes, int64_t blk_getlength(BlockBackend *blk) { + IO_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -1528,6 +1604,7 @@ int64_t blk_getlength(BlockBackend *blk) void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr) { + IO_CODE(); if (!blk_bs(blk)) { *nb_sectors_ptr = 0; } else { @@ -1537,6 +1614,7 @@ void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr) int64_t blk_nb_sectors(BlockBackend *blk) { + IO_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -1548,6 +1626,7 @@ BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); assert((uint64_t)qiov->size <= INT64_MAX); return blk_aio_prwv(blk, offset, qiov->size, qiov, blk_aio_read_entry, flags, cb, opaque); @@ -1557,6 +1636,7 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); assert((uint64_t)qiov->size <= INT64_MAX); return blk_aio_prwv(blk, offset, qiov->size, qiov, blk_aio_write_entry, flags, cb, opaque); @@ -1564,11 +1644,13 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, void blk_aio_cancel(BlockAIOCB *acb) { + GLOBAL_STATE_CODE(); bdrv_aio_cancel(acb); } void blk_aio_cancel_async(BlockAIOCB *acb) { + IO_CODE(); bdrv_aio_cancel_async(acb); } @@ -1576,6 +1658,8 @@ void blk_aio_cancel_async(BlockAIOCB *acb) int coroutine_fn blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) { + IO_CODE(); + blk_wait_while_drained(blk); if (!blk_is_available(blk)) { @@ -1588,6 +1672,7 @@ blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); ret = blk_do_ioctl(blk, req, buf); @@ -1609,6 +1694,7 @@ static void blk_aio_ioctl_entry(void *opaque) BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); } @@ -1617,6 +1703,7 @@ int coroutine_fn blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) { int ret; + IO_CODE(); blk_wait_while_drained(blk); @@ -1641,6 +1728,7 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, cb, opaque); } @@ -1649,6 +1737,7 @@ int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); ret = blk_co_do_pdiscard(blk, offset, bytes); @@ -1660,6 +1749,7 @@ int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); ret = blk_do_pdiscard(blk, offset, bytes); @@ -1672,6 +1762,7 @@ int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) int coroutine_fn blk_co_do_flush(BlockBackend *blk) { blk_wait_while_drained(blk); + IO_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; @@ -1692,12 +1783,14 @@ static void blk_aio_flush_entry(void *opaque) BlockAIOCB *blk_aio_flush(BlockBackend *blk, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); } int coroutine_fn blk_co_flush(BlockBackend *blk) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); ret = blk_co_do_flush(blk); @@ -1720,6 +1813,7 @@ int blk_flush(BlockBackend *blk) void blk_drain(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { bdrv_ref(bs); @@ -1740,6 +1834,8 @@ void blk_drain_all(void) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); + bdrv_drain_all_begin(); while ((blk = blk_all_next(blk)) != NULL) { @@ -1759,12 +1855,14 @@ void blk_drain_all(void) void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, BlockdevOnError on_write_error) { + GLOBAL_STATE_CODE(); blk->on_read_error = on_read_error; blk->on_write_error = on_write_error; } BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read) { + IO_CODE(); return is_read ? blk->on_read_error : blk->on_write_error; } @@ -1772,6 +1870,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, int error) { BlockdevOnError on_err = blk_get_on_error(blk, is_read); + IO_CODE(); switch (on_err) { case BLOCKDEV_ON_ERROR_ENOSPC: @@ -1811,6 +1910,7 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, bool is_read, int error) { assert(error >= 0); + IO_CODE(); if (action == BLOCK_ERROR_ACTION_STOP) { /* First set the iostatus, so that "info block" returns an iostatus @@ -1842,6 +1942,7 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, bool blk_supports_write_perm(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { return !bdrv_is_read_only(bs); @@ -1856,12 +1957,14 @@ bool blk_supports_write_perm(BlockBackend *blk) */ bool blk_is_writable(BlockBackend *blk) { + IO_CODE(); return blk->perm & BLK_PERM_WRITE; } bool blk_is_sg(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (!bs) { return false; @@ -1872,41 +1975,47 @@ bool blk_is_sg(BlockBackend *blk) bool blk_enable_write_cache(BlockBackend *blk) { + IO_CODE(); return blk->enable_write_cache; } void blk_set_enable_write_cache(BlockBackend *blk, bool wce) { + GLOBAL_STATE_CODE(); blk->enable_write_cache = wce; } -void blk_invalidate_cache(BlockBackend *blk, Error **errp) +void blk_activate(BlockBackend *blk, Error **errp) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (!bs) { error_setg(errp, "Device '%s' has no medium", blk->name); return; } - bdrv_invalidate_cache(bs, errp); + bdrv_activate(bs, errp); } bool blk_is_inserted(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); return bs && bdrv_is_inserted(bs); } bool blk_is_available(BlockBackend *blk) { + IO_CODE(); return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk); } void blk_lock_medium(BlockBackend *blk, bool locked) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); if (bs) { bdrv_lock_medium(bs, locked); @@ -1917,6 +2026,7 @@ void blk_eject(BlockBackend *blk, bool eject_flag) { BlockDriverState *bs = blk_bs(blk); char *id; + IO_CODE(); if (bs) { bdrv_eject(bs, eject_flag); @@ -1933,6 +2043,7 @@ void blk_eject(BlockBackend *blk, bool eject_flag) int blk_get_flags(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { return bdrv_get_flags(bs); @@ -1945,6 +2056,7 @@ int blk_get_flags(BlockBackend *blk) uint32_t blk_get_request_alignment(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE; } @@ -1953,6 +2065,7 @@ uint64_t blk_get_max_hw_transfer(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); uint64_t max = INT_MAX; + IO_CODE(); if (bs) { max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer); @@ -1966,6 +2079,7 @@ uint32_t blk_get_max_transfer(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); uint32_t max = INT_MAX; + IO_CODE(); if (bs) { max = MIN_NON_ZERO(max, bs->bl.max_transfer); @@ -1975,33 +2089,39 @@ uint32_t blk_get_max_transfer(BlockBackend *blk) int blk_get_max_hw_iov(BlockBackend *blk) { + IO_CODE(); return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov, blk->root->bs->bl.max_iov); } int blk_get_max_iov(BlockBackend *blk) { + IO_CODE(); return blk->root->bs->bl.max_iov; } void blk_set_guest_block_size(BlockBackend *blk, int align) { + IO_CODE(); blk->guest_block_size = align; } void *blk_try_blockalign(BlockBackend *blk, size_t size) { + IO_CODE(); return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size); } void *blk_blockalign(BlockBackend *blk, size_t size) { + IO_CODE(); return qemu_blockalign(blk ? blk_bs(blk) : NULL, size); } bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (!bs) { return false; @@ -2013,6 +2133,7 @@ bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp) void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { bdrv_op_unblock(bs, op, reason); @@ -2022,6 +2143,7 @@ void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason) void blk_op_block_all(BlockBackend *blk, Error *reason) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { bdrv_op_block_all(bs, reason); @@ -2031,6 +2153,7 @@ void blk_op_block_all(BlockBackend *blk, Error *reason) void blk_op_unblock_all(BlockBackend *blk, Error *reason) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { bdrv_op_unblock_all(bs, reason); @@ -2040,6 +2163,7 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason) AioContext *blk_get_aio_context(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); if (bs) { AioContext *ctx = bdrv_get_aio_context(blk_bs(blk)); @@ -2090,6 +2214,7 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, Error **errp) { + GLOBAL_STATE_CODE(); return blk_do_set_aio_context(blk, new_context, true, errp); } @@ -2126,6 +2251,7 @@ void blk_add_aio_context_notifier(BlockBackend *blk, { BlockBackendAioNotifier *notifier; BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); notifier = g_new(BlockBackendAioNotifier, 1); notifier->attached_aio_context = attached_aio_context; @@ -2148,6 +2274,8 @@ void blk_remove_aio_context_notifier(BlockBackend *blk, BlockBackendAioNotifier *notifier; BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); + if (bs) { bdrv_remove_aio_context_notifier(bs, attached_aio_context, detach_aio_context, opaque); @@ -2168,17 +2296,20 @@ void blk_remove_aio_context_notifier(BlockBackend *blk, void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify) { + GLOBAL_STATE_CODE(); notifier_list_add(&blk->remove_bs_notifiers, notify); } void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify) { + GLOBAL_STATE_CODE(); notifier_list_add(&blk->insert_bs_notifiers, notify); } void blk_io_plug(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); if (bs) { bdrv_io_plug(bs); @@ -2188,6 +2319,7 @@ void blk_io_plug(BlockBackend *blk) void blk_io_unplug(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); if (bs) { bdrv_io_unplug(bs); @@ -2196,18 +2328,21 @@ void blk_io_unplug(BlockBackend *blk) BlockAcctStats *blk_get_stats(BlockBackend *blk) { + IO_CODE(); return &blk->stats; } void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque); } int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, int64_t bytes, BdrvRequestFlags flags) { + IO_OR_GS_CODE(); return blk_co_pwritev(blk, offset, bytes, NULL, flags | BDRV_REQ_ZERO_WRITE); } @@ -2216,6 +2351,7 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, int64_t bytes) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_OR_GS_CODE(); return blk_pwritev_part(blk, offset, bytes, &qiov, 0, BDRV_REQ_WRITE_COMPRESSED); } @@ -2223,6 +2359,7 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) { + IO_OR_GS_CODE(); if (!blk_is_available(blk)) { error_setg(errp, "No medium inserted"); return -ENOMEDIUM; @@ -2235,6 +2372,7 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int64_t pos, int size) { int ret; + GLOBAL_STATE_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; @@ -2254,6 +2392,7 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size) { + GLOBAL_STATE_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -2263,6 +2402,7 @@ int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size) int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz) { + GLOBAL_STATE_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -2272,6 +2412,7 @@ int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz) int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo) { + GLOBAL_STATE_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -2285,6 +2426,7 @@ int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo) */ void blk_update_root_state(BlockBackend *blk) { + GLOBAL_STATE_CODE(); assert(blk->root); blk->root_state.open_flags = blk->root->bs->open_flags; @@ -2297,6 +2439,7 @@ void blk_update_root_state(BlockBackend *blk) */ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->root_state.detect_zeroes; } @@ -2306,17 +2449,20 @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) */ int blk_get_open_flags_from_root_state(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->root_state.open_flags; } BlockBackendRootState *blk_get_root_state(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return &blk->root_state; } int blk_commit_all(void) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); while ((blk = blk_all_next(blk)) != NULL) { AioContext *aio_context = blk_get_aio_context(blk); @@ -2341,6 +2487,7 @@ int blk_commit_all(void) /* throttling disk I/O limits */ void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg) { + GLOBAL_STATE_CODE(); throttle_group_config(&blk->public.throttle_group_member, cfg); } @@ -2349,6 +2496,7 @@ void blk_io_limits_disable(BlockBackend *blk) BlockDriverState *bs = blk_bs(blk); ThrottleGroupMember *tgm = &blk->public.throttle_group_member; assert(tgm->throttle_state); + GLOBAL_STATE_CODE(); if (bs) { bdrv_ref(bs); bdrv_drained_begin(bs); @@ -2364,12 +2512,14 @@ void blk_io_limits_disable(BlockBackend *blk) void blk_io_limits_enable(BlockBackend *blk, const char *group) { assert(!blk->public.throttle_group_member.throttle_state); + GLOBAL_STATE_CODE(); throttle_group_register_tgm(&blk->public.throttle_group_member, group, blk_get_aio_context(blk)); } void blk_io_limits_update_group(BlockBackend *blk, const char *group) { + GLOBAL_STATE_CODE(); /* this BB is not part of any group */ if (!blk->public.throttle_group_member.throttle_state) { return; @@ -2437,11 +2587,13 @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) void blk_register_buf(BlockBackend *blk, void *host, size_t size) { + GLOBAL_STATE_CODE(); bdrv_register_buf(blk_bs(blk), host, size); } void blk_unregister_buf(BlockBackend *blk, void *host) { + GLOBAL_STATE_CODE(); bdrv_unregister_buf(blk_bs(blk), host); } @@ -2451,6 +2603,8 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, BdrvRequestFlags write_flags) { int r; + IO_CODE(); + r = blk_check_byte_request(blk_in, off_in, bytes); if (r) { return r; @@ -2466,11 +2620,13 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, const BdrvChild *blk_root(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->root; } int blk_make_empty(BlockBackend *blk, Error **errp) { + GLOBAL_STATE_CODE(); if (!blk_is_available(blk)) { error_setg(errp, "No medium inserted"); return -ENOMEDIUM; diff --git a/block/commit.c b/block/commit.c index b1fc7b908b..c76899f640 100644 --- a/block/commit.c +++ b/block/commit.c @@ -253,6 +253,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, uint64_t base_perms, iter_shared_perms; int ret; + GLOBAL_STATE_CODE(); + assert(top != bs); if (bdrv_skip_filters(top) == bdrv_skip_filters(base)) { error_setg(errp, "Invalid files for merge: top and base are the same"); @@ -432,6 +434,8 @@ int bdrv_commit(BlockDriverState *bs) QEMU_AUTO_VFREE uint8_t *buf = NULL; Error *local_err = NULL; + GLOBAL_STATE_CODE(); + if (!drv) return -ENOMEDIUM; diff --git a/block/copy-before-write.c b/block/copy-before-write.c index c30a5ff8de..80b7684dba 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -223,6 +223,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, QDict *opts; assert(source->total_sectors == target->total_sectors); + GLOBAL_STATE_CODE(); opts = qdict_new(); qdict_put_str(opts, "driver", "copy-before-write"); @@ -245,6 +246,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, void bdrv_cbw_drop(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); bdrv_drop_filter(bs, &error_abort); bdrv_unref(bs); } diff --git a/block/copy-before-write.h b/block/copy-before-write.h index 51847e711a..6e72bb25e9 100644 --- a/block/copy-before-write.h +++ b/block/copy-before-write.h @@ -29,6 +29,13 @@ #include "block/block_int.h" #include "block/block-copy.h" +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + BlockDriverState *bdrv_cbw_append(BlockDriverState *source, BlockDriverState *target, const char *filter_node_name, diff --git a/block/coroutines.h b/block/coroutines.h index c8c14a29c8..b293e943c8 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -30,17 +30,17 @@ /* For blk_bs() in generated block/block-gen.c */ #include "sysemu/block-backend.h" +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + int coroutine_fn bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp); -int generated_co_wrapper -bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes, - QEMUIOVector *qiov, BdrvRequestFlags flags); -int generated_co_wrapper -bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, - QEMUIOVector *qiov, BdrvRequestFlags flags); - int coroutine_fn bdrv_co_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, @@ -52,6 +52,51 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, int64_t *map, BlockDriverState **file, int *depth); + +int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs, + QEMUIOVector *qiov, int64_t pos); +int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs, + QEMUIOVector *qiov, int64_t pos); + +int coroutine_fn +nbd_co_do_establish_connection(BlockDriverState *bs, Error **errp); + + +int coroutine_fn +blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); + + +int coroutine_fn +blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); + +int coroutine_fn +blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf); + +int coroutine_fn +blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); + +int coroutine_fn blk_co_do_flush(BlockBackend *blk); + + +/* + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. + * + * See include/block/block-io.h for more information about + * the "I/O or GS" API. + */ + +int generated_co_wrapper +bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); + +int generated_co_wrapper +bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); + int generated_co_wrapper bdrv_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, @@ -63,46 +108,24 @@ bdrv_common_block_status_above(BlockDriverState *bs, int64_t *map, BlockDriverState **file, int *depth); - -int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs, - QEMUIOVector *qiov, int64_t pos); -int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs, - QEMUIOVector *qiov, int64_t pos); - int generated_co_wrapper nbd_do_establish_connection(BlockDriverState *bs, Error **errp); -int coroutine_fn -nbd_co_do_establish_connection(BlockDriverState *bs, Error **errp); - int generated_co_wrapper blk_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); -int coroutine_fn -blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, - QEMUIOVector *qiov, BdrvRequestFlags flags); - int generated_co_wrapper blk_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); -int coroutine_fn -blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, - BdrvRequestFlags flags); int generated_co_wrapper blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf); -int coroutine_fn -blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf); int generated_co_wrapper blk_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); -int coroutine_fn -blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); int generated_co_wrapper blk_do_flush(BlockBackend *blk); -int coroutine_fn blk_co_do_flush(BlockBackend *blk); #endif /* BLOCK_COROUTINES_INT_H */ diff --git a/block/create.c b/block/create.c index 89812669df..4df43f11f4 100644 --- a/block/create.c +++ b/block/create.c @@ -42,6 +42,8 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp) BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); int ret; + GLOBAL_STATE_CODE(); + job_progress_set_remaining(&s->common, 1); ret = s->drv->bdrv_co_create(s->opts, errp); job_progress_update(&s->common, 1); diff --git a/block/crypto.c b/block/crypto.c index c8ba4681e2..9d5fecbef8 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -778,36 +778,54 @@ block_crypto_get_specific_info_luks(BlockDriverState *bs, Error **errp) } static int +block_crypto_amend_prepare(BlockDriverState *bs, Error **errp) +{ + BlockCrypto *crypto = bs->opaque; + int ret; + + /* apply for exclusive read/write permissions to the underlying file */ + crypto->updating_keys = true; + ret = bdrv_child_refresh_perms(bs, bs->file, errp); + if (ret < 0) { + /* Well, in this case we will not be updating any keys */ + crypto->updating_keys = false; + } + return ret; +} + +static void +block_crypto_amend_cleanup(BlockDriverState *bs) +{ + BlockCrypto *crypto = bs->opaque; + Error *errp = NULL; + + /* release exclusive read/write permissions to the underlying file */ + crypto->updating_keys = false; + bdrv_child_refresh_perms(bs, bs->file, &errp); + + if (errp) { + error_report_err(errp); + } +} + +static int block_crypto_amend_options_generic_luks(BlockDriverState *bs, QCryptoBlockAmendOptions *amend_options, bool force, Error **errp) { BlockCrypto *crypto = bs->opaque; - int ret; assert(crypto); assert(crypto->block); - /* apply for exclusive read/write permissions to the underlying file*/ - crypto->updating_keys = true; - ret = bdrv_child_refresh_perms(bs, bs->file, errp); - if (ret) { - goto cleanup; - } - - ret = qcrypto_block_amend_options(crypto->block, - block_crypto_read_func, - block_crypto_write_func, - bs, - amend_options, - force, - errp); -cleanup: - /* release exclusive read/write permissions to the underlying file*/ - crypto->updating_keys = false; - bdrv_child_refresh_perms(bs, bs->file, errp); - return ret; + return qcrypto_block_amend_options(crypto->block, + block_crypto_read_func, + block_crypto_write_func, + bs, + amend_options, + force, + errp); } static int @@ -833,8 +851,16 @@ block_crypto_amend_options_luks(BlockDriverState *bs, if (!amend_options) { goto cleanup; } + + ret = block_crypto_amend_prepare(bs, errp); + if (ret) { + goto perm_cleanup; + } ret = block_crypto_amend_options_generic_luks(bs, amend_options, force, errp); + +perm_cleanup: + block_crypto_amend_cleanup(bs); cleanup: qapi_free_QCryptoBlockAmendOptions(amend_options); return ret; @@ -931,6 +957,8 @@ static BlockDriver bdrv_crypto_luks = { .bdrv_get_specific_info = block_crypto_get_specific_info_luks, .bdrv_amend_options = block_crypto_amend_options_luks, .bdrv_co_amend = block_crypto_co_amend_luks, + .bdrv_amend_pre_run = block_crypto_amend_prepare, + .bdrv_amend_clean = block_crypto_amend_cleanup, .is_format = true, diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 0ef46163e3..0334b85805 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -496,6 +496,7 @@ static void coroutine_fn bdrv_co_can_store_new_dirty_bitmap_entry(void *opaque) bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, uint32_t granularity, Error **errp) { + IO_CODE(); if (qemu_in_coroutine()) { return bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp); } else { @@ -656,6 +657,7 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) { + IO_CODE(); assert(!bdrv_dirty_bitmap_readonly(bitmap)); bdrv_dirty_bitmaps_lock(bitmap->bs); if (!out) { @@ -673,6 +675,7 @@ void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup) { HBitmap *tmp = bitmap->bitmap; assert(!bdrv_dirty_bitmap_readonly(bitmap)); + GLOBAL_STATE_CODE(); bitmap->bitmap = backup; hbitmap_free(tmp); } @@ -737,6 +740,7 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes) { BdrvDirtyBitmap *bitmap; + IO_CODE(); if (QLIST_EMPTY(&bs->dirty_bitmaps)) { return; @@ -928,6 +932,7 @@ bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest, bool lock) { bool ret; + IO_CODE(); assert(!bdrv_dirty_bitmap_readonly(dest)); assert(!bdrv_dirty_bitmap_inconsistent(dest)); diff --git a/block/export/export.c b/block/export/export.c index 6d3b9964c8..7253af3bc3 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -139,7 +139,7 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) * access since the export could be available before migration handover. * ctx was acquired in the caller. */ - bdrv_invalidate_cache(bs, NULL); + bdrv_activate(bs, NULL); perm = BLK_PERM_CONSISTENT_READ; if (export->writable) { diff --git a/block/export/fuse.c b/block/export/fuse.c index fdda8e3c81..5029e70f84 100644 --- a/block/export/fuse.c +++ b/block/export/fuse.c @@ -86,8 +86,8 @@ static int fuse_export_create(BlockExport *blk_exp, assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE); - /* For growable exports, take the RESIZE permission */ - if (args->growable) { + /* For growable and writable exports, take the RESIZE permission */ + if (args->growable || blk_exp_args->writable) { uint64_t blk_perm, blk_shared_perm; blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm); @@ -392,14 +392,23 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size, { uint64_t blk_perm, blk_shared_perm; BdrvRequestFlags truncate_flags = 0; - int ret; + bool add_resize_perm; + int ret, ret_check; + + /* Growable and writable exports have a permanent RESIZE permission */ + add_resize_perm = !exp->growable && !exp->writable; if (req_zero_write) { truncate_flags |= BDRV_REQ_ZERO_WRITE; } - /* Growable exports have a permanent RESIZE permission */ - if (!exp->growable) { + if (add_resize_perm) { + + if (!qemu_in_main_thread()) { + /* Changing permissions like below only works in the main thread */ + return -EPERM; + } + blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm); ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE, @@ -412,9 +421,11 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size, ret = blk_truncate(exp->common.blk, size, true, prealloc, truncate_flags, NULL); - if (!exp->growable) { + if (add_resize_perm) { /* Must succeed, because we are only giving up the RESIZE permission */ - blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort); + ret_check = blk_set_perm(exp->common.blk, blk_perm, + blk_shared_perm, &error_abort); + assert(ret_check == 0); } return ret; diff --git a/block/io.c b/block/io.c index 4e4cb556c5..efc011ce65 100644 --- a/block/io.c +++ b/block/io.c @@ -70,6 +70,7 @@ static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c, void bdrv_parent_drained_end_single(BdrvChild *c) { int drained_end_counter = 0; + IO_OR_GS_CODE(); bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter); BDRV_POLL_WHILE(c->bs, qatomic_read(&drained_end_counter) > 0); } @@ -114,6 +115,7 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) { + IO_OR_GS_CODE(); c->parent_quiesce_counter++; if (c->klass->drained_begin) { c->klass->drained_begin(c); @@ -164,6 +166,8 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp) BdrvChild *c; bool have_limits; + GLOBAL_STATE_CODE(); + if (tran) { BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1); *s = (BdrvRefreshLimitsState) { @@ -189,10 +193,6 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp) QLIST_FOREACH(c, &bs->children, next) { if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW)) { - bdrv_refresh_limits(c->bs, tran, errp); - if (*errp) { - return; - } bdrv_merge_limits(&bs->bl, &c->bs->bl); have_limits = true; } @@ -226,12 +226,14 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp) */ void bdrv_enable_copy_on_read(BlockDriverState *bs) { + IO_CODE(); qatomic_inc(&bs->copy_on_read); } void bdrv_disable_copy_on_read(BlockDriverState *bs) { int old = qatomic_fetch_dec(&bs->copy_on_read); + IO_CODE(); assert(old >= 1); } @@ -303,6 +305,7 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, BdrvChild *ignore_parent, bool ignore_bds_parents) { BdrvChild *child, *next; + IO_OR_GS_CODE(); if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { return true; @@ -426,6 +429,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent, bool ignore_bds_parents) { + IO_OR_GS_CODE(); assert(!qemu_in_coroutine()); /* Stop things in parent-to-child order */ @@ -477,11 +481,13 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, void bdrv_drained_begin(BlockDriverState *bs) { + IO_OR_GS_CODE(); bdrv_do_drained_begin(bs, false, NULL, false, true); } void bdrv_subtree_drained_begin(BlockDriverState *bs) { + IO_OR_GS_CODE(); bdrv_do_drained_begin(bs, true, NULL, false, true); } @@ -538,18 +544,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, void bdrv_drained_end(BlockDriverState *bs) { int drained_end_counter = 0; + IO_OR_GS_CODE(); bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter); BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); } void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter) { + IO_CODE(); bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter); } void bdrv_subtree_drained_end(BlockDriverState *bs) { int drained_end_counter = 0; + IO_OR_GS_CODE(); bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter); BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); } @@ -557,6 +566,7 @@ void bdrv_subtree_drained_end(BlockDriverState *bs) void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) { int i; + IO_OR_GS_CODE(); for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { bdrv_do_drained_begin(child->bs, true, child, false, true); @@ -567,6 +577,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) { int drained_end_counter = 0; int i; + IO_OR_GS_CODE(); for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { bdrv_do_drained_end(child->bs, true, child, false, @@ -585,6 +596,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) */ void coroutine_fn bdrv_co_drain(BlockDriverState *bs) { + IO_OR_GS_CODE(); assert(qemu_in_coroutine()); bdrv_drained_begin(bs); bdrv_drained_end(bs); @@ -592,6 +604,7 @@ void coroutine_fn bdrv_co_drain(BlockDriverState *bs) void bdrv_drain(BlockDriverState *bs) { + IO_OR_GS_CODE(); bdrv_drained_begin(bs); bdrv_drained_end(bs); } @@ -612,6 +625,7 @@ static bool bdrv_drain_all_poll(void) { BlockDriverState *bs = NULL; bool result = false; + GLOBAL_STATE_CODE(); /* bdrv_drain_poll() can't make changes to the graph and we are holding the * main AioContext lock, so iterating bdrv_next_all_states() is safe. */ @@ -640,6 +654,7 @@ static bool bdrv_drain_all_poll(void) void bdrv_drain_all_begin(void) { BlockDriverState *bs = NULL; + GLOBAL_STATE_CODE(); if (qemu_in_coroutine()) { bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL); @@ -682,6 +697,7 @@ void bdrv_drain_all_begin(void) void bdrv_drain_all_end_quiesce(BlockDriverState *bs) { int drained_end_counter = 0; + GLOBAL_STATE_CODE(); g_assert(bs->quiesce_counter > 0); g_assert(!bs->refcnt); @@ -696,6 +712,7 @@ void bdrv_drain_all_end(void) { BlockDriverState *bs = NULL; int drained_end_counter = 0; + GLOBAL_STATE_CODE(); /* * bdrv queue is managed by record/replay, @@ -723,6 +740,7 @@ void bdrv_drain_all_end(void) void bdrv_drain_all(void) { + GLOBAL_STATE_CODE(); bdrv_drain_all_begin(); bdrv_drain_all_end(); } @@ -867,6 +885,7 @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs) { BdrvTrackedRequest *req; Coroutine *self = qemu_coroutine_self(); + IO_CODE(); QLIST_FOREACH(req, &bs->tracked_requests, list) { if (req->co == self) { @@ -886,7 +905,7 @@ void bdrv_round_to_clusters(BlockDriverState *bs, int64_t *cluster_bytes) { BlockDriverInfo bdi; - + IO_CODE(); if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) { *cluster_offset = offset; *cluster_bytes = bytes; @@ -912,16 +931,19 @@ static int bdrv_get_cluster_size(BlockDriverState *bs) void bdrv_inc_in_flight(BlockDriverState *bs) { + IO_CODE(); qatomic_inc(&bs->in_flight); } void bdrv_wakeup(BlockDriverState *bs) { + IO_CODE(); aio_wait_kick(); } void bdrv_dec_in_flight(BlockDriverState *bs) { + IO_CODE(); qatomic_dec(&bs->in_flight); bdrv_wakeup(bs); } @@ -946,6 +968,7 @@ bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req, uint64_t align) { bool waited; + IO_CODE(); qemu_co_mutex_lock(&req->bs->reqs_lock); @@ -1040,6 +1063,7 @@ static int bdrv_check_request32(int64_t offset, int64_t bytes, int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int64_t bytes, BdrvRequestFlags flags) { + IO_CODE(); return bdrv_pwritev(child, offset, bytes, NULL, BDRV_REQ_ZERO_WRITE | flags); } @@ -1058,6 +1082,7 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags) int ret; int64_t target_size, bytes, offset = 0; BlockDriverState *bs = child->bs; + IO_CODE(); target_size = bdrv_getlength(bs); if (target_size < 0) { @@ -1090,6 +1115,7 @@ int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes) { int ret; QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_CODE(); if (bytes < 0) { return -EINVAL; @@ -1111,6 +1137,7 @@ int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, { int ret; QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_CODE(); if (bytes < 0) { return -EINVAL; @@ -1131,6 +1158,7 @@ int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, const void *buf, int64_t count) { int ret; + IO_CODE(); ret = bdrv_pwrite(child, offset, buf, count); if (ret < 0) { @@ -1797,6 +1825,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { + IO_CODE(); return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags); } @@ -1809,6 +1838,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, BdrvTrackedRequest req; BdrvRequestPadding pad; int ret; + IO_CODE(); trace_bdrv_co_preadv_part(bs, offset, bytes, flags); @@ -2230,6 +2260,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { + IO_CODE(); return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags); } @@ -2243,6 +2274,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, BdrvRequestPadding pad; int ret; bool padded = false; + IO_CODE(); trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags); @@ -2326,6 +2358,7 @@ out: int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, int64_t bytes, BdrvRequestFlags flags) { + IO_CODE(); trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags); if (!(child->bs->open_flags & BDRV_O_UNMAP)) { @@ -2345,6 +2378,8 @@ int bdrv_flush_all(void) BlockDriverState *bs = NULL; int result = 0; + GLOBAL_STATE_CODE(); + /* * bdrv queue is managed by record/replay, * creating new flush request for stopping @@ -2639,6 +2674,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, BlockDriverState *p; int64_t eof = 0; int dummy; + IO_CODE(); assert(!include_base || base); /* Can't include NULL base */ @@ -2728,6 +2764,7 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { + IO_CODE(); return bdrv_common_block_status_above(bs, base, false, true, offset, bytes, pnum, map, file, NULL); } @@ -2735,6 +2772,7 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { + IO_CODE(); return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs), offset, bytes, pnum, map, file); } @@ -2751,6 +2789,7 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, { int ret; int64_t pnum = bytes; + IO_CODE(); if (!bytes) { return 1; @@ -2771,6 +2810,7 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset, { int ret; int64_t dummy; + IO_CODE(); ret = bdrv_common_block_status_above(bs, bs, true, false, offset, bytes, pnum ? pnum : &dummy, NULL, @@ -2807,6 +2847,7 @@ int bdrv_is_allocated_above(BlockDriverState *top, int ret = bdrv_common_block_status_above(top, base, include_base, false, offset, bytes, pnum, NULL, NULL, &depth); + IO_CODE(); if (ret < 0) { return ret; } @@ -2823,6 +2864,7 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) BlockDriver *drv = bs->drv; BlockDriverState *child_bs = bdrv_primary_bs(bs); int ret; + IO_CODE(); ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); if (ret < 0) { @@ -2854,6 +2896,7 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) BlockDriver *drv = bs->drv; BlockDriverState *child_bs = bdrv_primary_bs(bs); int ret; + IO_CODE(); ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); if (ret < 0) { @@ -2884,6 +2927,7 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size); int ret = bdrv_writev_vmstate(bs, &qiov, pos); + IO_CODE(); return ret < 0 ? ret : size; } @@ -2893,6 +2937,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size); int ret = bdrv_readv_vmstate(bs, &qiov, pos); + IO_CODE(); return ret < 0 ? ret : size; } @@ -2902,6 +2947,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, void bdrv_aio_cancel(BlockAIOCB *acb) { + IO_CODE(); qemu_aio_ref(acb); bdrv_aio_cancel_async(acb); while (acb->refcnt > 1) { @@ -2926,6 +2972,7 @@ void bdrv_aio_cancel(BlockAIOCB *acb) * In either case the completion callback must be called. */ void bdrv_aio_cancel_async(BlockAIOCB *acb) { + IO_CODE(); if (acb->aiocb_info->cancel_async) { acb->aiocb_info->cancel_async(acb); } @@ -2940,6 +2987,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) BdrvChild *child; int current_gen; int ret = 0; + IO_CODE(); bdrv_inc_in_flight(bs); @@ -3065,6 +3113,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t max_pdiscard; int head, tail, align; BlockDriverState *bs = child->bs; + IO_CODE(); if (!bs || !bs->drv || !bdrv_is_inserted(bs)) { return -ENOMEDIUM; @@ -3183,6 +3232,7 @@ int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf) .coroutine = qemu_coroutine_self(), }; BlockAIOCB *acb; + IO_CODE(); bdrv_inc_in_flight(bs); if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) { @@ -3207,17 +3257,20 @@ out: void *qemu_blockalign(BlockDriverState *bs, size_t size) { + IO_CODE(); return qemu_memalign(bdrv_opt_mem_align(bs), size); } void *qemu_blockalign0(BlockDriverState *bs, size_t size) { + IO_CODE(); return memset(qemu_blockalign(bs, size), 0, size); } void *qemu_try_blockalign(BlockDriverState *bs, size_t size) { size_t align = bdrv_opt_mem_align(bs); + IO_CODE(); /* Ensure that NULL is never returned on success */ assert(align > 0); @@ -3231,6 +3284,7 @@ void *qemu_try_blockalign(BlockDriverState *bs, size_t size) void *qemu_try_blockalign0(BlockDriverState *bs, size_t size) { void *mem = qemu_try_blockalign(bs, size); + IO_CODE(); if (mem) { memset(mem, 0, size); @@ -3246,6 +3300,7 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) { int i; size_t alignment = bdrv_min_mem_align(bs); + IO_CODE(); for (i = 0; i < qiov->niov; i++) { if ((uintptr_t) qiov->iov[i].iov_base % alignment) { @@ -3262,6 +3317,7 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) void bdrv_io_plug(BlockDriverState *bs) { BdrvChild *child; + IO_CODE(); QLIST_FOREACH(child, &bs->children, next) { bdrv_io_plug(child->bs); @@ -3278,6 +3334,7 @@ void bdrv_io_plug(BlockDriverState *bs) void bdrv_io_unplug(BlockDriverState *bs) { BdrvChild *child; + IO_CODE(); assert(bs->io_plugged); if (qatomic_fetch_dec(&bs->io_plugged) == 1) { @@ -3296,6 +3353,7 @@ void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size) { BdrvChild *child; + GLOBAL_STATE_CODE(); if (bs->drv && bs->drv->bdrv_register_buf) { bs->drv->bdrv_register_buf(bs, host, size); } @@ -3308,6 +3366,7 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host) { BdrvChild *child; + GLOBAL_STATE_CODE(); if (bs->drv && bs->drv->bdrv_unregister_buf) { bs->drv->bdrv_unregister_buf(bs, host); } @@ -3402,6 +3461,7 @@ int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { + IO_CODE(); trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes, read_flags, write_flags); return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset, @@ -3418,6 +3478,7 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { + IO_CODE(); trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, read_flags, write_flags); return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset, @@ -3429,6 +3490,7 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { + IO_CODE(); return bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes, read_flags, write_flags); @@ -3461,7 +3523,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, BdrvTrackedRequest req; int64_t old_size, new_bytes; int ret; - + IO_CODE(); /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ if (!drv) { @@ -3579,6 +3641,7 @@ out: void bdrv_cancel_in_flight(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); if (!bs || !bs->drv) { return; } diff --git a/block/meson.build b/block/meson.build index 8a1ce58c9c..e42bcb58d5 100644 --- a/block/meson.build +++ b/block/meson.build @@ -131,8 +131,11 @@ block_ss.add(module_block_h) wrapper_py = find_program('../scripts/block-coroutine-wrapper.py') block_gen_c = custom_target('block-gen.c', output: 'block-gen.c', - input: files('../include/block/block.h', - 'coroutines.h'), + input: files( + '../include/block/block-io.h', + '../include/block/block-global-state.h', + 'coroutines.h' + ), command: [wrapper_py, '@OUTPUT@', '@INPUT@']) block_ss.add(block_gen_c) diff --git a/block/mirror.c b/block/mirror.c index 69b2c1c697..ce6bc58d1f 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1864,6 +1864,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs, bool is_none_mode; BlockDriverState *base; + GLOBAL_STATE_CODE(); + if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) || (mode == MIRROR_SYNC_MODE_BITMAP)) { error_setg(errp, "Sync mode '%s' not supported", @@ -1889,6 +1891,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, bool base_read_only; BlockJob *job; + GLOBAL_STATE_CODE(); + base_read_only = bdrv_is_read_only(base); if (base_read_only) { diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c index 9f11deec64..972e8a0afc 100644 --- a/block/monitor/bitmap-qmp-cmds.c +++ b/block/monitor/bitmap-qmp-cmds.c @@ -56,6 +56,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, BlockDriverState *bs; BdrvDirtyBitmap *bitmap; + GLOBAL_STATE_CODE(); + if (!node) { error_setg(errp, "Node cannot be NULL"); return NULL; @@ -155,6 +157,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, BdrvDirtyBitmap *bitmap; AioContext *aio_context; + GLOBAL_STATE_CODE(); + bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); if (!bitmap || !bs) { return NULL; @@ -261,6 +265,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, BlockDirtyBitmapMergeSourceList *lst; Error *local_err = NULL; + GLOBAL_STATE_CODE(); + dst = block_dirty_bitmap_lookup(node, target, &bs, errp); if (!dst) { return NULL; diff --git a/block/nbd.c b/block/nbd.c index 5853d85d60..146d25660e 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -313,6 +313,7 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs, BDRVNBDState *s = (BDRVNBDState *)bs->opaque; int ret; bool blocking = nbd_client_connecting_wait(s); + IO_CODE(); assert(!s->ioc); diff --git a/block/parallels.c b/block/parallels.c index 6ebad2a2bb..e58c828422 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -873,7 +873,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->bat_dirty_bmap = bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block)); - /* Disable migration until bdrv_invalidate_cache method is added */ + /* Disable migration until bdrv_activate method is added */ error_setg(&s->migration_blocker, "The Parallels format used by node '%s' " "does not support live migration", bdrv_get_device_or_node_name(bs)); diff --git a/block/snapshot.c b/block/snapshot.c index ccacda8bd5..d6f53c3065 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -57,6 +57,8 @@ int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info, QEMUSnapshotInfo *sn_tab, *sn; int nb_sns, i, ret; + GLOBAL_STATE_CODE(); + ret = -ENOENT; nb_sns = bdrv_snapshot_list(bs, &sn_tab); if (nb_sns < 0) { @@ -105,6 +107,7 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, bool ret = false; assert(id || name); + GLOBAL_STATE_CODE(); nb_sns = bdrv_snapshot_list(bs, &sn_tab); if (nb_sns < 0) { @@ -200,6 +203,7 @@ static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs) int bdrv_can_snapshot(BlockDriverState *bs) { BlockDriver *drv = bs->drv; + GLOBAL_STATE_CODE(); if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { return 0; } @@ -220,6 +224,9 @@ int bdrv_snapshot_create(BlockDriverState *bs, { BlockDriver *drv = bs->drv; BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs); + + GLOBAL_STATE_CODE(); + if (!drv) { return -ENOMEDIUM; } @@ -240,6 +247,8 @@ int bdrv_snapshot_goto(BlockDriverState *bs, BdrvChild **fallback_ptr; int ret, open_ret; + GLOBAL_STATE_CODE(); + if (!drv) { error_setg(errp, "Block driver is closed"); return -ENOMEDIUM; @@ -348,6 +357,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs, BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs); int ret; + GLOBAL_STATE_CODE(); + if (!drv) { error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs)); return -ENOMEDIUM; @@ -380,6 +391,8 @@ int bdrv_snapshot_list(BlockDriverState *bs, { BlockDriver *drv = bs->drv; BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs); + + GLOBAL_STATE_CODE(); if (!drv) { return -ENOMEDIUM; } @@ -419,6 +432,8 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, { BlockDriver *drv = bs->drv; + GLOBAL_STATE_CODE(); + if (!drv) { error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs)); return -ENOMEDIUM; @@ -447,6 +462,8 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, int ret; Error *local_err = NULL; + GLOBAL_STATE_CODE(); + ret = bdrv_snapshot_load_tmp(bs, id_or_name, NULL, &local_err); if (ret == -ENOENT || ret == -EINVAL) { error_free(local_err); @@ -515,6 +532,8 @@ bool bdrv_all_can_snapshot(bool has_devices, strList *devices, g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + GLOBAL_STATE_CODE(); + if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return false; } @@ -549,6 +568,8 @@ int bdrv_all_delete_snapshot(const char *name, g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + GLOBAL_STATE_CODE(); + if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return -1; } @@ -588,6 +609,8 @@ int bdrv_all_goto_snapshot(const char *name, g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + GLOBAL_STATE_CODE(); + if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return -1; } @@ -622,6 +645,8 @@ int bdrv_all_has_snapshot(const char *name, g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + GLOBAL_STATE_CODE(); + if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return -1; } @@ -663,6 +688,7 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, { g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + GLOBAL_STATE_CODE(); if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return -1; @@ -703,6 +729,8 @@ BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs, g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + GLOBAL_STATE_CODE(); + if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return NULL; } diff --git a/block/stream.c b/block/stream.c index 7c6b173ddd..3acb59fe6a 100644 --- a/block/stream.c +++ b/block/stream.c @@ -220,6 +220,8 @@ void stream_start(const char *job_id, BlockDriverState *bs, QDict *opts; int ret; + GLOBAL_STATE_CODE(); + assert(!(base && bottom)); assert(!(backing_file_str && bottom)); diff --git a/blockdev.c b/blockdev.c index 42e098b458..e46e831212 100644 --- a/blockdev.c +++ b/blockdev.c @@ -63,11 +63,13 @@ #include "qemu/main-loop.h" #include "qemu/throttle-options.h" +/* Protected by BQL */ QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states = QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states); void bdrv_set_monitor_owned(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list); } @@ -111,6 +113,8 @@ void override_max_devs(BlockInterfaceType type, int max_devs) BlockBackend *blk; DriveInfo *dinfo; + GLOBAL_STATE_CODE(); + if (max_devs <= 0) { return; } @@ -140,6 +144,8 @@ void blockdev_mark_auto_del(BlockBackend *blk) DriveInfo *dinfo = blk_legacy_dinfo(blk); BlockJob *job; + GLOBAL_STATE_CODE(); + if (!dinfo) { return; } @@ -161,6 +167,7 @@ void blockdev_mark_auto_del(BlockBackend *blk) void blockdev_auto_del(BlockBackend *blk) { DriveInfo *dinfo = blk_legacy_dinfo(blk); + GLOBAL_STATE_CODE(); if (dinfo && dinfo->auto_del) { monitor_remove_blk(blk); @@ -185,6 +192,8 @@ QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file, { QemuOpts *opts; + GLOBAL_STATE_CODE(); + opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false); if (!opts) { return NULL; @@ -205,6 +214,8 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit) BlockBackend *blk; DriveInfo *dinfo; + GLOBAL_STATE_CODE(); + for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { dinfo = blk_legacy_dinfo(blk); if (dinfo && dinfo->type == type @@ -227,6 +238,8 @@ void drive_check_orphaned(void) Location loc; bool orphans = false; + GLOBAL_STATE_CODE(); + for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { dinfo = blk_legacy_dinfo(blk); /* @@ -260,6 +273,7 @@ void drive_check_orphaned(void) DriveInfo *drive_get_by_index(BlockInterfaceType type, int index) { + GLOBAL_STATE_CODE(); return drive_get(type, drive_index_to_bus_id(type, index), drive_index_to_unit_id(type, index)); @@ -271,6 +285,8 @@ int drive_get_max_bus(BlockInterfaceType type) BlockBackend *blk; DriveInfo *dinfo; + GLOBAL_STATE_CODE(); + max_bus = -1; for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { dinfo = blk_legacy_dinfo(blk); @@ -628,6 +644,7 @@ BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) { int bdrv_flags = 0; + GLOBAL_STATE_CODE(); /* bdrv_open() defaults to the values in bdrv_flags (for compatibility * with other callers) rather than what we want as the real defaults. * Apply the defaults here instead. */ @@ -646,6 +663,7 @@ void blockdev_close_all_bdrv_states(void) { BlockDriverState *bs, *next_bs; + GLOBAL_STATE_CODE(); QTAILQ_FOREACH_SAFE(bs, &monitor_bdrv_states, monitor_list, next_bs) { AioContext *ctx = bdrv_get_aio_context(bs); @@ -658,6 +676,7 @@ void blockdev_close_all_bdrv_states(void) /* Iterates over the list of monitor-owned BlockDriverStates */ BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); return bs ? QTAILQ_NEXT(bs, monitor_list) : QTAILQ_FIRST(&monitor_bdrv_states); } @@ -754,6 +773,8 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, const char *filename; int i; + GLOBAL_STATE_CODE(); + /* Change legacy command line options into QMP ones */ static const struct { const char *from; @@ -1174,6 +1195,8 @@ typedef struct BlkActionState BlkActionState; * * Only prepare() may fail. In a single transaction, only one of commit() or * abort() will be called. clean() will always be called if it is present. + * + * Always run under BQL. */ typedef struct BlkActionOps { size_t instance_size; @@ -2283,6 +2306,8 @@ static TransactionProperties *get_transaction_properties( /* * 'Atomic' group operations. The operations are performed as a set, and if * any fail then we roll back all operations in the group. + * + * Always run under BQL. */ void qmp_transaction(TransactionActionList *dev_list, bool has_props, @@ -2294,6 +2319,8 @@ void qmp_transaction(TransactionActionList *dev_list, BlkActionState *state, *next; Error *local_err = NULL; + GLOBAL_STATE_CODE(); + QTAILQ_HEAD(, BlkActionState) snap_bdrv_states; QTAILQ_INIT(&snap_bdrv_states); @@ -3596,6 +3623,8 @@ void qmp_blockdev_del(const char *node_name, Error **errp) AioContext *aio_context; BlockDriverState *bs; + GLOBAL_STATE_CODE(); + bs = bdrv_find_node(node_name); if (!bs) { error_setg(errp, "Failed to find node with node-name='%s'", node_name); diff --git a/blockjob.c b/blockjob.c index 10815a89fe..4868453d74 100644 --- a/blockjob.c +++ b/blockjob.c @@ -62,6 +62,7 @@ static bool is_block_job(Job *job) BlockJob *block_job_next(BlockJob *bjob) { Job *job = bjob ? &bjob->job : NULL; + GLOBAL_STATE_CODE(); do { job = job_next(job); @@ -73,6 +74,7 @@ BlockJob *block_job_next(BlockJob *bjob) BlockJob *block_job_get(const char *id) { Job *job = job_get(id); + GLOBAL_STATE_CODE(); if (job && is_block_job(job)) { return container_of(job, BlockJob, job); @@ -84,6 +86,7 @@ BlockJob *block_job_get(const char *id) void block_job_free(Job *job) { BlockJob *bjob = container_of(job, BlockJob, job); + GLOBAL_STATE_CODE(); block_job_remove_all_bdrv(bjob); ratelimit_destroy(&bjob->limit); @@ -183,6 +186,7 @@ static const BdrvChildClass child_job = { void block_job_remove_all_bdrv(BlockJob *job) { + GLOBAL_STATE_CODE(); /* * bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(), * which will also traverse job->nodes, so consume the list one by @@ -205,6 +209,7 @@ void block_job_remove_all_bdrv(BlockJob *job) bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) { GSList *el; + GLOBAL_STATE_CODE(); for (el = job->nodes; el; el = el->next) { BdrvChild *c = el->data; @@ -221,6 +226,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, { BdrvChild *c; bool need_context_ops; + GLOBAL_STATE_CODE(); bdrv_ref(bs); @@ -270,6 +276,8 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) const BlockJobDriver *drv = block_job_driver(job); int64_t old_speed = job->speed; + GLOBAL_STATE_CODE(); + if (job_apply_verb(&job->job, JOB_VERB_SET_SPEED, errp) < 0) { return false; } @@ -299,6 +307,7 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n) { + IO_CODE(); return ratelimit_calculate_delay(&job->limit, n); } @@ -307,6 +316,8 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp) BlockJobInfo *info; uint64_t progress_current, progress_total; + GLOBAL_STATE_CODE(); + if (block_job_is_internal(job)) { error_setg(errp, "Cannot query QEMU internal jobs"); return NULL; @@ -434,6 +445,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, { BlockJob *job; int ret; + GLOBAL_STATE_CODE(); if (job_id == NULL && !(flags & JOB_INTERNAL)) { job_id = bdrv_get_device_name(bs); @@ -488,6 +500,7 @@ fail: void block_job_iostatus_reset(BlockJob *job) { + GLOBAL_STATE_CODE(); if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { return; } @@ -498,6 +511,7 @@ void block_job_iostatus_reset(BlockJob *job) void block_job_user_resume(Job *job) { BlockJob *bjob = container_of(job, BlockJob, job); + GLOBAL_STATE_CODE(); block_job_iostatus_reset(bjob); } @@ -505,6 +519,7 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err, int is_read, int error) { BlockErrorAction action; + IO_CODE(); switch (on_err) { case BLOCKDEV_ON_ERROR_ENOSPC: @@ -543,5 +558,6 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err, AioContext *block_job_get_aio_context(BlockJob *job) { + GLOBAL_STATE_CODE(); return job->job.aio_context; } diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst index 878e6a5c5c..8b97592663 100644 --- a/docs/tools/qemu-storage-daemon.rst +++ b/docs/tools/qemu-storage-daemon.rst @@ -154,6 +154,13 @@ Standard options: created but before accepting connections. The daemon has started successfully when the pid file is written and clients may begin connecting. +.. option:: --daemonize + + Daemonize the process. The parent process will exit once startup is complete + (i.e., after the pid file has been or would have been written) or failure + occurs. Its exit code reflects whether the child has started up successfully + or failed to do so. + Examples -------- Launch the daemon with QMP monitor socket ``qmp.sock`` so clients can execute diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index 81f9f971d8..74c7190302 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -1023,7 +1023,7 @@ static void postload_update_cb(void *opaque, bool running, RunState state) { PFlashCFI01 *pfl = opaque; - /* This is called after bdrv_invalidate_cache_all. */ + /* This is called after bdrv_activate_all. */ qemu_del_vm_change_state_handler(pfl->vmstate); pfl->vmstate = NULL; diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index fbfdf47e26..18b43be7f6 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -219,7 +219,7 @@ static void postload_update_cb(void *opaque, bool running, RunState state) { SpaprNvram *nvram = opaque; - /* This is called after bdrv_invalidate_cache_all. */ + /* This is called after bdrv_activate_all. */ qemu_del_vm_change_state_handler(nvram->vmstate); nvram->vmstate = NULL; diff --git a/include/block/block-common.h b/include/block/block-common.h new file mode 100644 index 0000000000..0c5dc4a86a --- /dev/null +++ b/include/block/block-common.h @@ -0,0 +1,418 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_COMMON_H +#define BLOCK_COMMON_H + +#include "block/aio.h" +#include "block/aio-wait.h" +#include "qemu/iov.h" +#include "qemu/coroutine.h" +#include "block/accounting.h" +#include "block/dirty-bitmap.h" +#include "block/blockjob.h" +#include "qemu/hbitmap.h" +#include "qemu/transactions.h" + +/* + * generated_co_wrapper + * + * Function specifier, which does nothing but mark functions to be + * generated by scripts/block-coroutine-wrapper.py + * + * Read more in docs/devel/block-coroutine-wrapper.rst + */ +#define generated_co_wrapper + +/* block.c */ +typedef struct BlockDriver BlockDriver; +typedef struct BdrvChild BdrvChild; +typedef struct BdrvChildClass BdrvChildClass; + +typedef struct BlockDriverInfo { + /* in bytes, 0 if irrelevant */ + int cluster_size; + /* offset at which the VM state can be saved (0 if not possible) */ + int64_t vm_state_offset; + bool is_dirty; + /* + * True if this block driver only supports compressed writes + */ + bool needs_compressed_writes; +} BlockDriverInfo; + +typedef struct BlockFragInfo { + uint64_t allocated_clusters; + uint64_t total_clusters; + uint64_t fragmented_clusters; + uint64_t compressed_clusters; +} BlockFragInfo; + +typedef enum { + BDRV_REQ_COPY_ON_READ = 0x1, + BDRV_REQ_ZERO_WRITE = 0x2, + + /* + * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate + * that the block driver should unmap (discard) blocks if it is guaranteed + * that the result will read back as zeroes. The flag is only passed to the + * driver if the block device is opened with BDRV_O_UNMAP. + */ + BDRV_REQ_MAY_UNMAP = 0x4, + + BDRV_REQ_FUA = 0x10, + BDRV_REQ_WRITE_COMPRESSED = 0x20, + + /* + * Signifies that this write request will not change the visible disk + * content. + */ + BDRV_REQ_WRITE_UNCHANGED = 0x40, + + /* + * Forces request serialisation. Use only with write requests. + */ + BDRV_REQ_SERIALISING = 0x80, + + /* + * Execute the request only if the operation can be offloaded or otherwise + * be executed efficiently, but return an error instead of using a slow + * fallback. + */ + BDRV_REQ_NO_FALLBACK = 0x100, + + /* + * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read + * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR + * filter is involved), in which case it signals that the COR operation + * need not read the data into memory (qiov) but only ensure they are + * copied to the top layer (i.e., that COR operation is done). + */ + BDRV_REQ_PREFETCH = 0x200, + + /* + * If we need to wait for other requests, just fail immediately. Used + * only together with BDRV_REQ_SERIALISING. + */ + BDRV_REQ_NO_WAIT = 0x400, + + /* Mask of valid flags */ + BDRV_REQ_MASK = 0x7ff, +} BdrvRequestFlags; + +#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */ +#define BDRV_O_RDWR 0x0002 +#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */ +#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save + writes in a snapshot */ +#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ +#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ +#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the + thread pool */ +#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */ +#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */ +#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */ +#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */ +#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */ +#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */ +#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */ +#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given: + select an appropriate protocol driver, + ignoring the format layer */ +#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */ +#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening + read-write fails */ +#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */ + +#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH) + + +/* Option names of options parsed by the block layer */ + +#define BDRV_OPT_CACHE_WB "cache.writeback" +#define BDRV_OPT_CACHE_DIRECT "cache.direct" +#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" +#define BDRV_OPT_READ_ONLY "read-only" +#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only" +#define BDRV_OPT_DISCARD "discard" +#define BDRV_OPT_FORCE_SHARE "force-share" + + +#define BDRV_SECTOR_BITS 9 +#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) + +#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \ + INT_MAX >> BDRV_SECTOR_BITS) +#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) + +/* + * We want allow aligning requests and disk length up to any 32bit alignment + * and don't afraid of overflow. + * To achieve it, and in the same time use some pretty number as maximum disk + * size, let's define maximum "length" (a limit for any offset/bytes request and + * for disk size) to be the greatest power of 2 less than INT64_MAX. + */ +#define BDRV_MAX_ALIGNMENT (1L << 30) +#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT)) + +/* + * Allocation status flags for bdrv_block_status() and friends. + * + * Public flags: + * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer + * BDRV_BLOCK_ZERO: offset reads as zero + * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data + * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this + * layer rather than any backing, set by block layer + * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this + * layer, set by block layer + * + * Internal flags: + * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request + * that the block layer recompute the answer from the returned + * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID. + * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for + * zeroes in file child of current block node inside + * returned region. Only valid together with both + * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not + * appear with BDRV_BLOCK_ZERO. + * + * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the + * host offset within the returned BDS that is allocated for the + * corresponding raw guest data. However, whether that offset + * actually contains data also depends on BDRV_BLOCK_DATA, as follows: + * + * DATA ZERO OFFSET_VALID + * t t t sectors read as zero, returned file is zero at offset + * t f t sectors read as valid from file at offset + * f t t sectors preallocated, read as zero, returned file not + * necessarily zero at offset + * f f t sectors preallocated but read from backing_hd, + * returned file contains garbage at offset + * t t f sectors preallocated, read as zero, unknown offset + * t f f sectors read from unknown file or offset + * f t f not allocated or unknown offset, read as zero + * f f f not allocated or unknown offset, read from backing_hd + */ +#define BDRV_BLOCK_DATA 0x01 +#define BDRV_BLOCK_ZERO 0x02 +#define BDRV_BLOCK_OFFSET_VALID 0x04 +#define BDRV_BLOCK_RAW 0x08 +#define BDRV_BLOCK_ALLOCATED 0x10 +#define BDRV_BLOCK_EOF 0x20 +#define BDRV_BLOCK_RECURSE 0x40 + +typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; + +typedef struct BDRVReopenState { + BlockDriverState *bs; + int flags; + BlockdevDetectZeroesOptions detect_zeroes; + bool backing_missing; + BlockDriverState *old_backing_bs; /* keep pointer for permissions update */ + BlockDriverState *old_file_bs; /* keep pointer for permissions update */ + QDict *options; + QDict *explicit_options; + void *opaque; +} BDRVReopenState; + +/* + * Block operation types + */ +typedef enum BlockOpType { + BLOCK_OP_TYPE_BACKUP_SOURCE, + BLOCK_OP_TYPE_BACKUP_TARGET, + BLOCK_OP_TYPE_CHANGE, + BLOCK_OP_TYPE_COMMIT_SOURCE, + BLOCK_OP_TYPE_COMMIT_TARGET, + BLOCK_OP_TYPE_DATAPLANE, + BLOCK_OP_TYPE_DRIVE_DEL, + BLOCK_OP_TYPE_EJECT, + BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, + BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, + BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, + BLOCK_OP_TYPE_MIRROR_SOURCE, + BLOCK_OP_TYPE_MIRROR_TARGET, + BLOCK_OP_TYPE_RESIZE, + BLOCK_OP_TYPE_STREAM, + BLOCK_OP_TYPE_REPLACE, + BLOCK_OP_TYPE_MAX, +} BlockOpType; + +/* Block node permission constants */ +enum { + /** + * A user that has the "permission" of consistent reads is guaranteed that + * their view of the contents of the block device is complete and + * self-consistent, representing the contents of a disk at a specific + * point. + * + * For most block devices (including their backing files) this is true, but + * the property cannot be maintained in a few situations like for + * intermediate nodes of a commit block job. + */ + BLK_PERM_CONSISTENT_READ = 0x01, + + /** This permission is required to change the visible disk contents. */ + BLK_PERM_WRITE = 0x02, + + /** + * This permission (which is weaker than BLK_PERM_WRITE) is both enough and + * required for writes to the block node when the caller promises that + * the visible disk content doesn't change. + * + * As the BLK_PERM_WRITE permission is strictly stronger, either is + * sufficient to perform an unchanging write. + */ + BLK_PERM_WRITE_UNCHANGED = 0x04, + + /** This permission is required to change the size of a block node. */ + BLK_PERM_RESIZE = 0x08, + + /** + * There was a now-removed bit BLK_PERM_GRAPH_MOD, with value of 0x10. QEMU + * 6.1 and earlier may still lock the corresponding byte in block/file-posix + * locking. So, implementing some new permission should be very careful to + * not interfere with this old unused thing. + */ + + BLK_PERM_ALL = 0x0f, + + DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_WRITE_UNCHANGED + | BLK_PERM_RESIZE, + + DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH, +}; + +/* + * Flags that parent nodes assign to child nodes to specify what kind of + * role(s) they take. + * + * At least one of DATA, METADATA, FILTERED, or COW must be set for + * every child. + */ +enum BdrvChildRoleBits { + /* + * This child stores data. + * Any node may have an arbitrary number of such children. + */ + BDRV_CHILD_DATA = (1 << 0), + + /* + * This child stores metadata. + * Any node may have an arbitrary number of metadata-storing + * children. + */ + BDRV_CHILD_METADATA = (1 << 1), + + /* + * A child that always presents exactly the same visible data as + * the parent, e.g. by virtue of the parent forwarding all reads + * and writes. + * This flag is mutually exclusive with DATA, METADATA, and COW. + * Any node may have at most one filtered child at a time. + */ + BDRV_CHILD_FILTERED = (1 << 2), + + /* + * Child from which to read all data that isn't allocated in the + * parent (i.e., the backing child); such data is copied to the + * parent through COW (and optionally COR). + * This field is mutually exclusive with DATA, METADATA, and + * FILTERED. + * Any node may have at most one such backing child at a time. + */ + BDRV_CHILD_COW = (1 << 3), + + /* + * The primary child. For most drivers, this is the child whose + * filename applies best to the parent node. + * Any node may have at most one primary child at a time. + */ + BDRV_CHILD_PRIMARY = (1 << 4), + + /* Useful combination of flags */ + BDRV_CHILD_IMAGE = BDRV_CHILD_DATA + | BDRV_CHILD_METADATA + | BDRV_CHILD_PRIMARY, +}; + +/* Mask of BdrvChildRoleBits values */ +typedef unsigned int BdrvChildRole; + +typedef struct BdrvCheckResult { + int corruptions; + int leaks; + int check_errors; + int corruptions_fixed; + int leaks_fixed; + int64_t image_end_offset; + BlockFragInfo bfi; +} BdrvCheckResult; + +typedef enum { + BDRV_FIX_LEAKS = 1, + BDRV_FIX_ERRORS = 2, +} BdrvCheckMode; + +typedef struct BlockSizes { + uint32_t phys; + uint32_t log; +} BlockSizes; + +typedef struct HDGeometry { + uint32_t heads; + uint32_t sectors; + uint32_t cylinders; +} HDGeometry; + +/* + * Common functions that are neither I/O nor Global State. + * + * These functions must never call any function from other categories + * (I/O, "I/O or GS", Global State) except this one, but can be invoked by + * all of them. + */ + +char *bdrv_perm_names(uint64_t perm); +uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm); + +void bdrv_init_with_whitelist(void); +bool bdrv_uses_whitelist(void); +int bdrv_is_whitelisted(BlockDriver *drv, bool read_only); + +int bdrv_parse_aio(const char *mode, int *flags); +int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough); +int bdrv_parse_discard_flags(const char *mode, int *flags); + +int path_has_protocol(const char *path); +int path_is_absolute(const char *path); +char *path_combine(const char *base_path, const char *filename); + +char *bdrv_get_full_backing_filename_from_filename(const char *backed, + const char *backing, + Error **errp); + +#endif /* BLOCK_COMMON_H */ diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h new file mode 100644 index 0000000000..25bb69bbef --- /dev/null +++ b/include/block/block-global-state.h @@ -0,0 +1,253 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_GLOBAL_STATE_H +#define BLOCK_GLOBAL_STATE_H + +#include "block-common.h" + +/* + * Global state (GS) API. These functions run under the BQL. + * + * If a function modifies the graph, it also uses drain and/or + * aio_context_acquire/release to be sure it has unique access. + * aio_context locking is needed together with BQL because of + * the thread-safe I/O API that concurrently runs and accesses + * the graph without the BQL. + * + * It is important to note that not all of these functions are + * necessarily limited to running under the BQL, but they would + * require additional auditing and many small thread-safety changes + * to move them into the I/O API. Often it's not worth doing that + * work since the APIs are only used with the BQL held at the + * moment, so they have been placed in the GS API (for now). + * + * These functions can call any function from this and other categories + * (I/O, "I/O or GS", Common), but must be invoked only by other GS APIs. + * + * All functions in this header must use the macro + * GLOBAL_STATE_CODE(); + * to catch when they are accidentally called without the BQL. + */ + +void bdrv_init(void); +BlockDriver *bdrv_find_protocol(const char *filename, + bool allow_protocol_prefix, + Error **errp); +BlockDriver *bdrv_find_format(const char *format_name); +int bdrv_create(BlockDriver *drv, const char* filename, + QemuOpts *opts, Error **errp); +int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); + +BlockDriverState *bdrv_new(void); +int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp); +int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, + Error **errp); +int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, + Error **errp); +BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options, + int flags, Error **errp); +int bdrv_drop_filter(BlockDriverState *bs, Error **errp); + +BdrvChild *bdrv_open_child(const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState *parent, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + bool allow_none, Error **errp); +BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); +int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp); +int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + const char *bdref_key, Error **errp); +BlockDriverState *bdrv_open(const char *filename, const char *reference, + QDict *options, int flags, Error **errp); +BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, + const char *node_name, + QDict *options, int flags, + Error **errp); +BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, + int flags, Error **errp); +BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, + BlockDriverState *bs, QDict *options, + bool keep_old_opts); +void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue); +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); +int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + Error **errp); +int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, + Error **errp); +BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, + const char *backing_file); +void bdrv_refresh_filename(BlockDriverState *bs); +void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp); +int bdrv_commit(BlockDriverState *bs); +int bdrv_make_empty(BdrvChild *c, Error **errp); +int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, + const char *backing_fmt, bool warn); +void bdrv_register(BlockDriver *bdrv); +int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + const char *backing_file_str); +BlockDriverState *bdrv_find_overlay(BlockDriverState *active, + BlockDriverState *bs); +BlockDriverState *bdrv_find_base(BlockDriverState *bs); +bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, + Error **errp); +int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, + Error **errp); +void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base); + +/* + * The units of offset and total_work_size may be chosen arbitrarily by the + * block driver; total_work_size may change during the course of the amendment + * operation + */ +typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset, + int64_t total_work_size, void *opaque); +int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb, void *cb_opaque, + bool force, + Error **errp); + +/* check if a named node can be replaced when doing drive-mirror */ +BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, + const char *node_name, Error **errp); + +int bdrv_activate(BlockDriverState *bs, Error **errp); +void bdrv_activate_all(Error **errp); +int bdrv_inactivate_all(void); + +int bdrv_flush_all(void); +void bdrv_close_all(void); +void bdrv_drain_all_begin(void); +void bdrv_drain_all_end(void); +void bdrv_drain_all(void); + +int bdrv_has_zero_init_1(BlockDriverState *bs); +int bdrv_has_zero_init(BlockDriverState *bs); +BlockDriverState *bdrv_find_node(const char *node_name); +BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp); +XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp); +BlockDriverState *bdrv_lookup_bs(const char *device, + const char *node_name, + Error **errp); +bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base); +BlockDriverState *bdrv_next_node(BlockDriverState *bs); +BlockDriverState *bdrv_next_all_states(BlockDriverState *bs); + +typedef struct BdrvNextIterator { + enum { + BDRV_NEXT_BACKEND_ROOTS, + BDRV_NEXT_MONITOR_OWNED, + } phase; + BlockBackend *blk; + BlockDriverState *bs; +} BdrvNextIterator; + +BlockDriverState *bdrv_first(BdrvNextIterator *it); +BlockDriverState *bdrv_next(BdrvNextIterator *it); +void bdrv_next_cleanup(BdrvNextIterator *it); + +BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs); +void bdrv_iterate_format(void (*it)(void *opaque, const char *name), + void *opaque, bool read_only); +int bdrv_get_flags(BlockDriverState *bs); +char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp); +char *bdrv_dirname(BlockDriverState *bs, Error **errp); + +void bdrv_img_create(const char *filename, const char *fmt, + const char *base_filename, const char *base_fmt, + char *options, uint64_t img_size, int flags, + bool quiet, Error **errp); + +void bdrv_ref(BlockDriverState *bs); +void bdrv_unref(BlockDriverState *bs); +void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + Error **errp); + +bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason); +void bdrv_op_block_all(BlockDriverState *bs, Error *reason); +void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason); +bool bdrv_op_blocker_is_empty(BlockDriverState *bs); + +int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag); +int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag); +int bdrv_debug_resume(BlockDriverState *bs, const char *tag); +bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); + +/** + * Locks the AioContext of @bs if it's not the current AioContext. This avoids + * double locking which could lead to deadlocks: This is a coroutine_fn, so we + * know we already own the lock of the current AioContext. + * + * May only be called in the main thread. + */ +void coroutine_fn bdrv_co_lock(BlockDriverState *bs); + +/** + * Unlocks the AioContext of @bs if it's not the current AioContext. + */ +void coroutine_fn bdrv_co_unlock(BlockDriverState *bs); + +void bdrv_set_aio_context_ignore(BlockDriverState *bs, + AioContext *new_context, GSList **ignore); +int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + Error **errp); +int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp); +bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp); +bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, + GSList **ignore, Error **errp); +AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c); + +int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); +int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); + +void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child, + Error **errp); +void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp); + +/** + * + * bdrv_register_buf/bdrv_unregister_buf: + * + * Register/unregister a buffer for I/O. For example, VFIO drivers are + * interested to know the memory areas that would later be used for I/O, so + * that they can prepare IOMMU mapping etc., to get better performance. + */ +void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size); +void bdrv_unregister_buf(BlockDriverState *bs, void *host); + +void bdrv_cancel_in_flight(BlockDriverState *bs); + +#endif /* BLOCK_GLOBAL_STATE_H */ diff --git a/include/block/block-io.h b/include/block/block-io.h new file mode 100644 index 0000000000..5e3f346806 --- /dev/null +++ b/include/block/block-io.h @@ -0,0 +1,368 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_IO_H +#define BLOCK_IO_H + +#include "block-common.h" + +/* + * I/O API functions. These functions are thread-safe, and therefore + * can run in any thread as long as the thread has called + * aio_context_acquire/release(). + * + * These functions can only call functions from I/O and Common categories, + * but can be invoked by GS, "I/O or GS" and I/O APIs. + * + * All functions in this category must use the macro + * IO_CODE(); + * to catch when they are accidentally called by the wrong API. + */ + +int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, + int64_t bytes, BdrvRequestFlags flags); +int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); +int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes); +int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, + int64_t bytes); +int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, + const void *buf, int64_t bytes); +/* + * Efficiently zero a region of the disk image. Note that this is a regular + * I/O request like read or write and should have a reasonable size. This + * function is not suitable for zeroing the entire image in a single request + * because it may allocate memory for the entire region. + */ +int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, + int64_t bytes, BdrvRequestFlags flags); + +int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, + Error **errp); + +int64_t bdrv_nb_sectors(BlockDriverState *bs); +int64_t bdrv_getlength(BlockDriverState *bs); +int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); +BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, + BlockDriverState *in_bs, Error **errp); +void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); +int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp); +void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs); + + +/* async block I/O */ +void bdrv_aio_cancel(BlockAIOCB *acb); +void bdrv_aio_cancel_async(BlockAIOCB *acb); + +/* sg packet commands */ +int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf); + +/* Ensure contents are flushed to disk. */ +int coroutine_fn bdrv_co_flush(BlockDriverState *bs); + +int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); +bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); +int bdrv_block_status(BlockDriverState *bs, int64_t offset, + int64_t bytes, int64_t *pnum, int64_t *map, + BlockDriverState **file); +int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, + int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file); +int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t *pnum); +int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, + bool include_base, int64_t offset, int64_t bytes, + int64_t *pnum); +int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, + int64_t bytes); + +int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, + bool ignore_allow_rdw, Error **errp); +int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, + Error **errp); +bool bdrv_is_read_only(BlockDriverState *bs); +bool bdrv_is_writable(BlockDriverState *bs); +bool bdrv_is_sg(BlockDriverState *bs); +bool bdrv_is_inserted(BlockDriverState *bs); +void bdrv_lock_medium(BlockDriverState *bs, bool locked); +void bdrv_eject(BlockDriverState *bs, bool eject_flag); +const char *bdrv_get_format_name(BlockDriverState *bs); + +bool bdrv_supports_compressed_writes(BlockDriverState *bs); +const char *bdrv_get_node_name(const BlockDriverState *bs); +const char *bdrv_get_device_name(const BlockDriverState *bs); +const char *bdrv_get_device_or_node_name(const BlockDriverState *bs); +int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); +ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, + Error **errp); +BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs); +void bdrv_round_to_clusters(BlockDriverState *bs, + int64_t offset, int64_t bytes, + int64_t *cluster_offset, + int64_t *cluster_bytes); + +void bdrv_get_backing_filename(BlockDriverState *bs, + char *filename, int filename_size); + +int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, + int64_t pos, int size); + +int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, + int64_t pos, int size); + +/* + * Returns the alignment in bytes that is required so that no bounce buffer + * is required throughout the stack + */ +size_t bdrv_min_mem_align(BlockDriverState *bs); +/* Returns optimal alignment in bytes for bounce buffer */ +size_t bdrv_opt_mem_align(BlockDriverState *bs); +void *qemu_blockalign(BlockDriverState *bs, size_t size); +void *qemu_blockalign0(BlockDriverState *bs, size_t size); +void *qemu_try_blockalign(BlockDriverState *bs, size_t size); +void *qemu_try_blockalign0(BlockDriverState *bs, size_t size); +bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); + +void bdrv_enable_copy_on_read(BlockDriverState *bs); +void bdrv_disable_copy_on_read(BlockDriverState *bs); + +void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event); + +#define BLKDBG_EVENT(child, evt) \ + do { \ + if (child) { \ + bdrv_debug_event(child->bs, evt); \ + } \ + } while (0) + +/** + * bdrv_get_aio_context: + * + * Returns: the currently bound #AioContext + */ +AioContext *bdrv_get_aio_context(BlockDriverState *bs); + +/** + * Move the current coroutine to the AioContext of @bs and return the old + * AioContext of the coroutine. Increase bs->in_flight so that draining @bs + * will wait for the operation to proceed until the corresponding + * bdrv_co_leave(). + * + * Consequently, you can't call drain inside a bdrv_co_enter/leave() section as + * this will deadlock. + */ +AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs); + +/** + * Ends a section started by bdrv_co_enter(). Move the current coroutine back + * to old_ctx and decrease bs->in_flight again. + */ +void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx); + +/** + * Transfer control to @co in the aio context of @bs + */ +void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co); + +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c); + +void bdrv_io_plug(BlockDriverState *bs); +void bdrv_io_unplug(BlockDriverState *bs); + +bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, + uint32_t granularity, Error **errp); + +/** + * + * bdrv_co_copy_range: + * + * Do offloaded copy between two children. If the operation is not implemented + * by the driver, or if the backend storage doesn't support it, a negative + * error code will be returned. + * + * Note: block layer doesn't emulate or fallback to a bounce buffer approach + * because usually the caller shouldn't attempt offloaded copy any more (e.g. + * calling copy_file_range(2)) after the first error, thus it should fall back + * to a read+write path in the caller level. + * + * @src: Source child to copy data from + * @src_offset: offset in @src image to read data + * @dst: Destination child to copy data to + * @dst_offset: offset in @dst image to write data + * @bytes: number of bytes to copy + * @flags: request flags. Supported flags: + * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero + * write on @dst as if bdrv_co_pwrite_zeroes is + * called. Used to simplify caller code, or + * during BlockDriver.bdrv_co_copy_range_from() + * recursion. + * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping + * requests currently in flight. + * + * Returns: 0 if succeeded; negative error code if failed. + **/ +int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, + BdrvChild *dst, int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + +/** + * bdrv_drained_end_no_poll: + * + * Same as bdrv_drained_end(), but do not poll for the subgraph to + * actually become unquiesced. Therefore, no graph changes will occur + * with this function. + * + * *drained_end_counter is incremented for every background operation + * that is scheduled, and will be decremented for every operation once + * it settles. The caller must poll until it reaches 0. The counter + * should be accessed using atomic operations only. + */ +void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); + + +/* + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. + * + * More specifically, these functions use BDRV_POLL_WHILE(bs), which + * requires the caller to be either in the main thread and hold + * the BlockdriverState (bs) AioContext lock, or directly in the + * home thread that runs the bs AioContext. Calling them from + * another thread in another AioContext would cause deadlocks. + * + * Therefore, these functions are not proper I/O, because they + * can't run in *any* iothreads, but only in a specific one. + * + * These functions can call any function from I/O, Common and this + * categories, but must be invoked only by other "I/O or GS" and GS APIs. + * + * All functions in this category must use the macro + * IO_OR_GS_CODE(); + * to catch when they are accidentally called by the wrong API. + */ + +#define BDRV_POLL_WHILE(bs, cond) ({ \ + BlockDriverState *bs_ = (bs); \ + IO_OR_GS_CODE(); \ + AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \ + cond); }) + +void bdrv_drain(BlockDriverState *bs); +void coroutine_fn bdrv_co_drain(BlockDriverState *bs); + +int generated_co_wrapper +bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); + +int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix); + +/* Invalidate any cached metadata used by image formats */ +int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs, + Error **errp); +int generated_co_wrapper bdrv_flush(BlockDriverState *bs); +int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset, + int64_t bytes); +int generated_co_wrapper +bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); +int generated_co_wrapper +bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); + +/** + * bdrv_parent_drained_begin_single: + * + * Begin a quiesced section for the parent of @c. If @poll is true, wait for + * any pending activity to cease. + */ +void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); + +/** + * bdrv_parent_drained_end_single: + * + * End a quiesced section for the parent of @c. + * + * This polls @bs's AioContext until all scheduled sub-drained_ends + * have settled, which may result in graph changes. + */ +void bdrv_parent_drained_end_single(BdrvChild *c); + +/** + * bdrv_drain_poll: + * + * Poll for pending requests in @bs, its parents (except for @ignore_parent), + * and if @recursive is true its children as well (used for subtree drain). + * + * If @ignore_bds_parents is true, parents that are BlockDriverStates must + * ignore the drain request because they will be drained separately (used for + * drain_all). + * + * This is part of bdrv_drained_begin. + */ +bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + BdrvChild *ignore_parent, bool ignore_bds_parents); + +/** + * bdrv_drained_begin: + * + * Begin a quiesced section for exclusive access to the BDS, by disabling + * external request sources including NBD server, block jobs, and device model. + * + * This function can be recursive. + */ +void bdrv_drained_begin(BlockDriverState *bs); + +/** + * bdrv_do_drained_begin_quiesce: + * + * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already + * running requests to complete. + */ +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + BdrvChild *parent, bool ignore_bds_parents); + +/** + * Like bdrv_drained_begin, but recursively begins a quiesced section for + * exclusive access to all child nodes as well. + */ +void bdrv_subtree_drained_begin(BlockDriverState *bs); + +/** + * bdrv_drained_end: + * + * End a quiescent section started by bdrv_drained_begin(). + * + * This polls @bs's AioContext until all scheduled sub-drained_ends + * have settled. On one hand, that may result in graph changes. On + * the other, this requires that the caller either runs in the main + * loop; or that all involved nodes (@bs and all of its parents) are + * in the caller's AioContext. + */ +void bdrv_drained_end(BlockDriverState *bs); + +/** + * End a quiescent section started by bdrv_subtree_drained_begin(). + */ +void bdrv_subtree_drained_end(BlockDriverState *bs); + +#endif /* BLOCK_IO_H */ diff --git a/include/block/block.h b/include/block/block.h index e1713ee306..1e6b8fef1e 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -1,864 +1,32 @@ -#ifndef BLOCK_H -#define BLOCK_H - -#include "block/aio.h" -#include "block/aio-wait.h" -#include "qemu/iov.h" -#include "qemu/coroutine.h" -#include "block/accounting.h" -#include "block/dirty-bitmap.h" -#include "block/blockjob.h" -#include "qemu/hbitmap.h" -#include "qemu/transactions.h" - /* - * generated_co_wrapper - * - * Function specifier, which does nothing but mark functions to be - * generated by scripts/block-coroutine-wrapper.py - * - * Read more in docs/devel/block-coroutine-wrapper.rst - */ -#define generated_co_wrapper - -/* block.c */ -typedef struct BlockDriver BlockDriver; -typedef struct BdrvChild BdrvChild; -typedef struct BdrvChildClass BdrvChildClass; - -typedef struct BlockDriverInfo { - /* in bytes, 0 if irrelevant */ - int cluster_size; - /* offset at which the VM state can be saved (0 if not possible) */ - int64_t vm_state_offset; - bool is_dirty; - /* - * True if this block driver only supports compressed writes - */ - bool needs_compressed_writes; -} BlockDriverInfo; - -typedef struct BlockFragInfo { - uint64_t allocated_clusters; - uint64_t total_clusters; - uint64_t fragmented_clusters; - uint64_t compressed_clusters; -} BlockFragInfo; - -typedef enum { - BDRV_REQ_COPY_ON_READ = 0x1, - BDRV_REQ_ZERO_WRITE = 0x2, - - /* - * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate - * that the block driver should unmap (discard) blocks if it is guaranteed - * that the result will read back as zeroes. The flag is only passed to the - * driver if the block device is opened with BDRV_O_UNMAP. - */ - BDRV_REQ_MAY_UNMAP = 0x4, - - BDRV_REQ_FUA = 0x10, - BDRV_REQ_WRITE_COMPRESSED = 0x20, - - /* Signifies that this write request will not change the visible disk - * content. */ - BDRV_REQ_WRITE_UNCHANGED = 0x40, - - /* Forces request serialisation. Use only with write requests. */ - BDRV_REQ_SERIALISING = 0x80, - - /* Execute the request only if the operation can be offloaded or otherwise - * be executed efficiently, but return an error instead of using a slow - * fallback. */ - BDRV_REQ_NO_FALLBACK = 0x100, - - /* - * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read - * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR - * filter is involved), in which case it signals that the COR operation - * need not read the data into memory (qiov) but only ensure they are - * copied to the top layer (i.e., that COR operation is done). - */ - BDRV_REQ_PREFETCH = 0x200, - - /* - * If we need to wait for other requests, just fail immediately. Used - * only together with BDRV_REQ_SERIALISING. - */ - BDRV_REQ_NO_WAIT = 0x400, - - /* Mask of valid flags */ - BDRV_REQ_MASK = 0x7ff, -} BdrvRequestFlags; - -typedef struct BlockSizes { - uint32_t phys; - uint32_t log; -} BlockSizes; - -typedef struct HDGeometry { - uint32_t heads; - uint32_t sectors; - uint32_t cylinders; -} HDGeometry; - -#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */ -#define BDRV_O_RDWR 0x0002 -#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */ -#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ -#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ -#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ -#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */ -#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */ -#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */ -#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */ -#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */ -#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */ -#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */ -#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */ -#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given: - select an appropriate protocol driver, - ignoring the format layer */ -#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */ -#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening read-write fails */ -#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */ - -#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH) - - -/* Option names of options parsed by the block layer */ - -#define BDRV_OPT_CACHE_WB "cache.writeback" -#define BDRV_OPT_CACHE_DIRECT "cache.direct" -#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" -#define BDRV_OPT_READ_ONLY "read-only" -#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only" -#define BDRV_OPT_DISCARD "discard" -#define BDRV_OPT_FORCE_SHARE "force-share" - - -#define BDRV_SECTOR_BITS 9 -#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) - -#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \ - INT_MAX >> BDRV_SECTOR_BITS) -#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) - -/* - * We want allow aligning requests and disk length up to any 32bit alignment - * and don't afraid of overflow. - * To achieve it, and in the same time use some pretty number as maximum disk - * size, let's define maximum "length" (a limit for any offset/bytes request and - * for disk size) to be the greatest power of 2 less than INT64_MAX. - */ -#define BDRV_MAX_ALIGNMENT (1L << 30) -#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT)) - -/* - * Allocation status flags for bdrv_block_status() and friends. - * - * Public flags: - * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer - * BDRV_BLOCK_ZERO: offset reads as zero - * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data - * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this - * layer rather than any backing, set by block layer - * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this - * layer, set by block layer - * - * Internal flags: - * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request - * that the block layer recompute the answer from the returned - * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID. - * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for - * zeroes in file child of current block node inside - * returned region. Only valid together with both - * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not - * appear with BDRV_BLOCK_ZERO. - * - * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the - * host offset within the returned BDS that is allocated for the - * corresponding raw guest data. However, whether that offset - * actually contains data also depends on BDRV_BLOCK_DATA, as follows: - * - * DATA ZERO OFFSET_VALID - * t t t sectors read as zero, returned file is zero at offset - * t f t sectors read as valid from file at offset - * f t t sectors preallocated, read as zero, returned file not - * necessarily zero at offset - * f f t sectors preallocated but read from backing_hd, - * returned file contains garbage at offset - * t t f sectors preallocated, read as zero, unknown offset - * t f f sectors read from unknown file or offset - * f t f not allocated or unknown offset, read as zero - * f f f not allocated or unknown offset, read from backing_hd - */ -#define BDRV_BLOCK_DATA 0x01 -#define BDRV_BLOCK_ZERO 0x02 -#define BDRV_BLOCK_OFFSET_VALID 0x04 -#define BDRV_BLOCK_RAW 0x08 -#define BDRV_BLOCK_ALLOCATED 0x10 -#define BDRV_BLOCK_EOF 0x20 -#define BDRV_BLOCK_RECURSE 0x40 - -typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; - -typedef struct BDRVReopenState { - BlockDriverState *bs; - int flags; - BlockdevDetectZeroesOptions detect_zeroes; - bool backing_missing; - BlockDriverState *old_backing_bs; /* keep pointer for permissions update */ - BlockDriverState *old_file_bs; /* keep pointer for permissions update */ - QDict *options; - QDict *explicit_options; - void *opaque; -} BDRVReopenState; - -/* - * Block operation types - */ -typedef enum BlockOpType { - BLOCK_OP_TYPE_BACKUP_SOURCE, - BLOCK_OP_TYPE_BACKUP_TARGET, - BLOCK_OP_TYPE_CHANGE, - BLOCK_OP_TYPE_COMMIT_SOURCE, - BLOCK_OP_TYPE_COMMIT_TARGET, - BLOCK_OP_TYPE_DATAPLANE, - BLOCK_OP_TYPE_DRIVE_DEL, - BLOCK_OP_TYPE_EJECT, - BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, - BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, - BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, - BLOCK_OP_TYPE_MIRROR_SOURCE, - BLOCK_OP_TYPE_MIRROR_TARGET, - BLOCK_OP_TYPE_RESIZE, - BLOCK_OP_TYPE_STREAM, - BLOCK_OP_TYPE_REPLACE, - BLOCK_OP_TYPE_MAX, -} BlockOpType; - -/* Block node permission constants */ -enum { - /** - * A user that has the "permission" of consistent reads is guaranteed that - * their view of the contents of the block device is complete and - * self-consistent, representing the contents of a disk at a specific - * point. - * - * For most block devices (including their backing files) this is true, but - * the property cannot be maintained in a few situations like for - * intermediate nodes of a commit block job. - */ - BLK_PERM_CONSISTENT_READ = 0x01, - - /** This permission is required to change the visible disk contents. */ - BLK_PERM_WRITE = 0x02, - - /** - * This permission (which is weaker than BLK_PERM_WRITE) is both enough and - * required for writes to the block node when the caller promises that - * the visible disk content doesn't change. - * - * As the BLK_PERM_WRITE permission is strictly stronger, either is - * sufficient to perform an unchanging write. - */ - BLK_PERM_WRITE_UNCHANGED = 0x04, - - /** This permission is required to change the size of a block node. */ - BLK_PERM_RESIZE = 0x08, - - /** - * There was a now-removed bit BLK_PERM_GRAPH_MOD, with value of 0x10. QEMU - * 6.1 and earlier may still lock the corresponding byte in block/file-posix - * locking. So, implementing some new permission should be very careful to - * not interfere with this old unused thing. - */ - - BLK_PERM_ALL = 0x0f, - - DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ - | BLK_PERM_WRITE - | BLK_PERM_WRITE_UNCHANGED - | BLK_PERM_RESIZE, - - DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH, -}; - -/* - * Flags that parent nodes assign to child nodes to specify what kind of - * role(s) they take. - * - * At least one of DATA, METADATA, FILTERED, or COW must be set for - * every child. - */ -enum BdrvChildRoleBits { - /* - * This child stores data. - * Any node may have an arbitrary number of such children. - */ - BDRV_CHILD_DATA = (1 << 0), - - /* - * This child stores metadata. - * Any node may have an arbitrary number of metadata-storing - * children. - */ - BDRV_CHILD_METADATA = (1 << 1), - - /* - * A child that always presents exactly the same visible data as - * the parent, e.g. by virtue of the parent forwarding all reads - * and writes. - * This flag is mutually exclusive with DATA, METADATA, and COW. - * Any node may have at most one filtered child at a time. - */ - BDRV_CHILD_FILTERED = (1 << 2), - - /* - * Child from which to read all data that isn't allocated in the - * parent (i.e., the backing child); such data is copied to the - * parent through COW (and optionally COR). - * This field is mutually exclusive with DATA, METADATA, and - * FILTERED. - * Any node may have at most one such backing child at a time. - */ - BDRV_CHILD_COW = (1 << 3), - - /* - * The primary child. For most drivers, this is the child whose - * filename applies best to the parent node. - * Any node may have at most one primary child at a time. - */ - BDRV_CHILD_PRIMARY = (1 << 4), - - /* Useful combination of flags */ - BDRV_CHILD_IMAGE = BDRV_CHILD_DATA - | BDRV_CHILD_METADATA - | BDRV_CHILD_PRIMARY, -}; - -/* Mask of BdrvChildRoleBits values */ -typedef unsigned int BdrvChildRole; - -char *bdrv_perm_names(uint64_t perm); -uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm); - -void bdrv_init(void); -void bdrv_init_with_whitelist(void); -bool bdrv_uses_whitelist(void); -int bdrv_is_whitelisted(BlockDriver *drv, bool read_only); -BlockDriver *bdrv_find_protocol(const char *filename, - bool allow_protocol_prefix, - Error **errp); -BlockDriver *bdrv_find_format(const char *format_name); -int bdrv_create(BlockDriver *drv, const char* filename, - QemuOpts *opts, Error **errp); -int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); - -BlockDriverState *bdrv_new(void); -int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - Error **errp); -int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, - Error **errp); -int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, - Error **errp); -BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options, - int flags, Error **errp); -int bdrv_drop_filter(BlockDriverState *bs, Error **errp); - -int bdrv_parse_aio(const char *mode, int *flags); -int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough); -int bdrv_parse_discard_flags(const char *mode, int *flags); -BdrvChild *bdrv_open_child(const char *filename, - QDict *options, const char *bdref_key, - BlockDriverState* parent, - const BdrvChildClass *child_class, - BdrvChildRole child_role, - bool allow_none, Error **errp); -BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); -int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - Error **errp); -int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, - const char *bdref_key, Error **errp); -BlockDriverState *bdrv_open(const char *filename, const char *reference, - QDict *options, int flags, Error **errp); -BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, - const char *node_name, - QDict *options, int flags, - Error **errp); -BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, - int flags, Error **errp); -BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, - BlockDriverState *bs, QDict *options, - bool keep_old_opts); -void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue); -int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); -int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, - Error **errp); -int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, - Error **errp); -int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, - int64_t bytes, BdrvRequestFlags flags); -int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); -int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes); -int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, - int64_t bytes); -int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, - const void *buf, int64_t bytes); -/* - * Efficiently zero a region of the disk image. Note that this is a regular - * I/O request like read or write and should have a reasonable size. This - * function is not suitable for zeroing the entire image in a single request - * because it may allocate memory for the entire region. - */ -int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, - int64_t bytes, BdrvRequestFlags flags); -BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, - const char *backing_file); -void bdrv_refresh_filename(BlockDriverState *bs); - -int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, - PreallocMode prealloc, BdrvRequestFlags flags, - Error **errp); -int generated_co_wrapper -bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, - PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); - -int64_t bdrv_nb_sectors(BlockDriverState *bs); -int64_t bdrv_getlength(BlockDriverState *bs); -int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); -BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, - BlockDriverState *in_bs, Error **errp); -void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); -void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp); -int bdrv_commit(BlockDriverState *bs); -int bdrv_make_empty(BdrvChild *c, Error **errp); -int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, - const char *backing_fmt, bool warn); -void bdrv_register(BlockDriver *bdrv); -int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - const char *backing_file_str); -BlockDriverState *bdrv_find_overlay(BlockDriverState *active, - BlockDriverState *bs); -BlockDriverState *bdrv_find_base(BlockDriverState *bs); -bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, - Error **errp); -int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, - Error **errp); -void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base); -int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp); -void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs); - - -typedef struct BdrvCheckResult { - int corruptions; - int leaks; - int check_errors; - int corruptions_fixed; - int leaks_fixed; - int64_t image_end_offset; - BlockFragInfo bfi; -} BdrvCheckResult; - -typedef enum { - BDRV_FIX_LEAKS = 1, - BDRV_FIX_ERRORS = 2, -} BdrvCheckMode; - -int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, - BdrvCheckMode fix); - -/* The units of offset and total_work_size may be chosen arbitrarily by the - * block driver; total_work_size may change during the course of the amendment - * operation */ -typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset, - int64_t total_work_size, void *opaque); -int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, - BlockDriverAmendStatusCB *status_cb, void *cb_opaque, - bool force, - Error **errp); - -/* check if a named node can be replaced when doing drive-mirror */ -BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, - const char *node_name, Error **errp); - -/* async block I/O */ -void bdrv_aio_cancel(BlockAIOCB *acb); -void bdrv_aio_cancel_async(BlockAIOCB *acb); - -/* sg packet commands */ -int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf); - -/* Invalidate any cached metadata used by image formats */ -int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs, - Error **errp); -void bdrv_invalidate_cache_all(Error **errp); -int bdrv_inactivate_all(void); - -/* Ensure contents are flushed to disk. */ -int generated_co_wrapper bdrv_flush(BlockDriverState *bs); -int coroutine_fn bdrv_co_flush(BlockDriverState *bs); -int bdrv_flush_all(void); -void bdrv_close_all(void); -void bdrv_drain(BlockDriverState *bs); -void coroutine_fn bdrv_co_drain(BlockDriverState *bs); -void bdrv_drain_all_begin(void); -void bdrv_drain_all_end(void); -void bdrv_drain_all(void); - -#define BDRV_POLL_WHILE(bs, cond) ({ \ - BlockDriverState *bs_ = (bs); \ - AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \ - cond); }) - -int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset, - int64_t bytes); -int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); -int bdrv_has_zero_init_1(BlockDriverState *bs); -int bdrv_has_zero_init(BlockDriverState *bs); -bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); -int bdrv_block_status(BlockDriverState *bs, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file); -int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, - int64_t offset, int64_t bytes, int64_t *pnum, - int64_t *map, BlockDriverState **file); -int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, - int64_t *pnum); -int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, - bool include_base, int64_t offset, int64_t bytes, - int64_t *pnum); -int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, - int64_t bytes); - -bool bdrv_is_read_only(BlockDriverState *bs); -int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, - bool ignore_allow_rdw, Error **errp); -int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, - Error **errp); -bool bdrv_is_writable(BlockDriverState *bs); -bool bdrv_is_sg(BlockDriverState *bs); -bool bdrv_is_inserted(BlockDriverState *bs); -void bdrv_lock_medium(BlockDriverState *bs, bool locked); -void bdrv_eject(BlockDriverState *bs, bool eject_flag); -const char *bdrv_get_format_name(BlockDriverState *bs); -BlockDriverState *bdrv_find_node(const char *node_name); -BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp); -XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp); -BlockDriverState *bdrv_lookup_bs(const char *device, - const char *node_name, - Error **errp); -bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base); -BlockDriverState *bdrv_next_node(BlockDriverState *bs); -BlockDriverState *bdrv_next_all_states(BlockDriverState *bs); - -typedef struct BdrvNextIterator { - enum { - BDRV_NEXT_BACKEND_ROOTS, - BDRV_NEXT_MONITOR_OWNED, - } phase; - BlockBackend *blk; - BlockDriverState *bs; -} BdrvNextIterator; - -BlockDriverState *bdrv_first(BdrvNextIterator *it); -BlockDriverState *bdrv_next(BdrvNextIterator *it); -void bdrv_next_cleanup(BdrvNextIterator *it); - -BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs); -bool bdrv_supports_compressed_writes(BlockDriverState *bs); -void bdrv_iterate_format(void (*it)(void *opaque, const char *name), - void *opaque, bool read_only); -const char *bdrv_get_node_name(const BlockDriverState *bs); -const char *bdrv_get_device_name(const BlockDriverState *bs); -const char *bdrv_get_device_or_node_name(const BlockDriverState *bs); -int bdrv_get_flags(BlockDriverState *bs); -int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); -ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, - Error **errp); -BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs); -void bdrv_round_to_clusters(BlockDriverState *bs, - int64_t offset, int64_t bytes, - int64_t *cluster_offset, - int64_t *cluster_bytes); - -void bdrv_get_backing_filename(BlockDriverState *bs, - char *filename, int filename_size); -char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp); -char *bdrv_get_full_backing_filename_from_filename(const char *backed, - const char *backing, - Error **errp); -char *bdrv_dirname(BlockDriverState *bs, Error **errp); - -int path_has_protocol(const char *path); -int path_is_absolute(const char *path); -char *path_combine(const char *base_path, const char *filename); - -int generated_co_wrapper -bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); -int generated_co_wrapper -bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); -int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, - int64_t pos, int size); - -int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, - int64_t pos, int size); - -void bdrv_img_create(const char *filename, const char *fmt, - const char *base_filename, const char *base_fmt, - char *options, uint64_t img_size, int flags, - bool quiet, Error **errp); - -/* Returns the alignment in bytes that is required so that no bounce buffer - * is required throughout the stack */ -size_t bdrv_min_mem_align(BlockDriverState *bs); -/* Returns optimal alignment in bytes for bounce buffer */ -size_t bdrv_opt_mem_align(BlockDriverState *bs); -void *qemu_blockalign(BlockDriverState *bs, size_t size); -void *qemu_blockalign0(BlockDriverState *bs, size_t size); -void *qemu_try_blockalign(BlockDriverState *bs, size_t size); -void *qemu_try_blockalign0(BlockDriverState *bs, size_t size); -bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); - -void bdrv_enable_copy_on_read(BlockDriverState *bs); -void bdrv_disable_copy_on_read(BlockDriverState *bs); - -void bdrv_ref(BlockDriverState *bs); -void bdrv_unref(BlockDriverState *bs); -void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); -BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, - BlockDriverState *child_bs, - const char *child_name, - const BdrvChildClass *child_class, - BdrvChildRole child_role, - Error **errp); - -bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); -void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); -void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason); -void bdrv_op_block_all(BlockDriverState *bs, Error *reason); -void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason); -bool bdrv_op_blocker_is_empty(BlockDriverState *bs); - -#define BLKDBG_EVENT(child, evt) \ - do { \ - if (child) { \ - bdrv_debug_event(child->bs, evt); \ - } \ - } while (0) - -void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event); - -int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, - const char *tag); -int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag); -int bdrv_debug_resume(BlockDriverState *bs, const char *tag); -bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); - -/** - * bdrv_get_aio_context: + * QEMU System Emulator block driver * - * Returns: the currently bound #AioContext - */ -AioContext *bdrv_get_aio_context(BlockDriverState *bs); - -/** - * Move the current coroutine to the AioContext of @bs and return the old - * AioContext of the coroutine. Increase bs->in_flight so that draining @bs - * will wait for the operation to proceed until the corresponding - * bdrv_co_leave(). + * Copyright (c) 2003 Fabrice Bellard * - * Consequently, you can't call drain inside a bdrv_co_enter/leave() section as - * this will deadlock. - */ -AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs); - -/** - * Ends a section started by bdrv_co_enter(). Move the current coroutine back - * to old_ctx and decrease bs->in_flight again. - */ -void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx); - -/** - * Locks the AioContext of @bs if it's not the current AioContext. This avoids - * double locking which could lead to deadlocks: This is a coroutine_fn, so we - * know we already own the lock of the current AioContext. + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * - * May only be called in the main thread. - */ -void coroutine_fn bdrv_co_lock(BlockDriverState *bs); - -/** - * Unlocks the AioContext of @bs if it's not the current AioContext. - */ -void coroutine_fn bdrv_co_unlock(BlockDriverState *bs); - -/** - * Transfer control to @co in the aio context of @bs - */ -void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co); - -void bdrv_set_aio_context_ignore(BlockDriverState *bs, - AioContext *new_context, GSList **ignore); -int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, - Error **errp); -int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, - BdrvChild *ignore_child, Error **errp); -bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, - GSList **ignore, Error **errp); -bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, - GSList **ignore, Error **errp); -AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c); -AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c); - -int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); -int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); - -void bdrv_io_plug(BlockDriverState *bs); -void bdrv_io_unplug(BlockDriverState *bs); - -/** - * bdrv_parent_drained_begin_single: + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. * - * Begin a quiesced section for the parent of @c. If @poll is true, wait for - * any pending activity to cease. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. */ -void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); - -/** - * bdrv_parent_drained_end_single: - * - * End a quiesced section for the parent of @c. - * - * This polls @bs's AioContext until all scheduled sub-drained_ends - * have settled, which may result in graph changes. - */ -void bdrv_parent_drained_end_single(BdrvChild *c); - -/** - * bdrv_drain_poll: - * - * Poll for pending requests in @bs, its parents (except for @ignore_parent), - * and if @recursive is true its children as well (used for subtree drain). - * - * If @ignore_bds_parents is true, parents that are BlockDriverStates must - * ignore the drain request because they will be drained separately (used for - * drain_all). - * - * This is part of bdrv_drained_begin. - */ -bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - BdrvChild *ignore_parent, bool ignore_bds_parents); - -/** - * bdrv_drained_begin: - * - * Begin a quiesced section for exclusive access to the BDS, by disabling - * external request sources including NBD server, block jobs, and device model. - * - * This function can be recursive. - */ -void bdrv_drained_begin(BlockDriverState *bs); - -/** - * bdrv_do_drained_begin_quiesce: - * - * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already - * running requests to complete. - */ -void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - BdrvChild *parent, bool ignore_bds_parents); - -/** - * Like bdrv_drained_begin, but recursively begins a quiesced section for - * exclusive access to all child nodes as well. - */ -void bdrv_subtree_drained_begin(BlockDriverState *bs); - -/** - * bdrv_drained_end: - * - * End a quiescent section started by bdrv_drained_begin(). - * - * This polls @bs's AioContext until all scheduled sub-drained_ends - * have settled. On one hand, that may result in graph changes. On - * the other, this requires that the caller either runs in the main - * loop; or that all involved nodes (@bs and all of its parents) are - * in the caller's AioContext. - */ -void bdrv_drained_end(BlockDriverState *bs); - -/** - * bdrv_drained_end_no_poll: - * - * Same as bdrv_drained_end(), but do not poll for the subgraph to - * actually become unquiesced. Therefore, no graph changes will occur - * with this function. - * - * *drained_end_counter is incremented for every background operation - * that is scheduled, and will be decremented for every operation once - * it settles. The caller must poll until it reaches 0. The counter - * should be accessed using atomic operations only. - */ -void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); - -/** - * End a quiescent section started by bdrv_subtree_drained_begin(). - */ -void bdrv_subtree_drained_end(BlockDriverState *bs); - -void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child, - Error **errp); -void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp); - -bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, - uint32_t granularity, Error **errp); -/** - * - * bdrv_register_buf/bdrv_unregister_buf: - * - * Register/unregister a buffer for I/O. For example, VFIO drivers are - * interested to know the memory areas that would later be used for I/O, so - * that they can prepare IOMMU mapping etc., to get better performance. - */ -void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size); -void bdrv_unregister_buf(BlockDriverState *bs, void *host); +#ifndef BLOCK_H +#define BLOCK_H -/** - * - * bdrv_co_copy_range: - * - * Do offloaded copy between two children. If the operation is not implemented - * by the driver, or if the backend storage doesn't support it, a negative - * error code will be returned. - * - * Note: block layer doesn't emulate or fallback to a bounce buffer approach - * because usually the caller shouldn't attempt offloaded copy any more (e.g. - * calling copy_file_range(2)) after the first error, thus it should fall back - * to a read+write path in the caller level. - * - * @src: Source child to copy data from - * @src_offset: offset in @src image to read data - * @dst: Destination child to copy data to - * @dst_offset: offset in @dst image to write data - * @bytes: number of bytes to copy - * @flags: request flags. Supported flags: - * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero - * write on @dst as if bdrv_co_pwrite_zeroes is - * called. Used to simplify caller code, or - * during BlockDriver.bdrv_co_copy_range_from() - * recursion. - * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping - * requests currently in flight. - * - * Returns: 0 if succeeded; negative error code if failed. - **/ -int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, - BdrvChild *dst, int64_t dst_offset, - int64_t bytes, BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); +#include "block-global-state.h" +#include "block-io.h" -void bdrv_cancel_in_flight(BlockDriverState *bs); +/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */ -#endif +#endif /* BLOCK_H */ diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h new file mode 100644 index 0000000000..5a04c778e4 --- /dev/null +++ b/include/block/block_int-common.h @@ -0,0 +1,1222 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_INT_COMMON_H +#define BLOCK_INT_COMMON_H + +#include "block/accounting.h" +#include "block/block.h" +#include "block/aio-wait.h" +#include "qemu/queue.h" +#include "qemu/coroutine.h" +#include "qemu/stats64.h" +#include "qemu/timer.h" +#include "qemu/hbitmap.h" +#include "block/snapshot.h" +#include "qemu/throttle.h" +#include "qemu/rcu.h" + +#define BLOCK_FLAG_LAZY_REFCOUNTS 8 + +#define BLOCK_OPT_SIZE "size" +#define BLOCK_OPT_ENCRYPT "encryption" +#define BLOCK_OPT_ENCRYPT_FORMAT "encrypt.format" +#define BLOCK_OPT_COMPAT6 "compat6" +#define BLOCK_OPT_HWVERSION "hwversion" +#define BLOCK_OPT_BACKING_FILE "backing_file" +#define BLOCK_OPT_BACKING_FMT "backing_fmt" +#define BLOCK_OPT_CLUSTER_SIZE "cluster_size" +#define BLOCK_OPT_TABLE_SIZE "table_size" +#define BLOCK_OPT_PREALLOC "preallocation" +#define BLOCK_OPT_SUBFMT "subformat" +#define BLOCK_OPT_COMPAT_LEVEL "compat" +#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" +#define BLOCK_OPT_ADAPTER_TYPE "adapter_type" +#define BLOCK_OPT_REDUNDANCY "redundancy" +#define BLOCK_OPT_NOCOW "nocow" +#define BLOCK_OPT_EXTENT_SIZE_HINT "extent_size_hint" +#define BLOCK_OPT_OBJECT_SIZE "object_size" +#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits" +#define BLOCK_OPT_DATA_FILE "data_file" +#define BLOCK_OPT_DATA_FILE_RAW "data_file_raw" +#define BLOCK_OPT_COMPRESSION_TYPE "compression_type" +#define BLOCK_OPT_EXTL2 "extended_l2" + +#define BLOCK_PROBE_BUF_SIZE 512 + +enum BdrvTrackedRequestType { + BDRV_TRACKED_READ, + BDRV_TRACKED_WRITE, + BDRV_TRACKED_DISCARD, + BDRV_TRACKED_TRUNCATE, +}; + +/* + * That is not quite good that BdrvTrackedRequest structure is public, + * as block/io.c is very careful about incoming offset/bytes being + * correct. Be sure to assert bdrv_check_request() succeeded after any + * modification of BdrvTrackedRequest object out of block/io.c + */ +typedef struct BdrvTrackedRequest { + BlockDriverState *bs; + int64_t offset; + int64_t bytes; + enum BdrvTrackedRequestType type; + + bool serialising; + int64_t overlap_offset; + int64_t overlap_bytes; + + QLIST_ENTRY(BdrvTrackedRequest) list; + Coroutine *co; /* owner, used for deadlock detection */ + CoQueue wait_queue; /* coroutines blocked on this request */ + + struct BdrvTrackedRequest *waiting_for; +} BdrvTrackedRequest; + + +struct BlockDriver { + /* + * These fields are initialized when this object is created, + * and are never changed afterwards. + */ + + const char *format_name; + int instance_size; + + /* + * Set to true if the BlockDriver is a block filter. Block filters pass + * certain callbacks that refer to data (see block.c) to their bs->file + * or bs->backing (whichever one exists) if the driver doesn't implement + * them. Drivers that do not wish to forward must implement them and return + * -ENOTSUP. + * Note that filters are not allowed to modify data. + * + * Filters generally cannot have more than a single filtered child, + * because the data they present must at all times be the same as + * that on their filtered child. That would be impossible to + * achieve for multiple filtered children. + * (And this filtered child must then be bs->file or bs->backing.) + */ + bool is_filter; + /* + * Set to true if the BlockDriver is a format driver. Format nodes + * generally do not expect their children to be other format nodes + * (except for backing files), and so format probing is disabled + * on those children. + */ + bool is_format; + + /* + * Drivers not implementing bdrv_parse_filename nor bdrv_open should have + * this field set to true, except ones that are defined only by their + * child's bs. + * An example of the last type will be the quorum block driver. + */ + bool bdrv_needs_filename; + + /* + * Set if a driver can support backing files. This also implies the + * following semantics: + * + * - Return status 0 of .bdrv_co_block_status means that corresponding + * blocks are not allocated in this layer of backing-chain + * - For such (unallocated) blocks, read will: + * - fill buffer with zeros if there is no backing file + * - read from the backing file otherwise, where the block layer + * takes care of reading zeros beyond EOF if backing file is short + */ + bool supports_backing; + + bool has_variable_length; + + /* + * Drivers setting this field must be able to work with just a plain + * filename with '<protocol_name>:' as a prefix, and no other options. + * Options may be extracted from the filename by implementing + * bdrv_parse_filename. + */ + const char *protocol_name; + + /* List of options for creating images, terminated by name == NULL */ + QemuOptsList *create_opts; + + /* List of options for image amend */ + QemuOptsList *amend_opts; + + /* + * If this driver supports reopening images this contains a + * NULL-terminated list of the runtime options that can be + * modified. If an option in this list is unspecified during + * reopen then it _must_ be reset to its default value or return + * an error. + */ + const char *const *mutable_opts; + + /* + * Pointer to a NULL-terminated array of names of strong options + * that can be specified for bdrv_open(). A strong option is one + * that changes the data of a BDS. + * If this pointer is NULL, the array is considered empty. + * "filename" and "driver" are always considered strong. + */ + const char *const *strong_runtime_opts; + + + /* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + + /* + * This function is invoked under BQL before .bdrv_co_amend() + * (which in contrast does not necessarily run under the BQL) + * to allow driver-specific initialization code that requires + * the BQL, like setting up specific permission flags. + */ + int (*bdrv_amend_pre_run)(BlockDriverState *bs, Error **errp); + /* + * This function is invoked under BQL after .bdrv_co_amend() + * to allow cleaning up what was done in .bdrv_amend_pre_run(). + */ + void (*bdrv_amend_clean)(BlockDriverState *bs); + + /* + * Return true if @to_replace can be replaced by a BDS with the + * same data as @bs without it affecting @bs's behavior (that is, + * without it being visible to @bs's parents). + */ + bool (*bdrv_recurse_can_replace)(BlockDriverState *bs, + BlockDriverState *to_replace); + + int (*bdrv_probe_device)(const char *filename); + + /* + * Any driver implementing this callback is expected to be able to handle + * NULL file names in its .bdrv_open() implementation. + */ + void (*bdrv_parse_filename)(const char *filename, QDict *options, + Error **errp); + + /* For handling image reopen for split or non-split files. */ + int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp); + void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); + void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state); + void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); + void (*bdrv_join_options)(QDict *options, QDict *old_options); + + int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags, + Error **errp); + + /* Protocol drivers should implement this instead of bdrv_open */ + int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags, + Error **errp); + void (*bdrv_close)(BlockDriverState *bs); + + int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts, + Error **errp); + int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp); + + int (*bdrv_amend_options)(BlockDriverState *bs, + QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, + bool force, + Error **errp); + + int (*bdrv_make_empty)(BlockDriverState *bs); + + /* + * Refreshes the bs->exact_filename field. If that is impossible, + * bs->exact_filename has to be left empty. + */ + void (*bdrv_refresh_filename)(BlockDriverState *bs); + + /* + * Gathers the open options for all children into @target. + * A simple format driver (without backing file support) might + * implement this function like this: + * + * QINCREF(bs->file->bs->full_open_options); + * qdict_put(target, "file", bs->file->bs->full_open_options); + * + * If not specified, the generic implementation will simply put + * all children's options under their respective name. + * + * @backing_overridden is true when bs->backing seems not to be + * the child that would result from opening bs->backing_file. + * Therefore, if it is true, the backing child's options should be + * gathered; otherwise, there is no need since the backing child + * is the one implied by the image header. + * + * Note that ideally this function would not be needed. Every + * block driver which implements it is probably doing something + * shady regarding its runtime option structure. + */ + void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target, + bool backing_overridden); + + /* + * Returns an allocated string which is the directory name of this BDS: It + * will be used to make relative filenames absolute by prepending this + * function's return value to them. + */ + char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp); + + /* + * This informs the driver that we are no longer interested in the result + * of in-flight requests, so don't waste the time if possible. + * + * One example usage is to avoid waiting for an nbd target node reconnect + * timeout during job-cancel with force=true. + */ + void (*bdrv_cancel_in_flight)(BlockDriverState *bs); + + int (*bdrv_inactivate)(BlockDriverState *bs); + + int (*bdrv_snapshot_create)(BlockDriverState *bs, + QEMUSnapshotInfo *sn_info); + int (*bdrv_snapshot_goto)(BlockDriverState *bs, + const char *snapshot_id); + int (*bdrv_snapshot_delete)(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp); + int (*bdrv_snapshot_list)(BlockDriverState *bs, + QEMUSnapshotInfo **psn_info); + int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp); + + int (*bdrv_change_backing_file)(BlockDriverState *bs, + const char *backing_file, const char *backing_fmt); + + /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */ + int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, + const char *tag); + int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs, + const char *tag); + int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); + bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); + + void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp); + + /* + * Returns 1 if newly created images are guaranteed to contain only + * zeros, 0 otherwise. + */ + int (*bdrv_has_zero_init)(BlockDriverState *bs); + + /* + * Remove fd handlers, timers, and other event loop callbacks so the event + * loop is no longer in use. Called with no in-flight requests and in + * depth-first traversal order with parents before child nodes. + */ + void (*bdrv_detach_aio_context)(BlockDriverState *bs); + + /* + * Add fd handlers, timers, and other event loop callbacks so I/O requests + * can be processed again. Called with no in-flight requests and in + * depth-first traversal order with child nodes before parent nodes. + */ + void (*bdrv_attach_aio_context)(BlockDriverState *bs, + AioContext *new_context); + + /** + * Try to get @bs's logical and physical block size. + * On success, store them in @bsz and return zero. + * On failure, return negative errno. + */ + int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz); + /** + * Try to get @bs's geometry (cyls, heads, sectors) + * On success, store them in @geo and return 0. + * On failure return -errno. + * Only drivers that want to override guest geometry implement this + * callback; see hd_geometry_guess(). + */ + int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo); + + void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child, + Error **errp); + void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child, + Error **errp); + + /** + * Informs the block driver that a permission change is intended. The + * driver checks whether the change is permissible and may take other + * preparations for the change (e.g. get file system locks). This operation + * is always followed either by a call to either .bdrv_set_perm or + * .bdrv_abort_perm_update. + * + * Checks whether the requested set of cumulative permissions in @perm + * can be granted for accessing @bs and whether no other users are using + * permissions other than those given in @shared (both arguments take + * BLK_PERM_* bitmasks). + * + * If both conditions are met, 0 is returned. Otherwise, -errno is returned + * and errp is set to an error describing the conflict. + */ + int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm, + uint64_t shared, Error **errp); + + /** + * Called to inform the driver that the set of cumulative set of used + * permissions for @bs has changed to @perm, and the set of sharable + * permission to @shared. The driver can use this to propagate changes to + * its children (i.e. request permissions only if a parent actually needs + * them). + * + * This function is only invoked after bdrv_check_perm(), so block drivers + * may rely on preparations made in their .bdrv_check_perm implementation. + */ + void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared); + + /* + * Called to inform the driver that after a previous bdrv_check_perm() + * call, the permission update is not performed and any preparations made + * for it (e.g. taken file locks) need to be undone. + * + * This function can be called even for nodes that never saw a + * bdrv_check_perm() call. It is a no-op then. + */ + void (*bdrv_abort_perm_update)(BlockDriverState *bs); + + /** + * Returns in @nperm and @nshared the permissions that the driver for @bs + * needs on its child @c, based on the cumulative permissions requested by + * the parents in @parent_perm and @parent_shared. + * + * If @c is NULL, return the permissions for attaching a new child for the + * given @child_class and @role. + * + * If @reopen_queue is non-NULL, don't return the currently needed + * permissions, but those that will be needed after applying the + * @reopen_queue. + */ + void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared); + + /** + * Register/unregister a buffer for I/O. For example, when the driver is + * interested to know the memory areas that will later be used in iovs, so + * that it can do IOMMU mapping with VFIO etc., in order to get better + * performance. In the case of VFIO drivers, this callback is used to do + * DMA mapping for hot buffers. + */ + void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size); + void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host); + + /* + * This field is modified only under the BQL, and is part of + * the global state. + */ + QLIST_ENTRY(BlockDriver) list; + + /* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + + int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); + + int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs, + BlockdevAmendOptions *opts, + bool force, + Error **errp); + + /* aio */ + BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); + BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); + BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, + BlockCompletionFunc *cb, void *opaque); + BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs, + int64_t offset, int bytes, + BlockCompletionFunc *cb, void *opaque); + + int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + + /** + * @offset: position in bytes to read at + * @bytes: number of bytes to read + * @qiov: the buffers to fill with read data + * @flags: currently unused, always 0 + * + * @offset and @bytes will be a multiple of 'request_alignment', + * but the length of individual @qiov elements does not have to + * be a multiple. + * + * @bytes will always equal the total size of @qiov, and will be + * no larger than 'max_transfer'. + * + * The buffer in @qiov may point directly to guest memory. + */ + int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); + + int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); + + int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, + int flags); + /** + * @offset: position in bytes to write at + * @bytes: number of bytes to write + * @qiov: the buffers containing data to write + * @flags: zero or more bits allowed by 'supported_write_flags' + * + * @offset and @bytes will be a multiple of 'request_alignment', + * but the length of individual @qiov elements does not have to + * be a multiple. + * + * @bytes will always equal the total size of @qiov, and will be + * no larger than 'max_transfer'. + * + * The buffer in @qiov may point directly to guest memory. + */ + int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); + int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); + + /* + * Efficiently zero a region of the disk image. Typically an image format + * would use a compact metadata representation to implement this. This + * function pointer may be NULL or return -ENOSUP and .bdrv_co_writev() + * will be called instead. + */ + int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs, + int64_t offset, int64_t bytes, BdrvRequestFlags flags); + int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, + int64_t offset, int64_t bytes); + + /* + * Map [offset, offset + nbytes) range onto a child of @bs to copy from, + * and invoke bdrv_co_copy_range_from(child, ...), or invoke + * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from. + * + * See the comment of bdrv_co_copy_range for the parameter and return value + * semantics. + */ + int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs, + BdrvChild *src, + int64_t offset, + BdrvChild *dst, + int64_t dst_offset, + int64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + + /* + * Map [offset, offset + nbytes) range onto a child of bs to copy data to, + * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy + * operation if @bs is the leaf and @src has the same BlockDriver. Return + * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver. + * + * See the comment of bdrv_co_copy_range for the parameter and return value + * semantics. + */ + int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs, + BdrvChild *src, + int64_t src_offset, + BdrvChild *dst, + int64_t dst_offset, + int64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + + /* + * Building block for bdrv_block_status[_above] and + * bdrv_is_allocated[_above]. The driver should answer only + * according to the current layer, and should only need to set + * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID, + * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing + * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See + * block.h for the overall meaning of the bits. As a hint, the + * flag want_zero is true if the caller cares more about precise + * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for + * overall allocation (favor larger *pnum, perhaps by reporting + * _DATA instead of _ZERO). The block layer guarantees input + * clamped to bdrv_getlength() and aligned to request_alignment, + * as well as non-NULL pnum, map, and file; in turn, the driver + * must return an error or set pnum to an aligned non-zero value. + * + * Note that @bytes is just a hint on how big of a region the + * caller wants to inspect. It is not a limit on *pnum. + * Implementations are free to return larger values of *pnum if + * doing so does not incur a performance penalty. + * + * block/io.c's bdrv_co_block_status() will utilize an unclamped + * *pnum value for the block-status cache on protocol nodes, prior + * to clamping *pnum for return to its caller. + */ + int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs, + bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file); + + /* + * Invalidate any cached meta-data. + */ + void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs, + Error **errp); + + /* + * Flushes all data for all layers by calling bdrv_co_flush for underlying + * layers, if needed. This function is needed for deterministic + * synchronization of the flush finishing callback. + */ + int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs); + + /* Delete a created file. */ + int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs, + Error **errp); + + /* + * Flushes all data that was already written to the OS all the way down to + * the disk (for example file-posix.c calls fsync()). + */ + int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs); + + /* + * Flushes all internal caches to the OS. The data may still sit in a + * writeback cache of the host OS, but it will survive a crash of the qemu + * process. + */ + int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs); + + /* + * Truncate @bs to @offset bytes using the given @prealloc mode + * when growing. Modes other than PREALLOC_MODE_OFF should be + * rejected when shrinking @bs. + * + * If @exact is true, @bs must be resized to exactly @offset. + * Otherwise, it is sufficient for @bs (if it is a host block + * device and thus there is no way to resize it) to be at least + * @offset bytes in length. + * + * If @exact is true and this function fails but would succeed + * with @exact = false, it should return -ENOTSUP. + */ + int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp); + int64_t (*bdrv_getlength)(BlockDriverState *bs); + int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs); + BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs, + Error **errp); + + int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov); + int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset); + + int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); + + ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs, + Error **errp); + BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs); + + int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs, + QEMUIOVector *qiov, + int64_t pos); + int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs, + QEMUIOVector *qiov, + int64_t pos); + + /* removable device specific */ + bool (*bdrv_is_inserted)(BlockDriverState *bs); + void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); + void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked); + + /* to control generic scsi devices */ + BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, + unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque); + int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs, + unsigned long int req, void *buf); + + /* + * Returns 0 for completed check, -errno for internal errors. + * The check results are stored in result. + */ + int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix); + + void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event); + + /* io queue for linux-aio */ + void (*bdrv_io_plug)(BlockDriverState *bs); + void (*bdrv_io_unplug)(BlockDriverState *bs); + + /** + * bdrv_co_drain_begin is called if implemented in the beginning of a + * drain operation to drain and stop any internal sources of requests in + * the driver. + * bdrv_co_drain_end is called if implemented at the end of the drain. + * + * They should be used by the driver to e.g. manage scheduled I/O + * requests, or toggle an internal state. After the end of the drain new + * requests will continue normally. + */ + void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); + void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); + + bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); + bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs, + const char *name, + uint32_t granularity, + Error **errp); + int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs, + const char *name, + Error **errp); +}; + +static inline bool block_driver_can_compress(BlockDriver *drv) +{ + return drv->bdrv_co_pwritev_compressed || + drv->bdrv_co_pwritev_compressed_part; +} + +typedef struct BlockLimits { + /* + * Alignment requirement, in bytes, for offset/length of I/O + * requests. Must be a power of 2 less than INT_MAX; defaults to + * 1 for drivers with modern byte interfaces, and to 512 + * otherwise. + */ + uint32_t request_alignment; + + /* + * Maximum number of bytes that can be discarded at once. Must be multiple + * of pdiscard_alignment, but need not be power of 2. May be 0 if no + * inherent 64-bit limit. + */ + int64_t max_pdiscard; + + /* + * Optimal alignment for discard requests in bytes. A power of 2 + * is best but not mandatory. Must be a multiple of + * bl.request_alignment, and must be less than max_pdiscard if + * that is set. May be 0 if bl.request_alignment is good enough + */ + uint32_t pdiscard_alignment; + + /* + * Maximum number of bytes that can zeroized at once. Must be multiple of + * pwrite_zeroes_alignment. 0 means no limit. + */ + int64_t max_pwrite_zeroes; + + /* + * Optimal alignment for write zeroes requests in bytes. A power + * of 2 is best but not mandatory. Must be a multiple of + * bl.request_alignment, and must be less than max_pwrite_zeroes + * if that is set. May be 0 if bl.request_alignment is good + * enough + */ + uint32_t pwrite_zeroes_alignment; + + /* + * Optimal transfer length in bytes. A power of 2 is best but not + * mandatory. Must be a multiple of bl.request_alignment, or 0 if + * no preferred size + */ + uint32_t opt_transfer; + + /* + * Maximal transfer length in bytes. Need not be power of 2, but + * must be multiple of opt_transfer and bl.request_alignment, or 0 + * for no 32-bit limit. For now, anything larger than INT_MAX is + * clamped down. + */ + uint32_t max_transfer; + + /* + * Maximal hardware transfer length in bytes. Applies whenever + * transfers to the device bypass the kernel I/O scheduler, for + * example with SG_IO. If larger than max_transfer or if zero, + * blk_get_max_hw_transfer will fall back to max_transfer. + */ + uint64_t max_hw_transfer; + + /* + * Maximal number of scatter/gather elements allowed by the hardware. + * Applies whenever transfers to the device bypass the kernel I/O + * scheduler, for example with SG_IO. If larger than max_iov + * or if zero, blk_get_max_hw_iov will fall back to max_iov. + */ + int max_hw_iov; + + + /* memory alignment, in bytes so that no bounce buffer is needed */ + size_t min_mem_alignment; + + /* memory alignment, in bytes, for bounce buffer */ + size_t opt_mem_alignment; + + /* maximum number of iovec elements */ + int max_iov; +} BlockLimits; + +typedef struct BdrvOpBlocker BdrvOpBlocker; + +typedef struct BdrvAioNotifier { + void (*attached_aio_context)(AioContext *new_context, void *opaque); + void (*detach_aio_context)(void *opaque); + + void *opaque; + bool deleted; + + QLIST_ENTRY(BdrvAioNotifier) list; +} BdrvAioNotifier; + +struct BdrvChildClass { + /* + * If true, bdrv_replace_node() doesn't change the node this BdrvChild + * points to. + */ + bool stay_at_node; + + /* + * If true, the parent is a BlockDriverState and bdrv_next_all_states() + * will return it. This information is used for drain_all, where every node + * will be drained separately, so the drain only needs to be propagated to + * non-BDS parents. + */ + bool parent_is_bds; + + /* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + void (*inherit_options)(BdrvChildRole role, bool parent_is_format, + int *child_flags, QDict *child_options, + int parent_flags, QDict *parent_options); + void (*change_media)(BdrvChild *child, bool load); + + /* + * Returns a malloced string that describes the parent of the child for a + * human reader. This could be a node-name, BlockBackend name, qdev ID or + * QOM path of the device owning the BlockBackend, job type and ID etc. The + * caller is responsible for freeing the memory. + */ + char *(*get_parent_desc)(BdrvChild *child); + + /* + * Notifies the parent that the child has been activated/inactivated (e.g. + * when migration is completing) and it can start/stop requesting + * permissions and doing I/O on it. + */ + void (*activate)(BdrvChild *child, Error **errp); + int (*inactivate)(BdrvChild *child); + + void (*attach)(BdrvChild *child); + void (*detach)(BdrvChild *child); + + /* + * Notifies the parent that the filename of its child has changed (e.g. + * because the direct child was removed from the backing chain), so that it + * can update its reference. + */ + int (*update_filename)(BdrvChild *child, BlockDriverState *new_base, + const char *filename, Error **errp); + + bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp); + void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore); + + AioContext *(*get_parent_aio_context)(BdrvChild *child); + + /* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + + void (*resize)(BdrvChild *child); + + /* + * Returns a name that is supposedly more useful for human users than the + * node name for identifying the node in question (in particular, a BB + * name), or NULL if the parent can't provide a better name. + */ + const char *(*get_name)(BdrvChild *child); + + /* + * If this pair of functions is implemented, the parent doesn't issue new + * requests after returning from .drained_begin() until .drained_end() is + * called. + * + * These functions must not change the graph (and therefore also must not + * call aio_poll(), which could change the graph indirectly). + * + * If drained_end() schedules background operations, it must atomically + * increment *drained_end_counter for each such operation and atomically + * decrement it once the operation has settled. + * + * Note that this can be nested. If drained_begin() was called twice, new + * I/O is allowed only after drained_end() was called twice, too. + */ + void (*drained_begin)(BdrvChild *child); + void (*drained_end)(BdrvChild *child, int *drained_end_counter); + + /* + * Returns whether the parent has pending requests for the child. This + * callback is polled after .drained_begin() has been called until all + * activity on the child has stopped. + */ + bool (*drained_poll)(BdrvChild *child); +}; + +extern const BdrvChildClass child_of_bds; + +struct BdrvChild { + BlockDriverState *bs; + char *name; + const BdrvChildClass *klass; + BdrvChildRole role; + void *opaque; + + /** + * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask) + */ + uint64_t perm; + + /** + * Permissions that can still be granted to other users of @bs while this + * BdrvChild is still attached to it. (BLK_PERM_* bitmask) + */ + uint64_t shared_perm; + + /* + * This link is frozen: the child can neither be replaced nor + * detached from the parent. + */ + bool frozen; + + /* + * How many times the parent of this child has been drained + * (through klass->drained_*). + * Usually, this is equal to bs->quiesce_counter (potentially + * reduced by bdrv_drain_all_count). It may differ while the + * child is entering or leaving a drained section. + */ + int parent_quiesce_counter; + + QLIST_ENTRY(BdrvChild) next; + QLIST_ENTRY(BdrvChild) next_parent; +}; + +/* + * Allows bdrv_co_block_status() to cache one data region for a + * protocol node. + * + * @valid: Whether the cache is valid (should be accessed with atomic + * functions so this can be reset by RCU readers) + * @data_start: Offset where we know (or strongly assume) is data + * @data_end: Offset where the data region ends (which is not necessarily + * the start of a zeroed region) + */ +typedef struct BdrvBlockStatusCache { + struct rcu_head rcu; + + bool valid; + int64_t data_start; + int64_t data_end; +} BdrvBlockStatusCache; + +struct BlockDriverState { + /* + * Protected by big QEMU lock or read-only after opening. No special + * locking needed during I/O... + */ + int open_flags; /* flags used to open the file, re-used for re-open */ + bool encrypted; /* if true, the media is encrypted */ + bool sg; /* if true, the device is a /dev/sg* */ + bool probed; /* if true, format was probed rather than specified */ + bool force_share; /* if true, always allow all shared permissions */ + bool implicit; /* if true, this filter node was automatically inserted */ + + BlockDriver *drv; /* NULL means no media */ + void *opaque; + + AioContext *aio_context; /* event loop used for fd handlers, timers, etc */ + /* + * long-running tasks intended to always use the same AioContext as this + * BDS may register themselves in this list to be notified of changes + * regarding this BDS's context + */ + QLIST_HEAD(, BdrvAioNotifier) aio_notifiers; + bool walking_aio_notifiers; /* to make removal during iteration safe */ + + char filename[PATH_MAX]; + /* + * If not empty, this image is a diff in relation to backing_file. + * Note that this is the name given in the image header and + * therefore may or may not be equal to .backing->bs->filename. + * If this field contains a relative path, it is to be resolved + * relatively to the overlay's location. + */ + char backing_file[PATH_MAX]; + /* + * The backing filename indicated by the image header. Contrary + * to backing_file, if we ever open this file, auto_backing_file + * is replaced by the resulting BDS's filename (i.e. after a + * bdrv_refresh_filename() run). + */ + char auto_backing_file[PATH_MAX]; + char backing_format[16]; /* if non-zero and backing_file exists */ + + QDict *full_open_options; + char exact_filename[PATH_MAX]; + + BdrvChild *backing; + BdrvChild *file; + + /* I/O Limits */ + BlockLimits bl; + + /* + * Flags honored during pread + */ + unsigned int supported_read_flags; + /* + * Flags honored during pwrite (so far: BDRV_REQ_FUA, + * BDRV_REQ_WRITE_UNCHANGED). + * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those + * writes will be issued as normal writes without the flag set. + * This is important to note for drivers that do not explicitly + * request a WRITE permission for their children and instead take + * the same permissions as their parent did (this is commonly what + * block filters do). Such drivers have to be aware that the + * parent may have taken a WRITE_UNCHANGED permission only and is + * issuing such requests. Drivers either must make sure that + * these requests do not result in plain WRITE accesses (usually + * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding + * every incoming write request as-is, including potentially that + * flag), or they have to explicitly take the WRITE permission for + * their children. + */ + unsigned int supported_write_flags; + /* + * Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA, + * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) + */ + unsigned int supported_zero_flags; + /* + * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE). + * + * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure + * that any added space reads as all zeros. If this can't be guaranteed, + * the operation must fail. + */ + unsigned int supported_truncate_flags; + + /* the following member gives a name to every node on the bs graph. */ + char node_name[32]; + /* element of the list of named nodes building the graph */ + QTAILQ_ENTRY(BlockDriverState) node_list; + /* element of the list of all BlockDriverStates (all_bdrv_states) */ + QTAILQ_ENTRY(BlockDriverState) bs_list; + /* element of the list of monitor-owned BDS */ + QTAILQ_ENTRY(BlockDriverState) monitor_list; + int refcnt; + + /* operation blockers. Protected by BQL. */ + QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX]; + + /* + * The node that this node inherited default options from (and a reopen on + * which can affect this node by changing these defaults). This is always a + * parent node of this node. + */ + BlockDriverState *inherits_from; + QLIST_HEAD(, BdrvChild) children; + QLIST_HEAD(, BdrvChild) parents; + + QDict *options; + QDict *explicit_options; + BlockdevDetectZeroesOptions detect_zeroes; + + /* The error object in use for blocking operations on backing_hd */ + Error *backing_blocker; + + /* Protected by AioContext lock */ + + /* + * If we are reading a disk image, give its size in sectors. + * Generally read-only; it is written to by load_snapshot and + * save_snaphost, but the block layer is quiescent during those. + */ + int64_t total_sectors; + + /* threshold limit for writes, in bytes. "High water mark". */ + uint64_t write_threshold_offset; + + /* + * Writing to the list requires the BQL _and_ the dirty_bitmap_mutex. + * Reading from the list can be done with either the BQL or the + * dirty_bitmap_mutex. Modifying a bitmap only requires + * dirty_bitmap_mutex. + */ + QemuMutex dirty_bitmap_mutex; + QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; + + /* Offset after the highest byte written to */ + Stat64 wr_highest_offset; + + /* + * If true, copy read backing sectors into image. Can be >1 if more + * than one client has requested copy-on-read. Accessed with atomic + * ops. + */ + int copy_on_read; + + /* + * number of in-flight requests; overall and serialising. + * Accessed with atomic ops. + */ + unsigned int in_flight; + unsigned int serialising_in_flight; + + /* + * counter for nested bdrv_io_plug. + * Accessed with atomic ops. + */ + unsigned io_plugged; + + /* do we need to tell the quest if we have a volatile write cache? */ + int enable_write_cache; + + /* Accessed with atomic ops. */ + int quiesce_counter; + int recursive_quiesce_counter; + + unsigned int write_gen; /* Current data generation */ + + /* Protected by reqs_lock. */ + CoMutex reqs_lock; + QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; + CoQueue flush_queue; /* Serializing flush queue */ + bool active_flush_req; /* Flush request in flight? */ + + /* Only read/written by whoever has set active_flush_req to true. */ + unsigned int flushed_gen; /* Flushed write generation */ + + /* BdrvChild links to this node may never be frozen */ + bool never_freeze; + + /* Lock for block-status cache RCU writers */ + CoMutex bsc_modify_lock; + /* Always non-NULL, but must only be dereferenced under an RCU read guard */ + BdrvBlockStatusCache *block_status_cache; +}; + +struct BlockBackendRootState { + int open_flags; + BlockdevDetectZeroesOptions detect_zeroes; +}; + +typedef enum BlockMirrorBackingMode { + /* + * Reuse the existing backing chain from the source for the target. + * - sync=full: Set backing BDS to NULL. + * - sync=top: Use source's backing BDS. + * - sync=none: Use source as the backing BDS. + */ + MIRROR_SOURCE_BACKING_CHAIN, + + /* Open the target's backing chain completely anew */ + MIRROR_OPEN_BACKING_CHAIN, + + /* Do not change the target's backing BDS after job completion */ + MIRROR_LEAVE_BACKING_CHAIN, +} BlockMirrorBackingMode; + + +/* + * Essential block drivers which must always be statically linked into qemu, and + * which therefore can be accessed without using bdrv_find_format() + */ +extern BlockDriver bdrv_file; +extern BlockDriver bdrv_raw; +extern BlockDriver bdrv_qcow2; + +extern unsigned int bdrv_drain_all_count; +extern QemuOptsList bdrv_create_opts_simple; + +/* + * Common functions that are neither I/O nor Global State. + * + * See include/block/block-commmon.h for more information about + * the Common API. + */ + +static inline BlockDriverState *child_bs(BdrvChild *child) +{ + return child ? child->bs : NULL; +} + +int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp); +int get_tmp_filename(char *filename, int size); +void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, + QDict *options); + + +int bdrv_check_qiov_request(int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + Error **errp); + +#ifdef _WIN32 +int is_windows_drive(const char *filename); +#endif + +#endif /* BLOCK_INT_COMMON_H */ diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h new file mode 100644 index 0000000000..0f21b0570b --- /dev/null +++ b/include/block/block_int-global-state.h @@ -0,0 +1,329 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_INT_GLOBAL_STATE_H +#define BLOCK_INT_GLOBAL_STATE_H + +#include "block_int-common.h" + +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + +/** + * stream_start: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Block device to operate on. + * @base: Block device that will become the new base, or %NULL to + * flatten the whole backing file chain onto @bs. + * @backing_file_str: The file name that will be written to @bs as the + * the new backing file if the job completes. Ignored if @base is %NULL. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the stream job inserts into the graph above + * @bs. NULL means that a node name should be autogenerated. + * @errp: Error object. + * + * Start a streaming operation on @bs. Clusters that are unallocated + * in @bs, but allocated in any image between @base and @bs (both + * exclusive) will be written to @bs. At the end of a successful + * streaming job, the backing file of @bs will be changed to + * @backing_file_str in the written image and to @base in the live + * BlockDriverState. + */ +void stream_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *base, const char *backing_file_str, + BlockDriverState *bottom, + int creation_flags, int64_t speed, + BlockdevOnError on_error, + const char *filter_node_name, + Error **errp); + +/** + * commit_start: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Active block device. + * @top: Top block device to be committed. + * @base: Block device that will be written into, and become the new top. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @backing_file_str: String to use as the backing file in @top's overlay + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @top. NULL means + * that a node name should be autogenerated. + * @errp: Error object. + * + */ +void commit_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *base, BlockDriverState *top, + int creation_flags, int64_t speed, + BlockdevOnError on_error, const char *backing_file_str, + const char *filter_node_name, Error **errp); +/** + * commit_active_start: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Active block device to be committed. + * @base: Block device that will be written into, and become the new top. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @auto_complete: Auto complete the job. + * @errp: Error object. + * + */ +BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *base, int creation_flags, + int64_t speed, BlockdevOnError on_error, + const char *filter_node_name, + BlockCompletionFunc *cb, void *opaque, + bool auto_complete, Error **errp); +/* + * mirror_start: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Block device to operate on. + * @target: Block device to write to. + * @replaces: Block graph node name to replace once the mirror is done. Can + * only be used when full mirroring is selected. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @granularity: The chosen granularity for the dirty bitmap. + * @buf_size: The amount of data that can be in flight at one time. + * @mode: Whether to collapse all images in the chain to the target. + * @backing_mode: How to establish the target's backing chain after completion. + * @zero_target: Whether the target should be explicitly zero-initialized + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. + * @unmap: Whether to unmap target where source sectors only contain zeroes. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the mirror job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. + * @copy_mode: When to trigger writes to the target. + * @errp: Error object. + * + * Start a mirroring operation on @bs. Clusters that are allocated + * in @bs will be written to @target until the job is cancelled or + * manually completed. At the end of a successful mirroring job, + * @bs will be switched to read from @target. + */ +void mirror_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *target, const char *replaces, + int creation_flags, int64_t speed, + uint32_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, + bool zero_target, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + bool unmap, const char *filter_node_name, + MirrorCopyMode copy_mode, Error **errp); + +/* + * backup_job_create: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Block device to operate on. + * @target: Block device to write to. + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @sync_mode: What parts of the disk image should be copied to the destination. + * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental' + * @bitmap_mode: The bitmap synchronization policy to use. + * @perf: Performance options. All actual fields assumed to be present, + * all ".has_*" fields are ignored. + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @txn: Transaction that this job is part of (may be NULL). + * + * Create a backup operation on @bs. Clusters in @bs are written to @target + * until the job is cancelled or manually completed. + */ +BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + BlockDriverState *target, int64_t speed, + MirrorSyncMode sync_mode, + BdrvDirtyBitmap *sync_bitmap, + BitmapSyncMode bitmap_mode, + bool compress, + const char *filter_node_name, + BackupPerf *perf, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + int creation_flags, + BlockCompletionFunc *cb, void *opaque, + JobTxn *txn, Error **errp); + +BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + uint64_t perm, uint64_t shared_perm, + void *opaque, Error **errp); +void bdrv_root_unref_child(BdrvChild *child); + +void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, + uint64_t *shared_perm); + +/** + * Sets a BdrvChild's permissions. Avoid if the parent is a BDS; use + * bdrv_child_refresh_perms() instead and make the parent's + * .bdrv_child_perm() implementation return the correct values. + */ +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp); + +/** + * Calls bs->drv->bdrv_child_perm() and updates the child's permission + * masks with the result. + * Drivers should invoke this function whenever an event occurs that + * makes their .bdrv_child_perm() implementation return different + * values than before, but which will not result in the block layer + * automatically refreshing the permissions. + */ +int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp); + +bool bdrv_recurse_can_replace(BlockDriverState *bs, + BlockDriverState *to_replace); + +/* + * Default implementation for BlockDriver.bdrv_child_perm() that can + * be used by block filters and image formats, as long as they use the + * child_of_bds child class and set an appropriate BdrvChildRole. + */ +void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared); + +void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); +bool blk_dev_has_removable_media(BlockBackend *blk); +void blk_dev_eject_request(BlockBackend *blk, bool force); +bool blk_dev_is_medium_locked(BlockBackend *blk); + +void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup); + +void bdrv_set_monitor_owned(BlockDriverState *bs); + +void blockdev_close_all_bdrv_states(void); + +BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp); + +/** + * Simple implementation of bdrv_co_create_opts for protocol drivers + * which only support creation via opening a file + * (usually existing raw storage device) + */ +int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp); + +BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, + const char *name, + BlockDriverState **pbs, + Error **errp); +BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, + BlockDirtyBitmapMergeSourceList *bms, + HBitmap **backup, Error **errp); +BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, + bool release, + BlockDriverState **bitmap_bs, + Error **errp); + + +BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs); + +/** + * bdrv_add_aio_context_notifier: + * + * If a long-running job intends to be always run in the same AioContext as a + * certain BDS, it may use this function to be notified of changes regarding the + * association of the BDS to an AioContext. + * + * attached_aio_context() is called after the target BDS has been attached to a + * new AioContext; detach_aio_context() is called before the target BDS is being + * detached from its old AioContext. + */ +void bdrv_add_aio_context_notifier(BlockDriverState *bs, + void (*attached_aio_context)(AioContext *new_context, void *opaque), + void (*detach_aio_context)(void *opaque), void *opaque); + +/** + * bdrv_remove_aio_context_notifier: + * + * Unsubscribe of change notifications regarding the BDS's AioContext. The + * parameters given here have to be the same as those given to + * bdrv_add_aio_context_notifier(). + */ +void bdrv_remove_aio_context_notifier(BlockDriverState *bs, + void (*aio_context_attached)(AioContext *, + void *), + void (*aio_context_detached)(void *), + void *opaque); + +/** + * End all quiescent sections started by bdrv_drain_all_begin(). This is + * needed when deleting a BDS before bdrv_drain_all_end() is called. + * + * NOTE: this is an internal helper for bdrv_close() *only*. No one else + * should call it. + */ +void bdrv_drain_all_end_quiesce(BlockDriverState *bs); + +/** + * Make sure that the function is running under both drain and BQL. + * The latter protects from concurrent writings + * from the GS API, while the former prevents concurrent reads + * from I/O. + */ +static inline void assert_bdrv_graph_writable(BlockDriverState *bs) +{ + /* + * TODO: this function is incomplete. Because the users of this + * assert lack the necessary drains, check only for BQL. + * Once the necessary drains are added, + * assert also for qatomic_read(&bs->quiesce_counter) > 0 + */ + assert(qemu_in_main_thread()); +} + +#endif /* BLOCK_INT_GLOBAL_STATE */ diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h new file mode 100644 index 0000000000..3da5f01c42 --- /dev/null +++ b/include/block/block_int-io.h @@ -0,0 +1,185 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_INT_IO_H +#define BLOCK_INT_IO_H + +#include "block_int-common.h" + +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + +int coroutine_fn bdrv_co_preadv(BdrvChild *child, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); +int coroutine_fn bdrv_co_pwritev(BdrvChild *child, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); + +static inline int coroutine_fn bdrv_co_pread(BdrvChild *child, + int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_CODE(); + + return bdrv_co_preadv(child, offset, bytes, &qiov, flags); +} + +static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child, + int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_CODE(); + + return bdrv_co_pwritev(child, offset, bytes, &qiov, flags); +} + +bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req, + uint64_t align); +BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs); + +BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, + const char *filename); + +/** + * bdrv_wakeup: + * @bs: The BlockDriverState for which an I/O operation has been completed. + * + * Wake up the main thread if it is waiting on BDRV_POLL_WHILE. During + * synchronous I/O on a BlockDriverState that is attached to another + * I/O thread, the main thread lets the I/O thread's event loop run, + * waiting for the I/O operation to complete. A bdrv_wakeup will wake + * up the main thread if necessary. + * + * Manual calls to bdrv_wakeup are rarely necessary, because + * bdrv_dec_in_flight already calls it. + */ +void bdrv_wakeup(BlockDriverState *bs); + +const char *bdrv_get_parent_name(const BlockDriverState *bs); +bool blk_dev_has_tray(BlockBackend *blk); +bool blk_dev_is_tray_open(BlockBackend *blk); + +void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes); + +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); +bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest, + const BdrvDirtyBitmap *src, + HBitmap **backup, bool lock); + +void bdrv_inc_in_flight(BlockDriverState *bs); +void bdrv_dec_in_flight(BlockDriverState *bs); + +int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset, + BdrvChild *dst, int64_t dst_offset, + int64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); +int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset, + BdrvChild *dst, int64_t dst_offset, + int64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + +int refresh_total_sectors(BlockDriverState *bs, int64_t hint); + +BdrvChild *bdrv_cow_child(BlockDriverState *bs); +BdrvChild *bdrv_filter_child(BlockDriverState *bs); +BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs); +BdrvChild *bdrv_primary_child(BlockDriverState *bs); +BlockDriverState *bdrv_skip_filters(BlockDriverState *bs); +BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs); + +static inline BlockDriverState *bdrv_cow_bs(BlockDriverState *bs) +{ + IO_CODE(); + return child_bs(bdrv_cow_child(bs)); +} + +static inline BlockDriverState *bdrv_filter_bs(BlockDriverState *bs) +{ + IO_CODE(); + return child_bs(bdrv_filter_child(bs)); +} + +static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs) +{ + IO_CODE(); + return child_bs(bdrv_filter_or_cow_child(bs)); +} + +static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs) +{ + IO_CODE(); + return child_bs(bdrv_primary_child(bs)); +} + +/** + * Check whether the given offset is in the cached block-status data + * region. + * + * If it is, and @pnum is not NULL, *pnum is set to + * `bsc.data_end - offset`, i.e. how many bytes, starting from + * @offset, are data (according to the cache). + * Otherwise, *pnum is not touched. + */ +bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum); + +/** + * If [offset, offset + bytes) overlaps with the currently cached + * block-status region, invalidate the cache. + * + * (To be used by I/O paths that cause data regions to be zero or + * holes.) + */ +void bdrv_bsc_invalidate_range(BlockDriverState *bs, + int64_t offset, int64_t bytes); + +/** + * Mark the range [offset, offset + bytes) as a data region. + */ +void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); + + +/* + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. + * + * See include/block/block-io.h for more information about + * the "I/O or GS" API. + */ + +void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); +void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); + +#endif /* BLOCK_INT_IO_H */ diff --git a/include/block/block_int.h b/include/block/block_int.h index 27008cfb22..7d50b6bbd1 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -24,1478 +24,9 @@ #ifndef BLOCK_INT_H #define BLOCK_INT_H -#include "block/accounting.h" -#include "block/block.h" -#include "block/aio-wait.h" -#include "qemu/queue.h" -#include "qemu/coroutine.h" -#include "qemu/stats64.h" -#include "qemu/timer.h" -#include "qemu/hbitmap.h" -#include "block/snapshot.h" -#include "qemu/throttle.h" -#include "qemu/rcu.h" +#include "block_int-global-state.h" +#include "block_int-io.h" -#define BLOCK_FLAG_LAZY_REFCOUNTS 8 - -#define BLOCK_OPT_SIZE "size" -#define BLOCK_OPT_ENCRYPT "encryption" -#define BLOCK_OPT_ENCRYPT_FORMAT "encrypt.format" -#define BLOCK_OPT_COMPAT6 "compat6" -#define BLOCK_OPT_HWVERSION "hwversion" -#define BLOCK_OPT_BACKING_FILE "backing_file" -#define BLOCK_OPT_BACKING_FMT "backing_fmt" -#define BLOCK_OPT_CLUSTER_SIZE "cluster_size" -#define BLOCK_OPT_TABLE_SIZE "table_size" -#define BLOCK_OPT_PREALLOC "preallocation" -#define BLOCK_OPT_SUBFMT "subformat" -#define BLOCK_OPT_COMPAT_LEVEL "compat" -#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" -#define BLOCK_OPT_ADAPTER_TYPE "adapter_type" -#define BLOCK_OPT_REDUNDANCY "redundancy" -#define BLOCK_OPT_NOCOW "nocow" -#define BLOCK_OPT_EXTENT_SIZE_HINT "extent_size_hint" -#define BLOCK_OPT_OBJECT_SIZE "object_size" -#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits" -#define BLOCK_OPT_DATA_FILE "data_file" -#define BLOCK_OPT_DATA_FILE_RAW "data_file_raw" -#define BLOCK_OPT_COMPRESSION_TYPE "compression_type" -#define BLOCK_OPT_EXTL2 "extended_l2" - -#define BLOCK_PROBE_BUF_SIZE 512 - -enum BdrvTrackedRequestType { - BDRV_TRACKED_READ, - BDRV_TRACKED_WRITE, - BDRV_TRACKED_DISCARD, - BDRV_TRACKED_TRUNCATE, -}; - -/* - * That is not quite good that BdrvTrackedRequest structure is public, - * as block/io.c is very careful about incoming offset/bytes being - * correct. Be sure to assert bdrv_check_request() succeeded after any - * modification of BdrvTrackedRequest object out of block/io.c - */ -typedef struct BdrvTrackedRequest { - BlockDriverState *bs; - int64_t offset; - int64_t bytes; - enum BdrvTrackedRequestType type; - - bool serialising; - int64_t overlap_offset; - int64_t overlap_bytes; - - QLIST_ENTRY(BdrvTrackedRequest) list; - Coroutine *co; /* owner, used for deadlock detection */ - CoQueue wait_queue; /* coroutines blocked on this request */ - - struct BdrvTrackedRequest *waiting_for; -} BdrvTrackedRequest; - -int bdrv_check_qiov_request(int64_t offset, int64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, - Error **errp); -int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp); - -struct BlockDriver { - const char *format_name; - int instance_size; - - /* set to true if the BlockDriver is a block filter. Block filters pass - * certain callbacks that refer to data (see block.c) to their bs->file - * or bs->backing (whichever one exists) if the driver doesn't implement - * them. Drivers that do not wish to forward must implement them and return - * -ENOTSUP. - * Note that filters are not allowed to modify data. - * - * Filters generally cannot have more than a single filtered child, - * because the data they present must at all times be the same as - * that on their filtered child. That would be impossible to - * achieve for multiple filtered children. - * (And this filtered child must then be bs->file or bs->backing.) - */ - bool is_filter; - /* - * Set to true if the BlockDriver is a format driver. Format nodes - * generally do not expect their children to be other format nodes - * (except for backing files), and so format probing is disabled - * on those children. - */ - bool is_format; - /* - * Return true if @to_replace can be replaced by a BDS with the - * same data as @bs without it affecting @bs's behavior (that is, - * without it being visible to @bs's parents). - */ - bool (*bdrv_recurse_can_replace)(BlockDriverState *bs, - BlockDriverState *to_replace); - - int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); - int (*bdrv_probe_device)(const char *filename); - - /* Any driver implementing this callback is expected to be able to handle - * NULL file names in its .bdrv_open() implementation */ - void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp); - /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have - * this field set to true, except ones that are defined only by their - * child's bs. - * An example of the last type will be the quorum block driver. - */ - bool bdrv_needs_filename; - - /* - * Set if a driver can support backing files. This also implies the - * following semantics: - * - * - Return status 0 of .bdrv_co_block_status means that corresponding - * blocks are not allocated in this layer of backing-chain - * - For such (unallocated) blocks, read will: - * - fill buffer with zeros if there is no backing file - * - read from the backing file otherwise, where the block layer - * takes care of reading zeros beyond EOF if backing file is short - */ - bool supports_backing; - - /* For handling image reopen for split or non-split files */ - int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, - BlockReopenQueue *queue, Error **errp); - void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); - void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state); - void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); - void (*bdrv_join_options)(QDict *options, QDict *old_options); - - int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags, - Error **errp); - - /* Protocol drivers should implement this instead of bdrv_open */ - int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags, - Error **errp); - void (*bdrv_close)(BlockDriverState *bs); - - - int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts, - Error **errp); - int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv, - const char *filename, - QemuOpts *opts, - Error **errp); - - int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs, - BlockdevAmendOptions *opts, - bool force, - Error **errp); - - int (*bdrv_amend_options)(BlockDriverState *bs, - QemuOpts *opts, - BlockDriverAmendStatusCB *status_cb, - void *cb_opaque, - bool force, - Error **errp); - - int (*bdrv_make_empty)(BlockDriverState *bs); - - /* - * Refreshes the bs->exact_filename field. If that is impossible, - * bs->exact_filename has to be left empty. - */ - void (*bdrv_refresh_filename)(BlockDriverState *bs); - - /* - * Gathers the open options for all children into @target. - * A simple format driver (without backing file support) might - * implement this function like this: - * - * QINCREF(bs->file->bs->full_open_options); - * qdict_put(target, "file", bs->file->bs->full_open_options); - * - * If not specified, the generic implementation will simply put - * all children's options under their respective name. - * - * @backing_overridden is true when bs->backing seems not to be - * the child that would result from opening bs->backing_file. - * Therefore, if it is true, the backing child's options should be - * gathered; otherwise, there is no need since the backing child - * is the one implied by the image header. - * - * Note that ideally this function would not be needed. Every - * block driver which implements it is probably doing something - * shady regarding its runtime option structure. - */ - void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target, - bool backing_overridden); - - /* - * Returns an allocated string which is the directory name of this BDS: It - * will be used to make relative filenames absolute by prepending this - * function's return value to them. - */ - char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp); - - /* aio */ - BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); - BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); - BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, - BlockCompletionFunc *cb, void *opaque); - BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs, - int64_t offset, int bytes, - BlockCompletionFunc *cb, void *opaque); - - int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); - - /** - * @offset: position in bytes to read at - * @bytes: number of bytes to read - * @qiov: the buffers to fill with read data - * @flags: currently unused, always 0 - * - * @offset and @bytes will be a multiple of 'request_alignment', - * but the length of individual @qiov elements does not have to - * be a multiple. - * - * @bytes will always equal the total size of @qiov, and will be - * no larger than 'max_transfer'. - * - * The buffer in @qiov may point directly to guest memory. - */ - int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); - int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs, - int64_t offset, int64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); - int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags); - /** - * @offset: position in bytes to write at - * @bytes: number of bytes to write - * @qiov: the buffers containing data to write - * @flags: zero or more bits allowed by 'supported_write_flags' - * - * @offset and @bytes will be a multiple of 'request_alignment', - * but the length of individual @qiov elements does not have to - * be a multiple. - * - * @bytes will always equal the total size of @qiov, and will be - * no larger than 'max_transfer'. - * - * The buffer in @qiov may point directly to guest memory. - */ - int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); - int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, - BdrvRequestFlags flags); - - /* - * Efficiently zero a region of the disk image. Typically an image format - * would use a compact metadata representation to implement this. This - * function pointer may be NULL or return -ENOSUP and .bdrv_co_writev() - * will be called instead. - */ - int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs, - int64_t offset, int64_t bytes, BdrvRequestFlags flags); - int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, - int64_t offset, int64_t bytes); - - /* Map [offset, offset + nbytes) range onto a child of @bs to copy from, - * and invoke bdrv_co_copy_range_from(child, ...), or invoke - * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from. - * - * See the comment of bdrv_co_copy_range for the parameter and return value - * semantics. - */ - int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs, - BdrvChild *src, - int64_t offset, - BdrvChild *dst, - int64_t dst_offset, - int64_t bytes, - BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); - - /* Map [offset, offset + nbytes) range onto a child of bs to copy data to, - * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy - * operation if @bs is the leaf and @src has the same BlockDriver. Return - * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver. - * - * See the comment of bdrv_co_copy_range for the parameter and return value - * semantics. - */ - int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs, - BdrvChild *src, - int64_t src_offset, - BdrvChild *dst, - int64_t dst_offset, - int64_t bytes, - BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); - - /* - * Building block for bdrv_block_status[_above] and - * bdrv_is_allocated[_above]. The driver should answer only - * according to the current layer, and should only need to set - * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID, - * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing - * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See - * block.h for the overall meaning of the bits. As a hint, the - * flag want_zero is true if the caller cares more about precise - * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for - * overall allocation (favor larger *pnum, perhaps by reporting - * _DATA instead of _ZERO). The block layer guarantees input - * clamped to bdrv_getlength() and aligned to request_alignment, - * as well as non-NULL pnum, map, and file; in turn, the driver - * must return an error or set pnum to an aligned non-zero value. - * - * Note that @bytes is just a hint on how big of a region the - * caller wants to inspect. It is not a limit on *pnum. - * Implementations are free to return larger values of *pnum if - * doing so does not incur a performance penalty. - * - * block/io.c's bdrv_co_block_status() will utilize an unclamped - * *pnum value for the block-status cache on protocol nodes, prior - * to clamping *pnum for return to its caller. - */ - int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs, - bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, - int64_t *map, BlockDriverState **file); - - /* - * This informs the driver that we are no longer interested in the result - * of in-flight requests, so don't waste the time if possible. - * - * One example usage is to avoid waiting for an nbd target node reconnect - * timeout during job-cancel with force=true. - */ - void (*bdrv_cancel_in_flight)(BlockDriverState *bs); - - /* - * Invalidate any cached meta-data. - */ - void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs, - Error **errp); - int (*bdrv_inactivate)(BlockDriverState *bs); - - /* - * Flushes all data for all layers by calling bdrv_co_flush for underlying - * layers, if needed. This function is needed for deterministic - * synchronization of the flush finishing callback. - */ - int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs); - - /* Delete a created file. */ - int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs, - Error **errp); - - /* - * Flushes all data that was already written to the OS all the way down to - * the disk (for example file-posix.c calls fsync()). - */ - int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs); - - /* - * Flushes all internal caches to the OS. The data may still sit in a - * writeback cache of the host OS, but it will survive a crash of the qemu - * process. - */ - int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs); - - /* - * Drivers setting this field must be able to work with just a plain - * filename with '<protocol_name>:' as a prefix, and no other options. - * Options may be extracted from the filename by implementing - * bdrv_parse_filename. - */ - const char *protocol_name; - - /* - * Truncate @bs to @offset bytes using the given @prealloc mode - * when growing. Modes other than PREALLOC_MODE_OFF should be - * rejected when shrinking @bs. - * - * If @exact is true, @bs must be resized to exactly @offset. - * Otherwise, it is sufficient for @bs (if it is a host block - * device and thus there is no way to resize it) to be at least - * @offset bytes in length. - * - * If @exact is true and this function fails but would succeed - * with @exact = false, it should return -ENOTSUP. - */ - int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, - bool exact, PreallocMode prealloc, - BdrvRequestFlags flags, Error **errp); - - int64_t (*bdrv_getlength)(BlockDriverState *bs); - bool has_variable_length; - int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs); - BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs, - Error **errp); - - int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs, - int64_t offset, int64_t bytes, QEMUIOVector *qiov); - int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset); - - int (*bdrv_snapshot_create)(BlockDriverState *bs, - QEMUSnapshotInfo *sn_info); - int (*bdrv_snapshot_goto)(BlockDriverState *bs, - const char *snapshot_id); - int (*bdrv_snapshot_delete)(BlockDriverState *bs, - const char *snapshot_id, - const char *name, - Error **errp); - int (*bdrv_snapshot_list)(BlockDriverState *bs, - QEMUSnapshotInfo **psn_info); - int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs, - const char *snapshot_id, - const char *name, - Error **errp); - int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); - ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs, - Error **errp); - BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs); - - int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs, - QEMUIOVector *qiov, - int64_t pos); - int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs, - QEMUIOVector *qiov, - int64_t pos); - - int (*bdrv_change_backing_file)(BlockDriverState *bs, - const char *backing_file, const char *backing_fmt); - - /* removable device specific */ - bool (*bdrv_is_inserted)(BlockDriverState *bs); - void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); - void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked); - - /* to control generic scsi devices */ - BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, - unsigned long int req, void *buf, - BlockCompletionFunc *cb, void *opaque); - int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs, - unsigned long int req, void *buf); - - /* List of options for creating images, terminated by name == NULL */ - QemuOptsList *create_opts; - - /* List of options for image amend */ - QemuOptsList *amend_opts; - - /* - * If this driver supports reopening images this contains a - * NULL-terminated list of the runtime options that can be - * modified. If an option in this list is unspecified during - * reopen then it _must_ be reset to its default value or return - * an error. - */ - const char *const *mutable_opts; - - /* - * Returns 0 for completed check, -errno for internal errors. - * The check results are stored in result. - */ - int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs, - BdrvCheckResult *result, - BdrvCheckMode fix); - - void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event); - - /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */ - int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, - const char *tag); - int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs, - const char *tag); - int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); - bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); - - void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp); - - /* - * Returns 1 if newly created images are guaranteed to contain only - * zeros, 0 otherwise. - */ - int (*bdrv_has_zero_init)(BlockDriverState *bs); - - /* Remove fd handlers, timers, and other event loop callbacks so the event - * loop is no longer in use. Called with no in-flight requests and in - * depth-first traversal order with parents before child nodes. - */ - void (*bdrv_detach_aio_context)(BlockDriverState *bs); - - /* Add fd handlers, timers, and other event loop callbacks so I/O requests - * can be processed again. Called with no in-flight requests and in - * depth-first traversal order with child nodes before parent nodes. - */ - void (*bdrv_attach_aio_context)(BlockDriverState *bs, - AioContext *new_context); - - /* io queue for linux-aio */ - void (*bdrv_io_plug)(BlockDriverState *bs); - void (*bdrv_io_unplug)(BlockDriverState *bs); - - /** - * Try to get @bs's logical and physical block size. - * On success, store them in @bsz and return zero. - * On failure, return negative errno. - */ - int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz); - /** - * Try to get @bs's geometry (cyls, heads, sectors) - * On success, store them in @geo and return 0. - * On failure return -errno. - * Only drivers that want to override guest geometry implement this - * callback; see hd_geometry_guess(). - */ - int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo); - - /** - * bdrv_co_drain_begin is called if implemented in the beginning of a - * drain operation to drain and stop any internal sources of requests in - * the driver. - * bdrv_co_drain_end is called if implemented at the end of the drain. - * - * They should be used by the driver to e.g. manage scheduled I/O - * requests, or toggle an internal state. After the end of the drain new - * requests will continue normally. - */ - void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); - void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); - - void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child, - Error **errp); - void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child, - Error **errp); - - /** - * Informs the block driver that a permission change is intended. The - * driver checks whether the change is permissible and may take other - * preparations for the change (e.g. get file system locks). This operation - * is always followed either by a call to either .bdrv_set_perm or - * .bdrv_abort_perm_update. - * - * Checks whether the requested set of cumulative permissions in @perm - * can be granted for accessing @bs and whether no other users are using - * permissions other than those given in @shared (both arguments take - * BLK_PERM_* bitmasks). - * - * If both conditions are met, 0 is returned. Otherwise, -errno is returned - * and errp is set to an error describing the conflict. - */ - int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm, - uint64_t shared, Error **errp); - - /** - * Called to inform the driver that the set of cumulative set of used - * permissions for @bs has changed to @perm, and the set of sharable - * permission to @shared. The driver can use this to propagate changes to - * its children (i.e. request permissions only if a parent actually needs - * them). - * - * This function is only invoked after bdrv_check_perm(), so block drivers - * may rely on preparations made in their .bdrv_check_perm implementation. - */ - void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared); - - /* - * Called to inform the driver that after a previous bdrv_check_perm() - * call, the permission update is not performed and any preparations made - * for it (e.g. taken file locks) need to be undone. - * - * This function can be called even for nodes that never saw a - * bdrv_check_perm() call. It is a no-op then. - */ - void (*bdrv_abort_perm_update)(BlockDriverState *bs); - - /** - * Returns in @nperm and @nshared the permissions that the driver for @bs - * needs on its child @c, based on the cumulative permissions requested by - * the parents in @parent_perm and @parent_shared. - * - * If @c is NULL, return the permissions for attaching a new child for the - * given @child_class and @role. - * - * If @reopen_queue is non-NULL, don't return the currently needed - * permissions, but those that will be needed after applying the - * @reopen_queue. - */ - void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, - BdrvChildRole role, - BlockReopenQueue *reopen_queue, - uint64_t parent_perm, uint64_t parent_shared, - uint64_t *nperm, uint64_t *nshared); - - bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); - bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs, - const char *name, - uint32_t granularity, - Error **errp); - int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs, - const char *name, - Error **errp); - - /** - * Register/unregister a buffer for I/O. For example, when the driver is - * interested to know the memory areas that will later be used in iovs, so - * that it can do IOMMU mapping with VFIO etc., in order to get better - * performance. In the case of VFIO drivers, this callback is used to do - * DMA mapping for hot buffers. - */ - void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size); - void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host); - QLIST_ENTRY(BlockDriver) list; - - /* Pointer to a NULL-terminated array of names of strong options - * that can be specified for bdrv_open(). A strong option is one - * that changes the data of a BDS. - * If this pointer is NULL, the array is considered empty. - * "filename" and "driver" are always considered strong. */ - const char *const *strong_runtime_opts; -}; - -static inline bool block_driver_can_compress(BlockDriver *drv) -{ - return drv->bdrv_co_pwritev_compressed || - drv->bdrv_co_pwritev_compressed_part; -} - -typedef struct BlockLimits { - /* Alignment requirement, in bytes, for offset/length of I/O - * requests. Must be a power of 2 less than INT_MAX; defaults to - * 1 for drivers with modern byte interfaces, and to 512 - * otherwise. */ - uint32_t request_alignment; - - /* - * Maximum number of bytes that can be discarded at once. Must be multiple - * of pdiscard_alignment, but need not be power of 2. May be 0 if no - * inherent 64-bit limit. - */ - int64_t max_pdiscard; - - /* Optimal alignment for discard requests in bytes. A power of 2 - * is best but not mandatory. Must be a multiple of - * bl.request_alignment, and must be less than max_pdiscard if - * that is set. May be 0 if bl.request_alignment is good enough */ - uint32_t pdiscard_alignment; - - /* - * Maximum number of bytes that can zeroized at once. Must be multiple of - * pwrite_zeroes_alignment. 0 means no limit. - */ - int64_t max_pwrite_zeroes; - - /* Optimal alignment for write zeroes requests in bytes. A power - * of 2 is best but not mandatory. Must be a multiple of - * bl.request_alignment, and must be less than max_pwrite_zeroes - * if that is set. May be 0 if bl.request_alignment is good - * enough */ - uint32_t pwrite_zeroes_alignment; - - /* Optimal transfer length in bytes. A power of 2 is best but not - * mandatory. Must be a multiple of bl.request_alignment, or 0 if - * no preferred size */ - uint32_t opt_transfer; - - /* Maximal transfer length in bytes. Need not be power of 2, but - * must be multiple of opt_transfer and bl.request_alignment, or 0 - * for no 32-bit limit. For now, anything larger than INT_MAX is - * clamped down. */ - uint32_t max_transfer; - - /* Maximal hardware transfer length in bytes. Applies whenever - * transfers to the device bypass the kernel I/O scheduler, for - * example with SG_IO. If larger than max_transfer or if zero, - * blk_get_max_hw_transfer will fall back to max_transfer. - */ - uint64_t max_hw_transfer; - - /* Maximal number of scatter/gather elements allowed by the hardware. - * Applies whenever transfers to the device bypass the kernel I/O - * scheduler, for example with SG_IO. If larger than max_iov - * or if zero, blk_get_max_hw_iov will fall back to max_iov. - */ - int max_hw_iov; - - /* memory alignment, in bytes so that no bounce buffer is needed */ - size_t min_mem_alignment; - - /* memory alignment, in bytes, for bounce buffer */ - size_t opt_mem_alignment; - - /* maximum number of iovec elements */ - int max_iov; -} BlockLimits; - -typedef struct BdrvOpBlocker BdrvOpBlocker; - -typedef struct BdrvAioNotifier { - void (*attached_aio_context)(AioContext *new_context, void *opaque); - void (*detach_aio_context)(void *opaque); - - void *opaque; - bool deleted; - - QLIST_ENTRY(BdrvAioNotifier) list; -} BdrvAioNotifier; - -struct BdrvChildClass { - /* If true, bdrv_replace_node() doesn't change the node this BdrvChild - * points to. */ - bool stay_at_node; - - /* If true, the parent is a BlockDriverState and bdrv_next_all_states() - * will return it. This information is used for drain_all, where every node - * will be drained separately, so the drain only needs to be propagated to - * non-BDS parents. */ - bool parent_is_bds; - - void (*inherit_options)(BdrvChildRole role, bool parent_is_format, - int *child_flags, QDict *child_options, - int parent_flags, QDict *parent_options); - - void (*change_media)(BdrvChild *child, bool load); - void (*resize)(BdrvChild *child); - - /* Returns a name that is supposedly more useful for human users than the - * node name for identifying the node in question (in particular, a BB - * name), or NULL if the parent can't provide a better name. */ - const char *(*get_name)(BdrvChild *child); - - /* Returns a malloced string that describes the parent of the child for a - * human reader. This could be a node-name, BlockBackend name, qdev ID or - * QOM path of the device owning the BlockBackend, job type and ID etc. The - * caller is responsible for freeing the memory. */ - char *(*get_parent_desc)(BdrvChild *child); - - /* - * If this pair of functions is implemented, the parent doesn't issue new - * requests after returning from .drained_begin() until .drained_end() is - * called. - * - * These functions must not change the graph (and therefore also must not - * call aio_poll(), which could change the graph indirectly). - * - * If drained_end() schedules background operations, it must atomically - * increment *drained_end_counter for each such operation and atomically - * decrement it once the operation has settled. - * - * Note that this can be nested. If drained_begin() was called twice, new - * I/O is allowed only after drained_end() was called twice, too. - */ - void (*drained_begin)(BdrvChild *child); - void (*drained_end)(BdrvChild *child, int *drained_end_counter); - - /* - * Returns whether the parent has pending requests for the child. This - * callback is polled after .drained_begin() has been called until all - * activity on the child has stopped. - */ - bool (*drained_poll)(BdrvChild *child); - - /* Notifies the parent that the child has been activated/inactivated (e.g. - * when migration is completing) and it can start/stop requesting - * permissions and doing I/O on it. */ - void (*activate)(BdrvChild *child, Error **errp); - int (*inactivate)(BdrvChild *child); - - void (*attach)(BdrvChild *child); - void (*detach)(BdrvChild *child); - - /* Notifies the parent that the filename of its child has changed (e.g. - * because the direct child was removed from the backing chain), so that it - * can update its reference. */ - int (*update_filename)(BdrvChild *child, BlockDriverState *new_base, - const char *filename, Error **errp); - - bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, - GSList **ignore, Error **errp); - void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore); - - AioContext *(*get_parent_aio_context)(BdrvChild *child); -}; - -extern const BdrvChildClass child_of_bds; - -struct BdrvChild { - BlockDriverState *bs; - char *name; - const BdrvChildClass *klass; - BdrvChildRole role; - void *opaque; - - /** - * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask) - */ - uint64_t perm; - - /** - * Permissions that can still be granted to other users of @bs while this - * BdrvChild is still attached to it. (BLK_PERM_* bitmask) - */ - uint64_t shared_perm; - - /* - * This link is frozen: the child can neither be replaced nor - * detached from the parent. - */ - bool frozen; - - /* - * How many times the parent of this child has been drained - * (through klass->drained_*). - * Usually, this is equal to bs->quiesce_counter (potentially - * reduced by bdrv_drain_all_count). It may differ while the - * child is entering or leaving a drained section. - */ - int parent_quiesce_counter; - - QLIST_ENTRY(BdrvChild) next; - QLIST_ENTRY(BdrvChild) next_parent; -}; - -/* - * Allows bdrv_co_block_status() to cache one data region for a - * protocol node. - * - * @valid: Whether the cache is valid (should be accessed with atomic - * functions so this can be reset by RCU readers) - * @data_start: Offset where we know (or strongly assume) is data - * @data_end: Offset where the data region ends (which is not necessarily - * the start of a zeroed region) - */ -typedef struct BdrvBlockStatusCache { - struct rcu_head rcu; - - bool valid; - int64_t data_start; - int64_t data_end; -} BdrvBlockStatusCache; - -struct BlockDriverState { - /* Protected by big QEMU lock or read-only after opening. No special - * locking needed during I/O... - */ - int open_flags; /* flags used to open the file, re-used for re-open */ - bool encrypted; /* if true, the media is encrypted */ - bool sg; /* if true, the device is a /dev/sg* */ - bool probed; /* if true, format was probed rather than specified */ - bool force_share; /* if true, always allow all shared permissions */ - bool implicit; /* if true, this filter node was automatically inserted */ - - BlockDriver *drv; /* NULL means no media */ - void *opaque; - - AioContext *aio_context; /* event loop used for fd handlers, timers, etc */ - /* long-running tasks intended to always use the same AioContext as this - * BDS may register themselves in this list to be notified of changes - * regarding this BDS's context */ - QLIST_HEAD(, BdrvAioNotifier) aio_notifiers; - bool walking_aio_notifiers; /* to make removal during iteration safe */ - - char filename[PATH_MAX]; - /* - * If not empty, this image is a diff in relation to backing_file. - * Note that this is the name given in the image header and - * therefore may or may not be equal to .backing->bs->filename. - * If this field contains a relative path, it is to be resolved - * relatively to the overlay's location. - */ - char backing_file[PATH_MAX]; - /* - * The backing filename indicated by the image header. Contrary - * to backing_file, if we ever open this file, auto_backing_file - * is replaced by the resulting BDS's filename (i.e. after a - * bdrv_refresh_filename() run). - */ - char auto_backing_file[PATH_MAX]; - char backing_format[16]; /* if non-zero and backing_file exists */ - - QDict *full_open_options; - char exact_filename[PATH_MAX]; - - BdrvChild *backing; - BdrvChild *file; - - /* I/O Limits */ - BlockLimits bl; - - /* - * Flags honored during pread - */ - unsigned int supported_read_flags; - /* Flags honored during pwrite (so far: BDRV_REQ_FUA, - * BDRV_REQ_WRITE_UNCHANGED). - * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those - * writes will be issued as normal writes without the flag set. - * This is important to note for drivers that do not explicitly - * request a WRITE permission for their children and instead take - * the same permissions as their parent did (this is commonly what - * block filters do). Such drivers have to be aware that the - * parent may have taken a WRITE_UNCHANGED permission only and is - * issuing such requests. Drivers either must make sure that - * these requests do not result in plain WRITE accesses (usually - * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding - * every incoming write request as-is, including potentially that - * flag), or they have to explicitly take the WRITE permission for - * their children. */ - unsigned int supported_write_flags; - /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA, - * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */ - unsigned int supported_zero_flags; - /* - * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE). - * - * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure - * that any added space reads as all zeros. If this can't be guaranteed, - * the operation must fail. - */ - unsigned int supported_truncate_flags; - - /* the following member gives a name to every node on the bs graph. */ - char node_name[32]; - /* element of the list of named nodes building the graph */ - QTAILQ_ENTRY(BlockDriverState) node_list; - /* element of the list of all BlockDriverStates (all_bdrv_states) */ - QTAILQ_ENTRY(BlockDriverState) bs_list; - /* element of the list of monitor-owned BDS */ - QTAILQ_ENTRY(BlockDriverState) monitor_list; - int refcnt; - - /* operation blockers */ - QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX]; - - /* The node that this node inherited default options from (and a reopen on - * which can affect this node by changing these defaults). This is always a - * parent node of this node. */ - BlockDriverState *inherits_from; - QLIST_HEAD(, BdrvChild) children; - QLIST_HEAD(, BdrvChild) parents; - - QDict *options; - QDict *explicit_options; - BlockdevDetectZeroesOptions detect_zeroes; - - /* The error object in use for blocking operations on backing_hd */ - Error *backing_blocker; - - /* Protected by AioContext lock */ - - /* If we are reading a disk image, give its size in sectors. - * Generally read-only; it is written to by load_snapshot and - * save_snaphost, but the block layer is quiescent during those. - */ - int64_t total_sectors; - - /* threshold limit for writes, in bytes. "High water mark". */ - uint64_t write_threshold_offset; - - /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex. - * Reading from the list can be done with either the BQL or the - * dirty_bitmap_mutex. Modifying a bitmap only requires - * dirty_bitmap_mutex. */ - QemuMutex dirty_bitmap_mutex; - QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; - - /* Offset after the highest byte written to */ - Stat64 wr_highest_offset; - - /* If true, copy read backing sectors into image. Can be >1 if more - * than one client has requested copy-on-read. Accessed with atomic - * ops. - */ - int copy_on_read; - - /* number of in-flight requests; overall and serialising. - * Accessed with atomic ops. - */ - unsigned int in_flight; - unsigned int serialising_in_flight; - - /* counter for nested bdrv_io_plug. - * Accessed with atomic ops. - */ - unsigned io_plugged; - - /* do we need to tell the quest if we have a volatile write cache? */ - int enable_write_cache; - - /* Accessed with atomic ops. */ - int quiesce_counter; - int recursive_quiesce_counter; - - unsigned int write_gen; /* Current data generation */ - - /* Protected by reqs_lock. */ - CoMutex reqs_lock; - QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; - CoQueue flush_queue; /* Serializing flush queue */ - bool active_flush_req; /* Flush request in flight? */ - - /* Only read/written by whoever has set active_flush_req to true. */ - unsigned int flushed_gen; /* Flushed write generation */ - - /* BdrvChild links to this node may never be frozen */ - bool never_freeze; - - /* Lock for block-status cache RCU writers */ - CoMutex bsc_modify_lock; - /* Always non-NULL, but must only be dereferenced under an RCU read guard */ - BdrvBlockStatusCache *block_status_cache; -}; - -struct BlockBackendRootState { - int open_flags; - BlockdevDetectZeroesOptions detect_zeroes; -}; - -typedef enum BlockMirrorBackingMode { - /* Reuse the existing backing chain from the source for the target. - * - sync=full: Set backing BDS to NULL. - * - sync=top: Use source's backing BDS. - * - sync=none: Use source as the backing BDS. */ - MIRROR_SOURCE_BACKING_CHAIN, - - /* Open the target's backing chain completely anew */ - MIRROR_OPEN_BACKING_CHAIN, - - /* Do not change the target's backing BDS after job completion */ - MIRROR_LEAVE_BACKING_CHAIN, -} BlockMirrorBackingMode; - - -/* Essential block drivers which must always be statically linked into qemu, and - * which therefore can be accessed without using bdrv_find_format() */ -extern BlockDriver bdrv_file; -extern BlockDriver bdrv_raw; -extern BlockDriver bdrv_qcow2; - -int coroutine_fn bdrv_co_preadv(BdrvChild *child, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); -int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, - int64_t offset, int64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); -int coroutine_fn bdrv_co_pwritev(BdrvChild *child, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); -int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, - int64_t offset, int64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); - -static inline int coroutine_fn bdrv_co_pread(BdrvChild *child, - int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags) -{ - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); - - return bdrv_co_preadv(child, offset, bytes, &qiov, flags); -} - -static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child, - int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags) -{ - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); - - return bdrv_co_pwritev(child, offset, bytes, &qiov, flags); -} - -extern unsigned int bdrv_drain_all_count; -void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); -void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); - -bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req, - uint64_t align); -BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs); - -int get_tmp_filename(char *filename, int size); -BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, - const char *filename); - -void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, - QDict *options); - -/** - * bdrv_add_aio_context_notifier: - * - * If a long-running job intends to be always run in the same AioContext as a - * certain BDS, it may use this function to be notified of changes regarding the - * association of the BDS to an AioContext. - * - * attached_aio_context() is called after the target BDS has been attached to a - * new AioContext; detach_aio_context() is called before the target BDS is being - * detached from its old AioContext. - */ -void bdrv_add_aio_context_notifier(BlockDriverState *bs, - void (*attached_aio_context)(AioContext *new_context, void *opaque), - void (*detach_aio_context)(void *opaque), void *opaque); - -/** - * bdrv_remove_aio_context_notifier: - * - * Unsubscribe of change notifications regarding the BDS's AioContext. The - * parameters given here have to be the same as those given to - * bdrv_add_aio_context_notifier(). - */ -void bdrv_remove_aio_context_notifier(BlockDriverState *bs, - void (*aio_context_attached)(AioContext *, - void *), - void (*aio_context_detached)(void *), - void *opaque); - -/** - * bdrv_wakeup: - * @bs: The BlockDriverState for which an I/O operation has been completed. - * - * Wake up the main thread if it is waiting on BDRV_POLL_WHILE. During - * synchronous I/O on a BlockDriverState that is attached to another - * I/O thread, the main thread lets the I/O thread's event loop run, - * waiting for the I/O operation to complete. A bdrv_wakeup will wake - * up the main thread if necessary. - * - * Manual calls to bdrv_wakeup are rarely necessary, because - * bdrv_dec_in_flight already calls it. - */ -void bdrv_wakeup(BlockDriverState *bs); - -#ifdef _WIN32 -int is_windows_drive(const char *filename); -#endif - -/** - * stream_start: - * @job_id: The id of the newly-created job, or %NULL to use the - * device name of @bs. - * @bs: Block device to operate on. - * @base: Block device that will become the new base, or %NULL to - * flatten the whole backing file chain onto @bs. - * @backing_file_str: The file name that will be written to @bs as the - * the new backing file if the job completes. Ignored if @base is %NULL. - * @creation_flags: Flags that control the behavior of the Job lifetime. - * See @BlockJobCreateFlags - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @on_error: The action to take upon error. - * @filter_node_name: The node name that should be assigned to the filter - * driver that the stream job inserts into the graph above - * @bs. NULL means that a node name should be autogenerated. - * @errp: Error object. - * - * Start a streaming operation on @bs. Clusters that are unallocated - * in @bs, but allocated in any image between @base and @bs (both - * exclusive) will be written to @bs. At the end of a successful - * streaming job, the backing file of @bs will be changed to - * @backing_file_str in the written image and to @base in the live - * BlockDriverState. - */ -void stream_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *base, const char *backing_file_str, - BlockDriverState *bottom, - int creation_flags, int64_t speed, - BlockdevOnError on_error, - const char *filter_node_name, - Error **errp); - -/** - * commit_start: - * @job_id: The id of the newly-created job, or %NULL to use the - * device name of @bs. - * @bs: Active block device. - * @top: Top block device to be committed. - * @base: Block device that will be written into, and become the new top. - * @creation_flags: Flags that control the behavior of the Job lifetime. - * See @BlockJobCreateFlags - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @on_error: The action to take upon error. - * @backing_file_str: String to use as the backing file in @top's overlay - * @filter_node_name: The node name that should be assigned to the filter - * driver that the commit job inserts into the graph above @top. NULL means - * that a node name should be autogenerated. - * @errp: Error object. - * - */ -void commit_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *base, BlockDriverState *top, - int creation_flags, int64_t speed, - BlockdevOnError on_error, const char *backing_file_str, - const char *filter_node_name, Error **errp); -/** - * commit_active_start: - * @job_id: The id of the newly-created job, or %NULL to use the - * device name of @bs. - * @bs: Active block device to be committed. - * @base: Block device that will be written into, and become the new top. - * @creation_flags: Flags that control the behavior of the Job lifetime. - * See @BlockJobCreateFlags - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @on_error: The action to take upon error. - * @filter_node_name: The node name that should be assigned to the filter - * driver that the commit job inserts into the graph above @bs. NULL means that - * a node name should be autogenerated. - * @cb: Completion function for the job. - * @opaque: Opaque pointer value passed to @cb. - * @auto_complete: Auto complete the job. - * @errp: Error object. - * - */ -BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *base, int creation_flags, - int64_t speed, BlockdevOnError on_error, - const char *filter_node_name, - BlockCompletionFunc *cb, void *opaque, - bool auto_complete, Error **errp); -/* - * mirror_start: - * @job_id: The id of the newly-created job, or %NULL to use the - * device name of @bs. - * @bs: Block device to operate on. - * @target: Block device to write to. - * @replaces: Block graph node name to replace once the mirror is done. Can - * only be used when full mirroring is selected. - * @creation_flags: Flags that control the behavior of the Job lifetime. - * See @BlockJobCreateFlags - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @granularity: The chosen granularity for the dirty bitmap. - * @buf_size: The amount of data that can be in flight at one time. - * @mode: Whether to collapse all images in the chain to the target. - * @backing_mode: How to establish the target's backing chain after completion. - * @zero_target: Whether the target should be explicitly zero-initialized - * @on_source_error: The action to take upon error reading from the source. - * @on_target_error: The action to take upon error writing to the target. - * @unmap: Whether to unmap target where source sectors only contain zeroes. - * @filter_node_name: The node name that should be assigned to the filter - * driver that the mirror job inserts into the graph above @bs. NULL means that - * a node name should be autogenerated. - * @copy_mode: When to trigger writes to the target. - * @errp: Error object. - * - * Start a mirroring operation on @bs. Clusters that are allocated - * in @bs will be written to @target until the job is cancelled or - * manually completed. At the end of a successful mirroring job, - * @bs will be switched to read from @target. - */ -void mirror_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *target, const char *replaces, - int creation_flags, int64_t speed, - uint32_t granularity, int64_t buf_size, - MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, - bool zero_target, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - bool unmap, const char *filter_node_name, - MirrorCopyMode copy_mode, Error **errp); - -/* - * backup_job_create: - * @job_id: The id of the newly-created job, or %NULL to use the - * device name of @bs. - * @bs: Block device to operate on. - * @target: Block device to write to. - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @sync_mode: What parts of the disk image should be copied to the destination. - * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental' - * @bitmap_mode: The bitmap synchronization policy to use. - * @perf: Performance options. All actual fields assumed to be present, - * all ".has_*" fields are ignored. - * @on_source_error: The action to take upon error reading from the source. - * @on_target_error: The action to take upon error writing to the target. - * @creation_flags: Flags that control the behavior of the Job lifetime. - * See @BlockJobCreateFlags - * @cb: Completion function for the job. - * @opaque: Opaque pointer value passed to @cb. - * @txn: Transaction that this job is part of (may be NULL). - * - * Create a backup operation on @bs. Clusters in @bs are written to @target - * until the job is cancelled or manually completed. - */ -BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - BlockDriverState *target, int64_t speed, - MirrorSyncMode sync_mode, - BdrvDirtyBitmap *sync_bitmap, - BitmapSyncMode bitmap_mode, - bool compress, - const char *filter_node_name, - BackupPerf *perf, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - int creation_flags, - BlockCompletionFunc *cb, void *opaque, - JobTxn *txn, Error **errp); - -BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, - const char *child_name, - const BdrvChildClass *child_class, - BdrvChildRole child_role, - uint64_t perm, uint64_t shared_perm, - void *opaque, Error **errp); -void bdrv_root_unref_child(BdrvChild *child); - -void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, - uint64_t *shared_perm); - -/** - * Sets a BdrvChild's permissions. Avoid if the parent is a BDS; use - * bdrv_child_refresh_perms() instead and make the parent's - * .bdrv_child_perm() implementation return the correct values. - */ -int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, - Error **errp); - -/** - * Calls bs->drv->bdrv_child_perm() and updates the child's permission - * masks with the result. - * Drivers should invoke this function whenever an event occurs that - * makes their .bdrv_child_perm() implementation return different - * values than before, but which will not result in the block layer - * automatically refreshing the permissions. - */ -int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp); - -bool bdrv_recurse_can_replace(BlockDriverState *bs, - BlockDriverState *to_replace); - -/* - * Default implementation for BlockDriver.bdrv_child_perm() that can - * be used by block filters and image formats, as long as they use the - * child_of_bds child class and set an appropriate BdrvChildRole. - */ -void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, - BdrvChildRole role, BlockReopenQueue *reopen_queue, - uint64_t perm, uint64_t shared, - uint64_t *nperm, uint64_t *nshared); - -const char *bdrv_get_parent_name(const BlockDriverState *bs); -void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); -bool blk_dev_has_removable_media(BlockBackend *blk); -bool blk_dev_has_tray(BlockBackend *blk); -void blk_dev_eject_request(BlockBackend *blk, bool force); -bool blk_dev_is_tray_open(BlockBackend *blk); -bool blk_dev_is_medium_locked(BlockBackend *blk); - -void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes); - -void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); -void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup); -bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest, - const BdrvDirtyBitmap *src, - HBitmap **backup, bool lock); - -void bdrv_inc_in_flight(BlockDriverState *bs); -void bdrv_dec_in_flight(BlockDriverState *bs); - -void blockdev_close_all_bdrv_states(void); - -int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset, - BdrvChild *dst, int64_t dst_offset, - int64_t bytes, - BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); -int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset, - BdrvChild *dst, int64_t dst_offset, - int64_t bytes, - BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); - -int refresh_total_sectors(BlockDriverState *bs, int64_t hint); - -void bdrv_set_monitor_owned(BlockDriverState *bs); -BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp); - -/** - * Simple implementation of bdrv_co_create_opts for protocol drivers - * which only support creation via opening a file - * (usually existing raw storage device) - */ -int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, - const char *filename, - QemuOpts *opts, - Error **errp); -extern QemuOptsList bdrv_create_opts_simple; - -BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, - const char *name, - BlockDriverState **pbs, - Error **errp); -BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, - BlockDirtyBitmapMergeSourceList *bms, - HBitmap **backup, Error **errp); -BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, - bool release, - BlockDriverState **bitmap_bs, - Error **errp); - -BdrvChild *bdrv_cow_child(BlockDriverState *bs); -BdrvChild *bdrv_filter_child(BlockDriverState *bs); -BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs); -BdrvChild *bdrv_primary_child(BlockDriverState *bs); -BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs); -BlockDriverState *bdrv_skip_filters(BlockDriverState *bs); -BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs); - -static inline BlockDriverState *child_bs(BdrvChild *child) -{ - return child ? child->bs : NULL; -} - -static inline BlockDriverState *bdrv_cow_bs(BlockDriverState *bs) -{ - return child_bs(bdrv_cow_child(bs)); -} - -static inline BlockDriverState *bdrv_filter_bs(BlockDriverState *bs) -{ - return child_bs(bdrv_filter_child(bs)); -} - -static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs) -{ - return child_bs(bdrv_filter_or_cow_child(bs)); -} - -static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs) -{ - return child_bs(bdrv_primary_child(bs)); -} - -/** - * End all quiescent sections started by bdrv_drain_all_begin(). This is - * needed when deleting a BDS before bdrv_drain_all_end() is called. - * - * NOTE: this is an internal helper for bdrv_close() *only*. No one else - * should call it. - */ -void bdrv_drain_all_end_quiesce(BlockDriverState *bs); - -/** - * Check whether the given offset is in the cached block-status data - * region. - * - * If it is, and @pnum is not NULL, *pnum is set to - * `bsc.data_end - offset`, i.e. how many bytes, starting from - * @offset, are data (according to the cache). - * Otherwise, *pnum is not touched. - */ -bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum); - -/** - * If [offset, offset + bytes) overlaps with the currently cached - * block-status region, invalidate the cache. - * - * (To be used by I/O paths that cause data regions to be zero or - * holes.) - */ -void bdrv_bsc_invalidate_range(BlockDriverState *bs, - int64_t offset, int64_t bytes); - -/** - * Mark the range [offset, offset + bytes) as a data region. - */ -void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); +/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */ #endif /* BLOCK_INT_H */ diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 87fbb3985f..6525e16fd5 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -74,6 +74,13 @@ typedef struct BlockJob { GSList *nodes; } BlockJob; +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + /** * block_job_next: * @job: A block job, or %NULL. @@ -155,6 +162,21 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp); */ void block_job_iostatus_reset(BlockJob *job); +/* + * block_job_get_aio_context: + * + * Returns aio context associated with a block job. + */ +AioContext *block_job_get_aio_context(BlockJob *job); + + +/* + * Common functions that are neither I/O nor Global State. + * + * See include/block/block-common.h for more information about + * the Common API. + */ + /** * block_job_is_internal: * @job: The job to determine if it is user-visible or not. @@ -170,11 +192,4 @@ bool block_job_is_internal(BlockJob *job); */ const BlockJobDriver *block_job_driver(BlockJob *job); -/* - * block_job_get_aio_context: - * - * Returns aio context associated with a block job. - */ -AioContext *block_job_get_aio_context(BlockJob *job); - #endif diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 6633d83da2..6bd9ae2b20 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -39,6 +39,13 @@ struct BlockJobDriver { JobDriver job_driver; /* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + + /* * Returns whether the job has pending requests for the child or will * submit new requests before the next pause point. This callback is polled * in the context of draining a job node after requesting that the job be @@ -47,6 +54,13 @@ struct BlockJobDriver { bool (*drained_poll)(BlockJob *job); /* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + + /* * If the callback is not NULL, it will be invoked before the job is * resumed in a new AioContext. This is the place to move any resources * besides job->blk to the new AioContext. @@ -56,6 +70,13 @@ struct BlockJobDriver { void (*set_speed)(BlockJob *job, int64_t speed); }; +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + /** * block_job_create: * @job_id: The id of the newly-created job, or %NULL to have one @@ -98,6 +119,13 @@ void block_job_free(Job *job); */ void block_job_user_resume(Job *job); +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + /** * block_job_ratelimit_get_delay: * diff --git a/include/block/snapshot.h b/include/block/snapshot.h index 940345692f..50ff924710 100644 --- a/include/block/snapshot.h +++ b/include/block/snapshot.h @@ -45,6 +45,13 @@ typedef struct QEMUSnapshotInfo { uint64_t icount; /* record/replay step */ } QEMUSnapshotInfo; +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info, const char *name); bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, @@ -73,9 +80,11 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, Error **errp); -/* Group operations. All block drivers are involved. +/* + * Group operations. All block drivers are involved. * These functions will properly handle dataplane (take aio_context_acquire - * when appropriate for appropriate block drivers */ + * when appropriate for appropriate block drivers + */ bool bdrv_all_can_snapshot(bool has_devices, strList *devices, Error **errp); diff --git a/include/qemu/coroutine-tls.h b/include/qemu/coroutine-tls.h new file mode 100644 index 0000000000..1558a826aa --- /dev/null +++ b/include/qemu/coroutine-tls.h @@ -0,0 +1,165 @@ +/* + * QEMU Thread Local Storage for coroutines + * + * Copyright Red Hat + * + * SPDX-License-Identifier: LGPL-2.1-or-later + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + * It is forbidden to access Thread Local Storage in coroutines because + * compiler optimizations may cause values to be cached across coroutine + * re-entry. Coroutines can run in more than one thread through the course of + * their life, leading bugs when stale TLS values from the wrong thread are + * used as a result of compiler optimization. + * + * An example is: + * + * ..code-block:: c + * :caption: A coroutine that may see the wrong TLS value + * + * static __thread AioContext *current_aio_context; + * ... + * static void coroutine_fn foo(void) + * { + * aio_notify(current_aio_context); + * qemu_coroutine_yield(); + * aio_notify(current_aio_context); // <-- may be stale after yielding! + * } + * + * This header provides macros for safely defining variables in Thread Local + * Storage: + * + * ..code-block:: c + * :caption: A coroutine that safely uses TLS + * + * QEMU_DEFINE_STATIC_CO_TLS(AioContext *, current_aio_context) + * ... + * static void coroutine_fn foo(void) + * { + * aio_notify(get_current_aio_context()); + * qemu_coroutine_yield(); + * aio_notify(get_current_aio_context()); // <-- safe + * } + */ + +#ifndef QEMU_COROUTINE_TLS_H +#define QEMU_COROUTINE_TLS_H + +/* + * To stop the compiler from caching TLS values we define accessor functions + * with __attribute__((noinline)) plus asm volatile("") to prevent + * optimizations that override noinline. + * + * The compiler can still analyze noinline code and make optimizations based on + * that knowledge, so an inline asm output operand is used to prevent + * optimizations that make assumptions about the address of the TLS variable. + * + * This is fragile and ultimately needs to be solved by a mechanism that is + * guaranteed to work by the compiler (e.g. stackless coroutines), but for now + * we use this approach to prevent issues. + */ + +/** + * QEMU_DECLARE_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Declare an extern variable in Thread Local Storage from a header file: + * + * .. code-block:: c + * :caption: Declaring an extern variable in Thread Local Storage + * + * QEMU_DECLARE_CO_TLS(int, my_count) + * ... + * int c = get_my_count(); + * set_my_count(c + 1); + * *get_ptr_my_count() = 0; + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Declaring a TLS variable using __thread + * + * extern __thread int my_count; + * ... + * int c = my_count; + * my_count = c + 1; + * *(&my_count) = 0; + */ +#define QEMU_DECLARE_CO_TLS(type, var) \ + __attribute__((noinline)) type get_##var(void); \ + __attribute__((noinline)) void set_##var(type v); \ + __attribute__((noinline)) type *get_ptr_##var(void); + +/** + * QEMU_DEFINE_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Define a variable in Thread Local Storage that was previously declared from + * a header file with QEMU_DECLARE_CO_TLS(): + * + * .. code-block:: c + * :caption: Defining a variable in Thread Local Storage + * + * QEMU_DEFINE_CO_TLS(int, my_count) + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Defining a TLS variable using __thread + * + * __thread int my_count; + */ +#define QEMU_DEFINE_CO_TLS(type, var) \ + static __thread type co_tls_##var; \ + type get_##var(void) { asm volatile(""); return co_tls_##var; } \ + void set_##var(type v) { asm volatile(""); co_tls_##var = v; } \ + type *get_ptr_##var(void) \ + { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; } + +/** + * QEMU_DEFINE_STATIC_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Define a static variable in Thread Local Storage: + * + * .. code-block:: c + * :caption: Defining a static variable in Thread Local Storage + * + * QEMU_DEFINE_STATIC_CO_TLS(int, my_count) + * ... + * int c = get_my_count(); + * set_my_count(c + 1); + * *get_ptr_my_count() = 0; + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Defining a static TLS variable using __thread + * + * static __thread int my_count; + * ... + * int c = my_count; + * my_count = c + 1; + * *(&my_count) = 0; + */ +#define QEMU_DEFINE_STATIC_CO_TLS(type, var) \ + static __thread type co_tls_##var; \ + static __attribute__((noinline, unused)) \ + type get_##var(void) \ + { asm volatile(""); return co_tls_##var; } \ + static __attribute__((noinline, unused)) \ + void set_##var(type v) \ + { asm volatile(""); co_tls_##var = v; } \ + static __attribute__((noinline, unused)) \ + type *get_ptr_##var(void) \ + { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; } + +#endif /* QEMU_COROUTINE_TLS_H */ diff --git a/include/qemu/job.h b/include/qemu/job.h index 6e67b6977f..c105b31076 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -169,6 +169,12 @@ typedef struct Job { * Callbacks and other information about a Job driver. */ struct JobDriver { + + /* + * These fields are initialized when this object is created, + * and are never changed afterwards + */ + /** Derived Job struct size */ size_t instance_size; @@ -184,9 +190,18 @@ struct JobDriver { * aborted. If it returns zero, the job moves into the WAITING state. If it * is the last job to complete in its transaction, all jobs in the * transaction move from WAITING to PENDING. + * + * This callback must be run in the job's context. */ int coroutine_fn (*run)(Job *job, Error **errp); + /* + * Functions run without regard to the BQL that may run in any + * arbitrary thread. These functions do not need to be thread-safe + * because the caller ensures that they are invoked from one + * thread at time. + */ + /** * If the callback is not NULL, it will be invoked when the job transitions * into the paused state. Paused jobs must not perform any asynchronous @@ -201,6 +216,13 @@ struct JobDriver { */ void coroutine_fn (*resume)(Job *job); + /* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + /** * Called when the job is resumed by the user (i.e. user_paused becomes * false). .user_resume is called before .resume. diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h index 8dbc6fcb89..7a4d6a0920 100644 --- a/include/qemu/main-loop.h +++ b/include/qemu/main-loop.h @@ -242,10 +242,52 @@ AioContext *iohandler_get_aio_context(void); * must always be taken outside other locks. This function helps * functions take different paths depending on whether the current * thread is running within the main loop mutex. + * + * This function should never be used in the block layer, because + * unit tests, block layer tools and qemu-storage-daemon do not + * have a BQL. + * Please instead refer to qemu_in_main_thread(). */ bool qemu_mutex_iothread_locked(void); /** + * qemu_in_main_thread: return whether it's possible to safely access + * the global state of the block layer. + * + * Global state of the block layer is not accessible from I/O threads + * or worker threads; only from threads that "own" the default + * AioContext that qemu_get_aio_context() returns. For tests, block + * layer tools and qemu-storage-daemon there is a designated thread that + * runs the event loop for qemu_get_aio_context(), and that is the + * main thread. + * + * For emulators, however, any thread that holds the BQL can act + * as the block layer main thread; this will be any of the actual + * main thread, the vCPU threads or the RCU thread. + * + * For clarity, do not use this function outside the block layer. + */ +bool qemu_in_main_thread(void); + +/* Mark and check that the function is part of the global state API. */ +#define GLOBAL_STATE_CODE() \ + do { \ + assert(qemu_in_main_thread()); \ + } while (0) + +/* Mark and check that the function is part of the I/O API. */ +#define IO_CODE() \ + do { \ + /* nop */ \ + } while (0) + +/* Mark and check that the function is part of the "I/O OR GS" API. */ +#define IO_OR_GS_CODE() \ + do { \ + /* nop */ \ + } while (0) + +/** * qemu_mutex_lock_iothread: Lock the main loop mutex. * * This function locks the main loop mutex. The mutex is taken by diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index e69efbd47f..b063c6fde8 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -29,6 +29,7 @@ #include "qemu/atomic.h" #include "qemu/notify.h" #include "qemu/sys_membarrier.h" +#include "qemu/coroutine-tls.h" #ifdef __cplusplus extern "C" { @@ -76,11 +77,11 @@ struct rcu_reader_data { NotifierList force_rcu; }; -extern __thread struct rcu_reader_data rcu_reader; +QEMU_DECLARE_CO_TLS(struct rcu_reader_data, rcu_reader) static inline void rcu_read_lock(void) { - struct rcu_reader_data *p_rcu_reader = &rcu_reader; + struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader(); unsigned ctr; if (p_rcu_reader->depth++ > 0) { @@ -96,7 +97,7 @@ static inline void rcu_read_lock(void) static inline void rcu_read_unlock(void) { - struct rcu_reader_data *p_rcu_reader = &rcu_reader; + struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader(); assert(p_rcu_reader->depth != 0); if (--p_rcu_reader->depth > 0) { diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h new file mode 100644 index 0000000000..2391679c56 --- /dev/null +++ b/include/sysemu/block-backend-common.h @@ -0,0 +1,102 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_COMMON_H +#define BLOCK_BACKEND_COMMON_H + +#include "qemu/iov.h" +#include "block/throttle-groups.h" + +/* + * TODO Have to include block/block.h for a bunch of block layer + * types. Unfortunately, this pulls in the whole BlockDriverState + * API, which we don't want used by many BlockBackend users. Some of + * the types belong here, and the rest should be split into a common + * header and one for the BlockDriverState API. + */ +#include "block/block.h" + +/* Callbacks for block device models */ +typedef struct BlockDevOps { + + /* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + + /* + * Runs when virtual media changed (monitor commands eject, change) + * Argument load is true on load and false on eject. + * Beware: doesn't run when a host device's physical media + * changes. Sure would be useful if it did. + * Device models with removable media must implement this callback. + */ + void (*change_media_cb)(void *opaque, bool load, Error **errp); + /* + * Runs when an eject request is issued from the monitor, the tray + * is closed, and the medium is locked. + * Device models that do not implement is_medium_locked will not need + * this callback. Device models that can lock the medium or tray might + * want to implement the callback and unlock the tray when "force" is + * true, even if they do not support eject requests. + */ + void (*eject_request_cb)(void *opaque, bool force); + + /* + * Is the virtual medium locked into the device? + * Device models implement this only when device has such a lock. + */ + bool (*is_medium_locked)(void *opaque); + + /* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + + /* + * Is the virtual tray open? + * Device models implement this only when the device has a tray. + */ + bool (*is_tray_open)(void *opaque); + + /* + * Runs when the size changed (e.g. monitor command block_resize) + */ + void (*resize_cb)(void *opaque); + /* + * Runs when the backend receives a drain request. + */ + void (*drained_begin)(void *opaque); + /* + * Runs when the backend's last drain request ends. + */ + void (*drained_end)(void *opaque); + /* + * Is the device still busy? + */ + bool (*drained_poll)(void *opaque); +} BlockDevOps; + +/* + * This struct is embedded in (the private) BlockBackend struct and contains + * fields that must be public. This is in particular for QLIST_ENTRY() and + * friends so that BlockBackends can be kept in lists outside block-backend.c + */ +typedef struct BlockBackendPublic { + ThrottleGroupMember throttle_group_member; +} BlockBackendPublic; + +#endif /* BLOCK_BACKEND_COMMON_H */ diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h new file mode 100644 index 0000000000..2e93a74679 --- /dev/null +++ b/include/sysemu/block-backend-global-state.h @@ -0,0 +1,116 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_GS_H +#define BLOCK_BACKEND_GS_H + +#include "block-backend-common.h" + +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + +BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm); +BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, Error **errp); +BlockBackend *blk_new_open(const char *filename, const char *reference, + QDict *options, int flags, Error **errp); +int blk_get_refcnt(BlockBackend *blk); +void blk_ref(BlockBackend *blk); +void blk_unref(BlockBackend *blk); +void blk_remove_all_bs(void); +BlockBackend *blk_by_name(const char *name); +BlockBackend *blk_next(BlockBackend *blk); +BlockBackend *blk_all_next(BlockBackend *blk); +bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp); +void monitor_remove_blk(BlockBackend *blk); + +BlockBackendPublic *blk_get_public(BlockBackend *blk); +BlockBackend *blk_by_public(BlockBackendPublic *public); + +void blk_remove_bs(BlockBackend *blk); +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); +int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp); +bool bdrv_has_blk(BlockDriverState *bs); +bool bdrv_is_root_node(BlockDriverState *bs); +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp); +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); + +void blk_iostatus_enable(BlockBackend *blk); +BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk); +void blk_iostatus_disable(BlockBackend *blk); +void blk_iostatus_reset(BlockBackend *blk); +int blk_attach_dev(BlockBackend *blk, DeviceState *dev); +void blk_detach_dev(BlockBackend *blk, DeviceState *dev); +DeviceState *blk_get_attached_dev(BlockBackend *blk); +BlockBackend *blk_by_dev(void *dev); +BlockBackend *blk_by_qdev_id(const char *id, Error **errp); +void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); + +void blk_activate(BlockBackend *blk, Error **errp); + +int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags); +void blk_aio_cancel(BlockAIOCB *acb); +int blk_commit_all(void); +void blk_drain(BlockBackend *blk); +void blk_drain_all(void); +void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + BlockdevOnError on_write_error); +bool blk_supports_write_perm(BlockBackend *blk); +bool blk_is_sg(BlockBackend *blk); +void blk_set_enable_write_cache(BlockBackend *blk, bool wce); +int blk_get_flags(BlockBackend *blk); +bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp); +void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason); +void blk_op_block_all(BlockBackend *blk, Error *reason); +void blk_op_unblock_all(BlockBackend *blk, Error *reason); +int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, + Error **errp); +void blk_add_aio_context_notifier(BlockBackend *blk, + void (*attached_aio_context)(AioContext *new_context, void *opaque), + void (*detach_aio_context)(void *opaque), void *opaque); +void blk_remove_aio_context_notifier(BlockBackend *blk, + void (*attached_aio_context)(AioContext *, + void *), + void (*detach_aio_context)(void *), + void *opaque); +void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify); +void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify); +BlockBackendRootState *blk_get_root_state(BlockBackend *blk); +void blk_update_root_state(BlockBackend *blk); +bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk); +int blk_get_open_flags_from_root_state(BlockBackend *blk); + +int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, + int64_t pos, int size); +int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); +int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz); +int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo); + +void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg); +void blk_io_limits_disable(BlockBackend *blk); +void blk_io_limits_enable(BlockBackend *blk, const char *group); +void blk_io_limits_update_group(BlockBackend *blk, const char *group); +void blk_set_force_allow_inactivate(BlockBackend *blk); + +void blk_register_buf(BlockBackend *blk, void *host, size_t size); +void blk_unregister_buf(BlockBackend *blk, void *host); + +const BdrvChild *blk_root(BlockBackend *blk); + +int blk_make_empty(BlockBackend *blk, Error **errp); + +#endif /* BLOCK_BACKEND_GS_H */ diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h new file mode 100644 index 0000000000..6517c39295 --- /dev/null +++ b/include/sysemu/block-backend-io.h @@ -0,0 +1,161 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_IO_H +#define BLOCK_BACKEND_IO_H + +#include "block-backend-common.h" + +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + +const char *blk_name(const BlockBackend *blk); + +BlockDriverState *blk_bs(BlockBackend *blk); + +void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); +void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow); +void blk_set_disable_request_queuing(BlockBackend *blk, bool disable); +bool blk_iostatus_is_enabled(const BlockBackend *blk); + +char *blk_get_attached_dev_id(BlockBackend *blk); + +BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t bytes, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); + +BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_flush(BlockBackend *blk, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes, + BlockCompletionFunc *cb, void *opaque); +void blk_aio_cancel_async(BlockAIOCB *acb); +BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque); + +void blk_inc_in_flight(BlockBackend *blk); +void blk_dec_in_flight(BlockBackend *blk); +bool blk_is_inserted(BlockBackend *blk); +bool blk_is_available(BlockBackend *blk); +void blk_lock_medium(BlockBackend *blk, bool locked); +void blk_eject(BlockBackend *blk, bool eject_flag); +int64_t blk_getlength(BlockBackend *blk); +void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr); +int64_t blk_nb_sectors(BlockBackend *blk); +void *blk_try_blockalign(BlockBackend *blk, size_t size); +void *blk_blockalign(BlockBackend *blk, size_t size); +bool blk_is_writable(BlockBackend *blk); +bool blk_enable_write_cache(BlockBackend *blk); +BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read); +BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, + int error); +void blk_error_action(BlockBackend *blk, BlockErrorAction action, + bool is_read, int error); +void blk_iostatus_set_err(BlockBackend *blk, int error); +int blk_get_max_iov(BlockBackend *blk); +int blk_get_max_hw_iov(BlockBackend *blk); +void blk_set_guest_block_size(BlockBackend *blk, int align); + +void blk_io_plug(BlockBackend *blk); +void blk_io_unplug(BlockBackend *blk); +AioContext *blk_get_aio_context(BlockBackend *blk); +BlockAcctStats *blk_get_stats(BlockBackend *blk); +void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, + BlockCompletionFunc *cb, + void *opaque, int ret); + +uint32_t blk_get_request_alignment(BlockBackend *blk); +uint32_t blk_get_max_transfer(BlockBackend *blk); +uint64_t blk_get_max_hw_transfer(BlockBackend *blk); + +int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, + BlockBackend *blk_out, int64_t off_out, + int64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + + +/* + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. + * + * See include/block/block-io.h for more information about + * the "I/O or GS" API. + */ + +int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes); +int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes, + BdrvRequestFlags flags); +int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, + int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); + +static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, + int64_t bytes, void *buf, + BdrvRequestFlags flags) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_OR_GS_CODE(); + + assert(bytes <= SIZE_MAX); + + return blk_co_preadv(blk, offset, bytes, &qiov, flags); +} + +static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, + int64_t bytes, void *buf, + BdrvRequestFlags flags) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_OR_GS_CODE(); + + assert(bytes <= SIZE_MAX); + + return blk_co_pwritev(blk, offset, bytes, &qiov, flags); +} + +int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, + int64_t bytes); + +int coroutine_fn blk_co_flush(BlockBackend *blk); +int blk_flush(BlockBackend *blk); + +int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); + +int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, + int64_t bytes); +int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); +int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t bytes, BdrvRequestFlags flags); +int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t bytes, BdrvRequestFlags flags); +int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); + +#endif /* BLOCK_BACKEND_IO_H */ diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index e5e1524f06..038be9fc40 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -13,272 +13,9 @@ #ifndef BLOCK_BACKEND_H #define BLOCK_BACKEND_H -#include "qemu/iov.h" -#include "block/throttle-groups.h" +#include "block-backend-global-state.h" +#include "block-backend-io.h" -/* - * TODO Have to include block/block.h for a bunch of block layer - * types. Unfortunately, this pulls in the whole BlockDriverState - * API, which we don't want used by many BlockBackend users. Some of - * the types belong here, and the rest should be split into a common - * header and one for the BlockDriverState API. - */ -#include "block/block.h" - -/* Callbacks for block device models */ -typedef struct BlockDevOps { - /* - * Runs when virtual media changed (monitor commands eject, change) - * Argument load is true on load and false on eject. - * Beware: doesn't run when a host device's physical media - * changes. Sure would be useful if it did. - * Device models with removable media must implement this callback. - */ - void (*change_media_cb)(void *opaque, bool load, Error **errp); - /* - * Runs when an eject request is issued from the monitor, the tray - * is closed, and the medium is locked. - * Device models that do not implement is_medium_locked will not need - * this callback. Device models that can lock the medium or tray might - * want to implement the callback and unlock the tray when "force" is - * true, even if they do not support eject requests. - */ - void (*eject_request_cb)(void *opaque, bool force); - /* - * Is the virtual tray open? - * Device models implement this only when the device has a tray. - */ - bool (*is_tray_open)(void *opaque); - /* - * Is the virtual medium locked into the device? - * Device models implement this only when device has such a lock. - */ - bool (*is_medium_locked)(void *opaque); - /* - * Runs when the size changed (e.g. monitor command block_resize) - */ - void (*resize_cb)(void *opaque); - /* - * Runs when the backend receives a drain request. - */ - void (*drained_begin)(void *opaque); - /* - * Runs when the backend's last drain request ends. - */ - void (*drained_end)(void *opaque); - /* - * Is the device still busy? - */ - bool (*drained_poll)(void *opaque); -} BlockDevOps; - -/* This struct is embedded in (the private) BlockBackend struct and contains - * fields that must be public. This is in particular for QLIST_ENTRY() and - * friends so that BlockBackends can be kept in lists outside block-backend.c - * */ -typedef struct BlockBackendPublic { - ThrottleGroupMember throttle_group_member; -} BlockBackendPublic; - -BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm); -BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, - uint64_t shared_perm, Error **errp); -BlockBackend *blk_new_open(const char *filename, const char *reference, - QDict *options, int flags, Error **errp); -int blk_get_refcnt(BlockBackend *blk); -void blk_ref(BlockBackend *blk); -void blk_unref(BlockBackend *blk); -void blk_remove_all_bs(void); -const char *blk_name(const BlockBackend *blk); -BlockBackend *blk_by_name(const char *name); -BlockBackend *blk_next(BlockBackend *blk); -BlockBackend *blk_all_next(BlockBackend *blk); -bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp); -void monitor_remove_blk(BlockBackend *blk); - -BlockBackendPublic *blk_get_public(BlockBackend *blk); -BlockBackend *blk_by_public(BlockBackendPublic *public); - -BlockDriverState *blk_bs(BlockBackend *blk); -void blk_remove_bs(BlockBackend *blk); -int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); -int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp); -bool bdrv_has_blk(BlockDriverState *bs); -bool bdrv_is_root_node(BlockDriverState *bs); -int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, - Error **errp); -void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); - -void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); -void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow); -void blk_set_disable_request_queuing(BlockBackend *blk, bool disable); -void blk_iostatus_enable(BlockBackend *blk); -bool blk_iostatus_is_enabled(const BlockBackend *blk); -BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk); -void blk_iostatus_disable(BlockBackend *blk); -void blk_iostatus_reset(BlockBackend *blk); -void blk_iostatus_set_err(BlockBackend *blk, int error); -int blk_attach_dev(BlockBackend *blk, DeviceState *dev); -void blk_detach_dev(BlockBackend *blk, DeviceState *dev); -DeviceState *blk_get_attached_dev(BlockBackend *blk); -char *blk_get_attached_dev_id(BlockBackend *blk); -BlockBackend *blk_by_dev(void *dev); -BlockBackend *blk_by_qdev_id(const char *id, Error **errp); -void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); -int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); -int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, - int64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, - BdrvRequestFlags flags); -int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); - -static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, - int64_t bytes, void *buf, - BdrvRequestFlags flags) -{ - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); - - assert(bytes <= SIZE_MAX); - - return blk_co_preadv(blk, offset, bytes, &qiov, flags); -} - -static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, - int64_t bytes, void *buf, - BdrvRequestFlags flags) -{ - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); - - assert(bytes <= SIZE_MAX); - - return blk_co_pwritev(blk, offset, bytes, &qiov, flags); -} - -int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int64_t bytes, BdrvRequestFlags flags); -BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int64_t bytes, BdrvRequestFlags flags, - BlockCompletionFunc *cb, void *opaque); -int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags); -int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes); -int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes, - BdrvRequestFlags flags); -int64_t blk_getlength(BlockBackend *blk); -void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr); -int64_t blk_nb_sectors(BlockBackend *blk); -BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, - QEMUIOVector *qiov, BdrvRequestFlags flags, - BlockCompletionFunc *cb, void *opaque); -BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, - QEMUIOVector *qiov, BdrvRequestFlags flags, - BlockCompletionFunc *cb, void *opaque); -BlockAIOCB *blk_aio_flush(BlockBackend *blk, - BlockCompletionFunc *cb, void *opaque); -BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes, - BlockCompletionFunc *cb, void *opaque); -void blk_aio_cancel(BlockAIOCB *acb); -void blk_aio_cancel_async(BlockAIOCB *acb); -int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); -BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, - BlockCompletionFunc *cb, void *opaque); -int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, - int64_t bytes); -int coroutine_fn blk_co_flush(BlockBackend *blk); -int blk_flush(BlockBackend *blk); -int blk_commit_all(void); -void blk_inc_in_flight(BlockBackend *blk); -void blk_dec_in_flight(BlockBackend *blk); -void blk_drain(BlockBackend *blk); -void blk_drain_all(void); -void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, - BlockdevOnError on_write_error); -BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read); -BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, - int error); -void blk_error_action(BlockBackend *blk, BlockErrorAction action, - bool is_read, int error); -bool blk_supports_write_perm(BlockBackend *blk); -bool blk_is_writable(BlockBackend *blk); -bool blk_is_sg(BlockBackend *blk); -bool blk_enable_write_cache(BlockBackend *blk); -void blk_set_enable_write_cache(BlockBackend *blk, bool wce); -void blk_invalidate_cache(BlockBackend *blk, Error **errp); -bool blk_is_inserted(BlockBackend *blk); -bool blk_is_available(BlockBackend *blk); -void blk_lock_medium(BlockBackend *blk, bool locked); -void blk_eject(BlockBackend *blk, bool eject_flag); -int blk_get_flags(BlockBackend *blk); -uint32_t blk_get_request_alignment(BlockBackend *blk); -uint32_t blk_get_max_transfer(BlockBackend *blk); -uint64_t blk_get_max_hw_transfer(BlockBackend *blk); -int blk_get_max_iov(BlockBackend *blk); -int blk_get_max_hw_iov(BlockBackend *blk); -void blk_set_guest_block_size(BlockBackend *blk, int align); -void *blk_try_blockalign(BlockBackend *blk, size_t size); -void *blk_blockalign(BlockBackend *blk, size_t size); -bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp); -void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason); -void blk_op_block_all(BlockBackend *blk, Error *reason); -void blk_op_unblock_all(BlockBackend *blk, Error *reason); -AioContext *blk_get_aio_context(BlockBackend *blk); -int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, - Error **errp); -void blk_add_aio_context_notifier(BlockBackend *blk, - void (*attached_aio_context)(AioContext *new_context, void *opaque), - void (*detach_aio_context)(void *opaque), void *opaque); -void blk_remove_aio_context_notifier(BlockBackend *blk, - void (*attached_aio_context)(AioContext *, - void *), - void (*detach_aio_context)(void *), - void *opaque); -void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify); -void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify); -void blk_io_plug(BlockBackend *blk); -void blk_io_unplug(BlockBackend *blk); -BlockAcctStats *blk_get_stats(BlockBackend *blk); -BlockBackendRootState *blk_get_root_state(BlockBackend *blk); -void blk_update_root_state(BlockBackend *blk); -bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk); -int blk_get_open_flags_from_root_state(BlockBackend *blk); - -void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, - BlockCompletionFunc *cb, void *opaque); -int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int64_t bytes, BdrvRequestFlags flags); -int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, - int64_t bytes); -int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, - PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); -int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); -int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, - int64_t pos, int size); -int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); -int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz); -int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo); -BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, - BlockCompletionFunc *cb, - void *opaque, int ret); - -void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg); -void blk_io_limits_disable(BlockBackend *blk); -void blk_io_limits_enable(BlockBackend *blk, const char *group); -void blk_io_limits_update_group(BlockBackend *blk, const char *group); -void blk_set_force_allow_inactivate(BlockBackend *blk); - -void blk_register_buf(BlockBackend *blk, void *host, size_t size); -void blk_unregister_buf(BlockBackend *blk, void *host); - -int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, - BlockBackend *blk_out, int64_t off_out, - int64_t bytes, BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); - -const BdrvChild *blk_root(BlockBackend *blk); - -int blk_make_empty(BlockBackend *blk, Error **errp); +/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */ #endif diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h index f9fb54d437..3211b16513 100644 --- a/include/sysemu/blockdev.h +++ b/include/sysemu/blockdev.h @@ -13,9 +13,6 @@ #include "block/block.h" #include "qemu/queue.h" -void blockdev_mark_auto_del(BlockBackend *blk); -void blockdev_auto_del(BlockBackend *blk); - typedef enum { IF_DEFAULT = -1, /* for use with drive_add() only */ /* @@ -38,6 +35,16 @@ struct DriveInfo { QTAILQ_ENTRY(DriveInfo) next; }; +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + +void blockdev_mark_auto_del(BlockBackend *blk); +void blockdev_auto_del(BlockBackend *blk); + DriveInfo *blk_legacy_dinfo(BlockBackend *blk); DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo); BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo); diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 2edf33658a..dd64fb401d 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -55,6 +55,7 @@ int os_mlock(void); typedef struct timeval qemu_timeval; #define qemu_gettimeofday(tp) gettimeofday(tp, NULL) +int os_set_daemonize(bool d); bool is_daemonized(void); /** diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h index 43f569b5c2..770752222a 100644 --- a/include/sysemu/os-win32.h +++ b/include/sysemu/os-win32.h @@ -77,6 +77,14 @@ typedef struct { } qemu_timeval; int qemu_gettimeofday(qemu_timeval *tp); +static inline int os_set_daemonize(bool d) +{ + if (d) { + return -ENOTSUP; + } + return 0; +} + static inline bool is_daemonized(void) { return false; @@ -381,6 +381,8 @@ void job_ref(Job *job) void job_unref(Job *job) { + GLOBAL_STATE_CODE(); + if (--job->refcnt == 0) { assert(job->status == JOB_STATUS_NULL); assert(!timer_pending(&job->sleep_timer)); @@ -602,6 +604,7 @@ bool job_user_paused(Job *job) void job_user_resume(Job *job, Error **errp) { assert(job); + GLOBAL_STATE_CODE(); if (!job->user_paused || job->pause_count <= 0) { error_setg(errp, "Can't resume a job that was not paused"); return; @@ -672,6 +675,7 @@ static void job_update_rc(Job *job) static void job_commit(Job *job) { assert(!job->ret); + GLOBAL_STATE_CODE(); if (job->driver->commit) { job->driver->commit(job); } @@ -680,6 +684,7 @@ static void job_commit(Job *job) static void job_abort(Job *job) { assert(job->ret); + GLOBAL_STATE_CODE(); if (job->driver->abort) { job->driver->abort(job); } @@ -687,6 +692,7 @@ static void job_abort(Job *job) static void job_clean(Job *job) { + GLOBAL_STATE_CODE(); if (job->driver->clean) { job->driver->clean(job); } @@ -726,6 +732,7 @@ static int job_finalize_single(Job *job) static void job_cancel_async(Job *job, bool force) { + GLOBAL_STATE_CODE(); if (job->driver->cancel) { force = job->driver->cancel(job, force); } else { @@ -825,6 +832,7 @@ static void job_completed_txn_abort(Job *job) static int job_prepare(Job *job) { + GLOBAL_STATE_CODE(); if (job->ret == 0 && job->driver->prepare) { job->ret = job->driver->prepare(job); job_update_rc(job); @@ -952,6 +960,7 @@ static void coroutine_fn job_co_entry(void *opaque) Job *job = opaque; assert(job && job->driver && job->driver->run); + assert(job->aio_context == qemu_get_current_aio_context()); job_pause_point(job); job->ret = job->driver->run(job, &job->err); job->deferred_to_main_loop = true; @@ -1054,6 +1063,7 @@ void job_complete(Job *job, Error **errp) { /* Should not be reachable via external interface for internal jobs */ assert(job->id); + GLOBAL_STATE_CODE(); if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) { return; } diff --git a/migration/block.c b/migration/block.c index a950977855..077a413325 100644 --- a/migration/block.c +++ b/migration/block.c @@ -932,7 +932,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) return -EINVAL; } - blk_invalidate_cache(blk, &local_err); + blk_activate(blk, &local_err); if (local_err) { error_report_err(local_err); return -EINVAL; diff --git a/migration/migration.c b/migration/migration.c index 9cc344514b..695f0f2900 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -503,9 +503,9 @@ static void process_incoming_migration_bh(void *opaque) if (!migrate_late_block_activate() || (autostart && (!global_state_received() || global_state_get_runstate() == RUN_STATE_RUNNING))) { - /* Make sure all file formats flush their mutable metadata. + /* Make sure all file formats throw away their mutable metadata. * If we get an error here, just don't restart the VM yet. */ - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); local_err = NULL; @@ -591,8 +591,8 @@ static void process_incoming_migration_co(void *opaque) /* we get COLO info, and know if we are in COLO mode */ if (!ret && migration_incoming_colo_enabled()) { - /* Make sure all file formats flush their mutable metadata */ - bdrv_invalidate_cache_all(&local_err); + /* Make sure all file formats throw away their mutable metadata */ + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); goto fail; @@ -1932,7 +1932,7 @@ static void migrate_fd_cancel(MigrationState *s) if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) { Error *local_err = NULL; - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); } else { @@ -3111,7 +3111,7 @@ fail: */ Error *local_err = NULL; - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); } @@ -3256,7 +3256,7 @@ fail_invalidate: Error *local_err = NULL; qemu_mutex_lock_iothread(); - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); } else { diff --git a/migration/savevm.c b/migration/savevm.c index 967ff80547..02ed94c180 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1438,7 +1438,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, if (inactivate_disks) { /* Inactivate before sending QEMU_VM_EOF so that the - * bdrv_invalidate_cache_all() on the other end won't fail. */ + * bdrv_activate_all() on the other end won't fail. */ ret = bdrv_inactivate_all(); if (ret) { error_report("%s: bdrv_inactivate_all() failed (%d)", @@ -2013,9 +2013,9 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) trace_loadvm_postcopy_handle_run_bh("after announce"); - /* Make sure all file formats flush their mutable metadata. + /* Make sure all file formats throw away their mutable metadata. * If we get an error here, just don't restart the VM yet. */ - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); local_err = NULL; @@ -2808,6 +2808,8 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, g_autoptr(GDateTime) now = g_date_time_new_now_local(); AioContext *aio_context; + GLOBAL_STATE_CODE(); + if (migration_is_blocked(errp)) { return false; } diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c index df97582dd4..ad82c275c4 100644 --- a/monitor/qmp-cmds.c +++ b/monitor/qmp-cmds.c @@ -144,7 +144,7 @@ void qmp_cont(Error **errp) * If there are no inactive block nodes (e.g. because the VM was just * paused rather than completing a migration), bdrv_inactivate_all() simply * doesn't do anything. */ - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_propagate(errp, local_err); return; diff --git a/os-posix.c b/os-posix.c index ae6c9f2a5e..24692c8593 100644 --- a/os-posix.c +++ b/os-posix.c @@ -317,6 +317,12 @@ bool is_daemonized(void) return daemonize; } +int os_set_daemonize(bool d) +{ + daemonize = d; + return 0; +} + int os_mlock(void) { #ifdef HAVE_MLOCKALL diff --git a/softmmu/cpus.c b/softmmu/cpus.c index 035395ae13..1681844b61 100644 --- a/softmmu/cpus.c +++ b/softmmu/cpus.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "monitor/monitor.h" +#include "qemu/coroutine-tls.h" #include "qapi/error.h" #include "qapi/qapi-commands-machine.h" #include "qapi/qapi-commands-misc.h" @@ -473,11 +474,16 @@ bool qemu_in_vcpu_thread(void) return current_cpu && qemu_cpu_is_self(current_cpu); } -static __thread bool iothread_locked = false; +QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked) bool qemu_mutex_iothread_locked(void) { - return iothread_locked; + return get_iothread_locked(); +} + +bool qemu_in_main_thread(void) +{ + return qemu_mutex_iothread_locked(); } /* @@ -490,13 +496,13 @@ void qemu_mutex_lock_iothread_impl(const char *file, int line) g_assert(!qemu_mutex_iothread_locked()); bql_lock(&qemu_global_mutex, file, line); - iothread_locked = true; + set_iothread_locked(true); } void qemu_mutex_unlock_iothread(void) { g_assert(qemu_mutex_iothread_locked()); - iothread_locked = false; + set_iothread_locked(false); qemu_mutex_unlock(&qemu_global_mutex); } diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c index a0df820b9d..fe6cf268ff 100644 --- a/softmmu/qdev-monitor.c +++ b/softmmu/qdev-monitor.c @@ -973,6 +973,8 @@ BlockBackend *blk_by_qdev_id(const char *id, Error **errp) DeviceState *dev; BlockBackend *blk; + GLOBAL_STATE_CODE(); + dev = find_device_state(id, errp); if (dev == NULL) { return NULL; diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c index 504d33aa91..dd18b2cde8 100644 --- a/storage-daemon/qemu-storage-daemon.c +++ b/storage-daemon/qemu-storage-daemon.c @@ -93,6 +93,9 @@ static void help(void) " --chardev <options> configure a character device backend\n" " (see the qemu(1) man page for possible options)\n" "\n" +" --daemonize daemonize the process, and have the parent exit\n" +" once startup is complete\n" +"\n" " --export [type=]nbd,id=<id>,node-name=<node-name>[,name=<export-name>]\n" " [,writable=on|off][,bitmap=<name>]\n" " export the specified block node over NBD\n" @@ -144,6 +147,7 @@ QEMU_HELP_BOTTOM "\n", enum { OPTION_BLOCKDEV = 256, OPTION_CHARDEV, + OPTION_DAEMONIZE, OPTION_EXPORT, OPTION_MONITOR, OPTION_NBD_SERVER, @@ -177,13 +181,30 @@ static int getopt_set_loc(int argc, char **argv, const char *optstring, return c; } -static void process_options(int argc, char *argv[]) +/** + * Process QSD command-line arguments. + * + * This is done in two passes: + * + * First (@pre_init_pass is true), we do a pass where all global + * arguments pertaining to the QSD process (like --help or --daemonize) + * are processed. This pass is done before most of the QEMU-specific + * initialization steps (e.g. initializing the block layer or QMP), and + * so must only process arguments that are not really QEMU-specific. + * + * Second (@pre_init_pass is false), we (sequentially) process all + * QEMU/QSD-specific arguments. Many of these arguments are effectively + * translated to QMP commands (like --blockdev for blockdev-add, or + * --export for block-export-add). + */ +static void process_options(int argc, char *argv[], bool pre_init_pass) { int c; static const struct option long_options[] = { {"blockdev", required_argument, NULL, OPTION_BLOCKDEV}, {"chardev", required_argument, NULL, OPTION_CHARDEV}, + {"daemonize", no_argument, NULL, OPTION_DAEMONIZE}, {"export", required_argument, NULL, OPTION_EXPORT}, {"help", no_argument, NULL, 'h'}, {"monitor", required_argument, NULL, OPTION_MONITOR}, @@ -196,11 +217,27 @@ static void process_options(int argc, char *argv[]) }; /* - * In contrast to the system emulator, options are processed in the order - * they are given on the command lines. This means that things must be - * defined first before they can be referenced in another option. + * In contrast to the system emulator, QEMU-specific options are processed + * in the order they are given on the command lines. This means that things + * must be defined first before they can be referenced in another option. */ + optind = 1; while ((c = getopt_set_loc(argc, argv, "-hT:V", long_options)) != -1) { + bool handle_option_pre_init; + + /* Should this argument be processed in the pre-init pass? */ + handle_option_pre_init = + c == '?' || + c == 'h' || + c == 'V' || + c == OPTION_DAEMONIZE || + c == OPTION_PIDFILE; + + /* Process every option only in its respective pass */ + if (pre_init_pass != handle_option_pre_init) { + continue; + } + switch (c) { case '?': exit(EXIT_FAILURE); @@ -246,6 +283,12 @@ static void process_options(int argc, char *argv[]) qemu_opts_del(opts); break; } + case OPTION_DAEMONIZE: + if (os_set_daemonize(true) < 0) { + error_report("--daemonize not supported in this build"); + exit(EXIT_FAILURE); + } + break; case OPTION_EXPORT: { Visitor *v; @@ -334,6 +377,10 @@ int main(int argc, char *argv[]) qemu_init_exec_dir(argv[0]); os_setup_signal_handling(); + process_options(argc, argv, true); + + os_daemonize(); + module_call_init(MODULE_INIT_QOM); module_call_init(MODULE_INIT_TRACE); qemu_add_opts(&qemu_trace_opts); @@ -348,7 +395,7 @@ int main(int argc, char *argv[]) qemu_set_log(LOG_TRACE); qemu_init_main_loop(&error_fatal); - process_options(argc, argv); + process_options(argc, argv, false); /* * Write the pid file after creating chardevs, exports, and NBD servers but @@ -356,6 +403,7 @@ int main(int argc, char *argv[]) * it. */ pid_file_init(); + os_setup_post(); while (!exit_requested) { main_loop_wait(false); diff --git a/stubs/iothread-lock-block.c b/stubs/iothread-lock-block.c new file mode 100644 index 0000000000..c88ed70462 --- /dev/null +++ b/stubs/iothread-lock-block.c @@ -0,0 +1,8 @@ +#include "qemu/osdep.h" +#include "qemu/main-loop.h" + +bool qemu_in_main_thread(void) +{ + return qemu_get_current_aio_context() == qemu_get_aio_context(); +} + diff --git a/stubs/meson.build b/stubs/meson.build index d359cbe1ad..6f80fec761 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -17,6 +17,9 @@ if linux_io_uring.found() stub_ss.add(files('io_uring.c')) endif stub_ss.add(files('iothread-lock.c')) +if have_block + stub_ss.add(files('iothread-lock-block.c')) +endif stub_ss.add(files('isa-bus.c')) stub_ss.add(files('is-daemonized.c')) if libaio.found() diff --git a/tests/check-block.sh b/tests/check-block.sh index 18f7433901..f59496396c 100755 --- a/tests/check-block.sh +++ b/tests/check-block.sh @@ -48,18 +48,6 @@ if LANG=C bash --version | grep -q 'GNU bash, version [123]' ; then skip "bash version too old ==> Not running the qemu-iotests." fi -if ! (sed --version | grep 'GNU sed') > /dev/null 2>&1 ; then - if ! command -v gsed >/dev/null 2>&1; then - skip "GNU sed not available ==> Not running the qemu-iotests." - fi -else - # Double-check that we're not using BusyBox' sed which says - # that "This is not GNU sed version 4.0" ... - if sed --version | grep -q 'not GNU sed' ; then - skip "BusyBox sed not supported ==> Not running the qemu-iotests." - fi -fi - cd tests/qemu-iotests # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185 index f2ec5c5ceb..8b1143dc16 100755 --- a/tests/qemu-iotests/185 +++ b/tests/qemu-iotests/185 @@ -33,6 +33,12 @@ _cleanup() _rm_test_img "${TEST_IMG}.copy" _cleanup_test_img _cleanup_qemu + + if [ -f "$TEST_DIR/qsd.pid" ]; then + kill -SIGKILL "$(cat "$TEST_DIR/qsd.pid")" + rm -f "$TEST_DIR/qsd.pid" + fi + rm -f "$SOCK_DIR/qsd.sock" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -45,7 +51,7 @@ _supported_fmt qcow2 _supported_proto file _supported_os Linux -size=64M +size=$((64 * 1048576)) TEST_IMG="${TEST_IMG}.base" _make_test_img $size echo @@ -216,6 +222,188 @@ wait=1 _cleanup_qemu | grep -v 'JOB_STATUS_CHANGE' _check_test_img +echo +echo === Start mirror to throttled QSD and exit qemu === +echo + +# Mirror to a throttled QSD instance (so that qemu cannot drain the +# throttling), wait for READY, then write some data to the device, +# and then quit qemu. +# (qemu should force-cancel the job and not wait for the data to be +# written to the target.) + +_make_test_img $size + +# Will be used by this and the next case +set_up_throttled_qsd() { + $QSD \ + --object throttle-group,id=thrgr,limits.bps-total=1048576 \ + --blockdev null-co,node-name=null,size=$size \ + --blockdev throttle,node-name=throttled,throttle-group=thrgr,file=null \ + --nbd-server addr.type=unix,addr.path="$SOCK_DIR/qsd.sock" \ + --export nbd,id=exp,node-name=throttled,name=target,writable=true \ + --pidfile "$TEST_DIR/qsd.pid" \ + --daemonize +} + +set_up_throttled_qsd + +# Need a virtio-blk device so that qemu-io writes will not block the monitor +_launch_qemu \ + --blockdev file,node-name=source-proto,filename="$TEST_IMG" \ + --blockdev qcow2,node-name=source-fmt,file=source-proto \ + --device virtio-blk,id=vblk,drive=source-fmt \ + --blockdev "{\"driver\": \"nbd\", + \"node-name\": \"target\", + \"server\": { + \"type\": \"unix\", + \"path\": \"$SOCK_DIR/qsd.sock\" + }, + \"export\": \"target\"}" + +h=$QEMU_HANDLE +_send_qemu_cmd $h '{"execute": "qmp_capabilities"}' 'return' + +# Use sync=top, so the first pass will not copy the whole image +_send_qemu_cmd $h \ + '{"execute": "blockdev-mirror", + "arguments": { + "job-id": "mirror", + "device": "source-fmt", + "target": "target", + "sync": "top" + }}' \ + 'return' \ + | grep -v JOB_STATUS_CHANGE # Ignore these events during creation + +# This too will be used by this and the next case +# $1: QEMU handle +# $2: Image size +wait_for_job_and_quit() { + h=$1 + size=$2 + + # List of expected events + capture_events='BLOCK_JOB_READY JOB_STATUS_CHANGE' + _wait_event $h 'BLOCK_JOB_READY' + QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before READY + + # Write something to the device for post-READY mirroring. Write it in + # blocks matching the cluster size, each spaced one block apart, so + # that the mirror job will have to spawn one request per cluster. + # Because the number of concurrent requests is limited (to 16), this + # limits the number of bytes concurrently in flight, which speeds up + # cancelling the job (in-flight requests still are waited for). + # To limit the number of bytes in flight, we could alternatively pass + # something for blockdev-mirror's @buf-size parameter, but + # block-commit does not have such a parameter, so we need to figure + # something out that works for both. + + cluster_size=65536 + step=$((cluster_size * 2)) + + echo '--- Writing data to the virtio-blk device ---' + + for ofs in $(seq 0 $step $((size - step))); do + qemu_io_cmd="qemu-io -d vblk/virtio-backend " + qemu_io_cmd+="\\\"aio_write $ofs $cluster_size\\\"" + + # Do not include these requests in the reference output + # (it's just too much) + silent=yes _send_qemu_cmd $h \ + "{\"execute\": \"human-monitor-command\", + \"arguments\": { + \"command-line\": \"$qemu_io_cmd\" + }}" \ + 'return' + done + + # Wait until the job's length is updated to reflect the write requests + + # We have written to half of the device, so this is the expected job length + final_len=$((size / 2)) + timeout=100 # unit: 0.1 seconds + while true; do + len=$( + _send_qemu_cmd $h \ + '{"execute": "query-block-jobs"}' \ + 'return.*"len": [0-9]\+' \ + | grep 'return.*"len": [0-9]\+' \ + | sed -e 's/.*"len": \([0-9]\+\).*/\1/' + ) + if [ "$len" -eq "$final_len" ]; then + break + fi + timeout=$((timeout - 1)) + if [ "$timeout" -eq 0 ]; then + echo "ERROR: Timeout waiting for job to reach len=$final_len" + break + fi + sleep 0.1 + done + + sleep 1 + + _send_qemu_cmd $h \ + '{"execute": "quit"}' \ + 'return' + + # List of expected events + capture_events='BLOCK_JOB_CANCELLED JOB_STATUS_CHANGE SHUTDOWN' + _wait_event $h 'SHUTDOWN' + QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before SHUTDOWN + _wait_event $h 'JOB_STATUS_CHANGE' # standby + _wait_event $h 'JOB_STATUS_CHANGE' # ready + _wait_event $h 'JOB_STATUS_CHANGE' # aborting + # Filter the offset (depends on when exactly `quit` was issued) + _wait_event $h 'BLOCK_JOB_CANCELLED' \ + | sed -e 's/"offset": [0-9]\+/"offset": (filtered)/' + _wait_event $h 'JOB_STATUS_CHANGE' # concluded + _wait_event $h 'JOB_STATUS_CHANGE' # null + + wait=yes _cleanup_qemu + + kill -SIGTERM "$(cat "$TEST_DIR/qsd.pid")" +} + +wait_for_job_and_quit $h $size + +echo +echo === Start active commit to throttled QSD and exit qemu === +echo + +# Same as the above, but instead of mirroring, do an active commit + +_make_test_img $size + +set_up_throttled_qsd + +_launch_qemu \ + --blockdev "{\"driver\": \"nbd\", + \"node-name\": \"target\", + \"server\": { + \"type\": \"unix\", + \"path\": \"$SOCK_DIR/qsd.sock\" + }, + \"export\": \"target\"}" \ + --blockdev file,node-name=source-proto,filename="$TEST_IMG" \ + --blockdev qcow2,node-name=source-fmt,file=source-proto,backing=target \ + --device virtio-blk,id=vblk,drive=source-fmt + +h=$QEMU_HANDLE +_send_qemu_cmd $h '{"execute": "qmp_capabilities"}' 'return' + +_send_qemu_cmd $h \ + '{"execute": "block-commit", + "arguments": { + "job-id": "commit", + "device": "source-fmt" + }}' \ + 'return' \ + | grep -v JOB_STATUS_CHANGE # Ignore these events during creation + +wait_for_job_and_quit $h $size + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out index 754a641258..70e8dd6c87 100644 --- a/tests/qemu-iotests/185.out +++ b/tests/qemu-iotests/185.out @@ -116,4 +116,52 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "stream"}} No errors were found on the image. + +=== Start mirror to throttled QSD and exit qemu === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +{"execute": "qmp_capabilities"} +{"return": {}} +{"execute": "blockdev-mirror", + "arguments": { + "job-id": "mirror", + "device": "source-fmt", + "target": "target", + "sync": "top" + }} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "mirror", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} +--- Writing data to the virtio-blk device --- +{"execute": "quit"} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "mirror", "len": 33554432, "offset": (filtered), "speed": 0, "type": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "mirror"}} + +=== Start active commit to throttled QSD and exit qemu === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +{"execute": "qmp_capabilities"} +{"return": {}} +{"execute": "block-commit", + "arguments": { + "job-id": "commit", + "device": "source-fmt" + }} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "commit", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} +--- Writing data to the virtio-blk device --- +{"execute": "quit"} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "commit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "commit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "commit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "commit", "len": 33554432, "offset": (filtered), "speed": 0, "type": "commit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "commit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "commit"}} *** done diff --git a/tests/qemu-iotests/271 b/tests/qemu-iotests/271 index 2775b4d130..c7c2cadda0 100755 --- a/tests/qemu-iotests/271 +++ b/tests/qemu-iotests/271 @@ -896,7 +896,7 @@ _make_test_img -o extended_l2=on 1M # Second and third writes in _concurrent_io() are independent and may finish in # different order. So, filter offset out to match both possible variants. _concurrent_io | $QEMU_IO | _filter_qemu_io | \ - $SED -e 's/\(20480\|40960\)/OFFSET/' + sed -e 's/\(20480\|40960\)/OFFSET/' _concurrent_verify | $QEMU_IO | _filter_qemu_io # success, all done diff --git a/tests/qemu-iotests/296 b/tests/qemu-iotests/296 index 099a3eeaa5..f80ef3434a 100755 --- a/tests/qemu-iotests/296 +++ b/tests/qemu-iotests/296 @@ -174,8 +174,12 @@ class EncryptionSetupTestCase(iotests.QMPTestCase): } result = vm.qmp('x-blockdev-amend', **args) - assert result['return'] == {} - vm.run_job('job0') + iotests.log(result) + # Run the job only if it was created + event = ('JOB_STATUS_CHANGE', + {'data': {'id': 'job0', 'status': 'created'}}) + if vm.events_wait([event], timeout=0.0) is not None: + vm.run_job('job0') # test that when the image opened by two qemu processes, # neither of them can update the encryption keys diff --git a/tests/qemu-iotests/296.out b/tests/qemu-iotests/296.out index 42205cc981..609826eaa0 100644 --- a/tests/qemu-iotests/296.out +++ b/tests/qemu-iotests/296.out @@ -1,11 +1,9 @@ -{"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} -Job failed: Failed to get shared "consistent read" lock {"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} -Job failed: Failed to get shared "consistent read" lock -{"execute": "job-dismiss", "arguments": {"id": "job0"}} +{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}} +{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}} {"return": {}} {"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} @@ -13,14 +11,9 @@ qemu-img: Failed to get shared "consistent read" lock Is another process using the image [TEST_DIR/test.img]? . -Job failed: Block node is read-only -{"execute": "job-dismiss", "arguments": {"id": "job0"}} -{"return": {}} -Job failed: Failed to get shared "consistent read" lock -{"execute": "job-dismiss", "arguments": {"id": "job0"}} -{"return": {}} -Job failed: Failed to get shared "consistent read" lock -{"execute": "job-dismiss", "arguments": {"id": "job0"}} +{"error": {"class": "GenericError", "desc": "Block node is read-only"}} +{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}} +{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}} {"return": {}} {"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index 75cc241580..21819db9c3 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -21,44 +21,44 @@ _filter_date() { - $SED -re 's/[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/yyyy-mm-dd hh:mm:ss/' + sed -Ee 's/[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/yyyy-mm-dd hh:mm:ss/' } _filter_vmstate_size() { - $SED -r -e 's/[0-9. ]{5} [KMGT]iB/ SIZE/' \ - -e 's/[0-9. ]{5} B/ SIZE/' + sed -E -e 's/[0-9. ]{5} [KMGT]iB/ SIZE/' \ + -e 's/[0-9. ]{5} B/ SIZE/' } _filter_generated_node_ids() { - $SED -re 's/\#block[0-9]{3,}/NODE_NAME/' + sed -Ee 's/\#block[0-9]{3,}/NODE_NAME/' } _filter_qom_path() { - $SED -e '/Attached to:/s/\device[[0-9]\+\]/device[N]/g' + gsed -e '/Attached to:/s/\device[[0-9]\+\]/device[N]/g' } # replace occurrences of the actual TEST_DIR value with TEST_DIR _filter_testdir() { - $SED -e "s#$TEST_DIR/#TEST_DIR/#g" \ - -e "s#$SOCK_DIR/#SOCK_DIR/#g" \ - -e "s#SOCK_DIR/fuse-#TEST_DIR/#g" + sed -e "s#$TEST_DIR/#TEST_DIR/#g" \ + -e "s#$SOCK_DIR/#SOCK_DIR/#g" \ + -e "s#SOCK_DIR/fuse-#TEST_DIR/#g" } # replace occurrences of the actual IMGFMT value with IMGFMT _filter_imgfmt() { - $SED -e "s#$IMGFMT#IMGFMT#g" + sed -e "s#$IMGFMT#IMGFMT#g" } # Replace error message when the format is not supported and delete # the output lines after the first one _filter_qemu_img_check() { - $SED -e '/allocated.*fragmented.*compressed clusters/d' \ + gsed -e '/allocated.*fragmented.*compressed clusters/d' \ -e 's/qemu-img: This image format does not support checks/No errors were found on the image./' \ -e '/Image end offset: [0-9]\+/d' } @@ -66,13 +66,14 @@ _filter_qemu_img_check() # Removes \r from messages _filter_win32() { - $SED -e 's/\r//g' + gsed -e 's/\r//g' } # sanitize qemu-io output _filter_qemu_io() { - _filter_win32 | $SED -e "s/[0-9]* ops\; [0-9/:. sec]* ([0-9/.inf]* [EPTGMKiBbytes]*\/sec and [0-9/.inf]* ops\/sec)/X ops\; XX:XX:XX.X (XXX YYY\/sec and XXX ops\/sec)/" \ + _filter_win32 | \ + gsed -e "s/[0-9]* ops\; [0-9/:. sec]* ([0-9/.inf]* [EPTGMKiBbytes]*\/sec and [0-9/.inf]* ops\/sec)/X ops\; XX:XX:XX.X (XXX YYY\/sec and XXX ops\/sec)/" \ -e "s/: line [0-9][0-9]*: *[0-9][0-9]*\( Aborted\| Killed\)/:\1/" \ -e "s/qemu-io> //g" } @@ -80,7 +81,7 @@ _filter_qemu_io() # replace occurrences of QEMU_PROG with "qemu" _filter_qemu() { - $SED -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \ + gsed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \ -e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \ -e $'s#\r##' # QEMU monitor uses \r\n line endings } @@ -89,7 +90,7 @@ _filter_qemu() _filter_qmp() { _filter_win32 | \ - $SED -e 's#\("\(micro\)\?seconds": \)[0-9]\+#\1 TIMESTAMP#g' \ + gsed -e 's#\("\(micro\)\?seconds": \)[0-9]\+#\1 TIMESTAMP#g' \ -e 's#^{"QMP":.*}$#QMP_VERSION#' \ -e '/^ "QMP": {\s*$/, /^ }\s*$/ c\' \ -e ' QMP_VERSION' @@ -98,32 +99,32 @@ _filter_qmp() # readline makes HMP command strings so long that git complains _filter_hmp() { - $SED -e $'s/^\\((qemu) \\)\\?.*\e\\[D/\\1/g' \ + gsed -e $'s/^\\((qemu) \\)\\?.*\e\\[D/\\1/g' \ -e $'s/\e\\[K//g' } # replace block job offset _filter_block_job_offset() { - $SED -e 's/, "offset": [0-9]\+,/, "offset": OFFSET,/' + sed -e 's/, "offset": [0-9]\+,/, "offset": OFFSET,/' } # replace block job len _filter_block_job_len() { - $SED -e 's/, "len": [0-9]\+,/, "len": LEN,/g' + sed -e 's/, "len": [0-9]\+,/, "len": LEN,/g' } # replace actual image size (depends on the host filesystem) _filter_actual_image_size() { - $SED -s 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' + gsed -s 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' } # Filename filters for qemu-img create _filter_img_create_filenames() { - $SED \ + sed \ -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ -e "s#$TEST_DIR#TEST_DIR#g" \ @@ -141,7 +142,7 @@ _do_filter_img_create() # precedes ", fmt=") and the options part ($options, which starts # with "fmt=") # (And just echo everything before the first "^Formatting") - readarray formatting_line < <($SED -e 's/, fmt=/\n/') + readarray formatting_line < <(gsed -e 's/, fmt=/\n/') filename_part=${formatting_line[0]} unset formatting_line[0] @@ -168,11 +169,11 @@ _do_filter_img_create() options=$( echo "$options" \ | tr '\n' '\0' \ - | $SED -e 's/ \([a-z0-9_.-]*\)=/\n\1=/g' \ + | gsed -e 's/ \([a-z0-9_.-]*\)=/\n\1=/g' \ | grep -a -e '^fmt' -e '^size' -e '^backing' -e '^preallocation' \ -e '^encryption' "${grep_data_file[@]}" \ | _filter_img_create_filenames \ - | $SED \ + | sed \ -e 's/^\(fmt\)/0-\1/' \ -e 's/^\(size\)/1-\1/' \ -e 's/^\(backing\)/2-\1/' \ @@ -180,9 +181,9 @@ _do_filter_img_create() -e 's/^\(encryption\)/4-\1/' \ -e 's/^\(preallocation\)/8-\1/' \ | LC_ALL=C sort \ - | $SED -e 's/^[0-9]-//' \ + | sed -e 's/^[0-9]-//' \ | tr '\n\0' ' \n' \ - | $SED -e 's/^ *$//' -e 's/ *$//' + | sed -e 's/^ *$//' -e 's/ *$//' ) if [ -n "$options" ]; then @@ -208,7 +209,7 @@ _filter_img_create() _filter_img_create_size() { - $SED -e "s# size=[0-9]\\+# size=SIZE#g" + gsed -e "s# size=[0-9]\\+# size=SIZE#g" } _filter_img_info() @@ -222,7 +223,7 @@ _filter_img_info() discard=0 regex_json_spec_start='^ *"format-specific": \{' - $SED -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ + gsed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ -e "s#$TEST_DIR#TEST_DIR#g" \ -e "s#$SOCK_DIR#SOCK_DIR#g" \ @@ -284,7 +285,7 @@ _filter_qemu_img_map() data_file_filter=(-e "s#$data_file_pattern#\\1#") fi - $SED -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \ + sed -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \ -e 's/"offset": [0-9]\+/"offset": OFFSET/g' \ -e 's/Mapped to *//' \ "${data_file_filter[@]}" \ @@ -298,7 +299,7 @@ _filter_nbd() # receive callbacks sometimes, making them unreliable. # # Filter out the TCP port number since this changes between runs. - $SED -e '/nbd\/.*\.c:/d' \ + sed -e '/nbd\/.*\.c:/d' \ -e 's#127\.0\.0\.1:[0-9]*#127.0.0.1:PORT#g' \ -e "s#?socket=$SOCK_DIR#?socket=SOCK_DIR#g" \ -e 's#\(foo\|PORT/\?\|.sock\): Failed to .*$#\1#' @@ -335,14 +336,14 @@ sys.stdout.write(result)' _filter_authz_check_tls() { - $SED -e 's/TLS x509 authz check for .* is denied/TLS x509 authz check for DISTINGUISHED-NAME is denied/' + sed -e 's/TLS x509 authz check for .* is denied/TLS x509 authz check for DISTINGUISHED-NAME is denied/' } _filter_qcow2_compression_type_bit() { - $SED -e 's/\(incompatible_features\s\+\)\[3\(, \)\?/\1[/' \ - -e 's/\(incompatible_features.*\), 3\]/\1]/' \ - -e 's/\(incompatible_features.*\), 3\(,.*\)/\1\2/' + gsed -e 's/\(incompatible_features\s\+\)\[3\(, \)\?/\1[/' \ + -e 's/\(incompatible_features.*\), 3\]/\1]/' \ + -e 's/\(incompatible_features.*\), 3\(,.*\)/\1\2/' } # make sure this script returns success diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 9885030b43..3bfd94c2e0 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -17,17 +17,28 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # -SED= -for sed in sed gsed; do - ($sed --version | grep 'GNU sed') > /dev/null 2>&1 - if [ "$?" -eq 0 ]; then - SED=$sed - break - fi -done -if [ -z "$SED" ]; then - echo "$0: GNU sed not found" - exit 1 +# bail out, setting up .notrun file +_notrun() +{ + echo "$*" >"$OUTPUT_DIR/$seq.notrun" + echo "$seq not run: $*" + status=0 + exit +} + +if ! command -v gsed >/dev/null 2>&1; then + if sed --version 2>&1 | grep -v 'not GNU sed' | grep 'GNU sed' > /dev/null; + then + gsed() + { + sed "$@" + } + else + gsed() + { + _notrun "GNU sed not available" + } + fi fi dd() @@ -722,16 +733,6 @@ _img_info() done } -# bail out, setting up .notrun file -# -_notrun() -{ - echo "$*" >"$OUTPUT_DIR/$seq.notrun" - echo "$seq not run: $*" - status=0 - exit -} - # bail out, setting up .casenotrun file # The function _casenotrun() is used as a notifier. It is the # caller's responsibility to make skipped a particular test. @@ -920,7 +921,7 @@ _require_working_luks() IMGFMT='luks' _rm_test_img "$file" if [ $status != 0 ]; then - reason=$(echo "$output" | grep "$file:" | $SED -e "s#.*$file: *##") + reason=$(echo "$output" | grep "$file:" | sed -e "s#.*$file: *##") if [ -z "$reason" ]; then reason="Failed to create a LUKS image" fi diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 6ba65eb1ff..6027780180 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -39,6 +39,7 @@ from contextlib import contextmanager from qemu.machine import qtest from qemu.qmp import QMPMessage +from qemu.aqmp.legacy import QEMUMonitorProtocol # Use this logger for logging messages directly from the iotests module logger = logging.getLogger('qemu.iotests') @@ -348,14 +349,30 @@ class QemuIoInteractive: class QemuStorageDaemon: - def __init__(self, *args: str, instance_id: str = 'a'): + _qmp: Optional[QEMUMonitorProtocol] = None + _qmpsock: Optional[str] = None + # Python < 3.8 would complain if this type were not a string literal + # (importing `annotations` from `__future__` would work; but not on <= 3.6) + _p: 'Optional[subprocess.Popen[bytes]]' = None + + def __init__(self, *args: str, instance_id: str = 'a', qmp: bool = False): assert '--pidfile' not in args self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid') all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile] + if qmp: + self._qmpsock = os.path.join(sock_dir, f'qsd-{instance_id}.sock') + all_args += ['--chardev', + f'socket,id=qmp-sock,path={self._qmpsock}', + '--monitor', 'qmp-sock'] + + self._qmp = QEMUMonitorProtocol(self._qmpsock, server=True) + # Cannot use with here, we want the subprocess to stay around # pylint: disable=consider-using-with self._p = subprocess.Popen(all_args) + if self._qmp is not None: + self._qmp.accept() while not os.path.exists(self.pidfile): if self._p.poll() is not None: cmd = ' '.join(all_args) @@ -370,11 +387,24 @@ class QemuStorageDaemon: assert self._pid == self._p.pid + def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \ + -> QMPMessage: + assert self._qmp is not None + return self._qmp.cmd(cmd, args) + def stop(self, kill_signal=15): self._p.send_signal(kill_signal) self._p.wait() self._p = None + if self._qmp: + self._qmp.close() + + if self._qmpsock is not None: + try: + os.remove(self._qmpsock) + except OSError: + pass try: os.remove(self.pidfile) except OSError: diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io new file mode 100755 index 0000000000..567e8cf21e --- /dev/null +++ b/tests/qemu-iotests/tests/graph-changes-while-io @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# group: rw +# +# Test graph changes while I/O is happening +# +# Copyright (C) 2022 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import os +from threading import Thread +import iotests +from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \ + QemuStorageDaemon + + +top = os.path.join(iotests.test_dir, 'top.img') +nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') + + +def do_qemu_img_bench() -> None: + """ + Do some I/O requests on `nbd_sock`. + """ + assert qemu_img('bench', '-f', 'raw', '-c', '2000000', + f'nbd+unix:///node0?socket={nbd_sock}') == 0 + + +class TestGraphChangesWhileIO(QMPTestCase): + def setUp(self) -> None: + # Create an overlay that can be added at runtime on top of the + # null-co block node that will receive I/O + assert qemu_img_create('-f', imgfmt, '-F', 'raw', '-b', 'null-co://', + top) == 0 + + # QSD instance with a null-co block node in an I/O thread, + # exported over NBD (on `nbd_sock`, export name "node0") + self.qsd = QemuStorageDaemon( + '--object', 'iothread,id=iothread0', + '--blockdev', 'null-co,node-name=node0,read-zeroes=true', + '--nbd-server', f'addr.type=unix,addr.path={nbd_sock}', + '--export', 'nbd,id=exp0,node-name=node0,iothread=iothread0,' + + 'fixed-iothread=true,writable=true', + qmp=True + ) + + def tearDown(self) -> None: + self.qsd.stop() + + def test_blockdev_add_while_io(self) -> None: + # Run qemu-img bench in the background + bench_thr = Thread(target=do_qemu_img_bench) + bench_thr.start() + + # While qemu-img bench is running, repeatedly add and remove an + # overlay to/from node0 + while bench_thr.is_alive(): + result = self.qsd.qmp('blockdev-add', { + 'driver': imgfmt, + 'node-name': 'overlay', + 'backing': 'node0', + 'file': { + 'driver': 'file', + 'filename': top + } + }) + self.assert_qmp(result, 'return', {}) + + result = self.qsd.qmp('blockdev-del', { + 'node-name': 'overlay' + }) + self.assert_qmp(result, 'return', {}) + + bench_thr.join() + +if __name__ == '__main__': + # Format must support raw backing files + iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'], + supported_protocols=['file']) diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out new file mode 100644 index 0000000000..ae1213e6f8 --- /dev/null +++ b/tests/qemu-iotests/tests/graph-changes-while-io.out @@ -0,0 +1,5 @@ +. +---------------------------------------------------------------------- +Ran 1 tests + +OK diff --git a/tests/unit/rcutorture.c b/tests/unit/rcutorture.c index de6f649058..495a4e6f42 100644 --- a/tests/unit/rcutorture.c +++ b/tests/unit/rcutorture.c @@ -122,7 +122,7 @@ static void *rcu_read_perf_test(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (goflag == GOFLAG_INIT) { g_usleep(1000); @@ -148,7 +148,7 @@ static void *rcu_update_perf_test(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (goflag == GOFLAG_INIT) { g_usleep(1000); @@ -253,7 +253,7 @@ static void *rcu_read_stress_test(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); while (goflag == GOFLAG_INIT) { g_usleep(1000); } @@ -304,7 +304,7 @@ static void *rcu_update_stress_test(void *arg) struct rcu_stress *cp = qatomic_read(&rcu_stress_current); rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); while (goflag == GOFLAG_INIT) { g_usleep(1000); @@ -347,7 +347,7 @@ static void *rcu_fake_update_stress_test(void *arg) { rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); while (goflag == GOFLAG_INIT) { g_usleep(1000); } diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c index aea660aeed..94718c9319 100644 --- a/tests/unit/test-block-iothread.c +++ b/tests/unit/test-block-iothread.c @@ -279,10 +279,10 @@ static void test_sync_op_check(BdrvChild *c) g_assert_cmpint(ret, ==, -ENOTSUP); } -static void test_sync_op_invalidate_cache(BdrvChild *c) +static void test_sync_op_activate(BdrvChild *c) { /* Early success: Image is not inactive */ - bdrv_invalidate_cache(c->bs, NULL); + bdrv_activate(c->bs, NULL); } @@ -325,8 +325,8 @@ const SyncOpTest sync_op_tests[] = { .name = "/sync-op/check", .fn = test_sync_op_check, }, { - .name = "/sync-op/invalidate_cache", - .fn = test_sync_op_invalidate_cache, + .name = "/sync-op/activate", + .fn = test_sync_op_activate, }, }; diff --git a/tests/unit/test-rcu-list.c b/tests/unit/test-rcu-list.c index 49641e1936..64b81ae058 100644 --- a/tests/unit/test-rcu-list.c +++ b/tests/unit/test-rcu-list.c @@ -171,7 +171,7 @@ static void *rcu_q_reader(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (qatomic_read(&goflag) == GOFLAG_INIT) { g_usleep(1000); @@ -206,7 +206,7 @@ static void *rcu_q_updater(void *arg) long long n_removed_local = 0; struct list_element *el, *prev_el; - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (qatomic_read(&goflag) == GOFLAG_INIT) { g_usleep(1000); diff --git a/util/async.c b/util/async.c index 08d25feef5..2ea1172f3e 100644 --- a/util/async.c +++ b/util/async.c @@ -32,6 +32,7 @@ #include "qemu/rcu_queue.h" #include "block/raw-aio.h" #include "qemu/coroutine_int.h" +#include "qemu/coroutine-tls.h" #include "trace.h" /***********************************************************/ @@ -675,12 +676,13 @@ void aio_context_release(AioContext *ctx) qemu_rec_mutex_unlock(&ctx->lock); } -static __thread AioContext *my_aiocontext; +QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext) AioContext *qemu_get_current_aio_context(void) { - if (my_aiocontext) { - return my_aiocontext; + AioContext *ctx = get_my_aiocontext(); + if (ctx) { + return ctx; } if (qemu_mutex_iothread_locked()) { /* Possibly in a vCPU thread. */ @@ -691,6 +693,6 @@ AioContext *qemu_get_current_aio_context(void) void qemu_set_current_aio_context(AioContext *ctx) { - assert(!my_aiocontext); - my_aiocontext = ctx; + assert(!get_my_aiocontext()); + set_my_aiocontext(ctx); } diff --git a/util/rcu.c b/util/rcu.c index c91da9f137..b6d6c71cff 100644 --- a/util/rcu.c +++ b/util/rcu.c @@ -65,7 +65,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr) /* Written to only by each individual reader. Read by both the reader and the * writers. */ -__thread struct rcu_reader_data rcu_reader; +QEMU_DEFINE_CO_TLS(struct rcu_reader_data, rcu_reader) /* Protected by rcu_registry_lock. */ typedef QLIST_HEAD(, rcu_reader_data) ThreadList; @@ -355,23 +355,23 @@ void drain_call_rcu(void) void rcu_register_thread(void) { - assert(rcu_reader.ctr == 0); + assert(get_ptr_rcu_reader()->ctr == 0); qemu_mutex_lock(&rcu_registry_lock); - QLIST_INSERT_HEAD(®istry, &rcu_reader, node); + QLIST_INSERT_HEAD(®istry, get_ptr_rcu_reader(), node); qemu_mutex_unlock(&rcu_registry_lock); } void rcu_unregister_thread(void) { qemu_mutex_lock(&rcu_registry_lock); - QLIST_REMOVE(&rcu_reader, node); + QLIST_REMOVE(get_ptr_rcu_reader(), node); qemu_mutex_unlock(&rcu_registry_lock); } void rcu_add_force_rcu_notifier(Notifier *n) { qemu_mutex_lock(&rcu_registry_lock); - notifier_list_add(&rcu_reader.force_rcu, n); + notifier_list_add(&get_ptr_rcu_reader()->force_rcu, n); qemu_mutex_unlock(&rcu_registry_lock); } |