diff options
-rw-r--r-- | block.c | 245 | ||||
-rw-r--r-- | block/backup.c | 71 | ||||
-rw-r--r-- | block/block-backend.c | 123 | ||||
-rw-r--r-- | block/commit.c | 53 | ||||
-rw-r--r-- | block/io.c | 97 | ||||
-rw-r--r-- | block/mirror.c | 100 | ||||
-rw-r--r-- | block/parallels.c | 4 | ||||
-rw-r--r-- | block/snapshot.c | 55 | ||||
-rw-r--r-- | block/stream.c | 15 | ||||
-rw-r--r-- | block/vvfat.c | 8 | ||||
-rw-r--r-- | blockdev.c | 60 | ||||
-rw-r--r-- | blockjob.c | 62 | ||||
-rw-r--r-- | dma-helpers.c | 54 | ||||
-rw-r--r-- | hw/block/nvme.c | 6 | ||||
-rw-r--r-- | hw/ide/ahci.c | 6 | ||||
-rw-r--r-- | hw/ide/core.c | 20 | ||||
-rw-r--r-- | hw/ide/internal.h | 6 | ||||
-rw-r--r-- | hw/ide/macio.c | 2 | ||||
-rw-r--r-- | hw/scsi/scsi-disk.c | 8 | ||||
-rw-r--r-- | include/block/block.h | 24 | ||||
-rw-r--r-- | include/block/block_int.h | 3 | ||||
-rw-r--r-- | include/block/blockjob.h | 23 | ||||
-rw-r--r-- | include/sysemu/block-backend.h | 23 | ||||
-rw-r--r-- | include/sysemu/dma.h | 20 | ||||
-rw-r--r-- | migration/block.c | 4 | ||||
-rw-r--r-- | monitor.c | 4 | ||||
-rw-r--r-- | qemu-img.c | 6 | ||||
-rw-r--r-- | qemu-io-cmds.c | 22 | ||||
-rw-r--r-- | qmp.c | 5 | ||||
-rwxr-xr-x | tests/qemu-iotests/041 | 27 | ||||
-rw-r--r-- | tests/qemu-iotests/041.out | 4 | ||||
-rw-r--r-- | tests/test-blockjob-txn.c | 3 | ||||
-rw-r--r-- | tests/test-throttle.c | 6 | ||||
-rw-r--r-- | trace-events | 8 |
34 files changed, 603 insertions, 574 deletions
@@ -64,16 +64,16 @@ static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states = static QLIST_HEAD(, BlockDriver) bdrv_drivers = QLIST_HEAD_INITIALIZER(bdrv_drivers); -static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, - const char *reference, QDict *options, int flags, - BlockDriverState *parent, - const BdrvChildRole *child_role, Error **errp); +static BlockDriverState *bdrv_open_inherit(const char *filename, + const char *reference, + QDict *options, int flags, + BlockDriverState *parent, + const BdrvChildRole *child_role, + Error **errp); /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; -static void bdrv_close(BlockDriverState *bs); - #ifdef _WIN32 static int is_windows_drive_prefix(const char *filename) { @@ -220,11 +220,6 @@ void bdrv_register(BlockDriver *bdrv) QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); } -BlockDriverState *bdrv_new_root(void) -{ - return bdrv_new(); -} - BlockDriverState *bdrv_new(void) { BlockDriverState *bs; @@ -664,6 +659,18 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) return 0; } +static void bdrv_child_cb_drained_begin(BdrvChild *child) +{ + BlockDriverState *bs = child->opaque; + bdrv_drained_begin(bs); +} + +static void bdrv_child_cb_drained_end(BdrvChild *child) +{ + BlockDriverState *bs = child->opaque; + bdrv_drained_end(bs); +} + /* * Returns the options and flags that a temporary snapshot should get, based on * the originally requested flags (the originally requested image will have @@ -710,6 +717,8 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, const BdrvChildRole child_file = { .inherit_options = bdrv_inherited_options, + .drained_begin = bdrv_child_cb_drained_begin, + .drained_end = bdrv_child_cb_drained_end, }; /* @@ -728,6 +737,8 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options, const BdrvChildRole child_format = { .inherit_options = bdrv_inherited_fmt_options, + .drained_begin = bdrv_child_cb_drained_begin, + .drained_end = bdrv_child_cb_drained_end, }; /* @@ -755,6 +766,8 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options, static const BdrvChildRole child_backing = { .inherit_options = bdrv_backing_options, + .drained_begin = bdrv_child_cb_drained_begin, + .drained_end = bdrv_child_cb_drained_end, }; static int bdrv_open_flags(BlockDriverState *bs, int flags) @@ -1155,18 +1168,41 @@ static int bdrv_fill_options(QDict **options, const char *filename, return 0; } +static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) +{ + BlockDriverState *old_bs = child->bs; + + if (old_bs) { + if (old_bs->quiesce_counter && child->role->drained_end) { + child->role->drained_end(child); + } + QLIST_REMOVE(child, next_parent); + } + + child->bs = new_bs; + + if (new_bs) { + QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); + if (new_bs->quiesce_counter && child->role->drained_begin) { + child->role->drained_begin(child); + } + } +} + BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, - const BdrvChildRole *child_role) + const BdrvChildRole *child_role, + void *opaque) { BdrvChild *child = g_new(BdrvChild, 1); *child = (BdrvChild) { - .bs = child_bs, + .bs = NULL, .name = g_strdup(child_name), .role = child_role, + .opaque = opaque, }; - QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent); + bdrv_replace_child(child, child_bs); return child; } @@ -1176,7 +1212,8 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, const char *child_name, const BdrvChildRole *child_role) { - BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role); + BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role, + parent_bs); QLIST_INSERT_HEAD(&parent_bs->children, child, next); return child; } @@ -1187,7 +1224,9 @@ static void bdrv_detach_child(BdrvChild *child) QLIST_REMOVE(child, next); child->next.le_prev = NULL; } - QLIST_REMOVE(child, next_parent); + + bdrv_replace_child(child, NULL); + g_free(child->name); g_free(child); } @@ -1341,14 +1380,13 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, qdict_put(options, "driver", qstring_from_str(bs->backing_format)); } - backing_hd = NULL; - ret = bdrv_open_inherit(&backing_hd, - *backing_filename ? backing_filename : NULL, - reference, options, 0, bs, &child_backing, - errp); - if (ret < 0) { + backing_hd = bdrv_open_inherit(*backing_filename ? backing_filename : NULL, + reference, options, 0, bs, &child_backing, + errp); + if (!backing_hd) { bs->open_flags |= BDRV_O_NO_BACKING; error_prepend(errp, "Could not open backing file: "); + ret = -EINVAL; goto free_exit; } @@ -1388,7 +1426,6 @@ BdrvChild *bdrv_open_child(const char *filename, BdrvChild *c = NULL; BlockDriverState *bs; QDict *image_options; - int ret; char *bdref_key_dot; const char *reference; @@ -1408,10 +1445,9 @@ BdrvChild *bdrv_open_child(const char *filename, goto done; } - bs = NULL; - ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0, - parent, child_role, errp); - if (ret < 0) { + bs = bdrv_open_inherit(filename, reference, image_options, 0, + parent, child_role, errp); + if (!bs) { goto done; } @@ -1422,15 +1458,16 @@ done: return c; } -static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, - QDict *snapshot_options, Error **errp) +static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, + int flags, + QDict *snapshot_options, + Error **errp) { /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ char *tmp_filename = g_malloc0(PATH_MAX + 1); int64_t total_size; QemuOpts *opts = NULL; BlockDriverState *bs_snapshot; - Error *local_err = NULL; int ret; /* if snapshot, we create a temporary backing file and open it @@ -1439,7 +1476,6 @@ static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, /* Get the required size from the image */ total_size = bdrv_getlength(bs); if (total_size < 0) { - ret = total_size; error_setg_errno(errp, -total_size, "Could not get image size"); goto out; } @@ -1470,22 +1506,26 @@ static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, qdict_put(snapshot_options, "driver", qstring_from_str("qcow2")); - bs_snapshot = bdrv_new(); - - ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, - flags, &local_err); + bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp); snapshot_options = NULL; - if (ret < 0) { - error_propagate(errp, local_err); + if (!bs_snapshot) { + ret = -EINVAL; goto out; } + /* bdrv_append() consumes a strong reference to bs_snapshot (i.e. it will + * call bdrv_unref() on it), so in order to be able to return one, we have + * to increase bs_snapshot's refcount here */ + bdrv_ref(bs_snapshot); bdrv_append(bs_snapshot, bs); + g_free(tmp_filename); + return bs_snapshot; + out: QDECREF(snapshot_options); g_free(tmp_filename); - return ret; + return NULL; } /* @@ -1503,10 +1543,12 @@ out: * should be opened. If specified, neither options nor a filename may be given, * nor can an existing BDS be reused (that is, *pbs has to be NULL). */ -static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, - const char *reference, QDict *options, int flags, - BlockDriverState *parent, - const BdrvChildRole *child_role, Error **errp) +static BlockDriverState *bdrv_open_inherit(const char *filename, + const char *reference, + QDict *options, int flags, + BlockDriverState *parent, + const BdrvChildRole *child_role, + Error **errp) { int ret; BdrvChild *file = NULL; @@ -1518,7 +1560,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, QDict *snapshot_options = NULL; int snapshot_flags = 0; - assert(pbs); assert(!child_role || !flags); assert(!child_role == !parent); @@ -1526,33 +1567,22 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, bool options_non_empty = options ? qdict_size(options) : false; QDECREF(options); - if (*pbs) { - error_setg(errp, "Cannot reuse an existing BDS when referencing " - "another block device"); - return -EINVAL; - } - if (filename || options_non_empty) { error_setg(errp, "Cannot reference an existing block device with " "additional options or a new filename"); - return -EINVAL; + return NULL; } bs = bdrv_lookup_bs(reference, reference, errp); if (!bs) { - return -ENODEV; + return NULL; } bdrv_ref(bs); - *pbs = bs; - return 0; + return bs; } - if (*pbs) { - bs = *pbs; - } else { - bs = bdrv_new(); - } + bs = bdrv_new(); /* NULL means an empty set of options */ if (options == NULL) { @@ -1562,7 +1592,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, /* json: syntax counts as explicit options, as if in the QDict */ parse_json_protocol(options, &filename, &local_err); if (local_err) { - ret = -EINVAL; goto fail; } @@ -1589,7 +1618,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, drv = bdrv_find_format(drvname); if (!drv) { error_setg(errp, "Unknown driver: '%s'", drvname); - ret = -EINVAL; goto fail; } } @@ -1619,7 +1647,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, file = bdrv_open_child(filename, options, "file", bs, &child_file, true, &local_err); if (local_err) { - ret = -EINVAL; goto fail; } } @@ -1646,7 +1673,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, qdict_put(options, "driver", qstring_from_str(drv->format_name)); } else if (!drv) { error_setg(errp, "Must specify either driver or file"); - ret = -EINVAL; goto fail; } @@ -1689,7 +1715,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, drv->format_name, entry->key); } - ret = -EINVAL; goto close_and_fail; } @@ -1700,25 +1725,30 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */ error_setg(errp, "Guest must be stopped for opening of encrypted image"); - ret = -EBUSY; goto close_and_fail; } QDECREF(options); - *pbs = bs; /* For snapshot=on, create a temporary qcow2 overlay. bs points to the * temporary snapshot afterwards. */ if (snapshot_flags) { - ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options, - &local_err); + BlockDriverState *snapshot_bs; + snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags, + snapshot_options, &local_err); snapshot_options = NULL; if (local_err) { goto close_and_fail; } + /* We are not going to return bs but the overlay on top of it + * (snapshot_bs); thus, we have to drop the strong reference to bs + * (which we obtained by calling bdrv_new()). bs will not be deleted, + * though, because the overlay still has a reference to it. */ + bdrv_unref(bs); + bs = snapshot_bs; } - return 0; + return bs; fail: if (file != NULL) { @@ -1729,36 +1759,26 @@ fail: QDECREF(bs->options); QDECREF(options); bs->options = NULL; - if (!*pbs) { - /* If *pbs is NULL, a new BDS has been created in this function and - needs to be freed now. Otherwise, it does not need to be closed, - since it has not really been opened yet. */ - bdrv_unref(bs); - } + bdrv_unref(bs); if (local_err) { error_propagate(errp, local_err); } - return ret; + return NULL; close_and_fail: - /* See fail path, but now the BDS has to be always closed */ - if (*pbs) { - bdrv_close(bs); - } else { - bdrv_unref(bs); - } + bdrv_unref(bs); QDECREF(snapshot_options); QDECREF(options); if (local_err) { error_propagate(errp, local_err); } - return ret; + return NULL; } -int bdrv_open(BlockDriverState **pbs, const char *filename, - const char *reference, QDict *options, int flags, Error **errp) +BlockDriverState *bdrv_open(const char *filename, const char *reference, + QDict *options, int flags, Error **errp) { - return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL, + return bdrv_open_inherit(filename, reference, options, flags, NULL, NULL, errp); } @@ -2132,6 +2152,7 @@ static void bdrv_close(BlockDriverState *bs) BdrvAioNotifier *ban, *ban_next; assert(!bs->job); + assert(!bs->refcnt); bdrv_drained_begin(bs); /* complete I/O */ bdrv_flush(bs); @@ -2140,8 +2161,6 @@ static void bdrv_close(BlockDriverState *bs) bdrv_release_named_dirty_bitmaps(bs); assert(QLIST_EMPTY(&bs->dirty_bitmaps)); - bdrv_parent_cb_change_media(bs, false); - if (bs->drv) { BdrvChild *child, *next; @@ -2190,8 +2209,7 @@ static void bdrv_close(BlockDriverState *bs) void bdrv_close_all(void) { - BlockDriverState *bs; - AioContext *aio_context; + block_job_cancel_sync_all(); /* Drop references from requests still in flight, such as canceled block * jobs whose AIO context has not been polled yet */ @@ -2200,25 +2218,7 @@ void bdrv_close_all(void) blk_remove_all_bs(); blockdev_close_all_bdrv_states(); - /* Cancel all block jobs */ - while (!QTAILQ_EMPTY(&all_bdrv_states)) { - QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) { - aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); - if (bs->job) { - block_job_cancel_sync(bs->job); - aio_context_release(aio_context); - break; - } - aio_context_release(aio_context); - } - - /* All the remaining BlockDriverStates are referenced directly or - * indirectly from block jobs, so there needs to be at least one BDS - * directly used by a block job */ - assert(bs); - } + assert(QTAILQ_EMPTY(&all_bdrv_states)); } static void change_parent_backing_link(BlockDriverState *from, @@ -2228,10 +2228,8 @@ static void change_parent_backing_link(BlockDriverState *from, QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->role != &child_backing); - c->bs = to; - QLIST_REMOVE(c, next_parent); - QLIST_INSERT_HEAD(&to->parents, c, next_parent); bdrv_ref(to); + bdrv_replace_child(c, to); bdrv_unref(from); } } @@ -3195,9 +3193,9 @@ void bdrv_invalidate_cache_all(Error **errp) { BlockDriverState *bs; Error *local_err = NULL; - BdrvNextIterator *it = NULL; + BdrvNextIterator it; - while ((it = bdrv_next(it, &bs)) != NULL) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); @@ -3239,11 +3237,11 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs, int bdrv_inactivate_all(void) { BlockDriverState *bs = NULL; - BdrvNextIterator *it = NULL; + BdrvNextIterator it; int ret = 0; int pass; - while ((it = bdrv_next(it, &bs)) != NULL) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { aio_context_acquire(bdrv_get_aio_context(bs)); } @@ -3252,8 +3250,7 @@ int bdrv_inactivate_all(void) * the second pass sets the BDRV_O_INACTIVE flag so that no further write * is allowed. */ for (pass = 0; pass < 2; pass++) { - it = NULL; - while ((it = bdrv_next(it, &bs)) != NULL) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { ret = bdrv_inactivate_recurse(bs, pass); if (ret < 0) { goto out; @@ -3262,8 +3259,7 @@ int bdrv_inactivate_all(void) } out: - it = NULL; - while ((it = bdrv_next(it, &bs)) != NULL) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { aio_context_release(bdrv_get_aio_context(bs)); } @@ -3547,11 +3543,10 @@ void bdrv_img_create(const char *filename, const char *fmt, qstring_from_str(backing_fmt)); } - bs = NULL; - ret = bdrv_open(&bs, full_backing, NULL, backing_options, - back_flags, &local_err); + bs = bdrv_open(full_backing, NULL, backing_options, back_flags, + &local_err); g_free(full_backing); - if (ret < 0) { + if (!bs) { goto out; } size = bdrv_getlength(bs); @@ -3753,10 +3748,10 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, bool bdrv_is_first_non_filter(BlockDriverState *candidate) { BlockDriverState *bs; - BdrvNextIterator *it = NULL; + BdrvNextIterator it; /* walk down the bs forest recursively */ - while ((it = bdrv_next(it, &bs)) != NULL) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { bool perm; /* try to recurse in this top level bs */ diff --git a/block/backup.c b/block/backup.c index fec45e8212..feeb9f8bf2 100644 --- a/block/backup.c +++ b/block/backup.c @@ -36,7 +36,7 @@ typedef struct CowRequest { typedef struct BackupBlockJob { BlockJob common; - BlockDriverState *target; + BlockBackend *target; /* bitmap for sync=incremental */ BdrvDirtyBitmap *sync_bitmap; MirrorSyncMode sync_mode; @@ -47,6 +47,7 @@ typedef struct BackupBlockJob { uint64_t sectors_read; unsigned long *done_bitmap; int64_t cluster_size; + NotifierWithReturn before_write; QLIST_HEAD(, CowRequest) inflight_reqs; } BackupBlockJob; @@ -93,12 +94,12 @@ static void cow_request_end(CowRequest *req) qemu_co_queue_restart_all(&req->wait_queue); } -static int coroutine_fn backup_do_cow(BlockDriverState *bs, +static int coroutine_fn backup_do_cow(BackupBlockJob *job, int64_t sector_num, int nb_sectors, bool *error_is_read, bool is_write_notifier) { - BackupBlockJob *job = (BackupBlockJob *)bs->job; + BlockBackend *blk = job->common.blk; CowRequest cow_request; struct iovec iov; QEMUIOVector bounce_qiov; @@ -131,20 +132,15 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, start * sectors_per_cluster); if (!bounce_buffer) { - bounce_buffer = qemu_blockalign(bs, job->cluster_size); + bounce_buffer = blk_blockalign(blk, job->cluster_size); } iov.iov_base = bounce_buffer; iov.iov_len = n * BDRV_SECTOR_SIZE; qemu_iovec_init_external(&bounce_qiov, &iov, 1); - if (is_write_notifier) { - ret = bdrv_co_readv_no_serialising(bs, - start * sectors_per_cluster, - n, &bounce_qiov); - } else { - ret = bdrv_co_readv(bs, start * sectors_per_cluster, n, - &bounce_qiov); - } + ret = blk_co_preadv(blk, start * job->cluster_size, + bounce_qiov.size, &bounce_qiov, + is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0); if (ret < 0) { trace_backup_do_cow_read_fail(job, start, ret); if (error_is_read) { @@ -154,13 +150,11 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, } if (buffer_is_zero(iov.iov_base, iov.iov_len)) { - ret = bdrv_co_write_zeroes(job->target, - start * sectors_per_cluster, - n, BDRV_REQ_MAY_UNMAP); + ret = blk_co_pwrite_zeroes(job->target, start * job->cluster_size, + bounce_qiov.size, BDRV_REQ_MAY_UNMAP); } else { - ret = bdrv_co_writev(job->target, - start * sectors_per_cluster, n, - &bounce_qiov); + ret = blk_co_pwritev(job->target, start * job->cluster_size, + bounce_qiov.size, &bounce_qiov, 0); } if (ret < 0) { trace_backup_do_cow_write_fail(job, start, ret); @@ -197,14 +191,16 @@ static int coroutine_fn backup_before_write_notify( NotifierWithReturn *notifier, void *opaque) { + BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write); BdrvTrackedRequest *req = opaque; int64_t sector_num = req->offset >> BDRV_SECTOR_BITS; int nb_sectors = req->bytes >> BDRV_SECTOR_BITS; + assert(req->bs == blk_bs(job->common.blk)); assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0); - return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true); + return backup_do_cow(job, sector_num, nb_sectors, NULL, true); } static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp) @@ -221,7 +217,7 @@ static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp) static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) { BdrvDirtyBitmap *bm; - BlockDriverState *bs = job->common.bs; + BlockDriverState *bs = blk_bs(job->common.blk); if (ret < 0 || block_job_is_cancelled(&job->common)) { /* Merge the successor back into the parent, delete nothing. */ @@ -279,7 +275,7 @@ static void backup_complete(BlockJob *job, void *opaque) BackupBlockJob *s = container_of(job, BackupBlockJob, common); BackupCompleteData *data = opaque; - bdrv_unref(s->target); + blk_unref(s->target); block_job_completed(job, data->ret); g_free(data); @@ -321,7 +317,6 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) int64_t end; int64_t last_cluster = -1; int64_t sectors_per_cluster = cluster_size_sectors(job); - BlockDriverState *bs = job->common.bs; HBitmapIter hbi; granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap); @@ -343,7 +338,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) if (yield_and_check(job)) { return ret; } - ret = backup_do_cow(bs, cluster * sectors_per_cluster, + ret = backup_do_cow(job, cluster * sectors_per_cluster, sectors_per_cluster, &error_is_read, false); if ((ret < 0) && @@ -376,11 +371,8 @@ static void coroutine_fn backup_run(void *opaque) { BackupBlockJob *job = opaque; BackupCompleteData *data; - BlockDriverState *bs = job->common.bs; - BlockDriverState *target = job->target; - NotifierWithReturn before_write = { - .notify = backup_before_write_notify, - }; + BlockDriverState *bs = blk_bs(job->common.blk); + BlockBackend *target = job->target; int64_t start, end; int64_t sectors_per_cluster = cluster_size_sectors(job); int ret = 0; @@ -393,7 +385,8 @@ static void coroutine_fn backup_run(void *opaque) job->done_bitmap = bitmap_new(end); - bdrv_add_before_write_notifier(bs, &before_write); + job->before_write.notify = backup_before_write_notify; + bdrv_add_before_write_notifier(bs, &job->before_write); if (job->sync_mode == MIRROR_SYNC_MODE_NONE) { while (!block_job_is_cancelled(&job->common)) { @@ -445,7 +438,7 @@ static void coroutine_fn backup_run(void *opaque) } } /* FULL sync mode we copy the whole drive. */ - ret = backup_do_cow(bs, start * sectors_per_cluster, + ret = backup_do_cow(job, start * sectors_per_cluster, sectors_per_cluster, &error_is_read, false); if (ret < 0) { /* Depending on error action, fail now or retry cluster */ @@ -461,14 +454,14 @@ static void coroutine_fn backup_run(void *opaque) } } - notifier_with_return_remove(&before_write); + notifier_with_return_remove(&job->before_write); /* wait until pending backup_do_cow() calls have completed */ qemu_co_rwlock_wrlock(&job->flush_rwlock); qemu_co_rwlock_unlock(&job->flush_rwlock); g_free(job->done_bitmap); - bdrv_op_unblock_all(target, job->common.blocker); + bdrv_op_unblock_all(blk_bs(target), job->common.blocker); data = g_malloc(sizeof(*data)); data->ret = ret; @@ -485,6 +478,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, { int64_t len; BlockDriverInfo bdi; + BackupBlockJob *job = NULL; int ret; assert(bs); @@ -542,15 +536,16 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, goto error; } - BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed, - cb, opaque, errp); + job = block_job_create(&backup_job_driver, bs, speed, cb, opaque, errp); if (!job) { goto error; } + job->target = blk_new(); + blk_insert_bs(job->target, target); + job->on_source_error = on_source_error; job->on_target_error = on_target_error; - job->target = target; job->sync_mode = sync_mode; job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ? sync_bitmap : NULL; @@ -558,7 +553,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, /* If there is no backing file on the target, we cannot rely on COW if our * backup cluster size is smaller than the target cluster size. Even for * targets with a backing file, try to avoid COW if possible. */ - ret = bdrv_get_info(job->target, &bdi); + ret = bdrv_get_info(target, &bdi); if (ret < 0 && !target->backing) { error_setg_errno(errp, -ret, "Couldn't determine the cluster size of the target image, " @@ -584,4 +579,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, if (sync_bitmap) { bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL); } + if (job) { + blk_unref(job->target); + block_job_unref(&job->common); + } } diff --git a/block/block-backend.c b/block/block-backend.c index 6928d61de4..34500e6080 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -19,6 +19,7 @@ #include "sysemu/sysemu.h" #include "qapi-event.h" #include "qemu/id.h" +#include "trace.h" /* Number of coroutines to reserve per attached device model */ #define COROUTINE_POOL_RESERVATION 64 @@ -119,12 +120,14 @@ static const BdrvChildRole child_root = { * Store an error through @errp on failure, unless it's null. * Return the new BlockBackend on success, null on failure. */ -BlockBackend *blk_new(Error **errp) +BlockBackend *blk_new(void) { BlockBackend *blk; blk = g_new0(BlockBackend, 1); blk->refcnt = 1; + blk_set_enable_write_cache(blk, true); + qemu_co_queue_init(&blk->public.throttled_reqs[0]); qemu_co_queue_init(&blk->public.throttled_reqs[1]); @@ -136,27 +139,7 @@ BlockBackend *blk_new(Error **errp) } /* - * Create a new BlockBackend with a new BlockDriverState attached. - * Otherwise just like blk_new(), which see. - */ -BlockBackend *blk_new_with_bs(Error **errp) -{ - BlockBackend *blk; - BlockDriverState *bs; - - blk = blk_new(errp); - if (!blk) { - return NULL; - } - - bs = bdrv_new_root(); - blk->root = bdrv_root_attach_child(bs, "root", &child_root); - blk->root->opaque = blk; - return blk; -} - -/* - * Calls blk_new_with_bs() and then calls bdrv_open() on the BlockDriverState. + * Creates a new BlockBackend, opens a new BlockDriverState, and connects both. * * Just as with bdrv_open(), after having called this function the reference to * @options belongs to the block layer (even on failure). @@ -171,21 +154,16 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp) { BlockBackend *blk; - int ret; - - blk = blk_new_with_bs(errp); - if (!blk) { - QDECREF(options); - return NULL; - } + BlockDriverState *bs; - ret = bdrv_open(&blk->root->bs, filename, reference, options, flags, errp); - if (ret < 0) { + blk = blk_new(); + bs = bdrv_open(filename, reference, options, flags, errp); + if (!bs) { blk_unref(blk); return NULL; } - blk_set_enable_write_cache(blk, true); + blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk); return blk; } @@ -286,25 +264,11 @@ BlockBackend *blk_next(BlockBackend *blk) : QTAILQ_FIRST(&monitor_block_backends); } -struct BdrvNextIterator { - enum { - BDRV_NEXT_BACKEND_ROOTS, - BDRV_NEXT_MONITOR_OWNED, - } phase; - BlockBackend *blk; - BlockDriverState *bs; -}; - /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by * the monitor or attached to a BlockBackend */ -BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs) +BlockDriverState *bdrv_next(BdrvNextIterator *it) { - if (!it) { - it = g_new(BdrvNextIterator, 1); - *it = (BdrvNextIterator) { - .phase = BDRV_NEXT_BACKEND_ROOTS, - }; - } + BlockDriverState *bs; /* First, return all root nodes of BlockBackends. In order to avoid * returning a BDS twice when multiple BBs refer to it, we only return it @@ -312,11 +276,11 @@ BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs) if (it->phase == BDRV_NEXT_BACKEND_ROOTS) { do { it->blk = blk_all_next(it->blk); - *bs = it->blk ? blk_bs(it->blk) : NULL; - } while (it->blk && (*bs == NULL || bdrv_first_blk(*bs) != it->blk)); + bs = it->blk ? blk_bs(it->blk) : NULL; + } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk)); - if (*bs) { - return it; + if (bs) { + return bs; } it->phase = BDRV_NEXT_MONITOR_OWNED; } @@ -326,10 +290,19 @@ BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs) * by the above block already */ do { it->bs = bdrv_next_monitor_owned(it->bs); - *bs = it->bs; - } while (*bs && bdrv_has_blk(*bs)); + bs = it->bs; + } while (bs && bdrv_has_blk(bs)); - return *bs ? it : NULL; + return bs; +} + +BlockDriverState *bdrv_first(BdrvNextIterator *it) +{ + *it = (BdrvNextIterator) { + .phase = BDRV_NEXT_BACKEND_ROOTS, + }; + + return bdrv_next(it); } /* @@ -509,8 +482,7 @@ void blk_remove_bs(BlockBackend *blk) void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs) { bdrv_ref(bs); - blk->root = bdrv_root_attach_child(bs, "root", &child_root); - blk->root->opaque = blk; + blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk); notifier_list_notify(&blk->insert_bs_notifiers, blk); if (blk->public.throttle_state) { @@ -770,11 +742,15 @@ static int blk_check_request(BlockBackend *blk, int64_t sector_num, nb_sectors * BDRV_SECTOR_SIZE); } -static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) +int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { - int ret = blk_check_byte_request(blk, offset, bytes); + int ret; + + trace_blk_co_preadv(blk, blk_bs(blk), offset, bytes, flags); + + ret = blk_check_byte_request(blk, offset, bytes); if (ret < 0) { return ret; } @@ -787,12 +763,14 @@ static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, return bdrv_co_preadv(blk_bs(blk), offset, bytes, qiov, flags); } -static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) +int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { int ret; + trace_blk_co_pwritev(blk, blk_bs(blk), offset, bytes, flags); + ret = blk_check_byte_request(blk, offset, bytes); if (ret < 0) { return ret; @@ -885,8 +863,8 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, return ret; } -int blk_write_zeroes(BlockBackend *blk, int64_t offset, - int count, BdrvRequestFlags flags) +int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int count, BdrvRequestFlags flags) { return blk_prw(blk, offset, NULL, count, blk_write_entry, flags | BDRV_REQ_ZERO_WRITE); @@ -1001,9 +979,9 @@ static void blk_aio_write_entry(void *opaque) blk_aio_complete(acb); } -BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset, - int count, BdrvRequestFlags flags, - BlockCompletionFunc *cb, void *opaque) +BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int count, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque) { return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE, cb, opaque); @@ -1492,8 +1470,8 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque); } -int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset, - int count, BdrvRequestFlags flags) +int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int count, BdrvRequestFlags flags) { return blk_co_pwritev(blk, offset, count, NULL, flags | BDRV_REQ_ZERO_WRITE); @@ -1704,6 +1682,9 @@ static void blk_root_drained_begin(BdrvChild *child) { BlockBackend *blk = child->opaque; + /* Note that blk->root may not be accessible here yet if we are just + * attaching to a BlockDriverState that is drained. Use child instead. */ + if (blk->public.io_limits_disabled++ == 0) { throttle_group_restart_blk(blk); } diff --git a/block/commit.c b/block/commit.c index f308c8c6f0..8a00e1146c 100644 --- a/block/commit.c +++ b/block/commit.c @@ -36,28 +36,36 @@ typedef struct CommitBlockJob { BlockJob common; RateLimit limit; BlockDriverState *active; - BlockDriverState *top; - BlockDriverState *base; + BlockBackend *top; + BlockBackend *base; BlockdevOnError on_error; int base_flags; int orig_overlay_flags; char *backing_file_str; } CommitBlockJob; -static int coroutine_fn commit_populate(BlockDriverState *bs, - BlockDriverState *base, +static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, int64_t sector_num, int nb_sectors, void *buf) { int ret = 0; + QEMUIOVector qiov; + struct iovec iov = { + .iov_base = buf, + .iov_len = nb_sectors * BDRV_SECTOR_SIZE, + }; - ret = bdrv_read(bs, sector_num, buf, nb_sectors); - if (ret) { + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_preadv(bs, sector_num * BDRV_SECTOR_SIZE, + qiov.size, &qiov, 0); + if (ret < 0) { return ret; } - ret = bdrv_write(base, sector_num, buf, nb_sectors); - if (ret) { + ret = blk_co_pwritev(base, sector_num * BDRV_SECTOR_SIZE, + qiov.size, &qiov, 0); + if (ret < 0) { return ret; } @@ -73,8 +81,8 @@ static void commit_complete(BlockJob *job, void *opaque) CommitBlockJob *s = container_of(job, CommitBlockJob, common); CommitCompleteData *data = opaque; BlockDriverState *active = s->active; - BlockDriverState *top = s->top; - BlockDriverState *base = s->base; + BlockDriverState *top = blk_bs(s->top); + BlockDriverState *base = blk_bs(s->base); BlockDriverState *overlay_bs; int ret = data->ret; @@ -94,6 +102,8 @@ static void commit_complete(BlockJob *job, void *opaque) bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); } g_free(s->backing_file_str); + blk_unref(s->top); + blk_unref(s->base); block_job_completed(&s->common, ret); g_free(data); } @@ -102,8 +112,6 @@ static void coroutine_fn commit_run(void *opaque) { CommitBlockJob *s = opaque; CommitCompleteData *data; - BlockDriverState *top = s->top; - BlockDriverState *base = s->base; int64_t sector_num, end; int ret = 0; int n = 0; @@ -111,27 +119,27 @@ static void coroutine_fn commit_run(void *opaque) int bytes_written = 0; int64_t base_len; - ret = s->common.len = bdrv_getlength(top); + ret = s->common.len = blk_getlength(s->top); if (s->common.len < 0) { goto out; } - ret = base_len = bdrv_getlength(base); + ret = base_len = blk_getlength(s->base); if (base_len < 0) { goto out; } if (base_len < s->common.len) { - ret = bdrv_truncate(base, s->common.len); + ret = blk_truncate(s->base, s->common.len); if (ret) { goto out; } } end = s->common.len >> BDRV_SECTOR_BITS; - buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE); + buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE); for (sector_num = 0; sector_num < end; sector_num += n) { uint64_t delay_ns = 0; @@ -146,7 +154,8 @@ wait: break; } /* Copy if allocated above the base */ - ret = bdrv_is_allocated_above(top, base, sector_num, + ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base), + sector_num, COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n); copy = (ret == 1); @@ -158,7 +167,7 @@ wait: goto wait; } } - ret = commit_populate(top, base, sector_num, n, buf); + ret = commit_populate(s->top, s->base, sector_num, n, buf); bytes_written += n * BDRV_SECTOR_SIZE; } if (ret < 0) { @@ -253,8 +262,12 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, return; } - s->base = base; - s->top = top; + s->base = blk_new(); + blk_insert_bs(s->base, base); + + s->top = blk_new(); + blk_insert_bs(s->top, top); + s->active = bs; s->base_flags = orig_base_flags; diff --git a/block/io.c b/block/io.c index 60a6bd8bdb..2d832aa532 100644 --- a/block/io.c +++ b/block/io.c @@ -225,6 +225,34 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs) assert(data.done); } +void bdrv_drained_begin(BlockDriverState *bs) +{ + if (!bs->quiesce_counter++) { + aio_disable_external(bdrv_get_aio_context(bs)); + bdrv_parent_drained_begin(bs); + } + + bdrv_io_unplugged_begin(bs); + bdrv_drain_recurse(bs); + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(bs); + } else { + bdrv_drain_poll(bs); + } + bdrv_io_unplugged_end(bs); +} + +void bdrv_drained_end(BlockDriverState *bs) +{ + assert(bs->quiesce_counter > 0); + if (--bs->quiesce_counter > 0) { + return; + } + + bdrv_parent_drained_end(bs); + aio_enable_external(bdrv_get_aio_context(bs)); +} + /* * Wait for pending requests to complete on a single BlockDriverState subtree, * and suspend block driver's internal I/O until next request arrives. @@ -238,26 +266,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs) */ void coroutine_fn bdrv_co_drain(BlockDriverState *bs) { - bdrv_parent_drained_begin(bs); - bdrv_io_unplugged_begin(bs); - bdrv_drain_recurse(bs); - bdrv_co_yield_to_drain(bs); - bdrv_io_unplugged_end(bs); - bdrv_parent_drained_end(bs); + assert(qemu_in_coroutine()); + bdrv_drained_begin(bs); + bdrv_drained_end(bs); } void bdrv_drain(BlockDriverState *bs) { - bdrv_parent_drained_begin(bs); - bdrv_io_unplugged_begin(bs); - bdrv_drain_recurse(bs); - if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs); - } else { - bdrv_drain_poll(bs); - } - bdrv_io_unplugged_end(bs); - bdrv_parent_drained_end(bs); + bdrv_drained_begin(bs); + bdrv_drained_end(bs); } /* @@ -271,10 +288,10 @@ void bdrv_drain_all(void) /* Always run first iteration so any pending completion BHs run */ bool busy = true; BlockDriverState *bs; - BdrvNextIterator *it = NULL; + BdrvNextIterator it; GSList *aio_ctxs = NULL, *ctx; - while ((it = bdrv_next(it, &bs))) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); @@ -302,10 +319,9 @@ void bdrv_drain_all(void) for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { AioContext *aio_context = ctx->data; - it = NULL; aio_context_acquire(aio_context); - while ((it = bdrv_next(it, &bs))) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { if (aio_context == bdrv_get_aio_context(bs)) { if (bdrv_requests_pending(bs)) { busy = true; @@ -318,8 +334,7 @@ void bdrv_drain_all(void) } } - it = NULL; - while ((it = bdrv_next(it, &bs))) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); @@ -1093,24 +1108,6 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0); } -int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) -{ - trace_bdrv_co_readv_no_serialising(bs, sector_num, nb_sectors); - - return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, - BDRV_REQ_NO_SERIALISING); -} - -int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) -{ - trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors); - - return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, - BDRV_REQ_COPY_ON_READ); -} - #define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, @@ -2543,23 +2540,3 @@ void bdrv_io_unplugged_end(BlockDriverState *bs) } } } - -void bdrv_drained_begin(BlockDriverState *bs) -{ - if (!bs->quiesce_counter++) { - aio_disable_external(bdrv_get_aio_context(bs)); - } - bdrv_parent_drained_begin(bs); - bdrv_drain(bs); -} - -void bdrv_drained_end(BlockDriverState *bs) -{ - bdrv_parent_drained_end(bs); - - assert(bs->quiesce_counter > 0); - if (--bs->quiesce_counter > 0) { - return; - } - aio_enable_external(bdrv_get_aio_context(bs)); -} diff --git a/block/mirror.c b/block/mirror.c index b9986d8218..80fd3c7469 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -20,7 +20,6 @@ #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" #include "qemu/bitmap.h" -#include "qemu/error-report.h" #define SLICE_TIME 100000000ULL /* ns */ #define MAX_IN_FLIGHT 16 @@ -36,7 +35,7 @@ typedef struct MirrorBuffer { typedef struct MirrorBlockJob { BlockJob common; RateLimit limit; - BlockDriverState *target; + BlockBackend *target; BlockDriverState *base; /* The name of the graph node to replace */ char *replaces; @@ -157,7 +156,8 @@ static void mirror_read_complete(void *opaque, int ret) mirror_iteration_done(op, ret); return; } - bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors, + blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov, + op->nb_sectors * BDRV_SECTOR_SIZE, mirror_write_complete, op); } @@ -186,7 +186,7 @@ static int mirror_cow_align(MirrorBlockJob *s, need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors, s->cow_bitmap); if (need_cow) { - bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors, + bdrv_round_to_clusters(blk_bs(s->target), *sector_num, *nb_sectors, &align_sector_num, &align_nb_sectors); } @@ -224,7 +224,7 @@ static inline void mirror_wait_for_io(MirrorBlockJob *s) static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, int nb_sectors) { - BlockDriverState *source = s->common.bs; + BlockBackend *source = s->common.blk; int sectors_per_chunk, nb_chunks; int ret = nb_sectors; MirrorOp *op; @@ -274,7 +274,8 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, s->sectors_in_flight += nb_sectors; trace_mirror_one_iteration(s, sector_num, nb_sectors); - bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors, + blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov, + nb_sectors * BDRV_SECTOR_SIZE, mirror_read_complete, op); return ret; } @@ -296,10 +297,11 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, s->in_flight++; s->sectors_in_flight += nb_sectors; if (is_discard) { - bdrv_aio_discard(s->target, sector_num, op->nb_sectors, - mirror_write_complete, op); + blk_aio_discard(s->target, sector_num, op->nb_sectors, + mirror_write_complete, op); } else { - bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors, + blk_aio_pwrite_zeroes(s->target, sector_num * BDRV_SECTOR_SIZE, + op->nb_sectors * BDRV_SECTOR_SIZE, s->unmap ? BDRV_REQ_MAY_UNMAP : 0, mirror_write_complete, op); } @@ -307,7 +309,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) { - BlockDriverState *source = s->common.bs; + BlockDriverState *source = blk_bs(s->common.blk); int64_t sector_num, first_chunk; uint64_t delay_ns = 0; /* At least the first dirty chunk is mirrored in one iteration. */ @@ -384,7 +386,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) { int64_t target_sector_num; int target_nb_sectors; - bdrv_round_to_clusters(s->target, sector_num, io_sectors, + bdrv_round_to_clusters(blk_bs(s->target), sector_num, io_sectors, &target_sector_num, &target_nb_sectors); if (target_sector_num == sector_num && target_nb_sectors == io_sectors) { @@ -449,7 +451,8 @@ static void mirror_exit(BlockJob *job, void *opaque) MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); MirrorExitData *data = opaque; AioContext *replace_aio_context = NULL; - BlockDriverState *src = s->common.bs; + BlockDriverState *src = blk_bs(s->common.blk); + BlockDriverState *target_bs = blk_bs(s->target); /* Make sure that the source BDS doesn't go away before we called * block_job_completed(). */ @@ -461,26 +464,25 @@ static void mirror_exit(BlockJob *job, void *opaque) } if (s->should_complete && data->ret == 0) { - BlockDriverState *to_replace = s->common.bs; + BlockDriverState *to_replace = src; if (s->to_replace) { to_replace = s->to_replace; } - /* This was checked in mirror_start_job(), but meanwhile one of the - * nodes could have been newly attached to a BlockBackend. */ - if (bdrv_has_blk(to_replace) && bdrv_has_blk(s->target)) { - error_report("block job: Can't create node with two BlockBackends"); - data->ret = -EINVAL; - goto out; + if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) { + bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL); } - if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { - bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); - } - bdrv_replace_in_backing_chain(to_replace, s->target); - } + /* The mirror job has no requests in flight any more, but we need to + * drain potential other users of the BDS before changing the graph. */ + bdrv_drained_begin(target_bs); + bdrv_replace_in_backing_chain(to_replace, target_bs); + bdrv_drained_end(target_bs); -out: + /* We just changed the BDS the job BB refers to */ + blk_remove_bs(job->blk); + blk_insert_bs(job->blk, src); + } if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); error_free(s->replace_blocker); @@ -490,8 +492,8 @@ out: aio_context_release(replace_aio_context); } g_free(s->replaces); - bdrv_op_unblock_all(s->target, s->common.blocker); - bdrv_unref(s->target); + bdrv_op_unblock_all(target_bs, s->common.blocker); + blk_unref(s->target); block_job_completed(&s->common, data->ret); g_free(data); bdrv_drained_end(src); @@ -505,7 +507,8 @@ static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; MirrorExitData *data; - BlockDriverState *bs = s->common.bs; + BlockDriverState *bs = blk_bs(s->common.blk); + BlockDriverState *target_bs = blk_bs(s->target); int64_t sector_num, end, length; uint64_t last_pause_ns; BlockDriverInfo bdi; @@ -541,18 +544,18 @@ static void coroutine_fn mirror_run(void *opaque) * the destination do COW. Instead, we copy sectors around the * dirty data if needed. We need a bitmap to do that. */ - bdrv_get_backing_filename(s->target, backing_filename, + bdrv_get_backing_filename(target_bs, backing_filename, sizeof(backing_filename)); - if (!bdrv_get_info(s->target, &bdi) && bdi.cluster_size) { + if (!bdrv_get_info(target_bs, &bdi) && bdi.cluster_size) { target_cluster_size = bdi.cluster_size; } - if (backing_filename[0] && !s->target->backing + if (backing_filename[0] && !target_bs->backing && s->granularity < target_cluster_size) { s->buf_size = MAX(s->buf_size, target_cluster_size); s->cow_bitmap = bitmap_new(length); } s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS; - s->max_iov = MIN(s->common.bs->bl.max_iov, s->target->bl.max_iov); + s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov); end = s->bdev_length / BDRV_SECTOR_SIZE; s->buf = qemu_try_blockalign(bs, s->buf_size); @@ -567,7 +570,7 @@ static void coroutine_fn mirror_run(void *opaque) if (!s->is_none_mode) { /* First part, loop on the sectors and initialize the dirty bitmap. */ BlockDriverState *base = s->base; - bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(s->target); + bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(target_bs); for (sector_num = 0; sector_num < end; ) { /* Just to make sure we are not exceeding int limit. */ @@ -637,7 +640,7 @@ static void coroutine_fn mirror_run(void *opaque) should_complete = false; if (s->in_flight == 0 && cnt == 0) { trace_mirror_before_flush(s); - ret = bdrv_flush(s->target); + ret = blk_flush(s->target); if (ret < 0) { if (mirror_error_action(s, false, -ret) == BLOCK_ERROR_ACTION_REPORT) { @@ -715,7 +718,7 @@ immediate_exit: data->ret = ret; /* Before we switch to target in mirror_exit, make sure data doesn't * change. */ - bdrv_drained_begin(s->common.bs); + bdrv_drained_begin(bs); if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) { /* FIXME: virtio host notifiers run on iohandler_ctx, therefore the * above bdrv_drained_end isn't enough to quiesce it. This is ugly, we @@ -742,7 +745,8 @@ static void mirror_complete(BlockJob *job, Error **errp) Error *local_err = NULL; int ret; - ret = bdrv_open_backing_file(s->target, NULL, "backing", &local_err); + ret = bdrv_open_backing_file(blk_bs(s->target), NULL, "backing", + &local_err); if (ret < 0) { error_propagate(errp, local_err); return; @@ -804,7 +808,6 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, bool is_none_mode, BlockDriverState *base) { MirrorBlockJob *s; - BlockDriverState *replaced_bs; if (granularity == 0) { granularity = bdrv_get_default_bitmap_granularity(target); @@ -821,30 +824,17 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, buf_size = DEFAULT_MIRROR_BUF_SIZE; } - /* We can't support this case as long as the block layer can't handle - * multiple BlockBackends per BlockDriverState. */ - if (replaces) { - replaced_bs = bdrv_lookup_bs(replaces, replaces, errp); - if (replaced_bs == NULL) { - return; - } - } else { - replaced_bs = bs; - } - if (bdrv_has_blk(replaced_bs) && bdrv_has_blk(target)) { - error_setg(errp, "Can't create node with two BlockBackends"); - return; - } - s = block_job_create(driver, bs, speed, cb, opaque, errp); if (!s) { return; } + s->target = blk_new(); + blk_insert_bs(s->target, target); + s->replaces = g_strdup(replaces); s->on_source_error = on_source_error; s->on_target_error = on_target_error; - s->target = target; s->is_none_mode = is_none_mode; s->base = base; s->granularity = granularity; @@ -854,11 +844,12 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); if (!s->dirty_bitmap) { g_free(s->replaces); + blk_unref(s->target); block_job_unref(&s->common); return; } - bdrv_op_block_all(s->target, s->common.blocker); + bdrv_op_block_all(target, s->common.blocker); s->common.co = qemu_coroutine_create(mirror_run); trace_mirror_start(bs, s, s->common.co, opaque); @@ -931,7 +922,6 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, } } - bdrv_ref(base); mirror_start_job(bs, base, NULL, speed, 0, 0, on_error, on_error, false, cb, opaque, &local_err, &commit_active_job_driver, false, base); diff --git a/block/parallels.c b/block/parallels.c index 88cfacebe3..99fc0f77ef 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -517,8 +517,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) if (ret < 0) { goto exit; } - ret = blk_write_zeroes(file, BDRV_SECTOR_SIZE, - (bat_sectors - 1) << BDRV_SECTOR_BITS, 0); + ret = blk_pwrite_zeroes(file, BDRV_SECTOR_SIZE, + (bat_sectors - 1) << BDRV_SECTOR_BITS, 0); if (ret < 0) { goto exit; } diff --git a/block/snapshot.c b/block/snapshot.c index 3917ec5c91..6e6e34fcf4 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -374,9 +374,9 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs) { bool ok = true; BlockDriverState *bs; - BdrvNextIterator *it = NULL; + BdrvNextIterator it; - while (ok && (it = bdrv_next(it, &bs))) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *ctx = bdrv_get_aio_context(bs); aio_context_acquire(ctx); @@ -384,8 +384,12 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs) ok = bdrv_can_snapshot(bs); } aio_context_release(ctx); + if (!ok) { + goto fail; + } } +fail: *first_bad_bs = bs; return ok; } @@ -395,20 +399,27 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs, { int ret = 0; BlockDriverState *bs; - BdrvNextIterator *it = NULL; + BdrvNextIterator it; QEMUSnapshotInfo sn1, *snapshot = &sn1; - while (ret == 0 && (it = bdrv_next(it, &bs))) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *ctx = bdrv_get_aio_context(bs); aio_context_acquire(ctx); if (bdrv_can_snapshot(bs) && bdrv_snapshot_find(bs, snapshot, name) >= 0) { ret = bdrv_snapshot_delete_by_id_or_name(bs, name, err); + if (ret < 0) { + goto fail; + } } aio_context_release(ctx); + if (ret < 0) { + goto fail; + } } +fail: *first_bad_bs = bs; return ret; } @@ -418,9 +429,9 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs) { int err = 0; BlockDriverState *bs; - BdrvNextIterator *it = NULL; + BdrvNextIterator it; - while (err == 0 && (it = bdrv_next(it, &bs))) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *ctx = bdrv_get_aio_context(bs); aio_context_acquire(ctx); @@ -428,8 +439,12 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs) err = bdrv_snapshot_goto(bs, name); } aio_context_release(ctx); + if (err < 0) { + goto fail; + } } +fail: *first_bad_bs = bs; return err; } @@ -439,9 +454,9 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs) QEMUSnapshotInfo sn; int err = 0; BlockDriverState *bs; - BdrvNextIterator *it = NULL; + BdrvNextIterator it; - while (err == 0 && (it = bdrv_next(it, &bs))) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *ctx = bdrv_get_aio_context(bs); aio_context_acquire(ctx); @@ -449,8 +464,12 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs) err = bdrv_snapshot_find(bs, &sn, name); } aio_context_release(ctx); + if (err < 0) { + goto fail; + } } +fail: *first_bad_bs = bs; return err; } @@ -462,9 +481,9 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, { int err = 0; BlockDriverState *bs; - BdrvNextIterator *it = NULL; + BdrvNextIterator it; - while (err == 0 && (it = bdrv_next(it, &bs))) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *ctx = bdrv_get_aio_context(bs); aio_context_acquire(ctx); @@ -476,24 +495,32 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, err = bdrv_snapshot_create(bs, sn); } aio_context_release(ctx); + if (err < 0) { + goto fail; + } } +fail: *first_bad_bs = bs; return err; } BlockDriverState *bdrv_all_find_vmstate_bs(void) { - bool not_found = true; BlockDriverState *bs; - BdrvNextIterator *it = NULL; + BdrvNextIterator it; - while (not_found && (it = bdrv_next(it, &bs))) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *ctx = bdrv_get_aio_context(bs); + bool found; aio_context_acquire(ctx); - not_found = !bdrv_can_snapshot(bs); + found = bdrv_can_snapshot(bs); aio_context_release(ctx); + + if (found) { + break; + } } return bs; } diff --git a/block/stream.c b/block/stream.c index 40aa32212e..c0efbda34e 100644 --- a/block/stream.c +++ b/block/stream.c @@ -39,7 +39,7 @@ typedef struct StreamBlockJob { char *backing_file_str; } StreamBlockJob; -static int coroutine_fn stream_populate(BlockDriverState *bs, +static int coroutine_fn stream_populate(BlockBackend *blk, int64_t sector_num, int nb_sectors, void *buf) { @@ -52,7 +52,8 @@ static int coroutine_fn stream_populate(BlockDriverState *bs, qemu_iovec_init_external(&qiov, &iov, 1); /* Copy-on-read the unallocated clusters */ - return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov); + return blk_co_preadv(blk, sector_num * BDRV_SECTOR_SIZE, qiov.size, &qiov, + BDRV_REQ_COPY_ON_READ); } typedef struct { @@ -64,6 +65,7 @@ static void stream_complete(BlockJob *job, void *opaque) { StreamBlockJob *s = container_of(job, StreamBlockJob, common); StreamCompleteData *data = opaque; + BlockDriverState *bs = blk_bs(job->blk); BlockDriverState *base = s->base; if (!block_job_is_cancelled(&s->common) && data->reached_end && @@ -75,8 +77,8 @@ static void stream_complete(BlockJob *job, void *opaque) base_fmt = base->drv->format_name; } } - data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt); - bdrv_set_backing_hd(job->bs, base); + data->ret = bdrv_change_backing_file(bs, base_id, base_fmt); + bdrv_set_backing_hd(bs, base); } g_free(s->backing_file_str); @@ -88,7 +90,8 @@ static void coroutine_fn stream_run(void *opaque) { StreamBlockJob *s = opaque; StreamCompleteData *data; - BlockDriverState *bs = s->common.bs; + BlockBackend *blk = s->common.blk; + BlockDriverState *bs = blk_bs(blk); BlockDriverState *base = s->base; int64_t sector_num = 0; int64_t end = -1; @@ -159,7 +162,7 @@ wait: goto wait; } } - ret = stream_populate(bs, sector_num, n, buf); + ret = stream_populate(blk, sector_num, n, buf); } if (ret < 0) { BlockErrorAction action = diff --git a/block/vvfat.c b/block/vvfat.c index 3e484a1dcc..a39dbe67e2 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -2998,12 +2998,12 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp) goto err; } - s->qcow = NULL; options = qdict_new(); qdict_put(options, "driver", qstring_from_str("qcow")); - ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, options, - BDRV_O_RDWR | BDRV_O_NO_FLUSH, errp); - if (ret < 0) { + s->qcow = bdrv_open(s->qcow_filename, NULL, options, + BDRV_O_RDWR | BDRV_O_NO_FLUSH, errp); + if (!s->qcow) { + ret = -EINVAL; goto err; } diff --git a/blockdev.c b/blockdev.c index 40e4e6fc6f..717785eb8d 100644 --- a/blockdev.c +++ b/blockdev.c @@ -567,11 +567,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, if ((!file || !*file) && !qdict_size(bs_opts)) { BlockBackendRootState *blk_rs; - blk = blk_new(errp); - if (!blk) { - goto early_err; - } - + blk = blk_new(); blk_rs = blk_get_root_state(blk); blk_rs->open_flags = bdrv_flags; blk_rs->read_only = !(bdrv_flags & BDRV_O_RDWR); @@ -657,7 +653,6 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) QemuOpts *opts; Error *local_error = NULL; BlockdevDetectZeroesOptions detect_zeroes; - int ret; int bdrv_flags = 0; opts = qemu_opts_create(&qemu_root_bds_opts, NULL, 1, errp); @@ -688,9 +683,8 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) bdrv_flags |= BDRV_O_INACTIVE; } - bs = NULL; - ret = bdrv_open(&bs, NULL, NULL, bs_opts, bdrv_flags, errp); - if (ret < 0) { + bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); + if (!bs) { goto fail_no_bs_opts; } @@ -1643,7 +1637,7 @@ typedef struct ExternalSnapshotState { static void external_snapshot_prepare(BlkActionState *common, Error **errp) { - int flags = 0, ret; + int flags = 0; QDict *options = NULL; Error *local_err = NULL; /* Device and node name of the image to generate the snapshot from */ @@ -1768,11 +1762,10 @@ static void external_snapshot_prepare(BlkActionState *common, flags |= BDRV_O_NO_BACKING; } - assert(state->new_bs == NULL); - ret = bdrv_open(&state->new_bs, new_image_file, snapshot_ref, options, - flags, errp); + state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags, + errp); /* We will manually add the backing_hd field to the bs later */ - if (ret != 0) { + if (!state->new_bs) { return; } @@ -2540,7 +2533,7 @@ void qmp_blockdev_change_medium(const char *device, const char *filename, { BlockBackend *blk; BlockDriverState *medium_bs = NULL; - int bdrv_flags, ret; + int bdrv_flags; QDict *options = NULL; Error *err = NULL; @@ -2584,9 +2577,8 @@ void qmp_blockdev_change_medium(const char *device, const char *filename, qdict_put(options, "driver", qstring_from_str(format)); } - assert(!medium_bs); - ret = bdrv_open(&medium_bs, filename, NULL, options, bdrv_flags, errp); - if (ret < 0) { + medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp); + if (!medium_bs) { goto fail; } @@ -3199,7 +3191,6 @@ static void do_drive_backup(const char *device, const char *target, Error *local_err = NULL; int flags; int64_t size; - int ret; if (!has_speed) { speed = 0; @@ -3283,10 +3274,8 @@ static void do_drive_backup(const char *device, const char *target, qdict_put(options, "driver", qstring_from_str(format)); } - target_bs = NULL; - ret = bdrv_open(&target_bs, target, NULL, options, flags, &local_err); - if (ret < 0) { - error_propagate(errp, local_err); + target_bs = bdrv_open(target, NULL, options, flags, errp); + if (!target_bs) { goto out; } @@ -3304,8 +3293,8 @@ static void do_drive_backup(const char *device, const char *target, backup_start(bs, target_bs, speed, sync, bmap, on_source_error, on_target_error, block_job_cb, bs, txn, &local_err); + bdrv_unref(target_bs); if (local_err != NULL) { - bdrv_unref(target_bs); error_propagate(errp, local_err); goto out; } @@ -3389,12 +3378,10 @@ void do_blockdev_backup(const char *device, const char *target, } target_bs = blk_bs(target_blk); - bdrv_ref(target_bs); bdrv_set_aio_context(target_bs, aio_context); backup_start(bs, target_bs, speed, sync, NULL, on_source_error, on_target_error, block_job_cb, bs, txn, &local_err); if (local_err != NULL) { - bdrv_unref(target_bs); error_propagate(errp, local_err); } out: @@ -3470,10 +3457,6 @@ static void blockdev_mirror_common(BlockDriverState *bs, if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_MIRROR_TARGET, errp)) { return; } - if (bdrv_has_blk(target)) { - error_setg(errp, "Cannot mirror to an attached block device"); - return; - } if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) { sync = MIRROR_SYNC_MODE_FULL; @@ -3511,7 +3494,6 @@ void qmp_drive_mirror(const char *device, const char *target, QDict *options = NULL; int flags; int64_t size; - int ret; blk = blk_by_name(device); if (!blk) { @@ -3620,11 +3602,9 @@ void qmp_drive_mirror(const char *device, const char *target, /* Mirroring takes care of copy-on-write using the source's backing * file. */ - target_bs = NULL; - ret = bdrv_open(&target_bs, target, NULL, options, - flags | BDRV_O_NO_BACKING, &local_err); - if (ret < 0) { - error_propagate(errp, local_err); + target_bs = bdrv_open(target, NULL, options, flags | BDRV_O_NO_BACKING, + errp); + if (!target_bs) { goto out; } @@ -3639,9 +3619,9 @@ void qmp_drive_mirror(const char *device, const char *target, has_on_target_error, on_target_error, has_unmap, unmap, &local_err); + bdrv_unref(target_bs); if (local_err) { error_propagate(errp, local_err); - bdrv_unref(target_bs); } out: aio_context_release(aio_context); @@ -3685,7 +3665,6 @@ void qmp_blockdev_mirror(const char *device, const char *target, aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_ref(target_bs); bdrv_set_aio_context(target_bs, aio_context); blockdev_mirror_common(bs, target_bs, @@ -3699,7 +3678,6 @@ void qmp_blockdev_mirror(const char *device, const char *target, &local_err); if (local_err) { error_propagate(errp, local_err); - bdrv_unref(target_bs); } aio_context_release(aio_context); @@ -4164,9 +4142,9 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp) { BlockJobInfoList *head = NULL, **p_next = &head; BlockDriverState *bs; - BdrvNextIterator *it = NULL; + BdrvNextIterator it; - while ((it = bdrv_next(it, &bs))) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); diff --git a/blockjob.c b/blockjob.c index 5b840a7df6..c095cc57cb 100644 --- a/blockjob.c +++ b/blockjob.c @@ -50,17 +50,31 @@ struct BlockJobTxn { int refcnt; }; +static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs); + +BlockJob *block_job_next(BlockJob *job) +{ + if (!job) { + return QLIST_FIRST(&block_jobs); + } + return QLIST_NEXT(job, job_list); +} + void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, int64_t speed, BlockCompletionFunc *cb, void *opaque, Error **errp) { + BlockBackend *blk; BlockJob *job; if (bs->job) { error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); return NULL; } - bdrv_ref(bs); + + blk = blk_new(); + blk_insert_bs(blk, bs); + job = g_malloc0(driver->instance_size); error_setg(&job->blocker, "block device is in use by block job: %s", BlockJobType_lookup[driver->job_type]); @@ -69,13 +83,15 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, job->driver = driver; job->id = g_strdup(bdrv_get_device_name(bs)); - job->bs = bs; + job->blk = blk; job->cb = cb; job->opaque = opaque; job->busy = true; job->refcnt = 1; bs->job = job; + QLIST_INSERT_HEAD(&block_jobs, job, job_list); + /* Only set speed when necessary to avoid NotSupported error */ if (speed != 0) { Error *local_err = NULL; @@ -98,11 +114,13 @@ void block_job_ref(BlockJob *job) void block_job_unref(BlockJob *job) { if (--job->refcnt == 0) { - job->bs->job = NULL; - bdrv_op_unblock_all(job->bs, job->blocker); - bdrv_unref(job->bs); + BlockDriverState *bs = blk_bs(job->blk); + bs->job = NULL; + bdrv_op_unblock_all(bs, job->blocker); + blk_unref(job->blk); error_free(job->blocker); g_free(job->id); + QLIST_REMOVE(job, job_list); g_free(job); } } @@ -140,7 +158,7 @@ static void block_job_completed_txn_abort(BlockJob *job) txn->aborting = true; /* We are the first failed job. Cancel other jobs. */ QLIST_FOREACH(other_job, &txn->jobs, txn_list) { - ctx = bdrv_get_aio_context(other_job->bs); + ctx = blk_get_aio_context(other_job->blk); aio_context_acquire(ctx); } QLIST_FOREACH(other_job, &txn->jobs, txn_list) { @@ -157,7 +175,7 @@ static void block_job_completed_txn_abort(BlockJob *job) assert(other_job->completed); } QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { - ctx = bdrv_get_aio_context(other_job->bs); + ctx = blk_get_aio_context(other_job->blk); block_job_completed_single(other_job); aio_context_release(ctx); } @@ -179,7 +197,7 @@ static void block_job_completed_txn_success(BlockJob *job) } /* We are the last completed job, commit the transaction. */ QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { - ctx = bdrv_get_aio_context(other_job->bs); + ctx = blk_get_aio_context(other_job->blk); aio_context_acquire(ctx); assert(other_job->ret == 0); block_job_completed_single(other_job); @@ -189,9 +207,7 @@ static void block_job_completed_txn_success(BlockJob *job) void block_job_completed(BlockJob *job, int ret) { - BlockDriverState *bs = job->bs; - - assert(bs->job == job); + assert(blk_bs(job->blk)->job == job); assert(!job->completed); job->completed = true; job->ret = ret; @@ -282,11 +298,10 @@ static int block_job_finish_sync(BlockJob *job, void (*finish)(BlockJob *, Error **errp), Error **errp) { - BlockDriverState *bs = job->bs; Error *local_err = NULL; int ret; - assert(bs->job == job); + assert(blk_bs(job->blk)->job == job); block_job_ref(job); finish(job, &local_err); @@ -297,7 +312,7 @@ static int block_job_finish_sync(BlockJob *job, } while (!job->completed) { aio_poll(job->deferred_to_main_loop ? qemu_get_aio_context() : - bdrv_get_aio_context(bs), + blk_get_aio_context(job->blk), true); } ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret; @@ -318,6 +333,19 @@ int block_job_cancel_sync(BlockJob *job) return block_job_finish_sync(job, &block_job_cancel_err, NULL); } +void block_job_cancel_sync_all(void) +{ + BlockJob *job; + AioContext *aio_context; + + while ((job = QLIST_FIRST(&block_jobs))) { + aio_context = blk_get_aio_context(job->blk); + aio_context_acquire(aio_context); + block_job_cancel_sync(job); + aio_context_release(aio_context); + } +} + int block_job_complete_sync(BlockJob *job, Error **errp) { return block_job_finish_sync(job, &block_job_complete, errp); @@ -336,7 +364,7 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns) if (block_job_is_paused(job)) { qemu_coroutine_yield(); } else { - co_aio_sleep_ns(bdrv_get_aio_context(job->bs), type, ns); + co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns); } job->busy = true; } @@ -465,7 +493,7 @@ static void block_job_defer_to_main_loop_bh(void *opaque) aio_context_acquire(data->aio_context); /* Fetch BDS AioContext again, in case it has changed */ - aio_context = bdrv_get_aio_context(data->job->bs); + aio_context = blk_get_aio_context(data->job->blk); aio_context_acquire(aio_context); data->job->deferred_to_main_loop = false; @@ -485,7 +513,7 @@ void block_job_defer_to_main_loop(BlockJob *job, BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data)); data->job = job; data->bh = qemu_bh_new(block_job_defer_to_main_loop_bh, data); - data->aio_context = bdrv_get_aio_context(job->bs); + data->aio_context = blk_get_aio_context(job->blk); data->fn = fn; data->opaque = opaque; job->deferred_to_main_loop = true; diff --git a/dma-helpers.c b/dma-helpers.c index a6cc15f534..b521d84ebd 100644 --- a/dma-helpers.c +++ b/dma-helpers.c @@ -70,7 +70,7 @@ void qemu_sglist_destroy(QEMUSGList *qsg) typedef struct { BlockAIOCB common; - BlockBackend *blk; + AioContext *ctx; BlockAIOCB *acb; QEMUSGList *sg; uint64_t offset; @@ -80,6 +80,7 @@ typedef struct { QEMUIOVector iov; QEMUBH *bh; DMAIOFunc *io_func; + void *io_func_opaque; } DMAAIOCB; static void dma_blk_cb(void *opaque, int ret); @@ -154,8 +155,7 @@ static void dma_blk_cb(void *opaque, int ret) if (dbs->iov.size == 0) { trace_dma_map_wait(dbs); - dbs->bh = aio_bh_new(blk_get_aio_context(dbs->blk), - reschedule_dma, dbs); + dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs); cpu_register_map_client(dbs->bh); return; } @@ -164,8 +164,8 @@ static void dma_blk_cb(void *opaque, int ret) qemu_iovec_discard_back(&dbs->iov, dbs->iov.size & ~BDRV_SECTOR_MASK); } - dbs->acb = dbs->io_func(dbs->blk, dbs->offset, &dbs->iov, 0, - dma_blk_cb, dbs); + dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, + dma_blk_cb, dbs, dbs->io_func_opaque); assert(dbs->acb); } @@ -191,23 +191,25 @@ static const AIOCBInfo dma_aiocb_info = { .cancel_async = dma_aio_cancel, }; -BlockAIOCB *dma_blk_io( - BlockBackend *blk, QEMUSGList *sg, uint64_t sector_num, - DMAIOFunc *io_func, BlockCompletionFunc *cb, +BlockAIOCB *dma_blk_io(AioContext *ctx, + QEMUSGList *sg, uint64_t offset, + DMAIOFunc *io_func, void *io_func_opaque, + BlockCompletionFunc *cb, void *opaque, DMADirection dir) { - DMAAIOCB *dbs = blk_aio_get(&dma_aiocb_info, blk, cb, opaque); + DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque); - trace_dma_blk_io(dbs, blk, sector_num, (dir == DMA_DIRECTION_TO_DEVICE)); + trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE)); dbs->acb = NULL; - dbs->blk = blk; dbs->sg = sg; - dbs->offset = sector_num << BDRV_SECTOR_BITS; + dbs->ctx = ctx; + dbs->offset = offset; dbs->sg_cur_index = 0; dbs->sg_cur_byte = 0; dbs->dir = dir; dbs->io_func = io_func; + dbs->io_func_opaque = io_func_opaque; dbs->bh = NULL; qemu_iovec_init(&dbs->iov, sg->nsg); dma_blk_cb(dbs, 0); @@ -215,19 +217,39 @@ BlockAIOCB *dma_blk_io( } +static +BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov, + BlockCompletionFunc *cb, void *cb_opaque, + void *opaque) +{ + BlockBackend *blk = opaque; + return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque); +} + BlockAIOCB *dma_blk_read(BlockBackend *blk, - QEMUSGList *sg, uint64_t sector, + QEMUSGList *sg, uint64_t offset, void (*cb)(void *opaque, int ret), void *opaque) { - return dma_blk_io(blk, sg, sector, blk_aio_preadv, cb, opaque, + return dma_blk_io(blk_get_aio_context(blk), + sg, offset, dma_blk_read_io_func, blk, cb, opaque, DMA_DIRECTION_FROM_DEVICE); } +static +BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov, + BlockCompletionFunc *cb, void *cb_opaque, + void *opaque) +{ + BlockBackend *blk = opaque; + return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque); +} + BlockAIOCB *dma_blk_write(BlockBackend *blk, - QEMUSGList *sg, uint64_t sector, + QEMUSGList *sg, uint64_t offset, void (*cb)(void *opaque, int ret), void *opaque) { - return dma_blk_io(blk, sg, sector, blk_aio_pwritev, cb, opaque, + return dma_blk_io(blk_get_aio_context(blk), + sg, offset, dma_blk_write_io_func, blk, cb, opaque, DMA_DIRECTION_TO_DEVICE); } diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 173988ee84..9faad29fad 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -239,7 +239,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds; uint64_t data_size = (uint64_t)nlb << data_shift; - uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS); + uint64_t data_offset = slba << data_shift; int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0; enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ; @@ -258,8 +258,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, req->has_sg = true; dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct); req->aiocb = is_write ? - dma_blk_write(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req) : - dma_blk_read(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req); + dma_blk_write(n->conf.blk, &req->qsg, data_offset, nvme_rw_cb, req) : + dma_blk_read(n->conf.blk, &req->qsg, data_offset, nvme_rw_cb, req); return NVME_NO_COMPLETE; } diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index f244bc01c9..502d4f1c7b 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1006,7 +1006,8 @@ static void execute_ncq_command(NCQTransferState *ncq_tfs) dma_acct_start(ide_state->blk, &ncq_tfs->acct, &ncq_tfs->sglist, BLOCK_ACCT_READ); ncq_tfs->aiocb = dma_blk_read(ide_state->blk, &ncq_tfs->sglist, - ncq_tfs->lba, ncq_cb, ncq_tfs); + ncq_tfs->lba << BDRV_SECTOR_BITS, + ncq_cb, ncq_tfs); break; case WRITE_FPDMA_QUEUED: DPRINTF(port, "NCQ writing %d sectors to LBA %"PRId64", tag %d\n", @@ -1018,7 +1019,8 @@ static void execute_ncq_command(NCQTransferState *ncq_tfs) dma_acct_start(ide_state->blk, &ncq_tfs->acct, &ncq_tfs->sglist, BLOCK_ACCT_WRITE); ncq_tfs->aiocb = dma_blk_write(ide_state->blk, &ncq_tfs->sglist, - ncq_tfs->lba, ncq_cb, ncq_tfs); + ncq_tfs->lba << BDRV_SECTOR_BITS, + ncq_cb, ncq_tfs); break; default: DPRINTF(port, "error: unsupported NCQ command (0x%02x) received\n", diff --git a/hw/ide/core.c b/hw/ide/core.c index fe2bfba489..029f6b9b12 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -441,13 +441,14 @@ static void ide_issue_trim_cb(void *opaque, int ret) } } -BlockAIOCB *ide_issue_trim(BlockBackend *blk, - int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, - BlockCompletionFunc *cb, void *opaque) +BlockAIOCB *ide_issue_trim( + int64_t offset, QEMUIOVector *qiov, + BlockCompletionFunc *cb, void *cb_opaque, void *opaque) { + BlockBackend *blk = opaque; TrimAIOCB *iocb; - iocb = blk_aio_get(&trim_aiocb_info, blk, cb, opaque); + iocb = blk_aio_get(&trim_aiocb_info, blk, cb, cb_opaque); iocb->blk = blk; iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); iocb->ret = 0; @@ -799,6 +800,7 @@ static void ide_dma_cb(void *opaque, int ret) IDEState *s = opaque; int n; int64_t sector_num; + uint64_t offset; bool stay_active = false; if (ret == -ECANCELED) { @@ -859,18 +861,20 @@ static void ide_dma_cb(void *opaque, int ret) return; } + offset = sector_num << BDRV_SECTOR_BITS; switch (s->dma_cmd) { case IDE_DMA_READ: - s->bus->dma->aiocb = dma_blk_read(s->blk, &s->sg, sector_num, + s->bus->dma->aiocb = dma_blk_read(s->blk, &s->sg, offset, ide_dma_cb, s); break; case IDE_DMA_WRITE: - s->bus->dma->aiocb = dma_blk_write(s->blk, &s->sg, sector_num, + s->bus->dma->aiocb = dma_blk_write(s->blk, &s->sg, offset, ide_dma_cb, s); break; case IDE_DMA_TRIM: - s->bus->dma->aiocb = dma_blk_io(s->blk, &s->sg, sector_num, - ide_issue_trim, ide_dma_cb, s, + s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk), + &s->sg, offset, + ide_issue_trim, s->blk, ide_dma_cb, s, DMA_DIRECTION_TO_DEVICE); break; default: diff --git a/hw/ide/internal.h b/hw/ide/internal.h index ceb9e5994a..773928af77 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -613,9 +613,9 @@ void ide_transfer_start(IDEState *s, uint8_t *buf, int size, EndTransferFunc *end_transfer_func); void ide_transfer_stop(IDEState *s); void ide_set_inactive(IDEState *s, bool more); -BlockAIOCB *ide_issue_trim(BlockBackend *blk, - int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, - BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *ide_issue_trim( + int64_t offset, QEMUIOVector *qiov, + BlockCompletionFunc *cb, void *cb_opaque, void *opaque); BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockCompletionFunc *cb, void *opaque); diff --git a/hw/ide/macio.c b/hw/ide/macio.c index d7d9c0ff3a..42ad68a1c0 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -230,7 +230,7 @@ static void pmac_dma_trim(BlockBackend *blk, s->io_buffer_index += io->len; io->len = 0; - s->bus->dma->aiocb = ide_issue_trim(blk, offset, &io->iov, 0, cb, io); + s->bus->dma->aiocb = ide_issue_trim(offset, &io->iov, cb, io, blk); } static void pmac_ide_atapi_transfer_cb(void *opaque, int ret) diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index ce89c98b4e..8865da53e8 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -335,7 +335,8 @@ static void scsi_do_read(SCSIDiskReq *r, int ret) if (r->req.sg) { dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ); r->req.resid -= r->req.sg->size; - r->req.aiocb = dma_blk_read(s->qdev.conf.blk, r->req.sg, r->sector, + r->req.aiocb = dma_blk_read(s->qdev.conf.blk, r->req.sg, + r->sector << BDRV_SECTOR_BITS, scsi_dma_complete, r); } else { scsi_init_iovec(r, SCSI_DMA_BUF_SIZE); @@ -539,7 +540,8 @@ static void scsi_write_data(SCSIRequest *req) if (r->req.sg) { dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE); r->req.resid -= r->req.sg->size; - r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg, r->sector, + r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg, + r->sector << BDRV_SECTOR_BITS, scsi_dma_complete, r); } else { block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, @@ -1778,7 +1780,7 @@ static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf) block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, nb_sectors * s->qdev.blocksize, BLOCK_ACCT_WRITE); - r->req.aiocb = blk_aio_write_zeroes(s->qdev.conf.blk, + r->req.aiocb = blk_aio_pwrite_zeroes(s->qdev.conf.blk, r->req.cmd.lba * s->qdev.blocksize, nb_sectors * s->qdev.blocksize, flags, scsi_aio_complete, r); diff --git a/include/block/block.h b/include/block/block.h index a8c15e36e7..70ea29947c 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -17,7 +17,6 @@ typedef struct BlockJob BlockJob; typedef struct BdrvChild BdrvChild; typedef struct BdrvChildRole BdrvChildRole; typedef struct BlockJobTxn BlockJobTxn; -typedef struct BdrvNextIterator BdrvNextIterator; typedef struct BlockDriverInfo { /* in bytes, 0 if irrelevant */ @@ -198,7 +197,6 @@ BlockDriver *bdrv_find_format(const char *format_name); int bdrv_create(BlockDriver *drv, const char* filename, QemuOpts *opts, Error **errp); int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); -BlockDriverState *bdrv_new_root(void); BlockDriverState *bdrv_new(void); void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); void bdrv_replace_in_backing_chain(BlockDriverState *old, @@ -214,8 +212,8 @@ BdrvChild *bdrv_open_child(const char *filename, void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, const char *bdref_key, Error **errp); -int bdrv_open(BlockDriverState **pbs, const char *filename, - const char *reference, QDict *options, int flags, Error **errp); +BlockDriverState *bdrv_open(const char *filename, const char *reference, + QDict *options, int flags, Error **errp); BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, QDict *options, int flags); @@ -244,10 +242,6 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, const void *buf, int count); int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); -int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); -int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); /* @@ -402,7 +396,19 @@ BlockDriverState *bdrv_lookup_bs(const char *device, Error **errp); bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base); BlockDriverState *bdrv_next_node(BlockDriverState *bs); -BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs); + +typedef struct BdrvNextIterator { + enum { + BDRV_NEXT_BACKEND_ROOTS, + BDRV_NEXT_MONITOR_OWNED, + } phase; + BlockBackend *blk; + BlockDriverState *bs; +} BdrvNextIterator; + +BlockDriverState *bdrv_first(BdrvNextIterator *it); +BlockDriverState *bdrv_next(BdrvNextIterator *it); + BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs); int bdrv_is_encrypted(BlockDriverState *bs); int bdrv_key_required(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index b6f4755725..30a97178c8 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -719,7 +719,8 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr); BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, - const BdrvChildRole *child_role); + const BdrvChildRole *child_role, + void *opaque); void bdrv_root_unref_child(BdrvChild *child); const char *bdrv_get_parent_name(const BlockDriverState *bs); diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 073a433cf8..86d28070b8 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -82,7 +82,7 @@ struct BlockJob { const BlockJobDriver *driver; /** The block device on which the job is operating. */ - BlockDriverState *bs; + BlockBackend *blk; /** * The ID of the block job. Currently the BlockBackend name of the BDS @@ -135,6 +135,9 @@ struct BlockJob { */ bool deferred_to_main_loop; + /** Element of the list of block jobs */ + QLIST_ENTRY(BlockJob) job_list; + /** Status that is published by the query-block-jobs QMP API */ BlockDeviceIoStatus iostatus; @@ -173,6 +176,17 @@ struct BlockJob { }; /** + * block_job_next: + * @job: A block job, or %NULL. + * + * Get the next element from the list of block jobs after @job, or the + * first one if @job is %NULL. + * + * Returns the requested job, or %NULL if there are no more jobs left. + */ +BlockJob *block_job_next(BlockJob *job); + +/** * block_job_create: * @job_type: The class object for the newly-created job. * @bs: The block @@ -357,6 +371,13 @@ bool block_job_is_paused(BlockJob *job); int block_job_cancel_sync(BlockJob *job); /** + * block_job_cancel_sync_all: + * + * Synchronously cancels all jobs using block_job_cancel_sync(). + */ +void block_job_cancel_sync_all(void); + +/** * block_job_complete_sync: * @job: The job to be completed. * @errp: Error object which may be set by block_job_complete(); this is not diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 68d92b556e..c04af8ea46 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -78,8 +78,7 @@ typedef struct BlockBackendPublic { QLIST_ENTRY(BlockBackendPublic) round_robin; } BlockBackendPublic; -BlockBackend *blk_new(Error **errp); -BlockBackend *blk_new_with_bs(Error **errp); +BlockBackend *blk_new(void); BlockBackend *blk_new_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp); int blk_get_refcnt(BlockBackend *blk); @@ -114,11 +113,17 @@ void *blk_get_attached_dev(BlockBackend *blk); void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, int count); -int blk_write_zeroes(BlockBackend *blk, int64_t offset, - int count, BdrvRequestFlags flags); -BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset, - int count, BdrvRequestFlags flags, - BlockCompletionFunc *cb, void *opaque); +int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int count, BdrvRequestFlags flags); +BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int count, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count); int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count, BdrvRequestFlags flags); @@ -196,8 +201,8 @@ int blk_get_open_flags_from_root_state(BlockBackend *blk); void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, BlockCompletionFunc *cb, void *opaque); -int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset, - int count, BdrvRequestFlags flags); +int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int count, BdrvRequestFlags flags); int blk_write_compressed(BlockBackend *blk, int64_t sector_num, const uint8_t *buf, int nb_sectors); int blk_truncate(BlockBackend *blk, int64_t offset); diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h index d6e96a4298..34c8eaf64e 100644 --- a/include/sysemu/dma.h +++ b/include/sysemu/dma.h @@ -194,19 +194,19 @@ void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len); void qemu_sglist_destroy(QEMUSGList *qsg); #endif -typedef BlockAIOCB *DMAIOFunc(BlockBackend *blk, int64_t offset, - QEMUIOVector *iov, BdrvRequestFlags flags, - BlockCompletionFunc *cb, void *opaque); - -BlockAIOCB *dma_blk_io(BlockBackend *blk, - QEMUSGList *sg, uint64_t sector_num, - DMAIOFunc *io_func, BlockCompletionFunc *cb, - void *opaque, DMADirection dir); +typedef BlockAIOCB *DMAIOFunc(int64_t offset, QEMUIOVector *iov, + BlockCompletionFunc *cb, void *cb_opaque, + void *opaque); + +BlockAIOCB *dma_blk_io(AioContext *ctx, + QEMUSGList *sg, uint64_t offset, + DMAIOFunc *io_func, void *io_func_opaque, + BlockCompletionFunc *cb, void *opaque, DMADirection dir); BlockAIOCB *dma_blk_read(BlockBackend *blk, - QEMUSGList *sg, uint64_t sector, + QEMUSGList *sg, uint64_t offset, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *dma_blk_write(BlockBackend *blk, - QEMUSGList *sg, uint64_t sector, + QEMUSGList *sg, uint64_t offset, BlockCompletionFunc *cb, void *opaque); uint64_t dma_buf_read(uint8_t *ptr, int32_t len, QEMUSGList *sg); uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg); diff --git a/migration/block.c b/migration/block.c index a7a76a0fb9..e0628d187f 100644 --- a/migration/block.c +++ b/migration/block.c @@ -383,7 +383,7 @@ static void init_blk_migration(QEMUFile *f) BlockDriverState *bs; BlkMigDevState *bmds; int64_t sectors; - BdrvNextIterator *it = NULL; + BdrvNextIterator it; block_mig_state.submitted = 0; block_mig_state.read_done = 0; @@ -394,7 +394,7 @@ static void init_blk_migration(QEMUFile *f) block_mig_state.zero_blocks = migrate_zero_blocks(); - while ((it = bdrv_next(it, &bs))) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { if (bdrv_is_read_only(bs)) { continue; } @@ -3432,12 +3432,12 @@ static void vm_completion(ReadLineState *rs, const char *str) { size_t len; BlockDriverState *bs; - BdrvNextIterator *it = NULL; + BdrvNextIterator it; len = strlen(str); readline_set_completion_index(rs, len); - while ((it = bdrv_next(it, &bs))) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { SnapshotInfoList *snapshots, *snapshot; AioContext *ctx = bdrv_get_aio_context(bs); bool ok = false; diff --git a/qemu-img.c b/qemu-img.c index 7ed8ef21cb..4b56ad36aa 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -775,7 +775,7 @@ static void common_block_job_cb(void *opaque, int ret) static void run_block_job(BlockJob *job, Error **errp) { - AioContext *aio_context = bdrv_get_aio_context(job->bs); + AioContext *aio_context = blk_get_aio_context(job->blk); do { aio_poll(aio_context, true); @@ -1606,8 +1606,8 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, if (s->has_zero_init) { break; } - ret = blk_write_zeroes(s->target, sector_num << BDRV_SECTOR_BITS, - n << BDRV_SECTOR_BITS, 0); + ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, 0); if (ret < 0) { return ret; } diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index e766791ffc..09e879f872 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -451,12 +451,12 @@ typedef struct { bool done; } CoWriteZeroes; -static void coroutine_fn co_write_zeroes_entry(void *opaque) +static void coroutine_fn co_pwrite_zeroes_entry(void *opaque) { CoWriteZeroes *data = opaque; - data->ret = blk_co_write_zeroes(data->blk, data->offset, data->count, - data->flags); + data->ret = blk_co_pwrite_zeroes(data->blk, data->offset, data->count, + data->flags); data->done = true; if (data->ret < 0) { *data->total = data->ret; @@ -466,8 +466,8 @@ static void coroutine_fn co_write_zeroes_entry(void *opaque) *data->total = data->count; } -static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int64_t count, - int flags, int64_t *total) +static int do_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t count, int flags, int64_t *total) { Coroutine *co; CoWriteZeroes data = { @@ -483,7 +483,7 @@ static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int64_t count, return -ERANGE; } - co = qemu_coroutine_create(co_write_zeroes_entry); + co = qemu_coroutine_create(co_pwrite_zeroes_entry); qemu_coroutine_enter(co, &data); while (!data.done) { aio_poll(blk_get_aio_context(blk), true); @@ -901,7 +901,7 @@ static void write_help(void) " -C, -- report statistics in a machine parsable format\n" " -q, -- quiet mode, do not show I/O statistics\n" " -u, -- with -z, allow unmapping\n" -" -z, -- write zeroes using blk_co_write_zeroes\n" +" -z, -- write zeroes using blk_co_pwrite_zeroes\n" "\n"); } @@ -1033,7 +1033,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv) if (bflag) { cnt = do_save_vmstate(blk, buf, offset, count, &total); } else if (zflag) { - cnt = do_co_write_zeroes(blk, offset, count, flags, &total); + cnt = do_co_pwrite_zeroes(blk, offset, count, flags, &total); } else if (cflag) { cnt = do_write_compressed(blk, buf, offset, count, &total); } else { @@ -1376,7 +1376,7 @@ static void aio_write_help(void) " -i, -- treat request as invalid, for exercising stats\n" " -q, -- quiet mode, do not show I/O statistics\n" " -u, -- with -z, allow unmapping\n" -" -z, -- write zeroes using blk_aio_write_zeroes\n" +" -z, -- write zeroes using blk_aio_pwrite_zeroes\n" "\n"); } @@ -1475,8 +1475,8 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv) } ctx->qiov.size = count; - blk_aio_write_zeroes(blk, ctx->offset, count, flags, aio_write_done, - ctx); + blk_aio_pwrite_zeroes(blk, ctx->offset, count, flags, aio_write_done, + ctx); } else { nr_iov = argc - optind; ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov, @@ -181,7 +181,7 @@ void qmp_cont(Error **errp) Error *local_err = NULL; BlockBackend *blk; BlockDriverState *bs; - BdrvNextIterator *it; + BdrvNextIterator it; /* if there is a dump in background, we should wait until the dump * finished */ @@ -201,8 +201,7 @@ void qmp_cont(Error **errp) blk_iostatus_reset(blk); } - it = NULL; - while ((it = bdrv_next(it, &bs))) { + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { bdrv_add_key(bs, NULL, &local_err); if (local_err) { error_propagate(errp, local_err); diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index b1c542f99b..ed1d9d464c 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -207,33 +207,6 @@ class TestSingleBlockdev(TestSingleDrive): test_image_not_found = None test_small_buffer2 = None -class TestBlockdevAttached(iotests.QMPTestCase): - image_len = 1 * 1024 * 1024 # MB - - def setUp(self): - iotests.create_image(backing_img, self.image_len) - qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) - qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, target_img) - self.vm = iotests.VM().add_drive(test_img) - self.vm.launch() - - def tearDown(self): - self.vm.shutdown() - os.remove(test_img) - os.remove(target_img) - - def test_blockdev_attached(self): - self.assert_no_active_block_jobs() - args = {'options': - {'driver': iotests.imgfmt, - 'id': 'drive1', - 'file': { 'filename': target_img, 'driver': 'file' } } } - result = self.vm.qmp("blockdev-add", **args) - self.assert_qmp(result, 'return', {}) - result = self.vm.qmp('blockdev-mirror', device='drive0', sync='full', - target='drive1') - self.assert_qmp(result, 'error/class', 'GenericError') - class TestSingleDriveZeroLength(TestSingleDrive): image_len = 0 test_small_buffer2 = None diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out index b67d0504a6..b0cadc8245 100644 --- a/tests/qemu-iotests/041.out +++ b/tests/qemu-iotests/041.out @@ -1,5 +1,5 @@ -............................................................................ +........................................................................... ---------------------------------------------------------------------- -Ran 76 tests +Ran 75 tests OK diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c index 55fad9507a..828389bb45 100644 --- a/tests/test-blockjob-txn.c +++ b/tests/test-blockjob-txn.c @@ -15,6 +15,7 @@ #include "qapi/error.h" #include "qemu/main-loop.h" #include "block/blockjob.h" +#include "sysemu/block-backend.h" typedef struct { BlockJob common; @@ -30,7 +31,7 @@ static const BlockJobDriver test_block_job_driver = { static void test_block_job_complete(BlockJob *job, void *opaque) { - BlockDriverState *bs = job->bs; + BlockDriverState *bs = blk_bs(job->blk); int rc = (intptr_t)opaque; if (block_job_is_cancelled(job)) { diff --git a/tests/test-throttle.c b/tests/test-throttle.c index 5ec966c8a4..c02be805f7 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -578,9 +578,9 @@ static void test_groups(void) BlockBackend *blk1, *blk2, *blk3; BlockBackendPublic *blkp1, *blkp2, *blkp3; - blk1 = blk_new_with_bs(&error_abort); - blk2 = blk_new_with_bs(&error_abort); - blk3 = blk_new_with_bs(&error_abort); + blk1 = blk_new(); + blk2 = blk_new(); + blk3 = blk_new(); blkp1 = blk_get_public(blk1); blkp2 = blk_get_public(blk2); diff --git a/trace-events b/trace-events index b53c3541a3..4450d8f377 100644 --- a/trace-events +++ b/trace-events @@ -61,6 +61,10 @@ virtio_console_chr_event(unsigned int port, int event) "port %u, event %d" bdrv_open_common(void *bs, const char *filename, int flags, const char *format_name) "bs %p filename \"%s\" flags %#x format_name \"%s\"" bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d" +# block/block-backend.c +blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x" +blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x" + # block/io.c bdrv_aio_discard(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p" @@ -68,8 +72,6 @@ bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs % bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" bdrv_aio_write_zeroes(void *bs, int64_t sector_num, int nb_sectors, int flags, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d flags %#x opaque %p" bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" -bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" -bdrv_co_readv_no_serialising(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d" bdrv_co_write_zeroes(void *bs, int64_t sector_num, int nb_sector, int flags) "bs %p sector_num %"PRId64" nb_sectors %d flags %#x" bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d" @@ -1143,7 +1145,7 @@ win_helper_done(uint32_t tl) "tl=%d" win_helper_retry(uint32_t tl) "tl=%d" # dma-helpers.c -dma_blk_io(void *dbs, void *bs, int64_t sector_num, bool to_dev) "dbs=%p bs=%p sector_num=%" PRId64 " to_dev=%d" +dma_blk_io(void *dbs, void *bs, int64_t offset, bool to_dev) "dbs=%p bs=%p offset=%" PRId64 " to_dev=%d" dma_aio_cancel(void *dbs) "dbs=%p" dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p" dma_blk_cb(void *dbs, int ret) "dbs=%p ret=%d" |