Diffstat (limited to 'block/io.c')
-rw-r--r-- | block/io.c | 293 |
1 file changed, 28 insertions, 265 deletions
diff --git a/block/io.c b/block/io.c
index cd6d71a503..60a6bd8bdb 100644
--- a/block/io.c
+++ b/block/io.c
@@ -27,7 +27,6 @@
 #include "sysemu/block-backend.h"
 #include "block/blockjob.h"
 #include "block/block_int.h"
-#include "block/throttle-groups.h"
 #include "qemu/cutils.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
@@ -46,56 +45,26 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque);
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
     int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);

-/* throttling disk I/O limits */
-void bdrv_set_io_limits(BlockDriverState *bs,
-                        ThrottleConfig *cfg)
+static void bdrv_parent_drained_begin(BlockDriverState *bs)
 {
-    throttle_group_config(bs, cfg);
-}
+    BdrvChild *c;

-void bdrv_no_throttling_begin(BlockDriverState *bs)
-{
-    if (bs->io_limits_disabled++ == 0) {
-        throttle_group_restart_bs(bs);
+    QLIST_FOREACH(c, &bs->parents, next_parent) {
+        if (c->role->drained_begin) {
+            c->role->drained_begin(c);
+        }
     }
 }

-void bdrv_no_throttling_end(BlockDriverState *bs)
+static void bdrv_parent_drained_end(BlockDriverState *bs)
 {
-    assert(bs->io_limits_disabled);
-    --bs->io_limits_disabled;
-}
-
-void bdrv_io_limits_disable(BlockDriverState *bs)
-{
-    assert(bs->throttle_state);
-    bdrv_no_throttling_begin(bs);
-    throttle_group_unregister_bs(bs);
-    bdrv_no_throttling_end(bs);
-}
-
-/* should be called before bdrv_set_io_limits if a limit is set */
-void bdrv_io_limits_enable(BlockDriverState *bs, const char *group)
-{
-    assert(!bs->throttle_state);
-    throttle_group_register_bs(bs, group);
-}
-
-void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group)
-{
-    /* this bs is not part of any group */
-    if (!bs->throttle_state) {
-        return;
-    }
+    BdrvChild *c;

-    /* this bs is a part of the same group than the one we want */
-    if (!g_strcmp0(throttle_group_get_name(bs), group)) {
-        return;
+    QLIST_FOREACH(c, &bs->parents, next_parent) {
+        if (c->role->drained_end) {
+            c->role->drained_end(c);
+        }
     }
-
-    /* need to change the group this bs belong to */
-    bdrv_io_limits_disable(bs);
-    bdrv_io_limits_enable(bs, group);
 }

 void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
@@ -182,12 +151,6 @@ bool bdrv_requests_pending(BlockDriverState *bs)
     if (!QLIST_EMPTY(&bs->tracked_requests)) {
         return true;
     }
-    if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
-        return true;
-    }
-    if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
-        return true;
-    }

     QLIST_FOREACH(child, &bs->children, next) {
         if (bdrv_requests_pending(child->bs)) {
@@ -275,17 +238,17 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
  */
 void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
 {
-    bdrv_no_throttling_begin(bs);
+    bdrv_parent_drained_begin(bs);
     bdrv_io_unplugged_begin(bs);
     bdrv_drain_recurse(bs);
     bdrv_co_yield_to_drain(bs);
     bdrv_io_unplugged_end(bs);
-    bdrv_no_throttling_end(bs);
+    bdrv_parent_drained_end(bs);
 }

 void bdrv_drain(BlockDriverState *bs)
 {
-    bdrv_no_throttling_begin(bs);
+    bdrv_parent_drained_begin(bs);
     bdrv_io_unplugged_begin(bs);
     bdrv_drain_recurse(bs);
     if (qemu_in_coroutine()) {
@@ -294,7 +257,7 @@ void bdrv_drain(BlockDriverState *bs)
         bdrv_drain_poll(bs);
     }
     bdrv_io_unplugged_end(bs);
-    bdrv_no_throttling_end(bs);
+    bdrv_parent_drained_end(bs);
 }

 /*
@@ -307,17 +270,18 @@ void bdrv_drain_all(void)
 {
     /* Always run first iteration so any pending completion BHs run */
     bool busy = true;
-    BlockDriverState *bs = NULL;
+    BlockDriverState *bs;
+    BdrvNextIterator *it = NULL;
     GSList *aio_ctxs = NULL, *ctx;

-    while ((bs = bdrv_next(bs))) {
+    while ((it = bdrv_next(it, &bs))) {
         AioContext *aio_context = bdrv_get_aio_context(bs);

         aio_context_acquire(aio_context);
         if (bs->job) {
             block_job_pause(bs->job);
         }
-        bdrv_no_throttling_begin(bs);
+        bdrv_parent_drained_begin(bs);
         bdrv_io_unplugged_begin(bs);
         bdrv_drain_recurse(bs);
         aio_context_release(aio_context);
@@ -338,10 +302,10 @@ void bdrv_drain_all(void)

         for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
             AioContext *aio_context = ctx->data;
-            bs = NULL;
+            it = NULL;

             aio_context_acquire(aio_context);
-            while ((bs = bdrv_next(bs))) {
+            while ((it = bdrv_next(it, &bs))) {
                 if (aio_context == bdrv_get_aio_context(bs)) {
                     if (bdrv_requests_pending(bs)) {
                         busy = true;
@@ -354,13 +318,13 @@ void bdrv_drain_all(void)
         }
     }

-    bs = NULL;
-    while ((bs = bdrv_next(bs))) {
+    it = NULL;
+    while ((it = bdrv_next(it, &bs))) {
         AioContext *aio_context = bdrv_get_aio_context(bs);

         aio_context_acquire(aio_context);
         bdrv_io_unplugged_end(bs);
-        bdrv_no_throttling_end(bs);
+        bdrv_parent_drained_end(bs);
         if (bs->job) {
             block_job_resume(bs->job);
         }
@@ -1069,11 +1033,6 @@ int coroutine_fn bdrv_co_preadv(BlockDriverState *bs,
         flags |= BDRV_REQ_COPY_ON_READ;
     }

-    /* throttling disk I/O */
-    if (bs->throttle_state) {
-        throttle_group_co_io_limits_intercept(bs, bytes, false);
-    }
-
     /* Align read if necessary by padding qiov */
     if (offset & (align - 1)) {
         head_buf = qemu_blockalign(bs, align);
@@ -1430,11 +1389,6 @@ int coroutine_fn bdrv_co_pwritev(BlockDriverState *bs,
         return ret;
     }

-    /* throttling disk I/O */
-    if (bs->throttle_state) {
-        throttle_group_co_io_limits_intercept(bs, bytes, true);
-    }
-
     /*
      * Align write if necessary by performing a read-modify-write cycle.
      * Pad qiov with the read parts and be sure to have a tracked request not
@@ -1925,200 +1879,6 @@ BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
                                  cb, opaque, true);
 }

-
-typedef struct MultiwriteCB {
-    int error;
-    int num_requests;
-    int num_callbacks;
-    struct {
-        BlockCompletionFunc *cb;
-        void *opaque;
-        QEMUIOVector *free_qiov;
-    } callbacks[];
-} MultiwriteCB;
-
-static void multiwrite_user_cb(MultiwriteCB *mcb)
-{
-    int i;
-
-    for (i = 0; i < mcb->num_callbacks; i++) {
-        mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
-        if (mcb->callbacks[i].free_qiov) {
-            qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
-        }
-        g_free(mcb->callbacks[i].free_qiov);
-    }
-}
-
-static void multiwrite_cb(void *opaque, int ret)
-{
-    MultiwriteCB *mcb = opaque;
-
-    trace_multiwrite_cb(mcb, ret);
-
-    if (ret < 0 && !mcb->error) {
-        mcb->error = ret;
-    }
-
-    mcb->num_requests--;
-    if (mcb->num_requests == 0) {
-        multiwrite_user_cb(mcb);
-        g_free(mcb);
-    }
-}
-
-static int multiwrite_req_compare(const void *a, const void *b)
-{
-    const BlockRequest *req1 = a, *req2 = b;
-
-    /*
-     * Note that we can't simply subtract req2->sector from req1->sector
-     * here as that could overflow the return value.
-     */
-    if (req1->sector > req2->sector) {
-        return 1;
-    } else if (req1->sector < req2->sector) {
-        return -1;
-    } else {
-        return 0;
-    }
-}
-
-/*
- * Takes a bunch of requests and tries to merge them. Returns the number of
- * requests that remain after merging.
- */
-static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
-                            int num_reqs, MultiwriteCB *mcb)
-{
-    int i, outidx;
-
-    // Sort requests by start sector
-    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
-
-    // Check if adjacent requests touch the same clusters. If so, combine them,
-    // filling up gaps with zero sectors.
-    outidx = 0;
-    for (i = 1; i < num_reqs; i++) {
-        int merge = 0;
-        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
-
-        // Handle exactly sequential writes and overlapping writes.
-        if (reqs[i].sector <= oldreq_last) {
-            merge = 1;
-        }
-
-        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 >
-            bs->bl.max_iov) {
-            merge = 0;
-        }
-
-        if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
-            reqs[i].nb_sectors > bs->bl.max_transfer_length) {
-            merge = 0;
-        }
-
-        if (merge) {
-            size_t size;
-            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
-            qemu_iovec_init(qiov,
-                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
-
-            // Add the first request to the merged one. If the requests are
-            // overlapping, drop the last sectors of the first request.
-            size = (reqs[i].sector - reqs[outidx].sector) << 9;
-            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
-
-            // We should need to add any zeros between the two requests
-            assert (reqs[i].sector <= oldreq_last);
-
-            // Add the second request
-            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
-
-            // Add tail of first request, if necessary
-            if (qiov->size < reqs[outidx].qiov->size) {
-                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
-                                  reqs[outidx].qiov->size - qiov->size);
-            }
-
-            reqs[outidx].nb_sectors = qiov->size >> 9;
-            reqs[outidx].qiov = qiov;
-
-            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
-        } else {
-            outidx++;
-            reqs[outidx].sector     = reqs[i].sector;
-            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
-            reqs[outidx].qiov       = reqs[i].qiov;
-        }
-    }
-
-    if (bs->blk) {
-        block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE,
-                              num_reqs - outidx - 1);
-    }
-
-    return outidx + 1;
-}
-
-/*
- * Submit multiple AIO write requests at once.
- *
- * On success, the function returns 0 and all requests in the reqs array have
- * been submitted. In error case this function returns -1, and any of the
- * requests may or may not be submitted yet. In particular, this means that the
- * callback will be called for some of the requests, for others it won't. The
- * caller must check the error field of the BlockRequest to wait for the right
- * callbacks (if error != 0, no callback will be called).
- *
- * The implementation may modify the contents of the reqs array, e.g. to merge
- * requests. However, the fields opaque and error are left unmodified as they
- * are used to signal failure for a single request to the caller.
- */
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
-{
-    MultiwriteCB *mcb;
-    int i;
-
-    /* don't submit writes if we don't have a medium */
-    if (bs->drv == NULL) {
-        for (i = 0; i < num_reqs; i++) {
-            reqs[i].error = -ENOMEDIUM;
-        }
-        return -1;
-    }
-
-    if (num_reqs == 0) {
-        return 0;
-    }
-
-    // Create MultiwriteCB structure
-    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
-    mcb->num_requests = 0;
-    mcb->num_callbacks = num_reqs;
-
-    for (i = 0; i < num_reqs; i++) {
-        mcb->callbacks[i].cb = reqs[i].cb;
-        mcb->callbacks[i].opaque = reqs[i].opaque;
-    }
-
-    // Check for mergable requests
-    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
-
-    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
-
-    /* Run the aio requests. */
-    mcb->num_requests = num_reqs;
-    for (i = 0; i < num_reqs; i++) {
-        bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
-                              reqs[i].nb_sectors, reqs[i].flags,
-                              multiwrite_cb, mcb,
-                              true);
-    }
-
-    return 0;
-}
-
 void bdrv_aio_cancel(BlockAIOCB *acb)
 {
     qemu_aio_ref(acb);
@@ -2789,11 +2549,14 @@ void bdrv_drained_begin(BlockDriverState *bs)
     if (!bs->quiesce_counter++) {
         aio_disable_external(bdrv_get_aio_context(bs));
     }
+    bdrv_parent_drained_begin(bs);
     bdrv_drain(bs);
 }

 void bdrv_drained_end(BlockDriverState *bs)
 {
+    bdrv_parent_drained_end(bs);
+
     assert(bs->quiesce_counter > 0);
     if (--bs->quiesce_counter > 0) {
         return;
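The new bdrv_parent_drained_begin()/bdrv_parent_drained_end() helpers replace the throttling-specific calls with a generic notification of every parent of a BlockDriverState through optional BdrvChild role callbacks. The standalone sketch below is not QEMU code: FakeChild, FakeRole and the other names are hypothetical stand-ins for BdrvChild and BdrvChildRole, used only to illustrate the pattern the diff introduces (walk the parent list and invoke a hook only if the parent registered one).

/* Minimal sketch of the parent-notification pattern, assuming made-up types. */
#include <stdio.h>

typedef struct FakeChild FakeChild;

typedef struct FakeRole {
    /* Optional hooks; a parent that does not care leaves them NULL. */
    void (*drained_begin)(FakeChild *c);
    void (*drained_end)(FakeChild *c);
} FakeRole;

struct FakeChild {
    const FakeRole *role;
    const char *name;
    FakeChild *next;          /* stands in for the QLIST linkage */
};

typedef struct FakeBDS {
    FakeChild *parents;       /* list of parent edges */
} FakeBDS;

static void parent_drained_begin(FakeBDS *bs)
{
    for (FakeChild *c = bs->parents; c; c = c->next) {
        if (c->role->drained_begin) {
            c->role->drained_begin(c);   /* e.g. stop issuing new requests */
        }
    }
}

static void parent_drained_end(FakeBDS *bs)
{
    for (FakeChild *c = bs->parents; c; c = c->next) {
        if (c->role->drained_end) {
            c->role->drained_end(c);     /* e.g. restart queued requests */
        }
    }
}

/* One parent type that implements both hooks. */
static void backend_drained_begin(FakeChild *c) { printf("%s: pause\n", c->name); }
static void backend_drained_end(FakeChild *c)   { printf("%s: resume\n", c->name); }

static const FakeRole backend_role = {
    .drained_begin = backend_drained_begin,
    .drained_end   = backend_drained_end,
};

int main(void)
{
    FakeChild parent = { .role = &backend_role, .name = "blk0", .next = NULL };
    FakeBDS bs = { .parents = &parent };

    parent_drained_begin(&bs);   /* all parents told to stop sending I/O */
    /* ... drain in-flight requests here ... */
    parent_drained_end(&bs);     /* parents may resume */
    return 0;
}

The point of the indirection, as far as this diff shows, is that block/io.c no longer needs to know about throttling at all: any parent that implements the callbacks (for example one that queues throttled requests) can pause and restart its own I/O when the child is drained.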