aboutsummaryrefslogtreecommitdiff
path: root/block/io.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/io.c')
-rw-r--r--block/io.c293
1 files changed, 28 insertions, 265 deletions
diff --git a/block/io.c b/block/io.c
index cd6d71a503..60a6bd8bdb 100644
--- a/block/io.c
+++ b/block/io.c
@@ -27,7 +27,6 @@
#include "sysemu/block-backend.h"
#include "block/blockjob.h"
#include "block/block_int.h"
-#include "block/throttle-groups.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
@@ -46,56 +45,26 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
-/* throttling disk I/O limits */
-void bdrv_set_io_limits(BlockDriverState *bs,
- ThrottleConfig *cfg)
+static void bdrv_parent_drained_begin(BlockDriverState *bs)
{
- throttle_group_config(bs, cfg);
-}
+ BdrvChild *c;
-void bdrv_no_throttling_begin(BlockDriverState *bs)
-{
- if (bs->io_limits_disabled++ == 0) {
- throttle_group_restart_bs(bs);
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c->role->drained_begin) {
+ c->role->drained_begin(c);
+ }
}
}
-void bdrv_no_throttling_end(BlockDriverState *bs)
+static void bdrv_parent_drained_end(BlockDriverState *bs)
{
- assert(bs->io_limits_disabled);
- --bs->io_limits_disabled;
-}
-
-void bdrv_io_limits_disable(BlockDriverState *bs)
-{
- assert(bs->throttle_state);
- bdrv_no_throttling_begin(bs);
- throttle_group_unregister_bs(bs);
- bdrv_no_throttling_end(bs);
-}
-
-/* should be called before bdrv_set_io_limits if a limit is set */
-void bdrv_io_limits_enable(BlockDriverState *bs, const char *group)
-{
- assert(!bs->throttle_state);
- throttle_group_register_bs(bs, group);
-}
-
-void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group)
-{
- /* this bs is not part of any group */
- if (!bs->throttle_state) {
- return;
- }
+ BdrvChild *c;
- /* this bs is a part of the same group than the one we want */
- if (!g_strcmp0(throttle_group_get_name(bs), group)) {
- return;
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c->role->drained_end) {
+ c->role->drained_end(c);
+ }
}
-
- /* need to change the group this bs belong to */
- bdrv_io_limits_disable(bs);
- bdrv_io_limits_enable(bs, group);
}
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
@@ -182,12 +151,6 @@ bool bdrv_requests_pending(BlockDriverState *bs)
if (!QLIST_EMPTY(&bs->tracked_requests)) {
return true;
}
- if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
- return true;
- }
- if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
- return true;
- }
QLIST_FOREACH(child, &bs->children, next) {
if (bdrv_requests_pending(child->bs)) {
@@ -275,17 +238,17 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
*/
void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
{
- bdrv_no_throttling_begin(bs);
+ bdrv_parent_drained_begin(bs);
bdrv_io_unplugged_begin(bs);
bdrv_drain_recurse(bs);
bdrv_co_yield_to_drain(bs);
bdrv_io_unplugged_end(bs);
- bdrv_no_throttling_end(bs);
+ bdrv_parent_drained_end(bs);
}
void bdrv_drain(BlockDriverState *bs)
{
- bdrv_no_throttling_begin(bs);
+ bdrv_parent_drained_begin(bs);
bdrv_io_unplugged_begin(bs);
bdrv_drain_recurse(bs);
if (qemu_in_coroutine()) {
@@ -294,7 +257,7 @@ void bdrv_drain(BlockDriverState *bs)
bdrv_drain_poll(bs);
}
bdrv_io_unplugged_end(bs);
- bdrv_no_throttling_end(bs);
+ bdrv_parent_drained_end(bs);
}
/*
@@ -307,17 +270,18 @@ void bdrv_drain_all(void)
{
/* Always run first iteration so any pending completion BHs run */
bool busy = true;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator *it = NULL;
GSList *aio_ctxs = NULL, *ctx;
- while ((bs = bdrv_next(bs))) {
+ while ((it = bdrv_next(it, &bs))) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
if (bs->job) {
block_job_pause(bs->job);
}
- bdrv_no_throttling_begin(bs);
+ bdrv_parent_drained_begin(bs);
bdrv_io_unplugged_begin(bs);
bdrv_drain_recurse(bs);
aio_context_release(aio_context);
@@ -338,10 +302,10 @@ void bdrv_drain_all(void)
for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
AioContext *aio_context = ctx->data;
- bs = NULL;
+ it = NULL;
aio_context_acquire(aio_context);
- while ((bs = bdrv_next(bs))) {
+ while ((it = bdrv_next(it, &bs))) {
if (aio_context == bdrv_get_aio_context(bs)) {
if (bdrv_requests_pending(bs)) {
busy = true;
@@ -354,13 +318,13 @@ void bdrv_drain_all(void)
}
}
- bs = NULL;
- while ((bs = bdrv_next(bs))) {
+ it = NULL;
+ while ((it = bdrv_next(it, &bs))) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
bdrv_io_unplugged_end(bs);
- bdrv_no_throttling_end(bs);
+ bdrv_parent_drained_end(bs);
if (bs->job) {
block_job_resume(bs->job);
}
@@ -1069,11 +1033,6 @@ int coroutine_fn bdrv_co_preadv(BlockDriverState *bs,
flags |= BDRV_REQ_COPY_ON_READ;
}
- /* throttling disk I/O */
- if (bs->throttle_state) {
- throttle_group_co_io_limits_intercept(bs, bytes, false);
- }
-
/* Align read if necessary by padding qiov */
if (offset & (align - 1)) {
head_buf = qemu_blockalign(bs, align);
@@ -1430,11 +1389,6 @@ int coroutine_fn bdrv_co_pwritev(BlockDriverState *bs,
return ret;
}
- /* throttling disk I/O */
- if (bs->throttle_state) {
- throttle_group_co_io_limits_intercept(bs, bytes, true);
- }
-
/*
* Align write if necessary by performing a read-modify-write cycle.
* Pad qiov with the read parts and be sure to have a tracked request not
@@ -1925,200 +1879,6 @@ BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
cb, opaque, true);
}
-
-typedef struct MultiwriteCB {
- int error;
- int num_requests;
- int num_callbacks;
- struct {
- BlockCompletionFunc *cb;
- void *opaque;
- QEMUIOVector *free_qiov;
- } callbacks[];
-} MultiwriteCB;
-
-static void multiwrite_user_cb(MultiwriteCB *mcb)
-{
- int i;
-
- for (i = 0; i < mcb->num_callbacks; i++) {
- mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
- if (mcb->callbacks[i].free_qiov) {
- qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
- }
- g_free(mcb->callbacks[i].free_qiov);
- }
-}
-
-static void multiwrite_cb(void *opaque, int ret)
-{
- MultiwriteCB *mcb = opaque;
-
- trace_multiwrite_cb(mcb, ret);
-
- if (ret < 0 && !mcb->error) {
- mcb->error = ret;
- }
-
- mcb->num_requests--;
- if (mcb->num_requests == 0) {
- multiwrite_user_cb(mcb);
- g_free(mcb);
- }
-}
-
-static int multiwrite_req_compare(const void *a, const void *b)
-{
- const BlockRequest *req1 = a, *req2 = b;
-
- /*
- * Note that we can't simply subtract req2->sector from req1->sector
- * here as that could overflow the return value.
- */
- if (req1->sector > req2->sector) {
- return 1;
- } else if (req1->sector < req2->sector) {
- return -1;
- } else {
- return 0;
- }
-}
-
-/*
- * Takes a bunch of requests and tries to merge them. Returns the number of
- * requests that remain after merging.
- */
-static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
- int num_reqs, MultiwriteCB *mcb)
-{
- int i, outidx;
-
- // Sort requests by start sector
- qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
-
- // Check if adjacent requests touch the same clusters. If so, combine them,
- // filling up gaps with zero sectors.
- outidx = 0;
- for (i = 1; i < num_reqs; i++) {
- int merge = 0;
- int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
-
- // Handle exactly sequential writes and overlapping writes.
- if (reqs[i].sector <= oldreq_last) {
- merge = 1;
- }
-
- if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 >
- bs->bl.max_iov) {
- merge = 0;
- }
-
- if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
- reqs[i].nb_sectors > bs->bl.max_transfer_length) {
- merge = 0;
- }
-
- if (merge) {
- size_t size;
- QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
- qemu_iovec_init(qiov,
- reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
-
- // Add the first request to the merged one. If the requests are
- // overlapping, drop the last sectors of the first request.
- size = (reqs[i].sector - reqs[outidx].sector) << 9;
- qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
-
- // We should need to add any zeros between the two requests
- assert (reqs[i].sector <= oldreq_last);
-
- // Add the second request
- qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
-
- // Add tail of first request, if necessary
- if (qiov->size < reqs[outidx].qiov->size) {
- qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
- reqs[outidx].qiov->size - qiov->size);
- }
-
- reqs[outidx].nb_sectors = qiov->size >> 9;
- reqs[outidx].qiov = qiov;
-
- mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
- } else {
- outidx++;
- reqs[outidx].sector = reqs[i].sector;
- reqs[outidx].nb_sectors = reqs[i].nb_sectors;
- reqs[outidx].qiov = reqs[i].qiov;
- }
- }
-
- if (bs->blk) {
- block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE,
- num_reqs - outidx - 1);
- }
-
- return outidx + 1;
-}
-
-/*
- * Submit multiple AIO write requests at once.
- *
- * On success, the function returns 0 and all requests in the reqs array have
- * been submitted. In error case this function returns -1, and any of the
- * requests may or may not be submitted yet. In particular, this means that the
- * callback will be called for some of the requests, for others it won't. The
- * caller must check the error field of the BlockRequest to wait for the right
- * callbacks (if error != 0, no callback will be called).
- *
- * The implementation may modify the contents of the reqs array, e.g. to merge
- * requests. However, the fields opaque and error are left unmodified as they
- * are used to signal failure for a single request to the caller.
- */
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
-{
- MultiwriteCB *mcb;
- int i;
-
- /* don't submit writes if we don't have a medium */
- if (bs->drv == NULL) {
- for (i = 0; i < num_reqs; i++) {
- reqs[i].error = -ENOMEDIUM;
- }
- return -1;
- }
-
- if (num_reqs == 0) {
- return 0;
- }
-
- // Create MultiwriteCB structure
- mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
- mcb->num_requests = 0;
- mcb->num_callbacks = num_reqs;
-
- for (i = 0; i < num_reqs; i++) {
- mcb->callbacks[i].cb = reqs[i].cb;
- mcb->callbacks[i].opaque = reqs[i].opaque;
- }
-
- // Check for mergable requests
- num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
-
- trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
-
- /* Run the aio requests. */
- mcb->num_requests = num_reqs;
- for (i = 0; i < num_reqs; i++) {
- bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
- reqs[i].nb_sectors, reqs[i].flags,
- multiwrite_cb, mcb,
- true);
- }
-
- return 0;
-}
-
void bdrv_aio_cancel(BlockAIOCB *acb)
{
qemu_aio_ref(acb);
@@ -2789,11 +2549,14 @@ void bdrv_drained_begin(BlockDriverState *bs)
if (!bs->quiesce_counter++) {
aio_disable_external(bdrv_get_aio_context(bs));
}
+ bdrv_parent_drained_begin(bs);
bdrv_drain(bs);
}
void bdrv_drained_end(BlockDriverState *bs)
{
+ bdrv_parent_drained_end(bs);
+
assert(bs->quiesce_counter > 0);
if (--bs->quiesce_counter > 0) {
return;