diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2015-06-12 18:04:14 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2015-06-12 18:04:14 +0100 |
commit | 8aeaa055f5d3d4e87bf870892ba301eae57bdc1d (patch) | |
tree | dae0e0c12a82bae3828894b240ff15b6101b1404 | |
parent | 0a2df857a7038c75379cc575de5d4be4c0ac629e (diff) | |
parent | 2db33f88d2b340c049c576ad75d442e4b6ffe768 (diff) |
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Fri Jun 12 15:57:47 2015 BST using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>"
* remotes/stefanha/tags/block-pull-request:
qemu-iotests: expand test 093 to support group throttling
throttle: Update throttle infrastructure copyright
throttle: add the name of the ThrottleGroup to BlockDeviceInfo
throttle: acquire the ThrottleGroup lock in bdrv_swap()
throttle: Add throttle group support
throttle: Add throttle group infrastructure tests
throttle: Add throttle group infrastructure
throttle: Extract timers from ThrottleState into a separate structure
raw-posix: Fix .bdrv_co_get_block_status() for unaligned image size
Revert "iothread: release iothread around aio_poll"
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | async.c | 8 | ||||
-rw-r--r-- | block.c | 38 | ||||
-rw-r--r-- | block/Makefile.objs | 1 | ||||
-rw-r--r-- | block/io.c | 71 | ||||
-rw-r--r-- | block/qapi.c | 8 | ||||
-rw-r--r-- | block/raw-posix.c | 5 | ||||
-rw-r--r-- | block/throttle-groups.c | 496 | ||||
-rw-r--r-- | blockdev.c | 38 | ||||
-rw-r--r-- | hmp.c | 10 | ||||
-rw-r--r-- | include/block/block.h | 3 | ||||
-rw-r--r-- | include/block/block_int.h | 7 | ||||
-rw-r--r-- | include/block/throttle-groups.h | 46 | ||||
-rw-r--r-- | include/qemu/throttle.h | 46 | ||||
-rw-r--r-- | iothread.c | 11 | ||||
-rw-r--r-- | qapi/block-core.json | 29 | ||||
-rw-r--r-- | qemu-options.hx | 1 | ||||
-rw-r--r-- | qmp-commands.hx | 3 | ||||
-rwxr-xr-x | tests/qemu-iotests/093 | 93 | ||||
-rw-r--r-- | tests/test-aio.c | 19 | ||||
-rw-r--r-- | tests/test-throttle.c | 163 | ||||
-rw-r--r-- | util/throttle.c | 81 |
21 files changed, 956 insertions, 221 deletions
@@ -280,6 +280,12 @@ static void aio_timerlist_notify(void *opaque) aio_notify(opaque); } +static void aio_rfifolock_cb(void *opaque) +{ + /* Kick owner thread in case they are blocked in aio_poll() */ + aio_notify(opaque); +} + AioContext *aio_context_new(Error **errp) { int ret; @@ -297,7 +303,7 @@ AioContext *aio_context_new(Error **errp) event_notifier_test_and_clear); ctx->thread_pool = NULL; qemu_mutex_init(&ctx->bh_lock); - rfifolock_init(&ctx->lock, NULL, NULL); + rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx); timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx); return ctx; @@ -36,6 +36,7 @@ #include "qmp-commands.h" #include "qemu/timer.h" #include "qapi-event.h" +#include "block/throttle-groups.h" #ifdef CONFIG_BSD #include <sys/types.h> @@ -1822,12 +1823,18 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->enable_write_cache = bs_src->enable_write_cache; /* i/o throttled req */ - memcpy(&bs_dest->throttle_state, - &bs_src->throttle_state, - sizeof(ThrottleState)); + bs_dest->throttle_state = bs_src->throttle_state, + bs_dest->io_limits_enabled = bs_src->io_limits_enabled; + bs_dest->pending_reqs[0] = bs_src->pending_reqs[0]; + bs_dest->pending_reqs[1] = bs_src->pending_reqs[1]; bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; - bs_dest->io_limits_enabled = bs_src->io_limits_enabled; + memcpy(&bs_dest->round_robin, + &bs_src->round_robin, + sizeof(bs_dest->round_robin)); + memcpy(&bs_dest->throttle_timers, + &bs_src->throttle_timers, + sizeof(ThrottleTimers)); /* r/w error */ bs_dest->on_read_error = bs_src->on_read_error; @@ -1881,12 +1888,21 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list); } + /* If the BlockDriverState is part of a throttling group acquire + * its lock since we're going to mess with the protected fields. + * Otherwise there's no need to worry since no one else can touch + * them. */ + if (bs_old->throttle_state) { + throttle_group_lock(bs_old); + } + /* bs_new must be unattached and shouldn't have anything fancy enabled */ assert(!bs_new->blk); assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); assert(bs_new->job == NULL); assert(bs_new->io_limits_enabled == false); - assert(!throttle_have_timer(&bs_new->throttle_state)); + assert(bs_new->throttle_state == NULL); + assert(!throttle_timers_are_initialized(&bs_new->throttle_timers)); tmp = *bs_new; *bs_new = *bs_old; @@ -1903,7 +1919,13 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) /* Check a few fields that should remain attached to the device */ assert(bs_new->job == NULL); assert(bs_new->io_limits_enabled == false); - assert(!throttle_have_timer(&bs_new->throttle_state)); + assert(bs_new->throttle_state == NULL); + assert(!throttle_timers_are_initialized(&bs_new->throttle_timers)); + + /* Release the ThrottleGroup lock */ + if (bs_old->throttle_state) { + throttle_group_unlock(bs_old); + } /* insert the nodes back into the graph node list if needed */ if (bs_new->node_name[0] != '\0') { @@ -3691,7 +3713,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs) } if (bs->io_limits_enabled) { - throttle_detach_aio_context(&bs->throttle_state); + throttle_timers_detach_aio_context(&bs->throttle_timers); } if (bs->drv->bdrv_detach_aio_context) { bs->drv->bdrv_detach_aio_context(bs); @@ -3727,7 +3749,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs, bs->drv->bdrv_attach_aio_context(bs, new_context); } if (bs->io_limits_enabled) { - throttle_attach_aio_context(&bs->throttle_state, new_context); + throttle_timers_attach_aio_context(&bs->throttle_timers, new_context); } QLIST_FOREACH(ban, &bs->aio_notifiers, list) { diff --git a/block/Makefile.objs b/block/Makefile.objs index 0d8c2a4ab6..c34fd7cdc2 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -10,6 +10,7 @@ block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o block-obj-$(CONFIG_POSIX) += raw-posix.o block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o block-obj-y += null.o mirror.o io.o +block-obj-y += throttle-groups.o block-obj-y += nbd.o nbd-client.o sheepdog.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o diff --git a/block/io.c b/block/io.c index e394d92626..bb4f78784e 100644 --- a/block/io.c +++ b/block/io.c @@ -23,9 +23,9 @@ */ #include "trace.h" -#include "sysemu/qtest.h" #include "block/blockjob.h" #include "block/block_int.h" +#include "block/throttle-groups.h" #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ @@ -65,7 +65,7 @@ void bdrv_set_io_limits(BlockDriverState *bs, { int i; - throttle_config(&bs->throttle_state, cfg); + throttle_group_config(bs, cfg); for (i = 0; i < 2; i++) { qemu_co_enter_next(&bs->throttled_reqs[i]); @@ -95,72 +95,33 @@ static bool bdrv_start_throttled_reqs(BlockDriverState *bs) void bdrv_io_limits_disable(BlockDriverState *bs) { bs->io_limits_enabled = false; - bdrv_start_throttled_reqs(bs); - - throttle_destroy(&bs->throttle_state); -} - -static void bdrv_throttle_read_timer_cb(void *opaque) -{ - BlockDriverState *bs = opaque; - qemu_co_enter_next(&bs->throttled_reqs[0]); -} - -static void bdrv_throttle_write_timer_cb(void *opaque) -{ - BlockDriverState *bs = opaque; - qemu_co_enter_next(&bs->throttled_reqs[1]); + throttle_group_unregister_bs(bs); } /* should be called before bdrv_set_io_limits if a limit is set */ -void bdrv_io_limits_enable(BlockDriverState *bs) +void bdrv_io_limits_enable(BlockDriverState *bs, const char *group) { - int clock_type = QEMU_CLOCK_REALTIME; - - if (qtest_enabled()) { - /* For testing block IO throttling only */ - clock_type = QEMU_CLOCK_VIRTUAL; - } assert(!bs->io_limits_enabled); - throttle_init(&bs->throttle_state, - bdrv_get_aio_context(bs), - clock_type, - bdrv_throttle_read_timer_cb, - bdrv_throttle_write_timer_cb, - bs); + throttle_group_register_bs(bs, group); bs->io_limits_enabled = true; } -/* This function makes an IO wait if needed - * - * @nb_sectors: the number of sectors of the IO - * @is_write: is the IO a write - */ -static void bdrv_io_limits_intercept(BlockDriverState *bs, - unsigned int bytes, - bool is_write) +void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group) { - /* does this io must wait */ - bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write); - - /* if must wait or any request of this type throttled queue the IO */ - if (must_wait || - !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) { - qemu_co_queue_wait(&bs->throttled_reqs[is_write]); + /* this bs is not part of any group */ + if (!bs->throttle_state) { + return; } - /* the IO will be executed, do the accounting */ - throttle_account(&bs->throttle_state, is_write, bytes); - - - /* if the next request must wait -> do nothing */ - if (throttle_schedule_timer(&bs->throttle_state, is_write)) { + /* this bs is a part of the same group than the one we want */ + if (!g_strcmp0(throttle_group_get_name(bs), group)) { return; } - /* else queue next request for execution */ - qemu_co_queue_next(&bs->throttled_reqs[is_write]); + /* need to change the group this bs belong to */ + bdrv_io_limits_disable(bs); + bdrv_io_limits_enable(bs, group); } void bdrv_setup_io_funcs(BlockDriver *bdrv) @@ -967,7 +928,7 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, /* throttling disk I/O */ if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, bytes, false); + throttle_group_co_io_limits_intercept(bs, bytes, false); } /* Align read if necessary by padding qiov */ @@ -1297,7 +1258,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, /* throttling disk I/O */ if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, bytes, true); + throttle_group_co_io_limits_intercept(bs, bytes, true); } /* diff --git a/block/qapi.c b/block/qapi.c index 18d2b95f54..a738148bce 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -24,6 +24,7 @@ #include "block/qapi.h" #include "block/block_int.h" +#include "block/throttle-groups.h" #include "block/write-threshold.h" #include "qmp-commands.h" #include "qapi-visit.h" @@ -65,7 +66,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp) if (bs->io_limits_enabled) { ThrottleConfig cfg; - throttle_get_config(&bs->throttle_state, &cfg); + + throttle_group_get_config(bs, &cfg); + info->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg; info->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg; info->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg; @@ -90,6 +93,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp) info->has_iops_size = cfg.op_size; info->iops_size = cfg.op_size; + + info->has_group = true; + info->group = g_strdup(throttle_group_get_name(bs)); } info->write_threshold = bdrv_write_threshold_get(bs); diff --git a/block/raw-posix.c b/block/raw-posix.c index 2990e954ae..44ade8cf4e 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1848,8 +1848,9 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, *pnum = nb_sectors; ret = BDRV_BLOCK_DATA; } else if (data == start) { - /* On a data extent, compute sectors to the end of the extent. */ - *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE); + /* On a data extent, compute sectors to the end of the extent, + * possibly including a partial sector at EOF. */ + *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE)); ret = BDRV_BLOCK_DATA; } else { /* On a hole, compute sectors to the beginning of the next extent. */ diff --git a/block/throttle-groups.c b/block/throttle-groups.c new file mode 100644 index 0000000000..efc462fbc5 --- /dev/null +++ b/block/throttle-groups.c @@ -0,0 +1,496 @@ +/* + * QEMU block throttling group infrastructure + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * Benoît Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "block/throttle-groups.h" +#include "qemu/queue.h" +#include "qemu/thread.h" +#include "sysemu/qtest.h" + +/* The ThrottleGroup structure (with its ThrottleState) is shared + * among different BlockDriverState and it's independent from + * AioContext, so in order to use it from different threads it needs + * its own locking. + * + * This locking is however handled internally in this file, so it's + * mostly transparent to outside users (but see the documentation in + * throttle_groups_lock()). + * + * The whole ThrottleGroup structure is private and invisible to + * outside users, that only use it through its ThrottleState. + * + * In addition to the ThrottleGroup structure, BlockDriverState has + * fields that need to be accessed by other members of the group and + * therefore also need to be protected by this lock. Once a BDS is + * registered in a group those fields can be accessed by other threads + * any time. + * + * Again, all this is handled internally and is mostly transparent to + * the outside. The 'throttle_timers' field however has an additional + * constraint because it may be temporarily invalid (see for example + * bdrv_set_aio_context()). Therefore in this file a thread will + * access some other BDS's timers only after verifying that that BDS + * has throttled requests in the queue. + */ +typedef struct ThrottleGroup { + char *name; /* This is constant during the lifetime of the group */ + + QemuMutex lock; /* This lock protects the following four fields */ + ThrottleState ts; + QLIST_HEAD(, BlockDriverState) head; + BlockDriverState *tokens[2]; + bool any_timer_armed[2]; + + /* These two are protected by the global throttle_groups_lock */ + unsigned refcount; + QTAILQ_ENTRY(ThrottleGroup) list; +} ThrottleGroup; + +static QemuMutex throttle_groups_lock; +static QTAILQ_HEAD(, ThrottleGroup) throttle_groups = + QTAILQ_HEAD_INITIALIZER(throttle_groups); + +/* Increments the reference count of a ThrottleGroup given its name. + * + * If no ThrottleGroup is found with the given name a new one is + * created. + * + * @name: the name of the ThrottleGroup + * @ret: the ThrottleGroup + */ +static ThrottleGroup *throttle_group_incref(const char *name) +{ + ThrottleGroup *tg = NULL; + ThrottleGroup *iter; + + qemu_mutex_lock(&throttle_groups_lock); + + /* Look for an existing group with that name */ + QTAILQ_FOREACH(iter, &throttle_groups, list) { + if (!strcmp(name, iter->name)) { + tg = iter; + break; + } + } + + /* Create a new one if not found */ + if (!tg) { + tg = g_new0(ThrottleGroup, 1); + tg->name = g_strdup(name); + qemu_mutex_init(&tg->lock); + throttle_init(&tg->ts); + QLIST_INIT(&tg->head); + + QTAILQ_INSERT_TAIL(&throttle_groups, tg, list); + } + + tg->refcount++; + + qemu_mutex_unlock(&throttle_groups_lock); + + return tg; +} + +/* Decrease the reference count of a ThrottleGroup. + * + * When the reference count reaches zero the ThrottleGroup is + * destroyed. + * + * @tg: The ThrottleGroup to unref + */ +static void throttle_group_unref(ThrottleGroup *tg) +{ + qemu_mutex_lock(&throttle_groups_lock); + if (--tg->refcount == 0) { + QTAILQ_REMOVE(&throttle_groups, tg, list); + qemu_mutex_destroy(&tg->lock); + g_free(tg->name); + g_free(tg); + } + qemu_mutex_unlock(&throttle_groups_lock); +} + +/* Get the name from a BlockDriverState's ThrottleGroup. The name (and + * the pointer) is guaranteed to remain constant during the lifetime + * of the group. + * + * @bs: a BlockDriverState that is member of a throttling group + * @ret: the name of the group. + */ +const char *throttle_group_get_name(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + return tg->name; +} + +/* Return the next BlockDriverState in the round-robin sequence, + * simulating a circular list. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @ret: the next BlockDriverState in the sequence + */ +static BlockDriverState *throttle_group_next_bs(BlockDriverState *bs) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + BlockDriverState *next = QLIST_NEXT(bs, round_robin); + + if (!next) { + return QLIST_FIRST(&tg->head); + } + + return next; +} + +/* Return the next BlockDriverState in the round-robin sequence with + * pending I/O requests. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @is_write: the type of operation (read/write) + * @ret: the next BlockDriverState with pending requests, or bs + * if there is none. + */ +static BlockDriverState *next_throttle_token(BlockDriverState *bs, + bool is_write) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + BlockDriverState *token, *start; + + start = token = tg->tokens[is_write]; + + /* get next bs round in round robin style */ + token = throttle_group_next_bs(token); + while (token != start && !token->pending_reqs[is_write]) { + token = throttle_group_next_bs(token); + } + + /* If no IO are queued for scheduling on the next round robin token + * then decide the token is the current bs because chances are + * the current bs get the current request queued. + */ + if (token == start && !token->pending_reqs[is_write]) { + token = bs; + } + + return token; +} + +/* Check if the next I/O request for a BlockDriverState needs to be + * throttled or not. If there's no timer set in this group, set one + * and update the token accordingly. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @is_write: the type of operation (read/write) + * @ret: whether the I/O request needs to be throttled or not + */ +static bool throttle_group_schedule_timer(BlockDriverState *bs, + bool is_write) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleTimers *tt = &bs->throttle_timers; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + bool must_wait; + + /* Check if any of the timers in this group is already armed */ + if (tg->any_timer_armed[is_write]) { + return true; + } + + must_wait = throttle_schedule_timer(ts, tt, is_write); + + /* If a timer just got armed, set bs as the current token */ + if (must_wait) { + tg->tokens[is_write] = bs; + tg->any_timer_armed[is_write] = true; + } + + return must_wait; +} + +/* Look for the next pending I/O request and schedule it. + * + * This assumes that tg->lock is held. + * + * @bs: the current BlockDriverState + * @is_write: the type of operation (read/write) + */ +static void schedule_next_request(BlockDriverState *bs, bool is_write) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + bool must_wait; + BlockDriverState *token; + + /* Check if there's any pending request to schedule next */ + token = next_throttle_token(bs, is_write); + if (!token->pending_reqs[is_write]) { + return; + } + + /* Set a timer for the request if it needs to be throttled */ + must_wait = throttle_group_schedule_timer(token, is_write); + + /* If it doesn't have to wait, queue it for immediate execution */ + if (!must_wait) { + /* Give preference to requests from the current bs */ + if (qemu_in_coroutine() && + qemu_co_queue_next(&bs->throttled_reqs[is_write])) { + token = bs; + } else { + ThrottleTimers *tt = &token->throttle_timers; + int64_t now = qemu_clock_get_ns(tt->clock_type); + timer_mod(tt->timers[is_write], now + 1); + tg->any_timer_armed[is_write] = true; + } + tg->tokens[is_write] = token; + } +} + +/* Check if an I/O request needs to be throttled, wait and set a timer + * if necessary, and schedule the next request using a round robin + * algorithm. + * + * @bs: the current BlockDriverState + * @bytes: the number of bytes for this I/O + * @is_write: the type of operation (read/write) + */ +void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs, + unsigned int bytes, + bool is_write) +{ + bool must_wait; + BlockDriverState *token; + + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + + /* First we check if this I/O has to be throttled. */ + token = next_throttle_token(bs, is_write); + must_wait = throttle_group_schedule_timer(token, is_write); + + /* Wait if there's a timer set or queued requests of this type */ + if (must_wait || bs->pending_reqs[is_write]) { + bs->pending_reqs[is_write]++; + qemu_mutex_unlock(&tg->lock); + qemu_co_queue_wait(&bs->throttled_reqs[is_write]); + qemu_mutex_lock(&tg->lock); + bs->pending_reqs[is_write]--; + } + + /* The I/O will be executed, so do the accounting */ + throttle_account(bs->throttle_state, is_write, bytes); + + /* Schedule the next request */ + schedule_next_request(bs, is_write); + + qemu_mutex_unlock(&tg->lock); +} + +/* Update the throttle configuration for a particular group. Similar + * to throttle_config(), but guarantees atomicity within the + * throttling group. + * + * @bs: a BlockDriverState that is member of the group + * @cfg: the configuration to set + */ +void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg) +{ + ThrottleTimers *tt = &bs->throttle_timers; + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + throttle_config(ts, tt, cfg); + /* throttle_config() cancels the timers */ + tg->any_timer_armed[0] = tg->any_timer_armed[1] = false; + qemu_mutex_unlock(&tg->lock); +} + +/* Get the throttle configuration from a particular group. Similar to + * throttle_get_config(), but guarantees atomicity within the + * throttling group. + * + * @bs: a BlockDriverState that is member of the group + * @cfg: the configuration will be written here + */ +void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); + throttle_get_config(ts, cfg); + qemu_mutex_unlock(&tg->lock); +} + +/* ThrottleTimers callback. This wakes up a request that was waiting + * because it had been throttled. + * + * @bs: the BlockDriverState whose request had been throttled + * @is_write: the type of operation (read/write) + */ +static void timer_cb(BlockDriverState *bs, bool is_write) +{ + ThrottleState *ts = bs->throttle_state; + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + bool empty_queue; + + /* The timer has just been fired, so we can update the flag */ + qemu_mutex_lock(&tg->lock); + tg->any_timer_armed[is_write] = false; + qemu_mutex_unlock(&tg->lock); + + /* Run the request that was waiting for this timer */ + empty_queue = !qemu_co_enter_next(&bs->throttled_reqs[is_write]); + + /* If the request queue was empty then we have to take care of + * scheduling the next one */ + if (empty_queue) { + qemu_mutex_lock(&tg->lock); + schedule_next_request(bs, is_write); + qemu_mutex_unlock(&tg->lock); + } +} + +static void read_timer_cb(void *opaque) +{ + timer_cb(opaque, false); +} + +static void write_timer_cb(void *opaque) +{ + timer_cb(opaque, true); +} + +/* Register a BlockDriverState in the throttling group, also + * initializing its timers and updating its throttle_state pointer to + * point to it. If a throttling group with that name does not exist + * yet, it will be created. + * + * @bs: the BlockDriverState to insert + * @groupname: the name of the group + */ +void throttle_group_register_bs(BlockDriverState *bs, const char *groupname) +{ + int i; + ThrottleGroup *tg = throttle_group_incref(groupname); + int clock_type = QEMU_CLOCK_REALTIME; + + if (qtest_enabled()) { + /* For testing block IO throttling only */ + clock_type = QEMU_CLOCK_VIRTUAL; + } + + bs->throttle_state = &tg->ts; + + qemu_mutex_lock(&tg->lock); + /* If the ThrottleGroup is new set this BlockDriverState as the token */ + for (i = 0; i < 2; i++) { + if (!tg->tokens[i]) { + tg->tokens[i] = bs; + } + } + + QLIST_INSERT_HEAD(&tg->head, bs, round_robin); + + throttle_timers_init(&bs->throttle_timers, + bdrv_get_aio_context(bs), + clock_type, + read_timer_cb, + write_timer_cb, + bs); + + qemu_mutex_unlock(&tg->lock); +} + +/* Unregister a BlockDriverState from its group, removing it from the + * list, destroying the timers and setting the throttle_state pointer + * to NULL. + * + * The group will be destroyed if it's empty after this operation. + * + * @bs: the BlockDriverState to remove + */ +void throttle_group_unregister_bs(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + int i; + + qemu_mutex_lock(&tg->lock); + for (i = 0; i < 2; i++) { + if (tg->tokens[i] == bs) { + BlockDriverState *token = throttle_group_next_bs(bs); + /* Take care of the case where this is the last bs in the group */ + if (token == bs) { + token = NULL; + } + tg->tokens[i] = token; + } + } + + /* remove the current bs from the list */ + QLIST_REMOVE(bs, round_robin); + throttle_timers_destroy(&bs->throttle_timers); + qemu_mutex_unlock(&tg->lock); + + throttle_group_unref(tg); + bs->throttle_state = NULL; +} + +/* Acquire the lock of this throttling group. + * + * You won't normally need to use this. None of the functions from the + * ThrottleGroup API require you to acquire the lock since all of them + * deal with it internally. + * + * This should only be used in exceptional cases when you want to + * access the protected fields of a BlockDriverState directly + * (e.g. bdrv_swap()). + * + * @bs: a BlockDriverState that is member of the group + */ +void throttle_group_lock(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + qemu_mutex_lock(&tg->lock); +} + +/* Release the lock of this throttling group. + * + * See the comments in throttle_group_lock(). + */ +void throttle_group_unlock(BlockDriverState *bs) +{ + ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); + qemu_mutex_unlock(&tg->lock); +} + +static void throttle_groups_init(void) +{ + qemu_mutex_init(&throttle_groups_lock); +} + +block_init(throttle_groups_init); diff --git a/blockdev.c b/blockdev.c index de94a8bcb3..d4274efe55 100644 --- a/blockdev.c +++ b/blockdev.c @@ -34,6 +34,7 @@ #include "sysemu/blockdev.h" #include "hw/block/block.h" #include "block/blockjob.h" +#include "block/throttle-groups.h" #include "monitor/monitor.h" #include "qemu/option.h" #include "qemu/config-file.h" @@ -357,6 +358,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, const char *id; bool has_driver_specific_opts; BlockdevDetectZeroesOptions detect_zeroes; + const char *throttling_group; /* Check common options by copying from bs_opts to opts, all other options * stay in bs_opts for processing by bdrv_open(). */ @@ -459,6 +461,8 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, cfg.op_size = qemu_opt_get_number(opts, "throttling.iops-size", 0); + throttling_group = qemu_opt_get(opts, "throttling.group"); + if (!check_throttle_config(&cfg, &error)) { error_propagate(errp, error); goto early_err; @@ -547,7 +551,10 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, /* disk I/O throttling */ if (throttle_enabled(&cfg)) { - bdrv_io_limits_enable(bs); + if (!throttling_group) { + throttling_group = blk_name(blk); + } + bdrv_io_limits_enable(bs, throttling_group); bdrv_set_io_limits(bs, &cfg); } @@ -711,6 +718,8 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) { "iops_size", "throttling.iops-size" }, + { "group", "throttling.group" }, + { "readonly", "read-only" }, }; @@ -1951,7 +1960,9 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd, bool has_iops_wr_max, int64_t iops_wr_max, bool has_iops_size, - int64_t iops_size, Error **errp) + int64_t iops_size, + bool has_group, + const char *group, Error **errp) { ThrottleConfig cfg; BlockDriverState *bs; @@ -2004,14 +2015,19 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd, aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - if (!bs->io_limits_enabled && throttle_enabled(&cfg)) { - bdrv_io_limits_enable(bs); - } else if (bs->io_limits_enabled && !throttle_enabled(&cfg)) { - bdrv_io_limits_disable(bs); - } - - if (bs->io_limits_enabled) { + if (throttle_enabled(&cfg)) { + /* Enable I/O limits if they're not enabled yet, otherwise + * just update the throttling group. */ + if (!bs->io_limits_enabled) { + bdrv_io_limits_enable(bs, has_group ? group : device); + } else if (has_group) { + bdrv_io_limits_update_group(bs, group); + } + /* Set the new throttling configuration */ bdrv_set_io_limits(bs, &cfg); + } else if (bs->io_limits_enabled) { + /* If all throttling settings are set to 0, disable I/O limits */ + bdrv_io_limits_disable(bs); } aio_context_release(aio_context); @@ -3189,6 +3205,10 @@ QemuOptsList qemu_common_drive_opts = { .type = QEMU_OPT_NUMBER, .help = "when limiting by iops max size of an I/O in bytes", },{ + .name = "throttling.group", + .type = QEMU_OPT_STRING, + .help = "name of the block throttling group", + },{ .name = "copy-on-read", .type = QEMU_OPT_BOOL, .help = "copy read data from backing file into image file", @@ -400,7 +400,8 @@ static void print_block_info(Monitor *mon, BlockInfo *info, " iops_max=%" PRId64 " iops_rd_max=%" PRId64 " iops_wr_max=%" PRId64 - " iops_size=%" PRId64 "\n", + " iops_size=%" PRId64 + " group=%s\n", inserted->bps, inserted->bps_rd, inserted->bps_wr, @@ -413,7 +414,8 @@ static void print_block_info(Monitor *mon, BlockInfo *info, inserted->iops_max, inserted->iops_rd_max, inserted->iops_wr_max, - inserted->iops_size); + inserted->iops_size, + inserted->group); } if (verbose) { @@ -1357,7 +1359,9 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict) false, 0, false, /* No default I/O size */ - 0, &err); + 0, + false, + NULL, &err); hmp_handle_error(mon, &err); } diff --git a/include/block/block.h b/include/block/block.h index f7680b6e68..f6fb8c497b 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -173,8 +173,9 @@ void bdrv_stats_print(Monitor *mon, const QObject *data); void bdrv_info_stats(Monitor *mon, QObject **ret_data); /* disk I/O throttling */ -void bdrv_io_limits_enable(BlockDriverState *bs); +void bdrv_io_limits_enable(BlockDriverState *bs, const char *group); void bdrv_io_limits_disable(BlockDriverState *bs); +void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group); void bdrv_init(void); void bdrv_init_with_whitelist(void); diff --git a/include/block/block_int.h b/include/block/block_int.h index f004378d58..b174852099 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -379,9 +379,14 @@ struct BlockDriverState { unsigned int serialising_in_flight; /* I/O throttling */ - ThrottleState throttle_state; CoQueue throttled_reqs[2]; bool io_limits_enabled; + /* The following fields are protected by the ThrottleGroup lock. + * See the ThrottleGroup documentation for details. */ + ThrottleState *throttle_state; + ThrottleTimers throttle_timers; + unsigned pending_reqs[2]; + QLIST_ENTRY(BlockDriverState) round_robin; /* I/O stats (display with "info blockstats"). */ BlockAcctStats stats; diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h new file mode 100644 index 0000000000..fab113f6d1 --- /dev/null +++ b/include/block/throttle-groups.h @@ -0,0 +1,46 @@ +/* + * QEMU block throttling group infrastructure + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * Benoît Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef THROTTLE_GROUPS_H +#define THROTTLE_GROUPS_H + +#include "qemu/throttle.h" +#include "block/block_int.h" + +const char *throttle_group_get_name(BlockDriverState *bs); + +void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg); +void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg); + +void throttle_group_register_bs(BlockDriverState *bs, const char *groupname); +void throttle_group_unregister_bs(BlockDriverState *bs); + +void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs, + unsigned int bytes, + bool is_write); + +void throttle_group_lock(BlockDriverState *bs); +void throttle_group_unlock(BlockDriverState *bs); + +#endif diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h index b890613a9c..5af76f0ff4 100644 --- a/include/qemu/throttle.h +++ b/include/qemu/throttle.h @@ -1,10 +1,12 @@ /* * QEMU throttling infrastructure * - * Copyright (C) Nodalink, SARL. 2013 + * Copyright (C) Nodalink, EURL. 2013-2014 + * Copyright (C) Igalia, S.L. 2015 * - * Author: - * Benoît Canet <benoit.canet@irqsave.net> + * Authors: + * Benoît Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -65,14 +67,17 @@ typedef struct ThrottleConfig { typedef struct ThrottleState { ThrottleConfig cfg; /* configuration */ int64_t previous_leak; /* timestamp of the last leak done */ - QEMUTimer * timers[2]; /* timers used to do the throttling */ +} ThrottleState; + +typedef struct ThrottleTimers { + QEMUTimer *timers[2]; /* timers used to do the throttling */ QEMUClockType clock_type; /* the clock used */ /* Callbacks */ QEMUTimerCB *read_timer_cb; QEMUTimerCB *write_timer_cb; void *timer_opaque; -} ThrottleState; +} ThrottleTimers; /* operations on single leaky buckets */ void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta); @@ -86,20 +91,23 @@ bool throttle_compute_timer(ThrottleState *ts, int64_t *next_timestamp); /* init/destroy cycle */ -void throttle_init(ThrottleState *ts, - AioContext *aio_context, - QEMUClockType clock_type, - void (read_timer)(void *), - void (write_timer)(void *), - void *timer_opaque); +void throttle_init(ThrottleState *ts); + +void throttle_timers_init(ThrottleTimers *tt, + AioContext *aio_context, + QEMUClockType clock_type, + QEMUTimerCB *read_timer_cb, + QEMUTimerCB *write_timer_cb, + void *timer_opaque); -void throttle_destroy(ThrottleState *ts); +void throttle_timers_destroy(ThrottleTimers *tt); -void throttle_detach_aio_context(ThrottleState *ts); +void throttle_timers_detach_aio_context(ThrottleTimers *tt); -void throttle_attach_aio_context(ThrottleState *ts, AioContext *new_context); +void throttle_timers_attach_aio_context(ThrottleTimers *tt, + AioContext *new_context); -bool throttle_have_timer(ThrottleState *ts); +bool throttle_timers_are_initialized(ThrottleTimers *tt); /* configuration */ bool throttle_enabled(ThrottleConfig *cfg); @@ -108,12 +116,16 @@ bool throttle_conflicting(ThrottleConfig *cfg); bool throttle_is_valid(ThrottleConfig *cfg); -void throttle_config(ThrottleState *ts, ThrottleConfig *cfg); +void throttle_config(ThrottleState *ts, + ThrottleTimers *tt, + ThrottleConfig *cfg); void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg); /* usage */ -bool throttle_schedule_timer(ThrottleState *ts, bool is_write); +bool throttle_schedule_timer(ThrottleState *ts, + ThrottleTimers *tt, + bool is_write); void throttle_account(ThrottleState *ts, bool is_write, uint64_t size); diff --git a/iothread.c b/iothread.c index 0416fc4268..878a594ef4 100644 --- a/iothread.c +++ b/iothread.c @@ -31,14 +31,21 @@ typedef ObjectClass IOThreadClass; static void *iothread_run(void *opaque) { IOThread *iothread = opaque; + bool blocking; qemu_mutex_lock(&iothread->init_done_lock); iothread->thread_id = qemu_get_thread_id(); qemu_cond_signal(&iothread->init_done_cond); qemu_mutex_unlock(&iothread->init_done_lock); - while (!atomic_read(&iothread->stopping)) { - aio_poll(iothread->ctx, true); + while (!iothread->stopping) { + aio_context_acquire(iothread->ctx); + blocking = true; + while (!iothread->stopping && aio_poll(iothread->ctx, blocking)) { + /* Progress was made, keep going */ + blocking = false; + } + aio_context_release(iothread->ctx); } return NULL; } diff --git a/qapi/block-core.json b/qapi/block-core.json index 8411d4f83a..afa9d3d1f3 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -259,6 +259,8 @@ # # @iops_size: #optional an I/O size in bytes (Since 1.7) # +# @group: #optional throttle group name (Since 2.4) +# # @cache: the cache mode used for the block device (since: 2.3) # # @write_threshold: configured write threshold for the device. @@ -278,7 +280,7 @@ '*bps_max': 'int', '*bps_rd_max': 'int', '*bps_wr_max': 'int', '*iops_max': 'int', '*iops_rd_max': 'int', '*iops_wr_max': 'int', - '*iops_size': 'int', 'cache': 'BlockdevCacheInfo', + '*iops_size': 'int', '*group': 'str', 'cache': 'BlockdevCacheInfo', 'write_threshold': 'int' } } ## @@ -1062,6 +1064,27 @@ # # Change I/O throttle limits for a block drive. # +# Since QEMU 2.4, each device with I/O limits is member of a throttle +# group. +# +# If two or more devices are members of the same group, the limits +# will apply to the combined I/O of the whole group in a round-robin +# fashion. Therefore, setting new I/O limits to a device will affect +# the whole group. +# +# The name of the group can be specified using the 'group' parameter. +# If the parameter is unset, it is assumed to be the current group of +# that device. If it's not in any group yet, the name of the device +# will be used as the name for its group. +# +# The 'group' parameter can also be used to move a device to a +# different group. In this case the limits specified in the parameters +# will be applied to the new group only. +# +# I/O limits can be disabled by setting all of them to 0. In this case +# the device will be removed from its group and the rest of its +# members will no be affected. The 'group' parameter is ignored. +# # @device: The name of the device # # @bps: total throughput limit in bytes per second @@ -1090,6 +1113,8 @@ # # @iops_size: #optional an I/O size in bytes (Since 1.7) # +# @group: #optional throttle group name (Since 2.4) +# # Returns: Nothing on success # If @device is not a valid block device, DeviceNotFound # @@ -1101,7 +1126,7 @@ '*bps_max': 'int', '*bps_rd_max': 'int', '*bps_wr_max': 'int', '*iops_max': 'int', '*iops_rd_max': 'int', '*iops_wr_max': 'int', - '*iops_size': 'int' } } + '*iops_size': 'int', '*group': 'str' } } ## # @block-stream: diff --git a/qemu-options.hx b/qemu-options.hx index 1d281f6818..289ddcb944 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -468,6 +468,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive, " [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n" " [[,iops_max=im]|[[,iops_rd_max=irm][,iops_wr_max=iwm]]]\n" " [[,iops_size=is]]\n" + " [[,group=g]]\n" " use 'file' as a drive image\n", QEMU_ARCH_ALL) STEXI @item -drive @var{option}[,@var{option}[,@var{option}[,...]]] diff --git a/qmp-commands.hx b/qmp-commands.hx index c97d0d7667..1db6524e97 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1853,7 +1853,7 @@ EQMP { .name = "block_set_io_throttle", - .args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l,bps_max:l?,bps_rd_max:l?,bps_wr_max:l?,iops_max:l?,iops_rd_max:l?,iops_wr_max:l?,iops_size:l?", + .args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l,bps_max:l?,bps_rd_max:l?,bps_wr_max:l?,iops_max:l?,iops_rd_max:l?,iops_wr_max:l?,iops_size:l?,group:s?", .mhandler.cmd_new = qmp_marshal_input_block_set_io_throttle, }, @@ -1879,6 +1879,7 @@ Arguments: - "iops_rd_max": read I/O operations max (json-int) - "iops_wr_max": write I/O operations max (json-int) - "iops_size": I/O size in bytes when limiting (json-int) +- "group": throttle group name (json-string) Example: diff --git a/tests/qemu-iotests/093 b/tests/qemu-iotests/093 index b9096a55d4..c0e9e2b0b5 100755 --- a/tests/qemu-iotests/093 +++ b/tests/qemu-iotests/093 @@ -3,6 +3,7 @@ # Tests for IO throttling # # Copyright (C) 2015 Red Hat, Inc. +# Copyright (C) 2015 Igalia, S.L. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,6 +23,7 @@ import iotests class ThrottleTestCase(iotests.QMPTestCase): test_img = "null-aio://" + max_drives = 3 def blockstats(self, device): result = self.vm.qmp("query-blockstats") @@ -32,26 +34,31 @@ class ThrottleTestCase(iotests.QMPTestCase): raise Exception("Device not found for blockstats: %s" % device) def setUp(self): - self.vm = iotests.VM().add_drive(self.test_img) + self.vm = iotests.VM() + for i in range(0, self.max_drives): + self.vm.add_drive(self.test_img) self.vm.launch() def tearDown(self): self.vm.shutdown() - def do_test_throttle(self, seconds, params): + def do_test_throttle(self, ndrives, seconds, params): def check_limit(limit, num): # IO throttling algorithm is discrete, allow 10% error so the test # is more robust return limit == 0 or \ - (num < seconds * limit * 1.1 - and num > seconds * limit * 0.9) + (num < seconds * limit * 1.1 / ndrives + and num > seconds * limit * 0.9 / ndrives) nsec_per_sec = 1000000000 - params['device'] = 'drive0' + params['group'] = 'test' - result = self.vm.qmp("block_set_io_throttle", conv_keys=False, **params) - self.assert_qmp(result, 'return', {}) + # Set the I/O throttling parameters to all drives + for i in range(0, ndrives): + params['device'] = 'drive%d' % i + result = self.vm.qmp("block_set_io_throttle", conv_keys=False, **params) + self.assert_qmp(result, 'return', {}) # Set vm clock to a known value ns = seconds * nsec_per_sec @@ -66,32 +73,60 @@ class ThrottleTestCase(iotests.QMPTestCase): params['iops'] / 2, params['iops_rd']) rd_nr *= seconds * 2 + rd_nr /= ndrives wr_nr = max(params['bps'] / rq_size / 2, params['bps_wr'] / rq_size, params['iops'] / 2, params['iops_wr']) wr_nr *= seconds * 2 + wr_nr /= ndrives + + # Send I/O requests to all drives for i in range(rd_nr): - self.vm.hmp_qemu_io("drive0", "aio_read %d %d" % (i * rq_size, rq_size)) - for i in range(wr_nr): - self.vm.hmp_qemu_io("drive0", "aio_write %d %d" % (i * rq_size, rq_size)) + for drive in range(0, ndrives): + self.vm.hmp_qemu_io("drive%d" % drive, "aio_read %d %d" % + (i * rq_size, rq_size)) - start_rd_bytes, start_rd_iops, start_wr_bytes, start_wr_iops = self.blockstats('drive0') + for i in range(wr_nr): + for drive in range(0, ndrives): + self.vm.hmp_qemu_io("drive%d" % drive, "aio_write %d %d" % + (i * rq_size, rq_size)) + + # We'll store the I/O stats for each drive in these arrays + start_rd_bytes = [0] * ndrives + start_rd_iops = [0] * ndrives + start_wr_bytes = [0] * ndrives + start_wr_iops = [0] * ndrives + end_rd_bytes = [0] * ndrives + end_rd_iops = [0] * ndrives + end_wr_bytes = [0] * ndrives + end_wr_iops = [0] * ndrives + + # Read the stats before advancing the clock + for i in range(0, ndrives): + start_rd_bytes[i], start_rd_iops[i], start_wr_bytes[i], \ + start_wr_iops[i] = self.blockstats('drive%d' % i) self.vm.qtest("clock_step %d" % ns) - end_rd_bytes, end_rd_iops, end_wr_bytes, end_wr_iops = self.blockstats('drive0') - - rd_bytes = end_rd_bytes - start_rd_bytes - rd_iops = end_rd_iops - start_rd_iops - wr_bytes = end_wr_bytes - start_wr_bytes - wr_iops = end_wr_iops - start_wr_iops - self.assertTrue(check_limit(params['bps'], rd_bytes + wr_bytes)) - self.assertTrue(check_limit(params['bps_rd'], rd_bytes)) - self.assertTrue(check_limit(params['bps_wr'], wr_bytes)) - self.assertTrue(check_limit(params['iops'], rd_iops + wr_iops)) - self.assertTrue(check_limit(params['iops_rd'], rd_iops)) - self.assertTrue(check_limit(params['iops_wr'], wr_iops)) + # Read the stats after advancing the clock + for i in range(0, ndrives): + end_rd_bytes[i], end_rd_iops[i], end_wr_bytes[i], \ + end_wr_iops[i] = self.blockstats('drive%d' % i) + + # Check that the I/O is within the limits and evenly distributed + for i in range(0, ndrives): + rd_bytes = end_rd_bytes[i] - start_rd_bytes[i] + rd_iops = end_rd_iops[i] - start_rd_iops[i] + wr_bytes = end_wr_bytes[i] - start_wr_bytes[i] + wr_iops = end_wr_iops[i] - start_wr_iops[i] + + self.assertTrue(check_limit(params['bps'], rd_bytes + wr_bytes)) + self.assertTrue(check_limit(params['bps_rd'], rd_bytes)) + self.assertTrue(check_limit(params['bps_wr'], wr_bytes)) + self.assertTrue(check_limit(params['iops'], rd_iops + wr_iops)) + self.assertTrue(check_limit(params['iops_rd'], rd_iops)) + self.assertTrue(check_limit(params['iops_wr'], wr_iops)) def test_all(self): params = {"bps": 4096, @@ -101,11 +136,13 @@ class ThrottleTestCase(iotests.QMPTestCase): "iops_rd": 10, "iops_wr": 10, } - # Pick each out of all possible params and test - for tk in params: - limits = dict([(k, 0) for k in params]) - limits[tk] = params[tk] - self.do_test_throttle(5, limits) + # Repeat the test with different numbers of drives + for ndrives in range(1, self.max_drives + 1): + # Pick each out of all possible params and test + for tk in params: + limits = dict([(k, 0) for k in params]) + limits[tk] = params[tk] * ndrives + self.do_test_throttle(ndrives, 5, limits) class ThrottleTestCoroutine(ThrottleTestCase): test_img = "null-co://" diff --git a/tests/test-aio.c b/tests/test-aio.c index 4b0cb45d31..a7cb5c9915 100644 --- a/tests/test-aio.c +++ b/tests/test-aio.c @@ -107,7 +107,6 @@ static void test_notify(void) typedef struct { QemuMutex start_lock; - EventNotifier notifier; bool thread_acquired; } AcquireTestData; @@ -119,8 +118,6 @@ static void *test_acquire_thread(void *opaque) qemu_mutex_lock(&data->start_lock); qemu_mutex_unlock(&data->start_lock); - g_usleep(500000); - event_notifier_set(&data->notifier); aio_context_acquire(ctx); aio_context_release(ctx); @@ -129,19 +126,20 @@ static void *test_acquire_thread(void *opaque) return NULL; } -static void dummy_notifier_read(EventNotifier *n) +static void dummy_notifier_read(EventNotifier *unused) { - event_notifier_test_and_clear(n); + g_assert(false); /* should never be invoked */ } static void test_acquire(void) { QemuThread thread; + EventNotifier notifier; AcquireTestData data; /* Dummy event notifier ensures aio_poll() will block */ - event_notifier_init(&data.notifier, false); - aio_set_event_notifier(ctx, &data.notifier, dummy_notifier_read); + event_notifier_init(¬ifier, false); + aio_set_event_notifier(ctx, ¬ifier, dummy_notifier_read); g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */ qemu_mutex_init(&data.start_lock); @@ -155,13 +153,12 @@ static void test_acquire(void) /* Block in aio_poll(), let other thread kick us and acquire context */ aio_context_acquire(ctx); qemu_mutex_unlock(&data.start_lock); /* let the thread run */ - g_assert(aio_poll(ctx, true)); - g_assert(!data.thread_acquired); + g_assert(!aio_poll(ctx, true)); aio_context_release(ctx); qemu_thread_join(&thread); - aio_set_event_notifier(ctx, &data.notifier, NULL); - event_notifier_cleanup(&data.notifier); + aio_set_event_notifier(ctx, ¬ifier, NULL); + event_notifier_cleanup(¬ifier); g_assert(data.thread_acquired); } diff --git a/tests/test-throttle.c b/tests/test-throttle.c index d8ba415e43..016844546a 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -1,10 +1,12 @@ /* * Throttle infrastructure tests * - * Copyright Nodalink, SARL. 2013 + * Copyright Nodalink, EURL. 2013-2014 + * Copyright Igalia, S.L. 2015 * * Authors: - * Benoît Canet <benoit.canet@irqsave.net> + * Benoît Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> * * This work is licensed under the terms of the GNU LGPL, version 2 or later. * See the COPYING.LIB file in the top-level directory. @@ -15,11 +17,13 @@ #include "block/aio.h" #include "qemu/throttle.h" #include "qemu/error-report.h" +#include "block/throttle-groups.h" static AioContext *ctx; static LeakyBucket bkt; static ThrottleConfig cfg; static ThrottleState ts; +static ThrottleTimers tt; /* useful function */ static bool double_cmp(double x, double y) @@ -103,17 +107,19 @@ static void test_init(void) { int i; - /* fill the structure with crap */ + /* fill the structures with crap */ memset(&ts, 1, sizeof(ts)); + memset(&tt, 1, sizeof(tt)); - /* init the structure */ - throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, - read_timer_cb, write_timer_cb, &ts); + /* init structures */ + throttle_init(&ts); + throttle_timers_init(&tt, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); /* check initialized fields */ - g_assert(ts.clock_type == QEMU_CLOCK_VIRTUAL); - g_assert(ts.timers[0]); - g_assert(ts.timers[1]); + g_assert(tt.clock_type == QEMU_CLOCK_VIRTUAL); + g_assert(tt.timers[0]); + g_assert(tt.timers[1]); /* check other fields where cleared */ g_assert(!ts.previous_leak); @@ -124,17 +130,18 @@ static void test_init(void) g_assert(!ts.cfg.buckets[i].level); } - throttle_destroy(&ts); + throttle_timers_destroy(&tt); } static void test_destroy(void) { int i; - throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, - read_timer_cb, write_timer_cb, &ts); - throttle_destroy(&ts); + throttle_init(&ts); + throttle_timers_init(&tt, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); + throttle_timers_destroy(&tt); for (i = 0; i < 2; i++) { - g_assert(!ts.timers[i]); + g_assert(!tt.timers[i]); } } @@ -170,11 +177,12 @@ static void test_config_functions(void) orig_cfg.op_size = 1; - throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, - read_timer_cb, write_timer_cb, &ts); + throttle_init(&ts); + throttle_timers_init(&tt, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); /* structure reset by throttle_init previous_leak should be null */ g_assert(!ts.previous_leak); - throttle_config(&ts, &orig_cfg); + throttle_config(&ts, &tt, &orig_cfg); /* has previous leak been initialized by throttle_config ? */ g_assert(ts.previous_leak); @@ -182,7 +190,7 @@ static void test_config_functions(void) /* get back the fixed configuration */ throttle_get_config(&ts, &final_cfg); - throttle_destroy(&ts); + throttle_timers_destroy(&tt); g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].avg == 153); g_assert(final_cfg.buckets[THROTTLE_BPS_READ].avg == 56); @@ -323,43 +331,47 @@ static void test_is_valid(void) static void test_have_timer(void) { - /* zero the structure */ + /* zero structures */ memset(&ts, 0, sizeof(ts)); + memset(&tt, 0, sizeof(tt)); /* no timer set should return false */ - g_assert(!throttle_have_timer(&ts)); + g_assert(!throttle_timers_are_initialized(&tt)); - /* init the structure */ - throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, - read_timer_cb, write_timer_cb, &ts); + /* init structures */ + throttle_init(&ts); + throttle_timers_init(&tt, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); /* timer set by init should return true */ - g_assert(throttle_have_timer(&ts)); + g_assert(throttle_timers_are_initialized(&tt)); - throttle_destroy(&ts); + throttle_timers_destroy(&tt); } static void test_detach_attach(void) { - /* zero the structure */ + /* zero structures */ memset(&ts, 0, sizeof(ts)); + memset(&tt, 0, sizeof(tt)); /* init the structure */ - throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, - read_timer_cb, write_timer_cb, &ts); + throttle_init(&ts); + throttle_timers_init(&tt, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); /* timer set by init should return true */ - g_assert(throttle_have_timer(&ts)); + g_assert(throttle_timers_are_initialized(&tt)); /* timer should no longer exist after detaching */ - throttle_detach_aio_context(&ts); - g_assert(!throttle_have_timer(&ts)); + throttle_timers_detach_aio_context(&tt); + g_assert(!throttle_timers_are_initialized(&tt)); /* timer should exist again after attaching */ - throttle_attach_aio_context(&ts, ctx); - g_assert(throttle_have_timer(&ts)); + throttle_timers_attach_aio_context(&tt, ctx); + g_assert(throttle_timers_are_initialized(&tt)); - throttle_destroy(&ts); + throttle_timers_destroy(&tt); } static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */ @@ -387,9 +399,10 @@ static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */ cfg.op_size = op_size; - throttle_init(&ts, ctx, QEMU_CLOCK_VIRTUAL, - read_timer_cb, write_timer_cb, &ts); - throttle_config(&ts, &cfg); + throttle_init(&ts); + throttle_timers_init(&tt, ctx, QEMU_CLOCK_VIRTUAL, + read_timer_cb, write_timer_cb, &ts); + throttle_config(&ts, &tt, &cfg); /* account a read */ throttle_account(&ts, false, size); @@ -414,7 +427,7 @@ static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */ return false; } - throttle_destroy(&ts); + throttle_timers_destroy(&tt); return true; } @@ -490,23 +503,80 @@ static void test_accounting(void) (64.0 / 13))); } +static void test_groups(void) +{ + ThrottleConfig cfg1, cfg2; + BlockDriverState *bdrv1, *bdrv2, *bdrv3; + + bdrv1 = bdrv_new(); + bdrv2 = bdrv_new(); + bdrv3 = bdrv_new(); + + g_assert(bdrv1->throttle_state == NULL); + g_assert(bdrv2->throttle_state == NULL); + g_assert(bdrv3->throttle_state == NULL); + + throttle_group_register_bs(bdrv1, "bar"); + throttle_group_register_bs(bdrv2, "foo"); + throttle_group_register_bs(bdrv3, "bar"); + + g_assert(bdrv1->throttle_state != NULL); + g_assert(bdrv2->throttle_state != NULL); + g_assert(bdrv3->throttle_state != NULL); + + g_assert(!strcmp(throttle_group_get_name(bdrv1), "bar")); + g_assert(!strcmp(throttle_group_get_name(bdrv2), "foo")); + g_assert(bdrv1->throttle_state == bdrv3->throttle_state); + + /* Setting the config of a group member affects the whole group */ + memset(&cfg1, 0, sizeof(cfg1)); + cfg1.buckets[THROTTLE_BPS_READ].avg = 500000; + cfg1.buckets[THROTTLE_BPS_WRITE].avg = 285000; + cfg1.buckets[THROTTLE_OPS_READ].avg = 20000; + cfg1.buckets[THROTTLE_OPS_WRITE].avg = 12000; + throttle_group_config(bdrv1, &cfg1); + + throttle_group_get_config(bdrv1, &cfg1); + throttle_group_get_config(bdrv3, &cfg2); + g_assert(!memcmp(&cfg1, &cfg2, sizeof(cfg1))); + + cfg2.buckets[THROTTLE_BPS_READ].avg = 4547; + cfg2.buckets[THROTTLE_BPS_WRITE].avg = 1349; + cfg2.buckets[THROTTLE_OPS_READ].avg = 123; + cfg2.buckets[THROTTLE_OPS_WRITE].avg = 86; + throttle_group_config(bdrv3, &cfg1); + + throttle_group_get_config(bdrv1, &cfg1); + throttle_group_get_config(bdrv3, &cfg2); + g_assert(!memcmp(&cfg1, &cfg2, sizeof(cfg1))); + + throttle_group_unregister_bs(bdrv1); + throttle_group_unregister_bs(bdrv2); + throttle_group_unregister_bs(bdrv3); + + g_assert(bdrv1->throttle_state == NULL); + g_assert(bdrv2->throttle_state == NULL); + g_assert(bdrv3->throttle_state == NULL); +} + int main(int argc, char **argv) { - GSource *src; Error *local_error = NULL; - init_clocks(); + qemu_init_main_loop(&local_error); + ctx = qemu_get_aio_context(); - ctx = aio_context_new(&local_error); if (!ctx) { error_report("Failed to create AIO Context: '%s'", - error_get_pretty(local_error)); - error_free(local_error); + local_error ? error_get_pretty(local_error) : + "Failed to initialize the QEMU main loop"); + if (local_error) { + error_free(local_error); + } exit(1); } - src = aio_get_g_source(ctx); - g_source_attach(src, NULL); - g_source_unref(src); + + bdrv_init(); do {} while (g_main_context_iteration(NULL, false)); @@ -523,6 +593,7 @@ int main(int argc, char **argv) g_test_add_func("/throttle/config/is_valid", test_is_valid); g_test_add_func("/throttle/config_functions", test_config_functions); g_test_add_func("/throttle/accounting", test_accounting); + g_test_add_func("/throttle/groups", test_groups); return g_test_run(); } diff --git a/util/throttle.c b/util/throttle.c index f976ac7de5..706c13111e 100644 --- a/util/throttle.c +++ b/util/throttle.c @@ -1,10 +1,12 @@ /* * QEMU throttling infrastructure * - * Copyright (C) Nodalink, SARL. 2013 + * Copyright (C) Nodalink, EURL. 2013-2014 + * Copyright (C) Igalia, S.L. 2015 * - * Author: - * Benoît Canet <benoit.canet@irqsave.net> + * Authors: + * Benoît Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -159,29 +161,36 @@ bool throttle_compute_timer(ThrottleState *ts, } /* Add timers to event loop */ -void throttle_attach_aio_context(ThrottleState *ts, AioContext *new_context) +void throttle_timers_attach_aio_context(ThrottleTimers *tt, + AioContext *new_context) { - ts->timers[0] = aio_timer_new(new_context, ts->clock_type, SCALE_NS, - ts->read_timer_cb, ts->timer_opaque); - ts->timers[1] = aio_timer_new(new_context, ts->clock_type, SCALE_NS, - ts->write_timer_cb, ts->timer_opaque); + tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS, + tt->read_timer_cb, tt->timer_opaque); + tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS, + tt->write_timer_cb, tt->timer_opaque); } /* To be called first on the ThrottleState */ -void throttle_init(ThrottleState *ts, - AioContext *aio_context, - QEMUClockType clock_type, - QEMUTimerCB *read_timer_cb, - QEMUTimerCB *write_timer_cb, - void *timer_opaque) +void throttle_init(ThrottleState *ts) { memset(ts, 0, sizeof(ThrottleState)); +} + +/* To be called first on the ThrottleTimers */ +void throttle_timers_init(ThrottleTimers *tt, + AioContext *aio_context, + QEMUClockType clock_type, + QEMUTimerCB *read_timer_cb, + QEMUTimerCB *write_timer_cb, + void *timer_opaque) +{ + memset(tt, 0, sizeof(ThrottleTimers)); - ts->clock_type = clock_type; - ts->read_timer_cb = read_timer_cb; - ts->write_timer_cb = write_timer_cb; - ts->timer_opaque = timer_opaque; - throttle_attach_aio_context(ts, aio_context); + tt->clock_type = clock_type; + tt->read_timer_cb = read_timer_cb; + tt->write_timer_cb = write_timer_cb; + tt->timer_opaque = timer_opaque; + throttle_timers_attach_aio_context(tt, aio_context); } /* destroy a timer */ @@ -195,25 +204,25 @@ static void throttle_timer_destroy(QEMUTimer **timer) } /* Remove timers from event loop */ -void throttle_detach_aio_context(ThrottleState *ts) +void throttle_timers_detach_aio_context(ThrottleTimers *tt) { int i; for (i = 0; i < 2; i++) { - throttle_timer_destroy(&ts->timers[i]); + throttle_timer_destroy(&tt->timers[i]); } } -/* To be called last on the ThrottleState */ -void throttle_destroy(ThrottleState *ts) +/* To be called last on the ThrottleTimers */ +void throttle_timers_destroy(ThrottleTimers *tt) { - throttle_detach_aio_context(ts); + throttle_timers_detach_aio_context(tt); } /* is any throttling timer configured */ -bool throttle_have_timer(ThrottleState *ts) +bool throttle_timers_are_initialized(ThrottleTimers *tt) { - if (ts->timers[0]) { + if (tt->timers[0]) { return true; } @@ -324,9 +333,12 @@ static void throttle_cancel_timer(QEMUTimer *timer) /* Used to configure the throttle * * @ts: the throttle state we are working on + * @tt: the throttle timers we use in this aio context * @cfg: the config to set */ -void throttle_config(ThrottleState *ts, ThrottleConfig *cfg) +void throttle_config(ThrottleState *ts, + ThrottleTimers *tt, + ThrottleConfig *cfg) { int i; @@ -336,10 +348,10 @@ void throttle_config(ThrottleState *ts, ThrottleConfig *cfg) throttle_fix_bucket(&ts->cfg.buckets[i]); } - ts->previous_leak = qemu_clock_get_ns(ts->clock_type); + ts->previous_leak = qemu_clock_get_ns(tt->clock_type); for (i = 0; i < 2; i++) { - throttle_cancel_timer(ts->timers[i]); + throttle_cancel_timer(tt->timers[i]); } } @@ -358,12 +370,15 @@ void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg) * * NOTE: this function is not unit tested due to it's usage of timer_mod * + * @tt: the timers structure * @is_write: the type of operation (read/write) * @ret: true if the timer has been scheduled else false */ -bool throttle_schedule_timer(ThrottleState *ts, bool is_write) +bool throttle_schedule_timer(ThrottleState *ts, + ThrottleTimers *tt, + bool is_write) { - int64_t now = qemu_clock_get_ns(ts->clock_type); + int64_t now = qemu_clock_get_ns(tt->clock_type); int64_t next_timestamp; bool must_wait; @@ -378,12 +393,12 @@ bool throttle_schedule_timer(ThrottleState *ts, bool is_write) } /* request throttled and timer pending -> do nothing */ - if (timer_pending(ts->timers[is_write])) { + if (timer_pending(tt->timers[is_write])) { return true; } /* request throttled and timer not pending -> arm timer */ - timer_mod(ts->timers[is_write], next_timestamp); + timer_mod(tt->timers[is_write], next_timestamp); return true; } |