diff options
Diffstat (limited to 'block.c')
-rw-r--r-- | block.c | 338 |
1 files changed, 95 insertions, 243 deletions
@@ -86,13 +86,6 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque); static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors); -static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, double elapsed_time, uint64_t *wait); -static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, - double elapsed_time, uint64_t *wait); -static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, int64_t *wait); - static QTAILQ_HEAD(, BlockDriverState) bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); @@ -123,69 +116,101 @@ int is_windows_drive(const char *filename) #endif /* throttling disk I/O limits */ -void bdrv_io_limits_disable(BlockDriverState *bs) +void bdrv_set_io_limits(BlockDriverState *bs, + ThrottleConfig *cfg) { - bs->io_limits_enabled = false; + int i; - do {} while (qemu_co_enter_next(&bs->throttled_reqs)); + throttle_config(&bs->throttle_state, cfg); - if (bs->block_timer) { - timer_del(bs->block_timer); - timer_free(bs->block_timer); - bs->block_timer = NULL; + for (i = 0; i < 2; i++) { + qemu_co_enter_next(&bs->throttled_reqs[i]); } +} + +/* this function drain all the throttled IOs */ +static bool bdrv_start_throttled_reqs(BlockDriverState *bs) +{ + bool drained = false; + bool enabled = bs->io_limits_enabled; + int i; + + bs->io_limits_enabled = false; + + for (i = 0; i < 2; i++) { + while (qemu_co_enter_next(&bs->throttled_reqs[i])) { + drained = true; + } + } + + bs->io_limits_enabled = enabled; - bs->slice_start = 0; - bs->slice_end = 0; + return drained; } -static void bdrv_block_timer(void *opaque) +void bdrv_io_limits_disable(BlockDriverState *bs) { - BlockDriverState *bs = opaque; + bs->io_limits_enabled = false; - qemu_co_enter_next(&bs->throttled_reqs); + bdrv_start_throttled_reqs(bs); + + throttle_destroy(&bs->throttle_state); } -void bdrv_io_limits_enable(BlockDriverState *bs) +static void bdrv_throttle_read_timer_cb(void *opaque) { - bs->block_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, bdrv_block_timer, bs); - bs->io_limits_enabled = true; + BlockDriverState *bs = opaque; + qemu_co_enter_next(&bs->throttled_reqs[0]); } -bool bdrv_io_limits_enabled(BlockDriverState *bs) +static void bdrv_throttle_write_timer_cb(void *opaque) { - BlockIOLimit *io_limits = &bs->io_limits; - return io_limits->bps[BLOCK_IO_LIMIT_READ] - || io_limits->bps[BLOCK_IO_LIMIT_WRITE] - || io_limits->bps[BLOCK_IO_LIMIT_TOTAL] - || io_limits->iops[BLOCK_IO_LIMIT_READ] - || io_limits->iops[BLOCK_IO_LIMIT_WRITE] - || io_limits->iops[BLOCK_IO_LIMIT_TOTAL]; + BlockDriverState *bs = opaque; + qemu_co_enter_next(&bs->throttled_reqs[1]); } +/* should be called before bdrv_set_io_limits if a limit is set */ +void bdrv_io_limits_enable(BlockDriverState *bs) +{ + assert(!bs->io_limits_enabled); + throttle_init(&bs->throttle_state, + QEMU_CLOCK_VIRTUAL, + bdrv_throttle_read_timer_cb, + bdrv_throttle_write_timer_cb, + bs); + bs->io_limits_enabled = true; +} + +/* This function makes an IO wait if needed + * + * @nb_sectors: the number of sectors of the IO + * @is_write: is the IO a write + */ static void bdrv_io_limits_intercept(BlockDriverState *bs, - bool is_write, int nb_sectors) + int nb_sectors, + bool is_write) { - int64_t wait_time = -1; + /* does this io must wait */ + bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write); - if (!qemu_co_queue_empty(&bs->throttled_reqs)) { - qemu_co_queue_wait(&bs->throttled_reqs); + /* if must wait or any request of this type throttled queue the IO */ + if (must_wait || + !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) { + qemu_co_queue_wait(&bs->throttled_reqs[is_write]); } - /* In fact, we hope to keep each request's timing, in FIFO mode. The next - * throttled requests will not be dequeued until the current request is - * allowed to be serviced. So if the current request still exceeds the - * limits, it will be inserted to the head. All requests followed it will - * be still in throttled_reqs queue. - */ + /* the IO will be executed, do the accounting */ + throttle_account(&bs->throttle_state, + is_write, + nb_sectors * BDRV_SECTOR_SIZE); - while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) { - timer_mod(bs->block_timer, - wait_time + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); - qemu_co_queue_wait_insert_head(&bs->throttled_reqs); + /* if the next request must wait -> do nothing */ + if (throttle_schedule_timer(&bs->throttle_state, is_write)) { + return; } - qemu_co_queue_next(&bs->throttled_reqs); + /* else queue next request for execution */ + qemu_co_queue_next(&bs->throttled_reqs[is_write]); } /* check if the path starts with "<protocol>:" */ @@ -305,7 +330,8 @@ BlockDriverState *bdrv_new(const char *device_name) bdrv_iostatus_disable(bs); notifier_list_init(&bs->close_notifiers); notifier_with_return_list_init(&bs->before_write_notifiers); - qemu_co_queue_init(&bs->throttled_reqs); + qemu_co_queue_init(&bs->throttled_reqs[0]); + qemu_co_queue_init(&bs->throttled_reqs[1]); return bs; } @@ -1112,11 +1138,6 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, bdrv_dev_change_media_cb(bs, true); } - /* throttling disk I/O limits */ - if (bs->io_limits_enabled) { - bdrv_io_limits_enable(bs); - } - return 0; unlink_and_fail: @@ -1435,7 +1456,10 @@ static bool bdrv_requests_pending(BlockDriverState *bs) if (!QLIST_EMPTY(&bs->tracked_requests)) { return true; } - if (!qemu_co_queue_empty(&bs->throttled_reqs)) { + if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) { + return true; + } + if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) { return true; } if (bs->file && bdrv_requests_pending(bs->file)) { @@ -1481,7 +1505,7 @@ void bdrv_drain_all(void) * a busy wait. */ QTAILQ_FOREACH(bs, &bdrv_states, list) { - while (qemu_co_enter_next(&bs->throttled_reqs)) { + if (bdrv_start_throttled_reqs(bs)) { busy = true; } } @@ -1523,13 +1547,12 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->enable_write_cache = bs_src->enable_write_cache; - /* i/o timing parameters */ - bs_dest->slice_start = bs_src->slice_start; - bs_dest->slice_end = bs_src->slice_end; - bs_dest->slice_submitted = bs_src->slice_submitted; - bs_dest->io_limits = bs_src->io_limits; - bs_dest->throttled_reqs = bs_src->throttled_reqs; - bs_dest->block_timer = bs_src->block_timer; + /* i/o throttled req */ + memcpy(&bs_dest->throttle_state, + &bs_src->throttle_state, + sizeof(ThrottleState)); + bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; + bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; bs_dest->io_limits_enabled = bs_src->io_limits_enabled; /* r/w error */ @@ -1576,7 +1599,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) assert(bs_new->dev == NULL); assert(bs_new->in_use == 0); assert(bs_new->io_limits_enabled == false); - assert(bs_new->block_timer == NULL); + assert(!throttle_have_timer(&bs_new->throttle_state)); tmp = *bs_new; *bs_new = *bs_old; @@ -1595,7 +1618,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) assert(bs_new->job == NULL); assert(bs_new->in_use == 0); assert(bs_new->io_limits_enabled == false); - assert(bs_new->block_timer == NULL); + assert(!throttle_have_timer(&bs_new->throttle_state)); bdrv_rebind(bs_new); bdrv_rebind(bs_old); @@ -2538,11 +2561,6 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, return -EIO; } - /* throttling disk read I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, false, nb_sectors); - } - if (bs->copy_on_read) { flags |= BDRV_REQ_COPY_ON_READ; } @@ -2554,6 +2572,11 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, wait_for_overlapping_requests(bs, sector_num, nb_sectors); } + /* throttling disk I/O */ + if (bs->io_limits_enabled) { + bdrv_io_limits_intercept(bs, nb_sectors, false); + } + tracked_request_begin(&req, bs, sector_num, nb_sectors, false); if (flags & BDRV_REQ_COPY_ON_READ) { @@ -2679,15 +2702,15 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, return -EIO; } - /* throttling disk write I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, true, nb_sectors); - } - if (bs->copy_on_read_in_flight) { wait_for_overlapping_requests(bs, sector_num, nb_sectors); } + /* throttling disk I/O */ + if (bs->io_limits_enabled) { + bdrv_io_limits_intercept(bs, nb_sectors, true); + } + tracked_request_begin(&req, bs, sector_num, nb_sectors, true); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req); @@ -2805,14 +2828,6 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) *nb_sectors_ptr = length; } -/* throttling disk io limits */ -void bdrv_set_io_limits(BlockDriverState *bs, - BlockIOLimit *io_limits) -{ - bs->io_limits = *io_limits; - bs->io_limits_enabled = bdrv_io_limits_enabled(bs); -} - void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, BlockdevOnError on_write_error) { @@ -3622,169 +3637,6 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb) acb->aiocb_info->cancel(acb); } -/* block I/O throttling */ -static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, double elapsed_time, uint64_t *wait) -{ - uint64_t bps_limit = 0; - uint64_t extension; - double bytes_limit, bytes_base, bytes_res; - double slice_time, wait_time; - - if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { - bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]; - } else if (bs->io_limits.bps[is_write]) { - bps_limit = bs->io_limits.bps[is_write]; - } else { - if (wait) { - *wait = 0; - } - - return false; - } - - slice_time = bs->slice_end - bs->slice_start; - slice_time /= (NANOSECONDS_PER_SECOND); - bytes_limit = bps_limit * slice_time; - bytes_base = bs->slice_submitted.bytes[is_write]; - if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { - bytes_base += bs->slice_submitted.bytes[!is_write]; - } - - /* bytes_base: the bytes of data which have been read/written; and - * it is obtained from the history statistic info. - * bytes_res: the remaining bytes of data which need to be read/written. - * (bytes_base + bytes_res) / bps_limit: used to calcuate - * the total time for completing reading/writting all data. - */ - bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE; - - if (bytes_base + bytes_res <= bytes_limit) { - if (wait) { - *wait = 0; - } - - return false; - } - - /* Calc approx time to dispatch */ - wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time; - - /* When the I/O rate at runtime exceeds the limits, - * bs->slice_end need to be extended in order that the current statistic - * info can be kept until the timer fire, so it is increased and tuned - * based on the result of experiment. - */ - extension = wait_time * NANOSECONDS_PER_SECOND; - extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) * - BLOCK_IO_SLICE_TIME; - bs->slice_end += extension; - if (wait) { - *wait = wait_time * NANOSECONDS_PER_SECOND; - } - - return true; -} - -static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, - double elapsed_time, uint64_t *wait) -{ - uint64_t iops_limit = 0; - double ios_limit, ios_base; - double slice_time, wait_time; - - if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { - iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]; - } else if (bs->io_limits.iops[is_write]) { - iops_limit = bs->io_limits.iops[is_write]; - } else { - if (wait) { - *wait = 0; - } - - return false; - } - - slice_time = bs->slice_end - bs->slice_start; - slice_time /= (NANOSECONDS_PER_SECOND); - ios_limit = iops_limit * slice_time; - ios_base = bs->slice_submitted.ios[is_write]; - if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { - ios_base += bs->slice_submitted.ios[!is_write]; - } - - if (ios_base + 1 <= ios_limit) { - if (wait) { - *wait = 0; - } - - return false; - } - - /* Calc approx time to dispatch, in seconds */ - wait_time = (ios_base + 1) / iops_limit; - if (wait_time > elapsed_time) { - wait_time = wait_time - elapsed_time; - } else { - wait_time = 0; - } - - /* Exceeded current slice, extend it by another slice time */ - bs->slice_end += BLOCK_IO_SLICE_TIME; - if (wait) { - *wait = wait_time * NANOSECONDS_PER_SECOND; - } - - return true; -} - -static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, int64_t *wait) -{ - int64_t now, max_wait; - uint64_t bps_wait = 0, iops_wait = 0; - double elapsed_time; - int bps_ret, iops_ret; - - now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (now > bs->slice_end) { - bs->slice_start = now; - bs->slice_end = now + BLOCK_IO_SLICE_TIME; - memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted)); - } - - elapsed_time = now - bs->slice_start; - elapsed_time /= (NANOSECONDS_PER_SECOND); - - bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors, - is_write, elapsed_time, &bps_wait); - iops_ret = bdrv_exceed_iops_limits(bs, is_write, - elapsed_time, &iops_wait); - if (bps_ret || iops_ret) { - max_wait = bps_wait > iops_wait ? bps_wait : iops_wait; - if (wait) { - *wait = max_wait; - } - - now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (bs->slice_end < now + max_wait) { - bs->slice_end = now + max_wait; - } - - return true; - } - - if (wait) { - *wait = 0; - } - - bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors * - BDRV_SECTOR_SIZE; - bs->slice_submitted.ios[is_write]++; - - return false; -} - /**************************************************************/ /* async block device emulation */ |