diff options
Diffstat (limited to 'block.c')
-rw-r--r-- | block.c | 556 |
1 files changed, 231 insertions, 325 deletions
@@ -86,13 +86,6 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque); static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors); -static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, double elapsed_time, uint64_t *wait); -static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, - double elapsed_time, uint64_t *wait); -static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, int64_t *wait); - static QTAILQ_HEAD(, BlockDriverState) bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); @@ -123,69 +116,101 @@ int is_windows_drive(const char *filename) #endif /* throttling disk I/O limits */ -void bdrv_io_limits_disable(BlockDriverState *bs) +void bdrv_set_io_limits(BlockDriverState *bs, + ThrottleConfig *cfg) { - bs->io_limits_enabled = false; + int i; + + throttle_config(&bs->throttle_state, cfg); + + for (i = 0; i < 2; i++) { + qemu_co_enter_next(&bs->throttled_reqs[i]); + } +} + +/* this function drain all the throttled IOs */ +static bool bdrv_start_throttled_reqs(BlockDriverState *bs) +{ + bool drained = false; + bool enabled = bs->io_limits_enabled; + int i; - do {} while (qemu_co_enter_next(&bs->throttled_reqs)); + bs->io_limits_enabled = false; - if (bs->block_timer) { - timer_del(bs->block_timer); - timer_free(bs->block_timer); - bs->block_timer = NULL; + for (i = 0; i < 2; i++) { + while (qemu_co_enter_next(&bs->throttled_reqs[i])) { + drained = true; + } } - bs->slice_start = 0; - bs->slice_end = 0; + bs->io_limits_enabled = enabled; + + return drained; } -static void bdrv_block_timer(void *opaque) +void bdrv_io_limits_disable(BlockDriverState *bs) { - BlockDriverState *bs = opaque; + bs->io_limits_enabled = false; + + bdrv_start_throttled_reqs(bs); - qemu_co_enter_next(&bs->throttled_reqs); + throttle_destroy(&bs->throttle_state); } -void bdrv_io_limits_enable(BlockDriverState *bs) +static void bdrv_throttle_read_timer_cb(void *opaque) { - bs->block_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, bdrv_block_timer, bs); - bs->io_limits_enabled = true; + BlockDriverState *bs = opaque; + qemu_co_enter_next(&bs->throttled_reqs[0]); +} + +static void bdrv_throttle_write_timer_cb(void *opaque) +{ + BlockDriverState *bs = opaque; + qemu_co_enter_next(&bs->throttled_reqs[1]); } -bool bdrv_io_limits_enabled(BlockDriverState *bs) +/* should be called before bdrv_set_io_limits if a limit is set */ +void bdrv_io_limits_enable(BlockDriverState *bs) { - BlockIOLimit *io_limits = &bs->io_limits; - return io_limits->bps[BLOCK_IO_LIMIT_READ] - || io_limits->bps[BLOCK_IO_LIMIT_WRITE] - || io_limits->bps[BLOCK_IO_LIMIT_TOTAL] - || io_limits->iops[BLOCK_IO_LIMIT_READ] - || io_limits->iops[BLOCK_IO_LIMIT_WRITE] - || io_limits->iops[BLOCK_IO_LIMIT_TOTAL]; + assert(!bs->io_limits_enabled); + throttle_init(&bs->throttle_state, + QEMU_CLOCK_VIRTUAL, + bdrv_throttle_read_timer_cb, + bdrv_throttle_write_timer_cb, + bs); + bs->io_limits_enabled = true; } +/* This function makes an IO wait if needed + * + * @nb_sectors: the number of sectors of the IO + * @is_write: is the IO a write + */ static void bdrv_io_limits_intercept(BlockDriverState *bs, - bool is_write, int nb_sectors) + int nb_sectors, + bool is_write) { - int64_t wait_time = -1; + /* does this io must wait */ + bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write); - if (!qemu_co_queue_empty(&bs->throttled_reqs)) { - qemu_co_queue_wait(&bs->throttled_reqs); + /* if must wait or any request of this type throttled queue the IO */ + if (must_wait || + !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) { + qemu_co_queue_wait(&bs->throttled_reqs[is_write]); } - /* In fact, we hope to keep each request's timing, in FIFO mode. The next - * throttled requests will not be dequeued until the current request is - * allowed to be serviced. So if the current request still exceeds the - * limits, it will be inserted to the head. All requests followed it will - * be still in throttled_reqs queue. - */ + /* the IO will be executed, do the accounting */ + throttle_account(&bs->throttle_state, + is_write, + nb_sectors * BDRV_SECTOR_SIZE); - while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) { - timer_mod(bs->block_timer, - wait_time + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); - qemu_co_queue_wait_insert_head(&bs->throttled_reqs); + /* if the next request must wait -> do nothing */ + if (throttle_schedule_timer(&bs->throttle_state, is_write)) { + return; } - qemu_co_queue_next(&bs->throttled_reqs); + /* else queue next request for execution */ + qemu_co_queue_next(&bs->throttled_reqs[is_write]); } /* check if the path starts with "<protocol>:" */ @@ -305,7 +330,9 @@ BlockDriverState *bdrv_new(const char *device_name) bdrv_iostatus_disable(bs); notifier_list_init(&bs->close_notifiers); notifier_with_return_list_init(&bs->before_write_notifiers); - qemu_co_queue_init(&bs->throttled_reqs); + qemu_co_queue_init(&bs->throttled_reqs[0]); + qemu_co_queue_init(&bs->throttled_reqs[1]); + bs->refcnt = 1; return bs; } @@ -876,7 +903,7 @@ fail: if (!bs->drv) { QDECREF(bs->options); } - bdrv_delete(bs); + bdrv_unref(bs); return ret; } @@ -927,7 +954,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options) *backing_filename ? backing_filename : NULL, options, back_flags, back_drv); if (ret < 0) { - bdrv_delete(bs->backing_hd); + bdrv_unref(bs->backing_hd); bs->backing_hd = NULL; bs->open_flags |= BDRV_O_NO_BACKING; return ret; @@ -1002,12 +1029,12 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, bs1 = bdrv_new(""); ret = bdrv_open(bs1, filename, NULL, 0, drv); if (ret < 0) { - bdrv_delete(bs1); + bdrv_unref(bs1); goto fail; } total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK; - bdrv_delete(bs1); + bdrv_unref(bs1); ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename)); if (ret < 0) { @@ -1081,7 +1108,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, } if (bs->file != file) { - bdrv_delete(file); + bdrv_unref(file); file = NULL; } @@ -1112,16 +1139,11 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, bdrv_dev_change_media_cb(bs, true); } - /* throttling disk I/O limits */ - if (bs->io_limits_enabled) { - bdrv_io_limits_enable(bs); - } - return 0; unlink_and_fail: if (file != NULL) { - bdrv_delete(file); + bdrv_unref(file); } if (bs->is_temporary) { unlink(filename); @@ -1382,7 +1404,7 @@ void bdrv_close(BlockDriverState *bs) if (bs->drv) { if (bs->backing_hd) { - bdrv_delete(bs->backing_hd); + bdrv_unref(bs->backing_hd); bs->backing_hd = NULL; } bs->drv->bdrv_close(bs); @@ -1407,7 +1429,7 @@ void bdrv_close(BlockDriverState *bs) bs->options = NULL; if (bs->file != NULL) { - bdrv_delete(bs->file); + bdrv_unref(bs->file); bs->file = NULL; } } @@ -1435,7 +1457,10 @@ static bool bdrv_requests_pending(BlockDriverState *bs) if (!QLIST_EMPTY(&bs->tracked_requests)) { return true; } - if (!qemu_co_queue_empty(&bs->throttled_reqs)) { + if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) { + return true; + } + if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) { return true; } if (bs->file && bdrv_requests_pending(bs->file)) { @@ -1481,7 +1506,7 @@ void bdrv_drain_all(void) * a busy wait. */ QTAILQ_FOREACH(bs, &bdrv_states, list) { - while (qemu_co_enter_next(&bs->throttled_reqs)) { + if (bdrv_start_throttled_reqs(bs)) { busy = true; } } @@ -1523,13 +1548,12 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->enable_write_cache = bs_src->enable_write_cache; - /* i/o timing parameters */ - bs_dest->slice_start = bs_src->slice_start; - bs_dest->slice_end = bs_src->slice_end; - bs_dest->slice_submitted = bs_src->slice_submitted; - bs_dest->io_limits = bs_src->io_limits; - bs_dest->throttled_reqs = bs_src->throttled_reqs; - bs_dest->block_timer = bs_src->block_timer; + /* i/o throttled req */ + memcpy(&bs_dest->throttle_state, + &bs_src->throttle_state, + sizeof(ThrottleState)); + bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; + bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; bs_dest->io_limits_enabled = bs_src->io_limits_enabled; /* r/w error */ @@ -1543,6 +1567,9 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, /* dirty bitmap */ bs_dest->dirty_bitmap = bs_src->dirty_bitmap; + /* reference count */ + bs_dest->refcnt = bs_src->refcnt; + /* job */ bs_dest->in_use = bs_src->in_use; bs_dest->job = bs_src->job; @@ -1576,7 +1603,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) assert(bs_new->dev == NULL); assert(bs_new->in_use == 0); assert(bs_new->io_limits_enabled == false); - assert(bs_new->block_timer == NULL); + assert(!throttle_have_timer(&bs_new->throttle_state)); tmp = *bs_new; *bs_new = *bs_old; @@ -1595,7 +1622,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) assert(bs_new->job == NULL); assert(bs_new->in_use == 0); assert(bs_new->io_limits_enabled == false); - assert(bs_new->block_timer == NULL); + assert(!throttle_have_timer(&bs_new->throttle_state)); bdrv_rebind(bs_new); bdrv_rebind(bs_old); @@ -1626,11 +1653,12 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) bs_new->drv ? bs_new->drv->format_name : ""); } -void bdrv_delete(BlockDriverState *bs) +static void bdrv_delete(BlockDriverState *bs) { assert(!bs->dev); assert(!bs->job); assert(!bs->in_use); + assert(!bs->refcnt); bdrv_close(bs); @@ -1829,8 +1857,11 @@ int bdrv_commit(BlockDriverState *bs) buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); for (sector = 0; sector < total_sectors; sector += n) { - if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) { - + ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); + if (ret < 0) { + goto ro_cleanup; + } + if (ret) { if (bdrv_read(bs, sector, buf, n) != 0) { ret = -EIO; goto ro_cleanup; @@ -2146,7 +2177,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { /* so that bdrv_close() does not recursively close the chain */ intermediate_state->bs->backing_hd = NULL; - bdrv_delete(intermediate_state->bs); + bdrv_unref(intermediate_state->bs); } ret = 0; @@ -2538,11 +2569,6 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, return -EIO; } - /* throttling disk read I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, false, nb_sectors); - } - if (bs->copy_on_read) { flags |= BDRV_REQ_COPY_ON_READ; } @@ -2554,12 +2580,17 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, wait_for_overlapping_requests(bs, sector_num, nb_sectors); } + /* throttling disk I/O */ + if (bs->io_limits_enabled) { + bdrv_io_limits_intercept(bs, nb_sectors, false); + } + tracked_request_begin(&req, bs, sector_num, nb_sectors, false); if (flags & BDRV_REQ_COPY_ON_READ) { int pnum; - ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum); + ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum); if (ret < 0) { goto out; } @@ -2679,15 +2710,15 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, return -EIO; } - /* throttling disk write I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, true, nb_sectors); - } - if (bs->copy_on_read_in_flight) { wait_for_overlapping_requests(bs, sector_num, nb_sectors); } + /* throttling disk I/O */ + if (bs->io_limits_enabled) { + bdrv_io_limits_intercept(bs, nb_sectors, true); + } + tracked_request_begin(&req, bs, sector_num, nb_sectors, true); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req); @@ -2711,6 +2742,9 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { bs->wr_highest_sector = sector_num + nb_sectors - 1; } + if (bs->growable && ret >= 0) { + bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors); + } tracked_request_end(&req); @@ -2785,7 +2819,7 @@ int64_t bdrv_getlength(BlockDriverState *bs) if (!drv) return -ENOMEDIUM; - if (bs->growable || bdrv_dev_has_removable_media(bs)) { + if (bdrv_dev_has_removable_media(bs)) { if (drv->bdrv_getlength) { return drv->bdrv_getlength(bs); } @@ -2805,14 +2839,6 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) *nb_sectors_ptr = length; } -/* throttling disk io limits */ -void bdrv_set_io_limits(BlockDriverState *bs, - BlockIOLimit *io_limits) -{ - bs->io_limits = *io_limits; - bs->io_limits_enabled = bdrv_io_limits_enabled(bs); -} - void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, BlockdevOnError on_write_error) { @@ -3005,6 +3031,11 @@ int bdrv_has_zero_init(BlockDriverState *bs) { assert(bs->drv); + /* If BS is a copy on write image, it is initialized to + the contents of the base image, which may not be zeroes. */ + if (bs->backing_hd) { + return 0; + } if (bs->drv->bdrv_has_zero_init) { return bs->drv->bdrv_has_zero_init(bs); } @@ -3013,15 +3044,15 @@ int bdrv_has_zero_init(BlockDriverState *bs) return 0; } -typedef struct BdrvCoIsAllocatedData { +typedef struct BdrvCoGetBlockStatusData { BlockDriverState *bs; BlockDriverState *base; int64_t sector_num; int nb_sectors; int *pnum; - int ret; + int64_t ret; bool done; -} BdrvCoIsAllocatedData; +} BdrvCoGetBlockStatusData; /* * Returns true iff the specified sector is present in the disk image. Drivers @@ -3038,12 +3069,20 @@ typedef struct BdrvCoIsAllocatedData { * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes * beyond the end of the disk image it will be clamped. */ -int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) +static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, int *pnum) { + int64_t length; int64_t n; + int64_t ret, ret2; + + length = bdrv_getlength(bs); + if (length < 0) { + return length; + } - if (sector_num >= bs->total_sectors) { + if (sector_num >= (length >> BDRV_SECTOR_BITS)) { *pnum = 0; return 0; } @@ -3053,35 +3092,69 @@ int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num, nb_sectors = n; } - if (!bs->drv->bdrv_co_is_allocated) { + if (!bs->drv->bdrv_co_get_block_status) { *pnum = nb_sectors; - return 1; + ret = BDRV_BLOCK_DATA; + if (bs->drv->protocol_name) { + ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE); + } + return ret; + } + + ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum); + if (ret < 0) { + return ret; + } + + if (!(ret & BDRV_BLOCK_DATA)) { + if (bdrv_has_zero_init(bs)) { + ret |= BDRV_BLOCK_ZERO; + } else { + BlockDriverState *bs2 = bs->backing_hd; + int64_t length2 = bdrv_getlength(bs2); + if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) { + ret |= BDRV_BLOCK_ZERO; + } + } + } + + if (bs->file && + (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && + (ret & BDRV_BLOCK_OFFSET_VALID)) { + ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS, + *pnum, pnum); + if (ret2 >= 0) { + /* Ignore errors. This is just providing extra information, it + * is useful but not necessary. + */ + ret |= (ret2 & BDRV_BLOCK_ZERO); + } } - return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum); + return ret; } -/* Coroutine wrapper for bdrv_is_allocated() */ -static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque) +/* Coroutine wrapper for bdrv_get_block_status() */ +static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque) { - BdrvCoIsAllocatedData *data = opaque; + BdrvCoGetBlockStatusData *data = opaque; BlockDriverState *bs = data->bs; - data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors, - data->pnum); + data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors, + data->pnum); data->done = true; } /* - * Synchronous wrapper around bdrv_co_is_allocated(). + * Synchronous wrapper around bdrv_co_get_block_status(). * - * See bdrv_co_is_allocated() for details. + * See bdrv_co_get_block_status() for details. */ -int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, - int *pnum) +int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum) { Coroutine *co; - BdrvCoIsAllocatedData data = { + BdrvCoGetBlockStatusData data = { .bs = bs, .sector_num = sector_num, .nb_sectors = nb_sectors, @@ -3089,14 +3162,31 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, .done = false, }; - co = qemu_coroutine_create(bdrv_is_allocated_co_entry); - qemu_coroutine_enter(co, &data); - while (!data.done) { - qemu_aio_wait(); + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + bdrv_get_block_status_co_entry(&data); + } else { + co = qemu_coroutine_create(bdrv_get_block_status_co_entry); + qemu_coroutine_enter(co, &data); + while (!data.done) { + qemu_aio_wait(); + } } return data.ret; } +int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum) +{ + int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum); + if (ret < 0) { + return ret; + } + return + (ret & BDRV_BLOCK_DATA) || + ((ret & BDRV_BLOCK_ZERO) && !bdrv_has_zero_init(bs)); +} + /* * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP] * @@ -3109,10 +3199,10 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, * allocated/unallocated state. * */ -int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, - BlockDriverState *base, - int64_t sector_num, - int nb_sectors, int *pnum) +int bdrv_is_allocated_above(BlockDriverState *top, + BlockDriverState *base, + int64_t sector_num, + int nb_sectors, int *pnum) { BlockDriverState *intermediate; int ret, n = nb_sectors; @@ -3120,8 +3210,8 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, intermediate = top; while (intermediate && intermediate != base) { int pnum_inter; - ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors, - &pnum_inter); + ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors, + &pnum_inter); if (ret < 0) { return ret; } else if (ret) { @@ -3148,44 +3238,6 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, return 0; } -/* Coroutine wrapper for bdrv_is_allocated_above() */ -static void coroutine_fn bdrv_is_allocated_above_co_entry(void *opaque) -{ - BdrvCoIsAllocatedData *data = opaque; - BlockDriverState *top = data->bs; - BlockDriverState *base = data->base; - - data->ret = bdrv_co_is_allocated_above(top, base, data->sector_num, - data->nb_sectors, data->pnum); - data->done = true; -} - -/* - * Synchronous wrapper around bdrv_co_is_allocated_above(). - * - * See bdrv_co_is_allocated_above() for details. - */ -int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, - int64_t sector_num, int nb_sectors, int *pnum) -{ - Coroutine *co; - BdrvCoIsAllocatedData data = { - .bs = top, - .base = base, - .sector_num = sector_num, - .nb_sectors = nb_sectors, - .pnum = pnum, - .done = false, - }; - - co = qemu_coroutine_create(bdrv_is_allocated_above_co_entry); - qemu_coroutine_enter(co, &data); - while (!data.done) { - qemu_aio_wait(); - } - return data.ret; -} - const char *bdrv_get_encrypted_filename(BlockDriverState *bs) { if (bs->backing_hd && bs->backing_hd->encrypted) @@ -3622,169 +3674,6 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb) acb->aiocb_info->cancel(acb); } -/* block I/O throttling */ -static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, double elapsed_time, uint64_t *wait) -{ - uint64_t bps_limit = 0; - uint64_t extension; - double bytes_limit, bytes_base, bytes_res; - double slice_time, wait_time; - - if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { - bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]; - } else if (bs->io_limits.bps[is_write]) { - bps_limit = bs->io_limits.bps[is_write]; - } else { - if (wait) { - *wait = 0; - } - - return false; - } - - slice_time = bs->slice_end - bs->slice_start; - slice_time /= (NANOSECONDS_PER_SECOND); - bytes_limit = bps_limit * slice_time; - bytes_base = bs->slice_submitted.bytes[is_write]; - if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { - bytes_base += bs->slice_submitted.bytes[!is_write]; - } - - /* bytes_base: the bytes of data which have been read/written; and - * it is obtained from the history statistic info. - * bytes_res: the remaining bytes of data which need to be read/written. - * (bytes_base + bytes_res) / bps_limit: used to calcuate - * the total time for completing reading/writting all data. - */ - bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE; - - if (bytes_base + bytes_res <= bytes_limit) { - if (wait) { - *wait = 0; - } - - return false; - } - - /* Calc approx time to dispatch */ - wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time; - - /* When the I/O rate at runtime exceeds the limits, - * bs->slice_end need to be extended in order that the current statistic - * info can be kept until the timer fire, so it is increased and tuned - * based on the result of experiment. - */ - extension = wait_time * NANOSECONDS_PER_SECOND; - extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) * - BLOCK_IO_SLICE_TIME; - bs->slice_end += extension; - if (wait) { - *wait = wait_time * NANOSECONDS_PER_SECOND; - } - - return true; -} - -static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, - double elapsed_time, uint64_t *wait) -{ - uint64_t iops_limit = 0; - double ios_limit, ios_base; - double slice_time, wait_time; - - if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { - iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]; - } else if (bs->io_limits.iops[is_write]) { - iops_limit = bs->io_limits.iops[is_write]; - } else { - if (wait) { - *wait = 0; - } - - return false; - } - - slice_time = bs->slice_end - bs->slice_start; - slice_time /= (NANOSECONDS_PER_SECOND); - ios_limit = iops_limit * slice_time; - ios_base = bs->slice_submitted.ios[is_write]; - if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { - ios_base += bs->slice_submitted.ios[!is_write]; - } - - if (ios_base + 1 <= ios_limit) { - if (wait) { - *wait = 0; - } - - return false; - } - - /* Calc approx time to dispatch, in seconds */ - wait_time = (ios_base + 1) / iops_limit; - if (wait_time > elapsed_time) { - wait_time = wait_time - elapsed_time; - } else { - wait_time = 0; - } - - /* Exceeded current slice, extend it by another slice time */ - bs->slice_end += BLOCK_IO_SLICE_TIME; - if (wait) { - *wait = wait_time * NANOSECONDS_PER_SECOND; - } - - return true; -} - -static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, int64_t *wait) -{ - int64_t now, max_wait; - uint64_t bps_wait = 0, iops_wait = 0; - double elapsed_time; - int bps_ret, iops_ret; - - now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (now > bs->slice_end) { - bs->slice_start = now; - bs->slice_end = now + BLOCK_IO_SLICE_TIME; - memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted)); - } - - elapsed_time = now - bs->slice_start; - elapsed_time /= (NANOSECONDS_PER_SECOND); - - bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors, - is_write, elapsed_time, &bps_wait); - iops_ret = bdrv_exceed_iops_limits(bs, is_write, - elapsed_time, &iops_wait); - if (bps_ret || iops_ret) { - max_wait = bps_wait > iops_wait ? bps_wait : iops_wait; - if (wait) { - *wait = max_wait; - } - - now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (bs->slice_end < now + max_wait) { - bs->slice_end = now + max_wait; - } - - return true; - } - - if (wait) { - *wait = 0; - } - - bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors * - BDRV_SECTOR_SIZE; - bs->slice_submitted.ios[is_write]++; - - return false; -} - /**************************************************************/ /* async block device emulation */ @@ -4445,6 +4334,23 @@ int64_t bdrv_get_dirty_count(BlockDriverState *bs) } } +/* Get a reference to bs */ +void bdrv_ref(BlockDriverState *bs) +{ + bs->refcnt++; +} + +/* Release a previously grabbed reference to bs. + * If after releasing, reference count is zero, the BlockDriverState is + * deleted. */ +void bdrv_unref(BlockDriverState *bs) +{ + assert(bs->refcnt > 0); + if (--bs->refcnt == 0) { + bdrv_delete(bs); + } +} + void bdrv_set_in_use(BlockDriverState *bs, int in_use) { assert(bs->in_use != in_use); @@ -4658,7 +4564,7 @@ out: free_option_parameters(param); if (bs) { - bdrv_delete(bs); + bdrv_unref(bs); } } |