diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2019-09-03 09:43:26 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2019-09-03 09:43:26 +0100 |
commit | 54b89db5309d5fa8b5d3fe5fe56f81704e2f9706 (patch) | |
tree | a6e1a30323544794a2e41df3f0f47b07acd50247 /block/io.c | |
parent | d39b626343a11f7034d044fc676315e076fc847a (diff) | |
parent | 5396234b96a2ac743f48644529771498e036e698 (diff) |
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
Pull request
# gpg: Signature made Tue 27 Aug 2019 21:16:27 BST
# gpg: using RSA key 8695A8BFD3F97CDAAC35775A9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" [full]
# gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>" [full]
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35 775A 9CA4 ABB3 81AB 73C8
* remotes/stefanha/tags/block-pull-request:
block/qcow2: implement .bdrv_co_pwritev(_compressed)_part
block/qcow2: implement .bdrv_co_preadv_part
block/qcow2: refactor qcow2_co_preadv to use buffer-based io
block/io: introduce bdrv_co_p{read, write}v_part
block/io: bdrv_aligned_pwritev: use and support qiov_offset
block/io: bdrv_aligned_preadv: use and support qiov_offset
block/io: bdrv_co_do_copy_on_readv: lazy allocation
block/io: bdrv_co_do_copy_on_readv: use and support qiov_offset
block: define .*_part io handlers in BlockDriver
block/io: refactor padding
util/iov: improve qemu_iovec_is_zero
util/iov: introduce qemu_iovec_init_extended
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block/io.c')
-rw-r--r-- | block/io.c | 541 |
1 files changed, 324 insertions, 217 deletions
diff --git a/block/io.c b/block/io.c index 56bbf195bb..0fa10831ed 100644 --- a/block/io.c +++ b/block/io.c @@ -146,7 +146,8 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) /* Default alignment based on whether driver has byte interface */ bs->bl.request_alignment = (drv->bdrv_co_preadv || - drv->bdrv_aio_preadv) ? 1 : 512; + drv->bdrv_aio_preadv || + drv->bdrv_co_preadv_part) ? 1 : 512; /* Take some limits from the children as a default */ if (bs->file) { @@ -1044,11 +1045,14 @@ static void bdrv_co_io_em_complete(void *opaque, int ret) static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + QEMUIOVector *qiov, + size_t qiov_offset, int flags) { BlockDriver *drv = bs->drv; int64_t sector_num; unsigned int nb_sectors; + QEMUIOVector local_qiov; + int ret; assert(!(flags & ~BDRV_REQ_MASK)); assert(!(flags & BDRV_REQ_NO_FALLBACK)); @@ -1057,8 +1061,19 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, return -ENOMEDIUM; } + if (drv->bdrv_co_preadv_part) { + return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset, + flags); + } + + if (qiov_offset > 0 || bytes != qiov->size) { + qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes); + qiov = &local_qiov; + } + if (drv->bdrv_co_preadv) { - return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags); + ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags); + goto out; } if (drv->bdrv_aio_preadv) { @@ -1070,10 +1085,12 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags, bdrv_co_io_em_complete, &co); if (acb == NULL) { - return -EIO; + ret = -EIO; + goto out; } else { qemu_coroutine_yield(); - return co.ret; + ret = co.ret; + goto out; } } @@ -1085,16 +1102,25 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, assert(bytes <= BDRV_REQUEST_MAX_BYTES); assert(drv->bdrv_co_readv); - return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); + ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); + +out: + if (qiov == &local_qiov) { + qemu_iovec_destroy(&local_qiov); + } + + return ret; } static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + QEMUIOVector *qiov, + size_t qiov_offset, int flags) { BlockDriver *drv = bs->drv; int64_t sector_num; unsigned int nb_sectors; + QEMUIOVector local_qiov; int ret; assert(!(flags & ~BDRV_REQ_MASK)); @@ -1104,6 +1130,18 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, return -ENOMEDIUM; } + if (drv->bdrv_co_pwritev_part) { + ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, + flags & bs->supported_write_flags); + flags &= ~bs->supported_write_flags; + goto emulate_flags; + } + + if (qiov_offset > 0 || bytes != qiov->size) { + qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes); + qiov = &local_qiov; + } + if (drv->bdrv_co_pwritev) { ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags & bs->supported_write_flags); @@ -1147,29 +1185,49 @@ emulate_flags: ret = bdrv_co_flush(bs); } + if (qiov == &local_qiov) { + qemu_iovec_destroy(&local_qiov); + } + return ret; } static int coroutine_fn bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov) + uint64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset) { BlockDriver *drv = bs->drv; + QEMUIOVector local_qiov; + int ret; if (!drv) { return -ENOMEDIUM; } - if (!drv->bdrv_co_pwritev_compressed) { + if (!block_driver_can_compress(drv)) { return -ENOTSUP; } - return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov); + if (drv->bdrv_co_pwritev_compressed_part) { + return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes, + qiov, qiov_offset); + } + + if (qiov_offset == 0) { + return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov); + } + + qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes); + ret = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &local_qiov); + qemu_iovec_destroy(&local_qiov); + + return ret; } static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, - int flags) + size_t qiov_offset, int flags) { BlockDriverState *bs = child->bs; @@ -1178,10 +1236,9 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, * modifying the image file. This is critical for zero-copy guest I/O * where anything might happen inside guest memory. */ - void *bounce_buffer; + void *bounce_buffer = NULL; BlockDriver *drv = bs->drv; - QEMUIOVector local_qiov; int64_t cluster_offset; int64_t cluster_bytes; size_t skip_bytes; @@ -1214,14 +1271,6 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, trace_bdrv_co_do_copy_on_readv(bs, offset, bytes, cluster_offset, cluster_bytes); - bounce_buffer = qemu_try_blockalign(bs, - MIN(MIN(max_transfer, cluster_bytes), - MAX_BOUNCE_BUFFER)); - if (bounce_buffer == NULL) { - ret = -ENOMEM; - goto err; - } - while (cluster_bytes) { int64_t pnum; @@ -1244,12 +1293,25 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, assert(skip_bytes < pnum); if (ret <= 0) { + QEMUIOVector local_qiov; + /* Must copy-on-read; use the bounce buffer */ pnum = MIN(pnum, MAX_BOUNCE_BUFFER); + if (!bounce_buffer) { + int64_t max_we_need = MAX(pnum, cluster_bytes - pnum); + int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER); + int64_t bounce_buffer_len = MIN(max_we_need, max_allowed); + + bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len); + if (!bounce_buffer) { + ret = -ENOMEM; + goto err; + } + } qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum); ret = bdrv_driver_preadv(bs, cluster_offset, pnum, - &local_qiov, 0); + &local_qiov, 0, 0); if (ret < 0) { goto err; } @@ -1267,7 +1329,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, * necessary to flush even in cache=writethrough mode. */ ret = bdrv_driver_pwritev(bs, cluster_offset, pnum, - &local_qiov, + &local_qiov, 0, BDRV_REQ_WRITE_UNCHANGED); } @@ -1281,16 +1343,15 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, } if (!(flags & BDRV_REQ_PREFETCH)) { - qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes, + qemu_iovec_from_buf(qiov, qiov_offset + progress, + bounce_buffer + skip_bytes, pnum - skip_bytes); } } else if (!(flags & BDRV_REQ_PREFETCH)) { /* Read directly into the destination */ - qemu_iovec_init(&local_qiov, qiov->niov); - qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes); - ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size, - &local_qiov, 0); - qemu_iovec_destroy(&local_qiov); + ret = bdrv_driver_preadv(bs, offset + progress, + MIN(pnum - skip_bytes, bytes - progress), + qiov, qiov_offset + progress, 0); if (ret < 0) { goto err; } @@ -1315,7 +1376,7 @@ err: */ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, - int64_t align, QEMUIOVector *qiov, int flags) + int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags) { BlockDriverState *bs = child->bs; int64_t total_bytes, max_bytes; @@ -1326,7 +1387,6 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, assert(is_power_of_2(align)); assert((offset & (align - 1)) == 0); assert((bytes & (align - 1)) == 0); - assert(!qiov || bytes == qiov->size); assert((bs->open_flags & BDRV_O_NO_IO) == 0); max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX), align); @@ -1364,7 +1424,8 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, } if (!ret || pnum != bytes) { - ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov, flags); + ret = bdrv_co_do_copy_on_readv(child, offset, bytes, + qiov, qiov_offset, flags); goto out; } else if (flags & BDRV_REQ_PREFETCH) { goto out; @@ -1380,7 +1441,7 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align); if (bytes <= max_bytes && bytes <= max_transfer) { - ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); + ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0); goto out; } @@ -1388,17 +1449,12 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, int num; if (max_bytes) { - QEMUIOVector local_qiov; - num = MIN(bytes_remaining, MIN(max_bytes, max_transfer)); assert(num); - qemu_iovec_init(&local_qiov, qiov->niov); - qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num); ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining, - num, &local_qiov, 0); + num, qiov, bytes - bytes_remaining, 0); max_bytes -= num; - qemu_iovec_destroy(&local_qiov); } else { num = bytes_remaining; ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0, @@ -1415,28 +1471,187 @@ out: } /* - * Handle a read request in coroutine context + * Request padding + * + * |<---- align ----->| |<----- align ---->| + * |<- head ->|<------------- bytes ------------->|<-- tail -->| + * | | | | | | + * -*----------$-------*-------- ... --------*-----$------------*--- + * | | | | | | + * | offset | | end | + * ALIGN_DOWN(offset) ALIGN_UP(offset) ALIGN_DOWN(end) ALIGN_UP(end) + * [buf ... ) [tail_buf ) + * + * @buf is an aligned allocation needed to store @head and @tail paddings. @head + * is placed at the beginning of @buf and @tail at the @end. + * + * @tail_buf is a pointer to sub-buffer, corresponding to align-sized chunk + * around tail, if tail exists. + * + * @merge_reads is true for small requests, + * if @buf_len == @head + bytes + @tail. In this case it is possible that both + * head and tail exist but @buf_len == align and @tail_buf == @buf. + */ +typedef struct BdrvRequestPadding { + uint8_t *buf; + size_t buf_len; + uint8_t *tail_buf; + size_t head; + size_t tail; + bool merge_reads; + QEMUIOVector local_qiov; +} BdrvRequestPadding; + +static bool bdrv_init_padding(BlockDriverState *bs, + int64_t offset, int64_t bytes, + BdrvRequestPadding *pad) +{ + uint64_t align = bs->bl.request_alignment; + size_t sum; + + memset(pad, 0, sizeof(*pad)); + + pad->head = offset & (align - 1); + pad->tail = ((offset + bytes) & (align - 1)); + if (pad->tail) { + pad->tail = align - pad->tail; + } + + if ((!pad->head && !pad->tail) || !bytes) { + return false; + } + + sum = pad->head + bytes + pad->tail; + pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align; + pad->buf = qemu_blockalign(bs, pad->buf_len); + pad->merge_reads = sum == pad->buf_len; + if (pad->tail) { + pad->tail_buf = pad->buf + pad->buf_len - align; + } + + return true; +} + +static int bdrv_padding_rmw_read(BdrvChild *child, + BdrvTrackedRequest *req, + BdrvRequestPadding *pad, + bool zero_middle) +{ + QEMUIOVector local_qiov; + BlockDriverState *bs = child->bs; + uint64_t align = bs->bl.request_alignment; + int ret; + + assert(req->serialising && pad->buf); + + if (pad->head || pad->merge_reads) { + uint64_t bytes = pad->merge_reads ? pad->buf_len : align; + + qemu_iovec_init_buf(&local_qiov, pad->buf, bytes); + + if (pad->head) { + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); + } + if (pad->merge_reads && pad->tail) { + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); + } + ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes, + align, &local_qiov, 0, 0); + if (ret < 0) { + return ret; + } + if (pad->head) { + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); + } + if (pad->merge_reads && pad->tail) { + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); + } + + if (pad->merge_reads) { + goto zero_mem; + } + } + + if (pad->tail) { + qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align); + + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); + ret = bdrv_aligned_preadv( + child, req, + req->overlap_offset + req->overlap_bytes - align, + align, align, &local_qiov, 0, 0); + if (ret < 0) { + return ret; + } + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); + } + +zero_mem: + if (zero_middle) { + memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail); + } + + return 0; +} + +static void bdrv_padding_destroy(BdrvRequestPadding *pad) +{ + if (pad->buf) { + qemu_vfree(pad->buf); + qemu_iovec_destroy(&pad->local_qiov); + } +} + +/* + * bdrv_pad_request + * + * Exchange request parameters with padded request if needed. Don't include RMW + * read of padding, bdrv_padding_rmw_read() should be called separately if + * needed. + * + * All parameters except @bs are in-out: they represent original request at + * function call and padded (if padding needed) at function finish. + * + * Function always succeeds. */ +static bool bdrv_pad_request(BlockDriverState *bs, + QEMUIOVector **qiov, size_t *qiov_offset, + int64_t *offset, unsigned int *bytes, + BdrvRequestPadding *pad) +{ + if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { + return false; + } + + qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, + *qiov, *qiov_offset, *bytes, + pad->buf + pad->buf_len - pad->tail, pad->tail); + *bytes += pad->head + pad->tail; + *offset -= pad->head; + *qiov = &pad->local_qiov; + *qiov_offset = 0; + + return true; +} + int coroutine_fn bdrv_co_preadv(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { + return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags); +} + +int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) +{ BlockDriverState *bs = child->bs; - BlockDriver *drv = bs->drv; BdrvTrackedRequest req; - - uint64_t align = bs->bl.request_alignment; - uint8_t *head_buf = NULL; - uint8_t *tail_buf = NULL; - QEMUIOVector local_qiov; - bool use_local_qiov = false; + BdrvRequestPadding pad; int ret; - trace_bdrv_co_preadv(child->bs, offset, bytes, flags); - - if (!drv) { - return -ENOMEDIUM; - } + trace_bdrv_co_preadv(bs, offset, bytes, flags); ret = bdrv_check_byte_request(bs, offset, bytes); if (ret < 0) { @@ -1450,43 +1665,16 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, flags |= BDRV_REQ_COPY_ON_READ; } - /* Align read if necessary by padding qiov */ - if (offset & (align - 1)) { - head_buf = qemu_blockalign(bs, align); - qemu_iovec_init(&local_qiov, qiov->niov + 2); - qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); - use_local_qiov = true; - - bytes += offset & (align - 1); - offset = offset & ~(align - 1); - } - - if ((offset + bytes) & (align - 1)) { - if (!use_local_qiov) { - qemu_iovec_init(&local_qiov, qiov->niov + 1); - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); - use_local_qiov = true; - } - tail_buf = qemu_blockalign(bs, align); - qemu_iovec_add(&local_qiov, tail_buf, - align - ((offset + bytes) & (align - 1))); - - bytes = ROUND_UP(bytes, align); - } + bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad); tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); - ret = bdrv_aligned_preadv(child, &req, offset, bytes, align, - use_local_qiov ? &local_qiov : qiov, - flags); + ret = bdrv_aligned_preadv(child, &req, offset, bytes, + bs->bl.request_alignment, + qiov, qiov_offset, flags); tracked_request_end(&req); bdrv_dec_in_flight(bs); - if (use_local_qiov) { - qemu_iovec_destroy(&local_qiov); - qemu_vfree(head_buf); - qemu_vfree(tail_buf); - } + bdrv_padding_destroy(&pad); return ret; } @@ -1579,7 +1767,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, } qemu_iovec_init_buf(&qiov, buf, num); - ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags); + ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags); /* Keep bounce buffer around if it is big enough for all * all future requests. @@ -1694,7 +1882,7 @@ bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes, */ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, - int64_t align, QEMUIOVector *qiov, int flags) + int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags) { BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; @@ -1714,7 +1902,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, assert(is_power_of_2(align)); assert((offset & (align - 1)) == 0); assert((bytes & (align - 1)) == 0); - assert(!qiov || bytes == qiov->size); + assert(!qiov || qiov_offset + bytes <= qiov->size); max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX), align); @@ -1722,7 +1910,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes && - qemu_iovec_is_zero(qiov)) { + qemu_iovec_is_zero(qiov, qiov_offset, bytes)) { flags |= BDRV_REQ_ZERO_WRITE; if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) { flags |= BDRV_REQ_MAY_UNMAP; @@ -1735,15 +1923,15 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO); ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags); } else if (flags & BDRV_REQ_WRITE_COMPRESSED) { - ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov); + ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, + qiov, qiov_offset); } else if (bytes <= max_transfer) { bdrv_debug_event(bs, BLKDBG_PWRITEV); - ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags); + ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags); } else { bdrv_debug_event(bs, BLKDBG_PWRITEV); while (bytes_remaining) { int num = MIN(bytes_remaining, max_transfer); - QEMUIOVector local_qiov; int local_flags = flags; assert(num); @@ -1753,12 +1941,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, * need to flush on the last iteration */ local_flags &= ~BDRV_REQ_FUA; } - qemu_iovec_init(&local_qiov, qiov->niov); - qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num); ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining, - num, &local_qiov, local_flags); - qemu_iovec_destroy(&local_qiov); + num, qiov, bytes - bytes_remaining, + local_flags); if (ret < 0) { break; } @@ -1782,44 +1968,34 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, BdrvTrackedRequest *req) { BlockDriverState *bs = child->bs; - uint8_t *buf = NULL; QEMUIOVector local_qiov; uint64_t align = bs->bl.request_alignment; - unsigned int head_padding_bytes, tail_padding_bytes; int ret = 0; + bool padding; + BdrvRequestPadding pad; - head_padding_bytes = offset & (align - 1); - tail_padding_bytes = (align - (offset + bytes)) & (align - 1); - - - assert(flags & BDRV_REQ_ZERO_WRITE); - if (head_padding_bytes || tail_padding_bytes) { - buf = qemu_blockalign(bs, align); - qemu_iovec_init_buf(&local_qiov, buf, align); - } - if (head_padding_bytes) { - uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes); - - /* RMW the unaligned part before head. */ + padding = bdrv_init_padding(bs, offset, bytes, &pad); + if (padding) { mark_request_serialising(req, align); wait_serialising_requests(req); - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align, - align, &local_qiov, 0); - if (ret < 0) { - goto fail; - } - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); - memset(buf + head_padding_bytes, 0, zero_bytes); - ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align, - align, &local_qiov, - flags & ~BDRV_REQ_ZERO_WRITE); - if (ret < 0) { - goto fail; + bdrv_padding_rmw_read(child, req, &pad, true); + + if (pad.head || pad.merge_reads) { + int64_t aligned_offset = offset & ~(align - 1); + int64_t write_bytes = pad.merge_reads ? pad.buf_len : align; + + qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes); + ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes, + align, &local_qiov, 0, + flags & ~BDRV_REQ_ZERO_WRITE); + if (ret < 0 || pad.merge_reads) { + /* Error or all work is done */ + goto out; + } + offset += write_bytes - pad.head; + bytes -= write_bytes - pad.head; } - offset += zero_bytes; - bytes -= zero_bytes; } assert(!bytes || (offset & (align - 1)) == 0); @@ -1827,9 +2003,9 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, /* Write the aligned part in the middle. */ uint64_t aligned_bytes = bytes & ~(align - 1); ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align, - NULL, flags); + NULL, 0, flags); if (ret < 0) { - goto fail; + goto out; } bytes -= aligned_bytes; offset += aligned_bytes; @@ -1837,26 +2013,18 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, assert(!bytes || (offset & (align - 1)) == 0); if (bytes) { - assert(align == tail_padding_bytes + bytes); - /* RMW the unaligned part after tail. */ - mark_request_serialising(req, align); - wait_serialising_requests(req); - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(child, req, offset, align, - align, &local_qiov, 0); - if (ret < 0) { - goto fail; - } - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); + assert(align == pad.tail + bytes); - memset(buf, 0, bytes); + qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align); ret = bdrv_aligned_pwritev(child, req, offset, align, align, - &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); + &local_qiov, 0, + flags & ~BDRV_REQ_ZERO_WRITE); } -fail: - qemu_vfree(buf); - return ret; +out: + bdrv_padding_destroy(&pad); + + return ret; } /* @@ -1866,13 +2034,17 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { + return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags); +} + +int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) +{ BlockDriverState *bs = child->bs; BdrvTrackedRequest req; uint64_t align = bs->bl.request_alignment; - uint8_t *head_buf = NULL; - uint8_t *tail_buf = NULL; - QEMUIOVector local_qiov; - bool use_local_qiov = false; + BdrvRequestPadding pad; int ret; trace_bdrv_co_pwritev(child->bs, offset, bytes, flags); @@ -1899,86 +2071,21 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, goto out; } - if (offset & (align - 1)) { - QEMUIOVector head_qiov; - + if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) { mark_request_serialising(&req, align); wait_serialising_requests(&req); - - head_buf = qemu_blockalign(bs, align); - qemu_iovec_init_buf(&head_qiov, head_buf, align); - - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align, - align, &head_qiov, 0); - if (ret < 0) { - goto fail; - } - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); - - qemu_iovec_init(&local_qiov, qiov->niov + 2); - qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); - use_local_qiov = true; - - bytes += offset & (align - 1); - offset = offset & ~(align - 1); - - /* We have read the tail already if the request is smaller - * than one aligned block. - */ - if (bytes < align) { - qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes); - bytes = align; - } - } - - if ((offset + bytes) & (align - 1)) { - QEMUIOVector tail_qiov; - size_t tail_bytes; - bool waited; - - mark_request_serialising(&req, align); - waited = wait_serialising_requests(&req); - assert(!waited || !use_local_qiov); - - tail_buf = qemu_blockalign(bs, align); - qemu_iovec_init_buf(&tail_qiov, tail_buf, align); - - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1), - align, align, &tail_qiov, 0); - if (ret < 0) { - goto fail; - } - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); - - if (!use_local_qiov) { - qemu_iovec_init(&local_qiov, qiov->niov + 1); - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); - use_local_qiov = true; - } - - tail_bytes = (offset + bytes) & (align - 1); - qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes); - - bytes = ROUND_UP(bytes, align); + bdrv_padding_rmw_read(child, &req, &pad, false); } ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, - use_local_qiov ? &local_qiov : qiov, - flags); + qiov, qiov_offset, flags); -fail: + bdrv_padding_destroy(&pad); - if (use_local_qiov) { - qemu_iovec_destroy(&local_qiov); - } - qemu_vfree(head_buf); - qemu_vfree(tail_buf); out: tracked_request_end(&req); bdrv_dec_in_flight(bs); + return ret; } |