diff options
-rw-r--r-- | block.c | 183 | ||||
-rw-r--r-- | block.h | 15 | ||||
-rw-r--r-- | block_int.h | 6 | ||||
-rw-r--r-- | cutils.c | 25 | ||||
-rw-r--r-- | qemu-common.h | 1 |
5 files changed, 230 insertions, 0 deletions
@@ -1354,6 +1354,189 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, return ret; } + +typedef struct MultiwriteCB { + int error; + int num_requests; + int num_callbacks; + struct { + BlockDriverCompletionFunc *cb; + void *opaque; + QEMUIOVector *free_qiov; + void *free_buf; + } callbacks[]; +} MultiwriteCB; + +static void multiwrite_user_cb(MultiwriteCB *mcb) +{ + int i; + + for (i = 0; i < mcb->num_callbacks; i++) { + mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error); + qemu_free(mcb->callbacks[i].free_qiov); + qemu_free(mcb->callbacks[i].free_buf); + } +} + +static void multiwrite_cb(void *opaque, int ret) +{ + MultiwriteCB *mcb = opaque; + + if (ret < 0) { + mcb->error = ret; + multiwrite_user_cb(mcb); + } + + mcb->num_requests--; + if (mcb->num_requests == 0) { + if (mcb->error == 0) { + multiwrite_user_cb(mcb); + } + qemu_free(mcb); + } +} + +static int multiwrite_req_compare(const void *a, const void *b) +{ + return (((BlockRequest*) a)->sector - ((BlockRequest*) b)->sector); +} + +/* + * Takes a bunch of requests and tries to merge them. Returns the number of + * requests that remain after merging. + */ +static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, + int num_reqs, MultiwriteCB *mcb) +{ + int i, outidx; + + // Sort requests by start sector + qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare); + + // Check if adjacent requests touch the same clusters. If so, combine them, + // filling up gaps with zero sectors. + outidx = 0; + for (i = 1; i < num_reqs; i++) { + int merge = 0; + int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors; + + // This handles the cases that are valid for all block drivers, namely + // exactly sequential writes and overlapping writes. + if (reqs[i].sector <= oldreq_last) { + merge = 1; + } + + // The block driver may decide that it makes sense to combine requests + // even if there is a gap of some sectors between them. In this case, + // the gap is filled with zeros (therefore only applicable for yet + // unused space in format like qcow2). + if (!merge && bs->drv->bdrv_merge_requests) { + merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]); + } + + if (merge) { + size_t size; + QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov)); + qemu_iovec_init(qiov, + reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1); + + // Add the first request to the merged one. If the requests are + // overlapping, drop the last sectors of the first request. + size = (reqs[i].sector - reqs[outidx].sector) << 9; + qemu_iovec_concat(qiov, reqs[outidx].qiov, size); + + // We might need to add some zeros between the two requests + if (reqs[i].sector > oldreq_last) { + size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9; + uint8_t *buf = qemu_blockalign(bs, zero_bytes); + memset(buf, 0, zero_bytes); + qemu_iovec_add(qiov, buf, zero_bytes); + mcb->callbacks[i].free_buf = buf; + } + + // Add the second request + qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size); + + reqs[outidx].nb_sectors += reqs[i].nb_sectors; + reqs[outidx].qiov = qiov; + + mcb->callbacks[i].free_qiov = reqs[outidx].qiov; + } else { + outidx++; + reqs[outidx].sector = reqs[i].sector; + reqs[outidx].nb_sectors = reqs[i].nb_sectors; + reqs[outidx].qiov = reqs[i].qiov; + } + } + + return outidx + 1; +} + +/* + * Submit multiple AIO write requests at once. + * + * On success, the function returns 0 and all requests in the reqs array have + * been submitted. In error case this function returns -1, and any of the + * requests may or may not be submitted yet. In particular, this means that the + * callback will be called for some of the requests, for others it won't. The + * caller must check the error field of the BlockRequest to wait for the right + * callbacks (if error != 0, no callback will be called). + * + * The implementation may modify the contents of the reqs array, e.g. to merge + * requests. However, the fields opaque and error are left unmodified as they + * are used to signal failure for a single request to the caller. + */ +int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs) +{ + BlockDriverAIOCB *acb; + MultiwriteCB *mcb; + int i; + + if (num_reqs == 0) { + return 0; + } + + // Create MultiwriteCB structure + mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks)); + mcb->num_requests = 0; + mcb->num_callbacks = num_reqs; + + for (i = 0; i < num_reqs; i++) { + mcb->callbacks[i].cb = reqs[i].cb; + mcb->callbacks[i].opaque = reqs[i].opaque; + } + + // Check for mergable requests + num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb); + + // Run the aio requests + for (i = 0; i < num_reqs; i++) { + acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov, + reqs[i].nb_sectors, multiwrite_cb, mcb); + + if (acb == NULL) { + // We can only fail the whole thing if no request has been + // submitted yet. Otherwise we'll wait for the submitted AIOs to + // complete and report the error in the callback. + if (mcb->num_requests == 0) { + reqs[i].error = EIO; + goto fail; + } else { + mcb->error = EIO; + break; + } + } else { + mcb->num_requests++; + } + } + + return 0; + +fail: + free(mcb); + return -1; +} + void bdrv_aio_cancel(BlockDriverAIOCB *acb) { acb->pool->cancel(acb); @@ -87,6 +87,21 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, BlockDriverCompletionFunc *cb, void *opaque); void bdrv_aio_cancel(BlockDriverAIOCB *acb); +typedef struct BlockRequest { + /* Fields to be filled by multiwrite caller */ + int64_t sector; + int nb_sectors; + QEMUIOVector *qiov; + BlockDriverCompletionFunc *cb; + void *opaque; + + /* Filled by multiwrite implementation */ + int error; +} BlockRequest; + +int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, + int num_reqs); + /* sg packet commands */ int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf); BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, diff --git a/block_int.h b/block_int.h index 0902fd4733..3e4b4a387b 100644 --- a/block_int.h +++ b/block_int.h @@ -70,6 +70,12 @@ struct BlockDriver { int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque); + int (*bdrv_aio_multiwrite)(BlockDriverState *bs, BlockRequest *reqs, + int num_reqs); + int (*bdrv_merge_requests)(BlockDriverState *bs, BlockRequest* a, + BlockRequest *b); + + const char *protocol_name; int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset); int64_t (*bdrv_getlength)(BlockDriverState *bs); @@ -151,6 +151,31 @@ void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len) ++qiov->niov; } +/* + * Copies iovecs from src to the end dst until src is completely copied or the + * total size of the copied iovec reaches size. The size of the last copied + * iovec is changed in order to fit the specified total size if it isn't a + * perfect fit already. + */ +void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size) +{ + int i; + size_t done; + + assert(dst->nalloc != -1); + + done = 0; + for (i = 0; (i < src->niov) && (done != size); i++) { + if (done + src->iov[i].iov_len > size) { + qemu_iovec_add(dst, src->iov[i].iov_base, size - done); + break; + } else { + qemu_iovec_add(dst, src->iov[i].iov_base, src->iov[i].iov_len); + } + done += src->iov[i].iov_len; + } +} + void qemu_iovec_destroy(QEMUIOVector *qiov) { assert(qiov->nalloc != -1); diff --git a/qemu-common.h b/qemu-common.h index 74ac88fba0..f3cfb683ea 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -223,6 +223,7 @@ typedef struct QEMUIOVector { void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); +void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size); void qemu_iovec_destroy(QEMUIOVector *qiov); void qemu_iovec_reset(QEMUIOVector *qiov); void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf); |