aboutsummaryrefslogtreecommitdiff
path: root/hw/block/virtio-blk.c
diff options
context:
space:
mode:
authorPeter Lieven <pl@kamp.de>2015-02-02 14:52:21 +0100
committerKevin Wolf <kwolf@redhat.com>2015-02-06 17:24:21 +0100
commit95f7142abc86a916682bd735aecd90172ffa0d30 (patch)
treed491d0f48d2f6b645383fa6d314264d861780dd8 /hw/block/virtio-blk.c
parent454057b7d9b9ad141bd5df8c4075745e56b4870f (diff)
virtio-blk: introduce multiread
this patch finally introduces multiread support to virtio-blk. While multiwrite support was there for a long time, read support was missing. The complete merge logic is moved into virtio-blk.c which has been the only user of request merging ever since. This is required to be able to merge chunks of requests and immediately invoke callbacks for those requests. Secondly, this is required to switch to direct invocation of coroutines which is planned at a later stage. The following benchmarks show the performance of running fio with 4 worker threads on a local ram disk. The numbers show the average of 10 test runs after 1 run as warmup phase. | 4k | 64k | 4k MB/s | rd seq | rd rand | rd seq | rd rand | wr seq | wr rand --------------+--------+---------+--------+---------+--------+-------- master | 1221 | 1187 | 4178 | 4114 | 1745 | 1213 multiread | 1829 | 1189 | 4639 | 4110 | 1894 | 1216 Signed-off-by: Peter Lieven <pl@kamp.de> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Diffstat (limited to 'hw/block/virtio-blk.c')
-rw-r--r--hw/block/virtio-blk.c296
1 files changed, 202 insertions, 94 deletions
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index e04adb8f16..d0a01a8864 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -34,6 +34,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
req->dev = s;
req->qiov.size = 0;
req->next = NULL;
+ req->mr_next = NULL;
return req;
}
@@ -84,20 +85,32 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
static void virtio_blk_rw_complete(void *opaque, int ret)
{
- VirtIOBlockReq *req = opaque;
+ VirtIOBlockReq *next = opaque;
- trace_virtio_blk_rw_complete(req, ret);
+ while (next) {
+ VirtIOBlockReq *req = next;
+ next = req->mr_next;
+ trace_virtio_blk_rw_complete(req, ret);
- if (ret) {
- int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
- bool is_read = !(p & VIRTIO_BLK_T_OUT);
- if (virtio_blk_handle_rw_error(req, -ret, is_read))
- return;
- }
+ if (req->qiov.nalloc != -1) {
+ /* If nalloc is != 1 req->qiov is a local copy of the original
+ * external iovec. It was allocated in submit_merged_requests
+ * to be able to merge requests. */
+ qemu_iovec_destroy(&req->qiov);
+ }
- virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
- block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
- virtio_blk_free_request(req);
+ if (ret) {
+ int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
+ bool is_read = !(p & VIRTIO_BLK_T_OUT);
+ if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
+ continue;
+ }
+ }
+
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+ block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
+ virtio_blk_free_request(req);
+ }
}
static void virtio_blk_flush_complete(void *opaque, int ret)
@@ -291,24 +304,127 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
}
}
-void virtio_submit_multiwrite(BlockBackend *blk, MultiReqBuffer *mrb)
+static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
+ int start, int num_reqs, int niov)
{
- int i, ret;
+ QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
+ int64_t sector_num = mrb->reqs[start]->sector_num;
+ int nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE;
+ bool is_write = mrb->is_write;
+
+ if (num_reqs > 1) {
+ int i;
+ struct iovec *tmp_iov = qiov->iov;
+ int tmp_niov = qiov->niov;
+
+ /* mrb->reqs[start]->qiov was initialized from external so we can't
+ * modifiy it here. We need to initialize it locally and then add the
+ * external iovecs. */
+ qemu_iovec_init(qiov, niov);
+
+ for (i = 0; i < tmp_niov; i++) {
+ qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len);
+ }
- if (!mrb->num_writes) {
+ for (i = start + 1; i < start + num_reqs; i++) {
+ qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0,
+ mrb->reqs[i]->qiov.size);
+ mrb->reqs[i - 1]->mr_next = mrb->reqs[i];
+ nb_sectors += mrb->reqs[i]->qiov.size / BDRV_SECTOR_SIZE;
+ }
+ assert(nb_sectors == qiov->size / BDRV_SECTOR_SIZE);
+
+ trace_virtio_blk_submit_multireq(mrb, start, num_reqs, sector_num,
+ nb_sectors, is_write);
+ block_acct_merge_done(blk_get_stats(blk),
+ is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ,
+ num_reqs - 1);
+ }
+
+ if (is_write) {
+ blk_aio_writev(blk, sector_num, qiov, nb_sectors,
+ virtio_blk_rw_complete, mrb->reqs[start]);
+ } else {
+ blk_aio_readv(blk, sector_num, qiov, nb_sectors,
+ virtio_blk_rw_complete, mrb->reqs[start]);
+ }
+}
+
+static int multireq_compare(const void *a, const void *b)
+{
+ const VirtIOBlockReq *req1 = *(VirtIOBlockReq **)a,
+ *req2 = *(VirtIOBlockReq **)b;
+
+ /*
+ * Note that we can't simply subtract sector_num1 from sector_num2
+ * here as that could overflow the return value.
+ */
+ if (req1->sector_num > req2->sector_num) {
+ return 1;
+ } else if (req1->sector_num < req2->sector_num) {
+ return -1;
+ } else {
+ return 0;
+ }
+}
+
+void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
+{
+ int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0;
+ int max_xfer_len = 0;
+ int64_t sector_num = 0;
+
+ if (mrb->num_reqs == 1) {
+ submit_requests(blk, mrb, 0, 1, -1);
+ mrb->num_reqs = 0;
return;
}
- ret = blk_aio_multiwrite(blk, mrb->blkreq, mrb->num_writes);
- if (ret != 0) {
- for (i = 0; i < mrb->num_writes; i++) {
- if (mrb->blkreq[i].error) {
- virtio_blk_rw_complete(mrb->blkreq[i].opaque, -EIO);
+ max_xfer_len = blk_get_max_transfer_length(mrb->reqs[0]->dev->blk);
+ max_xfer_len = MIN_NON_ZERO(max_xfer_len, INT_MAX);
+
+ qsort(mrb->reqs, mrb->num_reqs, sizeof(*mrb->reqs),
+ &multireq_compare);
+
+ for (i = 0; i < mrb->num_reqs; i++) {
+ VirtIOBlockReq *req = mrb->reqs[i];
+ if (num_reqs > 0) {
+ bool merge = true;
+
+ /* merge would exceed maximum number of IOVs */
+ if (niov + req->qiov.niov > IOV_MAX) {
+ merge = false;
+ }
+
+ /* merge would exceed maximum transfer length of backend device */
+ if (req->qiov.size / BDRV_SECTOR_SIZE + nb_sectors > max_xfer_len) {
+ merge = false;
+ }
+
+ /* requests are not sequential */
+ if (sector_num + nb_sectors != req->sector_num) {
+ merge = false;
+ }
+
+ if (!merge) {
+ submit_requests(blk, mrb, start, num_reqs, niov);
+ num_reqs = 0;
}
}
+
+ if (num_reqs == 0) {
+ sector_num = req->sector_num;
+ nb_sectors = niov = 0;
+ start = i;
+ }
+
+ nb_sectors += req->qiov.size / BDRV_SECTOR_SIZE;
+ niov += req->qiov.niov;
+ num_reqs++;
}
- mrb->num_writes = 0;
+ submit_requests(blk, mrb, start, num_reqs, niov);
+ mrb->num_reqs = 0;
}
static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
@@ -319,7 +435,9 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
/*
* Make sure all outstanding writes are posted to the backing device.
*/
- virtio_submit_multiwrite(req->dev->blk, mrb);
+ if (mrb->is_write && mrb->num_reqs > 0) {
+ virtio_blk_submit_multireq(req->dev->blk, mrb);
+ }
blk_aio_flush(req->dev->blk, virtio_blk_flush_complete, req);
}
@@ -329,6 +447,9 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
uint64_t total_sectors;
+ if (nb_sectors > INT_MAX) {
+ return false;
+ }
if (sector & dev->sector_mask) {
return false;
}
@@ -342,60 +463,6 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
return true;
}
-static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
-{
- BlockRequest *blkreq;
- uint64_t sector;
-
- sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
-
- trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);
-
- if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
- virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
- virtio_blk_free_request(req);
- return;
- }
-
- block_acct_start(blk_get_stats(req->dev->blk), &req->acct, req->qiov.size,
- BLOCK_ACCT_WRITE);
-
- if (mrb->num_writes == VIRTIO_BLK_MAX_MERGE_REQS) {
- virtio_submit_multiwrite(req->dev->blk, mrb);
- }
-
- blkreq = &mrb->blkreq[mrb->num_writes];
- blkreq->sector = sector;
- blkreq->nb_sectors = req->qiov.size / BDRV_SECTOR_SIZE;
- blkreq->qiov = &req->qiov;
- blkreq->cb = virtio_blk_rw_complete;
- blkreq->opaque = req;
- blkreq->error = 0;
-
- mrb->num_writes++;
-}
-
-static void virtio_blk_handle_read(VirtIOBlockReq *req)
-{
- uint64_t sector;
-
- sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
-
- trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);
-
- if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
- virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
- virtio_blk_free_request(req);
- return;
- }
-
- block_acct_start(blk_get_stats(req->dev->blk), &req->acct, req->qiov.size,
- BLOCK_ACCT_READ);
- blk_aio_readv(req->dev->blk, sector, &req->qiov,
- req->qiov.size / BDRV_SECTOR_SIZE,
- virtio_blk_rw_complete, req);
-}
-
void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
uint32_t type;
@@ -430,11 +497,57 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
type = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
- if (type & VIRTIO_BLK_T_FLUSH) {
+ /* VIRTIO_BLK_T_OUT defines the command direction. VIRTIO_BLK_T_BARRIER
+ * is an optional flag. Altough a guest should not send this flag if
+ * not negotiated we ignored it in the past. So keep ignoring it. */
+ switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
+ case VIRTIO_BLK_T_IN:
+ {
+ bool is_write = type & VIRTIO_BLK_T_OUT;
+ req->sector_num = virtio_ldq_p(VIRTIO_DEVICE(req->dev),
+ &req->out.sector);
+
+ if (is_write) {
+ qemu_iovec_init_external(&req->qiov, iov, out_num);
+ trace_virtio_blk_handle_write(req, req->sector_num,
+ req->qiov.size / BDRV_SECTOR_SIZE);
+ } else {
+ qemu_iovec_init_external(&req->qiov, in_iov, in_num);
+ trace_virtio_blk_handle_read(req, req->sector_num,
+ req->qiov.size / BDRV_SECTOR_SIZE);
+ }
+
+ if (!virtio_blk_sect_range_ok(req->dev, req->sector_num,
+ req->qiov.size)) {
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+ virtio_blk_free_request(req);
+ return;
+ }
+
+ block_acct_start(blk_get_stats(req->dev->blk),
+ &req->acct, req->qiov.size,
+ is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
+
+ /* merge would exceed maximum number of requests or IO direction
+ * changes */
+ if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS ||
+ is_write != mrb->is_write)) {
+ virtio_blk_submit_multireq(req->dev->blk, mrb);
+ }
+
+ assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS);
+ mrb->reqs[mrb->num_reqs++] = req;
+ mrb->is_write = is_write;
+ break;
+ }
+ case VIRTIO_BLK_T_FLUSH:
virtio_blk_handle_flush(req, mrb);
- } else if (type & VIRTIO_BLK_T_SCSI_CMD) {
+ break;
+ case VIRTIO_BLK_T_SCSI_CMD:
virtio_blk_handle_scsi(req);
- } else if (type & VIRTIO_BLK_T_GET_ID) {
+ break;
+ case VIRTIO_BLK_T_GET_ID:
+ {
VirtIOBlock *s = req->dev;
/*
@@ -448,14 +561,9 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
iov_from_buf(in_iov, in_num, 0, serial, size);
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
virtio_blk_free_request(req);
- } else if (type & VIRTIO_BLK_T_OUT) {
- qemu_iovec_init_external(&req->qiov, iov, out_num);
- virtio_blk_handle_write(req, mrb);
- } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
- /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
- qemu_iovec_init_external(&req->qiov, in_iov, in_num);
- virtio_blk_handle_read(req);
- } else {
+ break;
+ }
+ default:
virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
virtio_blk_free_request(req);
}
@@ -465,9 +573,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIOBlock *s = VIRTIO_BLK(vdev);
VirtIOBlockReq *req;
- MultiReqBuffer mrb = {
- .num_writes = 0,
- };
+ MultiReqBuffer mrb = {};
/* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
* dataplane here instead of waiting for .set_status().
@@ -481,7 +587,9 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
virtio_blk_handle_request(req, &mrb);
}
- virtio_submit_multiwrite(s->blk, &mrb);
+ if (mrb.num_reqs) {
+ virtio_blk_submit_multireq(s->blk, &mrb);
+ }
/*
* FIXME: Want to check for completions before returning to guest mode,
@@ -494,9 +602,7 @@ static void virtio_blk_dma_restart_bh(void *opaque)
{
VirtIOBlock *s = opaque;
VirtIOBlockReq *req = s->rq;
- MultiReqBuffer mrb = {
- .num_writes = 0,
- };
+ MultiReqBuffer mrb = {};
qemu_bh_delete(s->bh);
s->bh = NULL;
@@ -509,7 +615,9 @@ static void virtio_blk_dma_restart_bh(void *opaque)
req = next;
}
- virtio_submit_multiwrite(s->blk, &mrb);
+ if (mrb.num_reqs) {
+ virtio_blk_submit_multireq(s->blk, &mrb);
+ }
}
static void virtio_blk_dma_restart_cb(void *opaque, int running,