aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--hw/block/dataplane/virtio-blk.c8
-rw-r--r--hw/block/virtio-blk.c296
-rw-r--r--include/hw/virtio/virtio-blk.h19
-rw-r--r--trace-events1
4 files changed, 218 insertions, 106 deletions
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 39c5d7103c..be957d1117 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -96,9 +96,7 @@ static void handle_notify(EventNotifier *e)
event_notifier_test_and_clear(&s->host_notifier);
blk_io_plug(s->conf->conf.blk);
for (;;) {
- MultiReqBuffer mrb = {
- .num_writes = 0,
- };
+ MultiReqBuffer mrb = {};
int ret;
/* Disable guest->host notifies to avoid unnecessary vmexits */
@@ -120,7 +118,9 @@ static void handle_notify(EventNotifier *e)
virtio_blk_handle_request(req, &mrb);
}
- virtio_submit_multiwrite(s->conf->conf.blk, &mrb);
+ if (mrb.num_reqs) {
+ virtio_blk_submit_multireq(s->conf->conf.blk, &mrb);
+ }
if (likely(ret == -EAGAIN)) { /* vring emptied */
/* Re-enable guest->host notifies and stop processing the vring.
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index e04adb8f16..d0a01a8864 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -34,6 +34,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
req->dev = s;
req->qiov.size = 0;
req->next = NULL;
+ req->mr_next = NULL;
return req;
}
@@ -84,20 +85,32 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
static void virtio_blk_rw_complete(void *opaque, int ret)
{
- VirtIOBlockReq *req = opaque;
+ VirtIOBlockReq *next = opaque;
- trace_virtio_blk_rw_complete(req, ret);
+ while (next) {
+ VirtIOBlockReq *req = next;
+ next = req->mr_next;
+ trace_virtio_blk_rw_complete(req, ret);
- if (ret) {
- int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
- bool is_read = !(p & VIRTIO_BLK_T_OUT);
- if (virtio_blk_handle_rw_error(req, -ret, is_read))
- return;
- }
+ if (req->qiov.nalloc != -1) {
+ /* If nalloc is != 1 req->qiov is a local copy of the original
+ * external iovec. It was allocated in submit_merged_requests
+ * to be able to merge requests. */
+ qemu_iovec_destroy(&req->qiov);
+ }
- virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
- block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
- virtio_blk_free_request(req);
+ if (ret) {
+ int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
+ bool is_read = !(p & VIRTIO_BLK_T_OUT);
+ if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
+ continue;
+ }
+ }
+
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+ block_acct_done(blk_get_stats(req->dev->blk), &req->acct);
+ virtio_blk_free_request(req);
+ }
}
static void virtio_blk_flush_complete(void *opaque, int ret)
@@ -291,24 +304,127 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
}
}
-void virtio_submit_multiwrite(BlockBackend *blk, MultiReqBuffer *mrb)
+static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
+ int start, int num_reqs, int niov)
{
- int i, ret;
+ QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
+ int64_t sector_num = mrb->reqs[start]->sector_num;
+ int nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE;
+ bool is_write = mrb->is_write;
+
+ if (num_reqs > 1) {
+ int i;
+ struct iovec *tmp_iov = qiov->iov;
+ int tmp_niov = qiov->niov;
+
+ /* mrb->reqs[start]->qiov was initialized from external so we can't
+ * modifiy it here. We need to initialize it locally and then add the
+ * external iovecs. */
+ qemu_iovec_init(qiov, niov);
+
+ for (i = 0; i < tmp_niov; i++) {
+ qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len);
+ }
- if (!mrb->num_writes) {
+ for (i = start + 1; i < start + num_reqs; i++) {
+ qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0,
+ mrb->reqs[i]->qiov.size);
+ mrb->reqs[i - 1]->mr_next = mrb->reqs[i];
+ nb_sectors += mrb->reqs[i]->qiov.size / BDRV_SECTOR_SIZE;
+ }
+ assert(nb_sectors == qiov->size / BDRV_SECTOR_SIZE);
+
+ trace_virtio_blk_submit_multireq(mrb, start, num_reqs, sector_num,
+ nb_sectors, is_write);
+ block_acct_merge_done(blk_get_stats(blk),
+ is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ,
+ num_reqs - 1);
+ }
+
+ if (is_write) {
+ blk_aio_writev(blk, sector_num, qiov, nb_sectors,
+ virtio_blk_rw_complete, mrb->reqs[start]);
+ } else {
+ blk_aio_readv(blk, sector_num, qiov, nb_sectors,
+ virtio_blk_rw_complete, mrb->reqs[start]);
+ }
+}
+
+static int multireq_compare(const void *a, const void *b)
+{
+ const VirtIOBlockReq *req1 = *(VirtIOBlockReq **)a,
+ *req2 = *(VirtIOBlockReq **)b;
+
+ /*
+ * Note that we can't simply subtract sector_num1 from sector_num2
+ * here as that could overflow the return value.
+ */
+ if (req1->sector_num > req2->sector_num) {
+ return 1;
+ } else if (req1->sector_num < req2->sector_num) {
+ return -1;
+ } else {
+ return 0;
+ }
+}
+
+void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
+{
+ int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0;
+ int max_xfer_len = 0;
+ int64_t sector_num = 0;
+
+ if (mrb->num_reqs == 1) {
+ submit_requests(blk, mrb, 0, 1, -1);
+ mrb->num_reqs = 0;
return;
}
- ret = blk_aio_multiwrite(blk, mrb->blkreq, mrb->num_writes);
- if (ret != 0) {
- for (i = 0; i < mrb->num_writes; i++) {
- if (mrb->blkreq[i].error) {
- virtio_blk_rw_complete(mrb->blkreq[i].opaque, -EIO);
+ max_xfer_len = blk_get_max_transfer_length(mrb->reqs[0]->dev->blk);
+ max_xfer_len = MIN_NON_ZERO(max_xfer_len, INT_MAX);
+
+ qsort(mrb->reqs, mrb->num_reqs, sizeof(*mrb->reqs),
+ &multireq_compare);
+
+ for (i = 0; i < mrb->num_reqs; i++) {
+ VirtIOBlockReq *req = mrb->reqs[i];
+ if (num_reqs > 0) {
+ bool merge = true;
+
+ /* merge would exceed maximum number of IOVs */
+ if (niov + req->qiov.niov > IOV_MAX) {
+ merge = false;
+ }
+
+ /* merge would exceed maximum transfer length of backend device */
+ if (req->qiov.size / BDRV_SECTOR_SIZE + nb_sectors > max_xfer_len) {
+ merge = false;
+ }
+
+ /* requests are not sequential */
+ if (sector_num + nb_sectors != req->sector_num) {
+ merge = false;
+ }
+
+ if (!merge) {
+ submit_requests(blk, mrb, start, num_reqs, niov);
+ num_reqs = 0;
}
}
+
+ if (num_reqs == 0) {
+ sector_num = req->sector_num;
+ nb_sectors = niov = 0;
+ start = i;
+ }
+
+ nb_sectors += req->qiov.size / BDRV_SECTOR_SIZE;
+ niov += req->qiov.niov;
+ num_reqs++;
}
- mrb->num_writes = 0;
+ submit_requests(blk, mrb, start, num_reqs, niov);
+ mrb->num_reqs = 0;
}
static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
@@ -319,7 +435,9 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
/*
* Make sure all outstanding writes are posted to the backing device.
*/
- virtio_submit_multiwrite(req->dev->blk, mrb);
+ if (mrb->is_write && mrb->num_reqs > 0) {
+ virtio_blk_submit_multireq(req->dev->blk, mrb);
+ }
blk_aio_flush(req->dev->blk, virtio_blk_flush_complete, req);
}
@@ -329,6 +447,9 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
uint64_t total_sectors;
+ if (nb_sectors > INT_MAX) {
+ return false;
+ }
if (sector & dev->sector_mask) {
return false;
}
@@ -342,60 +463,6 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
return true;
}
-static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
-{
- BlockRequest *blkreq;
- uint64_t sector;
-
- sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
-
- trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);
-
- if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
- virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
- virtio_blk_free_request(req);
- return;
- }
-
- block_acct_start(blk_get_stats(req->dev->blk), &req->acct, req->qiov.size,
- BLOCK_ACCT_WRITE);
-
- if (mrb->num_writes == VIRTIO_BLK_MAX_MERGE_REQS) {
- virtio_submit_multiwrite(req->dev->blk, mrb);
- }
-
- blkreq = &mrb->blkreq[mrb->num_writes];
- blkreq->sector = sector;
- blkreq->nb_sectors = req->qiov.size / BDRV_SECTOR_SIZE;
- blkreq->qiov = &req->qiov;
- blkreq->cb = virtio_blk_rw_complete;
- blkreq->opaque = req;
- blkreq->error = 0;
-
- mrb->num_writes++;
-}
-
-static void virtio_blk_handle_read(VirtIOBlockReq *req)
-{
- uint64_t sector;
-
- sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
-
- trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);
-
- if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
- virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
- virtio_blk_free_request(req);
- return;
- }
-
- block_acct_start(blk_get_stats(req->dev->blk), &req->acct, req->qiov.size,
- BLOCK_ACCT_READ);
- blk_aio_readv(req->dev->blk, sector, &req->qiov,
- req->qiov.size / BDRV_SECTOR_SIZE,
- virtio_blk_rw_complete, req);
-}
-
void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
uint32_t type;
@@ -430,11 +497,57 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
type = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
- if (type & VIRTIO_BLK_T_FLUSH) {
+ /* VIRTIO_BLK_T_OUT defines the command direction. VIRTIO_BLK_T_BARRIER
+ * is an optional flag. Altough a guest should not send this flag if
+ * not negotiated we ignored it in the past. So keep ignoring it. */
+ switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
+ case VIRTIO_BLK_T_IN:
+ {
+ bool is_write = type & VIRTIO_BLK_T_OUT;
+ req->sector_num = virtio_ldq_p(VIRTIO_DEVICE(req->dev),
+ &req->out.sector);
+
+ if (is_write) {
+ qemu_iovec_init_external(&req->qiov, iov, out_num);
+ trace_virtio_blk_handle_write(req, req->sector_num,
+ req->qiov.size / BDRV_SECTOR_SIZE);
+ } else {
+ qemu_iovec_init_external(&req->qiov, in_iov, in_num);
+ trace_virtio_blk_handle_read(req, req->sector_num,
+ req->qiov.size / BDRV_SECTOR_SIZE);
+ }
+
+ if (!virtio_blk_sect_range_ok(req->dev, req->sector_num,
+ req->qiov.size)) {
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+ virtio_blk_free_request(req);
+ return;
+ }
+
+ block_acct_start(blk_get_stats(req->dev->blk),
+ &req->acct, req->qiov.size,
+ is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
+
+ /* merge would exceed maximum number of requests or IO direction
+ * changes */
+ if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS ||
+ is_write != mrb->is_write)) {
+ virtio_blk_submit_multireq(req->dev->blk, mrb);
+ }
+
+ assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS);
+ mrb->reqs[mrb->num_reqs++] = req;
+ mrb->is_write = is_write;
+ break;
+ }
+ case VIRTIO_BLK_T_FLUSH:
virtio_blk_handle_flush(req, mrb);
- } else if (type & VIRTIO_BLK_T_SCSI_CMD) {
+ break;
+ case VIRTIO_BLK_T_SCSI_CMD:
virtio_blk_handle_scsi(req);
- } else if (type & VIRTIO_BLK_T_GET_ID) {
+ break;
+ case VIRTIO_BLK_T_GET_ID:
+ {
VirtIOBlock *s = req->dev;
/*
@@ -448,14 +561,9 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
iov_from_buf(in_iov, in_num, 0, serial, size);
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
virtio_blk_free_request(req);
- } else if (type & VIRTIO_BLK_T_OUT) {
- qemu_iovec_init_external(&req->qiov, iov, out_num);
- virtio_blk_handle_write(req, mrb);
- } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
- /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
- qemu_iovec_init_external(&req->qiov, in_iov, in_num);
- virtio_blk_handle_read(req);
- } else {
+ break;
+ }
+ default:
virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
virtio_blk_free_request(req);
}
@@ -465,9 +573,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIOBlock *s = VIRTIO_BLK(vdev);
VirtIOBlockReq *req;
- MultiReqBuffer mrb = {
- .num_writes = 0,
- };
+ MultiReqBuffer mrb = {};
/* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
* dataplane here instead of waiting for .set_status().
@@ -481,7 +587,9 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
virtio_blk_handle_request(req, &mrb);
}
- virtio_submit_multiwrite(s->blk, &mrb);
+ if (mrb.num_reqs) {
+ virtio_blk_submit_multireq(s->blk, &mrb);
+ }
/*
* FIXME: Want to check for completions before returning to guest mode,
@@ -494,9 +602,7 @@ static void virtio_blk_dma_restart_bh(void *opaque)
{
VirtIOBlock *s = opaque;
VirtIOBlockReq *req = s->rq;
- MultiReqBuffer mrb = {
- .num_writes = 0,
- };
+ MultiReqBuffer mrb = {};
qemu_bh_delete(s->bh);
s->bh = NULL;
@@ -509,7 +615,9 @@ static void virtio_blk_dma_restart_bh(void *opaque)
req = next;
}
- virtio_submit_multiwrite(s->blk, &mrb);
+ if (mrb.num_reqs) {
+ virtio_blk_submit_multireq(s->blk, &mrb);
+ }
}
static void virtio_blk_dma_restart_cb(void *opaque, int running,
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 6ef3fa57cf..2027a64093 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -134,29 +134,32 @@ typedef struct VirtIOBlock {
struct VirtIOBlockDataPlane *dataplane;
} VirtIOBlock;
-#define VIRTIO_BLK_MAX_MERGE_REQS 32
-
-typedef struct MultiReqBuffer {
- BlockRequest blkreq[VIRTIO_BLK_MAX_MERGE_REQS];
- unsigned int num_writes;
-} MultiReqBuffer;
-
typedef struct VirtIOBlockReq {
+ int64_t sector_num;
VirtIOBlock *dev;
VirtQueueElement elem;
struct virtio_blk_inhdr *in;
struct virtio_blk_outhdr out;
QEMUIOVector qiov;
struct VirtIOBlockReq *next;
+ struct VirtIOBlockReq *mr_next;
BlockAcctCookie acct;
} VirtIOBlockReq;
+#define VIRTIO_BLK_MAX_MERGE_REQS 32
+
+typedef struct MultiReqBuffer {
+ VirtIOBlockReq *reqs[VIRTIO_BLK_MAX_MERGE_REQS];
+ unsigned int num_reqs;
+ bool is_write;
+} MultiReqBuffer;
+
VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s);
void virtio_blk_free_request(VirtIOBlockReq *req);
void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb);
-void virtio_submit_multiwrite(BlockBackend *blk, MultiReqBuffer *mrb);
+void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb);
#endif
diff --git a/trace-events b/trace-events
index 907da7ed8a..57f357f7f7 100644
--- a/trace-events
+++ b/trace-events
@@ -116,6 +116,7 @@ virtio_blk_req_complete(void *req, int status) "req %p status %d"
virtio_blk_rw_complete(void *req, int ret) "req %p ret %d"
virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
+virtio_blk_submit_multireq(void *mrb, int start, int num_reqs, uint64_t sector, size_t nsectors, bool is_write) "mrb %p start %d num_reqs %d sector %"PRIu64" nsectors %zu is_write %d"
# hw/block/dataplane/virtio-blk.c
virtio_blk_data_plane_start(void *s) "dataplane %p"