diff options
-rw-r--r-- | block/backup.c | 5 | ||||
-rw-r--r-- | block/block-backend.c | 13 | ||||
-rw-r--r-- | block/commit.c | 7 | ||||
-rw-r--r-- | block/io.c | 89 | ||||
-rw-r--r-- | block/parallels.c | 13 | ||||
-rw-r--r-- | block/qcow.c | 21 | ||||
-rw-r--r-- | block/qcow2.c | 12 | ||||
-rw-r--r-- | block/qed-table.c | 16 | ||||
-rw-r--r-- | block/qed.c | 31 | ||||
-rw-r--r-- | block/stream.c | 7 | ||||
-rw-r--r-- | block/vmdk.c | 7 | ||||
-rw-r--r-- | hw/block/virtio-blk.c | 245 | ||||
-rw-r--r-- | hw/core/machine.c | 2 | ||||
-rw-r--r-- | hw/ide/atapi.c | 14 | ||||
-rw-r--r-- | hw/ide/core.c | 19 | ||||
-rw-r--r-- | hw/net/virtio-net.c | 31 | ||||
-rw-r--r-- | hw/virtio/virtio.c | 15 | ||||
-rw-r--r-- | include/hw/ide/internal.h | 3 | ||||
-rw-r--r-- | include/hw/virtio/virtio-blk.h | 6 | ||||
-rw-r--r-- | include/hw/virtio/virtio.h | 15 | ||||
-rw-r--r-- | include/qemu/iov.h | 64 | ||||
-rw-r--r-- | migration/block.c | 10 | ||||
-rw-r--r-- | qemu-img.c | 10 | ||||
-rw-r--r-- | tests/test-bdrv-drain.c | 29 | ||||
-rw-r--r-- | tests/virtio-blk-test.c | 127 |
25 files changed, 525 insertions, 286 deletions
diff --git a/block/backup.c b/block/backup.c index 435414e964..9988753249 100644 --- a/block/backup.c +++ b/block/backup.c @@ -107,7 +107,6 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, void **bounce_buffer) { int ret; - struct iovec iov; QEMUIOVector qiov; BlockBackend *blk = job->common.blk; int nbytes; @@ -119,9 +118,7 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, if (!*bounce_buffer) { *bounce_buffer = blk_blockalign(blk, job->cluster_size); } - iov.iov_base = *bounce_buffer; - iov.iov_len = nbytes; - qemu_iovec_init_external(&qiov, &iov, 1); + qemu_iovec_init_buf(&qiov, *bounce_buffer, nbytes); ret = blk_co_preadv(blk, start, qiov.size, &qiov, read_flags); if (ret < 0) { diff --git a/block/block-backend.c b/block/block-backend.c index f6ea824308..6cc25569ef 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1204,17 +1204,8 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, int64_t bytes, CoroutineEntry co_entry, BdrvRequestFlags flags) { - QEMUIOVector qiov; - struct iovec iov; - BlkRwCo rwco; - - iov = (struct iovec) { - .iov_base = buf, - .iov_len = bytes, - }; - qemu_iovec_init_external(&qiov, &iov, 1); - - rwco = (BlkRwCo) { + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + BlkRwCo rwco = { .blk = blk, .offset = offset, .iobuf = &qiov, diff --git a/block/commit.c b/block/commit.c index 53148e610b..d500a93068 100644 --- a/block/commit.c +++ b/block/commit.c @@ -47,14 +47,9 @@ static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, void *buf) { int ret = 0; - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = buf, - .iov_len = bytes, - }; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); assert(bytes < SIZE_MAX); - qemu_iovec_init_external(&qiov, &iov, 1); ret = blk_co_preadv(bs, offset, qiov.size, &qiov, 0); if (ret < 0) { diff --git a/block/io.c b/block/io.c index 213ca03d8d..2ba603c7bc 100644 --- a/block/io.c +++ b/block/io.c @@ -843,17 +843,13 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset, static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf, int nb_sectors, bool is_write, BdrvRequestFlags flags) { - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = (void *)buf, - .iov_len = nb_sectors * BDRV_SECTOR_SIZE, - }; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, + nb_sectors * BDRV_SECTOR_SIZE); if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { return -EINVAL; } - qemu_iovec_init_external(&qiov, &iov, 1); return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS, &qiov, is_write, flags); } @@ -880,13 +876,8 @@ int bdrv_write(BdrvChild *child, int64_t sector_num, int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags) { - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = NULL, - .iov_len = bytes, - }; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes); - qemu_iovec_init_external(&qiov, &iov, 1); return bdrv_prwv_co(child, offset, &qiov, true, BDRV_REQ_ZERO_WRITE | flags); } @@ -950,17 +941,12 @@ int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes) { - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = (void *)buf, - .iov_len = bytes, - }; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); if (bytes < 0) { return -EINVAL; } - qemu_iovec_init_external(&qiov, &iov, 1); return bdrv_preadv(child, offset, &qiov); } @@ -978,17 +964,12 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes) { - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = (void *) buf, - .iov_len = bytes, - }; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); if (bytes < 0) { return -EINVAL; } - qemu_iovec_init_external(&qiov, &iov, 1); return bdrv_pwritev(child, offset, &qiov); } @@ -1165,7 +1146,6 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, void *bounce_buffer; BlockDriver *drv = bs->drv; - struct iovec iov; QEMUIOVector local_qiov; int64_t cluster_offset; int64_t cluster_bytes; @@ -1230,9 +1210,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, if (ret <= 0) { /* Must copy-on-read; use the bounce buffer */ - iov.iov_base = bounce_buffer; - iov.iov_len = pnum = MIN(pnum, MAX_BOUNCE_BUFFER); - qemu_iovec_init_external(&local_qiov, &iov, 1); + pnum = MIN(pnum, MAX_BOUNCE_BUFFER); + qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum); ret = bdrv_driver_preadv(bs, cluster_offset, pnum, &local_qiov, 0); @@ -1477,7 +1456,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, { BlockDriver *drv = bs->drv; QEMUIOVector qiov; - struct iovec iov = {0}; + void *buf = NULL; int ret = 0; bool need_flush = false; int head = 0; @@ -1547,16 +1526,14 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, need_flush = true; } num = MIN(num, max_transfer); - iov.iov_len = num; - if (iov.iov_base == NULL) { - iov.iov_base = qemu_try_blockalign(bs, num); - if (iov.iov_base == NULL) { + if (buf == NULL) { + buf = qemu_try_blockalign0(bs, num); + if (buf == NULL) { ret = -ENOMEM; goto fail; } - memset(iov.iov_base, 0, num); } - qemu_iovec_init_external(&qiov, &iov, 1); + qemu_iovec_init_buf(&qiov, buf, num); ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags); @@ -1564,8 +1541,8 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, * all future requests. */ if (num < max_transfer) { - qemu_vfree(iov.iov_base); - iov.iov_base = NULL; + qemu_vfree(buf); + buf = NULL; } } @@ -1577,7 +1554,7 @@ fail: if (ret == 0 && need_flush) { ret = bdrv_co_flush(bs); } - qemu_vfree(iov.iov_base); + qemu_vfree(buf); return ret; } @@ -1763,7 +1740,6 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, BlockDriverState *bs = child->bs; uint8_t *buf = NULL; QEMUIOVector local_qiov; - struct iovec iov; uint64_t align = bs->bl.request_alignment; unsigned int head_padding_bytes, tail_padding_bytes; int ret = 0; @@ -1775,11 +1751,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, assert(flags & BDRV_REQ_ZERO_WRITE); if (head_padding_bytes || tail_padding_bytes) { buf = qemu_blockalign(bs, align); - iov = (struct iovec) { - .iov_base = buf, - .iov_len = align, - }; - qemu_iovec_init_external(&local_qiov, &iov, 1); + qemu_iovec_init_buf(&local_qiov, buf, align); } if (head_padding_bytes) { uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes); @@ -1885,17 +1857,12 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, if (offset & (align - 1)) { QEMUIOVector head_qiov; - struct iovec head_iov; mark_request_serialising(&req, align); wait_serialising_requests(&req); head_buf = qemu_blockalign(bs, align); - head_iov = (struct iovec) { - .iov_base = head_buf, - .iov_len = align, - }; - qemu_iovec_init_external(&head_qiov, &head_iov, 1); + qemu_iovec_init_buf(&head_qiov, head_buf, align); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align, @@ -1924,7 +1891,6 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, if ((offset + bytes) & (align - 1)) { QEMUIOVector tail_qiov; - struct iovec tail_iov; size_t tail_bytes; bool waited; @@ -1933,11 +1899,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, assert(!waited || !use_local_qiov); tail_buf = qemu_blockalign(bs, align); - tail_iov = (struct iovec) { - .iov_base = tail_buf, - .iov_len = align, - }; - qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); + qemu_iovec_init_buf(&tail_qiov, tail_buf, align); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1), @@ -2468,15 +2430,9 @@ bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size) { - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = (void *) buf, - .iov_len = size, - }; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size); int ret; - qemu_iovec_init_external(&qiov, &iov, 1); - ret = bdrv_writev_vmstate(bs, &qiov, pos); if (ret < 0) { return ret; @@ -2493,14 +2449,9 @@ int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size) { - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = buf, - .iov_len = size, - }; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size); int ret; - qemu_iovec_init_external(&qiov, &iov, 1); ret = bdrv_readv_vmstate(bs, &qiov, pos); if (ret < 0) { return ret; diff --git a/block/parallels.c b/block/parallels.c index cc9445879d..15bc97b759 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -220,23 +220,20 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, if (bs->backing) { int64_t nb_cow_sectors = to_allocate * s->tracks; int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS; - QEMUIOVector qiov; - struct iovec iov = { - .iov_len = nb_cow_bytes, - .iov_base = qemu_blockalign(bs, nb_cow_bytes) - }; - qemu_iovec_init_external(&qiov, &iov, 1); + QEMUIOVector qiov = + QEMU_IOVEC_INIT_BUF(qiov, qemu_blockalign(bs, nb_cow_bytes), + nb_cow_bytes); ret = bdrv_co_preadv(bs->backing, idx * s->tracks * BDRV_SECTOR_SIZE, nb_cow_bytes, &qiov, 0); if (ret < 0) { - qemu_vfree(iov.iov_base); + qemu_vfree(qemu_iovec_buf(&qiov)); return ret; } ret = bdrv_co_pwritev(bs->file, s->data_end * BDRV_SECTOR_SIZE, nb_cow_bytes, &qiov, 0); - qemu_vfree(iov.iov_base); + qemu_vfree(qemu_iovec_buf(&qiov)); if (ret < 0) { return ret; } diff --git a/block/qcow.c b/block/qcow.c index 0a235bf393..409c700d33 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -628,7 +628,6 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset, int offset_in_cluster; int ret = 0, n; uint64_t cluster_offset; - struct iovec hd_iov; QEMUIOVector hd_qiov; uint8_t *buf; void *orig_buf; @@ -661,9 +660,7 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset, if (!cluster_offset) { if (bs->backing) { /* read from the base image */ - hd_iov.iov_base = (void *)buf; - hd_iov.iov_len = n; - qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); + qemu_iovec_init_buf(&hd_qiov, buf, n); qemu_co_mutex_unlock(&s->lock); /* qcow2 emits this on bs->file instead of bs->backing */ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); @@ -688,9 +685,7 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset, ret = -EIO; break; } - hd_iov.iov_base = (void *)buf; - hd_iov.iov_len = n; - qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); + qemu_iovec_init_buf(&hd_qiov, buf, n); qemu_co_mutex_unlock(&s->lock); BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); ret = bdrv_co_preadv(bs->file, cluster_offset + offset_in_cluster, @@ -733,7 +728,6 @@ static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset, int offset_in_cluster; uint64_t cluster_offset; int ret = 0, n; - struct iovec hd_iov; QEMUIOVector hd_qiov; uint8_t *buf; void *orig_buf; @@ -779,9 +773,7 @@ static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset, } } - hd_iov.iov_base = (void *)buf; - hd_iov.iov_len = n; - qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); + qemu_iovec_init_buf(&hd_qiov, buf, n); qemu_co_mutex_unlock(&s->lock); BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster, @@ -1062,7 +1054,6 @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, { BDRVQcowState *s = bs->opaque; QEMUIOVector hd_qiov; - struct iovec iov; z_stream strm; int ret, out_len; uint8_t *buf, *out_buf; @@ -1128,11 +1119,7 @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, } cluster_offset &= s->cluster_offset_mask; - iov = (struct iovec) { - .iov_base = out_buf, - .iov_len = out_len, - }; - qemu_iovec_init_external(&hd_qiov, &iov, 1); + qemu_iovec_init_buf(&hd_qiov, out_buf, out_len); BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0); if (ret < 0) { diff --git a/block/qcow2.c b/block/qcow2.c index 65a54c9ac6..b6d475229e 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3894,7 +3894,6 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, { BDRVQcow2State *s = bs->opaque; QEMUIOVector hd_qiov; - struct iovec iov; int ret; size_t out_len; uint8_t *buf, *out_buf; @@ -3960,11 +3959,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, goto fail; } - iov = (struct iovec) { - .iov_base = out_buf, - .iov_len = out_len, - }; - qemu_iovec_init_external(&hd_qiov, &iov, 1); + qemu_iovec_init_buf(&hd_qiov, out_buf, out_len); BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0); @@ -3990,7 +3985,6 @@ qcow2_co_preadv_compressed(BlockDriverState *bs, int ret = 0, csize, nb_csectors; uint64_t coffset; uint8_t *buf, *out_buf; - struct iovec iov; QEMUIOVector local_qiov; int offset_in_cluster = offset_into_cluster(s, offset); @@ -4002,9 +3996,7 @@ qcow2_co_preadv_compressed(BlockDriverState *bs, if (!buf) { return -ENOMEM; } - iov.iov_base = buf; - iov.iov_len = csize; - qemu_iovec_init_external(&local_qiov, &iov, 1); + qemu_iovec_init_buf(&local_qiov, buf, csize); out_buf = qemu_blockalign(bs, s->cluster_size); diff --git a/block/qed-table.c b/block/qed-table.c index 7df5680adb..c497bd4aec 100644 --- a/block/qed-table.c +++ b/block/qed-table.c @@ -21,16 +21,11 @@ /* Called with table_lock held. */ static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table) { - QEMUIOVector qiov; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF( + qiov, table->offsets, s->header.cluster_size * s->header.table_size); int noffsets; int i, ret; - struct iovec iov = { - .iov_base = table->offsets, - .iov_len = s->header.cluster_size * s->header.table_size, - }; - qemu_iovec_init_external(&qiov, &iov, 1); - trace_qed_read_table(s, offset, table); qemu_co_mutex_unlock(&s->table_lock); @@ -71,7 +66,6 @@ static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1; unsigned int start, end, i; QEDTable *new_table; - struct iovec iov; QEMUIOVector qiov; size_t len_bytes; int ret; @@ -85,11 +79,7 @@ static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, len_bytes = (end - start) * sizeof(uint64_t); new_table = qemu_blockalign(s->bs, len_bytes); - iov = (struct iovec) { - .iov_base = new_table->offsets, - .iov_len = len_bytes, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + qemu_iovec_init_buf(&qiov, new_table->offsets, len_bytes); /* Byteswap table */ for (i = start; i < end; i++) { diff --git a/block/qed.c b/block/qed.c index 1280870024..c5e6d6ad41 100644 --- a/block/qed.c +++ b/block/qed.c @@ -113,18 +113,13 @@ static int coroutine_fn qed_write_header(BDRVQEDState *s) int nsectors = DIV_ROUND_UP(sizeof(QEDHeader), BDRV_SECTOR_SIZE); size_t len = nsectors * BDRV_SECTOR_SIZE; uint8_t *buf; - struct iovec iov; QEMUIOVector qiov; int ret; assert(s->allocating_acb || s->allocating_write_reqs_plugged); buf = qemu_blockalign(s->bs, len); - iov = (struct iovec) { - .iov_base = buf, - .iov_len = len, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + qemu_iovec_init_buf(&qiov, buf, len); ret = bdrv_co_preadv(s->bs->file, 0, qiov.size, &qiov, 0); if (ret < 0) { @@ -913,7 +908,6 @@ static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s, { QEMUIOVector qiov; QEMUIOVector *backing_qiov = NULL; - struct iovec iov; int ret; /* Skip copy entirely if there is no work to do */ @@ -921,11 +915,7 @@ static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s, return 0; } - iov = (struct iovec) { - .iov_base = qemu_blockalign(s->bs, len), - .iov_len = len, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + qemu_iovec_init_buf(&qiov, qemu_blockalign(s->bs, len), len); ret = qed_read_backing_file(s, pos, &qiov, &backing_qiov); @@ -946,7 +936,7 @@ static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s, } ret = 0; out: - qemu_vfree(iov.iov_base); + qemu_vfree(qemu_iovec_buf(&qiov)); return ret; } @@ -1447,8 +1437,12 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, BdrvRequestFlags flags) { BDRVQEDState *s = bs->opaque; - QEMUIOVector qiov; - struct iovec iov; + + /* + * Zero writes start without an I/O buffer. If a buffer becomes necessary + * then it will be allocated during request processing. + */ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes); /* Fall back if the request is not aligned */ if (qed_offset_into_cluster(s, offset) || @@ -1456,13 +1450,6 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, return -ENOTSUP; } - /* Zero writes start without an I/O buffer. If a buffer becomes necessary - * then it will be allocated during request processing. - */ - iov.iov_base = NULL; - iov.iov_len = bytes; - - qemu_iovec_init_external(&qiov, &iov, 1); return qed_co_request(bs, offset >> BDRV_SECTOR_BITS, &qiov, bytes >> BDRV_SECTOR_BITS, QED_AIOCB_WRITE | QED_AIOCB_ZERO); diff --git a/block/stream.c b/block/stream.c index 7a49ac0992..e14579ff80 100644 --- a/block/stream.c +++ b/block/stream.c @@ -41,14 +41,9 @@ static int coroutine_fn stream_populate(BlockBackend *blk, int64_t offset, uint64_t bytes, void *buf) { - struct iovec iov = { - .iov_base = buf, - .iov_len = bytes, - }; - QEMUIOVector qiov; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); assert(bytes < SIZE_MAX); - qemu_iovec_init_external(&qiov, &iov, 1); /* Copy-on-read the unallocated clusters */ return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ); diff --git a/block/vmdk.c b/block/vmdk.c index 096e8eb662..41048741cd 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1371,7 +1371,6 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, VmdkGrainMarker *data = NULL; uLongf buf_len; QEMUIOVector local_qiov; - struct iovec iov; int64_t write_offset; int64_t write_end_sector; @@ -1399,11 +1398,7 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, data->size = cpu_to_le32(buf_len); n_bytes = buf_len + sizeof(VmdkGrainMarker); - iov = (struct iovec) { - .iov_base = data, - .iov_len = n_bytes, - }; - qemu_iovec_init_external(&local_qiov, &iov, 1); + qemu_iovec_init_buf(&local_qiov, data, n_bytes); BLKDBG_EVENT(extent->file, BLKDBG_WRITE_COMPRESSED); } else { diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index cf7f47eaba..c159a3d5f7 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -28,9 +28,28 @@ #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" -/* We don't support discard yet, hide associated config fields. */ +/* Config size before the discard support (hide associated config fields) */ #define VIRTIO_BLK_CFG_SIZE offsetof(struct virtio_blk_config, \ max_discard_sectors) +/* + * Starting from the discard feature, we can use this array to properly + * set the config size depending on the features enabled. + */ +static VirtIOFeature feature_sizes[] = { + {.flags = 1ULL << VIRTIO_BLK_F_DISCARD, + .end = virtio_endof(struct virtio_blk_config, discard_sector_alignment)}, + {.flags = 1ULL << VIRTIO_BLK_F_WRITE_ZEROES, + .end = virtio_endof(struct virtio_blk_config, write_zeroes_may_unmap)}, + {} +}; + +static void virtio_blk_set_config_size(VirtIOBlock *s, uint64_t host_features) +{ + s->config_size = MAX(VIRTIO_BLK_CFG_SIZE, + virtio_feature_get_config_size(feature_sizes, host_features)); + + assert(s->config_size <= sizeof(struct virtio_blk_config)); +} static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, VirtIOBlockReq *req) @@ -65,7 +84,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) } static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, - bool is_read) + bool is_read, bool acct_failed) { VirtIOBlock *s = req->dev; BlockErrorAction action = blk_get_error_action(s->blk, is_read, error); @@ -78,7 +97,9 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, s->rq = req; } else if (action == BLOCK_ERROR_ACTION_REPORT) { virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); - block_acct_failed(blk_get_stats(s->blk), &req->acct); + if (acct_failed) { + block_acct_failed(blk_get_stats(s->blk), &req->acct); + } virtio_blk_free_request(req); } @@ -116,7 +137,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret) * the memory until the request is completed (which will * happen on the other side of the migration). */ - if (virtio_blk_handle_rw_error(req, -ret, is_read)) { + if (virtio_blk_handle_rw_error(req, -ret, is_read, true)) { continue; } } @@ -135,7 +156,7 @@ static void virtio_blk_flush_complete(void *opaque, int ret) aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); if (ret) { - if (virtio_blk_handle_rw_error(req, -ret, 0)) { + if (virtio_blk_handle_rw_error(req, -ret, 0, true)) { goto out; } } @@ -148,6 +169,30 @@ out: aio_context_release(blk_get_aio_context(s->conf.conf.blk)); } +static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) +{ + VirtIOBlockReq *req = opaque; + VirtIOBlock *s = req->dev; + bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) & + ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES; + + aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + if (ret) { + if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) { + goto out; + } + } + + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + if (is_write_zeroes) { + block_acct_done(blk_get_stats(s->blk), &req->acct); + } + virtio_blk_free_request(req); + +out: + aio_context_release(blk_get_aio_context(s->conf.conf.blk)); +} + #ifdef __linux__ typedef struct { @@ -243,7 +288,7 @@ static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req) */ scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base; - if (!blk->conf.scsi) { + if (!virtio_has_feature(blk->host_features, VIRTIO_BLK_F_SCSI)) { status = VIRTIO_BLK_S_UNSUPP; goto fail; } @@ -481,6 +526,84 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev, return true; } +static uint8_t virtio_blk_handle_discard_write_zeroes(VirtIOBlockReq *req, + struct virtio_blk_discard_write_zeroes *dwz_hdr, bool is_write_zeroes) +{ + VirtIOBlock *s = req->dev; + VirtIODevice *vdev = VIRTIO_DEVICE(s); + uint64_t sector; + uint32_t num_sectors, flags, max_sectors; + uint8_t err_status; + int bytes; + + sector = virtio_ldq_p(vdev, &dwz_hdr->sector); + num_sectors = virtio_ldl_p(vdev, &dwz_hdr->num_sectors); + flags = virtio_ldl_p(vdev, &dwz_hdr->flags); + max_sectors = is_write_zeroes ? s->conf.max_write_zeroes_sectors : + s->conf.max_discard_sectors; + + /* + * max_sectors is at most BDRV_REQUEST_MAX_SECTORS, this check + * make us sure that "num_sectors << BDRV_SECTOR_BITS" can fit in + * the integer variable. + */ + if (unlikely(num_sectors > max_sectors)) { + err_status = VIRTIO_BLK_S_IOERR; + goto err; + } + + bytes = num_sectors << BDRV_SECTOR_BITS; + + if (unlikely(!virtio_blk_sect_range_ok(s, sector, bytes))) { + err_status = VIRTIO_BLK_S_IOERR; + goto err; + } + + /* + * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard + * and write zeroes commands if any unknown flag is set. + */ + if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) { + err_status = VIRTIO_BLK_S_UNSUPP; + goto err; + } + + if (is_write_zeroes) { /* VIRTIO_BLK_T_WRITE_ZEROES */ + int blk_aio_flags = 0; + + if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { + blk_aio_flags |= BDRV_REQ_MAY_UNMAP; + } + + block_acct_start(blk_get_stats(s->blk), &req->acct, bytes, + BLOCK_ACCT_WRITE); + + blk_aio_pwrite_zeroes(s->blk, sector << BDRV_SECTOR_BITS, + bytes, blk_aio_flags, + virtio_blk_discard_write_zeroes_complete, req); + } else { /* VIRTIO_BLK_T_DISCARD */ + /* + * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for + * discard commands if the unmap flag is set. + */ + if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) { + err_status = VIRTIO_BLK_S_UNSUPP; + goto err; + } + + blk_aio_pdiscard(s->blk, sector << BDRV_SECTOR_BITS, bytes, + virtio_blk_discard_write_zeroes_complete, req); + } + + return VIRTIO_BLK_S_OK; + +err: + if (is_write_zeroes) { + block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_WRITE); + } + return err_status; +} + static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) { uint32_t type; @@ -582,6 +705,47 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) virtio_blk_free_request(req); break; } + /* + * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with + * VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch statement, + * so we must mask it for these requests, then we will check if it is set. + */ + case VIRTIO_BLK_T_DISCARD & ~VIRTIO_BLK_T_OUT: + case VIRTIO_BLK_T_WRITE_ZEROES & ~VIRTIO_BLK_T_OUT: + { + struct virtio_blk_discard_write_zeroes dwz_hdr; + size_t out_len = iov_size(out_iov, out_num); + bool is_write_zeroes = (type & ~VIRTIO_BLK_T_BARRIER) == + VIRTIO_BLK_T_WRITE_ZEROES; + uint8_t err_status; + + /* + * Unsupported if VIRTIO_BLK_T_OUT is not set or the request contains + * more than one segment. + */ + if (unlikely(!(type & VIRTIO_BLK_T_OUT) || + out_len > sizeof(dwz_hdr))) { + virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); + virtio_blk_free_request(req); + return 0; + } + + if (unlikely(iov_to_buf(out_iov, out_num, 0, &dwz_hdr, + sizeof(dwz_hdr)) != sizeof(dwz_hdr))) { + virtio_error(vdev, "virtio-blk discard/write_zeroes header" + " too short"); + return -1; + } + + err_status = virtio_blk_handle_discard_write_zeroes(req, &dwz_hdr, + is_write_zeroes); + if (err_status != VIRTIO_BLK_S_OK) { + virtio_blk_req_complete(req, err_status); + virtio_blk_free_request(req); + } + + break; + } default: virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); virtio_blk_free_request(req); @@ -761,8 +925,25 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) blkcfg.alignment_offset = 0; blkcfg.wce = blk_enable_write_cache(s->blk); virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues); - memcpy(config, &blkcfg, VIRTIO_BLK_CFG_SIZE); - QEMU_BUILD_BUG_ON(VIRTIO_BLK_CFG_SIZE > sizeof(blkcfg)); + if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD)) { + virtio_stl_p(vdev, &blkcfg.max_discard_sectors, + s->conf.max_discard_sectors); + virtio_stl_p(vdev, &blkcfg.discard_sector_alignment, + blk_size >> BDRV_SECTOR_BITS); + /* + * We support only one segment per request since multiple segments + * are not widely used and there are no userspace APIs that allow + * applications to submit multiple segments in a single call. + */ + virtio_stl_p(vdev, &blkcfg.max_discard_seg, 1); + } + if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES)) { + virtio_stl_p(vdev, &blkcfg.max_write_zeroes_sectors, + s->conf.max_write_zeroes_sectors); + blkcfg.write_zeroes_may_unmap = 1; + virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1); + } + memcpy(config, &blkcfg, s->config_size); } static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) @@ -770,8 +951,7 @@ static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) VirtIOBlock *s = VIRTIO_BLK(vdev); struct virtio_blk_config blkcfg; - memcpy(&blkcfg, config, VIRTIO_BLK_CFG_SIZE); - QEMU_BUILD_BUG_ON(VIRTIO_BLK_CFG_SIZE > sizeof(blkcfg)); + memcpy(&blkcfg, config, s->config_size); aio_context_acquire(blk_get_aio_context(s->blk)); blk_set_enable_write_cache(s->blk, blkcfg.wce != 0); @@ -783,12 +963,15 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, { VirtIOBlock *s = VIRTIO_BLK(vdev); + /* Firstly sync all virtio-blk possible supported features */ + features |= s->host_features; + virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX); virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY); virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY); virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE); if (virtio_has_feature(features, VIRTIO_F_VERSION_1)) { - if (s->conf.scsi) { + if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_SCSI)) { error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0"); return 0; } @@ -797,9 +980,6 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, virtio_add_feature(&features, VIRTIO_BLK_F_SCSI); } - if (s->conf.config_wce) { - virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE); - } if (blk_enable_write_cache(s->blk)) { virtio_add_feature(&features, VIRTIO_BLK_F_WCE); } @@ -954,7 +1134,28 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, VIRTIO_BLK_CFG_SIZE); + if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) && + (!conf->max_discard_sectors || + conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) { + error_setg(errp, "invalid max-discard-sectors property (%" PRIu32 ")" + ", must be between 1 and %d", + conf->max_discard_sectors, (int)BDRV_REQUEST_MAX_SECTORS); + return; + } + + if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES) && + (!conf->max_write_zeroes_sectors || + conf->max_write_zeroes_sectors > BDRV_REQUEST_MAX_SECTORS)) { + error_setg(errp, "invalid max-write-zeroes-sectors property (%" PRIu32 + "), must be between 1 and %d", + conf->max_write_zeroes_sectors, + (int)BDRV_REQUEST_MAX_SECTORS); + return; + } + + virtio_blk_set_config_size(s, s->host_features); + + virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, s->config_size); s->blk = conf->conf.blk; s->rq = NULL; @@ -1013,9 +1214,11 @@ static Property virtio_blk_properties[] = { DEFINE_BLOCK_ERROR_PROPERTIES(VirtIOBlock, conf.conf), DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, conf.conf), DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial), - DEFINE_PROP_BIT("config-wce", VirtIOBlock, conf.config_wce, 0, true), + DEFINE_PROP_BIT64("config-wce", VirtIOBlock, host_features, + VIRTIO_BLK_F_CONFIG_WCE, true), #ifdef __linux__ - DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, false), + DEFINE_PROP_BIT64("scsi", VirtIOBlock, host_features, + VIRTIO_BLK_F_SCSI, false), #endif DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, true), @@ -1023,6 +1226,14 @@ static Property virtio_blk_properties[] = { DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 128), DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD, IOThread *), + DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features, + VIRTIO_BLK_F_DISCARD, true), + DEFINE_PROP_BIT64("write-zeroes", VirtIOBlock, host_features, + VIRTIO_BLK_F_WRITE_ZEROES, true), + DEFINE_PROP_UINT32("max-discard-sectors", VirtIOBlock, + conf.max_discard_sectors, BDRV_REQUEST_MAX_SECTORS), + DEFINE_PROP_UINT32("max-write-zeroes-sectors", VirtIOBlock, + conf.max_write_zeroes_sectors, BDRV_REQUEST_MAX_SECTORS), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/core/machine.c b/hw/core/machine.c index 077fbd182a..766ca5899d 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -33,6 +33,8 @@ GlobalProperty hw_compat_3_1[] = { { "usb-kbd", "serial", "42" }, { "usb-mouse", "serial", "42" }, { "usb-kbd", "serial", "42" }, + { "virtio-blk-device", "discard", "false" }, + { "virtio-blk-device", "write-zeroes", "false" }, }; const size_t hw_compat_3_1_len = G_N_ELEMENTS(hw_compat_3_1); diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index 39e473f9c2..1b0f66cc08 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -174,16 +174,15 @@ static void cd_read_sector_cb(void *opaque, int ret) static int cd_read_sector(IDEState *s) { + void *buf; + if (s->cd_sector_size != 2048 && s->cd_sector_size != 2352) { block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_READ); return -EINVAL; } - s->iov.iov_base = (s->cd_sector_size == 2352) ? - s->io_buffer + 16 : s->io_buffer; - - s->iov.iov_len = ATAPI_SECTOR_SIZE; - qemu_iovec_init_external(&s->qiov, &s->iov, 1); + buf = (s->cd_sector_size == 2352) ? s->io_buffer + 16 : s->io_buffer; + qemu_iovec_init_buf(&s->qiov, buf, ATAPI_SECTOR_SIZE); trace_cd_read_sector(s->lba); @@ -421,9 +420,8 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret) data_offset = 0; } trace_ide_atapi_cmd_read_dma_cb_aio(s, s->lba, n); - s->bus->dma->iov.iov_base = (void *)(s->io_buffer + data_offset); - s->bus->dma->iov.iov_len = n * ATAPI_SECTOR_SIZE; - qemu_iovec_init_external(&s->bus->dma->qiov, &s->bus->dma->iov, 1); + qemu_iovec_init_buf(&s->bus->dma->qiov, s->io_buffer + data_offset, + n * ATAPI_SECTOR_SIZE); s->bus->dma->aiocb = ide_buffered_readv(s, (int64_t)s->lba << 2, &s->bus->dma->qiov, n * 4, diff --git a/hw/ide/core.c b/hw/ide/core.c index 84832008b8..6afadf894f 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -629,13 +629,15 @@ static void ide_buffered_readv_cb(void *opaque, int ret) IDEBufferedRequest *req = opaque; if (!req->orphaned) { if (!ret) { - qemu_iovec_from_buf(req->original_qiov, 0, req->iov.iov_base, + assert(req->qiov.size == req->original_qiov->size); + qemu_iovec_from_buf(req->original_qiov, 0, + req->qiov.local_iov.iov_base, req->original_qiov->size); } req->original_cb(req->original_opaque, ret); } QLIST_REMOVE(req, list); - qemu_vfree(req->iov.iov_base); + qemu_vfree(qemu_iovec_buf(&req->qiov)); g_free(req); } @@ -660,9 +662,8 @@ BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num, req->original_qiov = iov; req->original_cb = cb; req->original_opaque = opaque; - req->iov.iov_base = qemu_blockalign(blk_bs(s->blk), iov->size); - req->iov.iov_len = iov->size; - qemu_iovec_init_external(&req->qiov, &req->iov, 1); + qemu_iovec_init_buf(&req->qiov, blk_blockalign(s->blk, iov->size), + iov->size); aioreq = blk_aio_preadv(s->blk, sector_num << BDRV_SECTOR_BITS, &req->qiov, 0, ide_buffered_readv_cb, req); @@ -774,9 +775,7 @@ static void ide_sector_read(IDEState *s) return; } - s->iov.iov_base = s->io_buffer; - s->iov.iov_len = n * BDRV_SECTOR_SIZE; - qemu_iovec_init_external(&s->qiov, &s->iov, 1); + qemu_iovec_init_buf(&s->qiov, s->io_buffer, n * BDRV_SECTOR_SIZE); block_acct_start(blk_get_stats(s->blk), &s->acct, n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); @@ -1045,9 +1044,7 @@ static void ide_sector_write(IDEState *s) return; } - s->iov.iov_base = s->io_buffer; - s->iov.iov_len = n * BDRV_SECTOR_SIZE; - qemu_iovec_init_external(&s->qiov, &s->iov, 1); + qemu_iovec_init_buf(&s->qiov, s->io_buffer, n * BDRV_SECTOR_SIZE); block_acct_start(blk_get_stats(s->blk), &s->acct, n * BDRV_SECTOR_SIZE, BLOCK_ACCT_WRITE); diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 3f319ef723..6e6b146022 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -82,29 +82,17 @@ static inline __virtio16 *virtio_net_rsc_ext_num_dupacks( #endif -/* - * Calculate the number of bytes up to and including the given 'field' of - * 'container'. - */ -#define endof(container, field) \ - (offsetof(container, field) + sizeof_field(container, field)) - -typedef struct VirtIOFeature { - uint64_t flags; - size_t end; -} VirtIOFeature; - static VirtIOFeature feature_sizes[] = { {.flags = 1ULL << VIRTIO_NET_F_MAC, - .end = endof(struct virtio_net_config, mac)}, + .end = virtio_endof(struct virtio_net_config, mac)}, {.flags = 1ULL << VIRTIO_NET_F_STATUS, - .end = endof(struct virtio_net_config, status)}, + .end = virtio_endof(struct virtio_net_config, status)}, {.flags = 1ULL << VIRTIO_NET_F_MQ, - .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, + .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)}, {.flags = 1ULL << VIRTIO_NET_F_MTU, - .end = endof(struct virtio_net_config, mtu)}, + .end = virtio_endof(struct virtio_net_config, mtu)}, {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, - .end = endof(struct virtio_net_config, duplex)}, + .end = virtio_endof(struct virtio_net_config, duplex)}, {} }; @@ -2580,15 +2568,10 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) { - int i, config_size = 0; virtio_add_feature(&host_features, VIRTIO_NET_F_MAC); - for (i = 0; feature_sizes[i].flags != 0; i++) { - if (host_features & feature_sizes[i].flags) { - config_size = MAX(feature_sizes[i].end, config_size); - } - } - n->config_size = config_size; + n->config_size = virtio_feature_get_config_size(feature_sizes, + host_features); } void virtio_net_set_netclient_name(VirtIONet *n, const char *name, diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index a1ff647a66..2626a895cb 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2036,6 +2036,21 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) return ret; } +size_t virtio_feature_get_config_size(VirtIOFeature *feature_sizes, + uint64_t host_features) +{ + size_t config_size = 0; + int i; + + for (i = 0; feature_sizes[i].flags != 0; i++) { + if (host_features & feature_sizes[i].flags) { + config_size = MAX(feature_sizes[i].end, config_size); + } + } + + return config_size; +} + int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) { int i, ret; diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h index 880413ddc7..8efd03132b 100644 --- a/include/hw/ide/internal.h +++ b/include/hw/ide/internal.h @@ -346,7 +346,6 @@ extern const char *IDE_DMA_CMD_lookup[IDE_DMA__COUNT]; typedef struct IDEBufferedRequest { QLIST_ENTRY(IDEBufferedRequest) list; - struct iovec iov; QEMUIOVector qiov; QEMUIOVector *original_qiov; BlockCompletionFunc *original_cb; @@ -405,7 +404,6 @@ struct IDEState { int atapi_dma; /* true if dma is requested for the packet cmd */ BlockAcctCookie acct; BlockAIOCB *pio_aiocb; - struct iovec iov; QEMUIOVector qiov; QLIST_HEAD(, IDEBufferedRequest) buffered_requests; /* ATA DMA state */ @@ -457,7 +455,6 @@ struct IDEDMAOps { struct IDEDMA { const struct IDEDMAOps *ops; - struct iovec iov; QEMUIOVector qiov; BlockAIOCB *aiocb; }; diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 5117431d96..cddcfbebe9 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -35,11 +35,11 @@ struct VirtIOBlkConf BlockConf conf; IOThread *iothread; char *serial; - uint32_t scsi; - uint32_t config_wce; uint32_t request_merging; uint16_t num_queues; uint16_t queue_size; + uint32_t max_discard_sectors; + uint32_t max_write_zeroes_sectors; }; struct VirtIOBlockDataPlane; @@ -57,6 +57,8 @@ typedef struct VirtIOBlock { bool dataplane_disabled; bool dataplane_started; struct VirtIOBlockDataPlane *dataplane; + uint64_t host_features; + size_t config_size; } VirtIOBlock; typedef struct VirtIOBlockReq { diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 9c1fa07d6d..ce9516236a 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -37,6 +37,21 @@ static inline hwaddr vring_align(hwaddr addr, return QEMU_ALIGN_UP(addr, align); } +/* + * Calculate the number of bytes up to and including the given 'field' of + * 'container'. + */ +#define virtio_endof(container, field) \ + (offsetof(container, field) + sizeof_field(container, field)) + +typedef struct VirtIOFeature { + uint64_t flags; + size_t end; +} VirtIOFeature; + +size_t virtio_feature_get_config_size(VirtIOFeature *features, + uint64_t host_features); + typedef struct VirtQueue VirtQueue; #define VIRTQUEUE_MAX_SIZE 1024 diff --git a/include/qemu/iov.h b/include/qemu/iov.h index 5f433c7768..48b45987b7 100644 --- a/include/qemu/iov.h +++ b/include/qemu/iov.h @@ -133,10 +133,70 @@ size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt, typedef struct QEMUIOVector { struct iovec *iov; int niov; - int nalloc; - size_t size; + + /* + * For external @iov (qemu_iovec_init_external()) or allocated @iov + * (qemu_iovec_init()), @size is the cumulative size of iovecs and + * @local_iov is invalid and unused. + * + * For embedded @iov (QEMU_IOVEC_INIT_BUF() or qemu_iovec_init_buf()), + * @iov is equal to &@local_iov, and @size is valid, as it has same + * offset and type as @local_iov.iov_len, which is guaranteed by + * static assertion below. + * + * @nalloc is always valid and is -1 both for embedded and external + * cases. It is included in the union only to ensure the padding prior + * to the @size field will not result in a 0-length array. + */ + union { + struct { + int nalloc; + struct iovec local_iov; + }; + struct { + char __pad[sizeof(int) + offsetof(struct iovec, iov_len)]; + size_t size; + }; + }; } QEMUIOVector; +QEMU_BUILD_BUG_ON(offsetof(QEMUIOVector, size) != + offsetof(QEMUIOVector, local_iov.iov_len)); + +#define QEMU_IOVEC_INIT_BUF(self, buf, len) \ +{ \ + .iov = &(self).local_iov, \ + .niov = 1, \ + .nalloc = -1, \ + .local_iov = { \ + .iov_base = (void *)(buf), /* cast away const */ \ + .iov_len = (len), \ + }, \ +} + +/* + * qemu_iovec_init_buf + * + * Initialize embedded QEMUIOVector. + * + * Note: "const" is used over @buf pointer to make it simple to pass + * const pointers, appearing in read functions. Then this "const" is + * cast away by QEMU_IOVEC_INIT_BUF(). + */ +static inline void qemu_iovec_init_buf(QEMUIOVector *qiov, + const void *buf, size_t len) +{ + *qiov = (QEMUIOVector) QEMU_IOVEC_INIT_BUF(*qiov, buf, len); +} + +static inline void *qemu_iovec_buf(QEMUIOVector *qiov) +{ + /* Only supports embedded iov */ + assert(qiov->nalloc == -1 && qiov->iov == &qiov->local_iov); + + return qiov->local_iov.iov_base; +} + void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); diff --git a/migration/block.c b/migration/block.c index 0e24e18d13..83c633fb3f 100644 --- a/migration/block.c +++ b/migration/block.c @@ -83,7 +83,6 @@ typedef struct BlkMigBlock { BlkMigDevState *bmds; int64_t sector; int nr_sectors; - struct iovec iov; QEMUIOVector qiov; BlockAIOCB *aiocb; @@ -314,9 +313,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) blk->sector = cur_sector; blk->nr_sectors = nr_sectors; - blk->iov.iov_base = blk->buf; - blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE; - qemu_iovec_init_external(&blk->qiov, &blk->iov, 1); + qemu_iovec_init_buf(&blk->qiov, blk->buf, nr_sectors * BDRV_SECTOR_SIZE); blk_mig_lock(); block_mig_state.submitted++; @@ -556,9 +553,8 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, blk->nr_sectors = nr_sectors; if (is_async) { - blk->iov.iov_base = blk->buf; - blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE; - qemu_iovec_init_external(&blk->qiov, &blk->iov, 1); + qemu_iovec_init_buf(&blk->qiov, blk->buf, + nr_sectors * BDRV_SECTOR_SIZE); blk->aiocb = blk_aio_preadv(bmds->blk, sector * BDRV_SECTOR_SIZE, diff --git a/qemu-img.c b/qemu-img.c index 25288c4d18..7853935049 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1670,7 +1670,6 @@ static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num, { int n, ret; QEMUIOVector qiov; - struct iovec iov; assert(nb_sectors <= s->buf_sectors); while (nb_sectors > 0) { @@ -1686,9 +1685,7 @@ static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num, bs_sectors = s->src_sectors[src_cur]; n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset)); - iov.iov_base = buf; - iov.iov_len = n << BDRV_SECTOR_BITS; - qemu_iovec_init_external(&qiov, &iov, 1); + qemu_iovec_init_buf(&qiov, buf, n << BDRV_SECTOR_BITS); ret = blk_co_preadv( blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS, @@ -1712,7 +1709,6 @@ static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num, { int ret; QEMUIOVector qiov; - struct iovec iov; while (nb_sectors > 0) { int n = nb_sectors; @@ -1740,9 +1736,7 @@ static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num, (s->compressed && !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE))) { - iov.iov_base = buf; - iov.iov_len = n << BDRV_SECTOR_BITS; - qemu_iovec_init_external(&qiov, &iov, 1); + qemu_iovec_init_buf(&qiov, buf, n << BDRV_SECTOR_BITS); ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS, n << BDRV_SECTOR_BITS, &qiov, flags); diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index ee1740ff06..821be405f0 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -204,12 +204,7 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive) BlockAIOCB *acb; int aio_ret; - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = NULL, - .iov_len = 0, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, @@ -670,12 +665,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) AioContext *ctx_a = iothread_get_aio_context(a); AioContext *ctx_b = iothread_get_aio_context(b); - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = NULL, - .iov_len = 0, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); /* bdrv_drain_all() may only be called from the main loop thread */ if (drain_type == BDRV_DRAIN_ALL && drain_thread != 0) { @@ -1148,13 +1138,7 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque) BlockDriverState *bs = blk_bs(blk); BDRVTestTopState *tts = bs->opaque; void *buffer = g_malloc(65536); - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = buffer, - .iov_len = 65536, - }; - - qemu_iovec_init_external(&qiov, &iov, 1); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536); /* Pretend some internal write operation from parent to child. * Important: We have to read from the child, not from the parent! @@ -1365,12 +1349,7 @@ static void test_detach_indirect(bool by_parent_cb) BdrvChild *child_a, *child_b; BlockAIOCB *acb; - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = NULL, - .iov_len = 0, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); if (!by_parent_cb) { detach_by_driver_cb_role = child_file; diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 04c608764b..8d2fc9c710 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -46,6 +46,12 @@ typedef struct QVirtioBlkReq { uint8_t status; } QVirtioBlkReq; +#ifdef HOST_WORDS_BIGENDIAN +const bool host_is_big_endian = true; +#else +const bool host_is_big_endian; /* false */ +#endif + static char *drive_create(void) { int fd, ret; @@ -125,12 +131,6 @@ static QVirtioPCIDevice *virtio_blk_pci_init(QPCIBus *bus, int slot) static inline void virtio_blk_fix_request(QVirtioDevice *d, QVirtioBlkReq *req) { -#ifdef HOST_WORDS_BIGENDIAN - const bool host_is_big_endian = true; -#else - const bool host_is_big_endian = false; -#endif - if (qvirtio_is_big_endian(d) != host_is_big_endian) { req->type = bswap32(req->type); req->ioprio = bswap32(req->ioprio); @@ -138,13 +138,37 @@ static inline void virtio_blk_fix_request(QVirtioDevice *d, QVirtioBlkReq *req) } } + +static inline void virtio_blk_fix_dwz_hdr(QVirtioDevice *d, + struct virtio_blk_discard_write_zeroes *dwz_hdr) +{ + if (qvirtio_is_big_endian(d) != host_is_big_endian) { + dwz_hdr->sector = bswap64(dwz_hdr->sector); + dwz_hdr->num_sectors = bswap32(dwz_hdr->num_sectors); + dwz_hdr->flags = bswap32(dwz_hdr->flags); + } +} + static uint64_t virtio_blk_request(QGuestAllocator *alloc, QVirtioDevice *d, QVirtioBlkReq *req, uint64_t data_size) { uint64_t addr; uint8_t status = 0xFF; - g_assert_cmpuint(data_size % 512, ==, 0); + switch (req->type) { + case VIRTIO_BLK_T_IN: + case VIRTIO_BLK_T_OUT: + g_assert_cmpuint(data_size % 512, ==, 0); + break; + case VIRTIO_BLK_T_DISCARD: + case VIRTIO_BLK_T_WRITE_ZEROES: + g_assert_cmpuint(data_size % + sizeof(struct virtio_blk_discard_write_zeroes), ==, 0); + break; + default: + g_assert_cmpuint(data_size, ==, 0); + } + addr = guest_alloc(alloc, sizeof(*req) + data_size); virtio_blk_fix_request(d, req); @@ -231,6 +255,95 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, guest_free(alloc, req_addr); + if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) { + struct virtio_blk_discard_write_zeroes dwz_hdr; + void *expected; + + /* + * WRITE_ZEROES request on the same sector of previous test where + * we wrote "TEST". + */ + req.type = VIRTIO_BLK_T_WRITE_ZEROES; + req.data = (char *) &dwz_hdr; + dwz_hdr.sector = 0; + dwz_hdr.num_sectors = 1; + dwz_hdr.flags = 0; + + virtio_blk_fix_dwz_hdr(dev, &dwz_hdr); + + req_addr = virtio_blk_request(alloc, dev, &req, sizeof(dwz_hdr)); + + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); + qvirtqueue_add(vq, req_addr + 16, sizeof(dwz_hdr), false, true); + qvirtqueue_add(vq, req_addr + 16 + sizeof(dwz_hdr), 1, true, false); + + qvirtqueue_kick(dev, vq, free_head); + + qvirtio_wait_used_elem(dev, vq, free_head, NULL, + QVIRTIO_BLK_TIMEOUT_US); + status = readb(req_addr + 16 + sizeof(dwz_hdr)); + g_assert_cmpint(status, ==, 0); + + guest_free(alloc, req_addr); + + /* Read request to check if the sector contains all zeroes */ + req.type = VIRTIO_BLK_T_IN; + req.ioprio = 1; + req.sector = 0; + req.data = g_malloc0(512); + + req_addr = virtio_blk_request(alloc, dev, &req, 512); + + g_free(req.data); + + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); + qvirtqueue_add(vq, req_addr + 16, 512, true, true); + qvirtqueue_add(vq, req_addr + 528, 1, true, false); + + qvirtqueue_kick(dev, vq, free_head); + + qvirtio_wait_used_elem(dev, vq, free_head, NULL, + QVIRTIO_BLK_TIMEOUT_US); + status = readb(req_addr + 528); + g_assert_cmpint(status, ==, 0); + + data = g_malloc(512); + expected = g_malloc0(512); + memread(req_addr + 16, data, 512); + g_assert_cmpmem(data, 512, expected, 512); + g_free(expected); + g_free(data); + + guest_free(alloc, req_addr); + } + + if (features & (1u << VIRTIO_BLK_F_DISCARD)) { + struct virtio_blk_discard_write_zeroes dwz_hdr; + + req.type = VIRTIO_BLK_T_DISCARD; + req.data = (char *) &dwz_hdr; + dwz_hdr.sector = 0; + dwz_hdr.num_sectors = 1; + dwz_hdr.flags = 0; + + virtio_blk_fix_dwz_hdr(dev, &dwz_hdr); + + req_addr = virtio_blk_request(alloc, dev, &req, sizeof(dwz_hdr)); + + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); + qvirtqueue_add(vq, req_addr + 16, sizeof(dwz_hdr), false, true); + qvirtqueue_add(vq, req_addr + 16 + sizeof(dwz_hdr), 1, true, false); + + qvirtqueue_kick(dev, vq, free_head); + + qvirtio_wait_used_elem(dev, vq, free_head, NULL, + QVIRTIO_BLK_TIMEOUT_US); + status = readb(req_addr + 16 + sizeof(dwz_hdr)); + g_assert_cmpint(status, ==, 0); + + guest_free(alloc, req_addr); + } + if (features & (1u << VIRTIO_F_ANY_LAYOUT)) { /* Write and read with 2 descriptor layout */ /* Write request */ |