aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/backup.c5
-rw-r--r--block/block-backend.c13
-rw-r--r--block/commit.c7
-rw-r--r--block/io.c89
-rw-r--r--block/parallels.c13
-rw-r--r--block/qcow.c21
-rw-r--r--block/qcow2.c12
-rw-r--r--block/qed-table.c16
-rw-r--r--block/qed.c31
-rw-r--r--block/stream.c7
-rw-r--r--block/vmdk.c7
-rw-r--r--hw/block/virtio-blk.c245
-rw-r--r--hw/core/machine.c2
-rw-r--r--hw/ide/atapi.c14
-rw-r--r--hw/ide/core.c19
-rw-r--r--hw/net/virtio-net.c31
-rw-r--r--hw/virtio/virtio.c15
-rw-r--r--include/hw/ide/internal.h3
-rw-r--r--include/hw/virtio/virtio-blk.h6
-rw-r--r--include/hw/virtio/virtio.h15
-rw-r--r--include/qemu/iov.h64
-rw-r--r--migration/block.c10
-rw-r--r--qemu-img.c10
-rw-r--r--tests/test-bdrv-drain.c29
-rw-r--r--tests/virtio-blk-test.c127
25 files changed, 525 insertions, 286 deletions
diff --git a/block/backup.c b/block/backup.c
index 435414e964..9988753249 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -107,7 +107,6 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
void **bounce_buffer)
{
int ret;
- struct iovec iov;
QEMUIOVector qiov;
BlockBackend *blk = job->common.blk;
int nbytes;
@@ -119,9 +118,7 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
if (!*bounce_buffer) {
*bounce_buffer = blk_blockalign(blk, job->cluster_size);
}
- iov.iov_base = *bounce_buffer;
- iov.iov_len = nbytes;
- qemu_iovec_init_external(&qiov, &iov, 1);
+ qemu_iovec_init_buf(&qiov, *bounce_buffer, nbytes);
ret = blk_co_preadv(blk, start, qiov.size, &qiov, read_flags);
if (ret < 0) {
diff --git a/block/block-backend.c b/block/block-backend.c
index f6ea824308..6cc25569ef 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1204,17 +1204,8 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
int64_t bytes, CoroutineEntry co_entry,
BdrvRequestFlags flags)
{
- QEMUIOVector qiov;
- struct iovec iov;
- BlkRwCo rwco;
-
- iov = (struct iovec) {
- .iov_base = buf,
- .iov_len = bytes,
- };
- qemu_iovec_init_external(&qiov, &iov, 1);
-
- rwco = (BlkRwCo) {
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ BlkRwCo rwco = {
.blk = blk,
.offset = offset,
.iobuf = &qiov,
diff --git a/block/commit.c b/block/commit.c
index 53148e610b..d500a93068 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -47,14 +47,9 @@ static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
void *buf)
{
int ret = 0;
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = buf,
- .iov_len = bytes,
- };
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
assert(bytes < SIZE_MAX);
- qemu_iovec_init_external(&qiov, &iov, 1);
ret = blk_co_preadv(bs, offset, qiov.size, &qiov, 0);
if (ret < 0) {
diff --git a/block/io.c b/block/io.c
index 213ca03d8d..2ba603c7bc 100644
--- a/block/io.c
+++ b/block/io.c
@@ -843,17 +843,13 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf,
int nb_sectors, bool is_write, BdrvRequestFlags flags)
{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *)buf,
- .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
- };
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf,
+ nb_sectors * BDRV_SECTOR_SIZE);
if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
return -EINVAL;
}
- qemu_iovec_init_external(&qiov, &iov, 1);
return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS,
&qiov, is_write, flags);
}
@@ -880,13 +876,8 @@ int bdrv_write(BdrvChild *child, int64_t sector_num,
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int bytes, BdrvRequestFlags flags)
{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = NULL,
- .iov_len = bytes,
- };
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);
- qemu_iovec_init_external(&qiov, &iov, 1);
return bdrv_prwv_co(child, offset, &qiov, true,
BDRV_REQ_ZERO_WRITE | flags);
}
@@ -950,17 +941,12 @@ int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *)buf,
- .iov_len = bytes,
- };
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
if (bytes < 0) {
return -EINVAL;
}
- qemu_iovec_init_external(&qiov, &iov, 1);
return bdrv_preadv(child, offset, &qiov);
}
@@ -978,17 +964,12 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *) buf,
- .iov_len = bytes,
- };
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
if (bytes < 0) {
return -EINVAL;
}
- qemu_iovec_init_external(&qiov, &iov, 1);
return bdrv_pwritev(child, offset, &qiov);
}
@@ -1165,7 +1146,6 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
void *bounce_buffer;
BlockDriver *drv = bs->drv;
- struct iovec iov;
QEMUIOVector local_qiov;
int64_t cluster_offset;
int64_t cluster_bytes;
@@ -1230,9 +1210,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
if (ret <= 0) {
/* Must copy-on-read; use the bounce buffer */
- iov.iov_base = bounce_buffer;
- iov.iov_len = pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
- qemu_iovec_init_external(&local_qiov, &iov, 1);
+ pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
+ qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
&local_qiov, 0);
@@ -1477,7 +1456,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
{
BlockDriver *drv = bs->drv;
QEMUIOVector qiov;
- struct iovec iov = {0};
+ void *buf = NULL;
int ret = 0;
bool need_flush = false;
int head = 0;
@@ -1547,16 +1526,14 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
need_flush = true;
}
num = MIN(num, max_transfer);
- iov.iov_len = num;
- if (iov.iov_base == NULL) {
- iov.iov_base = qemu_try_blockalign(bs, num);
- if (iov.iov_base == NULL) {
+ if (buf == NULL) {
+ buf = qemu_try_blockalign0(bs, num);
+ if (buf == NULL) {
ret = -ENOMEM;
goto fail;
}
- memset(iov.iov_base, 0, num);
}
- qemu_iovec_init_external(&qiov, &iov, 1);
+ qemu_iovec_init_buf(&qiov, buf, num);
ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags);
@@ -1564,8 +1541,8 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
* all future requests.
*/
if (num < max_transfer) {
- qemu_vfree(iov.iov_base);
- iov.iov_base = NULL;
+ qemu_vfree(buf);
+ buf = NULL;
}
}
@@ -1577,7 +1554,7 @@ fail:
if (ret == 0 && need_flush) {
ret = bdrv_co_flush(bs);
}
- qemu_vfree(iov.iov_base);
+ qemu_vfree(buf);
return ret;
}
@@ -1763,7 +1740,6 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
BlockDriverState *bs = child->bs;
uint8_t *buf = NULL;
QEMUIOVector local_qiov;
- struct iovec iov;
uint64_t align = bs->bl.request_alignment;
unsigned int head_padding_bytes, tail_padding_bytes;
int ret = 0;
@@ -1775,11 +1751,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
assert(flags & BDRV_REQ_ZERO_WRITE);
if (head_padding_bytes || tail_padding_bytes) {
buf = qemu_blockalign(bs, align);
- iov = (struct iovec) {
- .iov_base = buf,
- .iov_len = align,
- };
- qemu_iovec_init_external(&local_qiov, &iov, 1);
+ qemu_iovec_init_buf(&local_qiov, buf, align);
}
if (head_padding_bytes) {
uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);
@@ -1885,17 +1857,12 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
if (offset & (align - 1)) {
QEMUIOVector head_qiov;
- struct iovec head_iov;
mark_request_serialising(&req, align);
wait_serialising_requests(&req);
head_buf = qemu_blockalign(bs, align);
- head_iov = (struct iovec) {
- .iov_base = head_buf,
- .iov_len = align,
- };
- qemu_iovec_init_external(&head_qiov, &head_iov, 1);
+ qemu_iovec_init_buf(&head_qiov, head_buf, align);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
@@ -1924,7 +1891,6 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
if ((offset + bytes) & (align - 1)) {
QEMUIOVector tail_qiov;
- struct iovec tail_iov;
size_t tail_bytes;
bool waited;
@@ -1933,11 +1899,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
assert(!waited || !use_local_qiov);
tail_buf = qemu_blockalign(bs, align);
- tail_iov = (struct iovec) {
- .iov_base = tail_buf,
- .iov_len = align,
- };
- qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
+ qemu_iovec_init_buf(&tail_qiov, tail_buf, align);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
@@ -2468,15 +2430,9 @@ bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
int64_t pos, int size)
{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *) buf,
- .iov_len = size,
- };
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
int ret;
- qemu_iovec_init_external(&qiov, &iov, 1);
-
ret = bdrv_writev_vmstate(bs, &qiov, pos);
if (ret < 0) {
return ret;
@@ -2493,14 +2449,9 @@ int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
int64_t pos, int size)
{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = buf,
- .iov_len = size,
- };
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
int ret;
- qemu_iovec_init_external(&qiov, &iov, 1);
ret = bdrv_readv_vmstate(bs, &qiov, pos);
if (ret < 0) {
return ret;
diff --git a/block/parallels.c b/block/parallels.c
index cc9445879d..15bc97b759 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -220,23 +220,20 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
if (bs->backing) {
int64_t nb_cow_sectors = to_allocate * s->tracks;
int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS;
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_len = nb_cow_bytes,
- .iov_base = qemu_blockalign(bs, nb_cow_bytes)
- };
- qemu_iovec_init_external(&qiov, &iov, 1);
+ QEMUIOVector qiov =
+ QEMU_IOVEC_INIT_BUF(qiov, qemu_blockalign(bs, nb_cow_bytes),
+ nb_cow_bytes);
ret = bdrv_co_preadv(bs->backing, idx * s->tracks * BDRV_SECTOR_SIZE,
nb_cow_bytes, &qiov, 0);
if (ret < 0) {
- qemu_vfree(iov.iov_base);
+ qemu_vfree(qemu_iovec_buf(&qiov));
return ret;
}
ret = bdrv_co_pwritev(bs->file, s->data_end * BDRV_SECTOR_SIZE,
nb_cow_bytes, &qiov, 0);
- qemu_vfree(iov.iov_base);
+ qemu_vfree(qemu_iovec_buf(&qiov));
if (ret < 0) {
return ret;
}
diff --git a/block/qcow.c b/block/qcow.c
index 0a235bf393..409c700d33 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -628,7 +628,6 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset,
int offset_in_cluster;
int ret = 0, n;
uint64_t cluster_offset;
- struct iovec hd_iov;
QEMUIOVector hd_qiov;
uint8_t *buf;
void *orig_buf;
@@ -661,9 +660,7 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset,
if (!cluster_offset) {
if (bs->backing) {
/* read from the base image */
- hd_iov.iov_base = (void *)buf;
- hd_iov.iov_len = n;
- qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
+ qemu_iovec_init_buf(&hd_qiov, buf, n);
qemu_co_mutex_unlock(&s->lock);
/* qcow2 emits this on bs->file instead of bs->backing */
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
@@ -688,9 +685,7 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset,
ret = -EIO;
break;
}
- hd_iov.iov_base = (void *)buf;
- hd_iov.iov_len = n;
- qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
+ qemu_iovec_init_buf(&hd_qiov, buf, n);
qemu_co_mutex_unlock(&s->lock);
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
ret = bdrv_co_preadv(bs->file, cluster_offset + offset_in_cluster,
@@ -733,7 +728,6 @@ static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset,
int offset_in_cluster;
uint64_t cluster_offset;
int ret = 0, n;
- struct iovec hd_iov;
QEMUIOVector hd_qiov;
uint8_t *buf;
void *orig_buf;
@@ -779,9 +773,7 @@ static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset,
}
}
- hd_iov.iov_base = (void *)buf;
- hd_iov.iov_len = n;
- qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
+ qemu_iovec_init_buf(&hd_qiov, buf, n);
qemu_co_mutex_unlock(&s->lock);
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster,
@@ -1062,7 +1054,6 @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
{
BDRVQcowState *s = bs->opaque;
QEMUIOVector hd_qiov;
- struct iovec iov;
z_stream strm;
int ret, out_len;
uint8_t *buf, *out_buf;
@@ -1128,11 +1119,7 @@ qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
}
cluster_offset &= s->cluster_offset_mask;
- iov = (struct iovec) {
- .iov_base = out_buf,
- .iov_len = out_len,
- };
- qemu_iovec_init_external(&hd_qiov, &iov, 1);
+ qemu_iovec_init_buf(&hd_qiov, out_buf, out_len);
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0);
if (ret < 0) {
diff --git a/block/qcow2.c b/block/qcow2.c
index 65a54c9ac6..b6d475229e 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3894,7 +3894,6 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
{
BDRVQcow2State *s = bs->opaque;
QEMUIOVector hd_qiov;
- struct iovec iov;
int ret;
size_t out_len;
uint8_t *buf, *out_buf;
@@ -3960,11 +3959,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
goto fail;
}
- iov = (struct iovec) {
- .iov_base = out_buf,
- .iov_len = out_len,
- };
- qemu_iovec_init_external(&hd_qiov, &iov, 1);
+ qemu_iovec_init_buf(&hd_qiov, out_buf, out_len);
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0);
@@ -3990,7 +3985,6 @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
int ret = 0, csize, nb_csectors;
uint64_t coffset;
uint8_t *buf, *out_buf;
- struct iovec iov;
QEMUIOVector local_qiov;
int offset_in_cluster = offset_into_cluster(s, offset);
@@ -4002,9 +3996,7 @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
if (!buf) {
return -ENOMEM;
}
- iov.iov_base = buf;
- iov.iov_len = csize;
- qemu_iovec_init_external(&local_qiov, &iov, 1);
+ qemu_iovec_init_buf(&local_qiov, buf, csize);
out_buf = qemu_blockalign(bs, s->cluster_size);
diff --git a/block/qed-table.c b/block/qed-table.c
index 7df5680adb..c497bd4aec 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -21,16 +21,11 @@
/* Called with table_lock held. */
static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
{
- QEMUIOVector qiov;
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(
+ qiov, table->offsets, s->header.cluster_size * s->header.table_size);
int noffsets;
int i, ret;
- struct iovec iov = {
- .iov_base = table->offsets,
- .iov_len = s->header.cluster_size * s->header.table_size,
- };
- qemu_iovec_init_external(&qiov, &iov, 1);
-
trace_qed_read_table(s, offset, table);
qemu_co_mutex_unlock(&s->table_lock);
@@ -71,7 +66,6 @@ static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
unsigned int start, end, i;
QEDTable *new_table;
- struct iovec iov;
QEMUIOVector qiov;
size_t len_bytes;
int ret;
@@ -85,11 +79,7 @@ static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
len_bytes = (end - start) * sizeof(uint64_t);
new_table = qemu_blockalign(s->bs, len_bytes);
- iov = (struct iovec) {
- .iov_base = new_table->offsets,
- .iov_len = len_bytes,
- };
- qemu_iovec_init_external(&qiov, &iov, 1);
+ qemu_iovec_init_buf(&qiov, new_table->offsets, len_bytes);
/* Byteswap table */
for (i = start; i < end; i++) {
diff --git a/block/qed.c b/block/qed.c
index 1280870024..c5e6d6ad41 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -113,18 +113,13 @@ static int coroutine_fn qed_write_header(BDRVQEDState *s)
int nsectors = DIV_ROUND_UP(sizeof(QEDHeader), BDRV_SECTOR_SIZE);
size_t len = nsectors * BDRV_SECTOR_SIZE;
uint8_t *buf;
- struct iovec iov;
QEMUIOVector qiov;
int ret;
assert(s->allocating_acb || s->allocating_write_reqs_plugged);
buf = qemu_blockalign(s->bs, len);
- iov = (struct iovec) {
- .iov_base = buf,
- .iov_len = len,
- };
- qemu_iovec_init_external(&qiov, &iov, 1);
+ qemu_iovec_init_buf(&qiov, buf, len);
ret = bdrv_co_preadv(s->bs->file, 0, qiov.size, &qiov, 0);
if (ret < 0) {
@@ -913,7 +908,6 @@ static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s,
{
QEMUIOVector qiov;
QEMUIOVector *backing_qiov = NULL;
- struct iovec iov;
int ret;
/* Skip copy entirely if there is no work to do */
@@ -921,11 +915,7 @@ static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s,
return 0;
}
- iov = (struct iovec) {
- .iov_base = qemu_blockalign(s->bs, len),
- .iov_len = len,
- };
- qemu_iovec_init_external(&qiov, &iov, 1);
+ qemu_iovec_init_buf(&qiov, qemu_blockalign(s->bs, len), len);
ret = qed_read_backing_file(s, pos, &qiov, &backing_qiov);
@@ -946,7 +936,7 @@ static int coroutine_fn qed_copy_from_backing_file(BDRVQEDState *s,
}
ret = 0;
out:
- qemu_vfree(iov.iov_base);
+ qemu_vfree(qemu_iovec_buf(&qiov));
return ret;
}
@@ -1447,8 +1437,12 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
BdrvRequestFlags flags)
{
BDRVQEDState *s = bs->opaque;
- QEMUIOVector qiov;
- struct iovec iov;
+
+ /*
+ * Zero writes start without an I/O buffer. If a buffer becomes necessary
+ * then it will be allocated during request processing.
+ */
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);
/* Fall back if the request is not aligned */
if (qed_offset_into_cluster(s, offset) ||
@@ -1456,13 +1450,6 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
return -ENOTSUP;
}
- /* Zero writes start without an I/O buffer. If a buffer becomes necessary
- * then it will be allocated during request processing.
- */
- iov.iov_base = NULL;
- iov.iov_len = bytes;
-
- qemu_iovec_init_external(&qiov, &iov, 1);
return qed_co_request(bs, offset >> BDRV_SECTOR_BITS, &qiov,
bytes >> BDRV_SECTOR_BITS,
QED_AIOCB_WRITE | QED_AIOCB_ZERO);
diff --git a/block/stream.c b/block/stream.c
index 7a49ac0992..e14579ff80 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -41,14 +41,9 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
int64_t offset, uint64_t bytes,
void *buf)
{
- struct iovec iov = {
- .iov_base = buf,
- .iov_len = bytes,
- };
- QEMUIOVector qiov;
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
assert(bytes < SIZE_MAX);
- qemu_iovec_init_external(&qiov, &iov, 1);
/* Copy-on-read the unallocated clusters */
return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ);
diff --git a/block/vmdk.c b/block/vmdk.c
index 096e8eb662..41048741cd 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1371,7 +1371,6 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
VmdkGrainMarker *data = NULL;
uLongf buf_len;
QEMUIOVector local_qiov;
- struct iovec iov;
int64_t write_offset;
int64_t write_end_sector;
@@ -1399,11 +1398,7 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
data->size = cpu_to_le32(buf_len);
n_bytes = buf_len + sizeof(VmdkGrainMarker);
- iov = (struct iovec) {
- .iov_base = data,
- .iov_len = n_bytes,
- };
- qemu_iovec_init_external(&local_qiov, &iov, 1);
+ qemu_iovec_init_buf(&local_qiov, data, n_bytes);
BLKDBG_EVENT(extent->file, BLKDBG_WRITE_COMPRESSED);
} else {
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index cf7f47eaba..c159a3d5f7 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -28,9 +28,28 @@
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
-/* We don't support discard yet, hide associated config fields. */
+/* Config size before discard support (hides the discard-related fields) */
#define VIRTIO_BLK_CFG_SIZE offsetof(struct virtio_blk_config, \
max_discard_sectors)
+/*
+ * Starting from the discard feature, we can use this array to properly
+ * set the config size depending on the features enabled.
+ */
+static VirtIOFeature feature_sizes[] = {
+ {.flags = 1ULL << VIRTIO_BLK_F_DISCARD,
+ .end = virtio_endof(struct virtio_blk_config, discard_sector_alignment)},
+ {.flags = 1ULL << VIRTIO_BLK_F_WRITE_ZEROES,
+ .end = virtio_endof(struct virtio_blk_config, write_zeroes_may_unmap)},
+ {}
+};
+
+static void virtio_blk_set_config_size(VirtIOBlock *s, uint64_t host_features)
+{
+ s->config_size = MAX(VIRTIO_BLK_CFG_SIZE,
+ virtio_feature_get_config_size(feature_sizes, host_features));
+
+ assert(s->config_size <= sizeof(struct virtio_blk_config));
+}
static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq,
VirtIOBlockReq *req)
@@ -65,7 +84,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
}
static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
- bool is_read)
+ bool is_read, bool acct_failed)
{
VirtIOBlock *s = req->dev;
BlockErrorAction action = blk_get_error_action(s->blk, is_read, error);
@@ -78,7 +97,9 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
s->rq = req;
} else if (action == BLOCK_ERROR_ACTION_REPORT) {
virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
- block_acct_failed(blk_get_stats(s->blk), &req->acct);
+ if (acct_failed) {
+ block_acct_failed(blk_get_stats(s->blk), &req->acct);
+ }
virtio_blk_free_request(req);
}
@@ -116,7 +137,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
* the memory until the request is completed (which will
* happen on the other side of the migration).
*/
- if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
+ if (virtio_blk_handle_rw_error(req, -ret, is_read, true)) {
continue;
}
}
@@ -135,7 +156,7 @@ static void virtio_blk_flush_complete(void *opaque, int ret)
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
if (ret) {
- if (virtio_blk_handle_rw_error(req, -ret, 0)) {
+ if (virtio_blk_handle_rw_error(req, -ret, 0, true)) {
goto out;
}
}
@@ -148,6 +169,30 @@ out:
aio_context_release(blk_get_aio_context(s->conf.conf.blk));
}
+static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret)
+{
+ VirtIOBlockReq *req = opaque;
+ VirtIOBlock *s = req->dev;
+ bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) &
+ ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES;
+
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
+ if (ret) {
+ if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) {
+ goto out;
+ }
+ }
+
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+ if (is_write_zeroes) {
+ block_acct_done(blk_get_stats(s->blk), &req->acct);
+ }
+ virtio_blk_free_request(req);
+
+out:
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
+}
+
#ifdef __linux__
typedef struct {
@@ -243,7 +288,7 @@ static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req)
*/
scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;
- if (!blk->conf.scsi) {
+ if (!virtio_has_feature(blk->host_features, VIRTIO_BLK_F_SCSI)) {
status = VIRTIO_BLK_S_UNSUPP;
goto fail;
}
@@ -481,6 +526,84 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
return true;
}
+static uint8_t virtio_blk_handle_discard_write_zeroes(VirtIOBlockReq *req,
+ struct virtio_blk_discard_write_zeroes *dwz_hdr, bool is_write_zeroes)
+{
+ VirtIOBlock *s = req->dev;
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+ uint64_t sector;
+ uint32_t num_sectors, flags, max_sectors;
+ uint8_t err_status;
+ int bytes;
+
+ sector = virtio_ldq_p(vdev, &dwz_hdr->sector);
+ num_sectors = virtio_ldl_p(vdev, &dwz_hdr->num_sectors);
+ flags = virtio_ldl_p(vdev, &dwz_hdr->flags);
+ max_sectors = is_write_zeroes ? s->conf.max_write_zeroes_sectors :
+ s->conf.max_discard_sectors;
+
+ /*
+ * max_sectors is at most BDRV_REQUEST_MAX_SECTORS, so this check
+ * ensures that "num_sectors << BDRV_SECTOR_BITS" fits in the
+ * integer variable.
+ */
+ if (unlikely(num_sectors > max_sectors)) {
+ err_status = VIRTIO_BLK_S_IOERR;
+ goto err;
+ }
+
+ bytes = num_sectors << BDRV_SECTOR_BITS;
+
+ if (unlikely(!virtio_blk_sect_range_ok(s, sector, bytes))) {
+ err_status = VIRTIO_BLK_S_IOERR;
+ goto err;
+ }
+
+ /*
+ * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
+ * and write zeroes commands if any unknown flag is set.
+ */
+ if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
+ err_status = VIRTIO_BLK_S_UNSUPP;
+ goto err;
+ }
+
+ if (is_write_zeroes) { /* VIRTIO_BLK_T_WRITE_ZEROES */
+ int blk_aio_flags = 0;
+
+ if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
+ blk_aio_flags |= BDRV_REQ_MAY_UNMAP;
+ }
+
+ block_acct_start(blk_get_stats(s->blk), &req->acct, bytes,
+ BLOCK_ACCT_WRITE);
+
+ blk_aio_pwrite_zeroes(s->blk, sector << BDRV_SECTOR_BITS,
+ bytes, blk_aio_flags,
+ virtio_blk_discard_write_zeroes_complete, req);
+ } else { /* VIRTIO_BLK_T_DISCARD */
+ /*
+ * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
+ * discard commands if the unmap flag is set.
+ */
+ if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
+ err_status = VIRTIO_BLK_S_UNSUPP;
+ goto err;
+ }
+
+ blk_aio_pdiscard(s->blk, sector << BDRV_SECTOR_BITS, bytes,
+ virtio_blk_discard_write_zeroes_complete, req);
+ }
+
+ return VIRTIO_BLK_S_OK;
+
+err:
+ if (is_write_zeroes) {
+ block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_WRITE);
+ }
+ return err_status;
+}
+
static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
uint32_t type;
@@ -582,6 +705,47 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
virtio_blk_free_request(req);
break;
}
+ /*
+ * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with the
+ * VIRTIO_BLK_T_OUT flag set. The switch statement masks this flag out, so
+ * the case labels must mask it too; we then check below that it is set.
+ */
+ case VIRTIO_BLK_T_DISCARD & ~VIRTIO_BLK_T_OUT:
+ case VIRTIO_BLK_T_WRITE_ZEROES & ~VIRTIO_BLK_T_OUT:
+ {
+ struct virtio_blk_discard_write_zeroes dwz_hdr;
+ size_t out_len = iov_size(out_iov, out_num);
+ bool is_write_zeroes = (type & ~VIRTIO_BLK_T_BARRIER) ==
+ VIRTIO_BLK_T_WRITE_ZEROES;
+ uint8_t err_status;
+
+ /*
+ * Unsupported if VIRTIO_BLK_T_OUT is not set or the request contains
+ * more than one segment.
+ */
+ if (unlikely(!(type & VIRTIO_BLK_T_OUT) ||
+ out_len > sizeof(dwz_hdr))) {
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
+ virtio_blk_free_request(req);
+ return 0;
+ }
+
+ if (unlikely(iov_to_buf(out_iov, out_num, 0, &dwz_hdr,
+ sizeof(dwz_hdr)) != sizeof(dwz_hdr))) {
+ virtio_error(vdev, "virtio-blk discard/write_zeroes header"
+ " too short");
+ return -1;
+ }
+
+ err_status = virtio_blk_handle_discard_write_zeroes(req, &dwz_hdr,
+ is_write_zeroes);
+ if (err_status != VIRTIO_BLK_S_OK) {
+ virtio_blk_req_complete(req, err_status);
+ virtio_blk_free_request(req);
+ }
+
+ break;
+ }
default:
virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
virtio_blk_free_request(req);
@@ -761,8 +925,25 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
blkcfg.alignment_offset = 0;
blkcfg.wce = blk_enable_write_cache(s->blk);
virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues);
- memcpy(config, &blkcfg, VIRTIO_BLK_CFG_SIZE);
- QEMU_BUILD_BUG_ON(VIRTIO_BLK_CFG_SIZE > sizeof(blkcfg));
+ if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD)) {
+ virtio_stl_p(vdev, &blkcfg.max_discard_sectors,
+ s->conf.max_discard_sectors);
+ virtio_stl_p(vdev, &blkcfg.discard_sector_alignment,
+ blk_size >> BDRV_SECTOR_BITS);
+ /*
+ * We support only one segment per request since multiple segments
+ * are not widely used and there are no userspace APIs that allow
+ * applications to submit multiple segments in a single call.
+ */
+ virtio_stl_p(vdev, &blkcfg.max_discard_seg, 1);
+ }
+ if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES)) {
+ virtio_stl_p(vdev, &blkcfg.max_write_zeroes_sectors,
+ s->conf.max_write_zeroes_sectors);
+ blkcfg.write_zeroes_may_unmap = 1;
+ virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1);
+ }
+ memcpy(config, &blkcfg, s->config_size);
}
static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
@@ -770,8 +951,7 @@ static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
VirtIOBlock *s = VIRTIO_BLK(vdev);
struct virtio_blk_config blkcfg;
- memcpy(&blkcfg, config, VIRTIO_BLK_CFG_SIZE);
- QEMU_BUILD_BUG_ON(VIRTIO_BLK_CFG_SIZE > sizeof(blkcfg));
+ memcpy(&blkcfg, config, s->config_size);
aio_context_acquire(blk_get_aio_context(s->blk));
blk_set_enable_write_cache(s->blk, blkcfg.wce != 0);
@@ -783,12 +963,15 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features,
{
VirtIOBlock *s = VIRTIO_BLK(vdev);
+ /* First, merge in every feature bit enabled in host_features */
+ features |= s->host_features;
+
virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX);
virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
if (virtio_has_feature(features, VIRTIO_F_VERSION_1)) {
- if (s->conf.scsi) {
+ if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_SCSI)) {
error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0");
return 0;
}
@@ -797,9 +980,6 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features,
virtio_add_feature(&features, VIRTIO_BLK_F_SCSI);
}
- if (s->conf.config_wce) {
- virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE);
- }
if (blk_enable_write_cache(s->blk)) {
virtio_add_feature(&features, VIRTIO_BLK_F_WCE);
}
@@ -954,7 +1134,28 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
return;
}
- virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, VIRTIO_BLK_CFG_SIZE);
+ if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) &&
+ (!conf->max_discard_sectors ||
+ conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) {
+ error_setg(errp, "invalid max-discard-sectors property (%" PRIu32 ")"
+ ", must be between 1 and %d",
+ conf->max_discard_sectors, (int)BDRV_REQUEST_MAX_SECTORS);
+ return;
+ }
+
+ if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES) &&
+ (!conf->max_write_zeroes_sectors ||
+ conf->max_write_zeroes_sectors > BDRV_REQUEST_MAX_SECTORS)) {
+ error_setg(errp, "invalid max-write-zeroes-sectors property (%" PRIu32
+ "), must be between 1 and %d",
+ conf->max_write_zeroes_sectors,
+ (int)BDRV_REQUEST_MAX_SECTORS);
+ return;
+ }
+
+ virtio_blk_set_config_size(s, s->host_features);
+
+ virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, s->config_size);
s->blk = conf->conf.blk;
s->rq = NULL;
@@ -1013,9 +1214,11 @@ static Property virtio_blk_properties[] = {
DEFINE_BLOCK_ERROR_PROPERTIES(VirtIOBlock, conf.conf),
DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, conf.conf),
DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial),
- DEFINE_PROP_BIT("config-wce", VirtIOBlock, conf.config_wce, 0, true),
+ DEFINE_PROP_BIT64("config-wce", VirtIOBlock, host_features,
+ VIRTIO_BLK_F_CONFIG_WCE, true),
#ifdef __linux__
- DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, false),
+ DEFINE_PROP_BIT64("scsi", VirtIOBlock, host_features,
+ VIRTIO_BLK_F_SCSI, false),
#endif
DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0,
true),
@@ -1023,6 +1226,14 @@ static Property virtio_blk_properties[] = {
DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 128),
DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD,
IOThread *),
+ DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features,
+ VIRTIO_BLK_F_DISCARD, true),
+ DEFINE_PROP_BIT64("write-zeroes", VirtIOBlock, host_features,
+ VIRTIO_BLK_F_WRITE_ZEROES, true),
+ DEFINE_PROP_UINT32("max-discard-sectors", VirtIOBlock,
+ conf.max_discard_sectors, BDRV_REQUEST_MAX_SECTORS),
+ DEFINE_PROP_UINT32("max-write-zeroes-sectors", VirtIOBlock,
+ conf.max_write_zeroes_sectors, BDRV_REQUEST_MAX_SECTORS),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 077fbd182a..766ca5899d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -33,6 +33,8 @@ GlobalProperty hw_compat_3_1[] = {
{ "usb-kbd", "serial", "42" },
{ "usb-mouse", "serial", "42" },
{ "usb-kbd", "serial", "42" },
+ { "virtio-blk-device", "discard", "false" },
+ { "virtio-blk-device", "write-zeroes", "false" },
};
const size_t hw_compat_3_1_len = G_N_ELEMENTS(hw_compat_3_1);
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 39e473f9c2..1b0f66cc08 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -174,16 +174,15 @@ static void cd_read_sector_cb(void *opaque, int ret)
static int cd_read_sector(IDEState *s)
{
+ void *buf;
+
if (s->cd_sector_size != 2048 && s->cd_sector_size != 2352) {
block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_READ);
return -EINVAL;
}
- s->iov.iov_base = (s->cd_sector_size == 2352) ?
- s->io_buffer + 16 : s->io_buffer;
-
- s->iov.iov_len = ATAPI_SECTOR_SIZE;
- qemu_iovec_init_external(&s->qiov, &s->iov, 1);
+ buf = (s->cd_sector_size == 2352) ? s->io_buffer + 16 : s->io_buffer;
+ qemu_iovec_init_buf(&s->qiov, buf, ATAPI_SECTOR_SIZE);
trace_cd_read_sector(s->lba);
@@ -421,9 +420,8 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
data_offset = 0;
}
trace_ide_atapi_cmd_read_dma_cb_aio(s, s->lba, n);
- s->bus->dma->iov.iov_base = (void *)(s->io_buffer + data_offset);
- s->bus->dma->iov.iov_len = n * ATAPI_SECTOR_SIZE;
- qemu_iovec_init_external(&s->bus->dma->qiov, &s->bus->dma->iov, 1);
+ qemu_iovec_init_buf(&s->bus->dma->qiov, s->io_buffer + data_offset,
+ n * ATAPI_SECTOR_SIZE);
s->bus->dma->aiocb = ide_buffered_readv(s, (int64_t)s->lba << 2,
&s->bus->dma->qiov, n * 4,
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 84832008b8..6afadf894f 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -629,13 +629,15 @@ static void ide_buffered_readv_cb(void *opaque, int ret)
IDEBufferedRequest *req = opaque;
if (!req->orphaned) {
if (!ret) {
- qemu_iovec_from_buf(req->original_qiov, 0, req->iov.iov_base,
+ assert(req->qiov.size == req->original_qiov->size);
+ qemu_iovec_from_buf(req->original_qiov, 0,
+ req->qiov.local_iov.iov_base,
req->original_qiov->size);
}
req->original_cb(req->original_opaque, ret);
}
QLIST_REMOVE(req, list);
- qemu_vfree(req->iov.iov_base);
+ qemu_vfree(qemu_iovec_buf(&req->qiov));
g_free(req);
}
@@ -660,9 +662,8 @@ BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num,
req->original_qiov = iov;
req->original_cb = cb;
req->original_opaque = opaque;
- req->iov.iov_base = qemu_blockalign(blk_bs(s->blk), iov->size);
- req->iov.iov_len = iov->size;
- qemu_iovec_init_external(&req->qiov, &req->iov, 1);
+ qemu_iovec_init_buf(&req->qiov, blk_blockalign(s->blk, iov->size),
+ iov->size);
aioreq = blk_aio_preadv(s->blk, sector_num << BDRV_SECTOR_BITS,
&req->qiov, 0, ide_buffered_readv_cb, req);
@@ -774,9 +775,7 @@ static void ide_sector_read(IDEState *s)
return;
}
- s->iov.iov_base = s->io_buffer;
- s->iov.iov_len = n * BDRV_SECTOR_SIZE;
- qemu_iovec_init_external(&s->qiov, &s->iov, 1);
+ qemu_iovec_init_buf(&s->qiov, s->io_buffer, n * BDRV_SECTOR_SIZE);
block_acct_start(blk_get_stats(s->blk), &s->acct,
n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
@@ -1045,9 +1044,7 @@ static void ide_sector_write(IDEState *s)
return;
}
- s->iov.iov_base = s->io_buffer;
- s->iov.iov_len = n * BDRV_SECTOR_SIZE;
- qemu_iovec_init_external(&s->qiov, &s->iov, 1);
+ qemu_iovec_init_buf(&s->qiov, s->io_buffer, n * BDRV_SECTOR_SIZE);
block_acct_start(blk_get_stats(s->blk), &s->acct,
n * BDRV_SECTOR_SIZE, BLOCK_ACCT_WRITE);
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 3f319ef723..6e6b146022 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -82,29 +82,17 @@ static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
#endif
-/*
- * Calculate the number of bytes up to and including the given 'field' of
- * 'container'.
- */
-#define endof(container, field) \
- (offsetof(container, field) + sizeof_field(container, field))
-
-typedef struct VirtIOFeature {
- uint64_t flags;
- size_t end;
-} VirtIOFeature;
-
static VirtIOFeature feature_sizes[] = {
{.flags = 1ULL << VIRTIO_NET_F_MAC,
- .end = endof(struct virtio_net_config, mac)},
+ .end = virtio_endof(struct virtio_net_config, mac)},
{.flags = 1ULL << VIRTIO_NET_F_STATUS,
- .end = endof(struct virtio_net_config, status)},
+ .end = virtio_endof(struct virtio_net_config, status)},
{.flags = 1ULL << VIRTIO_NET_F_MQ,
- .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
+ .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)},
{.flags = 1ULL << VIRTIO_NET_F_MTU,
- .end = endof(struct virtio_net_config, mtu)},
+ .end = virtio_endof(struct virtio_net_config, mtu)},
{.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
- .end = endof(struct virtio_net_config, duplex)},
+ .end = virtio_endof(struct virtio_net_config, duplex)},
{}
};
@@ -2580,15 +2568,10 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
- int i, config_size = 0;
virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
- for (i = 0; feature_sizes[i].flags != 0; i++) {
- if (host_features & feature_sizes[i].flags) {
- config_size = MAX(feature_sizes[i].end, config_size);
- }
- }
- n->config_size = config_size;
+ n->config_size = virtio_feature_get_config_size(feature_sizes,
+ host_features);
}
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index a1ff647a66..2626a895cb 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2036,6 +2036,21 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val)
return ret;
}
+/*
+ * Return the largest 'end' value among the @feature_sizes entries whose
+ * 'flags' share at least one bit with @host_features, or 0 when no entry
+ * matches.  The table is walked until an entry with 'flags' == 0 is found,
+ * so it must be terminated by such an entry.
+ */
+size_t virtio_feature_get_config_size(VirtIOFeature *feature_sizes,
+                                      uint64_t host_features)
+{
+    VirtIOFeature *entry;
+    size_t config_size = 0;
+
+    for (entry = feature_sizes; entry->flags != 0; entry++) {
+        if ((host_features & entry->flags) && entry->end > config_size) {
+            config_size = entry->end;
+        }
+    }
+
+    return config_size;
+}
+
int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
int i, ret;
diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h
index 880413ddc7..8efd03132b 100644
--- a/include/hw/ide/internal.h
+++ b/include/hw/ide/internal.h
@@ -346,7 +346,6 @@ extern const char *IDE_DMA_CMD_lookup[IDE_DMA__COUNT];
typedef struct IDEBufferedRequest {
QLIST_ENTRY(IDEBufferedRequest) list;
- struct iovec iov;
QEMUIOVector qiov;
QEMUIOVector *original_qiov;
BlockCompletionFunc *original_cb;
@@ -405,7 +404,6 @@ struct IDEState {
int atapi_dma; /* true if dma is requested for the packet cmd */
BlockAcctCookie acct;
BlockAIOCB *pio_aiocb;
- struct iovec iov;
QEMUIOVector qiov;
QLIST_HEAD(, IDEBufferedRequest) buffered_requests;
/* ATA DMA state */
@@ -457,7 +455,6 @@ struct IDEDMAOps {
struct IDEDMA {
const struct IDEDMAOps *ops;
- struct iovec iov;
QEMUIOVector qiov;
BlockAIOCB *aiocb;
};
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 5117431d96..cddcfbebe9 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -35,11 +35,11 @@ struct VirtIOBlkConf
BlockConf conf;
IOThread *iothread;
char *serial;
- uint32_t scsi;
- uint32_t config_wce;
uint32_t request_merging;
uint16_t num_queues;
uint16_t queue_size;
+ uint32_t max_discard_sectors;
+ uint32_t max_write_zeroes_sectors;
};
struct VirtIOBlockDataPlane;
@@ -57,6 +57,8 @@ typedef struct VirtIOBlock {
bool dataplane_disabled;
bool dataplane_started;
struct VirtIOBlockDataPlane *dataplane;
+ uint64_t host_features;
+ size_t config_size;
} VirtIOBlock;
typedef struct VirtIOBlockReq {
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 9c1fa07d6d..ce9516236a 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -37,6 +37,21 @@ static inline hwaddr vring_align(hwaddr addr,
return QEMU_ALIGN_UP(addr, align);
}
+/*
+ * Calculate the number of bytes up to and including the given 'field' of
+ * 'container'.
+ */
+#define virtio_endof(container, field) \
+    (offsetof(container, field) + sizeof_field(container, field))
+
+/*
+ * One entry of a feature -> config size table.
+ *
+ * @flags: feature bit mask; the entry applies when any of these bits is
+ *         set in the host features.  An entry with @flags == 0 terminates
+ *         the table.
+ * @end:   size value reported for the entry (callers in this change fill it
+ *         with virtio_endof() of a config struct field).
+ */
+typedef struct VirtIOFeature {
+    uint64_t flags;
+    size_t end;
+} VirtIOFeature;
+
+/*
+ * Return the largest @end among the entries of the zero-terminated
+ * @feature_sizes table whose @flags intersect @host_features, or 0 if no
+ * entry matches.
+ */
+size_t virtio_feature_get_config_size(VirtIOFeature *feature_sizes,
+                                      uint64_t host_features);
+
typedef struct VirtQueue VirtQueue;
#define VIRTQUEUE_MAX_SIZE 1024
diff --git a/include/qemu/iov.h b/include/qemu/iov.h
index 5f433c7768..48b45987b7 100644
--- a/include/qemu/iov.h
+++ b/include/qemu/iov.h
@@ -133,10 +133,70 @@ size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
typedef struct QEMUIOVector {
struct iovec *iov;
int niov;
- int nalloc;
- size_t size;
+
+ /*
+ * For external @iov (qemu_iovec_init_external()) or allocated @iov
+ * (qemu_iovec_init()), @size is the cumulative size of the iovecs and
+ * @local_iov is invalid and unused.
+ *
+ * For embedded @iov (QEMU_IOVEC_INIT_BUF() or qemu_iovec_init_buf()),
+ * @iov is equal to &@local_iov, and @size is valid because it aliases
+ * @local_iov.iov_len: both have the same type, and the build-time
+ * assertion below checks that they are at the same offset.
+ *
+ * @nalloc is always valid and is -1 in both the embedded and external
+ * cases. It is included in the union only to ensure the padding prior
+ * to the @size field will not result in a 0-length array.
+ */
+ union {
+ struct {
+ int nalloc;
+ struct iovec local_iov;
+ };
+ struct {
+ char __pad[sizeof(int) + offsetof(struct iovec, iov_len)];
+ size_t size;
+ };
+ };
} QEMUIOVector;
+QEMU_BUILD_BUG_ON(offsetof(QEMUIOVector, size) !=
+ offsetof(QEMUIOVector, local_iov.iov_len));
+
+/*
+ * QEMU_IOVEC_INIT_BUF
+ *
+ * Initializer for an embedded QEMUIOVector: a one-element vector whose
+ * only iovec is @self's own local_iov, describing @len bytes at @buf.
+ * The address of (self).local_iov is stored in .iov, so @self has to
+ * name the object that receives this initializer.
+ */
+#define QEMU_IOVEC_INIT_BUF(self, buf, len)                        \
+{                                                                  \
+    .iov = &(self).local_iov,                                      \
+    .niov = 1,                                                     \
+    .nalloc = -1,                                                  \
+    .local_iov.iov_base = (void *)(buf), /* cast away const */     \
+    .local_iov.iov_len = (len),                                    \
+}
+
+/*
+ * qemu_iovec_init_buf
+ *
+ * Set up @qiov as an embedded QEMUIOVector covering the @len bytes that
+ * start at @buf.
+ *
+ * Note: @buf is declared "const" so that callers holding const pointers
+ * can pass them without a cast; QEMU_IOVEC_INIT_BUF() casts the
+ * qualifier away.
+ */
+static inline void qemu_iovec_init_buf(QEMUIOVector *qiov,
+                                       const void *buf, size_t len)
+{
+    QEMUIOVector embedded = QEMU_IOVEC_INIT_BUF(*qiov, buf, len);
+
+    *qiov = embedded;
+}
+
+/*
+ * qemu_iovec_buf
+ *
+ * Return the buffer pointer stored in @qiov's local_iov.  Asserts that
+ * @qiov is an embedded vector (nalloc == -1 and iov pointing at its own
+ * local_iov); other kinds of QEMUIOVector are not supported.
+ */
+static inline void *qemu_iovec_buf(QEMUIOVector *qiov)
+{
+    void *buf;
+
+    assert(qiov->nalloc == -1 && qiov->iov == &qiov->local_iov);
+    buf = qiov->local_iov.iov_base;
+
+    return buf;
+}
+
void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
diff --git a/migration/block.c b/migration/block.c
index 0e24e18d13..83c633fb3f 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -83,7 +83,6 @@ typedef struct BlkMigBlock {
BlkMigDevState *bmds;
int64_t sector;
int nr_sectors;
- struct iovec iov;
QEMUIOVector qiov;
BlockAIOCB *aiocb;
@@ -314,9 +313,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
blk->sector = cur_sector;
blk->nr_sectors = nr_sectors;
- blk->iov.iov_base = blk->buf;
- blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
- qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
+ qemu_iovec_init_buf(&blk->qiov, blk->buf, nr_sectors * BDRV_SECTOR_SIZE);
blk_mig_lock();
block_mig_state.submitted++;
@@ -556,9 +553,8 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
blk->nr_sectors = nr_sectors;
if (is_async) {
- blk->iov.iov_base = blk->buf;
- blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
- qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
+ qemu_iovec_init_buf(&blk->qiov, blk->buf,
+ nr_sectors * BDRV_SECTOR_SIZE);
blk->aiocb = blk_aio_preadv(bmds->blk,
sector * BDRV_SECTOR_SIZE,
diff --git a/qemu-img.c b/qemu-img.c
index 25288c4d18..7853935049 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1670,7 +1670,6 @@ static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
{
int n, ret;
QEMUIOVector qiov;
- struct iovec iov;
assert(nb_sectors <= s->buf_sectors);
while (nb_sectors > 0) {
@@ -1686,9 +1685,7 @@ static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
bs_sectors = s->src_sectors[src_cur];
n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
- iov.iov_base = buf;
- iov.iov_len = n << BDRV_SECTOR_BITS;
- qemu_iovec_init_external(&qiov, &iov, 1);
+ qemu_iovec_init_buf(&qiov, buf, n << BDRV_SECTOR_BITS);
ret = blk_co_preadv(
blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS,
@@ -1712,7 +1709,6 @@ static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
{
int ret;
QEMUIOVector qiov;
- struct iovec iov;
while (nb_sectors > 0) {
int n = nb_sectors;
@@ -1740,9 +1736,7 @@ static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
(s->compressed &&
!buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
{
- iov.iov_base = buf;
- iov.iov_len = n << BDRV_SECTOR_BITS;
- qemu_iovec_init_external(&qiov, &iov, 1);
+ qemu_iovec_init_buf(&qiov, buf, n << BDRV_SECTOR_BITS);
ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS,
n << BDRV_SECTOR_BITS, &qiov, flags);
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index ee1740ff06..821be405f0 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -204,12 +204,7 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
BlockAIOCB *acb;
int aio_ret;
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = NULL,
- .iov_len = 0,
- };
- qemu_iovec_init_external(&qiov, &iov, 1);
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
@@ -670,12 +665,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
AioContext *ctx_a = iothread_get_aio_context(a);
AioContext *ctx_b = iothread_get_aio_context(b);
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = NULL,
- .iov_len = 0,
- };
- qemu_iovec_init_external(&qiov, &iov, 1);
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
/* bdrv_drain_all() may only be called from the main loop thread */
if (drain_type == BDRV_DRAIN_ALL && drain_thread != 0) {
@@ -1148,13 +1138,7 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque)
BlockDriverState *bs = blk_bs(blk);
BDRVTestTopState *tts = bs->opaque;
void *buffer = g_malloc(65536);
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = 65536,
- };
-
- qemu_iovec_init_external(&qiov, &iov, 1);
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536);
/* Pretend some internal write operation from parent to child.
* Important: We have to read from the child, not from the parent!
@@ -1365,12 +1349,7 @@ static void test_detach_indirect(bool by_parent_cb)
BdrvChild *child_a, *child_b;
BlockAIOCB *acb;
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = NULL,
- .iov_len = 0,
- };
- qemu_iovec_init_external(&qiov, &iov, 1);
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
if (!by_parent_cb) {
detach_by_driver_cb_role = child_file;
diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index 04c608764b..8d2fc9c710 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -46,6 +46,12 @@ typedef struct QVirtioBlkReq {
uint8_t status;
} QVirtioBlkReq;
+/*
+ * Set according to whether HOST_WORDS_BIGENDIAN is defined; compared
+ * against the device's endianness to decide if request fields need a byte
+ * swap.  File-local, so it gets internal linkage.
+ */
+#ifdef HOST_WORDS_BIGENDIAN
+static const bool host_is_big_endian = true;
+#else
+static const bool host_is_big_endian; /* false */
+#endif
+
static char *drive_create(void)
{
int fd, ret;
@@ -125,12 +131,6 @@ static QVirtioPCIDevice *virtio_blk_pci_init(QPCIBus *bus, int slot)
static inline void virtio_blk_fix_request(QVirtioDevice *d, QVirtioBlkReq *req)
{
-#ifdef HOST_WORDS_BIGENDIAN
- const bool host_is_big_endian = true;
-#else
- const bool host_is_big_endian = false;
-#endif
-
if (qvirtio_is_big_endian(d) != host_is_big_endian) {
req->type = bswap32(req->type);
req->ioprio = bswap32(req->ioprio);
@@ -138,13 +138,37 @@ static inline void virtio_blk_fix_request(QVirtioDevice *d, QVirtioBlkReq *req)
}
}
+
+/*
+ * Byte-swap the sector, num_sectors and flags fields of @dwz_hdr in place
+ * when qvirtio_is_big_endian(@d) disagrees with host_is_big_endian;
+ * otherwise leave the header untouched.
+ */
+static inline void virtio_blk_fix_dwz_hdr(QVirtioDevice *d,
+                             struct virtio_blk_discard_write_zeroes *dwz_hdr)
+{
+    if (qvirtio_is_big_endian(d) == host_is_big_endian) {
+        return;
+    }
+
+    dwz_hdr->sector = bswap64(dwz_hdr->sector);
+    dwz_hdr->num_sectors = bswap32(dwz_hdr->num_sectors);
+    dwz_hdr->flags = bswap32(dwz_hdr->flags);
+}
+
static uint64_t virtio_blk_request(QGuestAllocator *alloc, QVirtioDevice *d,
QVirtioBlkReq *req, uint64_t data_size)
{
uint64_t addr;
uint8_t status = 0xFF;
- g_assert_cmpuint(data_size % 512, ==, 0);
+ switch (req->type) {
+ case VIRTIO_BLK_T_IN:
+ case VIRTIO_BLK_T_OUT:
+ g_assert_cmpuint(data_size % 512, ==, 0);
+ break;
+ case VIRTIO_BLK_T_DISCARD:
+ case VIRTIO_BLK_T_WRITE_ZEROES:
+ g_assert_cmpuint(data_size %
+ sizeof(struct virtio_blk_discard_write_zeroes), ==, 0);
+ break;
+ default:
+ g_assert_cmpuint(data_size, ==, 0);
+ }
+
addr = guest_alloc(alloc, sizeof(*req) + data_size);
virtio_blk_fix_request(d, req);
@@ -231,6 +255,95 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc,
guest_free(alloc, req_addr);
+ if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
+ struct virtio_blk_discard_write_zeroes dwz_hdr;
+ void *expected;
+
+ /*
+ * WRITE_ZEROES request on the same sector of previous test where
+ * we wrote "TEST".
+ */
+ req.type = VIRTIO_BLK_T_WRITE_ZEROES;
+ req.data = (char *) &dwz_hdr;
+ dwz_hdr.sector = 0;
+ dwz_hdr.num_sectors = 1;
+ dwz_hdr.flags = 0;
+
+ virtio_blk_fix_dwz_hdr(dev, &dwz_hdr);
+
+ req_addr = virtio_blk_request(alloc, dev, &req, sizeof(dwz_hdr));
+
+ free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
+ qvirtqueue_add(vq, req_addr + 16, sizeof(dwz_hdr), false, true);
+ qvirtqueue_add(vq, req_addr + 16 + sizeof(dwz_hdr), 1, true, false);
+
+ qvirtqueue_kick(dev, vq, free_head);
+
+ qvirtio_wait_used_elem(dev, vq, free_head, NULL,
+ QVIRTIO_BLK_TIMEOUT_US);
+ status = readb(req_addr + 16 + sizeof(dwz_hdr));
+ g_assert_cmpint(status, ==, 0);
+
+ guest_free(alloc, req_addr);
+
+ /* Read request to check if the sector contains all zeroes */
+ req.type = VIRTIO_BLK_T_IN;
+ req.ioprio = 1;
+ req.sector = 0;
+ req.data = g_malloc0(512);
+
+ req_addr = virtio_blk_request(alloc, dev, &req, 512);
+
+ g_free(req.data);
+
+ free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
+ qvirtqueue_add(vq, req_addr + 16, 512, true, true);
+ qvirtqueue_add(vq, req_addr + 528, 1, true, false);
+
+ qvirtqueue_kick(dev, vq, free_head);
+
+ qvirtio_wait_used_elem(dev, vq, free_head, NULL,
+ QVIRTIO_BLK_TIMEOUT_US);
+ status = readb(req_addr + 528);
+ g_assert_cmpint(status, ==, 0);
+
+ data = g_malloc(512);
+ expected = g_malloc0(512);
+ memread(req_addr + 16, data, 512);
+ g_assert_cmpmem(data, 512, expected, 512);
+ g_free(expected);
+ g_free(data);
+
+ guest_free(alloc, req_addr);
+ }
+
+ if (features & (1u << VIRTIO_BLK_F_DISCARD)) {
+ struct virtio_blk_discard_write_zeroes dwz_hdr;
+
+ req.type = VIRTIO_BLK_T_DISCARD;
+ req.data = (char *) &dwz_hdr;
+ dwz_hdr.sector = 0;
+ dwz_hdr.num_sectors = 1;
+ dwz_hdr.flags = 0;
+
+ virtio_blk_fix_dwz_hdr(dev, &dwz_hdr);
+
+ req_addr = virtio_blk_request(alloc, dev, &req, sizeof(dwz_hdr));
+
+ free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
+ qvirtqueue_add(vq, req_addr + 16, sizeof(dwz_hdr), false, true);
+ qvirtqueue_add(vq, req_addr + 16 + sizeof(dwz_hdr), 1, true, false);
+
+ qvirtqueue_kick(dev, vq, free_head);
+
+ qvirtio_wait_used_elem(dev, vq, free_head, NULL,
+ QVIRTIO_BLK_TIMEOUT_US);
+ status = readb(req_addr + 16 + sizeof(dwz_hdr));
+ g_assert_cmpint(status, ==, 0);
+
+ guest_free(alloc, req_addr);
+ }
+
if (features & (1u << VIRTIO_F_ANY_LAYOUT)) {
/* Write and read with 2 descriptor layout */
/* Write request */