aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/block-backend.c114
-rw-r--r--block/bochs.c51
-rw-r--r--block/cloop.c38
-rw-r--r--block/crypto.c2
-rw-r--r--block/curl.c10
-rw-r--r--block/dmg.c40
-rw-r--r--block/io.c514
-rw-r--r--block/iscsi.c19
-rw-r--r--block/linux-aio.c57
-rw-r--r--block/nbd-client.c11
-rw-r--r--block/nbd-client.h2
-rw-r--r--block/nbd.c37
-rw-r--r--block/parallels.c5
-rw-r--r--block/qcow.c8
-rw-r--r--block/qcow2.c76
-rw-r--r--block/qed.c12
-rw-r--r--block/quorum.c94
-rw-r--r--block/raw-aio.h15
-rw-r--r--block/raw-posix.c21
-rw-r--r--block/raw_bsd.c15
-rw-r--r--block/sheepdog.c15
-rw-r--r--block/throttle-groups.c18
-rw-r--r--block/vdi.c131
-rw-r--r--block/vhdx.c5
-rw-r--r--block/vmdk.c367
-rw-r--r--block/vpc.c175
-rw-r--r--block/vvfat.c55
27 files changed, 1051 insertions, 856 deletions
diff --git a/block/block-backend.c b/block/block-backend.c
index 16c9d5e0f2..a1e2c7fa20 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1,7 +1,7 @@
/*
* QEMU Block backends
*
- * Copyright (C) 2014 Red Hat, Inc.
+ * Copyright (C) 2014-2016 Red Hat, Inc.
*
* Authors:
* Markus Armbruster <armbru@redhat.com>,
@@ -692,7 +692,7 @@ static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
return ret;
}
- return bdrv_co_do_preadv(blk_bs(blk), offset, bytes, qiov, flags);
+ return bdrv_co_preadv(blk_bs(blk), offset, bytes, qiov, flags);
}
static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
@@ -710,7 +710,7 @@ static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
flags |= BDRV_REQ_FUA;
}
- return bdrv_co_do_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
+ return bdrv_co_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
}
typedef struct BlkRwCo {
@@ -772,55 +772,28 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
return rwco.ret;
}
-static int blk_rw(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors, CoroutineEntry co_entry,
- BdrvRequestFlags flags)
-{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return -EINVAL;
- }
-
- return blk_prw(blk, sector_num << BDRV_SECTOR_BITS, buf,
- nb_sectors << BDRV_SECTOR_BITS, co_entry, flags);
-}
-
-int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
-{
- return blk_rw(blk, sector_num, buf, nb_sectors, blk_read_entry, 0);
-}
-
-int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
+int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
+ int count)
{
BlockDriverState *bs = blk_bs(blk);
- bool enabled;
int ret;
- ret = blk_check_request(blk, sector_num, nb_sectors);
+ ret = blk_check_byte_request(blk, offset, count);
if (ret < 0) {
return ret;
}
- enabled = bs->io_limits_enabled;
- bs->io_limits_enabled = false;
- ret = blk_read(blk, sector_num, buf, nb_sectors);
- bs->io_limits_enabled = enabled;
+ bdrv_no_throttling_begin(bs);
+ ret = blk_pread(blk, offset, buf, count);
+ bdrv_no_throttling_end(bs);
return ret;
}
-int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
- int nb_sectors)
-{
- return blk_rw(blk, sector_num, (uint8_t*) buf, nb_sectors,
- blk_write_entry, 0);
-}
-
-int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+int blk_write_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
- return blk_rw(blk, sector_num, NULL, nb_sectors, blk_write_entry,
- flags | BDRV_REQ_ZERO_WRITE);
+ return blk_prw(blk, offset, NULL, count, blk_write_entry,
+ flags | BDRV_REQ_ZERO_WRITE);
}
static void error_callback_bh(void *opaque)
@@ -932,18 +905,12 @@ static void blk_aio_write_entry(void *opaque)
blk_aio_complete(acb);
}
-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags,
+BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
- }
-
- return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, NULL,
- blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE,
- cb, opaque);
+ return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
+ flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
}
int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
@@ -955,9 +922,11 @@ int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
return count;
}
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
+ BdrvRequestFlags flags)
{
- int ret = blk_prw(blk, offset, (void*) buf, count, blk_write_entry, 0);
+ int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
+ flags);
if (ret < 0) {
return ret;
}
@@ -991,30 +960,20 @@ int64_t blk_nb_sectors(BlockBackend *blk)
return bdrv_nb_sectors(blk_bs(blk));
}
-BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
- }
-
- assert(nb_sectors << BDRV_SECTOR_BITS == iov->size);
- return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov,
- blk_aio_read_entry, 0, cb, opaque);
+ return blk_aio_prwv(blk, offset, qiov->size, qiov,
+ blk_aio_read_entry, flags, cb, opaque);
}
-BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
- }
-
- assert(nb_sectors << BDRV_SECTOR_BITS == iov->size);
- return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov,
- blk_aio_write_entry, 0, cb, opaque);
+ return blk_aio_prwv(blk, offset, qiov->size, qiov,
+ blk_aio_write_entry, flags, cb, opaque);
}
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
@@ -1444,15 +1403,10 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}
-int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return -EINVAL;
- }
-
- return blk_co_pwritev(blk, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, NULL,
+ return blk_co_pwritev(blk, offset, count, NULL,
flags | BDRV_REQ_ZERO_WRITE);
}
diff --git a/block/bochs.c b/block/bochs.c
index af8b7abdfd..f0e18c0b84 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -104,6 +104,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
int ret;
bs->read_only = 1; // no write support yet
+ bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
ret = bdrv_pread(bs->file->bs, 0, &bochs, sizeof(bochs));
if (ret < 0) {
@@ -221,38 +222,52 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
return bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));
}
-static int bochs_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+bochs_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
+ BDRVBochsState *s = bs->opaque;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ uint64_t bytes_done = 0;
+ QEMUIOVector local_qiov;
int ret;
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_co_mutex_lock(&s->lock);
+
while (nb_sectors > 0) {
int64_t block_offset = seek_to_sector(bs, sector_num);
if (block_offset < 0) {
- return block_offset;
- } else if (block_offset > 0) {
- ret = bdrv_pread(bs->file->bs, block_offset, buf, 512);
+ ret = block_offset;
+ goto fail;
+ }
+
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, 512);
+
+ if (block_offset > 0) {
+ ret = bdrv_co_preadv(bs->file->bs, block_offset, 512,
+ &local_qiov, 0);
if (ret < 0) {
- return ret;
+ goto fail;
}
} else {
- memset(buf, 0, 512);
+ qemu_iovec_memset(&local_qiov, 0, 0, 512);
}
nb_sectors--;
sector_num++;
- buf += 512;
+ bytes_done += 512;
}
- return 0;
-}
-static coroutine_fn int bochs_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVBochsState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = bochs_read(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
qemu_co_mutex_unlock(&s->lock);
+ qemu_iovec_destroy(&local_qiov);
+
return ret;
}
@@ -267,7 +282,7 @@ static BlockDriver bdrv_bochs = {
.instance_size = sizeof(BDRVBochsState),
.bdrv_probe = bochs_probe,
.bdrv_open = bochs_open,
- .bdrv_read = bochs_co_read,
+ .bdrv_co_preadv = bochs_co_preadv,
.bdrv_close = bochs_close,
};
diff --git a/block/cloop.c b/block/cloop.c
index a84f14019c..fc1ca3a05a 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -66,6 +66,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
int ret;
bs->read_only = 1;
+ bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
/* read header */
ret = bdrv_pread(bs->file->bs, 128, &s->block_size, 4);
@@ -229,33 +230,38 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
return 0;
}
-static int cloop_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+cloop_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVCloopState *s = bs->opaque;
- int i;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ int ret, i;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ qemu_co_mutex_lock(&s->lock);
for (i = 0; i < nb_sectors; i++) {
+ void *data;
uint32_t sector_offset_in_block =
((sector_num + i) % s->sectors_per_block),
block_num = (sector_num + i) / s->sectors_per_block;
if (cloop_read_block(bs, block_num) != 0) {
- return -1;
+ ret = -EIO;
+ goto fail;
}
- memcpy(buf + i * 512,
- s->uncompressed_block + sector_offset_in_block * 512, 512);
+
+ data = s->uncompressed_block + sector_offset_in_block * 512;
+ qemu_iovec_from_buf(qiov, i * 512, data, 512);
}
- return 0;
-}
-static coroutine_fn int cloop_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVCloopState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = cloop_read(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
qemu_co_mutex_unlock(&s->lock);
+
return ret;
}
@@ -273,7 +279,7 @@ static BlockDriver bdrv_cloop = {
.instance_size = sizeof(BDRVCloopState),
.bdrv_probe = cloop_probe,
.bdrv_open = cloop_open,
- .bdrv_read = cloop_co_read,
+ .bdrv_co_preadv = cloop_co_preadv,
.bdrv_close = cloop_close,
};
diff --git a/block/crypto.c b/block/crypto.c
index 2424a4c9c9..758e14e032 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -91,7 +91,7 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
struct BlockCryptoCreateData *data = opaque;
ssize_t ret;
- ret = blk_pwrite(data->blk, offset, buf, buflen);
+ ret = blk_pwrite(data->blk, offset, buf, buflen, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write encryption header");
return ret;
diff --git a/block/curl.c b/block/curl.c
index 5a8f8b6239..da9f5e85de 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -36,10 +36,16 @@
// #define DEBUG_VERBOSE
#ifdef DEBUG_CURL
-#define DPRINTF(fmt, ...) do { printf(fmt, ## __VA_ARGS__); } while (0)
+#define DEBUG_CURL_PRINT 1
#else
-#define DPRINTF(fmt, ...) do { } while (0)
+#define DEBUG_CURL_PRINT 0
#endif
+#define DPRINTF(fmt, ...) \
+ do { \
+ if (DEBUG_CURL_PRINT) { \
+ fprintf(stderr, fmt, ## __VA_ARGS__); \
+ } \
+ } while (0)
#if LIBCURL_VERSION_NUM >= 0x071000
/* The multi interface timer callback was introduced in 7.16.0 */
diff --git a/block/dmg.c b/block/dmg.c
index a496eb7c9b..1ea5f22d82 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -440,6 +440,8 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
int ret;
bs->read_only = 1;
+ bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
+
s->n_chunks = 0;
s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
/* used by dmg_read_mish_block to keep track of the current I/O position */
@@ -659,38 +661,42 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
return 0;
}
-static int dmg_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+dmg_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVDMGState *s = bs->opaque;
- int i;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ int ret, i;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ qemu_co_mutex_lock(&s->lock);
for (i = 0; i < nb_sectors; i++) {
uint32_t sector_offset_in_chunk;
+ void *data;
+
if (dmg_read_chunk(bs, sector_num + i) != 0) {
- return -1;
+ ret = -EIO;
+ goto fail;
}
/* Special case: current chunk is all zeroes. Do not perform a memcpy as
* s->uncompressed_chunk may be too small to cover the large all-zeroes
* section. dmg_read_chunk is called to find s->current_chunk */
if (s->types[s->current_chunk] == 2) { /* all zeroes block entry */
- memset(buf + i * 512, 0, 512);
+ qemu_iovec_memset(qiov, i * 512, 0, 512);
continue;
}
sector_offset_in_chunk = sector_num + i - s->sectors[s->current_chunk];
- memcpy(buf + i * 512,
- s->uncompressed_chunk + sector_offset_in_chunk * 512, 512);
+ data = s->uncompressed_chunk + sector_offset_in_chunk * 512;
+ qemu_iovec_from_buf(qiov, i * 512, data, 512);
}
- return 0;
-}
-static coroutine_fn int dmg_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVDMGState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = dmg_read(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
qemu_co_mutex_unlock(&s->lock);
return ret;
}
@@ -715,7 +721,7 @@ static BlockDriver bdrv_dmg = {
.instance_size = sizeof(BDRVDMGState),
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
- .bdrv_read = dmg_co_read,
+ .bdrv_co_preadv = dmg_co_preadv,
.bdrv_close = dmg_close,
};
diff --git a/block/io.c b/block/io.c
index a7dbf85b19..cd6d71a503 100644
--- a/block/io.c
+++ b/block/io.c
@@ -34,18 +34,6 @@
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
-static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque);
-static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque);
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov);
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
@@ -62,48 +50,35 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
void bdrv_set_io_limits(BlockDriverState *bs,
ThrottleConfig *cfg)
{
- int i;
-
throttle_group_config(bs, cfg);
-
- for (i = 0; i < 2; i++) {
- qemu_co_enter_next(&bs->throttled_reqs[i]);
- }
}
-/* this function drain all the throttled IOs */
-static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
+void bdrv_no_throttling_begin(BlockDriverState *bs)
{
- bool drained = false;
- bool enabled = bs->io_limits_enabled;
- int i;
-
- bs->io_limits_enabled = false;
-
- for (i = 0; i < 2; i++) {
- while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
- drained = true;
- }
+ if (bs->io_limits_disabled++ == 0) {
+ throttle_group_restart_bs(bs);
}
+}
- bs->io_limits_enabled = enabled;
-
- return drained;
+void bdrv_no_throttling_end(BlockDriverState *bs)
+{
+ assert(bs->io_limits_disabled);
+ --bs->io_limits_disabled;
}
void bdrv_io_limits_disable(BlockDriverState *bs)
{
- bs->io_limits_enabled = false;
- bdrv_start_throttled_reqs(bs);
+ assert(bs->throttle_state);
+ bdrv_no_throttling_begin(bs);
throttle_group_unregister_bs(bs);
+ bdrv_no_throttling_end(bs);
}
/* should be called before bdrv_set_io_limits if a limit is set */
void bdrv_io_limits_enable(BlockDriverState *bs, const char *group)
{
- assert(!bs->io_limits_enabled);
+ assert(!bs->throttle_state);
throttle_group_register_bs(bs, group);
- bs->io_limits_enabled = true;
}
void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group)
@@ -123,24 +98,6 @@ void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group)
bdrv_io_limits_enable(bs, group);
}
-void bdrv_setup_io_funcs(BlockDriver *bdrv)
-{
- /* Block drivers without coroutine functions need emulation */
- if (!bdrv->bdrv_co_readv) {
- bdrv->bdrv_co_readv = bdrv_co_readv_em;
- bdrv->bdrv_co_writev = bdrv_co_writev_em;
-
- /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
- * the block driver lacks aio we need to emulate that too.
- */
- if (!bdrv->bdrv_aio_readv) {
- /* add AIO emulation layer */
- bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
- bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
- }
- }
-}
-
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
BlockDriver *drv = bs->drv;
@@ -260,18 +217,29 @@ typedef struct {
bool done;
} BdrvCoDrainData;
+static void bdrv_drain_poll(BlockDriverState *bs)
+{
+ bool busy = true;
+
+ while (busy) {
+ /* Keep iterating */
+ busy = bdrv_requests_pending(bs);
+ busy |= aio_poll(bdrv_get_aio_context(bs), busy);
+ }
+}
+
static void bdrv_co_drain_bh_cb(void *opaque)
{
BdrvCoDrainData *data = opaque;
Coroutine *co = data->co;
qemu_bh_delete(data->bh);
- bdrv_drain(data->bs);
+ bdrv_drain_poll(data->bs);
data->done = true;
qemu_coroutine_enter(co, NULL);
}
-void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
+static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
{
BdrvCoDrainData data;
@@ -305,21 +273,28 @@ void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
* not depend on events in other AioContexts. In that case, use
* bdrv_drain_all() instead.
*/
-void bdrv_drain(BlockDriverState *bs)
+void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
{
- bool busy = true;
+ bdrv_no_throttling_begin(bs);
+ bdrv_io_unplugged_begin(bs);
+ bdrv_drain_recurse(bs);
+ bdrv_co_yield_to_drain(bs);
+ bdrv_io_unplugged_end(bs);
+ bdrv_no_throttling_end(bs);
+}
+void bdrv_drain(BlockDriverState *bs)
+{
+ bdrv_no_throttling_begin(bs);
+ bdrv_io_unplugged_begin(bs);
bdrv_drain_recurse(bs);
if (qemu_in_coroutine()) {
- bdrv_co_drain(bs);
- return;
- }
- while (busy) {
- /* Keep iterating */
- bdrv_flush_io_queue(bs);
- busy = bdrv_requests_pending(bs);
- busy |= aio_poll(bdrv_get_aio_context(bs), busy);
+ bdrv_co_yield_to_drain(bs);
+ } else {
+ bdrv_drain_poll(bs);
}
+ bdrv_io_unplugged_end(bs);
+ bdrv_no_throttling_end(bs);
}
/*
@@ -342,6 +317,8 @@ void bdrv_drain_all(void)
if (bs->job) {
block_job_pause(bs->job);
}
+ bdrv_no_throttling_begin(bs);
+ bdrv_io_unplugged_begin(bs);
bdrv_drain_recurse(bs);
aio_context_release(aio_context);
@@ -366,7 +343,6 @@ void bdrv_drain_all(void)
aio_context_acquire(aio_context);
while ((bs = bdrv_next(bs))) {
if (aio_context == bdrv_get_aio_context(bs)) {
- bdrv_flush_io_queue(bs);
if (bdrv_requests_pending(bs)) {
busy = true;
aio_poll(aio_context, busy);
@@ -383,6 +359,8 @@ void bdrv_drain_all(void)
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
+ bdrv_io_unplugged_end(bs);
+ bdrv_no_throttling_end(bs);
if (bs->job) {
block_job_resume(bs->job);
}
@@ -581,13 +559,13 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
RwCo *rwco = opaque;
if (!rwco->is_write) {
- rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
- rwco->qiov->size, rwco->qiov,
- rwco->flags);
+ rwco->ret = bdrv_co_preadv(rwco->bs, rwco->offset,
+ rwco->qiov->size, rwco->qiov,
+ rwco->flags);
} else {
- rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
- rwco->qiov->size, rwco->qiov,
- rwco->flags);
+ rwco->ret = bdrv_co_pwritev(rwco->bs, rwco->offset,
+ rwco->qiov->size, rwco->qiov,
+ rwco->flags);
}
}
@@ -608,17 +586,6 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
.flags = flags,
};
- /**
- * In sync call context, when the vcpu is blocked, this throttling timer
- * will not fire; so the I/O throttling function has to be disabled here
- * if it has been enabled.
- */
- if (bs->io_limits_enabled) {
- fprintf(stderr, "Disabling I/O throttling on '%s' due "
- "to synchronous I/O.\n", bdrv_get_device_name(bs));
- bdrv_io_limits_disable(bs);
- }
-
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_rw_co_entry(&rwco);
@@ -685,7 +652,8 @@ int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
* Completely zero out a block device with the help of bdrv_write_zeroes.
* The operation is sped up by checking the block status and only writing
* zeroes to the device if they currently do not return zeroes. Optional
- * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
+ * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
+ * BDRV_REQ_FUA).
*
* Returns < 0 on error, 0 on success. For error codes see bdrv_write().
*/
@@ -800,6 +768,109 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
return 0;
}
+typedef struct CoroutineIOCompletion {
+ Coroutine *coroutine;
+ int ret;
+} CoroutineIOCompletion;
+
+static void bdrv_co_io_em_complete(void *opaque, int ret)
+{
+ CoroutineIOCompletion *co = opaque;
+
+ co->ret = ret;
+ qemu_coroutine_enter(co->coroutine, NULL);
+}
+
+static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ BlockDriver *drv = bs->drv;
+ int64_t sector_num;
+ unsigned int nb_sectors;
+
+ if (drv->bdrv_co_preadv) {
+ return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
+ }
+
+ sector_num = offset >> BDRV_SECTOR_BITS;
+ nb_sectors = bytes >> BDRV_SECTOR_BITS;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
+
+ if (drv->bdrv_co_readv) {
+ return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+ } else {
+ BlockAIOCB *acb;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ acb = bs->drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ if (acb == NULL) {
+ return -EIO;
+ } else {
+ qemu_coroutine_yield();
+ return co.ret;
+ }
+ }
+}
+
+static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ BlockDriver *drv = bs->drv;
+ int64_t sector_num;
+ unsigned int nb_sectors;
+ int ret;
+
+ if (drv->bdrv_co_pwritev) {
+ ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags);
+ goto emulate_flags;
+ }
+
+ sector_num = offset >> BDRV_SECTOR_BITS;
+ nb_sectors = bytes >> BDRV_SECTOR_BITS;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
+
+ if (drv->bdrv_co_writev_flags) {
+ ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
+ flags & bs->supported_write_flags);
+ flags &= ~bs->supported_write_flags;
+ } else if (drv->bdrv_co_writev) {
+ assert(!bs->supported_write_flags);
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+ } else {
+ BlockAIOCB *acb;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ acb = bs->drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ if (acb == NULL) {
+ ret = -EIO;
+ } else {
+ qemu_coroutine_yield();
+ ret = co.ret;
+ }
+ }
+
+emulate_flags:
+ if (ret == 0 && (flags & BDRV_REQ_FUA)) {
+ ret = bdrv_co_flush(bs);
+ }
+
+ return ret;
+}
+
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
@@ -836,8 +907,9 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
- ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
- &bounce_qiov);
+ ret = bdrv_driver_preadv(bs, cluster_sector_num * BDRV_SECTOR_SIZE,
+ cluster_nb_sectors * BDRV_SECTOR_SIZE,
+ &bounce_qiov, 0);
if (ret < 0) {
goto err;
}
@@ -850,8 +922,9 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
/* This does not change the data on the disk, it is not necessary
* to flush even in cache=writethrough mode.
*/
- ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
- &bounce_qiov);
+ ret = bdrv_driver_pwritev(bs, cluster_sector_num * BDRV_SECTOR_SIZE,
+ cluster_nb_sectors * BDRV_SECTOR_SIZE,
+ &bounce_qiov, 0);
}
if (ret < 0) {
@@ -880,7 +953,6 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
- BlockDriver *drv = bs->drv;
int ret;
int64_t sector_num = offset >> BDRV_SECTOR_BITS;
@@ -921,7 +993,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
/* Forward the request to the BlockDriver */
if (!bs->zero_beyond_eof) {
- ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+ ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
} else {
/* Read zeros after EOF */
int64_t total_sectors, max_nb_sectors;
@@ -935,7 +1007,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
align >> BDRV_SECTOR_BITS);
if (nb_sectors < max_nb_sectors) {
- ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+ ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
} else if (max_nb_sectors > 0) {
QEMUIOVector local_qiov;
@@ -943,8 +1015,9 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
qemu_iovec_concat(&local_qiov, qiov, 0,
max_nb_sectors * BDRV_SECTOR_SIZE);
- ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
- &local_qiov);
+ ret = bdrv_driver_preadv(bs, offset,
+ max_nb_sectors * BDRV_SECTOR_SIZE,
+ &local_qiov, 0);
qemu_iovec_destroy(&local_qiov);
} else {
@@ -967,7 +1040,7 @@ out:
/*
* Handle a read request in coroutine context
*/
-int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+int coroutine_fn bdrv_co_preadv(BlockDriverState *bs,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
@@ -997,7 +1070,7 @@ int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
}
/* throttling disk I/O */
- if (bs->io_limits_enabled) {
+ if (bs->throttle_state) {
throttle_group_co_io_limits_intercept(bs, bytes, false);
}
@@ -1049,8 +1122,8 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
return -EINVAL;
}
- return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
+ return bdrv_co_preadv(bs, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
}
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
@@ -1088,6 +1161,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
QEMUIOVector qiov;
struct iovec iov = {0};
int ret = 0;
+ bool need_flush = false;
int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
BDRV_REQUEST_MAX_SECTORS);
@@ -1120,13 +1194,29 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
ret = -ENOTSUP;
/* First try the efficient write zeroes operation */
if (drv->bdrv_co_write_zeroes) {
- ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
+ ret = drv->bdrv_co_write_zeroes(bs, sector_num, num,
+ flags & bs->supported_zero_flags);
+ if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
+ !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
+ need_flush = true;
+ }
+ } else {
+ assert(!bs->supported_zero_flags);
}
if (ret == -ENOTSUP) {
/* Fall back to bounce buffer if write zeroes is unsupported */
int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
MAX_WRITE_ZEROES_BOUNCE_BUFFER);
+ BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
+
+ if ((flags & BDRV_REQ_FUA) &&
+ !(bs->supported_write_flags & BDRV_REQ_FUA)) {
+ /* No need for bdrv_driver_pwrite() to do a fallback
+ * flush on each chunk; use just one at the end */
+ write_flags &= ~BDRV_REQ_FUA;
+ need_flush = true;
+ }
num = MIN(num, max_xfer_len);
iov.iov_len = num * BDRV_SECTOR_SIZE;
if (iov.iov_base == NULL) {
@@ -1139,7 +1229,9 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
}
qemu_iovec_init_external(&qiov, &iov, 1);
- ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
+ ret = bdrv_driver_pwritev(bs, sector_num * BDRV_SECTOR_SIZE,
+ num * BDRV_SECTOR_SIZE, &qiov,
+ write_flags);
/* Keep bounce buffer around if it is big enough for all
* all future requests.
@@ -1155,6 +1247,9 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
}
fail:
+ if (ret == 0 && need_flush) {
+ ret = bdrv_co_flush(bs);
+ }
qemu_vfree(iov.iov_base);
return ret;
}
@@ -1199,23 +1294,12 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
} else if (flags & BDRV_REQ_ZERO_WRITE) {
bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
- } else if (drv->bdrv_co_writev_flags) {
- bdrv_debug_event(bs, BLKDBG_PWRITEV);
- ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
- flags);
} else {
- assert(drv->supported_write_flags == 0);
bdrv_debug_event(bs, BLKDBG_PWRITEV);
- ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+ ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
- if (ret == 0 && (flags & BDRV_REQ_FUA) &&
- !(drv->supported_write_flags & BDRV_REQ_FUA))
- {
- ret = bdrv_co_flush(bs);
- }
-
bdrv_set_dirty(bs, sector_num, nb_sectors);
if (bs->wr_highest_offset < offset + bytes) {
@@ -1320,7 +1404,7 @@ fail:
/*
* Handle a write request in coroutine context
*/
-int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+int coroutine_fn bdrv_co_pwritev(BlockDriverState *bs,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
@@ -1347,7 +1431,7 @@ int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
}
/* throttling disk I/O */
- if (bs->io_limits_enabled) {
+ if (bs->throttle_state) {
throttle_group_co_io_limits_intercept(bs, bytes, true);
}
@@ -1455,8 +1539,8 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
return -EINVAL;
}
- return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
+ return bdrv_co_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
}
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
@@ -2064,80 +2148,6 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb)
/**************************************************************/
/* async block device emulation */
-typedef struct BlockAIOCBSync {
- BlockAIOCB common;
- QEMUBH *bh;
- int ret;
- /* vector translation state */
- QEMUIOVector *qiov;
- uint8_t *bounce;
- int is_write;
-} BlockAIOCBSync;
-
-static const AIOCBInfo bdrv_em_aiocb_info = {
- .aiocb_size = sizeof(BlockAIOCBSync),
-};
-
-static void bdrv_aio_bh_cb(void *opaque)
-{
- BlockAIOCBSync *acb = opaque;
-
- if (!acb->is_write && acb->ret >= 0) {
- qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
- }
- qemu_vfree(acb->bounce);
- acb->common.cb(acb->common.opaque, acb->ret);
- qemu_bh_delete(acb->bh);
- acb->bh = NULL;
- qemu_aio_unref(acb);
-}
-
-static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque,
- int is_write)
-
-{
- BlockAIOCBSync *acb;
-
- acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
- acb->is_write = is_write;
- acb->qiov = qiov;
- acb->bounce = qemu_try_blockalign(bs, qiov->size);
- acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
-
- if (acb->bounce == NULL) {
- acb->ret = -ENOMEM;
- } else if (is_write) {
- qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
- acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
- } else {
- acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
- }
-
- qemu_bh_schedule(acb->bh);
-
- return &acb->common;
-}
-
-static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-}
-
-static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
-}
-
-
typedef struct BlockAIOCBCoroutine {
BlockAIOCB common;
BlockRequest req;
@@ -2314,59 +2324,6 @@ void qemu_aio_unref(void *p)
/**************************************************************/
/* Coroutine block device emulation */
-typedef struct CoroutineIOCompletion {
- Coroutine *coroutine;
- int ret;
-} CoroutineIOCompletion;
-
-static void bdrv_co_io_em_complete(void *opaque, int ret)
-{
- CoroutineIOCompletion *co = opaque;
-
- co->ret = ret;
- qemu_coroutine_enter(co->coroutine, NULL);
-}
-
-static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *iov,
- bool is_write)
-{
- CoroutineIOCompletion co = {
- .coroutine = qemu_coroutine_self(),
- };
- BlockAIOCB *acb;
-
- if (is_write) {
- acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
- bdrv_co_io_em_complete, &co);
- } else {
- acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
- bdrv_co_io_em_complete, &co);
- }
-
- trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
- if (!acb) {
- return -EIO;
- }
- qemu_coroutine_yield();
-
- return co.ret;
-}
-
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
-{
- return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
-}
-
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
-{
- return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
-}
-
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
RwCo *rwco = opaque;
@@ -2763,33 +2720,68 @@ void bdrv_add_before_write_notifier(BlockDriverState *bs,
void bdrv_io_plug(BlockDriverState *bs)
{
- BlockDriver *drv = bs->drv;
- if (drv && drv->bdrv_io_plug) {
- drv->bdrv_io_plug(bs);
- } else if (bs->file) {
- bdrv_io_plug(bs->file->bs);
+ BdrvChild *child;
+
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_io_plug(child->bs);
+ }
+
+ if (bs->io_plugged++ == 0 && bs->io_plug_disabled == 0) {
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_io_plug) {
+ drv->bdrv_io_plug(bs);
+ }
}
}
void bdrv_io_unplug(BlockDriverState *bs)
{
- BlockDriver *drv = bs->drv;
- if (drv && drv->bdrv_io_unplug) {
- drv->bdrv_io_unplug(bs);
- } else if (bs->file) {
- bdrv_io_unplug(bs->file->bs);
+ BdrvChild *child;
+
+ assert(bs->io_plugged);
+ if (--bs->io_plugged == 0 && bs->io_plug_disabled == 0) {
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_io_unplug) {
+ drv->bdrv_io_unplug(bs);
+ }
+ }
+
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_io_unplug(child->bs);
}
}
-void bdrv_flush_io_queue(BlockDriverState *bs)
+void bdrv_io_unplugged_begin(BlockDriverState *bs)
{
- BlockDriver *drv = bs->drv;
- if (drv && drv->bdrv_flush_io_queue) {
- drv->bdrv_flush_io_queue(bs);
- } else if (bs->file) {
- bdrv_flush_io_queue(bs->file->bs);
+ BdrvChild *child;
+
+ if (bs->io_plug_disabled++ == 0 && bs->io_plugged > 0) {
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_io_unplug) {
+ drv->bdrv_io_unplug(bs);
+ }
+ }
+
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_io_unplugged_begin(child->bs);
+ }
+}
+
+void bdrv_io_unplugged_end(BlockDriverState *bs)
+{
+ BdrvChild *child;
+
+ assert(bs->io_plug_disabled);
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_io_unplugged_end(child->bs);
+ }
+
+ if (--bs->io_plug_disabled == 0 && bs->io_plugged > 0) {
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_io_plug) {
+ drv->bdrv_io_plug(bs);
+ }
}
- bdrv_start_throttled_reqs(bs);
}
void bdrv_drained_begin(BlockDriverState *bs)
diff --git a/block/iscsi.c b/block/iscsi.c
index 302baf84c1..10f3906bcc 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -456,8 +456,11 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
struct IscsiTask iTask;
uint64_t lba;
uint32_t num_sectors;
- bool fua;
+ bool fua = flags & BDRV_REQ_FUA;
+ if (fua) {
+ assert(iscsilun->dpofua);
+ }
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
}
@@ -472,7 +475,6 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
- fua = iscsilun->dpofua && (flags & BDRV_REQ_FUA);
if (iscsilun->use_16_for_rw) {
iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
NULL, num_sectors * iscsilun->block_size,
@@ -513,13 +515,6 @@ retry:
return 0;
}
-static int coroutine_fn
-iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
-{
- return iscsi_co_writev_flags(bs, sector_num, nb_sectors, iov, 0);
-}
-
static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
int64_t sector_num, int nb_sectors)
@@ -1555,6 +1550,10 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
task = NULL;
iscsi_modesense_sync(iscsilun);
+ if (iscsilun->dpofua) {
+ bs->supported_write_flags = BDRV_REQ_FUA;
+ }
+ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
/* Check the write protect flag of the LUN if we want to write */
if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
@@ -1847,9 +1846,7 @@ static BlockDriver bdrv_iscsi = {
.bdrv_co_discard = iscsi_co_discard,
.bdrv_co_write_zeroes = iscsi_co_write_zeroes,
.bdrv_co_readv = iscsi_co_readv,
- .bdrv_co_writev = iscsi_co_writev,
.bdrv_co_writev_flags = iscsi_co_writev_flags,
- .supported_write_flags = BDRV_REQ_FUA,
.bdrv_co_flush_to_disk = iscsi_co_flush,
#ifdef __linux__
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 805757e02e..90ec98ee23 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -30,7 +30,7 @@
struct qemu_laiocb {
BlockAIOCB common;
- struct qemu_laio_state *ctx;
+ LinuxAioState *ctx;
struct iocb iocb;
ssize_t ret;
size_t nbytes;
@@ -46,7 +46,7 @@ typedef struct {
QSIMPLEQ_HEAD(, qemu_laiocb) pending;
} LaioQueue;
-struct qemu_laio_state {
+struct LinuxAioState {
io_context_t ctx;
EventNotifier e;
@@ -60,7 +60,7 @@ struct qemu_laio_state {
int event_max;
};
-static void ioq_submit(struct qemu_laio_state *s);
+static void ioq_submit(LinuxAioState *s);
static inline ssize_t io_event_ret(struct io_event *ev)
{
@@ -70,8 +70,7 @@ static inline ssize_t io_event_ret(struct io_event *ev)
/*
* Completes an AIO request (calls the callback and frees the ACB).
*/
-static void qemu_laio_process_completion(struct qemu_laio_state *s,
- struct qemu_laiocb *laiocb)
+static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
{
int ret;
@@ -99,7 +98,7 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
*
* The function is somewhat tricky because it supports nested event loops, for
* example when a request callback invokes aio_poll(). In order to do this,
- * the completion events array and index are kept in qemu_laio_state. The BH
+ * the completion events array and index are kept in LinuxAioState. The BH
* reschedules itself as long as there are completions pending so it will
* either be called again in a nested event loop or will be called after all
* events have been completed. When there are no events left to complete, the
@@ -107,7 +106,7 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
*/
static void qemu_laio_completion_bh(void *opaque)
{
- struct qemu_laio_state *s = opaque;
+ LinuxAioState *s = opaque;
/* Fetch more completion events when empty */
if (s->event_idx == s->event_max) {
@@ -136,7 +135,7 @@ static void qemu_laio_completion_bh(void *opaque)
laiocb->ret = io_event_ret(&s->events[s->event_idx]);
s->event_idx++;
- qemu_laio_process_completion(s, laiocb);
+ qemu_laio_process_completion(laiocb);
}
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
@@ -146,7 +145,7 @@ static void qemu_laio_completion_bh(void *opaque)
static void qemu_laio_completion_cb(EventNotifier *e)
{
- struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
+ LinuxAioState *s = container_of(e, LinuxAioState, e);
if (event_notifier_test_and_clear(&s->e)) {
qemu_bh_schedule(s->completion_bh);
@@ -185,7 +184,7 @@ static void ioq_init(LaioQueue *io_q)
io_q->blocked = false;
}
-static void ioq_submit(struct qemu_laio_state *s)
+static void ioq_submit(LinuxAioState *s)
{
int ret, len;
struct qemu_laiocb *aiocb;
@@ -216,33 +215,25 @@ static void ioq_submit(struct qemu_laio_state *s)
s->io_q.blocked = (s->io_q.n > 0);
}
-void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
+void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
{
- struct qemu_laio_state *s = aio_ctx;
-
- s->io_q.plugged++;
+ assert(!s->io_q.plugged);
+ s->io_q.plugged = 1;
}
-void laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
+void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
{
- struct qemu_laio_state *s = aio_ctx;
-
- assert(s->io_q.plugged > 0 || !unplug);
-
- if (unplug && --s->io_q.plugged > 0) {
- return;
- }
-
+ assert(s->io_q.plugged);
+ s->io_q.plugged = 0;
if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
}
-BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
+BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque, int type)
{
- struct qemu_laio_state *s = aio_ctx;
struct qemu_laiocb *laiocb;
struct iocb *iocbs;
off_t offset = sector_num * 512;
@@ -284,26 +275,22 @@ out_free_aiocb:
return NULL;
}
-void laio_detach_aio_context(void *s_, AioContext *old_context)
+void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
{
- struct qemu_laio_state *s = s_;
-
aio_set_event_notifier(old_context, &s->e, false, NULL);
qemu_bh_delete(s->completion_bh);
}
-void laio_attach_aio_context(void *s_, AioContext *new_context)
+void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
{
- struct qemu_laio_state *s = s_;
-
s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
aio_set_event_notifier(new_context, &s->e, false,
qemu_laio_completion_cb);
}
-void *laio_init(void)
+LinuxAioState *laio_init(void)
{
- struct qemu_laio_state *s;
+ LinuxAioState *s;
s = g_malloc0(sizeof(*s));
if (event_notifier_init(&s->e, false) < 0) {
@@ -325,10 +312,8 @@ out_free_state:
return NULL;
}
-void laio_cleanup(void *s_)
+void laio_cleanup(LinuxAioState *s)
{
- struct qemu_laio_state *s = s_;
-
event_notifier_cleanup(&s->e);
if (io_destroy(s->ctx) != 0) {
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 878e879ace..4d13444409 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -243,15 +243,15 @@ static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
- int offset, int *flags)
+ int offset, int flags)
{
NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = { .type = NBD_CMD_WRITE };
struct nbd_reply reply;
ssize_t ret;
- if ((*flags & BDRV_REQ_FUA) && (client->nbdflags & NBD_FLAG_SEND_FUA)) {
- *flags &= ~BDRV_REQ_FUA;
+ if (flags & BDRV_REQ_FUA) {
+ assert(client->nbdflags & NBD_FLAG_SEND_FUA);
request.type |= NBD_CMD_FLAG_FUA;
}
@@ -291,7 +291,7 @@ int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
}
int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov, int *flags)
+ int nb_sectors, QEMUIOVector *qiov, int flags)
{
int offset = 0;
int ret;
@@ -414,6 +414,9 @@ int nbd_client_init(BlockDriverState *bs,
logout("Failed to negotiate with the NBD server\n");
return ret;
}
+ if (client->nbdflags & NBD_FLAG_SEND_FUA) {
+ bs->supported_write_flags = BDRV_REQ_FUA;
+ }
qemu_co_mutex_init(&client->send_mutex);
qemu_co_mutex_init(&client->free_sema);
diff --git a/block/nbd-client.h b/block/nbd-client.h
index bc7aec0795..c618dadc39 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -48,7 +48,7 @@ int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors);
int nbd_client_co_flush(BlockDriverState *bs);
int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov, int *flags);
+ int nb_sectors, QEMUIOVector *qiov, int flags);
int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
diff --git a/block/nbd.c b/block/nbd.c
index f7ea3b3608..6015e8b537 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -355,31 +355,6 @@ static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
return nbd_client_co_readv(bs, sector_num, nb_sectors, qiov);
}
-static int nbd_co_writev_flags(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov, int flags)
-{
- int ret;
-
- ret = nbd_client_co_writev(bs, sector_num, nb_sectors, qiov, &flags);
- if (ret < 0) {
- return ret;
- }
-
- /* The flag wasn't sent to the server, so we need to emulate it with an
- * explicit flush */
- if (flags & BDRV_REQ_FUA) {
- ret = nbd_client_co_flush(bs);
- }
-
- return ret;
-}
-
-static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- return nbd_co_writev_flags(bs, sector_num, nb_sectors, qiov, 0);
-}
-
static int nbd_co_flush(BlockDriverState *bs)
{
return nbd_client_co_flush(bs);
@@ -476,9 +451,7 @@ static BlockDriver bdrv_nbd = {
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
- .bdrv_co_writev_flags = nbd_co_writev_flags,
- .supported_write_flags = BDRV_REQ_FUA,
+ .bdrv_co_writev_flags = nbd_client_co_writev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
@@ -496,9 +469,7 @@ static BlockDriver bdrv_nbd_tcp = {
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
- .bdrv_co_writev_flags = nbd_co_writev_flags,
- .supported_write_flags = BDRV_REQ_FUA,
+ .bdrv_co_writev_flags = nbd_client_co_writev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
@@ -516,9 +487,7 @@ static BlockDriver bdrv_nbd_unix = {
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
- .bdrv_co_writev_flags = nbd_co_writev_flags,
- .supported_write_flags = BDRV_REQ_FUA,
+ .bdrv_co_writev_flags = nbd_client_co_writev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
diff --git a/block/parallels.c b/block/parallels.c
index 324ed43ac4..cddbfc4012 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -512,11 +512,12 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
memset(tmp, 0, sizeof(tmp));
memcpy(tmp, &header, sizeof(header));
- ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
+ ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE, 0);
if (ret < 0) {
goto exit;
}
- ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0);
+ ret = blk_write_zeroes(file, BDRV_SECTOR_SIZE,
+ (bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
if (ret < 0) {
goto exit;
}
diff --git a/block/qcow.c b/block/qcow.c
index 60ddb12eca..d6dc1b05b3 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -853,14 +853,14 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
}
/* write all the data */
- ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header));
+ ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header), 0);
if (ret != sizeof(header)) {
goto exit;
}
if (backing_file) {
ret = blk_pwrite(qcow_blk, sizeof(header),
- backing_file, backing_filename_len);
+ backing_file, backing_filename_len, 0);
if (ret != backing_filename_len) {
goto exit;
}
@@ -869,8 +869,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
tmp = g_malloc0(BDRV_SECTOR_SIZE);
for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
BDRV_SECTOR_SIZE); i++) {
- ret = blk_pwrite(qcow_blk, header_size +
- BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
+ ret = blk_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE * i,
+ tmp, BDRV_SECTOR_SIZE, 0);
if (ret != BDRV_SECTOR_SIZE) {
g_free(tmp);
goto exit;
diff --git a/block/qcow2.c b/block/qcow2.c
index 470734be9f..62febfc386 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1757,13 +1757,6 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
qcow2_close(bs);
- bdrv_invalidate_cache(bs->file->bs, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- bs->drv = NULL;
- return;
- }
-
memset(s, 0, sizeof(BDRVQcow2State));
options = qdict_clone_shallow(bs->options);
@@ -2207,7 +2200,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
}
- ret = blk_pwrite(blk, 0, header, cluster_size);
+ ret = blk_pwrite(blk, 0, header, cluster_size, 0);
g_free(header);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write qcow2 header");
@@ -2217,7 +2210,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* Write a refcount table with one refcount block */
refcount_table = g_malloc0(2 * cluster_size);
refcount_table[0] = cpu_to_be64(2 * cluster_size);
- ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size);
+ ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
g_free(refcount_table);
if (ret < 0) {
@@ -2411,21 +2404,74 @@ finish:
return ret;
}
+
+static bool is_zero_cluster(BlockDriverState *bs, int64_t start)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int nr;
+ BlockDriverState *file;
+ int64_t res = bdrv_get_block_status_above(bs, NULL, start,
+ s->cluster_sectors, &nr, &file);
+ return res >= 0 && ((res & BDRV_BLOCK_ZERO) || !(res & BDRV_BLOCK_DATA));
+}
+
+static bool is_zero_cluster_top_locked(BlockDriverState *bs, int64_t start)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int nr = s->cluster_sectors;
+ uint64_t off;
+ int ret;
+
+ ret = qcow2_get_cluster_offset(bs, start << BDRV_SECTOR_BITS, &nr, &off);
+ return ret == QCOW2_CLUSTER_UNALLOCATED || ret == QCOW2_CLUSTER_ZERO;
+}
+
static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
int ret;
BDRVQcow2State *s = bs->opaque;
- /* Emulate misaligned zero writes */
- if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
- return -ENOTSUP;
+ int head = sector_num % s->cluster_sectors;
+ int tail = (sector_num + nb_sectors) % s->cluster_sectors;
+
+ if (head != 0 || tail != 0) {
+ int64_t cl_end = -1;
+
+ sector_num -= head;
+ nb_sectors += head;
+
+ if (tail != 0) {
+ nb_sectors += s->cluster_sectors - tail;
+ }
+
+ if (!is_zero_cluster(bs, sector_num)) {
+ return -ENOTSUP;
+ }
+
+ if (nb_sectors > s->cluster_sectors) {
+ /* Technically the request can cover 2 clusters, f.e. 4k write
+ at s->cluster_sectors - 2k offset. One of these cluster can
+ be zeroed, one unallocated */
+ cl_end = sector_num + nb_sectors - s->cluster_sectors;
+ if (!is_zero_cluster(bs, cl_end)) {
+ return -ENOTSUP;
+ }
+ }
+
+ qemu_co_mutex_lock(&s->lock);
+ /* We can have new write after previous check */
+ if (!is_zero_cluster_top_locked(bs, sector_num) ||
+ (cl_end > 0 && !is_zero_cluster_top_locked(bs, cl_end))) {
+ qemu_co_mutex_unlock(&s->lock);
+ return -ENOTSUP;
+ }
+ } else {
+ qemu_co_mutex_lock(&s->lock);
}
/* Whatever is left can use real zero clusters */
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors);
+ ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS, nb_sectors);
qemu_co_mutex_unlock(&s->lock);
return ret;
diff --git a/block/qed.c b/block/qed.c
index 0af52741df..10ce18eb66 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -601,18 +601,18 @@ static int qed_create(const char *filename, uint32_t cluster_size,
}
qed_header_cpu_to_le(&header, &le_header);
- ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header));
+ ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header), 0);
if (ret < 0) {
goto out;
}
ret = blk_pwrite(blk, sizeof(le_header), backing_file,
- header.backing_filename_size);
+ header.backing_filename_size, 0);
if (ret < 0) {
goto out;
}
l1_table = g_malloc0(l1_size);
- ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size);
+ ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size, 0);
if (ret < 0) {
goto out;
}
@@ -1594,12 +1594,6 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
bdrv_qed_close(bs);
- bdrv_invalidate_cache(bs->file->bs, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
- }
-
memset(s, 0, sizeof(BDRVQEDState));
ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err);
if (local_err) {
diff --git a/block/quorum.c b/block/quorum.c
index da15465a9a..1ec3511528 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -14,6 +14,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/cutils.h"
#include "block/block_int.h"
#include "qapi/qmp/qbool.h"
#include "qapi/qmp/qdict.h"
@@ -67,6 +68,9 @@ typedef struct QuorumVotes {
typedef struct BDRVQuorumState {
BdrvChild **children; /* children BlockDriverStates */
int num_children; /* children count */
+ unsigned next_child_index; /* the index of the next child that should
+ * be added
+ */
int threshold; /* if less than threshold children reads gave the
* same result a quorum error occurs.
*/
@@ -747,21 +751,6 @@ static int64_t quorum_getlength(BlockDriverState *bs)
return result;
}
-static void quorum_invalidate_cache(BlockDriverState *bs, Error **errp)
-{
- BDRVQuorumState *s = bs->opaque;
- Error *local_err = NULL;
- int i;
-
- for (i = 0; i < s->num_children; i++) {
- bdrv_invalidate_cache(s->children[i]->bs, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
- }
- }
-}
-
static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
{
BDRVQuorumState *s = bs->opaque;
@@ -898,9 +887,9 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
ret = -EINVAL;
goto exit;
}
- if (s->num_children < 2) {
+ if (s->num_children < 1) {
error_setg(&local_err,
- "Number of provided children must be greater than 1");
+ "Number of provided children must be 1 or more");
ret = -EINVAL;
goto exit;
}
@@ -964,6 +953,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
opened[i] = true;
}
+ s->next_child_index = s->num_children;
g_free(opened);
goto exit;
@@ -1020,6 +1010,72 @@ static void quorum_attach_aio_context(BlockDriverState *bs,
}
}
+static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
+ Error **errp)
+{
+ BDRVQuorumState *s = bs->opaque;
+ BdrvChild *child;
+ char indexstr[32];
+ int ret;
+
+ assert(s->num_children <= INT_MAX / sizeof(BdrvChild *));
+ if (s->num_children == INT_MAX / sizeof(BdrvChild *) ||
+ s->next_child_index == UINT_MAX) {
+ error_setg(errp, "Too many children");
+ return;
+ }
+
+ ret = snprintf(indexstr, 32, "children.%u", s->next_child_index);
+ if (ret < 0 || ret >= 32) {
+ error_setg(errp, "cannot generate child name");
+ return;
+ }
+ s->next_child_index++;
+
+ bdrv_drained_begin(bs);
+
+ /* We can safely add the child now */
+ bdrv_ref(child_bs);
+ child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
+ s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
+ s->children[s->num_children++] = child;
+
+ bdrv_drained_end(bs);
+}
+
+static void quorum_del_child(BlockDriverState *bs, BdrvChild *child,
+ Error **errp)
+{
+ BDRVQuorumState *s = bs->opaque;
+ int i;
+
+ for (i = 0; i < s->num_children; i++) {
+ if (s->children[i] == child) {
+ break;
+ }
+ }
+
+ /* we have checked it in bdrv_del_child() */
+ assert(i < s->num_children);
+
+ if (s->num_children <= s->threshold) {
+ error_setg(errp,
+ "The number of children cannot be lower than the vote threshold %d",
+ s->threshold);
+ return;
+ }
+
+ bdrv_drained_begin(bs);
+
+ /* We can safely remove this child now */
+ memmove(&s->children[i], &s->children[i + 1],
+ (s->num_children - i - 1) * sizeof(BdrvChild *));
+ s->children = g_renew(BdrvChild *, s->children, --s->num_children);
+ bdrv_unref_child(bs, child);
+
+ bdrv_drained_end(bs);
+}
+
static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVQuorumState *s = bs->opaque;
@@ -1070,11 +1126,13 @@ static BlockDriver bdrv_quorum = {
.bdrv_aio_readv = quorum_aio_readv,
.bdrv_aio_writev = quorum_aio_writev,
- .bdrv_invalidate_cache = quorum_invalidate_cache,
.bdrv_detach_aio_context = quorum_detach_aio_context,
.bdrv_attach_aio_context = quorum_attach_aio_context,
+ .bdrv_add_child = quorum_add_child,
+ .bdrv_del_child = quorum_del_child,
+
.is_filter = true,
.bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
};
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 811e375018..714714e016 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -35,15 +35,16 @@
/* linux-aio.c - Linux native implementation */
#ifdef CONFIG_LINUX_AIO
-void *laio_init(void);
-void laio_cleanup(void *s);
-BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
+typedef struct LinuxAioState LinuxAioState;
+LinuxAioState *laio_init(void);
+void laio_cleanup(LinuxAioState *s);
+BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque, int type);
-void laio_detach_aio_context(void *s, AioContext *old_context);
-void laio_attach_aio_context(void *s, AioContext *new_context);
-void laio_io_plug(BlockDriverState *bs, void *aio_ctx);
-void laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug);
+void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
+void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
+void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
+void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s);
#endif
#ifdef _WIN32
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 906d5c9411..a4f5a1ba5f 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -139,7 +139,7 @@ typedef struct BDRVRawState {
#ifdef CONFIG_LINUX_AIO
int use_aio;
- void *aio_ctx;
+ LinuxAioState *aio_ctx;
#endif
#ifdef CONFIG_XFS
bool is_xfs:1;
@@ -398,7 +398,7 @@ static void raw_attach_aio_context(BlockDriverState *bs,
}
#ifdef CONFIG_LINUX_AIO
-static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
+static int raw_set_aio(LinuxAioState **aio_ctx, int *use_aio, int bdrv_flags)
{
int ret = -1;
assert(aio_ctx != NULL);
@@ -517,6 +517,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
s->has_discard = true;
s->has_write_zeroes = true;
+ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
if ((bs->open_flags & BDRV_O_NOCACHE) != 0) {
s->needs_alignment = true;
}
@@ -1345,17 +1346,7 @@ static void raw_aio_unplug(BlockDriverState *bs)
#ifdef CONFIG_LINUX_AIO
BDRVRawState *s = bs->opaque;
if (s->use_aio) {
- laio_io_unplug(bs, s->aio_ctx, true);
- }
-#endif
-}
-
-static void raw_aio_flush_io_queue(BlockDriverState *bs)
-{
-#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
- if (s->use_aio) {
- laio_io_unplug(bs, s->aio_ctx, false);
+ laio_io_unplug(bs, s->aio_ctx);
}
#endif
}
@@ -1949,7 +1940,6 @@ BlockDriver bdrv_file = {
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2398,7 +2388,6 @@ static BlockDriver bdrv_host_device = {
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2528,7 +2517,6 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2664,7 +2652,6 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index a6cc7e9918..3385ed448d 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -105,8 +105,8 @@ raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
}
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
- ret = bdrv_co_do_pwritev(bs->file->bs, sector_num * BDRV_SECTOR_SIZE,
- nb_sectors * BDRV_SECTOR_SIZE, qiov, flags);
+ ret = bdrv_co_pwritev(bs->file->bs, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE, qiov, flags);
fail:
if (qiov == &local_qiov) {
@@ -116,13 +116,6 @@ fail:
return ret;
}
-static int coroutine_fn
-raw_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- QEMUIOVector *qiov)
-{
- return raw_co_writev_flags(bs, sector_num, nb_sectors, qiov, 0);
-}
-
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum,
@@ -211,6 +204,8 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->sg = bs->file->bs->sg;
+ bs->supported_write_flags = BDRV_REQ_FUA;
+ bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP;
if (bs->probed && !bdrv_is_read_only(bs)) {
fprintf(stderr,
@@ -256,9 +251,7 @@ BlockDriver bdrv_raw = {
.bdrv_close = &raw_close,
.bdrv_create = &raw_create,
.bdrv_co_readv = &raw_co_readv,
- .bdrv_co_writev = &raw_co_writev,
.bdrv_co_writev_flags = &raw_co_writev_flags,
- .supported_write_flags = BDRV_REQ_FUA,
.bdrv_co_write_zeroes = &raw_co_write_zeroes,
.bdrv_co_discard = &raw_co_discard,
.bdrv_co_get_block_status = &raw_co_get_block_status,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 33e0a33824..23fbace1f9 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -294,13 +294,16 @@ static inline size_t count_data_objs(const struct SheepdogInode *inode)
#undef DPRINTF
#ifdef DEBUG_SDOG
-#define DPRINTF(fmt, args...) \
- do { \
- fprintf(stdout, "%s %d: " fmt, __func__, __LINE__, ##args); \
- } while (0)
+#define DEBUG_SDOG_PRINT 1
#else
-#define DPRINTF(fmt, args...)
+#define DEBUG_SDOG_PRINT 0
#endif
+#define DPRINTF(fmt, args...) \
+ do { \
+ if (DEBUG_SDOG_PRINT) { \
+ fprintf(stderr, "%s %d: " fmt, __func__, __LINE__, ##args); \
+ } \
+ } while (0)
typedef struct SheepdogAIOCB SheepdogAIOCB;
@@ -1678,7 +1681,7 @@ static int sd_prealloc(const char *filename, Error **errp)
if (ret < 0) {
goto out;
}
- ret = blk_pwrite(blk, idx * buf_size, buf, buf_size);
+ ret = blk_pwrite(blk, idx * buf_size, buf, buf_size, 0);
if (ret < 0) {
goto out;
}
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index 4920e09495..9ac063a0cd 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -219,6 +219,10 @@ static bool throttle_group_schedule_timer(BlockDriverState *bs,
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
bool must_wait;
+ if (bs->io_limits_disabled) {
+ return false;
+ }
+
/* Check if any of the timers in this group is already armed */
if (tg->any_timer_armed[is_write]) {
return true;
@@ -313,6 +317,17 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs,
qemu_mutex_unlock(&tg->lock);
}
+void throttle_group_restart_bs(BlockDriverState *bs)
+{
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
+ ;
+ }
+ }
+}
+
/* Update the throttle configuration for a particular group. Similar
* to throttle_config(), but guarantees atomicity within the
* throttling group.
@@ -335,6 +350,9 @@ void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg)
}
throttle_config(ts, tt, cfg);
qemu_mutex_unlock(&tg->lock);
+
+ qemu_co_enter_next(&bs->throttled_reqs[0]);
+ qemu_co_enter_next(&bs->throttled_reqs[1]);
}
/* Get the throttle configuration from a particular group. Similar to
diff --git a/block/vdi.c b/block/vdi.c
index 75d4819edb..54e11447c3 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -557,98 +557,109 @@ static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
}
-static int vdi_co_read(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVVdiState *s = bs->opaque;
+ QEMUIOVector local_qiov;
uint32_t bmap_entry;
uint32_t block_index;
- uint32_t sector_in_block;
- uint32_t n_sectors;
+ uint32_t offset_in_block;
+ uint32_t n_bytes;
+ uint64_t bytes_done = 0;
int ret = 0;
logout("\n");
- while (ret >= 0 && nb_sectors > 0) {
- block_index = sector_num / s->block_sectors;
- sector_in_block = sector_num % s->block_sectors;
- n_sectors = s->block_sectors - sector_in_block;
- if (n_sectors > nb_sectors) {
- n_sectors = nb_sectors;
- }
+ qemu_iovec_init(&local_qiov, qiov->niov);
+
+ while (ret >= 0 && bytes > 0) {
+ block_index = offset / s->block_size;
+ offset_in_block = offset % s->block_size;
+ n_bytes = MIN(bytes, s->block_size - offset_in_block);
- logout("will read %u sectors starting at sector %" PRIu64 "\n",
- n_sectors, sector_num);
+ logout("will read %u bytes starting at offset %" PRIu64 "\n",
+ n_bytes, offset);
/* prepare next AIO request */
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Block not allocated, return zeros, no need to wait. */
- memset(buf, 0, n_sectors * SECTOR_SIZE);
+ qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
ret = 0;
} else {
- uint64_t offset = s->header.offset_data / SECTOR_SIZE +
- (uint64_t)bmap_entry * s->block_sectors +
- sector_in_block;
- ret = bdrv_read(bs->file->bs, offset, buf, n_sectors);
+ uint64_t data_offset = s->header.offset_data +
+ (uint64_t)bmap_entry * s->block_size +
+ offset_in_block;
+
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = bdrv_co_preadv(bs->file->bs, data_offset, n_bytes,
+ &local_qiov, 0);
}
- logout("%u sectors read\n", n_sectors);
+ logout("%u bytes read\n", n_bytes);
- nb_sectors -= n_sectors;
- sector_num += n_sectors;
- buf += n_sectors * SECTOR_SIZE;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
}
+ qemu_iovec_destroy(&local_qiov);
+
return ret;
}
-static int vdi_co_write(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVVdiState *s = bs->opaque;
+ QEMUIOVector local_qiov;
uint32_t bmap_entry;
uint32_t block_index;
- uint32_t sector_in_block;
- uint32_t n_sectors;
+ uint32_t offset_in_block;
+ uint32_t n_bytes;
uint32_t bmap_first = VDI_UNALLOCATED;
uint32_t bmap_last = VDI_UNALLOCATED;
uint8_t *block = NULL;
+ uint64_t bytes_done = 0;
int ret = 0;
logout("\n");
- while (ret >= 0 && nb_sectors > 0) {
- block_index = sector_num / s->block_sectors;
- sector_in_block = sector_num % s->block_sectors;
- n_sectors = s->block_sectors - sector_in_block;
- if (n_sectors > nb_sectors) {
- n_sectors = nb_sectors;
- }
+ qemu_iovec_init(&local_qiov, qiov->niov);
+
+ while (ret >= 0 && bytes > 0) {
+ block_index = offset / s->block_size;
+ offset_in_block = offset % s->block_size;
+ n_bytes = MIN(bytes, s->block_size - offset_in_block);
- logout("will write %u sectors starting at sector %" PRIu64 "\n",
- n_sectors, sector_num);
+ logout("will write %u bytes starting at offset %" PRIu64 "\n",
+ n_bytes, offset);
/* prepare next AIO request */
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Allocate new block and write to it. */
- uint64_t offset;
+ uint64_t data_offset;
bmap_entry = s->header.blocks_allocated;
s->bmap[block_index] = cpu_to_le32(bmap_entry);
s->header.blocks_allocated++;
- offset = s->header.offset_data / SECTOR_SIZE +
- (uint64_t)bmap_entry * s->block_sectors;
+ data_offset = s->header.offset_data +
+ (uint64_t)bmap_entry * s->block_size;
if (block == NULL) {
block = g_malloc(s->block_size);
bmap_first = block_index;
}
bmap_last = block_index;
/* Copy data to be written to new block and zero unused parts. */
- memset(block, 0, sector_in_block * SECTOR_SIZE);
- memcpy(block + sector_in_block * SECTOR_SIZE,
- buf, n_sectors * SECTOR_SIZE);
- memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
- (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
+ memset(block, 0, offset_in_block);
+ qemu_iovec_to_buf(qiov, bytes_done, block + offset_in_block,
+ n_bytes);
+ memset(block + offset_in_block + n_bytes, 0,
+ s->block_size - n_bytes - offset_in_block);
/* Note that this coroutine does not yield anywhere from reading the
* bmap entry until here, so in regards to all the coroutines trying
@@ -658,12 +669,12 @@ static int vdi_co_write(BlockDriverState *bs,
* acquire the lock and thus the padded cluster is written before
* the other coroutines can write to the affected area. */
qemu_co_mutex_lock(&s->write_lock);
- ret = bdrv_write(bs->file->bs, offset, block, s->block_sectors);
+ ret = bdrv_pwrite(bs->file->bs, data_offset, block, s->block_size);
qemu_co_mutex_unlock(&s->write_lock);
} else {
- uint64_t offset = s->header.offset_data / SECTOR_SIZE +
- (uint64_t)bmap_entry * s->block_sectors +
- sector_in_block;
+ uint64_t data_offset = s->header.offset_data +
+ (uint64_t)bmap_entry * s->block_size +
+ offset_in_block;
qemu_co_mutex_lock(&s->write_lock);
/* This lock is only used to make sure the following write operation
* is executed after the write issued by the coroutine allocating
@@ -674,16 +685,23 @@ static int vdi_co_write(BlockDriverState *bs,
* that that write operation has returned (there may be other writes
* in flight, but they do not concern this very operation). */
qemu_co_mutex_unlock(&s->write_lock);
- ret = bdrv_write(bs->file->bs, offset, buf, n_sectors);
+
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = bdrv_co_pwritev(bs->file->bs, data_offset, n_bytes,
+ &local_qiov, 0);
}
- nb_sectors -= n_sectors;
- sector_num += n_sectors;
- buf += n_sectors * SECTOR_SIZE;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
- logout("%u sectors written\n", n_sectors);
+ logout("%u bytes written\n", n_bytes);
}
+ qemu_iovec_destroy(&local_qiov);
+
logout("finished data write\n");
if (ret < 0) {
return ret;
@@ -694,6 +712,7 @@ static int vdi_co_write(BlockDriverState *bs,
VdiHeader *header = (VdiHeader *) block;
uint8_t *base;
uint64_t offset;
+ uint32_t n_sectors;
logout("now writing modified header\n");
assert(VDI_IS_ALLOCATED(bmap_first));
@@ -808,7 +827,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
vdi_header_print(&header);
#endif
vdi_header_to_le(&header);
- ret = blk_pwrite(blk, offset, &header, sizeof(header));
+ ret = blk_pwrite(blk, offset, &header, sizeof(header), 0);
if (ret < 0) {
error_setg(errp, "Error writing header to %s", filename);
goto exit;
@@ -829,7 +848,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
bmap[i] = VDI_UNALLOCATED;
}
}
- ret = blk_pwrite(blk, offset, bmap, bmap_size);
+ ret = blk_pwrite(blk, offset, bmap, bmap_size, 0);
if (ret < 0) {
error_setg(errp, "Error writing bmap to %s", filename);
goto exit;
@@ -903,9 +922,9 @@ static BlockDriver bdrv_vdi = {
.bdrv_co_get_block_status = vdi_co_get_block_status,
.bdrv_make_empty = vdi_make_empty,
- .bdrv_read = vdi_co_read,
+ .bdrv_co_preadv = vdi_co_preadv,
#if defined(CONFIG_VDI_WRITE)
- .bdrv_write = vdi_co_write,
+ .bdrv_co_pwritev = vdi_co_pwritev,
#endif
.bdrv_get_info = vdi_get_info,
diff --git a/block/vhdx.c b/block/vhdx.c
index 2b7b332404..ec778fe2a7 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1856,13 +1856,14 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
&creator_items, NULL);
signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
- ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
+ ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature),
+ 0);
if (ret < 0) {
goto delete_and_exit;
}
if (creator) {
ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
- creator, creator_items * sizeof(gunichar2));
+ creator, creator_items * sizeof(gunichar2), 0);
if (ret < 0) {
goto delete_and_exit;
}
diff --git a/block/vmdk.c b/block/vmdk.c
index 45f9d3c5b9..e6c97c25a6 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1016,27 +1016,26 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
*/
static int get_whole_cluster(BlockDriverState *bs,
VmdkExtent *extent,
- uint64_t cluster_sector_num,
- uint64_t sector_num,
- uint64_t skip_start_sector,
- uint64_t skip_end_sector)
+ uint64_t cluster_offset,
+ uint64_t offset,
+ uint64_t skip_start_bytes,
+ uint64_t skip_end_bytes)
{
int ret = VMDK_OK;
int64_t cluster_bytes;
uint8_t *whole_grain;
/* For COW, align request sector_num to cluster start */
- sector_num = QEMU_ALIGN_DOWN(sector_num, extent->cluster_sectors);
cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
+ offset = QEMU_ALIGN_DOWN(offset, cluster_bytes);
whole_grain = qemu_blockalign(bs, cluster_bytes);
if (!bs->backing) {
- memset(whole_grain, 0, skip_start_sector << BDRV_SECTOR_BITS);
- memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
- cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
+ memset(whole_grain, 0, skip_start_bytes);
+ memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes);
}
- assert(skip_end_sector <= extent->cluster_sectors);
+ assert(skip_end_bytes <= cluster_bytes);
/* we will be here if it's first write on non-exist grain(cluster).
* try to read from parent image, if exist */
if (bs->backing && !vmdk_is_cid_valid(bs)) {
@@ -1045,42 +1044,43 @@ static int get_whole_cluster(BlockDriverState *bs,
}
/* Read backing data before skip range */
- if (skip_start_sector > 0) {
+ if (skip_start_bytes > 0) {
if (bs->backing) {
- ret = bdrv_read(bs->backing->bs, sector_num,
- whole_grain, skip_start_sector);
+ ret = bdrv_pread(bs->backing->bs, offset, whole_grain,
+ skip_start_bytes);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
- ret = bdrv_write(extent->file->bs, cluster_sector_num, whole_grain,
- skip_start_sector);
+ ret = bdrv_pwrite(extent->file->bs, cluster_offset, whole_grain,
+ skip_start_bytes);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
/* Read backing data after skip range */
- if (skip_end_sector < extent->cluster_sectors) {
+ if (skip_end_bytes < cluster_bytes) {
if (bs->backing) {
- ret = bdrv_read(bs->backing->bs, sector_num + skip_end_sector,
- whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
- extent->cluster_sectors - skip_end_sector);
+ ret = bdrv_pread(bs->backing->bs, offset + skip_end_bytes,
+ whole_grain + skip_end_bytes,
+ cluster_bytes - skip_end_bytes);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
- ret = bdrv_write(extent->file->bs, cluster_sector_num + skip_end_sector,
- whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
- extent->cluster_sectors - skip_end_sector);
+ ret = bdrv_pwrite(extent->file->bs, cluster_offset + skip_end_bytes,
+ whole_grain + skip_end_bytes,
+ cluster_bytes - skip_end_bytes);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
+ ret = VMDK_OK;
exit:
qemu_vfree(whole_grain);
return ret;
@@ -1142,8 +1142,8 @@ static int get_cluster_offset(BlockDriverState *bs,
uint64_t offset,
bool allocate,
uint64_t *cluster_offset,
- uint64_t skip_start_sector,
- uint64_t skip_end_sector)
+ uint64_t skip_start_bytes,
+ uint64_t skip_end_bytes)
{
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
@@ -1230,10 +1230,8 @@ static int get_cluster_offset(BlockDriverState *bs,
* This problem may occur because of insufficient space on host disk
* or inappropriate VM shutdown.
*/
- ret = get_whole_cluster(bs, extent,
- cluster_sector,
- offset >> BDRV_SECTOR_BITS,
- skip_start_sector, skip_end_sector);
+ ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE,
+ offset, skip_start_bytes, skip_end_bytes);
if (ret) {
return ret;
}
@@ -1259,15 +1257,26 @@ static VmdkExtent *find_extent(BDRVVmdkState *s,
return NULL;
}
+static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent,
+ int64_t offset)
+{
+ uint64_t offset_in_cluster, extent_begin_offset, extent_relative_offset;
+ uint64_t cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE;
+
+ extent_begin_offset =
+ (extent->end_sector - extent->sectors) * BDRV_SECTOR_SIZE;
+ extent_relative_offset = offset - extent_begin_offset;
+ offset_in_cluster = extent_relative_offset % cluster_size;
+
+ return offset_in_cluster;
+}
+
static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent,
int64_t sector_num)
{
- uint64_t index_in_cluster, extent_begin_sector, extent_relative_sector_num;
-
- extent_begin_sector = extent->end_sector - extent->sectors;
- extent_relative_sector_num = sector_num - extent_begin_sector;
- index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
- return index_in_cluster;
+ uint64_t offset;
+ offset = vmdk_find_offset_in_cluster(extent, sector_num * BDRV_SECTOR_SIZE);
+ return offset / BDRV_SECTOR_SIZE;
}
static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
@@ -1319,38 +1328,57 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
}
static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
- int64_t offset_in_cluster, const uint8_t *buf,
- int nb_sectors, int64_t sector_num)
+ int64_t offset_in_cluster, QEMUIOVector *qiov,
+ uint64_t qiov_offset, uint64_t n_bytes,
+ uint64_t offset)
{
int ret;
VmdkGrainMarker *data = NULL;
uLongf buf_len;
- const uint8_t *write_buf = buf;
- int write_len = nb_sectors * 512;
+ QEMUIOVector local_qiov;
+ struct iovec iov;
int64_t write_offset;
int64_t write_end_sector;
if (extent->compressed) {
+ void *compressed_data;
+
if (!extent->has_marker) {
ret = -EINVAL;
goto out;
}
buf_len = (extent->cluster_sectors << 9) * 2;
data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
- if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK ||
- buf_len == 0) {
+
+ compressed_data = g_malloc(n_bytes);
+ qemu_iovec_to_buf(qiov, qiov_offset, compressed_data, n_bytes);
+ ret = compress(data->data, &buf_len, compressed_data, n_bytes);
+ g_free(compressed_data);
+
+ if (ret != Z_OK || buf_len == 0) {
ret = -EINVAL;
goto out;
}
- data->lba = sector_num;
+
+ data->lba = offset >> BDRV_SECTOR_BITS;
data->size = buf_len;
- write_buf = (uint8_t *)data;
- write_len = buf_len + sizeof(VmdkGrainMarker);
+
+ n_bytes = buf_len + sizeof(VmdkGrainMarker);
+ iov = (struct iovec) {
+ .iov_base = data,
+ .iov_len = n_bytes,
+ };
+ qemu_iovec_init_external(&local_qiov, &iov, 1);
+ } else {
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes);
}
+
write_offset = cluster_offset + offset_in_cluster,
- ret = bdrv_pwrite(extent->file->bs, write_offset, write_buf, write_len);
+ ret = bdrv_co_pwritev(extent->file->bs, write_offset, n_bytes,
+ &local_qiov, 0);
- write_end_sector = DIV_ROUND_UP(write_offset + write_len, BDRV_SECTOR_SIZE);
+ write_end_sector = DIV_ROUND_UP(write_offset + n_bytes, BDRV_SECTOR_SIZE);
if (extent->compressed) {
extent->next_cluster_sector = write_end_sector;
@@ -1359,19 +1387,21 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
write_end_sector);
}
- if (ret != write_len) {
- ret = ret < 0 ? ret : -EIO;
+ if (ret < 0) {
goto out;
}
ret = 0;
out:
g_free(data);
+ if (!extent->compressed) {
+ qemu_iovec_destroy(&local_qiov);
+ }
return ret;
}
static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
- int64_t offset_in_cluster, uint8_t *buf,
- int nb_sectors)
+ int64_t offset_in_cluster, QEMUIOVector *qiov,
+ int bytes)
{
int ret;
int cluster_bytes, buf_bytes;
@@ -1383,14 +1413,13 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
if (!extent->compressed) {
- ret = bdrv_pread(extent->file->bs,
- cluster_offset + offset_in_cluster,
- buf, nb_sectors * 512);
- if (ret == nb_sectors * 512) {
- return 0;
- } else {
- return -EIO;
+ ret = bdrv_co_preadv(extent->file->bs,
+ cluster_offset + offset_in_cluster, bytes,
+ qiov, 0);
+ if (ret < 0) {
+ return ret;
}
+ return 0;
}
cluster_bytes = extent->cluster_sectors * 512;
/* Read two clusters in case GrainMarker + compressed data > one cluster */
@@ -1422,11 +1451,11 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
}
if (offset_in_cluster < 0 ||
- offset_in_cluster + nb_sectors * 512 > buf_len) {
+ offset_in_cluster + bytes > buf_len) {
ret = -EINVAL;
goto out;
}
- memcpy(buf, uncomp_buf + offset_in_cluster, nb_sectors * 512);
+ qemu_iovec_from_buf(qiov, 0, uncomp_buf + offset_in_cluster, bytes);
ret = 0;
out:
@@ -1435,64 +1464,73 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
return ret;
}
-static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vmdk_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVVmdkState *s = bs->opaque;
int ret;
- uint64_t n, index_in_cluster;
+ uint64_t n_bytes, offset_in_cluster;
VmdkExtent *extent = NULL;
+ QEMUIOVector local_qiov;
uint64_t cluster_offset;
+ uint64_t bytes_done = 0;
- while (nb_sectors > 0) {
- extent = find_extent(s, sector_num, extent);
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_co_mutex_lock(&s->lock);
+
+ while (bytes > 0) {
+ extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
if (!extent) {
- return -EIO;
+ ret = -EIO;
+ goto fail;
}
ret = get_cluster_offset(bs, extent, NULL,
- sector_num << 9, false, &cluster_offset,
- 0, 0);
- index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
- n = extent->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
+ offset, false, &cluster_offset, 0, 0);
+ offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
+
+ n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
+ - offset_in_cluster);
+
if (ret != VMDK_OK) {
/* if not allocated, try to read from parent image, if exist */
if (bs->backing && ret != VMDK_ZEROED) {
if (!vmdk_is_cid_valid(bs)) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto fail;
}
- ret = bdrv_read(bs->backing->bs, sector_num, buf, n);
+
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = bdrv_co_preadv(bs->backing->bs, offset, n_bytes,
+ &local_qiov, 0);
if (ret < 0) {
- return ret;
+ goto fail;
}
} else {
- memset(buf, 0, 512 * n);
+ qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
}
} else {
- ret = vmdk_read_extent(extent,
- cluster_offset, index_in_cluster * 512,
- buf, n);
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = vmdk_read_extent(extent, cluster_offset, offset_in_cluster,
+ &local_qiov, n_bytes);
if (ret) {
- return ret;
+ goto fail;
}
}
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
}
- return 0;
-}
-static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVVmdkState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = vmdk_read(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
qemu_co_mutex_unlock(&s->lock);
+ qemu_iovec_destroy(&local_qiov);
+
return ret;
}
@@ -1506,38 +1544,38 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
*
* Returns: error code with 0 for success.
*/
-static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors,
- bool zeroed, bool zero_dry_run)
+static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ bool zeroed, bool zero_dry_run)
{
BDRVVmdkState *s = bs->opaque;
VmdkExtent *extent = NULL;
int ret;
- int64_t index_in_cluster, n;
+ int64_t offset_in_cluster, n_bytes;
uint64_t cluster_offset;
+ uint64_t bytes_done = 0;
VmdkMetaData m_data;
- if (sector_num > bs->total_sectors) {
- error_report("Wrong offset: sector_num=0x%" PRIx64
+ if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) {
+ error_report("Wrong offset: offset=0x%" PRIx64
" total_sectors=0x%" PRIx64,
- sector_num, bs->total_sectors);
+ offset, bs->total_sectors);
return -EIO;
}
- while (nb_sectors > 0) {
- extent = find_extent(s, sector_num, extent);
+ while (bytes > 0) {
+ extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
if (!extent) {
return -EIO;
}
- index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
- n = extent->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
- ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+ offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
+ n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
+ - offset_in_cluster);
+
+ ret = get_cluster_offset(bs, extent, &m_data, offset,
!(extent->compressed || zeroed),
- &cluster_offset,
- index_in_cluster, index_in_cluster + n);
+ &cluster_offset, offset_in_cluster,
+ offset_in_cluster + n_bytes);
if (extent->compressed) {
if (ret == VMDK_OK) {
/* Refuse write to allocated cluster for streamOptimized */
@@ -1546,7 +1584,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
return -EIO;
} else {
/* allocate */
- ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+ ret = get_cluster_offset(bs, extent, &m_data, offset,
true, &cluster_offset, 0, 0);
}
}
@@ -1556,9 +1594,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
if (zeroed) {
/* Do zeroed write, buf is ignored */
if (extent->has_zero_grain &&
- index_in_cluster == 0 &&
- n >= extent->cluster_sectors) {
- n = extent->cluster_sectors;
+ offset_in_cluster == 0 &&
+ n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) {
+ n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE;
if (!zero_dry_run) {
/* update L2 tables */
if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
@@ -1570,9 +1608,8 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
return -ENOTSUP;
}
} else {
- ret = vmdk_write_extent(extent,
- cluster_offset, index_in_cluster * 512,
- buf, n, sector_num);
+ ret = vmdk_write_extent(extent, cluster_offset, offset_in_cluster,
+ qiov, bytes_done, n_bytes, offset);
if (ret) {
return ret;
}
@@ -1585,9 +1622,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
}
}
}
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
/* update CID on the first write every time the virtual disk is
* opened */
@@ -1602,25 +1639,65 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
return 0;
}
-static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
int ret;
BDRVVmdkState *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
- ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+ ret = vmdk_pwritev(bs, offset, bytes, qiov, false, false);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
+typedef struct VmdkWriteCompressedCo {
+ BlockDriverState *bs;
+ int64_t sector_num;
+ const uint8_t *buf;
+ int nb_sectors;
+ int ret;
+} VmdkWriteCompressedCo;
+
+static void vmdk_co_write_compressed(void *opaque)
+{
+ VmdkWriteCompressedCo *co = opaque;
+ QEMUIOVector local_qiov;
+ uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
+ uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
+
+ struct iovec iov = (struct iovec) {
+ .iov_base = (uint8_t*) co->buf,
+ .iov_len = bytes,
+ };
+ qemu_iovec_init_external(&local_qiov, &iov, 1);
+
+ co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false);
+}
+
static int vmdk_write_compressed(BlockDriverState *bs,
int64_t sector_num,
const uint8_t *buf,
int nb_sectors)
{
BDRVVmdkState *s = bs->opaque;
+
if (s->num_extents == 1 && s->extents[0].compressed) {
- return vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+ Coroutine *co;
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ VmdkWriteCompressedCo data = {
+ .bs = bs,
+ .sector_num = sector_num,
+ .buf = buf,
+ .nb_sectors = nb_sectors,
+ .ret = -EINPROGRESS,
+ };
+ co = qemu_coroutine_create(vmdk_co_write_compressed);
+ qemu_coroutine_enter(co, &data);
+ while (data.ret == -EINPROGRESS) {
+ aio_poll(aio_context, true);
+ }
+ return data.ret;
} else {
return -ENOTSUP;
}
@@ -1633,12 +1710,15 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
{
int ret;
BDRVVmdkState *s = bs->opaque;
+ uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
+ uint64_t bytes = nb_sectors * BDRV_SECTOR_SIZE;
+
qemu_co_mutex_lock(&s->lock);
/* write zeroes could fail if sectors not aligned to cluster, test it with
* dry_run == true before really updating image */
- ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
+ ret = vmdk_pwritev(bs, offset, bytes, NULL, true, true);
if (!ret) {
- ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
+ ret = vmdk_pwritev(bs, offset, bytes, NULL, true, false);
}
qemu_co_mutex_unlock(&s->lock);
return ret;
@@ -1728,12 +1808,12 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
header.check_bytes[3] = 0xa;
/* write all the data */
- ret = blk_pwrite(blk, 0, &magic, sizeof(magic));
+ ret = blk_pwrite(blk, 0, &magic, sizeof(magic), 0);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
- ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header));
+ ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header), 0);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
@@ -1753,7 +1833,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
gd_buf[i] = cpu_to_le32(tmp);
}
ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
- gd_buf, gd_buf_size);
+ gd_buf, gd_buf_size, 0);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
@@ -1765,7 +1845,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
gd_buf[i] = cpu_to_le32(tmp);
}
ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
- gd_buf, gd_buf_size);
+ gd_buf, gd_buf_size, 0);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
@@ -1829,8 +1909,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
int64_t total_size = 0, filesize;
char *adapter_type = NULL;
char *backing_file = NULL;
+ char *hw_version = NULL;
char *fmt = NULL;
- int flags = 0;
int ret = 0;
bool flat, split, compress;
GString *ext_desc_lines;
@@ -1861,7 +1941,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
"# The Disk Data Base\n"
"#DDB\n"
"\n"
- "ddb.virtualHWVersion = \"%d\"\n"
+ "ddb.virtualHWVersion = \"%s\"\n"
"ddb.geometry.cylinders = \"%" PRId64 "\"\n"
"ddb.geometry.heads = \"%" PRIu32 "\"\n"
"ddb.geometry.sectors = \"63\"\n"
@@ -1878,8 +1958,20 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
BDRV_SECTOR_SIZE);
adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE);
backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
+ hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION);
if (qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false)) {
- flags |= BLOCK_FLAG_COMPAT6;
+ if (strcmp(hw_version, "undefined")) {
+ error_setg(errp,
+ "compat6 cannot be enabled with hwversion set");
+ ret = -EINVAL;
+ goto exit;
+ }
+ g_free(hw_version);
+ hw_version = g_strdup("6");
+ }
+ if (strcmp(hw_version, "undefined") == 0) {
+ g_free(hw_version);
+ hw_version = g_strdup("4");
}
fmt = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ZEROED_GRAIN, false)) {
@@ -2001,7 +2093,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
fmt,
parent_desc_line,
ext_desc_lines->str,
- (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
+ hw_version,
total_size /
(int64_t)(63 * number_heads * BDRV_SECTOR_SIZE),
number_heads,
@@ -2028,7 +2120,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
blk_set_allow_write_beyond_eof(new_blk, true);
- ret = blk_pwrite(new_blk, desc_offset, desc, desc_len);
+ ret = blk_pwrite(new_blk, desc_offset, desc, desc_len, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write description");
goto exit;
@@ -2047,6 +2139,7 @@ exit:
}
g_free(adapter_type);
g_free(backing_file);
+ g_free(hw_version);
g_free(fmt);
g_free(desc);
g_free(path);
@@ -2298,6 +2391,12 @@ static QemuOptsList vmdk_create_opts = {
.def_value_str = "off"
},
{
+ .name = BLOCK_OPT_HWVERSION,
+ .type = QEMU_OPT_STRING,
+ .help = "VMDK hardware version",
+ .def_value_str = "undefined"
+ },
+ {
.name = BLOCK_OPT_SUBFMT,
.type = QEMU_OPT_STRING,
.help =
@@ -2321,8 +2420,8 @@ static BlockDriver bdrv_vmdk = {
.bdrv_open = vmdk_open,
.bdrv_check = vmdk_check,
.bdrv_reopen_prepare = vmdk_reopen_prepare,
- .bdrv_read = vmdk_co_read,
- .bdrv_write = vmdk_co_write,
+ .bdrv_co_preadv = vmdk_co_preadv,
+ .bdrv_co_pwritev = vmdk_co_pwritev,
.bdrv_write_compressed = vmdk_write_compressed,
.bdrv_co_write_zeroes = vmdk_co_write_zeroes,
.bdrv_close = vmdk_close,
diff --git a/block/vpc.c b/block/vpc.c
index 3e2ea698d9..0379813e2f 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -454,22 +454,21 @@ static int vpc_reopen_prepare(BDRVReopenState *state,
* The parameter write must be 1 if the offset will be used for a write
* operation (the block bitmaps is updated then), 0 otherwise.
*/
-static inline int64_t get_sector_offset(BlockDriverState *bs,
- int64_t sector_num, int write)
+static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
+ bool write)
{
BDRVVPCState *s = bs->opaque;
- uint64_t offset = sector_num * 512;
uint64_t bitmap_offset, block_offset;
- uint32_t pagetable_index, pageentry_index;
+ uint32_t pagetable_index, offset_in_block;
pagetable_index = offset / s->block_size;
- pageentry_index = (offset % s->block_size) / 512;
+ offset_in_block = offset % s->block_size;
if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
return -1; /* not allocated */
bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
- block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
+ block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
/* We must ensure that we don't write to any sectors which are marked as
unused in the bitmap. We get away with setting all bits in the block
@@ -487,6 +486,12 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
return block_offset;
}
+static inline int64_t get_sector_offset(BlockDriverState *bs,
+ int64_t sector_num, bool write)
+{
+ return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
+}
+
/*
* Writes the footer to the end of the image file. This is needed when the
* file grows as it overwrites the old footer
@@ -513,7 +518,7 @@ static int rewrite_footer(BlockDriverState* bs)
*
* Returns the sectors' offset in the image file on success and < 0 on error
*/
-static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
+static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
{
BDRVVPCState *s = bs->opaque;
int64_t bat_offset;
@@ -522,14 +527,13 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
uint8_t bitmap[s->bitmap_size];
/* Check if sector_num is valid */
- if ((sector_num < 0) || (sector_num > bs->total_sectors))
- return -1;
+ if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
+ return -EINVAL;
+ }
/* Write entry into in-memory BAT */
- index = (sector_num * 512) / s->block_size;
- if (s->pagetable[index] != 0xFFFFFFFF)
- return -1;
-
+ index = offset / s->block_size;
+ assert(s->pagetable[index] == 0xFFFFFFFF);
s->pagetable[index] = s->free_data_block_offset / 512;
/* Initialize the block's bitmap */
@@ -553,11 +557,11 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
if (ret < 0)
goto fail;
- return get_sector_offset(bs, sector_num, 0);
+ return get_image_offset(bs, offset, false);
fail:
s->free_data_block_offset -= (s->block_size + s->bitmap_size);
- return -1;
+ return ret;
}
static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
@@ -573,104 +577,105 @@ static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
return 0;
}
-static int vpc_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVVPCState *s = bs->opaque;
int ret;
- int64_t offset;
- int64_t sectors, sectors_per_block;
+ int64_t image_offset;
+ int64_t n_bytes;
+ int64_t bytes_done = 0;
VHDFooter *footer = (VHDFooter *) s->footer_buf;
+ QEMUIOVector local_qiov;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
- return bdrv_read(bs->file->bs, sector_num, buf, nb_sectors);
+ return bdrv_co_preadv(bs->file->bs, offset, bytes, qiov, 0);
}
- while (nb_sectors > 0) {
- offset = get_sector_offset(bs, sector_num, 0);
- sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
- sectors = sectors_per_block - (sector_num % sectors_per_block);
- if (sectors > nb_sectors) {
- sectors = nb_sectors;
- }
+ qemu_co_mutex_lock(&s->lock);
+ qemu_iovec_init(&local_qiov, qiov->niov);
+
+ while (bytes > 0) {
+ image_offset = get_image_offset(bs, offset, false);
+ n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
- if (offset == -1) {
- memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
+ if (image_offset == -1) {
+ qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
} else {
- ret = bdrv_pread(bs->file->bs, offset, buf,
- sectors * BDRV_SECTOR_SIZE);
- if (ret != sectors * BDRV_SECTOR_SIZE) {
- return -1;
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = bdrv_co_preadv(bs->file->bs, image_offset, n_bytes,
+ &local_qiov, 0);
+ if (ret < 0) {
+ goto fail;
}
}
- nb_sectors -= sectors;
- sector_num += sectors;
- buf += sectors * BDRV_SECTOR_SIZE;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
}
- return 0;
-}
-static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVVPCState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = vpc_read(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
+ qemu_iovec_destroy(&local_qiov);
qemu_co_mutex_unlock(&s->lock);
+
return ret;
}
-static int vpc_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVVPCState *s = bs->opaque;
- int64_t offset;
- int64_t sectors, sectors_per_block;
+ int64_t image_offset;
+ int64_t n_bytes;
+ int64_t bytes_done = 0;
int ret;
VHDFooter *footer = (VHDFooter *) s->footer_buf;
+ QEMUIOVector local_qiov;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
- return bdrv_write(bs->file->bs, sector_num, buf, nb_sectors);
+ return bdrv_co_pwritev(bs->file->bs, offset, bytes, qiov, 0);
}
- while (nb_sectors > 0) {
- offset = get_sector_offset(bs, sector_num, 1);
- sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
- sectors = sectors_per_block - (sector_num % sectors_per_block);
- if (sectors > nb_sectors) {
- sectors = nb_sectors;
- }
+ qemu_co_mutex_lock(&s->lock);
+ qemu_iovec_init(&local_qiov, qiov->niov);
+
+ while (bytes > 0) {
+ image_offset = get_image_offset(bs, offset, true);
+ n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
- if (offset == -1) {
- offset = alloc_block(bs, sector_num);
- if (offset < 0)
- return -1;
+ if (image_offset == -1) {
+ image_offset = alloc_block(bs, offset);
+ if (image_offset < 0) {
+ ret = image_offset;
+ goto fail;
+ }
}
- ret = bdrv_pwrite(bs->file->bs, offset, buf,
- sectors * BDRV_SECTOR_SIZE);
- if (ret != sectors * BDRV_SECTOR_SIZE) {
- return -1;
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = bdrv_co_pwritev(bs->file->bs, image_offset, n_bytes,
+ &local_qiov, 0);
+ if (ret < 0) {
+ goto fail;
}
- nb_sectors -= sectors;
- sector_num += sectors;
- buf += sectors * BDRV_SECTOR_SIZE;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
}
- return 0;
-}
-
-static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVVPCState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = vpc_write(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
+ qemu_iovec_destroy(&local_qiov);
qemu_co_mutex_unlock(&s->lock);
+
return ret;
}
@@ -783,13 +788,13 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
block_size = 0x200000;
num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
- ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
if (ret < 0) {
goto fail;
}
offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
- ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
if (ret < 0) {
goto fail;
}
@@ -799,7 +804,7 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
memset(buf, 0xFF, 512);
for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
- ret = blk_pwrite(blk, offset, buf, 512);
+ ret = blk_pwrite(blk, offset, buf, 512, 0);
if (ret < 0) {
goto fail;
}
@@ -826,7 +831,7 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
/* Write the header */
offset = 512;
- ret = blk_pwrite(blk, offset, buf, 1024);
+ ret = blk_pwrite(blk, offset, buf, 1024, 0);
if (ret < 0) {
goto fail;
}
@@ -848,7 +853,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
return ret;
}
- ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
if (ret < 0) {
return ret;
}
@@ -1056,8 +1061,8 @@ static BlockDriver bdrv_vpc = {
.bdrv_reopen_prepare = vpc_reopen_prepare,
.bdrv_create = vpc_create,
- .bdrv_read = vpc_co_read,
- .bdrv_write = vpc_co_write,
+ .bdrv_co_preadv = vpc_co_preadv,
+ .bdrv_co_pwritev = vpc_co_pwritev,
.bdrv_co_get_block_status = vpc_co_get_block_status,
.bdrv_get_info = vpc_get_info,
diff --git a/block/vvfat.c b/block/vvfat.c
index 183fc4f049..5b0c8dd639 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1179,6 +1179,7 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
bs->read_only = 0;
}
+ bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
bs->total_sectors = cyls * heads * secs;
if (init_directories(s, dirname, heads, secs, errp)) {
@@ -1421,14 +1422,31 @@ DLOG(fprintf(stderr, "sector %d not allocated\n", (int)sector_num));
return 0;
}
-static coroutine_fn int vvfat_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vvfat_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
int ret;
BDRVVVFATState *s = bs->opaque;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ void *buf;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ buf = g_try_malloc(bytes);
+ if (bytes && buf == NULL) {
+ return -ENOMEM;
+ }
+
qemu_co_mutex_lock(&s->lock);
ret = vvfat_read(bs, sector_num, buf, nb_sectors);
qemu_co_mutex_unlock(&s->lock);
+
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ g_free(buf);
+
return ret;
}
@@ -2880,14 +2898,31 @@ DLOG(checkpoint());
return 0;
}
-static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
int ret;
BDRVVVFATState *s = bs->opaque;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ void *buf;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ buf = g_try_malloc(bytes);
+ if (bytes && buf == NULL) {
+ return -ENOMEM;
+ }
+ qemu_iovec_to_buf(qiov, 0, buf, bytes);
+
qemu_co_mutex_lock(&s->lock);
ret = vvfat_write(bs, sector_num, buf, nb_sectors);
qemu_co_mutex_unlock(&s->lock);
+
+ g_free(buf);
+
return ret;
}
@@ -2904,8 +2939,10 @@ static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs,
return BDRV_BLOCK_DATA;
}
-static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
- const uint8_t* buffer, int nb_sectors) {
+static int coroutine_fn
+write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
return try_commit(s);
}
@@ -2918,7 +2955,7 @@ static void write_target_close(BlockDriverState *bs) {
static BlockDriver vvfat_write_target = {
.format_name = "vvfat_write_target",
- .bdrv_write = write_target_commit,
+ .bdrv_co_pwritev = write_target_commit,
.bdrv_close = write_target_close,
};
@@ -3014,8 +3051,8 @@ static BlockDriver bdrv_vvfat = {
.bdrv_file_open = vvfat_open,
.bdrv_close = vvfat_close,
- .bdrv_read = vvfat_co_read,
- .bdrv_write = vvfat_co_write,
+ .bdrv_co_preadv = vvfat_co_preadv,
+ .bdrv_co_pwritev = vvfat_co_pwritev,
.bdrv_co_get_block_status = vvfat_co_get_block_status,
};