aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block.c11
-rw-r--r--block/blkdebug.c20
-rw-r--r--block/block-backend.c65
-rw-r--r--block/commit.c2
-rw-r--r--block/crypto.c8
-rw-r--r--block/file-posix.c79
-rw-r--r--block/file-win32.c5
-rw-r--r--block/gluster.c83
-rw-r--r--block/io.c98
-rw-r--r--block/iscsi.c164
-rw-r--r--block/mirror.c2
-rw-r--r--block/nfs.c5
-rw-r--r--block/null.c23
-rw-r--r--block/nvme.c14
-rw-r--r--block/parallels.c28
-rw-r--r--block/qcow.c32
-rw-r--r--block/qcow2-bitmap.c4
-rw-r--r--block/qcow2-cluster.c4
-rw-r--r--block/qcow2-refcount.c4
-rw-r--r--block/qcow2-snapshot.c10
-rw-r--r--block/qcow2.c60
-rw-r--r--block/qcow2.h6
-rw-r--r--block/qed.c82
-rw-r--r--block/raw-format.c21
-rw-r--r--block/rbd.c6
-rw-r--r--block/sheepdog.c36
-rw-r--r--block/ssh.c66
-rw-r--r--block/throttle.c2
-rw-r--r--block/vdi.c50
-rw-r--r--block/vhdx.c5
-rw-r--r--block/vmdk.c43
-rw-r--r--block/vpc.c50
-rw-r--r--block/vvfat.c16
-rw-r--r--docs/interop/qcow2.txt16
-rw-r--r--docs/qcow2-cache.txt46
-rw-r--r--hw/ide/core.c10
-rw-r--r--include/block/aio-wait.h116
-rw-r--r--include/block/aio.h7
-rw-r--r--include/block/block.h54
-rw-r--r--include/block/block_int.h61
-rw-r--r--qapi/block-core.json6
-rw-r--r--qemu-img.c2
-rw-r--r--tests/Makefile.include2
-rwxr-xr-xtests/qemu-iotests/03329
-rw-r--r--tests/qemu-iotests/033.out13
-rw-r--r--tests/test-block-backend.c82
-rw-r--r--util/Makefile.objs2
-rw-r--r--util/aio-wait.c40
48 files changed, 980 insertions, 610 deletions
diff --git a/block.c b/block.c
index c205320a50..4f76714f6b 100644
--- a/block.c
+++ b/block.c
@@ -418,7 +418,7 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque)
CreateCo *cco = opaque;
assert(cco->drv);
- ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
+ ret = cco->drv->bdrv_co_create_opts(cco->filename, cco->opts, &local_err);
error_propagate(&cco->err, local_err);
cco->ret = ret;
}
@@ -437,7 +437,7 @@ int bdrv_create(BlockDriver *drv, const char* filename,
.err = NULL,
};
- if (!drv->bdrv_create) {
+ if (!drv->bdrv_co_create_opts) {
error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
ret = -ENOTSUP;
goto out;
@@ -4711,7 +4711,12 @@ out:
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
- return bs->aio_context;
+ return bs ? bs->aio_context : qemu_get_aio_context();
+}
+
+AioWait *bdrv_get_aio_wait(BlockDriverState *bs)
+{
+ return bs ? &bs->wait : NULL;
}
void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
diff --git a/block/blkdebug.c b/block/blkdebug.c
index d83f23febd..589712475a 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -627,15 +627,17 @@ static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
}
-static int64_t coroutine_fn blkdebug_co_get_block_status(
- BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
- BlockDriverState **file)
+static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
{
- assert(QEMU_IS_ALIGNED(sector_num | nb_sectors,
- DIV_ROUND_UP(bs->bl.request_alignment,
- BDRV_SECTOR_SIZE)));
- return bdrv_co_get_block_status_from_file(bs, sector_num, nb_sectors,
- pnum, file);
+ assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
+ return bdrv_co_block_status_from_file(bs, want_zero, offset, bytes,
+ pnum, map, file);
}
static void blkdebug_close(BlockDriverState *bs)
@@ -907,7 +909,7 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_co_flush_to_disk = blkdebug_co_flush,
.bdrv_co_pwrite_zeroes = blkdebug_co_pwrite_zeroes,
.bdrv_co_pdiscard = blkdebug_co_pdiscard,
- .bdrv_co_get_block_status = blkdebug_co_get_block_status,
+ .bdrv_co_block_status = blkdebug_co_block_status,
.bdrv_debug_event = blkdebug_debug_event,
.bdrv_debug_breakpoint = blkdebug_debug_breakpoint,
diff --git a/block/block-backend.c b/block/block-backend.c
index 94ffbb6a60..b3c790e2bd 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -73,6 +73,14 @@ struct BlockBackend {
int quiesce_counter;
VMChangeStateEntry *vmsh;
bool force_allow_inactivate;
+
+ /* Number of in-flight aio requests. BlockDriverState also counts
+ * in-flight requests but aio requests can exist even when blk->root is
+ * NULL, so we cannot rely on its counter for that case.
+ * Accessed with atomic ops.
+ */
+ unsigned int in_flight;
+ AioWait wait;
};
typedef struct BlockBackendAIOCB {
@@ -1225,11 +1233,22 @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
return bdrv_make_zero(blk->root, flags);
}
+static void blk_inc_in_flight(BlockBackend *blk)
+{
+ atomic_inc(&blk->in_flight);
+}
+
+static void blk_dec_in_flight(BlockBackend *blk)
+{
+ atomic_dec(&blk->in_flight);
+ aio_wait_kick(&blk->wait);
+}
+
static void error_callback_bh(void *opaque)
{
struct BlockBackendAIOCB *acb = opaque;
- bdrv_dec_in_flight(acb->common.bs);
+ blk_dec_in_flight(acb->blk);
acb->common.cb(acb->common.opaque, acb->ret);
qemu_aio_unref(acb);
}
@@ -1240,7 +1259,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
{
struct BlockBackendAIOCB *acb;
- bdrv_inc_in_flight(blk_bs(blk));
+ blk_inc_in_flight(blk);
acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
acb->blk = blk;
acb->ret = ret;
@@ -1263,7 +1282,7 @@ static const AIOCBInfo blk_aio_em_aiocb_info = {
static void blk_aio_complete(BlkAioEmAIOCB *acb)
{
if (acb->has_returned) {
- bdrv_dec_in_flight(acb->common.bs);
+ blk_dec_in_flight(acb->rwco.blk);
acb->common.cb(acb->common.opaque, acb->rwco.ret);
qemu_aio_unref(acb);
}
@@ -1284,7 +1303,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
BlkAioEmAIOCB *acb;
Coroutine *co;
- bdrv_inc_in_flight(blk_bs(blk));
+ blk_inc_in_flight(blk);
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
acb->rwco = (BlkRwCo) {
.blk = blk,
@@ -1521,14 +1540,41 @@ int blk_flush(BlockBackend *blk)
void blk_drain(BlockBackend *blk)
{
- if (blk_bs(blk)) {
- bdrv_drain(blk_bs(blk));
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_drained_begin(bs);
+ }
+
+ /* We may have -ENOMEDIUM completions in flight */
+ AIO_WAIT_WHILE(&blk->wait,
+ blk_get_aio_context(blk),
+ atomic_mb_read(&blk->in_flight) > 0);
+
+ if (bs) {
+ bdrv_drained_end(bs);
}
}
void blk_drain_all(void)
{
- bdrv_drain_all();
+ BlockBackend *blk = NULL;
+
+ bdrv_drain_all_begin();
+
+ while ((blk = blk_all_next(blk)) != NULL) {
+ AioContext *ctx = blk_get_aio_context(blk);
+
+ aio_context_acquire(ctx);
+
+ /* We may have -ENOMEDIUM completions in flight */
+ AIO_WAIT_WHILE(&blk->wait, ctx,
+ atomic_mb_read(&blk->in_flight) > 0);
+
+ aio_context_release(ctx);
+ }
+
+ bdrv_drain_all_end();
}
void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
@@ -1569,10 +1615,11 @@ static void send_qmp_error_event(BlockBackend *blk,
bool is_read, int error)
{
IoOperationType optype;
+ BlockDriverState *bs = blk_bs(blk);
optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
- qapi_event_send_block_io_error(blk_name(blk),
- bdrv_get_node_name(blk_bs(blk)), optype,
+ qapi_event_send_block_io_error(blk_name(blk), !!bs,
+ bs ? bdrv_get_node_name(bs) : NULL, optype,
action, blk_iostatus_is_enabled(blk),
error == ENOSPC, strerror(error),
&error_abort);
diff --git a/block/commit.c b/block/commit.c
index bb6c904704..1943c9c3e1 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -265,7 +265,7 @@ static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
static BlockDriver bdrv_commit_top = {
.format_name = "commit_top",
.bdrv_co_preadv = bdrv_commit_top_preadv,
- .bdrv_co_get_block_status = bdrv_co_get_block_status_from_backing,
+ .bdrv_co_block_status = bdrv_co_block_status_from_backing,
.bdrv_refresh_filename = bdrv_commit_top_refresh_filename,
.bdrv_close = bdrv_commit_top_close,
.bdrv_child_perm = bdrv_commit_top_child_perm,
diff --git a/block/crypto.c b/block/crypto.c
index aeac482e7b..17b5c0abad 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -556,9 +556,9 @@ static int block_crypto_open_luks(BlockDriverState *bs,
bs, options, flags, errp);
}
-static int block_crypto_create_luks(const char *filename,
- QemuOpts *opts,
- Error **errp)
+static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename,
+ QemuOpts *opts,
+ Error **errp)
{
return block_crypto_create_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
filename, opts, errp);
@@ -617,7 +617,7 @@ BlockDriver bdrv_crypto_luks = {
.bdrv_open = block_crypto_open_luks,
.bdrv_close = block_crypto_close,
.bdrv_child_perm = bdrv_format_default_perms,
- .bdrv_create = block_crypto_create_luks,
+ .bdrv_co_create_opts = block_crypto_co_create_opts_luks,
.bdrv_truncate = block_crypto_truncate,
.create_opts = &block_crypto_create_opts_luks,
diff --git a/block/file-posix.c b/block/file-posix.c
index ca49c1a98a..7f2cc63c60 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1982,7 +1982,8 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
return (int64_t)st.st_blocks * 512;
}
-static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
+static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts,
+ Error **errp)
{
int fd;
int result = 0;
@@ -2131,25 +2132,24 @@ static int find_allocation(BlockDriverState *bs, off_t start,
}
/*
- * Returns the allocation status of the specified sectors.
+ * Returns the allocation status of the specified offset.
*
- * If 'sector_num' is beyond the end of the disk image the return value is 0
- * and 'pnum' is set to 0.
+ * The block layer guarantees 'offset' and 'bytes' are within bounds.
*
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
+ * 'pnum' is set to the number of bytes (including and immediately following
+ * the specified offset) that are known to be in the same
* allocated/unallocated state.
*
- * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
- * beyond the end of the disk image it will be clamped.
+ * 'bytes' is the max value 'pnum' should be set to.
*/
-static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum,
- BlockDriverState **file)
-{
- off_t start, data = 0, hole = 0;
- int64_t total_size;
+static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes, int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
+{
+ off_t data = 0, hole = 0;
int ret;
ret = fd_open(bs);
@@ -2157,39 +2157,36 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
return ret;
}
- start = sector_num * BDRV_SECTOR_SIZE;
- total_size = bdrv_getlength(bs);
- if (total_size < 0) {
- return total_size;
- } else if (start >= total_size) {
- *pnum = 0;
- return 0;
- } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) {
- nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE);
+ if (!want_zero) {
+ *pnum = bytes;
+ *map = offset;
+ *file = bs;
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
}
- ret = find_allocation(bs, start, &data, &hole);
+ ret = find_allocation(bs, offset, &data, &hole);
if (ret == -ENXIO) {
/* Trailing hole */
- *pnum = nb_sectors;
+ *pnum = bytes;
ret = BDRV_BLOCK_ZERO;
} else if (ret < 0) {
/* No info available, so pretend there are no holes */
- *pnum = nb_sectors;
+ *pnum = bytes;
ret = BDRV_BLOCK_DATA;
- } else if (data == start) {
- /* On a data extent, compute sectors to the end of the extent,
+ } else if (data == offset) {
+ /* On a data extent, compute bytes to the end of the extent,
* possibly including a partial sector at EOF. */
- *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE));
+ *pnum = MIN(bytes, hole - offset);
ret = BDRV_BLOCK_DATA;
} else {
- /* On a hole, compute sectors to the beginning of the next extent. */
- assert(hole == start);
- *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
+ /* On a hole, compute bytes to the beginning of the next extent. */
+ assert(hole == offset);
+ *pnum = MIN(bytes, data - offset);
ret = BDRV_BLOCK_ZERO;
}
+ *map = offset;
*file = bs;
- return ret | BDRV_BLOCK_OFFSET_VALID | start;
+ return ret | BDRV_BLOCK_OFFSET_VALID;
}
static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs,
@@ -2280,9 +2277,9 @@ BlockDriver bdrv_file = {
.bdrv_reopen_commit = raw_reopen_commit,
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_close = raw_close,
- .bdrv_create = raw_create,
+ .bdrv_co_create_opts = raw_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_co_get_block_status = raw_co_get_block_status,
+ .bdrv_co_block_status = raw_co_block_status,
.bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
.bdrv_co_preadv = raw_co_preadv,
@@ -2684,8 +2681,8 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
return -ENOTSUP;
}
-static int hdev_create(const char *filename, QemuOpts *opts,
- Error **errp)
+static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts,
+ Error **errp)
{
int fd;
int ret = 0;
@@ -2758,7 +2755,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_reopen_prepare = raw_reopen_prepare,
.bdrv_reopen_commit = raw_reopen_commit,
.bdrv_reopen_abort = raw_reopen_abort,
- .bdrv_create = hdev_create,
+ .bdrv_co_create_opts = hdev_co_create_opts,
.create_opts = &raw_create_opts,
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
@@ -2880,7 +2877,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_reopen_prepare = raw_reopen_prepare,
.bdrv_reopen_commit = raw_reopen_commit,
.bdrv_reopen_abort = raw_reopen_abort,
- .bdrv_create = hdev_create,
+ .bdrv_co_create_opts = hdev_co_create_opts,
.create_opts = &raw_create_opts,
@@ -3011,7 +3008,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_reopen_prepare = raw_reopen_prepare,
.bdrv_reopen_commit = raw_reopen_commit,
.bdrv_reopen_abort = raw_reopen_abort,
- .bdrv_create = hdev_create,
+ .bdrv_co_create_opts = hdev_co_create_opts,
.create_opts = &raw_create_opts,
.bdrv_co_preadv = raw_co_preadv,
diff --git a/block/file-win32.c b/block/file-win32.c
index f24c7bb92c..4a430d45f1 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -553,7 +553,8 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
return st.st_size;
}
-static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
+static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts,
+ Error **errp)
{
int fd;
int64_t total_size = 0;
@@ -599,7 +600,7 @@ BlockDriver bdrv_file = {
.bdrv_file_open = raw_open,
.bdrv_refresh_limits = raw_probe_alignment,
.bdrv_close = raw_close,
- .bdrv_create = raw_create,
+ .bdrv_co_create_opts = raw_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_aio_readv = raw_aio_readv,
diff --git a/block/gluster.c b/block/gluster.c
index 3f17b7819d..79b4cfdf74 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -1021,8 +1021,9 @@ static int qemu_gluster_do_truncate(struct glfs_fd *fd, int64_t offset,
return 0;
}
-static int qemu_gluster_create(const char *filename,
- QemuOpts *opts, Error **errp)
+static int coroutine_fn qemu_gluster_co_create_opts(const char *filename,
+ QemuOpts *opts,
+ Error **errp)
{
BlockdevOptionsGluster *gconf;
struct glfs *glfs;
@@ -1362,68 +1363,66 @@ exit:
}
/*
- * Returns the allocation status of the specified sectors.
+ * Returns the allocation status of the specified offset.
*
- * If 'sector_num' is beyond the end of the disk image the return value is 0
- * and 'pnum' is set to 0.
+ * The block layer guarantees 'offset' and 'bytes' are within bounds.
*
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
+ * 'pnum' is set to the number of bytes (including and immediately following
+ * the specified offset) that are known to be in the same
* allocated/unallocated state.
*
- * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
- * beyond the end of the disk image it will be clamped.
+ * 'bytes' is the max value 'pnum' should be set to.
*
- * (Based on raw_co_get_block_status() from file-posix.c.)
+ * (Based on raw_co_block_status() from file-posix.c.)
*/
-static int64_t coroutine_fn qemu_gluster_co_get_block_status(
- BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
- BlockDriverState **file)
+static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
{
BDRVGlusterState *s = bs->opaque;
- off_t start, data = 0, hole = 0;
- int64_t total_size;
+ off_t data = 0, hole = 0;
int ret = -EINVAL;
if (!s->fd) {
return ret;
}
- start = sector_num * BDRV_SECTOR_SIZE;
- total_size = bdrv_getlength(bs);
- if (total_size < 0) {
- return total_size;
- } else if (start >= total_size) {
- *pnum = 0;
- return 0;
- } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) {
- nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE);
+ if (!want_zero) {
+ *pnum = bytes;
+ *map = offset;
+ *file = bs;
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
}
- ret = find_allocation(bs, start, &data, &hole);
+ ret = find_allocation(bs, offset, &data, &hole);
if (ret == -ENXIO) {
/* Trailing hole */
- *pnum = nb_sectors;
+ *pnum = bytes;
ret = BDRV_BLOCK_ZERO;
} else if (ret < 0) {
/* No info available, so pretend there are no holes */
- *pnum = nb_sectors;
+ *pnum = bytes;
ret = BDRV_BLOCK_DATA;
- } else if (data == start) {
- /* On a data extent, compute sectors to the end of the extent,
+ } else if (data == offset) {
+ /* On a data extent, compute bytes to the end of the extent,
* possibly including a partial sector at EOF. */
- *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE));
+ *pnum = MIN(bytes, hole - offset);
ret = BDRV_BLOCK_DATA;
} else {
- /* On a hole, compute sectors to the beginning of the next extent. */
- assert(hole == start);
- *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
+ /* On a hole, compute bytes to the beginning of the next extent. */
+ assert(hole == offset);
+ *pnum = MIN(bytes, data - offset);
ret = BDRV_BLOCK_ZERO;
}
+ *map = offset;
*file = bs;
- return ret | BDRV_BLOCK_OFFSET_VALID | start;
+ return ret | BDRV_BLOCK_OFFSET_VALID;
}
@@ -1437,7 +1436,7 @@ static BlockDriver bdrv_gluster = {
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
.bdrv_close = qemu_gluster_close,
- .bdrv_create = qemu_gluster_create,
+ .bdrv_co_create_opts = qemu_gluster_co_create_opts,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
@@ -1451,7 +1450,7 @@ static BlockDriver bdrv_gluster = {
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
- .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
+ .bdrv_co_block_status = qemu_gluster_co_block_status,
.create_opts = &qemu_gluster_create_opts,
};
@@ -1465,7 +1464,7 @@ static BlockDriver bdrv_gluster_tcp = {
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
.bdrv_close = qemu_gluster_close,
- .bdrv_create = qemu_gluster_create,
+ .bdrv_co_create_opts = qemu_gluster_co_create_opts,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
@@ -1479,7 +1478,7 @@ static BlockDriver bdrv_gluster_tcp = {
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
- .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
+ .bdrv_co_block_status = qemu_gluster_co_block_status,
.create_opts = &qemu_gluster_create_opts,
};
@@ -1493,7 +1492,7 @@ static BlockDriver bdrv_gluster_unix = {
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
.bdrv_close = qemu_gluster_close,
- .bdrv_create = qemu_gluster_create,
+ .bdrv_co_create_opts = qemu_gluster_co_create_opts,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
@@ -1507,7 +1506,7 @@ static BlockDriver bdrv_gluster_unix = {
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
- .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
+ .bdrv_co_block_status = qemu_gluster_co_block_status,
.create_opts = &qemu_gluster_create_opts,
};
@@ -1527,7 +1526,7 @@ static BlockDriver bdrv_gluster_rdma = {
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
.bdrv_close = qemu_gluster_close,
- .bdrv_create = qemu_gluster_create,
+ .bdrv_co_create_opts = qemu_gluster_co_create_opts,
.bdrv_getlength = qemu_gluster_getlength,
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
.bdrv_truncate = qemu_gluster_truncate,
@@ -1541,7 +1540,7 @@ static BlockDriver bdrv_gluster_rdma = {
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
- .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
+ .bdrv_co_block_status = qemu_gluster_co_block_status,
.create_opts = &qemu_gluster_create_opts,
};
diff --git a/block/io.c b/block/io.c
index 89d0745e95..2b09c656d0 100644
--- a/block/io.c
+++ b/block/io.c
@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "trace.h"
#include "sysemu/block-backend.h"
+#include "block/aio-wait.h"
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
@@ -587,16 +588,9 @@ void bdrv_inc_in_flight(BlockDriverState *bs)
atomic_inc(&bs->in_flight);
}
-static void dummy_bh_cb(void *opaque)
-{
-}
-
void bdrv_wakeup(BlockDriverState *bs)
{
- /* The barrier (or an atomic op) is in the caller. */
- if (atomic_read(&bs->wakeup)) {
- aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
- }
+ aio_wait_kick(bdrv_get_aio_wait(bs));
}
void bdrv_dec_in_flight(BlockDriverState *bs)
@@ -1701,7 +1695,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
*/
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
- if (!qiov) {
+ if (flags & BDRV_REQ_ZERO_WRITE) {
ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
goto out;
}
@@ -1868,30 +1862,34 @@ typedef struct BdrvCoBlockStatusData {
bool done;
} BdrvCoBlockStatusData;
-int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors,
- int *pnum,
- BlockDriverState **file)
+int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
{
assert(bs->file && bs->file->bs);
- *pnum = nb_sectors;
+ *pnum = bytes;
+ *map = offset;
*file = bs->file->bs;
- return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
- (sector_num << BDRV_SECTOR_BITS);
+ return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
}
-int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors,
- int *pnum,
- BlockDriverState **file)
+int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
{
assert(bs->backing && bs->backing->bs);
- *pnum = nb_sectors;
+ *pnum = bytes;
+ *map = offset;
*file = bs->backing->bs;
- return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
- (sector_num << BDRV_SECTOR_BITS);
+ return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
}
/*
@@ -1899,10 +1897,10 @@ int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
* Drivers not implementing the functionality are assumed to not support
* backing files, hence all their sectors are reported as allocated.
*
- * If 'want_zero' is true, the caller is querying for mapping purposes,
- * and the result should include BDRV_BLOCK_OFFSET_VALID and
- * BDRV_BLOCK_ZERO where possible; otherwise, the result may omit those
- * bits particularly if it allows for a larger value in 'pnum'.
+ * If 'want_zero' is true, the caller is querying for mapping
+ * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and
+ * _ZERO where possible; otherwise, the result favors larger 'pnum',
+ * with a focus on accurate BDRV_BLOCK_ALLOCATED.
*
* If 'offset' is beyond the end of the disk image the return value is
* BDRV_BLOCK_EOF and 'pnum' is set to 0.
@@ -1959,7 +1957,7 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
/* Must be non-NULL or bdrv_getlength() would have failed */
assert(bs->drv);
- if (!bs->drv->bdrv_co_get_block_status) {
+ if (!bs->drv->bdrv_co_block_status) {
*pnum = bytes;
ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
if (offset + bytes == total_size) {
@@ -1976,44 +1974,24 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
bdrv_inc_in_flight(bs);
/* Round out to request_alignment boundaries */
- /* TODO: until we have a byte-based driver callback, we also have to
- * round out to sectors, even if that is bigger than request_alignment */
- align = MAX(bs->bl.request_alignment, BDRV_SECTOR_SIZE);
+ align = bs->bl.request_alignment;
aligned_offset = QEMU_ALIGN_DOWN(offset, align);
aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
- {
- int count; /* sectors */
- int64_t longret;
-
- assert(QEMU_IS_ALIGNED(aligned_offset | aligned_bytes,
- BDRV_SECTOR_SIZE));
- /*
- * The contract allows us to return pnum smaller than bytes, even
- * if the next query would see the same status; we truncate the
- * request to avoid overflowing the driver's 32-bit interface.
- */
- longret = bs->drv->bdrv_co_get_block_status(
- bs, aligned_offset >> BDRV_SECTOR_BITS,
- MIN(INT_MAX, aligned_bytes) >> BDRV_SECTOR_BITS, &count,
- &local_file);
- if (longret < 0) {
- assert(INT_MIN <= longret);
- ret = longret;
- goto out;
- }
- if (longret & BDRV_BLOCK_OFFSET_VALID) {
- local_map = longret & BDRV_BLOCK_OFFSET_MASK;
- }
- ret = longret & ~BDRV_BLOCK_OFFSET_MASK;
- *pnum = count * BDRV_SECTOR_SIZE;
+ ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
+ aligned_bytes, pnum, &local_map,
+ &local_file);
+ if (ret < 0) {
+ *pnum = 0;
+ goto out;
}
/*
- * The driver's result must be a multiple of request_alignment.
+ * The driver's result must be a non-zero multiple of request_alignment.
* Clamp pnum and adjust map to original request.
*/
- assert(QEMU_IS_ALIGNED(*pnum, align) && align > offset - aligned_offset);
+ assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
+ align > offset - aligned_offset);
*pnum -= offset - aligned_offset;
if (*pnum > bytes) {
*pnum = bytes;
diff --git a/block/iscsi.c b/block/iscsi.c
index d2b320ea41..8bf0e87244 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -86,7 +86,7 @@ typedef struct IscsiLun {
unsigned long *allocmap;
unsigned long *allocmap_valid;
long allocmap_size;
- int cluster_sectors;
+ int cluster_size;
bool use_16_for_rw;
bool write_protected;
bool lbpme;
@@ -430,9 +430,10 @@ static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
{
iscsi_allocmap_free(iscsilun);
+ assert(iscsilun->cluster_size);
iscsilun->allocmap_size =
- DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks, iscsilun),
- iscsilun->cluster_sectors);
+ DIV_ROUND_UP(iscsilun->num_blocks * iscsilun->block_size,
+ iscsilun->cluster_size);
iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size);
if (!iscsilun->allocmap) {
@@ -440,7 +441,7 @@ static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
}
if (open_flags & BDRV_O_NOCACHE) {
- /* in case that cache.direct = on all allocmap entries are
+ /* when cache.direct = on all allocmap entries are
* treated as invalid to force a relookup of the block
* status on every read request */
return 0;
@@ -457,8 +458,8 @@ static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
}
static void
-iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
- int nb_sectors, bool allocated, bool valid)
+iscsi_allocmap_update(IscsiLun *iscsilun, int64_t offset,
+ int64_t bytes, bool allocated, bool valid)
{
int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;
@@ -466,13 +467,13 @@ iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
return;
}
/* expand to entirely contain all affected clusters */
- cl_num_expanded = sector_num / iscsilun->cluster_sectors;
- nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors,
- iscsilun->cluster_sectors) - cl_num_expanded;
+ assert(iscsilun->cluster_size);
+ cl_num_expanded = offset / iscsilun->cluster_size;
+ nb_cls_expanded = DIV_ROUND_UP(offset + bytes,
+ iscsilun->cluster_size) - cl_num_expanded;
/* shrink to touch only completely contained clusters */
- cl_num_shrunk = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
- nb_cls_shrunk = (sector_num + nb_sectors) / iscsilun->cluster_sectors
- - cl_num_shrunk;
+ cl_num_shrunk = DIV_ROUND_UP(offset, iscsilun->cluster_size);
+ nb_cls_shrunk = (offset + bytes) / iscsilun->cluster_size - cl_num_shrunk;
if (allocated) {
bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
} else {
@@ -495,26 +496,26 @@ iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
}
static void
-iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t sector_num,
- int nb_sectors)
+iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t offset,
+ int64_t bytes)
{
- iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, true, true);
+ iscsi_allocmap_update(iscsilun, offset, bytes, true, true);
}
static void
-iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t sector_num,
- int nb_sectors)
+iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t offset,
+ int64_t bytes)
{
/* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update
* is ignored, so this will in effect be an iscsi_allocmap_set_invalid.
*/
- iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, true);
+ iscsi_allocmap_update(iscsilun, offset, bytes, false, true);
}
-static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t sector_num,
- int nb_sectors)
+static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t offset,
+ int64_t bytes)
{
- iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, false);
+ iscsi_allocmap_update(iscsilun, offset, bytes, false, false);
}
static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
@@ -528,28 +529,30 @@ static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
}
static inline bool
-iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num,
- int nb_sectors)
+iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t offset,
+ int64_t bytes)
{
unsigned long size;
if (iscsilun->allocmap == NULL) {
return true;
}
- size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
+ assert(iscsilun->cluster_size);
+ size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size);
return !(find_next_bit(iscsilun->allocmap, size,
- sector_num / iscsilun->cluster_sectors) == size);
+ offset / iscsilun->cluster_size) == size);
}
static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
- int64_t sector_num, int nb_sectors)
+ int64_t offset, int64_t bytes)
{
unsigned long size;
if (iscsilun->allocmap_valid == NULL) {
return false;
}
- size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
+ assert(iscsilun->cluster_size);
+ size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size);
return (find_next_zero_bit(iscsilun->allocmap_valid, size,
- sector_num / iscsilun->cluster_sectors) == size);
+ offset / iscsilun->cluster_size) == size);
}
static int coroutine_fn
@@ -631,14 +634,16 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
- iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
+ iscsi_allocmap_set_invalid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE);
error_report("iSCSI WRITE10/16 failed at lba %" PRIu64 ": %s", lba,
iTask.err_str);
r = iTask.err_code;
goto out_unlock;
}
- iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);
+ iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE);
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
@@ -648,36 +653,36 @@ out_unlock:
-static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum,
- BlockDriverState **file)
+static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t offset,
+ int64_t bytes, int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
{
IscsiLun *iscsilun = bs->opaque;
struct scsi_get_lba_status *lbas = NULL;
struct scsi_lba_status_descriptor *lbasd = NULL;
struct IscsiTask iTask;
uint64_t lba;
- int64_t ret;
+ int ret;
iscsi_co_init_iscsitask(iscsilun, &iTask);
- if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
- ret = -EINVAL;
- goto out;
- }
+ assert(QEMU_IS_ALIGNED(offset | bytes, iscsilun->block_size));
/* default to all sectors allocated */
- ret = BDRV_BLOCK_DATA;
- ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
- *pnum = nb_sectors;
+ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+ if (map) {
+ *map = offset;
+ }
+ *pnum = bytes;
/* LUN does not support logical block provisioning */
if (!iscsilun->lbpme) {
goto out;
}
- lba = sector_qemu2lun(sector_num, iscsilun);
+ lba = offset / iscsilun->block_size;
qemu_mutex_lock(&iscsilun->mutex);
retry:
@@ -722,12 +727,12 @@ retry:
lbasd = &lbas->descriptors[0];
- if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
+ if (lba != lbasd->lba) {
ret = -EIO;
goto out_unlock;
}
- *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
+ *pnum = lbasd->num_blocks * iscsilun->block_size;
if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
@@ -738,13 +743,13 @@ retry:
}
if (ret & BDRV_BLOCK_ZERO) {
- iscsi_allocmap_set_unallocated(iscsilun, sector_num, *pnum);
+ iscsi_allocmap_set_unallocated(iscsilun, offset, *pnum);
} else {
- iscsi_allocmap_set_allocated(iscsilun, sector_num, *pnum);
+ iscsi_allocmap_set_allocated(iscsilun, offset, *pnum);
}
- if (*pnum > nb_sectors) {
- *pnum = nb_sectors;
+ if (*pnum > bytes) {
+ *pnum = bytes;
}
out_unlock:
qemu_mutex_unlock(&iscsilun->mutex);
@@ -753,7 +758,7 @@ out:
if (iTask.task != NULL) {
scsi_free_scsi_task(iTask.task);
}
- if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
+ if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) {
*file = bs;
}
return ret;
@@ -780,29 +785,37 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
/* if cache.direct is off and we have a valid entry in our allocation map
* we can skip checking the block status and directly return zeroes if
* the request falls within an unallocated area */
- if (iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
- !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
+ if (iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE) &&
+ !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE)) {
qemu_iovec_memset(iov, 0, 0x00, iov->size);
return 0;
}
if (nb_sectors >= ISCSI_CHECKALLOC_THRES &&
- !iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
- !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
- int pnum;
- BlockDriverState *file;
+ !iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE) &&
+ !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE)) {
+ int64_t pnum;
/* check the block status from the beginning of the cluster
* containing the start sector */
- int64_t ret = iscsi_co_get_block_status(bs,
- sector_num - sector_num % iscsilun->cluster_sectors,
- BDRV_REQUEST_MAX_SECTORS, &pnum, &file);
+ int64_t head;
+ int ret;
+
+ assert(iscsilun->cluster_size);
+ head = (sector_num * BDRV_SECTOR_SIZE) % iscsilun->cluster_size;
+ ret = iscsi_co_block_status(bs, true,
+ sector_num * BDRV_SECTOR_SIZE - head,
+ BDRV_REQUEST_MAX_BYTES, &pnum, NULL, NULL);
if (ret < 0) {
return ret;
}
/* if the whole request falls into an unallocated area we can avoid
- * to read and directly return zeroes instead */
+ * reading and directly return zeroes instead */
if (ret & BDRV_BLOCK_ZERO &&
- pnum >= nb_sectors + sector_num % iscsilun->cluster_sectors) {
+ pnum >= nb_sectors * BDRV_SECTOR_SIZE + head) {
qemu_iovec_memset(iov, 0, 0x00, iov->size);
return 0;
}
@@ -1146,8 +1159,7 @@ retry:
goto retry;
}
- iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
- bytes >> BDRV_SECTOR_BITS);
+ iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
/* the target might fail with a check condition if it
@@ -1260,8 +1272,7 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
- iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
- bytes >> BDRV_SECTOR_BITS);
+ iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
error_report("iSCSI WRITESAME10/16 failed at lba %" PRIu64 ": %s",
lba, iTask.err_str);
r = iTask.err_code;
@@ -1269,11 +1280,9 @@ retry:
}
if (flags & BDRV_REQ_MAY_UNMAP) {
- iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
- bytes >> BDRV_SECTOR_BITS);
+ iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
} else {
- iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS,
- bytes >> BDRV_SECTOR_BITS);
+ iscsi_allocmap_set_allocated(iscsilun, offset, bytes);
}
out_unlock:
@@ -1953,8 +1962,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
* reasonable size */
if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
- iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
- iscsilun->block_size) >> BDRV_SECTOR_BITS;
+ iscsilun->cluster_size = iscsilun->bl.opt_unmap_gran *
+ iscsilun->block_size;
if (iscsilun->lbprz) {
ret = iscsi_allocmap_init(iscsilun, bs->open_flags);
}
@@ -2108,7 +2117,8 @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset,
return 0;
}
-static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
+static int coroutine_fn iscsi_co_create_opts(const char *filename, QemuOpts *opts,
+ Error **errp)
{
int ret = 0;
int64_t total_size = 0;
@@ -2163,7 +2173,7 @@ static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
IscsiLun *iscsilun = bs->opaque;
bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
- bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
+ bdi->cluster_size = iscsilun->cluster_size;
return 0;
}
@@ -2195,7 +2205,7 @@ static BlockDriver bdrv_iscsi = {
.bdrv_parse_filename = iscsi_parse_filename,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
- .bdrv_create = iscsi_create,
+ .bdrv_co_create_opts = iscsi_co_create_opts,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
@@ -2206,7 +2216,7 @@ static BlockDriver bdrv_iscsi = {
.bdrv_truncate = iscsi_truncate,
.bdrv_refresh_limits = iscsi_refresh_limits,
- .bdrv_co_get_block_status = iscsi_co_get_block_status,
+ .bdrv_co_block_status = iscsi_co_block_status,
.bdrv_co_pdiscard = iscsi_co_pdiscard,
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
.bdrv_co_readv = iscsi_co_readv,
@@ -2230,7 +2240,7 @@ static BlockDriver bdrv_iser = {
.bdrv_parse_filename = iscsi_parse_filename,
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
- .bdrv_create = iscsi_create,
+ .bdrv_co_create_opts = iscsi_co_create_opts,
.create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_reopen_commit = iscsi_reopen_commit,
@@ -2241,7 +2251,7 @@ static BlockDriver bdrv_iser = {
.bdrv_truncate = iscsi_truncate,
.bdrv_refresh_limits = iscsi_refresh_limits,
- .bdrv_co_get_block_status = iscsi_co_get_block_status,
+ .bdrv_co_block_status = iscsi_co_block_status,
.bdrv_co_pdiscard = iscsi_co_pdiscard,
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
.bdrv_co_readv = iscsi_co_readv,
diff --git a/block/mirror.c b/block/mirror.c
index c9badc1203..f5bf620942 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1094,7 +1094,7 @@ static BlockDriver bdrv_mirror_top = {
.bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes,
.bdrv_co_pdiscard = bdrv_mirror_top_pdiscard,
.bdrv_co_flush = bdrv_mirror_top_flush,
- .bdrv_co_get_block_status = bdrv_co_get_block_status_from_backing,
+ .bdrv_co_block_status = bdrv_co_block_status_from_backing,
.bdrv_refresh_filename = bdrv_mirror_top_refresh_filename,
.bdrv_close = bdrv_mirror_top_close,
.bdrv_child_perm = bdrv_mirror_top_child_perm,
diff --git a/block/nfs.c b/block/nfs.c
index bbdb4fadad..1d82ff5042 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -684,7 +684,8 @@ static QemuOptsList nfs_create_opts = {
}
};
-static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
+static int coroutine_fn nfs_file_co_create_opts(const char *url, QemuOpts *opts,
+ Error **errp)
{
int64_t ret, total_size;
NFSClient *client = g_new0(NFSClient, 1);
@@ -897,7 +898,7 @@ static BlockDriver bdrv_nfs = {
.bdrv_file_open = nfs_file_open,
.bdrv_close = nfs_file_close,
- .bdrv_create = nfs_file_create,
+ .bdrv_co_create_opts = nfs_file_co_create_opts,
.bdrv_reopen_prepare = nfs_reopen_prepare,
.bdrv_co_preadv = nfs_co_preadv,
diff --git a/block/null.c b/block/null.c
index 214d394fff..806a8631e4 100644
--- a/block/null.c
+++ b/block/null.c
@@ -223,22 +223,23 @@ static int null_reopen_prepare(BDRVReopenState *reopen_state,
return 0;
}
-static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum,
- BlockDriverState **file)
+static int coroutine_fn null_co_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t offset,
+ int64_t bytes, int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
{
BDRVNullState *s = bs->opaque;
- off_t start = sector_num * BDRV_SECTOR_SIZE;
+ int ret = BDRV_BLOCK_OFFSET_VALID;
- *pnum = nb_sectors;
+ *pnum = bytes;
+ *map = offset;
*file = bs;
if (s->read_zeroes) {
- return BDRV_BLOCK_OFFSET_VALID | start | BDRV_BLOCK_ZERO;
- } else {
- return BDRV_BLOCK_OFFSET_VALID | start;
+ ret |= BDRV_BLOCK_ZERO;
}
+ return ret;
}
static void null_refresh_filename(BlockDriverState *bs, QDict *opts)
@@ -270,7 +271,7 @@ static BlockDriver bdrv_null_co = {
.bdrv_co_flush_to_disk = null_co_flush,
.bdrv_reopen_prepare = null_reopen_prepare,
- .bdrv_co_get_block_status = null_co_get_block_status,
+ .bdrv_co_block_status = null_co_block_status,
.bdrv_refresh_filename = null_refresh_filename,
};
@@ -290,7 +291,7 @@ static BlockDriver bdrv_null_aio = {
.bdrv_aio_flush = null_aio_flush,
.bdrv_reopen_prepare = null_reopen_prepare,
- .bdrv_co_get_block_status = null_co_get_block_status,
+ .bdrv_co_block_status = null_co_block_status,
.bdrv_refresh_filename = null_refresh_filename,
};
diff --git a/block/nvme.c b/block/nvme.c
index 75078022f6..8bca57aae6 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -1072,18 +1072,6 @@ static int nvme_reopen_prepare(BDRVReopenState *reopen_state,
return 0;
}
-static int64_t coroutine_fn nvme_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum,
- BlockDriverState **file)
-{
- *pnum = nb_sectors;
- *file = bs;
-
- return BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_OFFSET_VALID |
- (sector_num << BDRV_SECTOR_BITS);
-}
-
static void nvme_refresh_filename(BlockDriverState *bs, QDict *opts)
{
QINCREF(opts);
@@ -1183,8 +1171,6 @@ static BlockDriver bdrv_nvme = {
.bdrv_co_flush_to_disk = nvme_co_flush,
.bdrv_reopen_prepare = nvme_reopen_prepare,
- .bdrv_co_get_block_status = nvme_co_get_block_status,
-
.bdrv_refresh_filename = nvme_refresh_filename,
.bdrv_refresh_limits = nvme_refresh_limits,
diff --git a/block/parallels.c b/block/parallels.c
index e1e3d80c88..81085795c2 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -261,23 +261,31 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
}
-static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
+static int coroutine_fn parallels_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
{
BDRVParallelsState *s = bs->opaque;
- int64_t offset;
+ int count;
+ assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE));
qemu_co_mutex_lock(&s->lock);
- offset = block_status(s, sector_num, nb_sectors, pnum);
+ offset = block_status(s, offset >> BDRV_SECTOR_BITS,
+ bytes >> BDRV_SECTOR_BITS, &count);
qemu_co_mutex_unlock(&s->lock);
+ *pnum = count * BDRV_SECTOR_SIZE;
if (offset < 0) {
return 0;
}
+ *map = offset * BDRV_SECTOR_SIZE;
*file = bs->file->bs;
- return (offset << BDRV_SECTOR_BITS) |
- BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
}
static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
@@ -467,7 +475,9 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
}
-static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
+static int coroutine_fn parallels_co_create_opts(const char *filename,
+ QemuOpts *opts,
+ Error **errp)
{
int64_t total_size, cl_size;
uint8_t tmp[BDRV_SECTOR_SIZE];
@@ -782,13 +792,13 @@ static BlockDriver bdrv_parallels = {
.bdrv_open = parallels_open,
.bdrv_close = parallels_close,
.bdrv_child_perm = bdrv_format_default_perms,
- .bdrv_co_get_block_status = parallels_co_get_block_status,
+ .bdrv_co_block_status = parallels_co_block_status,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_flush_to_os = parallels_co_flush_to_os,
.bdrv_co_readv = parallels_co_readv,
.bdrv_co_writev = parallels_co_writev,
.supports_backing = true,
- .bdrv_create = parallels_create,
+ .bdrv_co_create_opts = parallels_co_create_opts,
.bdrv_check = parallels_check,
.create_opts = &parallels_create_opts,
};
diff --git a/block/qcow.c b/block/qcow.c
index 8631155ac8..47a18d9a3a 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -524,23 +524,28 @@ static int get_cluster_offset(BlockDriverState *bs,
return 1;
}
-static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
+static int coroutine_fn qcow_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
{
BDRVQcowState *s = bs->opaque;
- int index_in_cluster, n, ret;
+ int index_in_cluster, ret;
+ int64_t n;
uint64_t cluster_offset;
qemu_co_mutex_lock(&s->lock);
- ret = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0, &cluster_offset);
+ ret = get_cluster_offset(bs, offset, 0, 0, 0, 0, &cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
return ret;
}
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n = s->cluster_sectors - index_in_cluster;
- if (n > nb_sectors)
- n = nb_sectors;
+ index_in_cluster = offset & (s->cluster_size - 1);
+ n = s->cluster_size - index_in_cluster;
+ if (n > bytes) {
+ n = bytes;
+ }
*pnum = n;
if (!cluster_offset) {
return 0;
@@ -548,9 +553,9 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypto) {
return BDRV_BLOCK_DATA;
}
- cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
+ *map = cluster_offset | index_in_cluster;
*file = bs->file->bs;
- return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset;
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
}
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
@@ -805,7 +810,8 @@ static void qcow_close(BlockDriverState *bs)
error_free(s->migration_blocker);
}
-static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
+static int coroutine_fn qcow_co_create_opts(const char *filename, QemuOpts *opts,
+ Error **errp)
{
int header_size, backing_filename_len, l1_size, shift, i;
QCowHeader header;
@@ -1122,13 +1128,13 @@ static BlockDriver bdrv_qcow = {
.bdrv_close = qcow_close,
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_reopen_prepare = qcow_reopen_prepare,
- .bdrv_create = qcow_create,
+ .bdrv_co_create_opts = qcow_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.supports_backing = true,
.bdrv_co_readv = qcow_co_readv,
.bdrv_co_writev = qcow_co_writev,
- .bdrv_co_get_block_status = qcow_co_get_block_status,
+ .bdrv_co_block_status = qcow_co_block_status,
.bdrv_make_empty = qcow_make_empty,
.bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed,
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 4f6fd863ea..5127276f90 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -413,8 +413,8 @@ static inline void bitmap_dir_entry_to_be(Qcow2BitmapDirEntry *entry)
static inline int calc_dir_entry_size(size_t name_size, size_t extra_data_size)
{
- return align_offset(sizeof(Qcow2BitmapDirEntry) +
- name_size + extra_data_size, 8);
+ int size = sizeof(Qcow2BitmapDirEntry) + name_size + extra_data_size;
+ return ROUND_UP(size, 8);
}
static inline int dir_entry_size(Qcow2BitmapDirEntry *entry)
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index e406b0f3b9..98908c4264 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -126,11 +126,11 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
new_l1_size2 = sizeof(uint64_t) * new_l1_size;
new_l1_table = qemu_try_blockalign(bs->file->bs,
- align_offset(new_l1_size2, 512));
+ ROUND_UP(new_l1_size2, 512));
if (new_l1_table == NULL) {
return -ENOMEM;
}
- memset(new_l1_table, 0, align_offset(new_l1_size2, 512));
+ memset(new_l1_table, 0, ROUND_UP(new_l1_size2, 512));
if (s->l1_size) {
memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index d46b69d7f3..126cca3276 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1204,7 +1204,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
* l1_table_offset when it is the current s->l1_table_offset! Be careful
* when changing this! */
if (l1_table_offset != s->l1_table_offset) {
- l1_table = g_try_malloc0(align_offset(l1_size2, 512));
+ l1_table = g_try_malloc0(ROUND_UP(l1_size2, 512));
if (l1_size2 && l1_table == NULL) {
ret = -ENOMEM;
goto fail;
@@ -2553,7 +2553,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
}
/* align range to test to cluster boundaries */
- size = align_offset(offset_into_cluster(s, offset) + size, s->cluster_size);
+ size = ROUND_UP(offset_into_cluster(s, offset) + size, s->cluster_size);
offset = start_of_cluster(s, offset);
if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 44243e0e95..cee25f582b 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -66,7 +66,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
for(i = 0; i < s->nb_snapshots; i++) {
/* Read statically sized part of the snapshot header */
- offset = align_offset(offset, 8);
+ offset = ROUND_UP(offset, 8);
ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
if (ret < 0) {
goto fail;
@@ -155,7 +155,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
offset = 0;
for(i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
- offset = align_offset(offset, 8);
+ offset = ROUND_UP(offset, 8);
offset += sizeof(h);
offset += sizeof(extra);
offset += strlen(sn->id_str);
@@ -215,7 +215,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
h.id_str_size = cpu_to_be16(id_str_size);
h.name_size = cpu_to_be16(name_size);
- offset = align_offset(offset, 8);
+ offset = ROUND_UP(offset, 8);
ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
if (ret < 0) {
@@ -441,7 +441,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
/* The VM state isn't needed any more in the active L1 table; in fact, it
* hurts by causing expensive COW for the next snapshot. */
qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
- align_offset(sn->vm_state_size, s->cluster_size),
+ ROUND_UP(sn->vm_state_size, s->cluster_size),
QCOW2_DISCARD_NEVER, false);
#ifdef DEBUG_ALLOC
@@ -710,7 +710,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
}
new_l1_bytes = sn->l1_size * sizeof(uint64_t);
new_l1_table = qemu_try_blockalign(bs->file->bs,
- align_offset(new_l1_bytes, 512));
+ ROUND_UP(new_l1_bytes, 512));
if (new_l1_table == NULL) {
return -ENOMEM;
}
diff --git a/block/qcow2.c b/block/qcow2.c
index 3dd098b74f..071dc4d608 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1377,7 +1377,7 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
if (s->l1_size > 0) {
s->l1_table = qemu_try_blockalign(bs->file->bs,
- align_offset(s->l1_size * sizeof(uint64_t), 512));
+ ROUND_UP(s->l1_size * sizeof(uint64_t), 512));
if (s->l1_table == NULL) {
error_setg(errp, "Could not allocate L1 table");
ret = -ENOMEM;
@@ -1668,32 +1668,34 @@ static void qcow2_join_options(QDict *options, QDict *old_options)
}
}
-static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
+static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset, int64_t count,
+ int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
{
BDRVQcow2State *s = bs->opaque;
uint64_t cluster_offset;
int index_in_cluster, ret;
unsigned int bytes;
- int64_t status = 0;
+ int status = 0;
- bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE);
+ bytes = MIN(INT_MAX, count);
qemu_co_mutex_lock(&s->lock);
- ret = qcow2_get_cluster_offset(bs, sector_num << BDRV_SECTOR_BITS, &bytes,
- &cluster_offset);
+ ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
return ret;
}
- *pnum = bytes >> BDRV_SECTOR_BITS;
+ *pnum = bytes;
if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
!s->crypto) {
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
+ index_in_cluster = offset & (s->cluster_size - 1);
+ *map = cluster_offset | index_in_cluster;
*file = bs->file->bs;
- status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset;
+ status |= BDRV_BLOCK_OFFSET_VALID;
}
if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) {
status |= BDRV_BLOCK_ZERO;
@@ -2638,19 +2640,19 @@ static int64_t qcow2_calc_prealloc_size(int64_t total_size,
{
int64_t meta_size = 0;
uint64_t nl1e, nl2e;
- int64_t aligned_total_size = align_offset(total_size, cluster_size);
+ int64_t aligned_total_size = ROUND_UP(total_size, cluster_size);
/* header: 1 cluster */
meta_size += cluster_size;
/* total size of L2 tables */
nl2e = aligned_total_size / cluster_size;
- nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t));
+ nl2e = ROUND_UP(nl2e, cluster_size / sizeof(uint64_t));
meta_size += nl2e * sizeof(uint64_t);
/* total size of L1 tables */
nl1e = nl2e * sizeof(uint64_t) / cluster_size;
- nl1e = align_offset(nl1e, cluster_size / sizeof(uint64_t));
+ nl1e = ROUND_UP(nl1e, cluster_size / sizeof(uint64_t));
meta_size += nl1e * sizeof(uint64_t);
/* total size of refcount table and blocks */
@@ -2721,11 +2723,12 @@ static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
return refcount_bits;
}
-static int qcow2_create2(const char *filename, int64_t total_size,
- const char *backing_file, const char *backing_format,
- int flags, size_t cluster_size, PreallocMode prealloc,
- QemuOpts *opts, int version, int refcount_order,
- const char *encryptfmt, Error **errp)
+static int coroutine_fn
+qcow2_co_create2(const char *filename, int64_t total_size,
+ const char *backing_file, const char *backing_format,
+ int flags, size_t cluster_size, PreallocMode prealloc,
+ QemuOpts *opts, int version, int refcount_order,
+ const char *encryptfmt, Error **errp)
{
QDict *options;
@@ -2912,7 +2915,8 @@ out:
return ret;
}
-static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
+static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts,
+ Error **errp)
{
char *backing_file = NULL;
char *backing_fmt = NULL;
@@ -2993,9 +2997,9 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
refcount_order = ctz32(refcount_bits);
- ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags,
- cluster_size, prealloc, opts, version, refcount_order,
- encryptfmt, &local_err);
+ ret = qcow2_co_create2(filename, size, backing_file, backing_fmt, flags,
+ cluster_size, prealloc, opts, version, refcount_order,
+ encryptfmt, &local_err);
error_propagate(errp, local_err);
finish:
@@ -3704,8 +3708,8 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
has_backing_file = !!optstr;
g_free(optstr);
- virtual_size = align_offset(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
- cluster_size);
+ virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+ virtual_size = ROUND_UP(virtual_size, cluster_size);
/* Check that virtual disk size is valid */
l2_tables = DIV_ROUND_UP(virtual_size / cluster_size,
@@ -3725,7 +3729,7 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
goto err;
}
- virtual_size = align_offset(ssize, cluster_size);
+ virtual_size = ROUND_UP(ssize, cluster_size);
if (has_backing_file) {
/* We don't how much of the backing chain is shared by the input
@@ -4348,9 +4352,9 @@ BlockDriver bdrv_qcow2 = {
.bdrv_reopen_abort = qcow2_reopen_abort,
.bdrv_join_options = qcow2_join_options,
.bdrv_child_perm = bdrv_format_default_perms,
- .bdrv_create = qcow2_create,
+ .bdrv_co_create_opts = qcow2_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_co_get_block_status = qcow2_co_get_block_status,
+ .bdrv_co_block_status = qcow2_co_block_status,
.bdrv_co_preadv = qcow2_co_preadv,
.bdrv_co_pwritev = qcow2_co_pwritev,
diff --git a/block/qcow2.h b/block/qcow2.h
index 883802241f..1a84cc77b0 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -480,12 +480,6 @@ static inline int offset_to_l2_slice_index(BDRVQcow2State *s, int64_t offset)
return (offset >> s->cluster_bits) & (s->l2_slice_size - 1);
}
-static inline int64_t align_offset(int64_t offset, int n)
-{
- offset = (offset + n - 1) & ~(n - 1);
- return offset;
-}
-
static inline int64_t qcow2_vm_state_offset(BDRVQcow2State *s)
{
return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
diff --git a/block/qed.c b/block/qed.c
index c6ff3ab015..72cf2f58ab 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -638,7 +638,9 @@ out:
return ret;
}
-static int bdrv_qed_create(const char *filename, QemuOpts *opts, Error **errp)
+static int coroutine_fn bdrv_qed_co_create_opts(const char *filename,
+ QemuOpts *opts,
+ Error **errp)
{
uint64_t image_size = 0;
uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
@@ -688,74 +690,46 @@ finish:
return ret;
}
-typedef struct {
- BlockDriverState *bs;
- Coroutine *co;
- uint64_t pos;
- int64_t status;
- int *pnum;
- BlockDriverState **file;
-} QEDIsAllocatedCB;
-
-/* Called with table_lock held. */
-static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
+static int coroutine_fn bdrv_qed_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t pos, int64_t bytes,
+ int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
{
- QEDIsAllocatedCB *cb = opaque;
- BDRVQEDState *s = cb->bs->opaque;
- *cb->pnum = len / BDRV_SECTOR_SIZE;
+ BDRVQEDState *s = bs->opaque;
+ size_t len = MIN(bytes, SIZE_MAX);
+ int status;
+ QEDRequest request = { .l2_table = NULL };
+ uint64_t offset;
+ int ret;
+
+ qemu_co_mutex_lock(&s->table_lock);
+ ret = qed_find_cluster(s, &request, pos, &len, &offset);
+
+ *pnum = len;
switch (ret) {
case QED_CLUSTER_FOUND:
- offset |= qed_offset_into_cluster(s, cb->pos);
- cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
- *cb->file = cb->bs->file->bs;
+ *map = offset | qed_offset_into_cluster(s, pos);
+ status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+ *file = bs->file->bs;
break;
case QED_CLUSTER_ZERO:
- cb->status = BDRV_BLOCK_ZERO;
+ status = BDRV_BLOCK_ZERO;
break;
case QED_CLUSTER_L2:
case QED_CLUSTER_L1:
- cb->status = 0;
+ status = 0;
break;
default:
assert(ret < 0);
- cb->status = ret;
+ status = ret;
break;
}
- if (cb->co) {
- aio_co_wake(cb->co);
- }
-}
-
-static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum,
- BlockDriverState **file)
-{
- BDRVQEDState *s = bs->opaque;
- size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
- QEDIsAllocatedCB cb = {
- .bs = bs,
- .pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE,
- .status = BDRV_BLOCK_OFFSET_MASK,
- .pnum = pnum,
- .file = file,
- };
- QEDRequest request = { .l2_table = NULL };
- uint64_t offset;
- int ret;
-
- qemu_co_mutex_lock(&s->table_lock);
- ret = qed_find_cluster(s, &request, cb.pos, &len, &offset);
- qed_is_allocated_cb(&cb, ret, offset, len);
-
- /* The callback was invoked immediately */
- assert(cb.status != BDRV_BLOCK_OFFSET_MASK);
-
qed_unref_l2_cache_entry(request.l2_table);
qemu_co_mutex_unlock(&s->table_lock);
- return cb.status;
+ return status;
}
static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
@@ -1592,9 +1566,9 @@ static BlockDriver bdrv_qed = {
.bdrv_close = bdrv_qed_close,
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
- .bdrv_create = bdrv_qed_create,
+ .bdrv_co_create_opts = bdrv_qed_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
+ .bdrv_co_block_status = bdrv_qed_co_block_status,
.bdrv_co_readv = bdrv_qed_co_readv,
.bdrv_co_writev = bdrv_qed_co_writev,
.bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes,
diff --git a/block/raw-format.c b/block/raw-format.c
index ab552c0954..a378547c99 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -250,17 +250,17 @@ fail:
return ret;
}
-static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum,
+static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t offset,
+ int64_t bytes, int64_t *pnum,
+ int64_t *map,
BlockDriverState **file)
{
BDRVRawState *s = bs->opaque;
- *pnum = nb_sectors;
+ *pnum = bytes;
*file = bs->file->bs;
- sector_num += s->offset / BDRV_SECTOR_SIZE;
- return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
- (sector_num << BDRV_SECTOR_BITS);
+ *map = offset + s->offset;
+ return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
}
static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
@@ -396,7 +396,8 @@ static int raw_has_zero_init(BlockDriverState *bs)
return bdrv_has_zero_init(bs->file->bs);
}
-static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
+static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts,
+ Error **errp)
{
return bdrv_create_file(filename, opts, errp);
}
@@ -491,12 +492,12 @@ BlockDriver bdrv_raw = {
.bdrv_open = &raw_open,
.bdrv_close = &raw_close,
.bdrv_child_perm = bdrv_filter_default_perms,
- .bdrv_create = &raw_create,
+ .bdrv_co_create_opts = &raw_co_create_opts,
.bdrv_co_preadv = &raw_co_preadv,
.bdrv_co_pwritev = &raw_co_pwritev,
.bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
.bdrv_co_pdiscard = &raw_co_pdiscard,
- .bdrv_co_get_block_status = &raw_co_get_block_status,
+ .bdrv_co_block_status = &raw_co_block_status,
.bdrv_truncate = &raw_truncate,
.bdrv_getlength = &raw_getlength,
.has_variable_length = true,
diff --git a/block/rbd.c b/block/rbd.c
index 8474b0ba11..c7dd32e213 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -351,7 +351,9 @@ static QemuOptsList runtime_opts = {
},
};
-static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
+static int coroutine_fn qemu_rbd_co_create_opts(const char *filename,
+ QemuOpts *opts,
+ Error **errp)
{
Error *local_err = NULL;
int64_t bytes = 0;
@@ -1132,7 +1134,7 @@ static BlockDriver bdrv_rbd = {
.bdrv_file_open = qemu_rbd_open,
.bdrv_close = qemu_rbd_close,
.bdrv_reopen_prepare = qemu_rbd_reopen_prepare,
- .bdrv_create = qemu_rbd_create,
+ .bdrv_co_create_opts = qemu_rbd_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_get_info = qemu_rbd_getinfo,
.create_opts = &qemu_rbd_create_opts,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 215223053b..d8c10b7cac 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1959,8 +1959,8 @@ static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
return 0;
}
-static int sd_create(const char *filename, QemuOpts *opts,
- Error **errp)
+static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts,
+ Error **errp)
{
Error *err = NULL;
int ret = 0;
@@ -3004,19 +3004,19 @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
return acb.ret;
}
-static coroutine_fn int64_t
-sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- int *pnum, BlockDriverState **file)
+static coroutine_fn int
+sd_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
+ int64_t bytes, int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
{
BDRVSheepdogState *s = bs->opaque;
SheepdogInode *inode = &s->inode;
uint32_t object_size = (UINT32_C(1) << inode->block_size_shift);
- uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
unsigned long start = offset / object_size,
- end = DIV_ROUND_UP((sector_num + nb_sectors) *
- BDRV_SECTOR_SIZE, object_size);
+ end = DIV_ROUND_UP(offset + bytes, object_size);
unsigned long idx;
- int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
+ *map = offset;
+ int ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
for (idx = start; idx < end; idx++) {
if (inode->data_vdi_id[idx] == 0) {
@@ -3033,9 +3033,9 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
}
}
- *pnum = (idx - start) * object_size / BDRV_SECTOR_SIZE;
- if (*pnum > nb_sectors) {
- *pnum = nb_sectors;
+ *pnum = (idx - start) * object_size;
+ if (*pnum > bytes) {
+ *pnum = bytes;
}
if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
*file = bs;
@@ -3103,7 +3103,7 @@ static BlockDriver bdrv_sheepdog = {
.bdrv_reopen_commit = sd_reopen_commit,
.bdrv_reopen_abort = sd_reopen_abort,
.bdrv_close = sd_close,
- .bdrv_create = sd_create,
+ .bdrv_co_create_opts = sd_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
.bdrv_get_allocated_file_size = sd_get_allocated_file_size,
@@ -3113,7 +3113,7 @@ static BlockDriver bdrv_sheepdog = {
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_pdiscard = sd_co_pdiscard,
- .bdrv_co_get_block_status = sd_co_get_block_status,
+ .bdrv_co_block_status = sd_co_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
@@ -3139,7 +3139,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
.bdrv_reopen_commit = sd_reopen_commit,
.bdrv_reopen_abort = sd_reopen_abort,
.bdrv_close = sd_close,
- .bdrv_create = sd_create,
+ .bdrv_co_create_opts = sd_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
.bdrv_get_allocated_file_size = sd_get_allocated_file_size,
@@ -3149,7 +3149,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_pdiscard = sd_co_pdiscard,
- .bdrv_co_get_block_status = sd_co_get_block_status,
+ .bdrv_co_block_status = sd_co_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
@@ -3175,7 +3175,7 @@ static BlockDriver bdrv_sheepdog_unix = {
.bdrv_reopen_commit = sd_reopen_commit,
.bdrv_reopen_abort = sd_reopen_abort,
.bdrv_close = sd_close,
- .bdrv_create = sd_create,
+ .bdrv_co_create_opts = sd_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_getlength = sd_getlength,
.bdrv_get_allocated_file_size = sd_get_allocated_file_size,
@@ -3185,7 +3185,7 @@ static BlockDriver bdrv_sheepdog_unix = {
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_pdiscard = sd_co_pdiscard,
- .bdrv_co_get_block_status = sd_co_get_block_status,
+ .bdrv_co_block_status = sd_co_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
diff --git a/block/ssh.c b/block/ssh.c
index b11d4c5e86..ff9929497d 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -803,6 +803,33 @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
return ret;
}
+/* Note: This is a blocking operation */
+static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp)
+{
+ ssize_t ret;
+ char c[1] = { '\0' };
+ int was_blocking = libssh2_session_get_blocking(s->session);
+
+ /* offset must be strictly greater than the current size so we do
+ * not overwrite anything */
+ assert(offset > 0 && offset > s->attrs.filesize);
+
+ libssh2_session_set_blocking(s->session, 1);
+
+ libssh2_sftp_seek64(s->sftp_handle, offset - 1);
+ ret = libssh2_sftp_write(s->sftp_handle, c, 1);
+
+ libssh2_session_set_blocking(s->session, was_blocking);
+
+ if (ret < 0) {
+ sftp_error_setg(errp, s, "Failed to grow file");
+ return -EIO;
+ }
+
+ s->attrs.filesize = offset;
+ return 0;
+}
+
static QemuOptsList ssh_create_opts = {
.name = "ssh-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(ssh_create_opts.head),
@@ -816,14 +843,13 @@ static QemuOptsList ssh_create_opts = {
}
};
-static int ssh_create(const char *filename, QemuOpts *opts, Error **errp)
+static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts,
+ Error **errp)
{
int r, ret;
int64_t total_size = 0;
QDict *uri_options = NULL;
BDRVSSHState s;
- ssize_t r2;
- char c[1] = { '\0' };
ssh_state_init(&s);
@@ -849,14 +875,10 @@ static int ssh_create(const char *filename, QemuOpts *opts, Error **errp)
}
if (total_size > 0) {
- libssh2_sftp_seek64(s.sftp_handle, total_size-1);
- r2 = libssh2_sftp_write(s.sftp_handle, c, 1);
- if (r2 < 0) {
- sftp_error_setg(errp, &s, "truncate failed");
- ret = -EINVAL;
+ ret = ssh_grow_file(&s, total_size, errp);
+ if (ret < 0) {
goto out;
}
- s.attrs.filesize = total_size;
}
ret = 0;
@@ -1198,18 +1220,42 @@ static int64_t ssh_getlength(BlockDriverState *bs)
return length;
}
+static int ssh_truncate(BlockDriverState *bs, int64_t offset,
+ PreallocMode prealloc, Error **errp)
+{
+ BDRVSSHState *s = bs->opaque;
+
+ if (prealloc != PREALLOC_MODE_OFF) {
+ error_setg(errp, "Unsupported preallocation mode '%s'",
+ PreallocMode_str(prealloc));
+ return -ENOTSUP;
+ }
+
+ if (offset < s->attrs.filesize) {
+ error_setg(errp, "ssh driver does not support shrinking files");
+ return -ENOTSUP;
+ }
+
+ if (offset == s->attrs.filesize) {
+ return 0;
+ }
+
+ return ssh_grow_file(s, offset, errp);
+}
+
static BlockDriver bdrv_ssh = {
.format_name = "ssh",
.protocol_name = "ssh",
.instance_size = sizeof(BDRVSSHState),
.bdrv_parse_filename = ssh_parse_filename,
.bdrv_file_open = ssh_file_open,
- .bdrv_create = ssh_create,
+ .bdrv_co_create_opts = ssh_co_create_opts,
.bdrv_close = ssh_close,
.bdrv_has_zero_init = ssh_has_zero_init,
.bdrv_co_readv = ssh_co_readv,
.bdrv_co_writev = ssh_co_writev,
.bdrv_getlength = ssh_getlength,
+ .bdrv_truncate = ssh_truncate,
.bdrv_co_flush_to_disk = ssh_co_flush,
.create_opts = &ssh_create_opts,
};
diff --git a/block/throttle.c b/block/throttle.c
index 495f88c752..5f4d43d0fc 100644
--- a/block/throttle.c
+++ b/block/throttle.c
@@ -240,7 +240,7 @@ static BlockDriver bdrv_throttle = {
.bdrv_reopen_prepare = throttle_reopen_prepare,
.bdrv_reopen_commit = throttle_reopen_commit,
.bdrv_reopen_abort = throttle_reopen_abort,
- .bdrv_co_get_block_status = bdrv_co_get_block_status_from_file,
+ .bdrv_co_block_status = bdrv_co_block_status_from_file,
.bdrv_co_drain_begin = throttle_co_drain_begin,
.bdrv_co_drain_end = throttle_co_drain_end,
diff --git a/block/vdi.c b/block/vdi.c
index fc1c614cb1..68592cc58d 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -87,12 +87,18 @@
#define DEFAULT_CLUSTER_SIZE (1 * MiB)
#if defined(CONFIG_VDI_DEBUG)
-#define logout(fmt, ...) \
- fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__)
+#define VDI_DEBUG 1
#else
-#define logout(fmt, ...) ((void)0)
+#define VDI_DEBUG 0
#endif
+#define logout(fmt, ...) \
+ do { \
+ if (VDI_DEBUG) { \
+ fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__); \
+ } \
+ } while (0)
+
/* Image signature. */
#define VDI_SIGNATURE 0xbeda107f
@@ -166,8 +172,6 @@ typedef struct {
uint32_t *bmap;
/* Size of block (bytes). */
uint32_t block_size;
- /* Size of block (sectors). */
- uint32_t block_sectors;
/* First sector of block map. */
uint32_t bmap_sector;
/* VDI header (converted to host endianness). */
@@ -457,7 +461,6 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
bs->total_sectors = header.disk_size / SECTOR_SIZE;
s->block_size = header.block_size;
- s->block_sectors = header.block_size / SECTOR_SIZE;
s->bmap_sector = header.offset_bmap / SECTOR_SIZE;
s->header = header;
@@ -503,33 +506,29 @@ static int vdi_reopen_prepare(BDRVReopenState *state,
return 0;
}
-static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
+static int coroutine_fn vdi_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
{
- /* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
- size_t bmap_index = sector_num / s->block_sectors;
- size_t sector_in_block = sector_num % s->block_sectors;
- int n_sectors = s->block_sectors - sector_in_block;
+ size_t bmap_index = offset / s->block_size;
+ size_t index_in_block = offset % s->block_size;
uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]);
- uint64_t offset;
int result;
- logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum);
- if (n_sectors > nb_sectors) {
- n_sectors = nb_sectors;
- }
- *pnum = n_sectors;
+ logout("%p, %" PRId64 ", %" PRId64 ", %p\n", bs, offset, bytes, pnum);
+ *pnum = MIN(s->block_size - index_in_block, bytes);
result = VDI_IS_ALLOCATED(bmap_entry);
if (!result) {
return 0;
}
- offset = s->header.offset_data +
- (uint64_t)bmap_entry * s->block_size +
- sector_in_block * SECTOR_SIZE;
+ *map = s->header.offset_data + (uint64_t)bmap_entry * s->block_size +
+ index_in_block;
*file = bs->file->bs;
- return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
}
static int coroutine_fn
@@ -717,7 +716,8 @@ nonallocating_write:
return ret;
}
-static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
+static int coroutine_fn vdi_co_create_opts(const char *filename, QemuOpts *opts,
+ Error **errp)
{
int ret = 0;
uint64_t bytes = 0;
@@ -895,9 +895,9 @@ static BlockDriver bdrv_vdi = {
.bdrv_close = vdi_close,
.bdrv_reopen_prepare = vdi_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
- .bdrv_create = vdi_create,
+ .bdrv_co_create_opts = vdi_co_create_opts,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_co_get_block_status = vdi_co_get_block_status,
+ .bdrv_co_block_status = vdi_co_block_status,
.bdrv_make_empty = vdi_make_empty,
.bdrv_co_preadv = vdi_co_preadv,
diff --git a/block/vhdx.c b/block/vhdx.c
index c449c5dcfd..3fbff5048b 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1792,7 +1792,8 @@ exit:
* .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------.
* 1MB
*/
-static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
+static int coroutine_fn vhdx_co_create_opts(const char *filename, QemuOpts *opts,
+ Error **errp)
{
int ret = 0;
uint64_t image_size = (uint64_t) 2 * GiB;
@@ -2003,7 +2004,7 @@ static BlockDriver bdrv_vhdx = {
.bdrv_child_perm = bdrv_format_default_perms,
.bdrv_co_readv = vhdx_co_readv,
.bdrv_co_writev = vhdx_co_writev,
- .bdrv_create = vhdx_create,
+ .bdrv_co_create_opts = vhdx_co_create_opts,
.bdrv_get_info = vhdx_get_info,
.bdrv_check = vhdx_check,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
diff --git a/block/vmdk.c b/block/vmdk.c
index ef15ddbfd3..67342ed69b 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1304,33 +1304,27 @@ static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent,
return extent_relative_offset % cluster_size;
}
-static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent,
- int64_t sector_num)
-{
- uint64_t offset;
- offset = vmdk_find_offset_in_cluster(extent, sector_num * BDRV_SECTOR_SIZE);
- return offset / BDRV_SECTOR_SIZE;
-}
-
-static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
+static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
{
BDRVVmdkState *s = bs->opaque;
int64_t index_in_cluster, n, ret;
- uint64_t offset;
+ uint64_t cluster_offset;
VmdkExtent *extent;
- extent = find_extent(s, sector_num, NULL);
+ extent = find_extent(s, offset >> BDRV_SECTOR_BITS, NULL);
if (!extent) {
- return 0;
+ return -EIO;
}
qemu_co_mutex_lock(&s->lock);
- ret = get_cluster_offset(bs, extent, NULL,
- sector_num * 512, false, &offset,
+ ret = get_cluster_offset(bs, extent, NULL, offset, false, &cluster_offset,
0, 0);
qemu_co_mutex_unlock(&s->lock);
- index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
+ index_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
switch (ret) {
case VMDK_ERROR:
ret = -EIO;
@@ -1345,18 +1339,14 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
ret = BDRV_BLOCK_DATA;
if (!extent->compressed) {
ret |= BDRV_BLOCK_OFFSET_VALID;
- ret |= (offset + (index_in_cluster << BDRV_SECTOR_BITS))
- & BDRV_BLOCK_OFFSET_MASK;
+ *map = cluster_offset + index_in_cluster;
}
*file = extent->file->bs;
break;
}
- n = extent->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
- *pnum = n;
+ n = extent->cluster_sectors * BDRV_SECTOR_SIZE - index_in_cluster;
+ *pnum = MIN(n, bytes);
return ret;
}
@@ -1892,7 +1882,8 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
return VMDK_OK;
}
-static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
+static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts *opts,
+ Error **errp)
{
int idx = 0;
BlockBackend *new_blk = NULL;
@@ -2408,9 +2399,9 @@ static BlockDriver bdrv_vmdk = {
.bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed,
.bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes,
.bdrv_close = vmdk_close,
- .bdrv_create = vmdk_create,
+ .bdrv_co_create_opts = vmdk_co_create_opts,
.bdrv_co_flush_to_disk = vmdk_co_flush,
- .bdrv_co_get_block_status = vmdk_co_get_block_status,
+ .bdrv_co_block_status = vmdk_co_block_status,
.bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
.bdrv_has_zero_init = vmdk_has_zero_init,
.bdrv_get_specific_info = vmdk_get_specific_info,
diff --git a/block/vpc.c b/block/vpc.c
index cfa5144e86..b2e2b9ebd4 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -706,53 +706,54 @@ fail:
return ret;
}
-static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
+static int coroutine_fn vpc_co_block_status(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
{
BDRVVPCState *s = bs->opaque;
VHDFooter *footer = (VHDFooter*) s->footer_buf;
- int64_t start, offset;
+ int64_t image_offset;
bool allocated;
- int64_t ret;
- int n;
+ int ret;
+ int64_t n;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
- *pnum = nb_sectors;
+ *pnum = bytes;
+ *map = offset;
*file = bs->file->bs;
- return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
- (sector_num << BDRV_SECTOR_BITS);
+ return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
}
qemu_co_mutex_lock(&s->lock);
- offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false, NULL);
- start = offset;
- allocated = (offset != -1);
+ image_offset = get_image_offset(bs, offset, false, NULL);
+ allocated = (image_offset != -1);
*pnum = 0;
ret = 0;
do {
/* All sectors in a block are contiguous (without using the bitmap) */
- n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
- - sector_num;
- n = MIN(n, nb_sectors);
+ n = ROUND_UP(offset + 1, s->block_size) - offset;
+ n = MIN(n, bytes);
*pnum += n;
- sector_num += n;
- nb_sectors -= n;
+ offset += n;
+ bytes -= n;
/* *pnum can't be greater than one block for allocated
* sectors since there is always a bitmap in between. */
if (allocated) {
*file = bs->file->bs;
- ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+ *map = image_offset;
+ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
break;
}
- if (nb_sectors == 0) {
+ if (bytes == 0) {
break;
}
- offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false,
- NULL);
- } while (offset == -1);
+ image_offset = get_image_offset(bs, offset, false, NULL);
+ } while (image_offset == -1);
qemu_co_mutex_unlock(&s->lock);
return ret;
@@ -896,7 +897,8 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
return ret;
}
-static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
+static int coroutine_fn vpc_co_create_opts(const char *filename, QemuOpts *opts,
+ Error **errp)
{
uint8_t buf[1024];
VHDFooter *footer = (VHDFooter *) buf;
@@ -1094,11 +1096,11 @@ static BlockDriver bdrv_vpc = {
.bdrv_close = vpc_close,
.bdrv_reopen_prepare = vpc_reopen_prepare,
.bdrv_child_perm = bdrv_format_default_perms,
- .bdrv_create = vpc_create,
+ .bdrv_co_create_opts = vpc_co_create_opts,
.bdrv_co_preadv = vpc_co_preadv,
.bdrv_co_pwritev = vpc_co_pwritev,
- .bdrv_co_get_block_status = vpc_co_get_block_status,
+ .bdrv_co_block_status = vpc_co_block_status,
.bdrv_get_info = vpc_get_info,
diff --git a/block/vvfat.c b/block/vvfat.c
index 7e06ebacf6..4a17a49e12 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -3088,15 +3088,13 @@ vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
return ret;
}
-static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *n, BlockDriverState **file)
+static int coroutine_fn vvfat_co_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t offset,
+ int64_t bytes, int64_t *n,
+ int64_t *map,
+ BlockDriverState **file)
{
- *n = bs->total_sectors - sector_num;
- if (*n > nb_sectors) {
- *n = nb_sectors;
- } else if (*n < 0) {
- return 0;
- }
+ *n = bytes;
return BDRV_BLOCK_DATA;
}
@@ -3257,7 +3255,7 @@ static BlockDriver bdrv_vvfat = {
.bdrv_co_preadv = vvfat_co_preadv,
.bdrv_co_pwritev = vvfat_co_pwritev,
- .bdrv_co_get_block_status = vvfat_co_get_block_status,
+ .bdrv_co_block_status = vvfat_co_block_status,
};
static void bdrv_vvfat_init(void)
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index d7fdb1fee3..feb711fb6a 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -426,10 +426,20 @@ Standard Cluster Descriptor:
Compressed Clusters Descriptor (x = 62 - (cluster_bits - 8)):
- Bit 0 - x: Host cluster offset. This is usually _not_ aligned to a
- cluster boundary!
+ Bit 0 - x-1: Host cluster offset. This is usually _not_ aligned to a
+ cluster or sector boundary!
- x+1 - 61: Compressed size of the images in sectors of 512 bytes
+ x - 61: Number of additional 512-byte sectors used for the
+ compressed data, beyond the sector containing the offset
+ in the previous field. Some of these sectors may reside
+ in the next contiguous host cluster.
+
+ Note that the compressed data does not necessarily occupy
+ all of the bytes in the final sector; rather, decompression
+ stops when it has produced a cluster of data.
+
+ Another compressed cluster may map to the tail of the final
+ sector used by this compressed cluster.
If a cluster is unallocated, read requests shall read the data from the backing
file (except if bit 0 in the Standard Cluster Descriptor is set). If there is
diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt
index b0571de4b8..170191a242 100644
--- a/docs/qcow2-cache.txt
+++ b/docs/qcow2-cache.txt
@@ -1,6 +1,6 @@
qcow2 L2/refcount cache configuration
=====================================
-Copyright (C) 2015 Igalia, S.L.
+Copyright (C) 2015, 2018 Igalia, S.L.
Author: Alberto Garcia <berto@igalia.com>
This work is licensed under the terms of the GNU GPL, version 2 or
@@ -118,8 +118,8 @@ There are three options available, and all of them take bytes:
There are two things that need to be taken into account:
- - Both caches must have a size that is a multiple of the cluster
- size.
+ - Both caches must have a size that is a multiple of the cluster size
+ (or the cache entry size: see "Using smaller cache sizes" below).
- If you only set one of the options above, QEMU will automatically
adjust the others so that the L2 cache is 4 times bigger than the
@@ -143,6 +143,46 @@ much less often than the L2 cache, so it's perfectly reasonable to
keep it small.
+Using smaller cache entries
+---------------------------
+The qcow2 L2 cache stores complete tables by default. This means that
+if QEMU needs an entry from an L2 table then the whole table is read
+from disk and is kept in the cache. If the cache is full then a
+complete table needs to be evicted first.
+
+This can be inefficient with large cluster sizes since it results in
+more disk I/O and wastes more cache memory.
+
+Since QEMU 2.12 you can change the size of the L2 cache entry and make
+it smaller than the cluster size. This can be configured using the
+"l2-cache-entry-size" parameter:
+
+ -drive file=hd.qcow2,l2-cache-size=2097152,l2-cache-entry-size=4096
+
+Some things to take into account:
+
+ - The L2 cache entry size has the same restrictions as the cluster
+ size (power of two, at least 512 bytes).
+
+ - Smaller entry sizes generally improve the cache efficiency and make
+ disk I/O faster. This is particularly true with solid state drives
+ so it's a good idea to reduce the entry size in those cases. With
+ rotating hard drives the situation is a bit more complicated so you
+ should test it first and stay with the default size if unsure.
+
+ - Try different entry sizes to see which one gives faster performance
+ in your case. The block size of the host filesystem is generally a
+ good default (usually 4096 bytes in the case of ext4).
+
+ - Only the L2 cache can be configured this way. The refcount cache
+ always uses the cluster size as the entry size.
+
+ - If the L2 cache is big enough to hold all of the image's L2 tables
+ (as explained in the "Choosing the right cache sizes" section
+ earlier in this document) then none of this is necessary and you
+ can omit the "l2-cache-entry-size" parameter altogether.
+
+
Reducing the memory usage
-------------------------
It is possible to clean unused cache entries in order to reduce the
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 257b429381..139c843514 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1087,15 +1087,7 @@ static void ide_flush_cache(IDEState *s)
s->status |= BUSY_STAT;
ide_set_retry(s);
block_acct_start(blk_get_stats(s->blk), &s->acct, 0, BLOCK_ACCT_FLUSH);
-
- if (blk_bs(s->blk)) {
- s->pio_aiocb = blk_aio_flush(s->blk, ide_flush_cb, s);
- } else {
- /* XXX blk_aio_flush() crashes when blk_bs(blk) is NULL, remove this
- * temporary workaround when blk_aio_*() functions handle NULL blk_bs.
- */
- ide_flush_cb(s, 0);
- }
+ s->pio_aiocb = blk_aio_flush(s->blk, ide_flush_cb, s);
}
static void ide_cfata_metadata_inquiry(IDEState *s)
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
new file mode 100644
index 0000000000..a48c744fa8
--- /dev/null
+++ b/include/block/aio-wait.h
@@ -0,0 +1,116 @@
+/*
+ * AioContext wait support
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_AIO_WAIT_H
+#define QEMU_AIO_WAIT_H
+
+#include "block/aio.h"
+
+/**
+ * AioWait:
+ *
+ * An object that facilitates synchronous waiting on a condition. The main
+ * loop can wait on an operation running in an IOThread as follows:
+ *
+ * AioWait *wait = ...;
+ * AioContext *ctx = ...;
+ * MyWork work = { .done = false };
+ * schedule_my_work_in_iothread(ctx, &work);
+ * AIO_WAIT_WHILE(wait, ctx, !work.done);
+ *
+ * The IOThread must call aio_wait_kick() to notify the main loop when
+ * work.done changes:
+ *
+ * static void do_work(...)
+ * {
+ * ...
+ * work.done = true;
+ * aio_wait_kick(wait);
+ * }
+ */
+typedef struct {
+ /* Is the main loop waiting for a kick? Accessed with atomic ops. */
+ bool need_kick;
+} AioWait;
+
+/**
+ * AIO_WAIT_WHILE:
+ * @wait: the aio wait object
+ * @ctx: the aio context
+ * @cond: wait while this conditional expression is true
+ *
+ * Wait while a condition is true. Use this to implement synchronous
+ * operations that require event loop activity.
+ *
+ * The caller must be sure that something calls aio_wait_kick() when the value
+ * of @cond might have changed.
+ *
+ * The caller's thread must be the IOThread that owns @ctx or the main loop
+ * thread (with @ctx acquired exactly once). This function cannot be used to
+ * wait on conditions between two IOThreads since that could lead to deadlock,
+ * go via the main loop instead.
+ */
+#define AIO_WAIT_WHILE(wait, ctx, cond) ({ \
+ bool waited_ = false; \
+ bool busy_ = true; \
+ AioWait *wait_ = (wait); \
+ AioContext *ctx_ = (ctx); \
+ if (in_aio_context_home_thread(ctx_)) { \
+ while ((cond) || busy_) { \
+ busy_ = aio_poll(ctx_, (cond)); \
+ waited_ |= !!(cond) | busy_; \
+ } \
+ } else { \
+ assert(qemu_get_current_aio_context() == \
+ qemu_get_aio_context()); \
+ assert(!wait_->need_kick); \
+ /* Set wait_->need_kick before evaluating cond. */ \
+ atomic_mb_set(&wait_->need_kick, true); \
+ while (busy_) { \
+ if ((cond)) { \
+ waited_ = busy_ = true; \
+ aio_context_release(ctx_); \
+ aio_poll(qemu_get_aio_context(), true); \
+ aio_context_acquire(ctx_); \
+ } else { \
+ busy_ = aio_poll(ctx_, false); \
+ waited_ |= busy_; \
+ } \
+ } \
+ atomic_set(&wait_->need_kick, false); \
+ } \
+ waited_; })
+
+/**
+ * aio_wait_kick:
+ * @wait: the aio wait object that should re-evaluate its condition
+ *
+ * Wake up the main thread if it is waiting on AIO_WAIT_WHILE(). During
+ * synchronous operations performed in an IOThread, the main thread lets the
+ * IOThread's event loop run, waiting for the operation to complete. A
+ * aio_wait_kick() call will wake up the main thread.
+ */
+void aio_wait_kick(AioWait *wait);
+
+#endif /* QEMU_AIO_WAIT */
diff --git a/include/block/aio.h b/include/block/aio.h
index e9aeeaec94..a1d6b9e249 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -534,11 +534,14 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co);
AioContext *qemu_get_current_aio_context(void);
/**
+ * in_aio_context_home_thread:
* @ctx: the aio context
*
- * Return whether we are running in the I/O thread that manages @ctx.
+ * Return whether we are running in the thread that normally runs @ctx. Note
+ * that acquiring/releasing ctx does not affect the outcome, each AioContext
+ * still only has one home thread that is responsible for running it.
*/
-static inline bool aio_context_in_iothread(AioContext *ctx)
+static inline bool in_aio_context_home_thread(AioContext *ctx)
{
return ctx == qemu_get_current_aio_context();
}
diff --git a/include/block/block.h b/include/block/block.h
index fac401ba3e..8b6db952a2 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -3,6 +3,7 @@
#include "block/aio.h"
#include "qapi/qapi-types-block-core.h"
+#include "block/aio-wait.h"
#include "qemu/iov.h"
#include "qemu/coroutine.h"
#include "block/accounting.h"
@@ -115,19 +116,19 @@ typedef struct HDGeometry {
* BDRV_BLOCK_ZERO: offset reads as zero
* BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
* BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
- * layer (short for DATA || ZERO), set by block layer
- * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this layer
+ * layer rather than any backing, set by block layer
+ * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
+ * layer, set by block layer
*
* Internal flag:
* BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
* that the block layer recompute the answer from the returned
* BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
*
- * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) of
- * the return value (old interface) or the entire map parameter (new
- * interface) represent the offset in the returned BDS that is allocated for
- * the corresponding raw data. However, whether that offset actually
- * contains data also depends on BDRV_BLOCK_DATA, as follows:
+ * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
+ * host offset within the returned BDS that is allocated for the
+ * corresponding raw guest data. However, whether that offset
+ * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
*
* DATA ZERO OFFSET_VALID
* t t t sectors read as zero, returned file is zero at offset
@@ -367,41 +368,14 @@ void bdrv_drain_all_begin(void);
void bdrv_drain_all_end(void);
void bdrv_drain_all(void);
+/* Returns NULL when bs == NULL */
+AioWait *bdrv_get_aio_wait(BlockDriverState *bs);
+
#define BDRV_POLL_WHILE(bs, cond) ({ \
- bool waited_ = false; \
- bool busy_ = true; \
BlockDriverState *bs_ = (bs); \
- AioContext *ctx_ = bdrv_get_aio_context(bs_); \
- if (aio_context_in_iothread(ctx_)) { \
- while ((cond) || busy_) { \
- busy_ = aio_poll(ctx_, (cond)); \
- waited_ |= !!(cond) | busy_; \
- } \
- } else { \
- assert(qemu_get_current_aio_context() == \
- qemu_get_aio_context()); \
- /* Ask bdrv_dec_in_flight to wake up the main \
- * QEMU AioContext. Extra I/O threads never take \
- * other I/O threads' AioContexts (see for example \
- * block_job_defer_to_main_loop for how to do it). \
- */ \
- assert(!bs_->wakeup); \
- /* Set bs->wakeup before evaluating cond. */ \
- atomic_mb_set(&bs_->wakeup, true); \
- while (busy_) { \
- if ((cond)) { \
- waited_ = busy_ = true; \
- aio_context_release(ctx_); \
- aio_poll(qemu_get_aio_context(), true); \
- aio_context_acquire(ctx_); \
- } else { \
- busy_ = aio_poll(ctx_, false); \
- waited_ |= busy_; \
- } \
- } \
- atomic_set(&bs_->wakeup, false); \
- } \
- waited_; })
+ AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_), \
+ bdrv_get_aio_context(bs_), \
+ cond); })
int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 5ea63f8fa8..64a5700f2b 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -26,6 +26,7 @@
#include "block/accounting.h"
#include "block/block.h"
+#include "block/aio-wait.h"
#include "qemu/queue.h"
#include "qemu/coroutine.h"
#include "qemu/stats64.h"
@@ -128,7 +129,8 @@ struct BlockDriver {
int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
Error **errp);
void (*bdrv_close)(BlockDriverState *bs);
- int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp);
+ int coroutine_fn (*bdrv_co_create_opts)(const char *filename, QemuOpts *opts,
+ Error **errp);
int (*bdrv_make_empty)(BlockDriverState *bs);
void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options);
@@ -202,15 +204,22 @@ struct BlockDriver {
/*
* Building block for bdrv_block_status[_above] and
* bdrv_is_allocated[_above]. The driver should answer only
- * according to the current layer, and should not set
- * BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW. See block.h
- * for the meaning of _DATA, _ZERO, and _OFFSET_VALID. The block
- * layer guarantees input aligned to request_alignment, as well as
- * non-NULL pnum and file.
+ * according to the current layer, and should only need to set
+ * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
+ * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
+ * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See
+ * block.h for the overall meaning of the bits. As a hint, the
+ * flag want_zero is true if the caller cares more about precise
+ * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
+ * overall allocation (favor larger *pnum, perhaps by reporting
+ * _DATA instead of _ZERO). The block layer guarantees input
+ * clamped to bdrv_getlength() and aligned to request_alignment,
+ * as well as non-NULL pnum, map, and file; in turn, the driver
+ * must return an error or set pnum to an aligned non-zero value.
*/
- int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum,
- BlockDriverState **file);
+ int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
+ bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
/*
* Invalidate any cached meta-data.
@@ -709,10 +718,8 @@ struct BlockDriverState {
unsigned int in_flight;
unsigned int serialising_in_flight;
- /* Internal to BDRV_POLL_WHILE and bdrv_wakeup. Accessed with atomic
- * ops.
- */
- bool wakeup;
+ /* Kicked to signal main loop when a request completes. */
+ AioWait wait;
/* counter for nested bdrv_io_plug.
* Accessed with atomic ops.
@@ -1031,23 +1038,27 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
uint64_t *nperm, uint64_t *nshared);
/*
- * Default implementation for drivers to pass bdrv_co_get_block_status() to
+ * Default implementation for drivers to pass bdrv_co_block_status() to
* their file.
*/
-int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors,
- int *pnum,
- BlockDriverState **file);
+int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file);
/*
- * Default implementation for drivers to pass bdrv_co_get_block_status() to
+ * Default implementation for drivers to pass bdrv_co_block_status() to
* their backing file.
*/
-int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors,
- int *pnum,
- BlockDriverState **file);
+int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file);
const char *bdrv_get_parent_name(const BlockDriverState *bs);
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
bool blk_dev_has_removable_media(BlockBackend *blk);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 5c5921bfb7..00475f08d4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3676,7 +3676,8 @@
#
# @node-name: node name. Note that errors may be reported for the root node
# that is directly attached to a guest device rather than for the
-# node where the error occurred. (Since: 2.8)
+# node where the error occurred. The node name is not present if
+# the drive is empty. (Since: 2.8)
#
# @operation: I/O operation
#
@@ -3707,7 +3708,8 @@
#
##
{ 'event': 'BLOCK_IO_ERROR',
- 'data': { 'device': 'str', 'node-name': 'str', 'operation': 'IoOperationType',
+ 'data': { 'device': 'str', '*node-name': 'str',
+ 'operation': 'IoOperationType',
'action': 'BlockErrorAction', '*nospace': 'bool',
'reason': 'str' } }
diff --git a/qemu-img.c b/qemu-img.c
index 40bf7aa7d1..088d89043e 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3469,7 +3469,7 @@ static int img_resize(int argc, char **argv)
}
}
if (optind != argc - 1) {
- error_exit("Expecting one image file name");
+ error_exit("Expecting image file name and size");
}
filename = argv[optind++];
diff --git a/tests/Makefile.include b/tests/Makefile.include
index fdca062591..ef9b88c369 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -92,6 +92,7 @@ gcov-files-test-hbitmap-y = blockjob.c
check-unit-y += tests/test-bdrv-drain$(EXESUF)
check-unit-y += tests/test-blockjob$(EXESUF)
check-unit-y += tests/test-blockjob-txn$(EXESUF)
+check-unit-y += tests/test-block-backend$(EXESUF)
check-unit-y += tests/test-x86-cpuid$(EXESUF)
# all code tested by test-x86-cpuid is inside topology.h
gcov-files-test-x86-cpuid-y =
@@ -622,6 +623,7 @@ tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y)
tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y)
tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y)
+tests/test-block-backend$(EXESUF): tests/test-block-backend.o $(test-block-obj-y) $(test-util-obj-y)
tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
tests/test-iov$(EXESUF): tests/test-iov.o $(test-util-obj-y)
tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y) $(test-crypto-obj-y)
diff --git a/tests/qemu-iotests/033 b/tests/qemu-iotests/033
index 2cdfd1397a..a1d8357331 100755
--- a/tests/qemu-iotests/033
+++ b/tests/qemu-iotests/033
@@ -64,6 +64,9 @@ do_test()
} | $QEMU_IO $IO_EXTRA_ARGS
}
+echo
+echo "=== Test aligned and misaligned write zeroes operations ==="
+
for write_zero_cmd in "write -z" "aio_write -z"; do
for align in 512 4k; do
echo
@@ -102,7 +105,33 @@ for align in 512 4k; do
done
done
+
+# Trigger truncate that would shrink qcow2 L1 table, which is done by
+# clearing one entry (8 bytes) with bdrv_co_pwrite_zeroes()
+
+echo
+echo "=== Test misaligned write zeroes via truncate ==="
+echo
+
+# any size will do, but the smaller the size the smaller the required image
+CLUSTER_SIZE=$((4 * 1024))
+L2_COVERAGE=$(($CLUSTER_SIZE * $CLUSTER_SIZE / 8))
+_make_test_img $(($L2_COVERAGE * 2))
+
+do_test 512 "write -P 1 0 0x200" "$TEST_IMG" | _filter_qemu_io
+# next L2 table
+do_test 512 "write -P 1 $L2_COVERAGE 0x200" "$TEST_IMG" | _filter_qemu_io
+
+# only interested in qcow2 here; also other formats might respond with
+# "not supported" error message
+if [ $IMGFMT = "qcow2" ]; then
+ do_test 512 "truncate $L2_COVERAGE" "$TEST_IMG" | _filter_qemu_io
+fi
+
+do_test 512 "read -P 1 0 0x200" "$TEST_IMG" | _filter_qemu_io
+
# success, all done
+echo
echo "*** done"
rm -f $seq.full
status=0
diff --git a/tests/qemu-iotests/033.out b/tests/qemu-iotests/033.out
index 95929eff70..9683f6b290 100644
--- a/tests/qemu-iotests/033.out
+++ b/tests/qemu-iotests/033.out
@@ -1,6 +1,8 @@
QA output created by 033
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
+=== Test aligned and misaligned write zeroes operations ===
+
== preparing image ==
wrote 1024/1024 bytes at offset 512
1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -164,4 +166,15 @@ read 512/512 bytes at offset 512
read 3072/3072 bytes at offset 1024
3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Test misaligned write zeroes via truncate ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 2097152
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
*** done
diff --git a/tests/test-block-backend.c b/tests/test-block-backend.c
new file mode 100644
index 0000000000..fd59f02bd0
--- /dev/null
+++ b/tests/test-block-backend.c
@@ -0,0 +1,82 @@
+/*
+ * BlockBackend tests
+ *
+ * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+
+static void test_drain_aio_error_flush_cb(void *opaque, int ret)
+{
+ bool *completed = opaque;
+
+ g_assert(ret == -ENOMEDIUM);
+ *completed = true;
+}
+
+static void test_drain_aio_error(void)
+{
+ BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+ BlockAIOCB *acb;
+ bool completed = false;
+
+ acb = blk_aio_flush(blk, test_drain_aio_error_flush_cb, &completed);
+ g_assert(acb != NULL);
+ g_assert(completed == false);
+
+ blk_drain(blk);
+ g_assert(completed == true);
+
+ blk_unref(blk);
+}
+
+static void test_drain_all_aio_error(void)
+{
+ BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+ BlockAIOCB *acb;
+ bool completed = false;
+
+ acb = blk_aio_flush(blk, test_drain_aio_error_flush_cb, &completed);
+ g_assert(acb != NULL);
+ g_assert(completed == false);
+
+ blk_drain_all();
+ g_assert(completed == true);
+
+ blk_unref(blk);
+}
+
+int main(int argc, char **argv)
+{
+ bdrv_init();
+ qemu_init_main_loop(&error_abort);
+
+ g_test_init(&argc, &argv, NULL);
+
+ g_test_add_func("/block-backend/drain_aio_error", test_drain_aio_error);
+ g_test_add_func("/block-backend/drain_all_aio_error",
+ test_drain_all_aio_error);
+
+ return g_test_run();
+}
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 3fb611631f..ae90b9963d 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -1,7 +1,7 @@
util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
util-obj-y += bufferiszero.o
util-obj-y += lockcnt.o
-util-obj-y += aiocb.o async.o thread-pool.o qemu-timer.o
+util-obj-y += aiocb.o async.o aio-wait.o thread-pool.o qemu-timer.o
util-obj-y += main-loop.o iohandler.o
util-obj-$(CONFIG_POSIX) += aio-posix.o
util-obj-$(CONFIG_POSIX) += compatfd.o
diff --git a/util/aio-wait.c b/util/aio-wait.c
new file mode 100644
index 0000000000..a487cdb852
--- /dev/null
+++ b/util/aio-wait.c
@@ -0,0 +1,40 @@
+/*
+ * AioContext wait support
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "block/aio-wait.h"
+
+static void dummy_bh_cb(void *opaque)
+{
+ /* The point is to make AIO_WAIT_WHILE()'s aio_poll() return */
+}
+
+void aio_wait_kick(AioWait *wait)
+{
+ /* The barrier (or an atomic op) is in the caller. */
+ if (atomic_read(&wait->need_kick)) {
+ aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
+ }
+}