diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2019-10-28 14:40:00 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2019-10-28 14:40:01 +0000 |
commit | aaffb853359829a37daaf883c773e8320b55c723 (patch) | |
tree | 65d111156f4480afe7cb6c4552d490eb1947e3cd | |
parent | 9bb73502321d46f4d320fa17aa38201445783fc4 (diff) | |
parent | ba9c45139e2938b8d20ce407db83a31bc9e5066c (diff) |
Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2019-10-28' into staging
Block patches for softfreeze:
- iotest patches
- Improve performance of the mirror block job in write-blocking mode
- Limit memory usage for the backup block job
- Add discard and write-zeroes support to the NVMe host block driver
- Fix a bug in the mirror job
- Prevent the qcow2 driver from creating technically non-compliant qcow2
v3 images (where there is not enough extra data for snapshot table
entries)
- Allow callers of bdrv_truncate() (etc.) to determine whether the file
must be resized to the exact given size or whether it is OK for block
devices not to shrink
# gpg: Signature made Mon 28 Oct 2019 12:13:53 GMT
# gpg: using RSA key 91BEB60A30DB3E8857D11829F407DB0061D5CF40
# gpg: issuer "mreitz@redhat.com"
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>" [full]
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1 1829 F407 DB00 61D5 CF40
* remotes/maxreitz/tags/pull-block-2019-10-28: (69 commits)
qemu-iotests: restrict 264 to qcow2 only
Revert "qemu-img: Check post-truncation size"
block: Pass truncate exact=true where reasonable
block: Let format drivers pass @exact
block: Evaluate @exact in protocol drivers
block: Add @exact parameter to bdrv_co_truncate()
block: Do not truncate file node when formatting
block/cor: Drop cor_co_truncate()
block: Handle filter truncation like native impl.
iotests: Test qcow2's snapshot table handling
iotests: Add peek_file* functions
qcow2: Fix v3 snapshot table entry compliancy
qcow2: Repair snapshot table with too many entries
qcow2: Fix overly long snapshot tables
qcow2: Keep track of the snapshot table length
qcow2: Fix broken snapshot table entries
qcow2: Add qcow2_check_fix_snapshot_table()
qcow2: Separate qcow2_check_read_snapshot_table()
qcow2: Write v3-compliant snapshot list on upgrade
qcow2: Put qcow2_upgrade() into its own function
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
84 files changed, 2232 insertions, 486 deletions
diff --git a/block/block-backend.c b/block/block-backend.c index eb22ff306e..8b8f2a80a0 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1178,9 +1178,10 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, return ret; } -int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) +int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, + unsigned int bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) { int ret; BlockDriverState *bs; @@ -1207,11 +1208,19 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, flags |= BDRV_REQ_FUA; } - ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags); + ret = bdrv_co_pwritev_part(blk->root, offset, bytes, qiov, qiov_offset, + flags); bdrv_dec_in_flight(bs); return ret; } +int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags); +} + typedef struct BlkRwCo { BlockBackend *blk; int64_t offset; @@ -2063,15 +2072,15 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, BDRV_REQ_WRITE_COMPRESSED); } -int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc, - Error **errp) +int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, + PreallocMode prealloc, Error **errp) { if (!blk_is_available(blk)) { error_setg(errp, "No medium inserted"); return -ENOMEDIUM; } - return bdrv_truncate(blk->root, offset, prealloc, errp); + return bdrv_truncate(blk->root, offset, exact, prealloc, errp); } static void blk_pdiscard_entry(void *opaque) diff --git a/block/block-copy.c b/block/block-copy.c index 066e3a7274..c39cc9cffe 100644 --- a/block/block-copy.c +++ b/block/block-copy.c @@ -18,6 +18,11 @@ #include "qapi/error.h" #include "block/block-copy.h" #include "sysemu/block-backend.h" +#include "qemu/units.h" + +#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB) +#define BLOCK_COPY_MAX_BUFFER (1 * MiB) +#define BLOCK_COPY_MAX_MEM (128 * MiB) static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s, int64_t start, @@ -61,6 +66,7 @@ void block_copy_state_free(BlockCopyState *s) } bdrv_release_dirty_bitmap(s->copy_bitmap); + shres_destroy(s->mem); g_free(s); } @@ -71,8 +77,9 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, BlockCopyState *s; BdrvDirtyBitmap *copy_bitmap; uint32_t max_transfer = - MIN_NON_ZERO(INT_MAX, MIN_NON_ZERO(source->bs->bl.max_transfer, - target->bs->bl.max_transfer)); + MIN_NON_ZERO(INT_MAX, + MIN_NON_ZERO(source->bs->bl.max_transfer, + target->bs->bl.max_transfer)); copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL, errp); @@ -89,19 +96,31 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, .cluster_size = cluster_size, .len = bdrv_dirty_bitmap_size(copy_bitmap), .write_flags = write_flags, + .mem = shres_create(BLOCK_COPY_MAX_MEM), }; - s->copy_range_size = QEMU_ALIGN_DOWN(max_transfer, cluster_size), - /* - * Set use_copy_range, consider the following: - * 1. Compression is not supported for copy_range. - * 2. copy_range does not respect max_transfer (it's a TODO), so we factor - * that in here. If max_transfer is smaller than the job->cluster_size, - * we do not use copy_range (in that case it's zero after aligning down - * above). - */ - s->use_copy_range = - !(write_flags & BDRV_REQ_WRITE_COMPRESSED) && s->copy_range_size > 0; + if (max_transfer < cluster_size) { + /* + * copy_range does not respect max_transfer. We don't want to bother + * with requests smaller than block-copy cluster size, so fallback to + * buffered copying (read and write respect max_transfer on their + * behalf). + */ + s->use_copy_range = false; + s->copy_size = cluster_size; + } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) { + /* Compression is not supported for copy_range */ + s->use_copy_range = false; + s->copy_size = MAX(cluster_size, BLOCK_COPY_MAX_BUFFER); + } else { + /* + * copy_range does not respect max_transfer (it's a TODO), so we factor + * that in here. + */ + s->use_copy_range = true; + s->copy_size = MIN(MAX(cluster_size, BLOCK_COPY_MAX_COPY_RANGE), + QEMU_ALIGN_DOWN(max_transfer, cluster_size)); + } QLIST_INIT(&s->inflight_reqs); @@ -120,79 +139,71 @@ void block_copy_set_callbacks( } /* - * Copy range to target with a bounce buffer and return the bytes copied. If - * error occurred, return a negative error number + * block_copy_do_copy + * + * Do copy of cluser-aligned chunk. @end is allowed to exceed s->len only to + * cover last cluster when s->len is not aligned to clusters. + * + * No sync here: nor bitmap neighter intersecting requests handling, only copy. + * + * Returns 0 on success. */ -static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s, - int64_t start, - int64_t end, - bool *error_is_read, - void **bounce_buffer) +static int coroutine_fn block_copy_do_copy(BlockCopyState *s, + int64_t start, int64_t end, + bool *error_is_read) { int ret; - int nbytes; + int nbytes = MIN(end, s->len) - start; + void *bounce_buffer = NULL; assert(QEMU_IS_ALIGNED(start, s->cluster_size)); - bdrv_reset_dirty_bitmap(s->copy_bitmap, start, s->cluster_size); - nbytes = MIN(s->cluster_size, s->len - start); - if (!*bounce_buffer) { - *bounce_buffer = qemu_blockalign(s->source->bs, s->cluster_size); + assert(QEMU_IS_ALIGNED(end, s->cluster_size)); + assert(end < s->len || end == QEMU_ALIGN_UP(s->len, s->cluster_size)); + + if (s->use_copy_range) { + ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes, + 0, s->write_flags); + if (ret < 0) { + trace_block_copy_copy_range_fail(s, start, ret); + s->use_copy_range = false; + s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); + /* Fallback to read+write with allocated buffer */ + } else { + goto out; + } } - ret = bdrv_co_pread(s->source, start, nbytes, *bounce_buffer, 0); + /* + * In case of failed copy_range request above, we may proceed with buffered + * request larger than BLOCK_COPY_MAX_BUFFER. Still, further requests will + * be properly limited, so don't care too much. + */ + + bounce_buffer = qemu_blockalign(s->source->bs, nbytes); + + ret = bdrv_co_pread(s->source, start, nbytes, bounce_buffer, 0); if (ret < 0) { - trace_block_copy_with_bounce_buffer_read_fail(s, start, ret); + trace_block_copy_read_fail(s, start, ret); if (error_is_read) { *error_is_read = true; } - goto fail; + goto out; } - ret = bdrv_co_pwrite(s->target, start, nbytes, *bounce_buffer, + ret = bdrv_co_pwrite(s->target, start, nbytes, bounce_buffer, s->write_flags); if (ret < 0) { - trace_block_copy_with_bounce_buffer_write_fail(s, start, ret); + trace_block_copy_write_fail(s, start, ret); if (error_is_read) { *error_is_read = false; } - goto fail; + goto out; } - return nbytes; -fail: - bdrv_set_dirty_bitmap(s->copy_bitmap, start, s->cluster_size); - return ret; - -} - -/* - * Copy range to target and return the bytes copied. If error occurred, return a - * negative error number. - */ -static int coroutine_fn block_copy_with_offload(BlockCopyState *s, - int64_t start, - int64_t end) -{ - int ret; - int nr_clusters; - int nbytes; - - assert(QEMU_IS_ALIGNED(s->copy_range_size, s->cluster_size)); - assert(QEMU_IS_ALIGNED(start, s->cluster_size)); - nbytes = MIN(s->copy_range_size, MIN(end, s->len) - start); - nr_clusters = DIV_ROUND_UP(nbytes, s->cluster_size); - bdrv_reset_dirty_bitmap(s->copy_bitmap, start, - s->cluster_size * nr_clusters); - ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes, - 0, s->write_flags); - if (ret < 0) { - trace_block_copy_with_offload_fail(s, start, ret); - bdrv_set_dirty_bitmap(s->copy_bitmap, start, - s->cluster_size * nr_clusters); - return ret; - } +out: + qemu_vfree(bounce_buffer); - return nbytes; + return ret; } /* @@ -271,7 +282,6 @@ int coroutine_fn block_copy(BlockCopyState *s, { int ret = 0; int64_t end = bytes + start; /* bytes */ - void *bounce_buffer = NULL; int64_t status_bytes; BlockCopyInFlightReq req; @@ -289,7 +299,7 @@ int coroutine_fn block_copy(BlockCopyState *s, block_copy_inflight_req_begin(s, &req, start, end); while (start < end) { - int64_t dirty_end; + int64_t next_zero, chunk_end; if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) { trace_block_copy_skip(s, start); @@ -297,10 +307,14 @@ int coroutine_fn block_copy(BlockCopyState *s, continue; /* already copied */ } - dirty_end = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start, - (end - start)); - if (dirty_end < 0) { - dirty_end = end; + chunk_end = MIN(end, start + s->copy_size); + + next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start, + chunk_end - start); + if (next_zero >= 0) { + assert(next_zero > start); /* start is dirty */ + assert(next_zero < chunk_end); /* no need to do MIN() */ + chunk_end = next_zero; } if (s->skip_unallocated) { @@ -311,34 +325,26 @@ int coroutine_fn block_copy(BlockCopyState *s, continue; } /* Clamp to known allocated region */ - dirty_end = MIN(dirty_end, start + status_bytes); + chunk_end = MIN(chunk_end, start + status_bytes); } trace_block_copy_process(s, start); - if (s->use_copy_range) { - ret = block_copy_with_offload(s, start, dirty_end); - if (ret < 0) { - s->use_copy_range = false; - } - } - if (!s->use_copy_range) { - ret = block_copy_with_bounce_buffer(s, start, dirty_end, - error_is_read, &bounce_buffer); - } + bdrv_reset_dirty_bitmap(s->copy_bitmap, start, chunk_end - start); + + co_get_from_shres(s->mem, chunk_end - start); + ret = block_copy_do_copy(s, start, chunk_end, error_is_read); + co_put_to_shres(s->mem, chunk_end - start); if (ret < 0) { + bdrv_set_dirty_bitmap(s->copy_bitmap, start, chunk_end - start); break; } - start += ret; - s->progress_bytes_callback(ret, s->progress_opaque); + s->progress_bytes_callback(chunk_end - start, s->progress_opaque); + start = chunk_end; ret = 0; } - if (bounce_buffer) { - qemu_vfree(bounce_buffer); - } - block_copy_inflight_req_end(&req); return ret; diff --git a/block/commit.c b/block/commit.c index bc8454463d..23c90b3b91 100644 --- a/block/commit.c +++ b/block/commit.c @@ -155,7 +155,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) } if (base_len < len) { - ret = blk_truncate(s->base, len, PREALLOC_MODE_OFF, NULL); + ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, NULL); if (ret) { goto out; } @@ -471,7 +471,8 @@ int bdrv_commit(BlockDriverState *bs) * grow the backing file image if possible. If not possible, * we must return an error */ if (length > backing_length) { - ret = blk_truncate(backing, length, PREALLOC_MODE_OFF, &local_err); + ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, + &local_err); if (ret < 0) { error_report_err(local_err); goto ro_cleanup; diff --git a/block/copy-on-read.c b/block/copy-on-read.c index 6631f30205..e95223d3cb 100644 --- a/block/copy-on-read.c +++ b/block/copy-on-read.c @@ -73,13 +73,6 @@ static int64_t cor_getlength(BlockDriverState *bs) } -static int coroutine_fn cor_co_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) -{ - return bdrv_co_truncate(bs->file, offset, prealloc, errp); -} - - static int coroutine_fn cor_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) @@ -139,7 +132,6 @@ static BlockDriver bdrv_copy_on_read = { .bdrv_child_perm = cor_child_perm, .bdrv_getlength = cor_getlength, - .bdrv_co_truncate = cor_co_truncate, .bdrv_co_preadv = cor_co_preadv, .bdrv_co_pwritev = cor_co_pwritev, diff --git a/block/crypto.c b/block/crypto.c index 7eb698774e..24823835c1 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -113,8 +113,8 @@ static ssize_t block_crypto_init_func(QCryptoBlock *block, * available to the guest, so we must take account of that * which will be used by the crypto header */ - return blk_truncate(data->blk, data->size + headerlen, data->prealloc, - errp); + return blk_truncate(data->blk, data->size + headerlen, false, + data->prealloc, errp); } @@ -297,7 +297,7 @@ static int block_crypto_co_create_generic(BlockDriverState *bs, } static int coroutine_fn -block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, +block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, Error **errp) { BlockCrypto *crypto = bs->opaque; @@ -311,7 +311,7 @@ block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, offset += payload_offset; - return bdrv_co_truncate(bs->file, offset, prealloc, errp); + return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); } static void block_crypto_close(BlockDriverState *bs) diff --git a/block/file-posix.c b/block/file-posix.c index 5d1995a07c..0b7e904d48 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2020,7 +2020,8 @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, } static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) + bool exact, PreallocMode prealloc, + Error **errp) { BDRVRawState *s = bs->opaque; struct stat st; @@ -2033,6 +2034,7 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, } if (S_ISREG(st.st_mode)) { + /* Always resizes to the exact @offset */ return raw_regular_truncate(bs, s->fd, offset, prealloc, errp); } @@ -2043,7 +2045,12 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, } if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) { - if (offset > raw_getlength(bs)) { + int64_t cur_length = raw_getlength(bs); + + if (offset != cur_length && exact) { + error_setg(errp, "Cannot resize device files"); + return -ENOTSUP; + } else if (offset > cur_length) { error_setg(errp, "Cannot grow device files"); return -EINVAL; } diff --git a/block/file-win32.c b/block/file-win32.c index 41f55dfece..77e8ff7b68 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -468,7 +468,8 @@ static void raw_close(BlockDriverState *bs) } static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) + bool exact, PreallocMode prealloc, + Error **errp) { BDRVRawState *s = bs->opaque; LONG low, high; diff --git a/block/gluster.c b/block/gluster.c index 64028b2cba..4fa4a77a47 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1225,6 +1225,7 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, static coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, Error **errp) { diff --git a/block/io.c b/block/io.c index e46d9e8b97..02659f994d 100644 --- a/block/io.c +++ b/block/io.c @@ -3291,8 +3291,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs) /** * Truncate file to 'offset' bytes (needed only for file protocols) + * + * If 'exact' is true, the file must be resized to exactly the given + * 'offset'. Otherwise, it is sufficient for the node to be at least + * 'offset' bytes in length. */ -int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, +int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, PreallocMode prealloc, Error **errp) { BlockDriverState *bs = child->bs; @@ -3347,20 +3351,19 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, goto out; } - if (!drv->bdrv_co_truncate) { - if (bs->file && drv->is_filter) { - ret = bdrv_co_truncate(bs->file, offset, prealloc, errp); - goto out; - } + if (drv->bdrv_co_truncate) { + ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, errp); + } else if (bs->file && drv->is_filter) { + ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); + } else { error_setg(errp, "Image format driver does not support resize"); ret = -ENOTSUP; goto out; } - - ret = drv->bdrv_co_truncate(bs, offset, prealloc, errp); if (ret < 0) { goto out; } + ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); if (ret < 0) { error_setg_errno(errp, -ret, "Could not refresh total sector count"); @@ -3382,6 +3385,7 @@ out: typedef struct TruncateCo { BdrvChild *child; int64_t offset; + bool exact; PreallocMode prealloc; Error **errp; int ret; @@ -3390,18 +3394,19 @@ typedef struct TruncateCo { static void coroutine_fn bdrv_truncate_co_entry(void *opaque) { TruncateCo *tco = opaque; - tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->prealloc, - tco->errp); + tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact, + tco->prealloc, tco->errp); aio_wait_kick(); } -int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, - Error **errp) +int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, + PreallocMode prealloc, Error **errp) { Coroutine *co; TruncateCo tco = { .child = child, .offset = offset, + .exact = exact, .prealloc = prealloc, .errp = errp, .ret = NOT_DONE, diff --git a/block/iscsi.c b/block/iscsi.c index 2ced15066a..2aea7e3f13 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -2123,9 +2123,11 @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state) } static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) + bool exact, PreallocMode prealloc, + Error **errp) { IscsiLun *iscsilun = bs->opaque; + int64_t cur_length; Error *local_err = NULL; if (prealloc != PREALLOC_MODE_OFF) { @@ -2145,7 +2147,11 @@ static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, return -EIO; } - if (offset > iscsi_getlength(bs)) { + cur_length = iscsi_getlength(bs); + if (offset != cur_length && exact) { + error_setg(errp, "Cannot resize iSCSI devices"); + return -ENOTSUP; + } else if (offset > cur_length) { error_setg(errp, "Cannot grow iSCSI devices"); return -EINVAL; } diff --git a/block/mirror.c b/block/mirror.c index a6c50caea4..f0f2d9dff1 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -620,11 +620,11 @@ static int mirror_exit_common(Job *job) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); BlockJob *bjob = &s->common; - MirrorBDSOpaque *bs_opaque = s->mirror_top_bs->opaque; + MirrorBDSOpaque *bs_opaque; AioContext *replace_aio_context = NULL; - BlockDriverState *src = s->mirror_top_bs->backing->bs; - BlockDriverState *target_bs = blk_bs(s->target); - BlockDriverState *mirror_top_bs = s->mirror_top_bs; + BlockDriverState *src; + BlockDriverState *target_bs; + BlockDriverState *mirror_top_bs; Error *local_err = NULL; bool abort = job->ret < 0; int ret = 0; @@ -634,6 +634,11 @@ static int mirror_exit_common(Job *job) } s->prepared = true; + mirror_top_bs = s->mirror_top_bs; + bs_opaque = mirror_top_bs->opaque; + src = mirror_top_bs->backing->bs; + target_bs = blk_bs(s->target); + if (bdrv_chain_contains(src, target_bs)) { bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs); } @@ -873,8 +878,8 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) } if (s->bdev_length > base_length) { - ret = blk_truncate(s->target, s->bdev_length, PREALLOC_MODE_OFF, - NULL); + ret = blk_truncate(s->target, s->bdev_length, false, + PREALLOC_MODE_OFF, NULL); if (ret < 0) { goto immediate_exit; } @@ -1181,84 +1186,107 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { - QEMUIOVector target_qiov; - uint64_t dirty_offset = offset; - uint64_t dirty_bytes; + int ret; + size_t qiov_offset = 0; + int64_t bitmap_offset, bitmap_end; - if (qiov) { - qemu_iovec_init(&target_qiov, qiov->niov); + if (!QEMU_IS_ALIGNED(offset, job->granularity) && + bdrv_dirty_bitmap_get(job->dirty_bitmap, offset)) + { + /* + * Dirty unaligned padding: ignore it. + * + * Reasoning: + * 1. If we copy it, we can't reset corresponding bit in + * dirty_bitmap as there may be some "dirty" bytes still not + * copied. + * 2. It's already dirty, so skipping it we don't diverge mirror + * progress. + * + * Note, that because of this, guest write may have no contribution + * into mirror converge, but that's not bad, as we have background + * process of mirroring. If under some bad circumstances (high guest + * IO load) background process starve, we will not converge anyway, + * even if each write will contribute, as guest is not guaranteed to + * rewrite the whole disk. + */ + qiov_offset = QEMU_ALIGN_UP(offset, job->granularity) - offset; + if (bytes <= qiov_offset) { + /* nothing to do after shrink */ + return; + } + offset += qiov_offset; + bytes -= qiov_offset; } - while (true) { - bool valid_area; - int ret; + if (!QEMU_IS_ALIGNED(offset + bytes, job->granularity) && + bdrv_dirty_bitmap_get(job->dirty_bitmap, offset + bytes - 1)) + { + uint64_t tail = (offset + bytes) % job->granularity; - bdrv_dirty_bitmap_lock(job->dirty_bitmap); - dirty_bytes = MIN(offset + bytes - dirty_offset, INT_MAX); - valid_area = bdrv_dirty_bitmap_next_dirty_area(job->dirty_bitmap, - &dirty_offset, - &dirty_bytes); - if (!valid_area) { - bdrv_dirty_bitmap_unlock(job->dirty_bitmap); - break; + if (bytes <= tail) { + /* nothing to do after shrink */ + return; } + bytes -= tail; + } - bdrv_reset_dirty_bitmap_locked(job->dirty_bitmap, - dirty_offset, dirty_bytes); - bdrv_dirty_bitmap_unlock(job->dirty_bitmap); - - job_progress_increase_remaining(&job->common.job, dirty_bytes); - - assert(dirty_offset - offset <= SIZE_MAX); - if (qiov) { - qemu_iovec_reset(&target_qiov); - qemu_iovec_concat(&target_qiov, qiov, - dirty_offset - offset, dirty_bytes); - } + /* + * Tails are either clean or shrunk, so for bitmap resetting + * we safely align the range down. + */ + bitmap_offset = QEMU_ALIGN_UP(offset, job->granularity); + bitmap_end = QEMU_ALIGN_DOWN(offset + bytes, job->granularity); + if (bitmap_offset < bitmap_end) { + bdrv_reset_dirty_bitmap(job->dirty_bitmap, bitmap_offset, + bitmap_end - bitmap_offset); + } - switch (method) { - case MIRROR_METHOD_COPY: - ret = blk_co_pwritev(job->target, dirty_offset, dirty_bytes, - qiov ? &target_qiov : NULL, flags); - break; + job_progress_increase_remaining(&job->common.job, bytes); - case MIRROR_METHOD_ZERO: - assert(!qiov); - ret = blk_co_pwrite_zeroes(job->target, dirty_offset, dirty_bytes, - flags); - break; + switch (method) { + case MIRROR_METHOD_COPY: + ret = blk_co_pwritev_part(job->target, offset, bytes, + qiov, qiov_offset, flags); + break; - case MIRROR_METHOD_DISCARD: - assert(!qiov); - ret = blk_co_pdiscard(job->target, dirty_offset, dirty_bytes); - break; + case MIRROR_METHOD_ZERO: + assert(!qiov); + ret = blk_co_pwrite_zeroes(job->target, offset, bytes, flags); + break; - default: - abort(); - } + case MIRROR_METHOD_DISCARD: + assert(!qiov); + ret = blk_co_pdiscard(job->target, offset, bytes); + break; - if (ret >= 0) { - job_progress_update(&job->common.job, dirty_bytes); - } else { - BlockErrorAction action; + default: + abort(); + } - bdrv_set_dirty_bitmap(job->dirty_bitmap, dirty_offset, dirty_bytes); - job->actively_synced = false; + if (ret >= 0) { + job_progress_update(&job->common.job, bytes); + } else { + BlockErrorAction action; - action = mirror_error_action(job, false, -ret); - if (action == BLOCK_ERROR_ACTION_REPORT) { - if (!job->ret) { - job->ret = ret; - } - break; + /* + * We failed, so we should mark dirty the whole area, aligned up. + * Note that we don't care about shrunk tails if any: they were dirty + * at function start, and they must be still dirty, as we've locked + * the region for in-flight op. + */ + bitmap_offset = QEMU_ALIGN_DOWN(offset, job->granularity); + bitmap_end = QEMU_ALIGN_UP(offset + bytes, job->granularity); + bdrv_set_dirty_bitmap(job->dirty_bitmap, bitmap_offset, + bitmap_end - bitmap_offset); + job->actively_synced = false; + + action = mirror_error_action(job, false, -ret); + if (action == BLOCK_ERROR_ACTION_REPORT) { + if (!job->ret) { + job->ret = ret; } } - - dirty_offset += dirty_bytes; - } - - if (qiov) { - qemu_iovec_destroy(&target_qiov); } } @@ -1465,15 +1493,6 @@ static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c, *nshared = BLK_PERM_ALL; } -static void bdrv_mirror_top_refresh_limits(BlockDriverState *bs, Error **errp) -{ - MirrorBDSOpaque *s = bs->opaque; - - if (s && s->job && s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING) { - bs->bl.request_alignment = s->job->granularity; - } -} - /* Dummy node that provides consistent read to its users without requiring it * from its backing file and that allows writes on the backing file chain. */ static BlockDriver bdrv_mirror_top = { @@ -1486,7 +1505,6 @@ static BlockDriver bdrv_mirror_top = { .bdrv_co_block_status = bdrv_co_block_status_from_backing, .bdrv_refresh_filename = bdrv_mirror_top_refresh_filename, .bdrv_child_perm = bdrv_mirror_top_child_perm, - .bdrv_refresh_limits = bdrv_mirror_top_refresh_limits, }; static BlockJob *mirror_start_job( @@ -1634,29 +1652,13 @@ static BlockJob *mirror_start_job( s->should_complete = true; } - /* - * Must be called before we start tracking writes, but after - * - * ((MirrorBlockJob *) - * ((MirrorBDSOpaque *) - * mirror_top_bs->opaque - * )->job - * )->copy_mode - * - * has the correct value. - * (We start tracking writes as of the following - * bdrv_create_dirty_bitmap() call.) - */ - bdrv_refresh_limits(mirror_top_bs, &local_err); - if (local_err) { - error_propagate(errp, local_err); - goto fail; - } - s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); if (!s->dirty_bitmap) { goto fail; } + if (s->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING) { + bdrv_disable_dirty_bitmap(s->dirty_bitmap); + } ret = block_job_add_bdrv(&s->common, "source", bs, 0, BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE | diff --git a/block/nfs.c b/block/nfs.c index 40f23495a0..9a6311e270 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -752,7 +752,7 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) } static int coroutine_fn -nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, +nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, Error **errp) { NFSClient *client = bs->opaque; diff --git a/block/nvme.c b/block/nvme.c index 910872ec59..d41c4bda6e 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -112,6 +112,9 @@ typedef struct { uint64_t max_transfer; bool plugged; + bool supports_write_zeroes; + bool supports_discard; + CoMutex dma_map_lock; CoQueue dma_flush_queue; @@ -423,6 +426,7 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) NvmeIdNs *idns; NvmeLBAF *lbaf; uint8_t *resp; + uint16_t oncs; int r; uint64_t iova; NvmeCmd cmd = { @@ -460,6 +464,10 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) s->max_transfer = MIN_NON_ZERO(s->max_transfer, s->page_size / sizeof(uint64_t) * s->page_size); + oncs = le16_to_cpu(idctrl->oncs); + s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROS); + s->supports_discard = !!(oncs & NVME_ONCS_DSM); + memset(resp, 0, 4096); cmd.cdw10 = 0; @@ -472,6 +480,12 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) s->nsze = le64_to_cpu(idns->nsze); lbaf = &idns->lbaf[NVME_ID_NS_FLBAS_INDEX(idns->flbas)]; + if (NVME_ID_NS_DLFEAT_WRITE_ZEROES(idns->dlfeat) && + NVME_ID_NS_DLFEAT_READ_BEHAVIOR(idns->dlfeat) == + NVME_ID_NS_DLFEAT_READ_BEHAVIOR_ZEROES) { + bs->supported_write_flags |= BDRV_REQ_MAY_UNMAP; + } + if (lbaf->ms) { error_setg(errp, "Namespaces with metadata are not yet supported"); goto out; @@ -766,6 +780,8 @@ static int nvme_file_open(BlockDriverState *bs, QDict *options, int flags, int ret; BDRVNVMeState *s = bs->opaque; + bs->supported_write_flags = BDRV_REQ_FUA; + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &error_abort); device = qemu_opt_get(opts, NVME_BLOCK_OPT_DEVICE); @@ -794,7 +810,6 @@ static int nvme_file_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } } - bs->supported_write_flags = BDRV_REQ_FUA; return 0; fail: nvme_close(bs); @@ -1088,6 +1103,140 @@ static coroutine_fn int nvme_co_flush(BlockDriverState *bs) } +static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, + int bytes, + BdrvRequestFlags flags) +{ + BDRVNVMeState *s = bs->opaque; + NVMeQueuePair *ioq = s->queues[1]; + NVMeRequest *req; + + uint32_t cdw12 = ((bytes >> s->blkshift) - 1) & 0xFFFF; + + if (!s->supports_write_zeroes) { + return -ENOTSUP; + } + + NvmeCmd cmd = { + .opcode = NVME_CMD_WRITE_ZEROS, + .nsid = cpu_to_le32(s->nsid), + .cdw10 = cpu_to_le32((offset >> s->blkshift) & 0xFFFFFFFF), + .cdw11 = cpu_to_le32(((offset >> s->blkshift) >> 32) & 0xFFFFFFFF), + }; + + NVMeCoData data = { + .ctx = bdrv_get_aio_context(bs), + .ret = -EINPROGRESS, + }; + + if (flags & BDRV_REQ_MAY_UNMAP) { + cdw12 |= (1 << 25); + } + + if (flags & BDRV_REQ_FUA) { + cdw12 |= (1 << 30); + } + + cmd.cdw12 = cpu_to_le32(cdw12); + + trace_nvme_write_zeroes(s, offset, bytes, flags); + assert(s->nr_queues > 1); + req = nvme_get_free_req(ioq); + assert(req); + + nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data); + + data.co = qemu_coroutine_self(); + while (data.ret == -EINPROGRESS) { + qemu_coroutine_yield(); + } + + trace_nvme_rw_done(s, true, offset, bytes, data.ret); + return data.ret; +} + + +static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, + int64_t offset, + int bytes) +{ + BDRVNVMeState *s = bs->opaque; + NVMeQueuePair *ioq = s->queues[1]; + NVMeRequest *req; + NvmeDsmRange *buf; + QEMUIOVector local_qiov; + int ret; + + NvmeCmd cmd = { + .opcode = NVME_CMD_DSM, + .nsid = cpu_to_le32(s->nsid), + .cdw10 = cpu_to_le32(0), /*number of ranges - 0 based*/ + .cdw11 = cpu_to_le32(1 << 2), /*deallocate bit*/ + }; + + NVMeCoData data = { + .ctx = bdrv_get_aio_context(bs), + .ret = -EINPROGRESS, + }; + + if (!s->supports_discard) { + return -ENOTSUP; + } + + assert(s->nr_queues > 1); + + buf = qemu_try_blockalign0(bs, s->page_size); + if (!buf) { + return -ENOMEM; + } + + buf->nlb = cpu_to_le32(bytes >> s->blkshift); + buf->slba = cpu_to_le64(offset >> s->blkshift); + buf->cattr = 0; + + qemu_iovec_init(&local_qiov, 1); + qemu_iovec_add(&local_qiov, buf, 4096); + + req = nvme_get_free_req(ioq); + assert(req); + + qemu_co_mutex_lock(&s->dma_map_lock); + ret = nvme_cmd_map_qiov(bs, &cmd, req, &local_qiov); + qemu_co_mutex_unlock(&s->dma_map_lock); + + if (ret) { + req->busy = false; + goto out; + } + + trace_nvme_dsm(s, offset, bytes); + + nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data); + + data.co = qemu_coroutine_self(); + while (data.ret == -EINPROGRESS) { + qemu_coroutine_yield(); + } + + qemu_co_mutex_lock(&s->dma_map_lock); + ret = nvme_cmd_unmap_qiov(bs, &local_qiov); + qemu_co_mutex_unlock(&s->dma_map_lock); + + if (ret) { + goto out; + } + + ret = data.ret; + trace_nvme_dsm_done(s, offset, bytes, ret); +out: + qemu_iovec_destroy(&local_qiov); + qemu_vfree(buf); + return ret; + +} + + static int nvme_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp) { @@ -1192,6 +1341,10 @@ static BlockDriver bdrv_nvme = { .bdrv_co_preadv = nvme_co_preadv, .bdrv_co_pwritev = nvme_co_pwritev, + + .bdrv_co_pwrite_zeroes = nvme_co_pwrite_zeroes, + .bdrv_co_pdiscard = nvme_co_pdiscard, + .bdrv_co_flush_to_disk = nvme_co_flush, .bdrv_reopen_prepare = nvme_reopen_prepare, diff --git a/block/parallels.c b/block/parallels.c index f1dfb03eef..7a01997659 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -203,7 +203,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, } else { ret = bdrv_truncate(bs->file, (s->data_end + space) << BDRV_SECTOR_BITS, - PREALLOC_MODE_OFF, NULL); + false, PREALLOC_MODE_OFF, NULL); } if (ret < 0) { return ret; @@ -487,7 +487,12 @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs, res->leaks += count; if (fix & BDRV_FIX_LEAKS) { Error *local_err = NULL; - ret = bdrv_truncate(bs->file, res->image_end_offset, + + /* + * In order to really repair the image, we must shrink it. + * That means we have to pass exact=true. + */ + ret = bdrv_truncate(bs->file, res->image_end_offset, true, PREALLOC_MODE_OFF, &local_err); if (ret < 0) { error_report_err(local_err); @@ -563,11 +568,6 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, blk_set_allow_write_beyond_eof(blk, true); /* Create image format */ - ret = blk_truncate(blk, 0, PREALLOC_MODE_OFF, errp); - if (ret < 0) { - goto out; - } - bat_entries = DIV_ROUND_UP(total_size, cl_size); bat_sectors = DIV_ROUND_UP(bat_entry_off(bat_entries), cl_size); bat_sectors = (bat_sectors * cl_size) >> BDRV_SECTOR_BITS; @@ -885,7 +885,9 @@ static void parallels_close(BlockDriverState *bs) if ((bs->open_flags & BDRV_O_RDWR) && !(bs->open_flags & BDRV_O_INACTIVE)) { s->header->inuse = 0; parallels_update_header(bs); - bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, + + /* errors are ignored, so we might as well pass exact=true */ + bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true, PREALLOC_MODE_OFF, NULL); } diff --git a/block/qcow.c b/block/qcow.c index 5bdf72ba33..fce8989868 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -480,7 +480,7 @@ static int get_cluster_offset(BlockDriverState *bs, return -E2BIG; } ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size, - PREALLOC_MODE_OFF, NULL); + false, PREALLOC_MODE_OFF, NULL); if (ret < 0) { return ret; } @@ -858,11 +858,6 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, blk_set_allow_write_beyond_eof(qcow_blk, true); /* Create image format */ - ret = blk_truncate(qcow_blk, 0, PREALLOC_MODE_OFF, errp); - if (ret < 0) { - goto exit; - } - memset(&header, 0, sizeof(header)); header.magic = cpu_to_be32(QCOW_MAGIC); header.version = cpu_to_be32(QCOW_VERSION); @@ -1038,7 +1033,7 @@ static int qcow_make_empty(BlockDriverState *bs) if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0) return -1; - ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, + ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, false, PREALLOC_MODE_OFF, NULL); if (ret < 0) return ret; diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 0d64bf5a5e..f67ac6b2d8 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -2016,7 +2016,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, goto resize_fail; } - ret = bdrv_truncate(bs->file, offset + s->cluster_size, + ret = bdrv_truncate(bs->file, offset + s->cluster_size, false, PREALLOC_MODE_OFF, &local_err); if (ret < 0) { error_report_err(local_err); diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index d0e7fa9311..5ab64da1ec 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -29,29 +29,64 @@ #include "qemu/error-report.h" #include "qemu/cutils.h" +static void qcow2_free_single_snapshot(BlockDriverState *bs, int i) +{ + BDRVQcow2State *s = bs->opaque; + + assert(i >= 0 && i < s->nb_snapshots); + g_free(s->snapshots[i].name); + g_free(s->snapshots[i].id_str); + g_free(s->snapshots[i].unknown_extra_data); + memset(&s->snapshots[i], 0, sizeof(s->snapshots[i])); +} + void qcow2_free_snapshots(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; int i; for(i = 0; i < s->nb_snapshots; i++) { - g_free(s->snapshots[i].name); - g_free(s->snapshots[i].id_str); + qcow2_free_single_snapshot(bs, i); } g_free(s->snapshots); s->snapshots = NULL; s->nb_snapshots = 0; } -int qcow2_read_snapshots(BlockDriverState *bs) +/* + * If @repair is true, try to repair a broken snapshot table instead + * of just returning an error: + * + * - If the snapshot table was too long, set *nb_clusters_reduced to + * the number of snapshots removed off the end. + * The caller will update the on-disk nb_snapshots accordingly; + * this leaks clusters, but is safe. + * (The on-disk information must be updated before + * qcow2_check_refcounts(), because that function relies on + * s->nb_snapshots to reflect the on-disk value.) + * + * - If there were snapshots with too much extra metadata, increment + * *extra_data_dropped for each. + * This requires the caller to eventually rewrite the whole snapshot + * table, which requires cluster allocation. Therefore, this should + * be done only after qcow2_check_refcounts() made sure the refcount + * structures are valid. + * (In the meantime, the image is still valid because + * qcow2_check_refcounts() does not do anything with snapshots' + * extra data.) + */ +static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair, + int *nb_clusters_reduced, + int *extra_data_dropped, + Error **errp) { BDRVQcow2State *s = bs->opaque; QCowSnapshotHeader h; QCowSnapshotExtraData extra; QCowSnapshot *sn; int i, id_str_size, name_size; - int64_t offset; - uint32_t extra_data_size; + int64_t offset, pre_sn_offset; + uint64_t table_length = 0; int ret; if (!s->nb_snapshots) { @@ -64,10 +99,16 @@ int qcow2_read_snapshots(BlockDriverState *bs) s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots); for(i = 0; i < s->nb_snapshots; i++) { + bool truncate_unknown_extra_data = false; + + pre_sn_offset = offset; + table_length = ROUND_UP(table_length, 8); + /* Read statically sized part of the snapshot header */ offset = ROUND_UP(offset, 8); ret = bdrv_pread(bs->file, offset, &h, sizeof(h)); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to read snapshot table"); goto fail; } @@ -79,33 +120,77 @@ int qcow2_read_snapshots(BlockDriverState *bs) sn->date_sec = be32_to_cpu(h.date_sec); sn->date_nsec = be32_to_cpu(h.date_nsec); sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec); - extra_data_size = be32_to_cpu(h.extra_data_size); + sn->extra_data_size = be32_to_cpu(h.extra_data_size); id_str_size = be16_to_cpu(h.id_str_size); name_size = be16_to_cpu(h.name_size); - /* Read extra data */ + if (sn->extra_data_size > QCOW_MAX_SNAPSHOT_EXTRA_DATA) { + if (!repair) { + ret = -EFBIG; + error_setg(errp, "Too much extra metadata in snapshot table " + "entry %i", i); + error_append_hint(errp, "You can force-remove this extra " + "metadata with qemu-img check -r all\n"); + goto fail; + } + + fprintf(stderr, "Discarding too much extra metadata in snapshot " + "table entry %i (%" PRIu32 " > %u)\n", + i, sn->extra_data_size, QCOW_MAX_SNAPSHOT_EXTRA_DATA); + + (*extra_data_dropped)++; + truncate_unknown_extra_data = true; + } + + /* Read known extra data */ ret = bdrv_pread(bs->file, offset, &extra, - MIN(sizeof(extra), extra_data_size)); + MIN(sizeof(extra), sn->extra_data_size)); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to read snapshot table"); goto fail; } - offset += extra_data_size; + offset += MIN(sizeof(extra), sn->extra_data_size); - if (extra_data_size >= 8) { + if (sn->extra_data_size >= endof(QCowSnapshotExtraData, + vm_state_size_large)) { sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large); } - if (extra_data_size >= 16) { + if (sn->extra_data_size >= endof(QCowSnapshotExtraData, disk_size)) { sn->disk_size = be64_to_cpu(extra.disk_size); } else { sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; } + if (sn->extra_data_size > sizeof(extra)) { + uint64_t extra_data_end; + size_t unknown_extra_data_size; + + extra_data_end = offset + sn->extra_data_size - sizeof(extra); + + if (truncate_unknown_extra_data) { + sn->extra_data_size = QCOW_MAX_SNAPSHOT_EXTRA_DATA; + } + + /* Store unknown extra data */ + unknown_extra_data_size = sn->extra_data_size - sizeof(extra); + sn->unknown_extra_data = g_malloc(unknown_extra_data_size); + ret = bdrv_pread(bs->file, offset, sn->unknown_extra_data, + unknown_extra_data_size); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to read snapshot table"); + goto fail; + } + offset = extra_data_end; + } + /* Read snapshot ID */ sn->id_str = g_malloc(id_str_size + 1); ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to read snapshot table"); goto fail; } offset += id_str_size; @@ -115,14 +200,47 @@ int qcow2_read_snapshots(BlockDriverState *bs) sn->name = g_malloc(name_size + 1); ret = bdrv_pread(bs->file, offset, sn->name, name_size); if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to read snapshot table"); goto fail; } offset += name_size; sn->name[name_size] = '\0'; - if (offset - s->snapshots_offset > QCOW_MAX_SNAPSHOTS_SIZE) { - ret = -EFBIG; - goto fail; + /* Note that the extra data may have been truncated */ + table_length += sizeof(h) + sn->extra_data_size + id_str_size + + name_size; + if (!repair) { + assert(table_length == offset - s->snapshots_offset); + } + + if (table_length > QCOW_MAX_SNAPSHOTS_SIZE || + offset - s->snapshots_offset > INT_MAX) + { + if (!repair) { + ret = -EFBIG; + error_setg(errp, "Snapshot table is too big"); + error_append_hint(errp, "You can force-remove all %u " + "overhanging snapshots with qemu-img check " + "-r all\n", s->nb_snapshots - i); + goto fail; + } + + fprintf(stderr, "Discarding %u overhanging snapshots (snapshot " + "table is too big)\n", s->nb_snapshots - i); + + *nb_clusters_reduced += (s->nb_snapshots - i); + + /* Discard current snapshot also */ + qcow2_free_single_snapshot(bs, i); + + /* + * This leaks all the rest of the snapshot table and the + * snapshots' clusters, but we run in check -r all mode, + * so qcow2_check_refcounts() will take care of it. + */ + s->nb_snapshots = i; + offset = pre_sn_offset; + break; } } @@ -135,8 +253,13 @@ fail: return ret; } +int qcow2_read_snapshots(BlockDriverState *bs, Error **errp) +{ + return qcow2_do_read_snapshots(bs, false, NULL, NULL, errp); +} + /* add at the end of the file a new list of snapshots */ -static int qcow2_write_snapshots(BlockDriverState *bs) +int qcow2_write_snapshots(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; QCowSnapshot *sn; @@ -156,7 +279,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs) sn = s->snapshots + i; offset = ROUND_UP(offset, 8); offset += sizeof(h); - offset += sizeof(extra); + offset += MAX(sizeof(extra), sn->extra_data_size); offset += strlen(sn->id_str); offset += strlen(sn->name); @@ -203,7 +326,8 @@ static int qcow2_write_snapshots(BlockDriverState *bs) h.date_sec = cpu_to_be32(sn->date_sec); h.date_nsec = cpu_to_be32(sn->date_nsec); h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec); - h.extra_data_size = cpu_to_be32(sizeof(extra)); + h.extra_data_size = cpu_to_be32(MAX(sizeof(extra), + sn->extra_data_size)); memset(&extra, 0, sizeof(extra)); extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size); @@ -228,6 +352,22 @@ static int qcow2_write_snapshots(BlockDriverState *bs) } offset += sizeof(extra); + if (sn->extra_data_size > sizeof(extra)) { + size_t unknown_extra_data_size = + sn->extra_data_size - sizeof(extra); + + /* qcow2_read_snapshots() ensures no unbounded allocation */ + assert(unknown_extra_data_size <= BDRV_REQUEST_MAX_BYTES); + assert(sn->unknown_extra_data); + + ret = bdrv_pwrite(bs->file, offset, sn->unknown_extra_data, + unknown_extra_data_size); + if (ret < 0) { + goto fail; + } + offset += unknown_extra_data_size; + } + ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size); if (ret < 0) { goto fail; @@ -251,7 +391,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs) } QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) != - offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots)); + endof(QCowHeader, nb_snapshots)); header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots); header_data.snapshots_offset = cpu_to_be64(snapshots_offset); @@ -277,6 +417,151 @@ fail: return ret; } +int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix) +{ + BDRVQcow2State *s = bs->opaque; + Error *local_err = NULL; + int nb_clusters_reduced = 0; + int extra_data_dropped = 0; + int ret; + struct { + uint32_t nb_snapshots; + uint64_t snapshots_offset; + } QEMU_PACKED snapshot_table_pointer; + + /* qcow2_do_open() discards this information in check mode */ + ret = bdrv_pread(bs->file, offsetof(QCowHeader, nb_snapshots), + &snapshot_table_pointer, sizeof(snapshot_table_pointer)); + if (ret < 0) { + result->check_errors++; + fprintf(stderr, "ERROR failed to read the snapshot table pointer from " + "the image header: %s\n", strerror(-ret)); + return ret; + } + + s->snapshots_offset = be64_to_cpu(snapshot_table_pointer.snapshots_offset); + s->nb_snapshots = be32_to_cpu(snapshot_table_pointer.nb_snapshots); + + if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS && (fix & BDRV_FIX_ERRORS)) { + fprintf(stderr, "Discarding %u overhanging snapshots\n", + s->nb_snapshots - QCOW_MAX_SNAPSHOTS); + + nb_clusters_reduced += s->nb_snapshots - QCOW_MAX_SNAPSHOTS; + s->nb_snapshots = QCOW_MAX_SNAPSHOTS; + } + + ret = qcow2_validate_table(bs, s->snapshots_offset, s->nb_snapshots, + sizeof(QCowSnapshotHeader), + sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS, + "snapshot table", &local_err); + if (ret < 0) { + result->check_errors++; + error_reportf_err(local_err, "ERROR "); + + if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS) { + fprintf(stderr, "You can force-remove all %u overhanging snapshots " + "with qemu-img check -r all\n", + s->nb_snapshots - QCOW_MAX_SNAPSHOTS); + } + + /* We did not read the snapshot table, so invalidate this information */ + s->snapshots_offset = 0; + s->nb_snapshots = 0; + + return ret; + } + + qemu_co_mutex_unlock(&s->lock); + ret = qcow2_do_read_snapshots(bs, fix & BDRV_FIX_ERRORS, + &nb_clusters_reduced, &extra_data_dropped, + &local_err); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + result->check_errors++; + error_reportf_err(local_err, + "ERROR failed to read the snapshot table: "); + + /* We did not read the snapshot table, so invalidate this information */ + s->snapshots_offset = 0; + s->nb_snapshots = 0; + + return ret; + } + result->corruptions += nb_clusters_reduced + extra_data_dropped; + + if (nb_clusters_reduced) { + /* + * Update image header now, because: + * (1) qcow2_check_refcounts() relies on s->nb_snapshots to be + * the same as what the image header says, + * (2) this leaks clusters, but qcow2_check_refcounts() will + * fix that. + */ + assert(fix & BDRV_FIX_ERRORS); + + snapshot_table_pointer.nb_snapshots = cpu_to_be32(s->nb_snapshots); + ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots), + &snapshot_table_pointer.nb_snapshots, + sizeof(snapshot_table_pointer.nb_snapshots)); + if (ret < 0) { + result->check_errors++; + fprintf(stderr, "ERROR failed to update the snapshot count in the " + "image header: %s\n", strerror(-ret)); + return ret; + } + + result->corruptions_fixed += nb_clusters_reduced; + result->corruptions -= nb_clusters_reduced; + } + + /* + * All of v3 images' snapshot table entries need to have at least + * 16 bytes of extra data. + */ + if (s->qcow_version >= 3) { + int i; + for (i = 0; i < s->nb_snapshots; i++) { + if (s->snapshots[i].extra_data_size < + sizeof_field(QCowSnapshotExtraData, vm_state_size_large) + + sizeof_field(QCowSnapshotExtraData, disk_size)) + { + result->corruptions++; + fprintf(stderr, "%s snapshot table entry %i is incomplete\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + } + } + } + + return 0; +} + +int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + + if (result->corruptions && (fix & BDRV_FIX_ERRORS)) { + qemu_co_mutex_unlock(&s->lock); + ret = qcow2_write_snapshots(bs); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + result->check_errors++; + fprintf(stderr, "ERROR failed to update snapshot table: %s\n", + strerror(-ret)); + return ret; + } + + result->corruptions_fixed += result->corruptions; + result->corruptions = 0; + } + + return 0; +} + static void find_new_snapshot_id(BlockDriverState *bs, char *id_str, int id_str_size) { @@ -370,6 +655,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) sn->date_sec = sn_info->date_sec; sn->date_nsec = sn_info->date_nsec; sn->vm_clock_nsec = sn_info->vm_clock_nsec; + sn->extra_data_size = sizeof(QCowSnapshotExtraData); /* Allocate the L1 table of the snapshot and copy the current one there. */ l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t)); @@ -641,6 +927,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs, * The snapshot is now unused, clean up. If we fail after this point, we * won't recover but just leak clusters. */ + g_free(sn.unknown_extra_data); g_free(sn.id_str); g_free(sn.name); diff --git a/block/qcow2.c b/block/qcow2.c index 0bc69e6996..7c18721741 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -570,11 +570,47 @@ int qcow2_mark_consistent(BlockDriverState *bs) return 0; } +static void qcow2_add_check_result(BdrvCheckResult *out, + const BdrvCheckResult *src, + bool set_allocation_info) +{ + out->corruptions += src->corruptions; + out->leaks += src->leaks; + out->check_errors += src->check_errors; + out->corruptions_fixed += src->corruptions_fixed; + out->leaks_fixed += src->leaks_fixed; + + if (set_allocation_info) { + out->image_end_offset = src->image_end_offset; + out->bfi = src->bfi; + } +} + static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix) { - int ret = qcow2_check_refcounts(bs, result, fix); + BdrvCheckResult snapshot_res = {}; + BdrvCheckResult refcount_res = {}; + int ret; + + memset(result, 0, sizeof(*result)); + + ret = qcow2_check_read_snapshot_table(bs, &snapshot_res, fix); + if (ret < 0) { + qcow2_add_check_result(result, &snapshot_res, false); + return ret; + } + + ret = qcow2_check_refcounts(bs, &refcount_res, fix); + qcow2_add_check_result(result, &refcount_res, true); + if (ret < 0) { + qcow2_add_check_result(result, &snapshot_res, false); + return ret; + } + + ret = qcow2_check_fix_snapshot_table(bs, &snapshot_res, fix); + qcow2_add_check_result(result, &snapshot_res, false); if (ret < 0) { return ret; } @@ -1410,17 +1446,22 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, goto fail; } - /* The total size in bytes of the snapshot table is checked in - * qcow2_read_snapshots() because the size of each snapshot is - * variable and we don't know it yet. - * Here we only check the offset and number of snapshots. */ - ret = qcow2_validate_table(bs, header.snapshots_offset, - header.nb_snapshots, - sizeof(QCowSnapshotHeader), - sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS, - "Snapshot table", errp); - if (ret < 0) { - goto fail; + if (!(flags & BDRV_O_CHECK)) { + /* + * The total size in bytes of the snapshot table is checked in + * qcow2_read_snapshots() because the size of each snapshot is + * variable and we don't know it yet. + * Here we only check the offset and number of snapshots. + */ + ret = qcow2_validate_table(bs, header.snapshots_offset, + header.nb_snapshots, + sizeof(QCowSnapshotHeader), + sizeof(QCowSnapshotHeader) * + QCOW_MAX_SNAPSHOTS, + "Snapshot table", errp); + if (ret < 0) { + goto fail; + } } /* read the level 1 table */ @@ -1580,14 +1621,19 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, s->image_backing_file = g_strdup(bs->auto_backing_file); } - /* Internal snapshots */ - s->snapshots_offset = header.snapshots_offset; - s->nb_snapshots = header.nb_snapshots; + /* + * Internal snapshots; skip reading them in check mode, because + * we do not need them then, and we do not want to abort because + * of a broken table. + */ + if (!(flags & BDRV_O_CHECK)) { + s->snapshots_offset = header.snapshots_offset; + s->nb_snapshots = header.nb_snapshots; - ret = qcow2_read_snapshots(bs); - if (ret < 0) { - error_setg_errno(errp, -ret, "Could not read snapshots"); - goto fail; + ret = qcow2_read_snapshots(bs, errp); + if (ret < 0) { + goto fail; + } } /* Clear unknown autoclear feature bits */ @@ -3028,8 +3074,8 @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, if (mode == PREALLOC_MODE_METADATA) { mode = PREALLOC_MODE_OFF; } - ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, mode, - errp); + ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false, + mode, errp); if (ret < 0) { return ret; } @@ -3345,12 +3391,6 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) } blk_set_allow_write_beyond_eof(blk, true); - /* Clear the protocol layer and preallocate it if necessary */ - ret = blk_truncate(blk, 0, PREALLOC_MODE_OFF, errp); - if (ret < 0) { - goto out; - } - /* Write the header */ QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header)); header = g_malloc0(cluster_size); @@ -3449,7 +3489,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) } /* Okay, now that we have a valid image, let's give it the right size */ - ret = blk_truncate(blk, qcow2_opts->size, qcow2_opts->preallocation, errp); + ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation, + errp); if (ret < 0) { error_prepend(errp, "Could not resize image: "); goto out; @@ -3897,7 +3938,8 @@ fail: } static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) + bool exact, PreallocMode prealloc, + Error **errp) { BDRVQcow2State *s = bs->opaque; uint64_t old_length; @@ -3985,8 +4027,15 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, if ((last_cluster + 1) * s->cluster_size < old_file_size) { Error *local_err = NULL; + /* + * Do not pass @exact here: It will not help the user if + * we get an error here just because they wanted to shrink + * their qcow2 image (on a block device) with qemu-img. + * (And on the qcow2 layer, the @exact requirement is + * always fulfilled, so there is no need to pass it on.) + */ bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size, - PREALLOC_MODE_OFF, &local_err); + false, PREALLOC_MODE_OFF, &local_err); if (local_err) { warn_reportf_err(local_err, "Failed to truncate the tail of the image: "); @@ -4003,7 +4052,12 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, switch (prealloc) { case PREALLOC_MODE_OFF: if (has_data_file(bs)) { - ret = bdrv_co_truncate(s->data_file, offset, prealloc, errp); + /* + * If the caller wants an exact resize, the external data + * file should be resized to the exact target size, too, + * so we pass @exact here. + */ + ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp); if (ret < 0) { goto fail; } @@ -4088,7 +4142,8 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, /* Allocate the data area */ new_file_size = allocation_start + nb_new_data_clusters * s->cluster_size; - ret = bdrv_co_truncate(bs->file, new_file_size, prealloc, errp); + /* Image file grows, so @exact does not matter */ + ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp); if (ret < 0) { error_prepend(errp, "Failed to resize underlying file: "); qcow2_free_clusters(bs, allocation_start, @@ -4191,7 +4246,7 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs, if (len < 0) { return len; } - return bdrv_co_truncate(bs->file, len, PREALLOC_MODE_OFF, NULL); + return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL); } if (offset_into_cluster(s, offset)) { @@ -4428,7 +4483,7 @@ static int make_completely_empty(BlockDriverState *bs) goto fail; } - ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, + ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false, PREALLOC_MODE_OFF, &local_err); if (ret < 0) { error_report_err(local_err); @@ -4913,12 +4968,74 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, return 0; } +/* + * Upgrades an image's version. While newer versions encompass all + * features of older versions, some things may have to be presented + * differently. + */ +static int qcow2_upgrade(BlockDriverState *bs, int target_version, + BlockDriverAmendStatusCB *status_cb, void *cb_opaque, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + bool need_snapshot_update; + int current_version = s->qcow_version; + int i; + int ret; + + /* This is qcow2_upgrade(), not qcow2_downgrade() */ + assert(target_version > current_version); + + /* There are no other versions (yet) that you can upgrade to */ + assert(target_version == 3); + + status_cb(bs, 0, 2, cb_opaque); + + /* + * In v2, snapshots do not need to have extra data. v3 requires + * the 64-bit VM state size and the virtual disk size to be + * present. + * qcow2_write_snapshots() will always write the list in the + * v3-compliant format. + */ + need_snapshot_update = false; + for (i = 0; i < s->nb_snapshots; i++) { + if (s->snapshots[i].extra_data_size < + sizeof_field(QCowSnapshotExtraData, vm_state_size_large) + + sizeof_field(QCowSnapshotExtraData, disk_size)) + { + need_snapshot_update = true; + break; + } + } + if (need_snapshot_update) { + ret = qcow2_write_snapshots(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to update the snapshot table"); + return ret; + } + } + status_cb(bs, 1, 2, cb_opaque); + + s->qcow_version = target_version; + ret = qcow2_update_header(bs); + if (ret < 0) { + s->qcow_version = current_version; + error_setg_errno(errp, -ret, "Failed to update the image header"); + return ret; + } + status_cb(bs, 2, 2, cb_opaque); + + return 0; +} + typedef enum Qcow2AmendOperation { /* This is the value Qcow2AmendHelperCBInfo::last_operation will be * statically initialized to so that the helper CB can discern the first * invocation from an operation change */ QCOW2_NO_OPERATION = 0, + QCOW2_UPGRADING, QCOW2_CHANGING_REFCOUNT_ORDER, QCOW2_DOWNGRADING, } Qcow2AmendOperation; @@ -5101,17 +5218,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, helper_cb_info = (Qcow2AmendHelperCBInfo){ .original_status_cb = status_cb, .original_cb_opaque = cb_opaque, - .total_operations = (new_version < old_version) + .total_operations = (new_version != old_version) + (s->refcount_bits != refcount_bits) }; /* Upgrade first (some features may require compat=1.1) */ if (new_version > old_version) { - s->qcow_version = new_version; - ret = qcow2_update_header(bs); + helper_cb_info.current_operation = QCOW2_UPGRADING; + ret = qcow2_upgrade(bs, new_version, &qcow2_amend_helper_cb, + &helper_cb_info, errp); if (ret < 0) { - s->qcow_version = old_version; - error_setg_errno(errp, -ret, "Failed to update the image header"); return ret; } } @@ -5207,7 +5323,11 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, return ret; } - ret = blk_truncate(blk, new_size, PREALLOC_MODE_OFF, errp); + /* + * Amending image options should ensure that the image has + * exactly the given new values, so pass exact=true here. + */ + ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp); blk_unref(blk); if (ret < 0) { return ret; diff --git a/block/qcow2.h b/block/qcow2.h index 5cccd87162..601c2e4c82 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -61,6 +61,9 @@ * space for snapshot names and IDs */ #define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS) +/* Maximum amount of extra data per snapshot table entry to accept */ +#define QCOW_MAX_SNAPSHOT_EXTRA_DATA 1024 + /* Bitmap header extension constraints */ #define QCOW2_MAX_BITMAPS 65535 #define QCOW2_MAX_BITMAP_DIRECTORY_SIZE (1024 * QCOW2_MAX_BITMAPS) @@ -181,6 +184,10 @@ typedef struct QCowSnapshot { uint32_t date_sec; uint32_t date_nsec; uint64_t vm_clock_nsec; + /* Size of all extra data, including QCowSnapshotExtraData if available */ + uint32_t extra_data_size; + /* Data beyond QCowSnapshotExtraData, if any */ + void *unknown_extra_data; } QCowSnapshot; struct Qcow2Cache; @@ -708,7 +715,15 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, Error **errp); void qcow2_free_snapshots(BlockDriverState *bs); -int qcow2_read_snapshots(BlockDriverState *bs); +int qcow2_read_snapshots(BlockDriverState *bs, Error **errp); +int qcow2_write_snapshots(BlockDriverState *bs); + +int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix); +int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix); /* qcow2-cache.c functions */ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables, diff --git a/block/qed.c b/block/qed.c index 0d8fd507aa..d8c4e5fb1e 100644 --- a/block/qed.c +++ b/block/qed.c @@ -673,8 +673,11 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, l1_size = header.cluster_size * header.table_size; - /* File must start empty and grow, check truncate is supported */ - ret = blk_truncate(blk, 0, PREALLOC_MODE_OFF, errp); + /* + * The QED format associates file length with allocation status, + * so a new file (which is empty) must have a length of 0. + */ + ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, errp); if (ret < 0) { goto out; } @@ -1461,6 +1464,7 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, Error **errp) { diff --git a/block/raw-format.c b/block/raw-format.c index 42c28cc29a..3a76ec7dd2 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -370,7 +370,8 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) } static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) + bool exact, PreallocMode prealloc, + Error **errp) { BDRVRawState *s = bs->opaque; @@ -386,7 +387,7 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, s->size = offset; offset += s->offset; - return bdrv_co_truncate(bs->file, offset, prealloc, errp); + return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); } static void raw_eject(BlockDriverState *bs, bool eject_flag) diff --git a/block/rbd.c b/block/rbd.c index c71e45d7c3..027cbcc695 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -1087,6 +1087,7 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs) static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, Error **errp) { diff --git a/block/sheepdog.c b/block/sheepdog.c index 773dfc6ab1..cfa84338a2 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -2285,7 +2285,8 @@ static int64_t sd_getlength(BlockDriverState *bs) } static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) + bool exact, PreallocMode prealloc, + Error **errp) { BDRVSheepdogState *s = bs->opaque; int ret, fd; @@ -2601,7 +2602,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, assert(!flags); if (offset > s->inode.vdi_size) { - ret = sd_co_truncate(bs, offset, PREALLOC_MODE_OFF, NULL); + ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, NULL); if (ret < 0) { return ret; } diff --git a/block/ssh.c b/block/ssh.c index 84d01e892b..b4375cf7d2 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -1295,7 +1295,8 @@ static int64_t ssh_getlength(BlockDriverState *bs) } static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp) + bool exact, PreallocMode prealloc, + Error **errp) { BDRVSSHState *s = bs->opaque; diff --git a/block/trace-events b/block/trace-events index b8d70f5242..6ba86decca 100644 --- a/block/trace-events +++ b/block/trace-events @@ -45,9 +45,9 @@ backup_do_cow_return(void *job, int64_t offset, uint64_t bytes, int ret) "job %p block_copy_skip(void *bcs, int64_t start) "bcs %p start %"PRId64 block_copy_skip_range(void *bcs, int64_t start, uint64_t bytes) "bcs %p start %"PRId64" bytes %"PRId64 block_copy_process(void *bcs, int64_t start) "bcs %p start %"PRId64 -block_copy_with_bounce_buffer_read_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d" -block_copy_with_bounce_buffer_write_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d" -block_copy_with_offload_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d" +block_copy_copy_range_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d" +block_copy_read_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d" +block_copy_write_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d" # ../blockdev.c qmp_block_job_cancel(void *job) "job %p" @@ -152,9 +152,12 @@ nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, nvme_handle_event(void *s) "s %p" nvme_poll_cb(void *s) "s %p" nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int flags, int niov) "s %p is_write %d offset %"PRId64" bytes %"PRId64" flags %d niov %d" +nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offset %"PRId64" bytes %"PRId64" flags %d" nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x" nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset %"PRId64" bytes %"PRId64" niov %d is_write %d" nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset %"PRId64" bytes %"PRId64" ret %d" +nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset %"PRId64" bytes %"PRId64"" +nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset %"PRId64" bytes %"PRId64" ret %d" nvme_dma_map_flush(void *s) "s %p" nvme_free_req_queue_wait(void *q) "q %p" nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d" diff --git a/block/vdi.c b/block/vdi.c index 806ba7f53c..0142da7233 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -874,7 +874,7 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, } if (image_type == VDI_TYPE_STATIC) { - ret = blk_truncate(blk, offset + blocks * block_size, + ret = blk_truncate(blk, offset + blocks * block_size, false, PREALLOC_MODE_OFF, errp); if (ret < 0) { error_prepend(errp, "Failed to statically allocate file"); diff --git a/block/vhdx-log.c b/block/vhdx-log.c index fdd3a7adc3..13a49c2a33 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -557,8 +557,8 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, ret = -EINVAL; goto exit; } - ret = bdrv_truncate(bs->file, new_file_size, PREALLOC_MODE_OFF, - NULL); + ret = bdrv_truncate(bs->file, new_file_size, false, + PREALLOC_MODE_OFF, NULL); if (ret < 0) { goto exit; } diff --git a/block/vhdx.c b/block/vhdx.c index 371f226286..f02d2611be 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1263,7 +1263,7 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, return -EINVAL; } - return bdrv_truncate(bs->file, *new_offset + s->block_size, + return bdrv_truncate(bs->file, *new_offset + s->block_size, false, PREALLOC_MODE_OFF, NULL); } @@ -1702,12 +1702,13 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s, if (type == VHDX_TYPE_DYNAMIC) { /* All zeroes, so we can just extend the file - the end of the BAT * is the furthest thing we have written yet */ - ret = blk_truncate(blk, data_file_offset, PREALLOC_MODE_OFF, errp); + ret = blk_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF, + errp); if (ret < 0) { goto exit; } } else if (type == VHDX_TYPE_FIXED) { - ret = blk_truncate(blk, data_file_offset + image_size, + ret = blk_truncate(blk, data_file_offset + image_size, false, PREALLOC_MODE_OFF, errp); if (ret < 0) { goto exit; diff --git a/block/vmdk.c b/block/vmdk.c index fed3b50c8a..20e909d997 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2076,7 +2076,7 @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, return length; } length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE); - ret = bdrv_truncate(s->extents[i].file, length, + ret = bdrv_truncate(s->extents[i].file, length, false, PREALLOC_MODE_OFF, NULL); if (ret < 0) { return ret; @@ -2118,7 +2118,7 @@ static int vmdk_init_extent(BlockBackend *blk, int gd_buf_size; if (flat) { - ret = blk_truncate(blk, filesize, PREALLOC_MODE_OFF, errp); + ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, errp); goto exit; } magic = cpu_to_be32(VMDK4_MAGIC); @@ -2181,7 +2181,7 @@ static int vmdk_init_extent(BlockBackend *blk, goto exit; } - ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, + ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false, PREALLOC_MODE_OFF, errp); if (ret < 0) { goto exit; @@ -2523,7 +2523,7 @@ static int coroutine_fn vmdk_co_do_create(int64_t size, /* bdrv_pwrite write padding zeros to align to sector, we don't need that * for description file */ if (desc_offset == 0) { - ret = blk_truncate(blk, desc_len, PREALLOC_MODE_OFF, errp); + ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, errp); if (ret < 0) { goto exit; } diff --git a/block/vpc.c b/block/vpc.c index 5cd3890780..a65550298e 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -898,7 +898,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, /* Add footer to total size */ total_size += HEADER_SIZE; - ret = blk_truncate(blk, total_size, PREALLOC_MODE_OFF, errp); + ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, errp); if (ret < 0) { return ret; } diff --git a/blockdev.c b/blockdev.c index ba491e3ef5..8e029e9c01 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3204,7 +3204,7 @@ void qmp_block_resize(bool has_device, const char *device, } bdrv_drained_begin(bs); - ret = blk_truncate(blk, size, PREALLOC_MODE_OFF, errp); + ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, errp); bdrv_drained_end(bs); out: diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index dd14b9a489..9fa2eaf890 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -42,9 +42,9 @@ */ static VirtIOFeature feature_sizes[] = { {.flags = 1ULL << VIRTIO_BLK_F_DISCARD, - .end = virtio_endof(struct virtio_blk_config, discard_sector_alignment)}, + .end = endof(struct virtio_blk_config, discard_sector_alignment)}, {.flags = 1ULL << VIRTIO_BLK_F_WRITE_ZEROES, - .end = virtio_endof(struct virtio_blk_config, write_zeroes_may_unmap)}, + .end = endof(struct virtio_blk_config, write_zeroes_may_unmap)}, {} }; diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 9f11422337..2c4909c5f9 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -90,15 +90,15 @@ static inline __virtio16 *virtio_net_rsc_ext_num_dupacks( static VirtIOFeature feature_sizes[] = { {.flags = 1ULL << VIRTIO_NET_F_MAC, - .end = virtio_endof(struct virtio_net_config, mac)}, + .end = endof(struct virtio_net_config, mac)}, {.flags = 1ULL << VIRTIO_NET_F_STATUS, - .end = virtio_endof(struct virtio_net_config, status)}, + .end = endof(struct virtio_net_config, status)}, {.flags = 1ULL << VIRTIO_NET_F_MQ, - .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)}, + .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, {.flags = 1ULL << VIRTIO_NET_F_MTU, - .end = virtio_endof(struct virtio_net_config, mtu)}, + .end = endof(struct virtio_net_config, mtu)}, {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, - .end = virtio_endof(struct virtio_net_config, duplex)}, + .end = endof(struct virtio_net_config, duplex)}, {} }; diff --git a/include/block/block-copy.h b/include/block/block-copy.h index e2e135ff1b..0a161724d7 100644 --- a/include/block/block-copy.h +++ b/include/block/block-copy.h @@ -16,6 +16,7 @@ #define BLOCK_COPY_H #include "block/block.h" +#include "qemu/co-shared-resource.h" typedef struct BlockCopyInFlightReq { int64_t start_byte; @@ -37,7 +38,7 @@ typedef struct BlockCopyState { BdrvDirtyBitmap *copy_bitmap; int64_t cluster_size; bool use_copy_range; - int64_t copy_range_size; + int64_t copy_size; uint64_t len; QLIST_HEAD(, BlockCopyInFlightReq) inflight_reqs; @@ -69,6 +70,8 @@ typedef struct BlockCopyState { */ ProgressResetCallbackFunc progress_reset_callback; void *progress_opaque; + + SharedResource *mem; } BlockCopyState; BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, diff --git a/include/block/block.h b/include/block/block.h index 89606bd9f8..1df9848e74 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -346,10 +346,10 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, const char *backing_file); void bdrv_refresh_filename(BlockDriverState *bs); -int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, +int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, PreallocMode prealloc, Error **errp); -int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc, - Error **errp); +int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, + PreallocMode prealloc, Error **errp); int64_t bdrv_nb_sectors(BlockDriverState *bs); int64_t bdrv_getlength(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index ca4ccac4c1..02dc0034a2 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -334,8 +334,23 @@ struct BlockDriver { * bdrv_parse_filename. */ const char *protocol_name; + + /* + * Truncate @bs to @offset bytes using the given @prealloc mode + * when growing. Modes other than PREALLOC_MODE_OFF should be + * rejected when shrinking @bs. + * + * If @exact is true, @bs must be resized to exactly @offset. + * Otherwise, it is sufficient for @bs (if it is a host block + * device and thus there is no way to resize it) to be at least + * @offset bytes in length. + * + * If @exact is true and this function fails but would succeed + * with @exact = false, it should return -ENOTSUP. + */ int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, - PreallocMode prealloc, Error **errp); + bool exact, PreallocMode prealloc, + Error **errp); int64_t (*bdrv_getlength)(BlockDriverState *bs); bool has_variable_length; diff --git a/include/block/nvme.h b/include/block/nvme.h index 3ec8efcc43..ab5943b90a 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -653,12 +653,29 @@ typedef struct NvmeIdNs { uint8_t mc; uint8_t dpc; uint8_t dps; - uint8_t res30[98]; + + uint8_t nmic; + uint8_t rescap; + uint8_t fpi; + uint8_t dlfeat; + + uint8_t res34[94]; NvmeLBAF lbaf[16]; uint8_t res192[192]; uint8_t vs[3712]; } NvmeIdNs; + +/*Deallocate Logical Block Features*/ +#define NVME_ID_NS_DLFEAT_GUARD_CRC(dlfeat) ((dlfeat) & 0x10) +#define NVME_ID_NS_DLFEAT_WRITE_ZEROES(dlfeat) ((dlfeat) & 0x08) + +#define NVME_ID_NS_DLFEAT_READ_BEHAVIOR(dlfeat) ((dlfeat) & 0x7) +#define NVME_ID_NS_DLFEAT_READ_BEHAVIOR_UNDEFINED 0 +#define NVME_ID_NS_DLFEAT_READ_BEHAVIOR_ZEROES 1 +#define NVME_ID_NS_DLFEAT_READ_BEHAVIOR_ONES 2 + + #define NVME_ID_NS_NSFEAT_THIN(nsfeat) ((nsfeat & 0x1)) #define NVME_ID_NS_FLBAS_EXTENDED(flbas) ((flbas >> 4) & 0x1) #define NVME_ID_NS_FLBAS_INDEX(flbas) ((flbas & 0xf)) diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 5560f4a5ea..bea3f14d41 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -37,13 +37,6 @@ static inline hwaddr vring_align(hwaddr addr, return QEMU_ALIGN_UP(addr, align); } -/* - * Calculate the number of bytes up to and including the given 'field' of - * 'container'. - */ -#define virtio_endof(container, field) \ - (offsetof(container, field) + sizeof_field(container, field)) - typedef struct VirtIOFeature { uint64_t flags; size_t end; diff --git a/include/qemu/co-shared-resource.h b/include/qemu/co-shared-resource.h new file mode 100644 index 0000000000..4e4503004c --- /dev/null +++ b/include/qemu/co-shared-resource.h @@ -0,0 +1,71 @@ +/* + * Helper functionality for distributing a fixed total amount of + * an abstract resource among multiple coroutines. + * + * Copyright (c) 2019 Virtuozzo International GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_CO_SHARED_RESOURCE_H +#define QEMU_CO_SHARED_RESOURCE_H + + +typedef struct SharedResource SharedResource; + +/* + * Create SharedResource structure + * + * @total: total amount of some resource to be shared between clients + * + * Note: this API is not thread-safe. + */ +SharedResource *shres_create(uint64_t total); + +/* + * Release SharedResource structure + * + * This function may only be called once everything allocated by all + * clients has been deallocated. + */ +void shres_destroy(SharedResource *s); + +/* + * Try to allocate an amount of @n. Return true on success, and false + * if there is too little left of the collective resource to fulfill + * the request. + */ +bool co_try_get_from_shres(SharedResource *s, uint64_t n); + +/* + * Allocate an amount of @n, and, if necessary, yield until + * that becomes possible. + */ +void coroutine_fn co_get_from_shres(SharedResource *s, uint64_t n); + +/* + * Deallocate an amount of @n. The total amount allocated by a caller + * does not need to be deallocated/released with a single call, but may + * be split over several calls. For example, get(4), get(3), and then + * put(5), put(2). + */ +void coroutine_fn co_put_to_shres(SharedResource *s, uint64_t n); + + +#endif /* QEMU_CO_SHARED_RESOURCE_H */ diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 7b93c73340..85c02c16d3 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -60,6 +60,13 @@ #define sizeof_field(type, field) sizeof(((type *)0)->field) +/* + * Calculate the number of bytes up to and including the given 'field' of + * 'container'. + */ +#define endof(container, field) \ + (offsetof(container, field) + sizeof_field(container, field)) + /* Convert from a base type to a parent type, with compile time checking. */ #ifdef __GNUC__ #define DO_UPCAST(type, field, dev) ( __extension__ ( { \ diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 368d53af77..b198deca0b 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -121,6 +121,10 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); +int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, + unsigned int bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); @@ -233,8 +237,8 @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, int bytes, BdrvRequestFlags flags); int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, int bytes); -int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc, - Error **errp); +int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, + PreallocMode prealloc, Error **errp); int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes); int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int64_t pos, int size); diff --git a/python/qemu/machine.py b/python/qemu/machine.py index 128a3d1dc2..2024e8b1b1 100644 --- a/python/qemu/machine.py +++ b/python/qemu/machine.py @@ -71,7 +71,7 @@ class QEMUMachine(object): def __init__(self, binary, args=None, wrapper=None, name=None, test_dir="/var/tmp", monitor_address=None, - socket_scm_helper=None): + socket_scm_helper=None, sock_dir=None): ''' Initialize a QEMUMachine @@ -90,6 +90,8 @@ class QEMUMachine(object): wrapper = [] if name is None: name = "qemu-%d" % os.getpid() + if sock_dir is None: + sock_dir = test_dir self._name = name self._monitor_address = monitor_address self._vm_monitor = None @@ -106,12 +108,14 @@ class QEMUMachine(object): self._qemu_full_args = None self._test_dir = test_dir self._temp_dir = None + self._sock_dir = sock_dir self._launched = False self._machine = None self._console_set = False self._console_device_type = None self._console_address = None self._console_socket = None + self._remove_files = [] # just in case logging wasn't configured by the main script: logging.basicConfig() @@ -236,8 +240,9 @@ class QEMUMachine(object): if self._machine is not None: args.extend(['-machine', self._machine]) if self._console_set: - self._console_address = os.path.join(self._temp_dir, + self._console_address = os.path.join(self._sock_dir, self._name + "-console.sock") + self._remove_files.append(self._console_address) chardev = ('socket,id=console,path=%s,server,nowait' % self._console_address) args.extend(['-chardev', chardev]) @@ -253,8 +258,9 @@ class QEMUMachine(object): if self._monitor_address is not None: self._vm_monitor = self._monitor_address else: - self._vm_monitor = os.path.join(self._temp_dir, + self._vm_monitor = os.path.join(self._sock_dir, self._name + "-monitor.sock") + self._remove_files.append(self._vm_monitor) self._qemu_log_path = os.path.join(self._temp_dir, self._name + ".log") self._qemu_log_file = open(self._qemu_log_path, 'wb') @@ -279,6 +285,9 @@ class QEMUMachine(object): shutil.rmtree(self._temp_dir) self._temp_dir = None + while len(self._remove_files) > 0: + self._remove_if_exists(self._remove_files.pop()) + def launch(self): """ Launch the VM and make sure we cleanup and expose the diff --git a/python/qemu/qtest.py b/python/qemu/qtest.py index 3f1d2cb325..d24ad04256 100644 --- a/python/qemu/qtest.py +++ b/python/qemu/qtest.py @@ -84,14 +84,17 @@ class QEMUQtestMachine(QEMUMachine): '''A QEMU VM''' def __init__(self, binary, args=None, name=None, test_dir="/var/tmp", - socket_scm_helper=None): + socket_scm_helper=None, sock_dir=None): if name is None: name = "qemu-%d" % os.getpid() + if sock_dir is None: + sock_dir = test_dir super(QEMUQtestMachine, self).__init__(binary, args, name=name, test_dir=test_dir, - socket_scm_helper=socket_scm_helper) + socket_scm_helper=socket_scm_helper, + sock_dir=sock_dir) self._qtest = None - self._qtest_path = os.path.join(test_dir, name + "-qtest.sock") + self._qtest_path = os.path.join(sock_dir, name + "-qtest.sock") def _base_args(self): args = super(QEMUQtestMachine, self)._base_args() diff --git a/qemu-img.c b/qemu-img.c index 8b03ef8171..95a24b9762 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3656,7 +3656,7 @@ static int img_resize(int argc, char **argv) Error *err = NULL; int c, ret, relative; const char *filename, *fmt, *size; - int64_t n, total_size, current_size, new_size; + int64_t n, total_size, current_size; bool quiet = false; BlockBackend *blk = NULL; PreallocMode prealloc = PREALLOC_MODE_OFF; @@ -3831,43 +3831,17 @@ static int img_resize(int argc, char **argv) } } - ret = blk_truncate(blk, total_size, prealloc, &err); - if (ret < 0) { + /* + * The user expects the image to have the desired size after + * resizing, so pass @exact=true. It is of no use to report + * success when the image has not actually been resized. + */ + ret = blk_truncate(blk, total_size, true, prealloc, &err); + if (!ret) { + qprintf(quiet, "Image resized.\n"); + } else { error_report_err(err); - goto out; - } - - new_size = blk_getlength(blk); - if (new_size < 0) { - error_report("Failed to verify truncated image length: %s", - strerror(-new_size)); - ret = -1; - goto out; - } - - /* Some block drivers implement a truncation method, but only so - * the user can cause qemu to refresh the image's size from disk. - * The idea is that the user resizes the image outside of qemu and - * then invokes block_resize to inform qemu about it. - * (This includes iscsi and file-posix for device files.) - * Of course, that is not the behavior someone invoking - * qemu-img resize would find useful, so we catch that behavior - * here and tell the user. */ - if (new_size != total_size && new_size == current_size) { - error_report("Image was not resized; resizing may not be supported " - "for this image"); - ret = -1; - goto out; } - - if (new_size != total_size) { - warn_report("Image should have been resized to %" PRIi64 - " bytes, but was resized to %" PRIi64 " bytes", - total_size, new_size); - } - - qprintf(quiet, "Image resized.\n"); - out: blk_unref(blk); if (ret) { diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 349256a5fe..1b7e700020 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -1710,7 +1710,12 @@ static int truncate_f(BlockBackend *blk, int argc, char **argv) return offset; } - ret = blk_truncate(blk, offset, PREALLOC_MODE_OFF, &local_err); + /* + * qemu-io is a debugging tool, so let us be strict here and pass + * exact=true. It is better to err on the "emit more errors" side + * than to be overly permissive. + */ + ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, &local_err); if (ret < 0) { error_report_err(local_err); return ret; diff --git a/tests/qemu-iotests/083 b/tests/qemu-iotests/083 index b270550d3e..10fdfc8ebb 100755 --- a/tests/qemu-iotests/083 +++ b/tests/qemu-iotests/083 @@ -28,7 +28,7 @@ status=1 # failure is the default! _cleanup() { - rm -f nbd.sock + rm -f "$SOCK_DIR/nbd.sock" rm -f nbd-fault-injector.out rm -f nbd-fault-injector.conf } @@ -80,10 +80,10 @@ EOF if [ "$proto" = "tcp" ]; then nbd_addr="127.0.0.1:0" else - nbd_addr="$TEST_DIR/nbd.sock" + nbd_addr="$SOCK_DIR/nbd.sock" fi - rm -f "$TEST_DIR/nbd.sock" + rm -f "$SOCK_DIR/nbd.sock" echo > "$TEST_DIR/nbd-fault-injector.out" $PYTHON nbd-fault-injector.py $extra_args "$nbd_addr" "$TEST_DIR/nbd-fault-injector.conf" >"$TEST_DIR/nbd-fault-injector.out" 2>&1 & diff --git a/tests/qemu-iotests/083.out b/tests/qemu-iotests/083.out index eee6dd1379..2090ee693c 100644 --- a/tests/qemu-iotests/083.out +++ b/tests/qemu-iotests/083.out @@ -110,43 +110,43 @@ read failed: Input/output error === Check disconnect before neg1 === -qemu-io: can't open device nbd+unix:///foo?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///foo?socket=SOCK_DIR/nbd.sock === Check disconnect after neg1 === -qemu-io: can't open device nbd+unix:///foo?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///foo?socket=SOCK_DIR/nbd.sock === Check disconnect 8 neg1 === -qemu-io: can't open device nbd+unix:///foo?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///foo?socket=SOCK_DIR/nbd.sock === Check disconnect 16 neg1 === -qemu-io: can't open device nbd+unix:///foo?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///foo?socket=SOCK_DIR/nbd.sock === Check disconnect before export === -qemu-io: can't open device nbd+unix:///foo?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///foo?socket=SOCK_DIR/nbd.sock === Check disconnect after export === -qemu-io: can't open device nbd+unix:///foo?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///foo?socket=SOCK_DIR/nbd.sock === Check disconnect 4 export === -qemu-io: can't open device nbd+unix:///foo?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///foo?socket=SOCK_DIR/nbd.sock === Check disconnect 12 export === -qemu-io: can't open device nbd+unix:///foo?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///foo?socket=SOCK_DIR/nbd.sock === Check disconnect 16 export === -qemu-io: can't open device nbd+unix:///foo?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///foo?socket=SOCK_DIR/nbd.sock === Check disconnect before neg2 === -qemu-io: can't open device nbd+unix:///foo?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///foo?socket=SOCK_DIR/nbd.sock === Check disconnect after neg2 === @@ -154,11 +154,11 @@ read failed: Input/output error === Check disconnect 8 neg2 === -qemu-io: can't open device nbd+unix:///foo?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///foo?socket=SOCK_DIR/nbd.sock === Check disconnect 10 neg2 === -qemu-io: can't open device nbd+unix:///foo?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///foo?socket=SOCK_DIR/nbd.sock === Check disconnect before request === @@ -195,23 +195,23 @@ read 512/512 bytes at offset 0 === Check disconnect before neg-classic === -qemu-io: can't open device nbd+unix:///?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///?socket=SOCK_DIR/nbd.sock === Check disconnect 8 neg-classic === -qemu-io: can't open device nbd+unix:///?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///?socket=SOCK_DIR/nbd.sock === Check disconnect 16 neg-classic === -qemu-io: can't open device nbd+unix:///?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///?socket=SOCK_DIR/nbd.sock === Check disconnect 24 neg-classic === -qemu-io: can't open device nbd+unix:///?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///?socket=SOCK_DIR/nbd.sock === Check disconnect 28 neg-classic === -qemu-io: can't open device nbd+unix:///?socket=TEST_DIR/nbd.sock +qemu-io: can't open device nbd+unix:///?socket=SOCK_DIR/nbd.sock === Check disconnect after neg-classic === diff --git a/tests/qemu-iotests/093 b/tests/qemu-iotests/093 index 3c4f5173ce..f03fa24a07 100755 --- a/tests/qemu-iotests/093 +++ b/tests/qemu-iotests/093 @@ -24,7 +24,7 @@ import iotests nsec_per_sec = 1000000000 class ThrottleTestCase(iotests.QMPTestCase): - test_img = "null-aio://" + test_driver = "null-aio" max_drives = 3 def blockstats(self, device): @@ -35,10 +35,14 @@ class ThrottleTestCase(iotests.QMPTestCase): return stat['rd_bytes'], stat['rd_operations'], stat['wr_bytes'], stat['wr_operations'] raise Exception("Device not found for blockstats: %s" % device) + def required_drivers(self): + return [self.test_driver] + + @iotests.skip_if_unsupported(required_drivers) def setUp(self): self.vm = iotests.VM() for i in range(0, self.max_drives): - self.vm.add_drive(self.test_img, "file.read-zeroes=on") + self.vm.add_drive(self.test_driver + "://", "file.read-zeroes=on") self.vm.launch() def tearDown(self): @@ -264,16 +268,15 @@ class ThrottleTestCase(iotests.QMPTestCase): self.assertEqual(self.blockstats('drive1')[0], 4096) class ThrottleTestCoroutine(ThrottleTestCase): - test_img = "null-co://" + test_driver = "null-co" class ThrottleTestGroupNames(iotests.QMPTestCase): - test_img = "null-aio://" max_drives = 3 def setUp(self): self.vm = iotests.VM() for i in range(0, self.max_drives): - self.vm.add_drive(self.test_img, + self.vm.add_drive("null-co://", "throttling.iops-total=100,file.read-zeroes=on") self.vm.launch() @@ -376,10 +379,10 @@ class ThrottleTestRemovableMedia(iotests.QMPTestCase): def test_removable_media(self): # Add a couple of dummy nodes named cd0 and cd1 - result = self.vm.qmp("blockdev-add", driver="null-aio", + result = self.vm.qmp("blockdev-add", driver="null-co", read_zeroes=True, node_name="cd0") self.assert_qmp(result, 'return', {}) - result = self.vm.qmp("blockdev-add", driver="null-aio", + result = self.vm.qmp("blockdev-add", driver="null-co", read_zeroes=True, node_name="cd1") self.assert_qmp(result, 'return', {}) @@ -426,4 +429,6 @@ class ThrottleTestRemovableMedia(iotests.QMPTestCase): if __name__ == '__main__': + if 'null-co' not in iotests.supported_formats(): + iotests.notrun('null-co driver support missing') iotests.main(supported_fmts=["raw"]) diff --git a/tests/qemu-iotests/136 b/tests/qemu-iotests/136 index a46a7b7630..012ea111ac 100755 --- a/tests/qemu-iotests/136 +++ b/tests/qemu-iotests/136 @@ -30,7 +30,7 @@ bad_offset = bad_sector * 512 blkdebug_file = os.path.join(iotests.test_dir, 'blkdebug.conf') class BlockDeviceStatsTestCase(iotests.QMPTestCase): - test_img = "null-aio://" + test_driver = "null-aio" total_rd_bytes = 0 total_rd_ops = 0 total_wr_bytes = 0 @@ -67,6 +67,10 @@ sector = "%d" ''' % (bad_sector, bad_sector)) file.close() + def required_drivers(self): + return [self.test_driver] + + @iotests.skip_if_unsupported(required_drivers) def setUp(self): drive_args = [] drive_args.append("stats-intervals.0=%d" % interval_length) @@ -76,8 +80,8 @@ sector = "%d" (self.account_failed and "on" or "off")) drive_args.append("file.image.read-zeroes=on") self.create_blkdebug_file() - self.vm = iotests.VM().add_drive('blkdebug:%s:%s' % - (blkdebug_file, self.test_img), + self.vm = iotests.VM().add_drive('blkdebug:%s:%s://' % + (blkdebug_file, self.test_driver), ','.join(drive_args)) self.vm.launch() # Set an initial value for the clock @@ -337,7 +341,9 @@ class BlockDeviceStatsTestAccountBoth(BlockDeviceStatsTestCase): account_failed = True class BlockDeviceStatsTestCoroutine(BlockDeviceStatsTestCase): - test_img = "null-co://" + test_driver = "null-co" if __name__ == '__main__': + if 'null-co' not in iotests.supported_formats(): + iotests.notrun('null-co driver support missing') iotests.main(supported_fmts=["raw"]) diff --git a/tests/qemu-iotests/140 b/tests/qemu-iotests/140 index b965b1dd5d..8d2ce5d9e3 100755 --- a/tests/qemu-iotests/140 +++ b/tests/qemu-iotests/140 @@ -34,7 +34,7 @@ _cleanup() { _cleanup_qemu _cleanup_test_img - rm -f "$TEST_DIR/nbd" + rm -f "$SOCK_DIR/nbd" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -69,7 +69,7 @@ _send_qemu_cmd $QEMU_HANDLE \ _send_qemu_cmd $QEMU_HANDLE \ "{ 'execute': 'nbd-server-start', 'arguments': { 'addr': { 'type': 'unix', - 'data': { 'path': '$TEST_DIR/nbd' }}}}" \ + 'data': { 'path': '$SOCK_DIR/nbd' }}}}" \ 'return' _send_qemu_cmd $QEMU_HANDLE \ @@ -78,7 +78,7 @@ _send_qemu_cmd $QEMU_HANDLE \ 'return' $QEMU_IO_PROG -f raw -r -c 'read -P 42 0 64k' \ - "nbd+unix:///drv?socket=$TEST_DIR/nbd" 2>&1 \ + "nbd+unix:///drv?socket=$SOCK_DIR/nbd" 2>&1 \ | _filter_qemu_io | _filter_nbd _send_qemu_cmd $QEMU_HANDLE \ @@ -87,7 +87,7 @@ _send_qemu_cmd $QEMU_HANDLE \ 'return' $QEMU_IO_PROG -f raw -r -c close \ - "nbd+unix:///drv?socket=$TEST_DIR/nbd" 2>&1 \ + "nbd+unix:///drv?socket=$SOCK_DIR/nbd" 2>&1 \ | _filter_qemu_io | _filter_nbd _send_qemu_cmd $QEMU_HANDLE \ diff --git a/tests/qemu-iotests/140.out b/tests/qemu-iotests/140.out index 67fe44a3e3..2511eb7369 100644 --- a/tests/qemu-iotests/140.out +++ b/tests/qemu-iotests/140.out @@ -8,7 +8,7 @@ wrote 65536/65536 bytes at offset 0 read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) {"return": {}} -qemu-io: can't open device nbd+unix:///drv?socket=TEST_DIR/nbd: Requested export not available +qemu-io: can't open device nbd+unix:///drv?socket=SOCK_DIR/nbd: Requested export not available server reported: export 'drv' not present {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} diff --git a/tests/qemu-iotests/143 b/tests/qemu-iotests/143 index 92249ac8da..f649b36195 100755 --- a/tests/qemu-iotests/143 +++ b/tests/qemu-iotests/143 @@ -29,7 +29,7 @@ status=1 # failure is the default! _cleanup() { _cleanup_qemu - rm -f "$TEST_DIR/nbd" + rm -f "$SOCK_DIR/nbd" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -51,12 +51,12 @@ _send_qemu_cmd $QEMU_HANDLE \ _send_qemu_cmd $QEMU_HANDLE \ "{ 'execute': 'nbd-server-start', 'arguments': { 'addr': { 'type': 'unix', - 'data': { 'path': '$TEST_DIR/nbd' }}}}" \ + 'data': { 'path': '$SOCK_DIR/nbd' }}}}" \ 'return' # This should just result in a client error, not in the server crashing $QEMU_IO_PROG -f raw -c quit \ - "nbd+unix:///no_such_export?socket=$TEST_DIR/nbd" 2>&1 \ + "nbd+unix:///no_such_export?socket=$SOCK_DIR/nbd" 2>&1 \ | _filter_qemu_io | _filter_nbd _send_qemu_cmd $QEMU_HANDLE \ diff --git a/tests/qemu-iotests/143.out b/tests/qemu-iotests/143.out index ee71b5aa42..037d34a409 100644 --- a/tests/qemu-iotests/143.out +++ b/tests/qemu-iotests/143.out @@ -1,7 +1,7 @@ QA output created by 143 {"return": {}} {"return": {}} -qemu-io: can't open device nbd+unix:///no_such_export?socket=TEST_DIR/nbd: Requested export not available +qemu-io: can't open device nbd+unix:///no_such_export?socket=SOCK_DIR/nbd: Requested export not available server reported: export 'no_such_export' not present {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} diff --git a/tests/qemu-iotests/147 b/tests/qemu-iotests/147 index ab8480b9a4..03fc2fabcf 100755 --- a/tests/qemu-iotests/147 +++ b/tests/qemu-iotests/147 @@ -32,7 +32,7 @@ NBD_IPV6_PORT_START = NBD_PORT_END NBD_IPV6_PORT_END = NBD_IPV6_PORT_START + 1024 test_img = os.path.join(iotests.test_dir, 'test.img') -unix_socket = os.path.join(iotests.test_dir, 'nbd.socket') +unix_socket = os.path.join(iotests.sock_dir, 'nbd.socket') def flatten_sock_addr(crumpled_address): diff --git a/tests/qemu-iotests/181 b/tests/qemu-iotests/181 index e317e63422..378c2899d1 100755 --- a/tests/qemu-iotests/181 +++ b/tests/qemu-iotests/181 @@ -26,7 +26,7 @@ echo "QA output created by $seq" status=1 # failure is the default! -MIG_SOCKET="${TEST_DIR}/migrate" +MIG_SOCKET="${SOCK_DIR}/migrate" _cleanup() { diff --git a/tests/qemu-iotests/182 b/tests/qemu-iotests/182 index 7f494eb9bb..1ccb850055 100755 --- a/tests/qemu-iotests/182 +++ b/tests/qemu-iotests/182 @@ -31,7 +31,7 @@ _cleanup() { _cleanup_test_img rm -f "$TEST_IMG.overlay" - rm -f "$TEST_DIR/nbd.socket" + rm -f "$SOCK_DIR/nbd.socket" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -133,7 +133,7 @@ success_or_failure=y _send_qemu_cmd $QEMU_HANDLE \ 'addr': { 'type': 'unix', 'data': { - 'path': '$TEST_DIR/nbd.socket' + 'path': '$SOCK_DIR/nbd.socket' } } } }" \ 'return' \ 'error' diff --git a/tests/qemu-iotests/183 b/tests/qemu-iotests/183 index 04fb344d08..bced83fae0 100755 --- a/tests/qemu-iotests/183 +++ b/tests/qemu-iotests/183 @@ -26,7 +26,7 @@ echo "QA output created by $seq" status=1 # failure is the default! -MIG_SOCKET="${TEST_DIR}/migrate" +MIG_SOCKET="${SOCK_DIR}/migrate" _cleanup() { diff --git a/tests/qemu-iotests/192 b/tests/qemu-iotests/192 index 034432272f..d2ba55dd90 100755 --- a/tests/qemu-iotests/192 +++ b/tests/qemu-iotests/192 @@ -31,7 +31,7 @@ _cleanup() { _cleanup_qemu _cleanup_test_img - rm -f "$TEST_DIR/nbd" + rm -f "$SOCK_DIR/nbd" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -66,7 +66,7 @@ else QEMU_COMM_TIMEOUT=1 fi -_send_qemu_cmd $h "nbd_server_start unix:$TEST_DIR/nbd" "(qemu)" +_send_qemu_cmd $h "nbd_server_start unix:$SOCK_DIR/nbd" "(qemu)" _send_qemu_cmd $h "nbd_server_add -w drive0" "(qemu)" _send_qemu_cmd $h "q" "(qemu)" diff --git a/tests/qemu-iotests/192.out b/tests/qemu-iotests/192.out index 1e0be4c4d7..b9429dbe36 100644 --- a/tests/qemu-iotests/192.out +++ b/tests/qemu-iotests/192.out @@ -1,7 +1,7 @@ QA output created by 192 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 QEMU X.Y.Z monitor - type 'help' for more information -(qemu) nbd_server_start unix:TEST_DIR/nbd +(qemu) nbd_server_start unix:SOCK_DIR/nbd (qemu) nbd_server_add -w drive0 (qemu) q *** done diff --git a/tests/qemu-iotests/194 b/tests/qemu-iotests/194 index d746ab1e21..72e47e8833 100755 --- a/tests/qemu-iotests/194 +++ b/tests/qemu-iotests/194 @@ -26,8 +26,8 @@ iotests.verify_platform(['linux']) with iotests.FilePath('source.img') as source_img_path, \ iotests.FilePath('dest.img') as dest_img_path, \ - iotests.FilePath('migration.sock') as migration_sock_path, \ - iotests.FilePath('nbd.sock') as nbd_sock_path, \ + iotests.FilePaths(['migration.sock', 'nbd.sock'], iotests.sock_dir) as \ + [migration_sock_path, nbd_sock_path], \ iotests.VM('source') as source_vm, \ iotests.VM('dest') as dest_vm: diff --git a/tests/qemu-iotests/201 b/tests/qemu-iotests/201 index 7abf740fe4..86fa37e714 100755 --- a/tests/qemu-iotests/201 +++ b/tests/qemu-iotests/201 @@ -24,7 +24,7 @@ echo "QA output created by $seq" status=1 # failure is the default! -MIG_SOCKET="${TEST_DIR}/migrate" +MIG_SOCKET="${SOCK_DIR}/migrate" # get standard environment, filters and checks . ./common.rc diff --git a/tests/qemu-iotests/205 b/tests/qemu-iotests/205 index 76f6c5fa2b..4bb2c21e8b 100755 --- a/tests/qemu-iotests/205 +++ b/tests/qemu-iotests/205 @@ -24,7 +24,7 @@ import iotests import time from iotests import qemu_img_create, qemu_io, filter_qemu_io, QemuIoInteractive -nbd_sock = os.path.join(iotests.test_dir, 'nbd_sock') +nbd_sock = os.path.join(iotests.sock_dir, 'nbd_sock') nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock disk = os.path.join(iotests.test_dir, 'disk') diff --git a/tests/qemu-iotests/208 b/tests/qemu-iotests/208 index 1e202388dc..546eb1de3e 100755 --- a/tests/qemu-iotests/208 +++ b/tests/qemu-iotests/208 @@ -26,7 +26,7 @@ iotests.verify_image_format(supported_fmts=['generic']) with iotests.FilePath('disk.img') as disk_img_path, \ iotests.FilePath('disk-snapshot.img') as disk_snapshot_img_path, \ - iotests.FilePath('nbd.sock') as nbd_sock_path, \ + iotests.FilePath('nbd.sock', iotests.sock_dir) as nbd_sock_path, \ iotests.VM() as vm: img_size = '10M' diff --git a/tests/qemu-iotests/209 b/tests/qemu-iotests/209 index 259e991ec6..e0f464bcbe 100755 --- a/tests/qemu-iotests/209 +++ b/tests/qemu-iotests/209 @@ -24,7 +24,8 @@ from iotests import qemu_img_create, qemu_io, qemu_img_verbose, qemu_nbd, \ iotests.verify_image_format(supported_fmts=['qcow2']) -disk, nbd_sock = file_path('disk', 'nbd-sock') +disk = file_path('disk') +nbd_sock = file_path('nbd-sock', base_dir=iotests.sock_dir) nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock qemu_img_create('-f', iotests.imgfmt, disk, '1M') diff --git a/tests/qemu-iotests/222 b/tests/qemu-iotests/222 index 0ead56d574..3f9f934ad8 100644 --- a/tests/qemu-iotests/222 +++ b/tests/qemu-iotests/222 @@ -48,7 +48,7 @@ remainder = [("0xd5", "0x108000", "32k"), # Right-end of partial-left [1] with iotests.FilePath('base.img') as base_img_path, \ iotests.FilePath('fleece.img') as fleece_img_path, \ - iotests.FilePath('nbd.sock') as nbd_sock_path, \ + iotests.FilePath('nbd.sock', iotests.sock_dir) as nbd_sock_path, \ iotests.VM() as vm: log('--- Setting up images ---') diff --git a/tests/qemu-iotests/223 b/tests/qemu-iotests/223 index 2ba3d8124b..b5a80e50bb 100755 --- a/tests/qemu-iotests/223 +++ b/tests/qemu-iotests/223 @@ -28,7 +28,7 @@ _cleanup() nbd_server_stop _cleanup_test_img _cleanup_qemu - rm -f "$TEST_DIR/nbd" + rm -f "$SOCK_DIR/nbd" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -125,11 +125,11 @@ _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-add", "arguments":{"device":"n"}}' "error" # Attempt add without server _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-start", "arguments":{"addr":{"type":"unix", - "data":{"path":"'"$TEST_DIR/nbd"'"}}}}' "return" + "data":{"path":"'"$SOCK_DIR/nbd"'"}}}}' "return" _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-start", "arguments":{"addr":{"type":"unix", - "data":{"path":"'"$TEST_DIR/nbd"1'"}}}}' "error" # Attempt second server -$QEMU_NBD_PROG -L -k "$TEST_DIR/nbd" + "data":{"path":"'"$SOCK_DIR/nbd"1'"}}}}' "error" # Attempt second server +$QEMU_NBD_PROG -L -k "$SOCK_DIR/nbd" _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-add", "arguments":{"device":"n", "bitmap":"b"}}' "return" _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-add", @@ -145,14 +145,14 @@ _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-add", _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-add", "arguments":{"device":"n", "name":"n2", "writable":true, "bitmap":"b2"}}' "return" -$QEMU_NBD_PROG -L -k "$TEST_DIR/nbd" +$QEMU_NBD_PROG -L -k "$SOCK_DIR/nbd" echo echo "=== Contrast normal status to large granularity dirty-bitmap ===" echo QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT -IMG="driver=nbd,export=n,server.type=unix,server.path=$TEST_DIR/nbd" +IMG="driver=nbd,export=n,server.type=unix,server.path=$SOCK_DIR/nbd" $QEMU_IO -r -c 'r -P 0x22 512 512' -c 'r -P 0 512k 512k' -c 'r -P 0x11 1m 1m' \ -c 'r -P 0x33 2m 2m' --image-opts "$IMG" | _filter_qemu_io $QEMU_IMG map --output=json --image-opts \ @@ -164,7 +164,7 @@ echo echo "=== Contrast to small granularity dirty-bitmap ===" echo -IMG="driver=nbd,export=n2,server.type=unix,server.path=$TEST_DIR/nbd" +IMG="driver=nbd,export=n2,server.type=unix,server.path=$SOCK_DIR/nbd" $QEMU_IMG map --output=json --image-opts \ "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b2" | _filter_qemu_img_map diff --git a/tests/qemu-iotests/240 b/tests/qemu-iotests/240 index f73bc07d80..8b4337b58d 100755 --- a/tests/qemu-iotests/240 +++ b/tests/qemu-iotests/240 @@ -29,7 +29,7 @@ status=1 # failure is the default! _cleanup() { - rm -f "$TEST_DIR/nbd" + rm -f "$SOCK_DIR/nbd" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -135,7 +135,7 @@ echo run_qemu <<EOF { "execute": "qmp_capabilities" } { "execute": "blockdev-add", "arguments": {"driver": "null-co", "read-zeroes": true, "node-name": "hd0", "read-only": true}} -{ "execute": "nbd-server-start", "arguments": {"addr":{"type":"unix","data":{"path":"$TEST_DIR/nbd"}}}} +{ "execute": "nbd-server-start", "arguments": {"addr":{"type":"unix","data":{"path":"$SOCK_DIR/nbd"}}}} { "execute": "nbd-server-add", "arguments": {"device":"hd0"}} { "execute": "object-add", "arguments": {"qom-type": "iothread", "id": "iothread0"}} { "execute": "device_add", "arguments": {"id": "scsi0", "driver": "${virtio_scsi}", "iothread": "iothread0"}} diff --git a/tests/qemu-iotests/241 b/tests/qemu-iotests/241 index 58b64ebf41..8dae8d39e4 100755 --- a/tests/qemu-iotests/241 +++ b/tests/qemu-iotests/241 @@ -23,8 +23,6 @@ echo "QA output created by $seq" status=1 # failure is the default! -nbd_unix_socket=$TEST_DIR/test_qemu_nbd_socket - _cleanup() { _cleanup_test_img diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 index 41218d5f1d..e66a23c5f0 100644 --- a/tests/qemu-iotests/245 +++ b/tests/qemu-iotests/245 @@ -598,7 +598,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): ################## ###### null ###### ################## - opts = {'driver': 'null-aio', 'node-name': 'root', 'size': 1024} + opts = {'driver': 'null-co', 'node-name': 'root', 'size': 1024} result = self.vm.qmp('blockdev-add', conv_keys = False, **opts) self.assert_qmp(result, 'return', {}) diff --git a/tests/qemu-iotests/261 b/tests/qemu-iotests/261 new file mode 100755 index 0000000000..fb96bcfbe2 --- /dev/null +++ b/tests/qemu-iotests/261 @@ -0,0 +1,523 @@ +#!/usr/bin/env bash +# +# Test case for qcow2's handling of extra data in snapshot table entries +# (and more generally, how certain cases of broken snapshot tables are +# handled) +# +# Copyright (C) 2019 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq=$(basename $0) +echo "QA output created by $seq" + +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + rm -f "$TEST_IMG".v{2,3}.orig + rm -f "$TEST_DIR"/sn{0,1,2}{,-pre,-extra,-post} +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +# This tests qocw2-specific low-level functionality +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux +# (1) We create a v2 image that supports nothing but refcount_bits=16 +# (2) We do some refcount management on our own which expects +# refcount_bits=16 +_unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' + +# Parameters: +# $1: image filename +# $2: snapshot table entry offset in the image +snapshot_table_entry_size() +{ + id_len=$(peek_file_be "$1" $(($2 + 12)) 2) + name_len=$(peek_file_be "$1" $(($2 + 14)) 2) + extra_len=$(peek_file_be "$1" $(($2 + 36)) 4) + + full_len=$((40 + extra_len + id_len + name_len)) + echo $(((full_len + 7) / 8 * 8)) +} + +# Parameter: +# $1: image filename +print_snapshot_table() +{ + nb_entries=$(peek_file_be "$1" 60 4) + offset=$(peek_file_be "$1" 64 8) + + echo "Snapshots in $1:" | _filter_testdir | _filter_imgfmt + + for ((i = 0; i < nb_entries; i++)); do + id_len=$(peek_file_be "$1" $((offset + 12)) 2) + name_len=$(peek_file_be "$1" $((offset + 14)) 2) + extra_len=$(peek_file_be "$1" $((offset + 36)) 4) + + extra_ofs=$((offset + 40)) + id_ofs=$((extra_ofs + extra_len)) + name_ofs=$((id_ofs + id_len)) + + echo " [$i]" + echo " ID: $(peek_file_raw "$1" $id_ofs $id_len)" + echo " Name: $(peek_file_raw "$1" $name_ofs $name_len)" + echo " Extra data size: $extra_len" + if [ $extra_len -ge 8 ]; then + echo " VM state size: $(peek_file_be "$1" $extra_ofs 8)" + fi + if [ $extra_len -ge 16 ]; then + echo " Disk size: $(peek_file_be "$1" $((extra_ofs + 8)) 8)" + fi + if [ $extra_len -gt 16 ]; then + echo ' Unknown extra data:' \ + "$(peek_file_raw "$1" $((extra_ofs + 16)) $((extra_len - 16)) \ + | tr -d '\0')" + fi + + offset=$((offset + $(snapshot_table_entry_size "$1" $offset))) + done +} + +# Mark clusters as allocated; works only in refblock 0 (i.e. before +# cluster #32768). +# Parameters: +# $1: Start offset of what to allocate +# $2: End offset (exclusive) +refblock0_allocate() +{ + reftable_ofs=$(peek_file_be "$TEST_IMG" 48 8) + refblock_ofs=$(peek_file_be "$TEST_IMG" $reftable_ofs 8) + + cluster=$(($1 / 65536)) + ecluster=$((($2 + 65535) / 65536)) + + while [ $cluster -lt $ecluster ]; do + if [ $cluster -ge 32768 ]; then + echo "*** Abort: Cluster $cluster exceeds refblock 0 ***" + exit 1 + fi + poke_file "$TEST_IMG" $((refblock_ofs + cluster * 2)) '\x00\x01' + cluster=$((cluster + 1)) + done +} + + +echo +echo '=== Create v2 template ===' +echo + +# Create v2 image with a snapshot table with three entries: +# [0]: No extra data (valid with v2, not valid with v3) +# [1]: Has extra data unknown to qemu +# [2]: Has the 64-bit VM state size, but not the disk size (again, +# valid with v2, not valid with v3) + +TEST_IMG="$TEST_IMG.v2.orig" IMGOPTS='compat=0.10' _make_test_img 64M +$QEMU_IMG snapshot -c sn0 "$TEST_IMG.v2.orig" +$QEMU_IMG snapshot -c sn1 "$TEST_IMG.v2.orig" +$QEMU_IMG snapshot -c sn2 "$TEST_IMG.v2.orig" + +# Copy out all existing snapshot table entries +sn_table_ofs=$(peek_file_be "$TEST_IMG.v2.orig" 64 8) + +# ofs: Snapshot table entry offset +# eds: Extra data size +# ids: Name + ID size +# len: Total entry length +sn0_ofs=$sn_table_ofs +sn0_eds=$(peek_file_be "$TEST_IMG.v2.orig" $((sn0_ofs + 36)) 4) +sn0_ids=$(($(peek_file_be "$TEST_IMG.v2.orig" $((sn0_ofs + 12)) 2) + + $(peek_file_be "$TEST_IMG.v2.orig" $((sn0_ofs + 14)) 2))) +sn0_len=$(snapshot_table_entry_size "$TEST_IMG.v2.orig" $sn0_ofs) +sn1_ofs=$((sn0_ofs + sn0_len)) +sn1_eds=$(peek_file_be "$TEST_IMG.v2.orig" $((sn1_ofs + 36)) 4) +sn1_ids=$(($(peek_file_be "$TEST_IMG.v2.orig" $((sn1_ofs + 12)) 2) + + $(peek_file_be "$TEST_IMG.v2.orig" $((sn1_ofs + 14)) 2))) +sn1_len=$(snapshot_table_entry_size "$TEST_IMG.v2.orig" $sn1_ofs) +sn2_ofs=$((sn1_ofs + sn1_len)) +sn2_eds=$(peek_file_be "$TEST_IMG.v2.orig" $((sn2_ofs + 36)) 4) +sn2_ids=$(($(peek_file_be "$TEST_IMG.v2.orig" $((sn2_ofs + 12)) 2) + + $(peek_file_be "$TEST_IMG.v2.orig" $((sn2_ofs + 14)) 2))) +sn2_len=$(snapshot_table_entry_size "$TEST_IMG.v2.orig" $sn2_ofs) + +# Data before extra data +dd if="$TEST_IMG.v2.orig" of="$TEST_DIR/sn0-pre" bs=1 skip=$sn0_ofs count=40 \ + &> /dev/null +dd if="$TEST_IMG.v2.orig" of="$TEST_DIR/sn1-pre" bs=1 skip=$sn1_ofs count=40 \ + &> /dev/null +dd if="$TEST_IMG.v2.orig" of="$TEST_DIR/sn2-pre" bs=1 skip=$sn2_ofs count=40 \ + &> /dev/null + +# Extra data +dd if="$TEST_IMG.v2.orig" of="$TEST_DIR/sn0-extra" bs=1 \ + skip=$((sn0_ofs + 40)) count=$sn0_eds &> /dev/null +dd if="$TEST_IMG.v2.orig" of="$TEST_DIR/sn1-extra" bs=1 \ + skip=$((sn1_ofs + 40)) count=$sn1_eds &> /dev/null +dd if="$TEST_IMG.v2.orig" of="$TEST_DIR/sn2-extra" bs=1 \ + skip=$((sn2_ofs + 40)) count=$sn2_eds &> /dev/null + +# Data after extra data +dd if="$TEST_IMG.v2.orig" of="$TEST_DIR/sn0-post" bs=1 \ + skip=$((sn0_ofs + 40 + sn0_eds)) count=$sn0_ids \ + &> /dev/null +dd if="$TEST_IMG.v2.orig" of="$TEST_DIR/sn1-post" bs=1 \ + skip=$((sn1_ofs + 40 + sn1_eds)) count=$sn1_ids \ + &> /dev/null +dd if="$TEST_IMG.v2.orig" of="$TEST_DIR/sn2-post" bs=1 \ + skip=$((sn2_ofs + 40 + sn2_eds)) count=$sn2_ids \ + &> /dev/null + +# Amend them, one by one +# Set sn0's extra data size to 0 +poke_file "$TEST_DIR/sn0-pre" 36 '\x00\x00\x00\x00' +truncate -s 0 "$TEST_DIR/sn0-extra" +# Grow sn0-post to pad +truncate -s $(($(snapshot_table_entry_size "$TEST_DIR/sn0-pre") - 40)) \ + "$TEST_DIR/sn0-post" + +# Set sn1's extra data size to 42 +poke_file "$TEST_DIR/sn1-pre" 36 '\x00\x00\x00\x2a' +truncate -s 42 "$TEST_DIR/sn1-extra" +poke_file "$TEST_DIR/sn1-extra" 16 'very important data' +# Grow sn1-post to pad +truncate -s $(($(snapshot_table_entry_size "$TEST_DIR/sn1-pre") - 82)) \ + "$TEST_DIR/sn1-post" + +# Set sn2's extra data size to 8 +poke_file "$TEST_DIR/sn2-pre" 36 '\x00\x00\x00\x08' +truncate -s 8 "$TEST_DIR/sn2-extra" +# Grow sn2-post to pad +truncate -s $(($(snapshot_table_entry_size "$TEST_DIR/sn2-pre") - 48)) \ + "$TEST_DIR/sn2-post" + +# Construct snapshot table +cat "$TEST_DIR"/sn0-{pre,extra,post} \ + "$TEST_DIR"/sn1-{pre,extra,post} \ + "$TEST_DIR"/sn2-{pre,extra,post} \ + | dd of="$TEST_IMG.v2.orig" bs=1 seek=$sn_table_ofs conv=notrunc \ + &> /dev/null + +# Done! +TEST_IMG="$TEST_IMG.v2.orig" _check_test_img +print_snapshot_table "$TEST_IMG.v2.orig" + +echo +echo '=== Upgrade to v3 ===' +echo + +cp "$TEST_IMG.v2.orig" "$TEST_IMG.v3.orig" +$QEMU_IMG amend -o compat=1.1 "$TEST_IMG.v3.orig" +TEST_IMG="$TEST_IMG.v3.orig" _check_test_img +print_snapshot_table "$TEST_IMG.v3.orig" + +echo +echo '=== Repair botched v3 ===' +echo + +# Force the v2 file to be v3. v3 requires each snapshot table entry +# to have at least 16 bytes of extra data, so it will not comply to +# the qcow2 v3 specification; but we can fix that. +cp "$TEST_IMG.v2.orig" "$TEST_IMG" + +# Set version +poke_file "$TEST_IMG" 4 '\x00\x00\x00\x03' +# Increase header length (necessary for v3) +poke_file "$TEST_IMG" 100 '\x00\x00\x00\x68' +# Set refcount order (necessary for v3) +poke_file "$TEST_IMG" 96 '\x00\x00\x00\x04' + +_check_test_img -r all +print_snapshot_table "$TEST_IMG" + + +# From now on, just test the qcow2 version we are supposed to test. +# (v3 by default, v2 by choice through $IMGOPTS.) +# That works because we always write all known extra data when +# updating the snapshot table, independent of the version. + +if echo "$IMGOPTS" | grep -q 'compat=\(0\.10\|v2\)' 2> /dev/null; then + subver=v2 +else + subver=v3 +fi + +echo +echo '=== Add new snapshot ===' +echo + +cp "$TEST_IMG.$subver.orig" "$TEST_IMG" +$QEMU_IMG snapshot -c sn3 "$TEST_IMG" +_check_test_img +print_snapshot_table "$TEST_IMG" + +echo +echo '=== Remove different snapshots ===' + +for sn in sn0 sn1 sn2; do + echo + echo "--- $sn ---" + + cp "$TEST_IMG.$subver.orig" "$TEST_IMG" + $QEMU_IMG snapshot -d $sn "$TEST_IMG" + _check_test_img + print_snapshot_table "$TEST_IMG" +done + +echo +echo '=== Reject too much unknown extra data ===' +echo + +cp "$TEST_IMG.$subver.orig" "$TEST_IMG" +$QEMU_IMG snapshot -c sn3 "$TEST_IMG" + +sn_table_ofs=$(peek_file_be "$TEST_IMG" 64 8) +sn0_ofs=$sn_table_ofs +sn1_ofs=$((sn0_ofs + $(snapshot_table_entry_size "$TEST_IMG" $sn0_ofs))) +sn2_ofs=$((sn1_ofs + $(snapshot_table_entry_size "$TEST_IMG" $sn1_ofs))) +sn3_ofs=$((sn2_ofs + $(snapshot_table_entry_size "$TEST_IMG" $sn2_ofs))) + +# 64 kB of extra data should be rejected +# (Note that this also induces a refcount error, because it spills +# over to the next cluster. That's a good way to test that we can +# handle simultaneous snapshot table and refcount errors.) +poke_file "$TEST_IMG" $((sn3_ofs + 36)) '\x00\x01\x00\x00' + +# Print error +_img_info +echo +_check_test_img +echo + +# Should be repairable +_check_test_img -r all + +echo +echo '=== Snapshot table too big ===' +echo + +sn_table_ofs=$(peek_file_be "$TEST_IMG.v3.orig" 64 8) + +# Fill a snapshot with 1 kB of extra data, a 65535-char ID, and a +# 65535-char name, and repeat it as many times as necessary to fill +# 64 MB (the maximum supported by qemu) + +touch "$TEST_DIR/sn0" + +# Full size (fixed + extra + ID + name + padding) +sn_size=$((40 + 1024 + 65535 + 65535 + 2)) + +# We only need the fixed part, though. +truncate -s 40 "$TEST_DIR/sn0" + +# 65535-char ID string +poke_file "$TEST_DIR/sn0" 12 '\xff\xff' +# 65535-char name +poke_file "$TEST_DIR/sn0" 14 '\xff\xff' +# 1 kB of extra data +poke_file "$TEST_DIR/sn0" 36 '\x00\x00\x04\x00' + +# Create test image +_make_test_img 64M + +# Hook up snapshot table somewhere safe (at 1 MB) +poke_file "$TEST_IMG" 64 '\x00\x00\x00\x00\x00\x10\x00\x00' + +offset=1048576 +size_written=0 +sn_count=0 +while [ $size_written -le $((64 * 1048576)) ]; do + dd if="$TEST_DIR/sn0" of="$TEST_IMG" bs=1 seek=$offset conv=notrunc \ + &> /dev/null + offset=$((offset + sn_size)) + size_written=$((size_written + sn_size)) + sn_count=$((sn_count + 1)) +done +truncate -s "$offset" "$TEST_IMG" + +# Give the last snapshot (the one to be removed) an L1 table so we can +# see how that is handled when repairing the image +# (Put it two clusters before 1 MB, and one L2 table one cluster +# before 1 MB) +poke_file "$TEST_IMG" $((offset - sn_size + 0)) \ + '\x00\x00\x00\x00\x00\x0e\x00\x00' +poke_file "$TEST_IMG" $((offset - sn_size + 8)) \ + '\x00\x00\x00\x01' + +# Hook up the L2 table +poke_file "$TEST_IMG" $((1048576 - 2 * 65536)) \ + '\x80\x00\x00\x00\x00\x0f\x00\x00' + +# Make sure all of the clusters we just hooked up are allocated: +# - The snapshot table +# - The last snapshot's L1 and L2 table +refblock0_allocate $((1048576 - 2 * 65536)) $offset + +poke_file "$TEST_IMG" 60 \ + "$(printf '%08x' $sn_count | sed -e 's/\(..\)/\\x\1/g')" + +# Print error +_img_info +echo +_check_test_img +echo + +# Should be repairable +_check_test_img -r all + +echo +echo "$((sn_count - 1)) snapshots should remain:" +echo " qemu-img info reports $(_img_info | grep -c '^ \{34\}') snapshots" +echo " Image header reports $(peek_file_be "$TEST_IMG" 60 4) snapshots" + +echo +echo '=== Snapshot table too big with one entry with too much extra data ===' +echo + +# For this test, we reuse the image from the previous case, which has +# a snapshot table that is right at the limit. +# Our layout looks like this: +# - (a number of snapshot table entries) +# - One snapshot with $extra_data_size extra data +# - One normal snapshot that breaks the 64 MB boundary +# - One normal snapshot beyond the 64 MB boundary +# +# $extra_data_size is calculated so that simply by virtue of it +# decreasing to 1 kB, the penultimate snapshot will fit into 64 MB +# limit again. The final snapshot will always be beyond the limit, so +# that we can see that the repair algorithm does still determine the +# limit to be somewhere, even when truncating one snapshot's extra +# data. + +# The last case has removed the last snapshot, so calculate +# $old_offset to get the current image's real length +old_offset=$((offset - sn_size)) + +# The layout from the previous test had one snapshot beyond the 64 MB +# limit; we want the same (after the oversized extra data has been +# truncated to 1 kB), so we drop the last three snapshots and +# construct them from scratch. +offset=$((offset - 3 * sn_size)) +sn_count=$((sn_count - 3)) + +# Assuming we had already written one of the three snapshots +# (necessary so we can calculate $extra_data_size next). +size_written=$((size_written - 2 * sn_size)) + +# Increase the extra data size so we go past the limit +# (The -1024 comes from the 1 kB of extra data we already have) +extra_data_size=$((64 * 1048576 + 8 - sn_size - (size_written - 1024))) + +poke_file "$TEST_IMG" $((offset + 36)) \ + "$(printf '%08x' $extra_data_size | sed -e 's/\(..\)/\\x\1/g')" + +offset=$((offset + sn_size - 1024 + extra_data_size)) +size_written=$((size_written - 1024 + extra_data_size)) +sn_count=$((sn_count + 1)) + +# Write the two normal snapshots +for ((i = 0; i < 2; i++)); do + dd if="$TEST_DIR/sn0" of="$TEST_IMG" bs=1 seek=$offset conv=notrunc \ + &> /dev/null + offset=$((offset + sn_size)) + size_written=$((size_written + sn_size)) + sn_count=$((sn_count + 1)) + + if [ $i = 0 ]; then + # Check that the penultimate snapshot is beyond the 64 MB limit + echo "Snapshot table size should equal $((64 * 1048576 + 8)):" \ + $size_written + echo + fi +done + +truncate -s $offset "$TEST_IMG" +refblock0_allocate $old_offset $offset + +poke_file "$TEST_IMG" 60 \ + "$(printf '%08x' $sn_count | sed -e 's/\(..\)/\\x\1/g')" + +# Print error +_img_info +echo +_check_test_img +echo + +# Just truncating the extra data should be sufficient to shorten the +# snapshot table so only one snapshot exceeds the extra size +_check_test_img -r all + +echo +echo '=== Too many snapshots ===' +echo + +# Create a v2 image, for speeds' sake: All-zero snapshot table entries +# are only valid in v2. +IMGOPTS='compat=0.10' _make_test_img 64M + +# Hook up snapshot table somewhere safe (at 1 MB) +poke_file "$TEST_IMG" 64 '\x00\x00\x00\x00\x00\x10\x00\x00' +# "Create" more than 65536 snapshots (twice that many here) +poke_file "$TEST_IMG" 60 '\x00\x02\x00\x00' + +# 40-byte all-zero snapshot table entries are valid snapshots, but +# only in v2 (v3 needs 16 bytes of extra data, so we would have to +# write 131072x '\x10'). +truncate -s $((1048576 + 40 * 131072)) "$TEST_IMG" + +# But let us give one of the snapshots to be removed an L1 table so +# we can see how that is handled when repairing the image. +# (Put it two clusters before 1 MB, and one L2 table one cluster +# before 1 MB) +poke_file "$TEST_IMG" $((1048576 + 40 * 65536 + 0)) \ + '\x00\x00\x00\x00\x00\x0e\x00\x00' +poke_file "$TEST_IMG" $((1048576 + 40 * 65536 + 8)) \ + '\x00\x00\x00\x01' + +# Hook up the L2 table +poke_file "$TEST_IMG" $((1048576 - 2 * 65536)) \ + '\x80\x00\x00\x00\x00\x0f\x00\x00' + +# Make sure all of the clusters we just hooked up are allocated: +# - The snapshot table +# - The last snapshot's L1 and L2 table +refblock0_allocate $((1048576 - 2 * 65536)) $((1048576 + 40 * 131072)) + +# Print error +_img_info +echo +_check_test_img +echo + +# Should be repairable +_check_test_img -r all + +echo +echo '65536 snapshots should remain:' +echo " qemu-img info reports $(_img_info | grep -c '^ \{34\}') snapshots" +echo " Image header reports $(peek_file_be "$TEST_IMG" 60 4) snapshots" + +# success, all done +echo "*** done" +status=0 diff --git a/tests/qemu-iotests/261.out b/tests/qemu-iotests/261.out new file mode 100644 index 0000000000..2600354566 --- /dev/null +++ b/tests/qemu-iotests/261.out @@ -0,0 +1,346 @@ +QA output created by 261 + +=== Create v2 template === + +Formatting 'TEST_DIR/t.IMGFMT.v2.orig', fmt=IMGFMT size=67108864 +No errors were found on the image. +Snapshots in TEST_DIR/t.IMGFMT.v2.orig: + [0] + ID: 1 + Name: sn0 + Extra data size: 0 + [1] + ID: 2 + Name: sn1 + Extra data size: 42 + VM state size: 0 + Disk size: 67108864 + Unknown extra data: very important data + [2] + ID: 3 + Name: sn2 + Extra data size: 8 + VM state size: 0 + +=== Upgrade to v3 === + +No errors were found on the image. +Snapshots in TEST_DIR/t.IMGFMT.v3.orig: + [0] + ID: 1 + Name: sn0 + Extra data size: 16 + VM state size: 0 + Disk size: 67108864 + [1] + ID: 2 + Name: sn1 + Extra data size: 42 + VM state size: 0 + Disk size: 67108864 + Unknown extra data: very important data + [2] + ID: 3 + Name: sn2 + Extra data size: 16 + VM state size: 0 + Disk size: 67108864 + +=== Repair botched v3 === + +Repairing snapshot table entry 0 is incomplete +Repairing snapshot table entry 2 is incomplete +The following inconsistencies were found and repaired: + + 0 leaked clusters + 2 corruptions + +Double checking the fixed image now... +No errors were found on the image. +Snapshots in TEST_DIR/t.IMGFMT: + [0] + ID: 1 + Name: sn0 + Extra data size: 16 + VM state size: 0 + Disk size: 67108864 + [1] + ID: 2 + Name: sn1 + Extra data size: 42 + VM state size: 0 + Disk size: 67108864 + Unknown extra data: very important data + [2] + ID: 3 + Name: sn2 + Extra data size: 16 + VM state size: 0 + Disk size: 67108864 + +=== Add new snapshot === + +No errors were found on the image. +Snapshots in TEST_DIR/t.IMGFMT: + [0] + ID: 1 + Name: sn0 + Extra data size: 16 + VM state size: 0 + Disk size: 67108864 + [1] + ID: 2 + Name: sn1 + Extra data size: 42 + VM state size: 0 + Disk size: 67108864 + Unknown extra data: very important data + [2] + ID: 3 + Name: sn2 + Extra data size: 16 + VM state size: 0 + Disk size: 67108864 + [3] + ID: 4 + Name: sn3 + Extra data size: 16 + VM state size: 0 + Disk size: 67108864 + +=== Remove different snapshots === + +--- sn0 --- +No errors were found on the image. +Snapshots in TEST_DIR/t.IMGFMT: + [0] + ID: 2 + Name: sn1 + Extra data size: 42 + VM state size: 0 + Disk size: 67108864 + Unknown extra data: very important data + [1] + ID: 3 + Name: sn2 + Extra data size: 16 + VM state size: 0 + Disk size: 67108864 + +--- sn1 --- +No errors were found on the image. +Snapshots in TEST_DIR/t.IMGFMT: + [0] + ID: 1 + Name: sn0 + Extra data size: 16 + VM state size: 0 + Disk size: 67108864 + [1] + ID: 3 + Name: sn2 + Extra data size: 16 + VM state size: 0 + Disk size: 67108864 + +--- sn2 --- +No errors were found on the image. +Snapshots in TEST_DIR/t.IMGFMT: + [0] + ID: 1 + Name: sn0 + Extra data size: 16 + VM state size: 0 + Disk size: 67108864 + [1] + ID: 2 + Name: sn1 + Extra data size: 42 + VM state size: 0 + Disk size: 67108864 + Unknown extra data: very important data + +=== Reject too much unknown extra data === + +qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Too much extra metadata in snapshot table entry 3 +You can force-remove this extra metadata with qemu-img check -r all + +qemu-img: ERROR failed to read the snapshot table: Too much extra metadata in snapshot table entry 3 +You can force-remove this extra metadata with qemu-img check -r all +qemu-img: Check failed: File too large + +Discarding too much extra metadata in snapshot table entry 3 (65536 > 1024) +ERROR cluster 10 refcount=0 reference=1 +Rebuilding refcount structure +Repairing cluster 1 refcount=1 reference=0 +Repairing cluster 2 refcount=1 reference=0 +The following inconsistencies were found and repaired: + + 0 leaked clusters + 2 corruptions + +Double checking the fixed image now... +No errors were found on the image. + +=== Snapshot table too big === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Snapshot table is too big +You can force-remove all 1 overhanging snapshots with qemu-img check -r all + +qemu-img: ERROR failed to read the snapshot table: Snapshot table is too big +You can force-remove all 1 overhanging snapshots with qemu-img check -r all +qemu-img: Check failed: File too large + +Discarding 1 overhanging snapshots (snapshot table is too big) +Leaked cluster 14 refcount=1 reference=0 +Leaked cluster 15 refcount=1 reference=0 +Leaked cluster 1039 refcount=1 reference=0 +Leaked cluster 1040 refcount=1 reference=0 +Repairing cluster 14 refcount=1 reference=0 +Repairing cluster 15 refcount=1 reference=0 +Repairing cluster 1039 refcount=1 reference=0 +Repairing cluster 1040 refcount=1 reference=0 +The following inconsistencies were found and repaired: + + 4 leaked clusters + 1 corruptions + +Double checking the fixed image now... +No errors were found on the image. + +507 snapshots should remain: + qemu-img info reports 507 snapshots + Image header reports 507 snapshots + +=== Snapshot table too big with one entry with too much extra data === + +Snapshot table size should equal 67108872: 67108872 + +qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Too much extra metadata in snapshot table entry 505 +You can force-remove this extra metadata with qemu-img check -r all + +qemu-img: ERROR failed to read the snapshot table: Too much extra metadata in snapshot table entry 505 +You can force-remove this extra metadata with qemu-img check -r all +qemu-img: Check failed: File too large + +Discarding too much extra metadata in snapshot table entry 505 (116944 > 1024) +Discarding 1 overhanging snapshots (snapshot table is too big) +Leaked cluster 1041 refcount=1 reference=0 +Leaked cluster 1042 refcount=1 reference=0 +Repairing cluster 1041 refcount=1 reference=0 +Repairing cluster 1042 refcount=1 reference=0 +The following inconsistencies were found and repaired: + + 2 leaked clusters + 2 corruptions + +Double checking the fixed image now... +No errors were found on the image. + +=== Too many snapshots === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Snapshot table too large + +qemu-img: ERROR snapshot table too large +You can force-remove all 65536 overhanging snapshots with qemu-img check -r all +qemu-img: Check failed: File too large + +Discarding 65536 overhanging snapshots +Leaked cluster 14 refcount=1 reference=0 +Leaked cluster 15 refcount=1 reference=0 +Leaked cluster 56 refcount=1 reference=0 +Leaked cluster 57 refcount=1 reference=0 +Leaked cluster 58 refcount=1 reference=0 +Leaked cluster 59 refcount=1 reference=0 +Leaked cluster 60 refcount=1 reference=0 +Leaked cluster 61 refcount=1 reference=0 +Leaked cluster 62 refcount=1 reference=0 +Leaked cluster 63 refcount=1 reference=0 +Leaked cluster 64 refcount=1 reference=0 +Leaked cluster 65 refcount=1 reference=0 +Leaked cluster 66 refcount=1 reference=0 +Leaked cluster 67 refcount=1 reference=0 +Leaked cluster 68 refcount=1 reference=0 +Leaked cluster 69 refcount=1 reference=0 +Leaked cluster 70 refcount=1 reference=0 +Leaked cluster 71 refcount=1 reference=0 +Leaked cluster 72 refcount=1 reference=0 +Leaked cluster 73 refcount=1 reference=0 +Leaked cluster 74 refcount=1 reference=0 +Leaked cluster 75 refcount=1 reference=0 +Leaked cluster 76 refcount=1 reference=0 +Leaked cluster 77 refcount=1 reference=0 +Leaked cluster 78 refcount=1 reference=0 +Leaked cluster 79 refcount=1 reference=0 +Leaked cluster 80 refcount=1 reference=0 +Leaked cluster 81 refcount=1 reference=0 +Leaked cluster 82 refcount=1 reference=0 +Leaked cluster 83 refcount=1 reference=0 +Leaked cluster 84 refcount=1 reference=0 +Leaked cluster 85 refcount=1 reference=0 +Leaked cluster 86 refcount=1 reference=0 +Leaked cluster 87 refcount=1 reference=0 +Leaked cluster 88 refcount=1 reference=0 +Leaked cluster 89 refcount=1 reference=0 +Leaked cluster 90 refcount=1 reference=0 +Leaked cluster 91 refcount=1 reference=0 +Leaked cluster 92 refcount=1 reference=0 +Leaked cluster 93 refcount=1 reference=0 +Leaked cluster 94 refcount=1 reference=0 +Leaked cluster 95 refcount=1 reference=0 +Repairing cluster 14 refcount=1 reference=0 +Repairing cluster 15 refcount=1 reference=0 +Repairing cluster 56 refcount=1 reference=0 +Repairing cluster 57 refcount=1 reference=0 +Repairing cluster 58 refcount=1 reference=0 +Repairing cluster 59 refcount=1 reference=0 +Repairing cluster 60 refcount=1 reference=0 +Repairing cluster 61 refcount=1 reference=0 +Repairing cluster 62 refcount=1 reference=0 +Repairing cluster 63 refcount=1 reference=0 +Repairing cluster 64 refcount=1 reference=0 +Repairing cluster 65 refcount=1 reference=0 +Repairing cluster 66 refcount=1 reference=0 +Repairing cluster 67 refcount=1 reference=0 +Repairing cluster 68 refcount=1 reference=0 +Repairing cluster 69 refcount=1 reference=0 +Repairing cluster 70 refcount=1 reference=0 +Repairing cluster 71 refcount=1 reference=0 +Repairing cluster 72 refcount=1 reference=0 +Repairing cluster 73 refcount=1 reference=0 +Repairing cluster 74 refcount=1 reference=0 +Repairing cluster 75 refcount=1 reference=0 +Repairing cluster 76 refcount=1 reference=0 +Repairing cluster 77 refcount=1 reference=0 +Repairing cluster 78 refcount=1 reference=0 +Repairing cluster 79 refcount=1 reference=0 +Repairing cluster 80 refcount=1 reference=0 +Repairing cluster 81 refcount=1 reference=0 +Repairing cluster 82 refcount=1 reference=0 +Repairing cluster 83 refcount=1 reference=0 +Repairing cluster 84 refcount=1 reference=0 +Repairing cluster 85 refcount=1 reference=0 +Repairing cluster 86 refcount=1 reference=0 +Repairing cluster 87 refcount=1 reference=0 +Repairing cluster 88 refcount=1 reference=0 +Repairing cluster 89 refcount=1 reference=0 +Repairing cluster 90 refcount=1 reference=0 +Repairing cluster 91 refcount=1 reference=0 +Repairing cluster 92 refcount=1 reference=0 +Repairing cluster 93 refcount=1 reference=0 +Repairing cluster 94 refcount=1 reference=0 +Repairing cluster 95 refcount=1 reference=0 +The following inconsistencies were found and repaired: + + 42 leaked clusters + 65536 corruptions + +Double checking the fixed image now... +No errors were found on the image. + +65536 snapshots should remain: + qemu-img info reports 65536 snapshots + Image header reports 65536 snapshots +*** done diff --git a/tests/qemu-iotests/264 b/tests/qemu-iotests/264 index c8cd97ae2b..131366422b 100755 --- a/tests/qemu-iotests/264 +++ b/tests/qemu-iotests/264 @@ -24,6 +24,8 @@ import iotests from iotests import qemu_img_create, qemu_io_silent_check, file_path, \ qemu_nbd_popen, log +iotests.verify_image_format(supported_fmts=['qcow2']) + disk_a, disk_b, nbd_sock = file_path('disk_a', 'disk_b', 'nbd-sock') nbd_uri = 'nbd+unix:///?socket=' + nbd_sock size = 5 * 1024 * 1024 diff --git a/tests/qemu-iotests/267 b/tests/qemu-iotests/267 index d37a67c012..170e173c0a 100755 --- a/tests/qemu-iotests/267 +++ b/tests/qemu-iotests/267 @@ -29,7 +29,7 @@ status=1 # failure is the default! _cleanup() { _cleanup_test_img - rm -f "$TEST_DIR/nbd" + rm -f "$SOCK_DIR/nbd" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -143,7 +143,7 @@ echo IMGOPTS="backing_file=$TEST_IMG.base" _make_test_img $size cat <<EOF | -nbd_server_start unix:$TEST_DIR/nbd +nbd_server_start unix:$SOCK_DIR/nbd nbd_server_add -w backing-fmt savevm snap0 info snapshots diff --git a/tests/qemu-iotests/267.out b/tests/qemu-iotests/267.out index 9d812e3c72..8dddb4baa4 100644 --- a/tests/qemu-iotests/267.out +++ b/tests/qemu-iotests/267.out @@ -161,7 +161,7 @@ Internal snapshots on backing file: Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/t.IMGFMT.base Testing: -blockdev driver=file,filename=TEST_DIR/t.IMGFMT.base,node-name=backing-file -blockdev driver=IMGFMT,file=backing-file,node-name=backing-fmt -blockdev driver=file,filename=TEST_DIR/t.IMGFMT,node-name=file -blockdev driver=IMGFMT,file=file,backing=backing-fmt,node-name=fmt QEMU X.Y.Z monitor - type 'help' for more information -(qemu) nbd_server_start unix:TEST_DIR/nbd +(qemu) nbd_server_start unix:SOCK_DIR/nbd (qemu) nbd_server_add -w backing-fmt (qemu) savevm snap0 (qemu) info snapshots diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 588c453a94..71fe38834e 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -97,6 +97,7 @@ IMGFMT -- $FULL_IMGFMT_DETAILS IMGPROTO -- $IMGPROTO PLATFORM -- $FULL_HOST_DETAILS TEST_DIR -- $TEST_DIR +SOCK_DIR -- $SOCK_DIR SOCKET_SCM_HELPER -- $SOCKET_SCM_HELPER EOF @@ -116,10 +117,14 @@ set_prog_path() if [ -z "$TEST_DIR" ]; then TEST_DIR=$PWD/scratch fi +mkdir -p "$TEST_DIR" || _init_error 'Failed to create TEST_DIR' -if [ ! -e "$TEST_DIR" ]; then - mkdir "$TEST_DIR" +tmp_sock_dir=false +if [ -z "$SOCK_DIR" ]; then + SOCK_DIR=$(mktemp -d) + tmp_sock_dir=true fi +mkdir -p "$SOCK_DIR" || _init_error 'Failed to create SOCK_DIR' diff="diff -u" verbose=false @@ -534,6 +539,7 @@ if [ -z "$SAMPLE_IMG_DIR" ]; then fi export TEST_DIR +export SOCK_DIR export SAMPLE_IMG_DIR if [ -s $tmp.list ] @@ -716,6 +722,11 @@ END { if (NR > 0) { rm -f "${TEST_DIR}"/*.out "${TEST_DIR}"/*.err "${TEST_DIR}"/*.time rm -f "${TEST_DIR}"/check.pid "${TEST_DIR}"/check.sts rm -f $tmp.* + + if $tmp_sock_dir + then + rm -rf "$SOCK_DIR" + fi } trap "_wrapup; exit \$status" 0 1 2 3 15 diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index 9f418b4881..f870e00e44 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -43,7 +43,8 @@ _filter_qom_path() # replace occurrences of the actual TEST_DIR value with TEST_DIR _filter_testdir() { - $SED -e "s#$TEST_DIR/#TEST_DIR/#g" + $SED -e "s#$TEST_DIR/#TEST_DIR/#g" \ + -e "s#$SOCK_DIR/#SOCK_DIR/#g" } # replace occurrences of the actual IMGFMT value with IMGFMT @@ -124,6 +125,7 @@ _filter_img_create() $SED -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ -e "s#$TEST_DIR#TEST_DIR#g" \ + -e "s#$SOCK_DIR#SOCK_DIR#g" \ -e "s#$IMGFMT#IMGFMT#g" \ -e 's#nbd:127.0.0.1:10810#TEST_DIR/t.IMGFMT#g' \ -e "s# encryption=off##g" \ @@ -160,6 +162,7 @@ _filter_img_info() $SED -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ -e "s#$TEST_DIR#TEST_DIR#g" \ + -e "s#$SOCK_DIR#SOCK_DIR#g" \ -e "s#$IMGFMT#IMGFMT#g" \ -e 's#nbd://127.0.0.1:10810$#TEST_DIR/t.IMGFMT#g' \ -e 's#json.*vdisk-id.*vxhs"}}#TEST_DIR/t.IMGFMT#' \ @@ -218,7 +221,7 @@ _filter_nbd() # Filter out the TCP port number since this changes between runs. $SED -e '/nbd\/.*\.c:/d' \ -e 's#127\.0\.0\.1:[0-9]*#127.0.0.1:PORT#g' \ - -e "s#?socket=$TEST_DIR#?socket=TEST_DIR#g" \ + -e "s#?socket=$SOCK_DIR#?socket=SOCK_DIR#g" \ -e 's#\(foo\|PORT/\?\|.sock\): Failed to .*$#\1#' } diff --git a/tests/qemu-iotests/common.nbd b/tests/qemu-iotests/common.nbd index 24b01b60aa..a8cae8fe2c 100644 --- a/tests/qemu-iotests/common.nbd +++ b/tests/qemu-iotests/common.nbd @@ -19,7 +19,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # -nbd_unix_socket="${TEST_DIR}/qemu-nbd.sock" +nbd_unix_socket="${SOCK_DIR}/qemu-nbd.sock" nbd_tcp_addr="127.0.0.1" nbd_pid_file="${TEST_DIR}/qemu-nbd.pid" nbd_stderr_fifo="${TEST_DIR}/qemu-nbd.fifo" diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 12b4751848..fa7bae2422 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -53,6 +53,26 @@ poke_file() printf "$3" | dd "of=$1" bs=1 "seek=$2" conv=notrunc &>/dev/null } +# peek_file_le 'test.img' 512 2 => 65534 +peek_file_le() +{ + # Wrap in echo $() to strip spaces + echo $(od -j"$2" -N"$3" --endian=little -An -vtu"$3" "$1") +} + +# peek_file_be 'test.img' 512 2 => 65279 +peek_file_be() +{ + # Wrap in echo $() to strip spaces + echo $(od -j"$2" -N"$3" --endian=big -An -vtu"$3" "$1") +} + +# peek_file_raw 'test.img' 512 2 => '\xff\xfe' +peek_file_raw() +{ + dd if="$1" bs=1 skip="$2" count="$3" status=none +} + if ! . ./common.config then diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index af322af756..28871604cd 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -274,6 +274,7 @@ 257 rw 258 rw quick 260 rw quick +261 rw 262 rw quick migration 263 rw quick 264 rw diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 709def4d5d..075f4739da 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -57,6 +57,7 @@ qemu_opts = os.environ.get('QEMU_OPTIONS', '').strip().split(' ') imgfmt = os.environ.get('IMGFMT', 'raw') imgproto = os.environ.get('IMGPROTO', 'file') test_dir = os.environ.get('TEST_DIR') +sock_dir = os.environ.get('SOCK_DIR') output_dir = os.environ.get('OUTPUT_DIR', '.') cachemode = os.environ.get('CACHEMODE') qemu_default_machine = os.environ.get('QEMU_DEFAULT_MACHINE') @@ -385,10 +386,10 @@ class FilePaths(object): qemu_img('create', img_path, '1G') # migration_sock_path is automatically deleted """ - def __init__(self, names): + def __init__(self, names, base_dir=test_dir): self.paths = [] for name in names: - self.paths.append(os.path.join(test_dir, file_pattern(name))) + self.paths.append(os.path.join(base_dir, file_pattern(name))) def __enter__(self): return self.paths @@ -405,8 +406,8 @@ class FilePath(FilePaths): """ FilePath is a specialization of FilePaths that takes a single filename. """ - def __init__(self, name): - super(FilePath, self).__init__([name]) + def __init__(self, name, base_dir=test_dir): + super(FilePath, self).__init__([name], base_dir) def __enter__(self): return self.paths[0] @@ -419,7 +420,7 @@ def file_path_remover(): pass -def file_path(*names): +def file_path(*names, base_dir=test_dir): ''' Another way to get auto-generated filename that cleans itself up. Use is as simple as: @@ -435,7 +436,7 @@ def file_path(*names): paths = [] for name in names: filename = file_pattern(name) - path = os.path.join(test_dir, filename) + path = os.path.join(base_dir, filename) file_path_remover.paths.append(path) paths.append(path) @@ -456,7 +457,8 @@ class VM(qtest.QEMUQtestMachine): name = "qemu%s-%d" % (path_suffix, os.getpid()) super(VM, self).__init__(qemu_prog, qemu_opts, name=name, test_dir=test_dir, - socket_scm_helper=socket_scm_helper) + socket_scm_helper=socket_scm_helper, + sock_dir=sock_dir) self._num_drives = 0 def add_object(self, opts): @@ -838,6 +840,11 @@ class QMPTestCase(unittest.TestCase): return self.pause_wait(job_id) return result + def case_skip(self, reason): + '''Skip this test case''' + case_notrun(reason) + self.skipTest(reason) + def notrun(reason): '''Skip this test suite''' @@ -849,7 +856,11 @@ def notrun(reason): sys.exit(0) def case_notrun(reason): - '''Skip this test case''' + '''Mark this test case as not having been run (without actually + skipping it, that is left to the caller). See + QMPTestCase.case_skip() for a variant that actually skips the + current test case.''' + # Each test in qemu-iotests has a number ("seq") seq = os.path.basename(sys.argv[0]) @@ -912,22 +923,34 @@ def qemu_pipe(*args): def supported_formats(read_only=False): '''Set 'read_only' to True to check ro-whitelist Otherwise, rw-whitelist is checked''' - format_message = qemu_pipe("-drive", "format=help") - line = 1 if read_only else 0 - return format_message.splitlines()[line].split(":")[1].split() + + if not hasattr(supported_formats, "formats"): + supported_formats.formats = {} + + if read_only not in supported_formats.formats: + format_message = qemu_pipe("-drive", "format=help") + line = 1 if read_only else 0 + supported_formats.formats[read_only] = \ + format_message.splitlines()[line].split(":")[1].split() + + return supported_formats.formats[read_only] def skip_if_unsupported(required_formats=[], read_only=False): '''Skip Test Decorator Runs the test if all the required formats are whitelisted''' def skip_test_decorator(func): - def func_wrapper(*args, **kwargs): - usf_list = list(set(required_formats) - - set(supported_formats(read_only))) + def func_wrapper(test_case: QMPTestCase, *args, **kwargs): + if callable(required_formats): + fmts = required_formats(test_case) + else: + fmts = required_formats + + usf_list = list(set(fmts) - set(supported_formats(read_only))) if usf_list: - case_notrun('{}: formats {} are not whitelisted'.format( - args[0], usf_list)) + test_case.case_skip('{}: formats {} are not whitelisted'.format( + test_case, usf_list)) else: - return func(*args, **kwargs) + return func(test_case, *args, **kwargs) return func_wrapper return skip_test_decorator @@ -950,8 +973,15 @@ def execute_unittest(output, verbosity, debug): unittest.main(testRunner=runner) finally: if not debug: - sys.stderr.write(re.sub(r'Ran (\d+) tests? in [\d.]+s', - r'Ran \1 tests', output.getvalue())) + out = output.getvalue() + out = re.sub(r'Ran (\d+) tests? in [\d.]+s', r'Ran \1 tests', out) + + # Hide skipped tests from the reference output + out = re.sub(r'OK \(skipped=\d+\)', 'OK', out) + out_first_line, out_rest = out.split('\n', 1) + out = out_first_line.replace('s', '.') + '\n' + out_rest + + sys.stderr.write(out) def execute_test(test_function=None, supported_fmts=[], supported_oses=['linux'], diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c index cfe30bab21..0c861809f0 100644 --- a/tests/test-block-iothread.c +++ b/tests/test-block-iothread.c @@ -45,7 +45,7 @@ static int coroutine_fn bdrv_test_co_pdiscard(BlockDriverState *bs, } static int coroutine_fn -bdrv_test_co_truncate(BlockDriverState *bs, int64_t offset, +bdrv_test_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, Error **errp) { return 0; @@ -185,18 +185,18 @@ static void test_sync_op_truncate(BdrvChild *c) int ret; /* Normal success path */ - ret = bdrv_truncate(c, 65536, PREALLOC_MODE_OFF, NULL); + ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL); g_assert_cmpint(ret, ==, 0); /* Early error: Negative offset */ - ret = bdrv_truncate(c, -2, PREALLOC_MODE_OFF, NULL); + ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, NULL); g_assert_cmpint(ret, ==, -EINVAL); /* Error: Read-only image */ c->bs->read_only = true; c->bs->open_flags &= ~BDRV_O_RDWR; - ret = bdrv_truncate(c, 65536, PREALLOC_MODE_OFF, NULL); + ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL); g_assert_cmpint(ret, ==, -EACCES); c->bs->read_only = false; diff --git a/util/Makefile.objs b/util/Makefile.objs index 41bf59d127..df124af1c5 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -37,6 +37,7 @@ util-obj-y += rcu.o util-obj-$(CONFIG_MEMBARRIER) += sys_membarrier.o util-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o util-obj-y += qemu-coroutine-sleep.o +util-obj-y += qemu-co-shared-resource.o util-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o util-obj-y += buffer.o util-obj-y += timed-average.o diff --git a/util/hbitmap.c b/util/hbitmap.c index 66db87c6ff..242c6e519c 100644 --- a/util/hbitmap.c +++ b/util/hbitmap.c @@ -387,6 +387,10 @@ void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count) uint64_t first, n; uint64_t last = start + count - 1; + if (count == 0) { + return; + } + trace_hbitmap_set(hb, start, count, start >> hb->granularity, last >> hb->granularity); @@ -478,6 +482,10 @@ void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) uint64_t last = start + count - 1; uint64_t gran = 1ULL << hb->granularity; + if (count == 0) { + return; + } + assert(QEMU_IS_ALIGNED(start, gran)); assert(QEMU_IS_ALIGNED(count, gran) || (start + count == hb->orig_size)); diff --git a/util/qemu-co-shared-resource.c b/util/qemu-co-shared-resource.c new file mode 100644 index 0000000000..1c83cd9d29 --- /dev/null +++ b/util/qemu-co-shared-resource.c @@ -0,0 +1,76 @@ +/* + * Helper functionality for distributing a fixed total amount of + * an abstract resource among multiple coroutines. + * + * Copyright (c) 2019 Virtuozzo International GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/coroutine.h" +#include "qemu/co-shared-resource.h" + +struct SharedResource { + uint64_t total; + uint64_t available; + + CoQueue queue; +}; + +SharedResource *shres_create(uint64_t total) +{ + SharedResource *s = g_new0(SharedResource, 1); + + s->total = s->available = total; + qemu_co_queue_init(&s->queue); + + return s; +} + +void shres_destroy(SharedResource *s) +{ + assert(s->available == s->total); + g_free(s); +} + +bool co_try_get_from_shres(SharedResource *s, uint64_t n) +{ + if (s->available >= n) { + s->available -= n; + return true; + } + + return false; +} + +void coroutine_fn co_get_from_shres(SharedResource *s, uint64_t n) +{ + assert(n <= s->total); + while (!co_try_get_from_shres(s, n)) { + qemu_co_queue_wait(&s->queue, NULL); + } +} + +void coroutine_fn co_put_to_shres(SharedResource *s, uint64_t n) +{ + assert(s->total - s->available >= n); + s->available += n; + qemu_co_queue_restart_all(&s->queue); +} |