From 5176196c3233dcb80aacc719be6b6502bb1c972b Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:36 +0800 Subject: block: Make bdrv_perm_names public It can be used outside of block.c for making user friendly messages. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block.c | 2 +- include/block/block.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index a45b9b5c29..e3aa7f0824 100644 --- a/block.c +++ b/block.c @@ -1586,7 +1586,7 @@ static char *bdrv_child_user_desc(BdrvChild *c) return g_strdup("another user"); } -static char *bdrv_perm_names(uint64_t perm) +char *bdrv_perm_names(uint64_t perm) { struct perm_name { uint64_t perm; diff --git a/include/block/block.h b/include/block/block.h index 862eb56fc7..9fa0f235a6 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -224,6 +224,8 @@ enum { BLK_PERM_ALL = 0x1f, }; +char *bdrv_perm_names(uint64_t perm); + /* disk I/O throttling */ void bdrv_init(void); void bdrv_init_with_whitelist(void); -- cgit v1.2.3 From 5a9347c6730ff6d68bba30b24f80cd90a6671a20 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:37 +0800 Subject: block: Add, parse and store "force-share" option Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block.c | 17 +++++++++++++++++ include/block/block.h | 1 + include/block/block_int.h | 1 + qapi/block-core.json | 3 +++ 4 files changed, 22 insertions(+) diff --git a/block.c b/block.c index e3aa7f0824..31d6d0be34 100644 --- a/block.c +++ b/block.c @@ -800,6 +800,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, * the parent. */ qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); + qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE); /* Inherit the read-only option from the parent if it's not set */ qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); @@ -908,6 +909,7 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options, * which is only applied on the top level (BlockBackend) */ qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); + qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE); /* backing files always opened read-only */ qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on"); @@ -1150,6 +1152,11 @@ QemuOptsList bdrv_runtime_opts = { .type = QEMU_OPT_STRING, .help = "discard operation (ignore/off, unmap/on)", }, + { + .name = BDRV_OPT_FORCE_SHARE, + .type = QEMU_OPT_BOOL, + .help = "always accept other writers (default: off)", + }, { /* end of list */ } }, }; @@ -1189,6 +1196,16 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, drv = bdrv_find_format(driver_name); assert(drv != NULL); + bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false); + + if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) { + error_setg(errp, + BDRV_OPT_FORCE_SHARE + "=on can only be used with read-only images"); + ret = -EINVAL; + goto fail_opts; + } + if (file != NULL) { filename = blk_bs(file)->filename; } else { diff --git a/include/block/block.h b/include/block/block.h index 9fa0f235a6..877fbb0d9a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -109,6 +109,7 @@ typedef struct HDGeometry { #define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" #define BDRV_OPT_READ_ONLY "read-only" #define BDRV_OPT_DISCARD "discard" +#define BDRV_OPT_FORCE_SHARE "force-share" #define BDRV_SECTOR_BITS 9 diff --git a/include/block/block_int.h b/include/block/block_int.h index 87739405d5..1b4d08e8df 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -518,6 +518,7 @@ struct BlockDriverState { bool valid_key; /* if true, a valid encryption key has been set */ bool sg; /* if true, the device is a /dev/sg* */ bool probed; /* if true, format was probed rather than specified */ + bool force_share; /* if true, always allow all shared permissions */ BlockDriver *drv; /* NULL means no media */ void *opaque; diff --git a/qapi/block-core.json b/qapi/block-core.json index 614181b553..30d5952321 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2898,6 +2898,8 @@ # (default: false) # @detect-zeroes: detect and optimize zero writes (Since 2.1) # (default: off) +# @force-share: force share all permission on added nodes. +# Requires read-only=true. (Since 2.10) # # Remaining options are determined by the block driver. # @@ -2909,6 +2911,7 @@ '*discard': 'BlockdevDiscardOptions', '*cache': 'BlockdevCacheOptions', '*read-only': 'bool', + '*force-share': 'bool', '*detect-zeroes': 'BlockdevDetectZeroesOptions' }, 'discriminator': 'driver', 'data': { -- cgit v1.2.3 From ffd1a5a25c7199d3c9c6d84a5a84ae51f8824602 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:38 +0800 Subject: block: Respect "force-share" in perm propagating Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/block.c b/block.c index 31d6d0be34..1e00e313c3 100644 --- a/block.c +++ b/block.c @@ -1465,6 +1465,22 @@ static int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, static void bdrv_child_abort_perm_update(BdrvChild *c); static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, + BdrvChild *c, + const BdrvChildRole *role, + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared) +{ + if (bs->drv && bs->drv->bdrv_child_perm) { + bs->drv->bdrv_child_perm(bs, c, role, + parent_perm, parent_shared, + nperm, nshared); + } + if (child_bs && child_bs->force_share) { + *nshared = BLK_PERM_ALL; + } +} + /* * Check whether permissions on this node can be changed in a way that * @cumulative_perms and @cumulative_shared_perms are the new cumulative @@ -1509,9 +1525,9 @@ static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, /* Check all children */ QLIST_FOREACH(c, &bs->children, next) { uint64_t cur_perm, cur_shared; - drv->bdrv_child_perm(bs, c, c->role, - cumulative_perms, cumulative_shared_perms, - &cur_perm, &cur_shared); + bdrv_child_perm(bs, c->bs, c, c->role, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); ret = bdrv_child_check_perm(c, cur_perm, cur_shared, ignore_children, errp); if (ret < 0) { @@ -1571,9 +1587,9 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, /* Update all children */ QLIST_FOREACH(c, &bs->children, next) { uint64_t cur_perm, cur_shared; - drv->bdrv_child_perm(bs, c, c->role, - cumulative_perms, cumulative_shared_perms, - &cur_perm, &cur_shared); + bdrv_child_perm(bs, c->bs, c, c->role, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); bdrv_child_set_perm(c, cur_perm, cur_shared); } } @@ -1908,8 +1924,8 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, assert(parent_bs->drv); assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs)); - parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role, - perm, shared_perm, &perm, &shared_perm); + bdrv_child_perm(parent_bs, child_bs, NULL, child_role, + perm, shared_perm, &perm, &shared_perm); child = bdrv_root_attach_child(child_bs, child_name, child_role, perm, shared_perm, parent_bs, errp); -- cgit v1.2.3 From 335e9937844449a3fc172dd5e64549ad7d5ad1c2 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:39 +0800 Subject: qemu-img: Add --force-share option to subcommands This will force the opened images to allow sharing all permissions with other programs. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- qemu-img.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 117 insertions(+), 36 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index f3b0ab49cc..418f061736 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -28,6 +28,7 @@ #include "qapi/qobject-output-visitor.h" #include "qapi/qmp/qerror.h" #include "qapi/qmp/qjson.h" +#include "qapi/qmp/qbool.h" #include "qemu/cutils.h" #include "qemu/config-file.h" #include "qemu/option.h" @@ -283,12 +284,20 @@ static int img_open_password(BlockBackend *blk, const char *filename, static BlockBackend *img_open_opts(const char *optstr, QemuOpts *opts, int flags, bool writethrough, - bool quiet) + bool quiet, bool force_share) { QDict *options; Error *local_err = NULL; BlockBackend *blk; options = qemu_opts_to_qdict(opts, NULL); + if (force_share) { + if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE) + && !qdict_get_bool(options, BDRV_OPT_FORCE_SHARE)) { + error_report("--force-share/-U conflicts with image options"); + return NULL; + } + qdict_put(options, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true)); + } blk = blk_new_open(NULL, NULL, options, flags, &local_err); if (!blk) { error_reportf_err(local_err, "Could not open '%s': ", optstr); @@ -305,17 +314,20 @@ static BlockBackend *img_open_opts(const char *optstr, static BlockBackend *img_open_file(const char *filename, const char *fmt, int flags, - bool writethrough, bool quiet) + bool writethrough, bool quiet, + bool force_share) { BlockBackend *blk; Error *local_err = NULL; - QDict *options = NULL; + QDict *options = qdict_new(); if (fmt) { - options = qdict_new(); qdict_put_str(options, "driver", fmt); } + if (force_share) { + qdict_put(options, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true)); + } blk = blk_new_open(filename, NULL, options, flags, &local_err); if (!blk) { error_reportf_err(local_err, "Could not open '%s': ", filename); @@ -334,7 +346,7 @@ static BlockBackend *img_open_file(const char *filename, static BlockBackend *img_open(bool image_opts, const char *filename, const char *fmt, int flags, bool writethrough, - bool quiet) + bool quiet, bool force_share) { BlockBackend *blk; if (image_opts) { @@ -348,9 +360,11 @@ static BlockBackend *img_open(bool image_opts, if (!opts) { return NULL; } - blk = img_open_opts(filename, opts, flags, writethrough, quiet); + blk = img_open_opts(filename, opts, flags, writethrough, quiet, + force_share); } else { - blk = img_open_file(filename, fmt, flags, writethrough, quiet); + blk = img_open_file(filename, fmt, flags, writethrough, quiet, + force_share); } return blk; } @@ -650,6 +664,7 @@ static int img_check(int argc, char **argv) ImageCheck *check; bool quiet = false; bool image_opts = false; + bool force_share = false; fmt = NULL; output = NULL; @@ -664,9 +679,10 @@ static int img_check(int argc, char **argv) {"output", required_argument, 0, OPTION_OUTPUT}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":hf:r:T:q", + c = getopt_long(argc, argv, ":hf:r:T:qU", long_options, &option_index); if (c == -1) { break; @@ -705,6 +721,9 @@ static int img_check(int argc, char **argv) case 'q': quiet = true; break; + case 'U': + force_share = true; + break; case OPTION_OBJECT: { QemuOpts *opts; opts = qemu_opts_parse_noisily(&qemu_object_opts, @@ -744,7 +763,8 @@ static int img_check(int argc, char **argv) return 1; } - blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet); + blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet, + force_share); if (!blk) { return 1; } @@ -947,7 +967,8 @@ static int img_commit(int argc, char **argv) return 1; } - blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet); + blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet, + false); if (!blk) { return 1; } @@ -1206,6 +1227,7 @@ static int img_compare(int argc, char **argv) int c, pnum; uint64_t progress_base; bool image_opts = false; + bool force_share = false; cache = BDRV_DEFAULT_CACHE; for (;;) { @@ -1213,9 +1235,10 @@ static int img_compare(int argc, char **argv) {"help", no_argument, 0, 'h'}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":hf:F:T:pqs", + c = getopt_long(argc, argv, ":hf:F:T:pqsU", long_options, NULL); if (c == -1) { break; @@ -1248,6 +1271,9 @@ static int img_compare(int argc, char **argv) case 's': strict = true; break; + case 'U': + force_share = true; + break; case OPTION_OBJECT: { QemuOpts *opts; opts = qemu_opts_parse_noisily(&qemu_object_opts, @@ -1293,13 +1319,15 @@ static int img_compare(int argc, char **argv) goto out3; } - blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet); + blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet, + force_share); if (!blk1) { ret = 2; goto out3; } - blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet); + blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet, + force_share); if (!blk2) { ret = 2; goto out2; @@ -1902,6 +1930,7 @@ static int img_convert(int argc, char **argv) bool writethrough, src_writethrough, quiet = false, image_opts = false, skip_create = false, progress = false; int64_t ret = -EINVAL; + bool force_share = false; ImgConvertState s = (ImgConvertState) { /* Need at least 4k of zeros for sparse detection */ @@ -1916,9 +1945,10 @@ static int img_convert(int argc, char **argv) {"help", no_argument, 0, 'h'}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":hf:O:B:ce6o:s:l:S:pt:T:qnm:W", + c = getopt_long(argc, argv, ":hf:O:B:ce6o:s:l:S:pt:T:qnm:WU", long_options, NULL); if (c == -1) { break; @@ -2021,6 +2051,9 @@ static int img_convert(int argc, char **argv) case 'W': s.wr_in_order = false; break; + case 'U': + force_share = true; + break; case OPTION_OBJECT: { QemuOpts *object_opts; object_opts = qemu_opts_parse_noisily(&qemu_object_opts, @@ -2080,7 +2113,8 @@ static int img_convert(int argc, char **argv) for (bs_i = 0; bs_i < s.src_num; bs_i++) { s.src[bs_i] = img_open(image_opts, argv[optind + bs_i], - fmt, src_flags, src_writethrough, quiet); + fmt, src_flags, src_writethrough, quiet, + force_share); if (!s.src[bs_i]) { ret = -1; goto out; @@ -2233,7 +2267,8 @@ static int img_convert(int argc, char **argv) * the bdrv_create() call which takes different params. * Not critical right now, so fix can wait... */ - s.target = img_open_file(out_filename, out_fmt, flags, writethrough, quiet); + s.target = img_open_file(out_filename, out_fmt, flags, writethrough, quiet, + false); if (!s.target) { ret = -1; goto out; @@ -2384,7 +2419,7 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b) static ImageInfoList *collect_image_info_list(bool image_opts, const char *filename, const char *fmt, - bool chain) + bool chain, bool force_share) { ImageInfoList *head = NULL; ImageInfoList **last = &head; @@ -2407,7 +2442,8 @@ static ImageInfoList *collect_image_info_list(bool image_opts, g_hash_table_insert(filenames, (gpointer)filename, NULL); blk = img_open(image_opts, filename, fmt, - BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false); + BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false, + force_share); if (!blk) { goto err; } @@ -2459,6 +2495,7 @@ static int img_info(int argc, char **argv) const char *filename, *fmt, *output; ImageInfoList *list; bool image_opts = false; + bool force_share = false; fmt = NULL; output = NULL; @@ -2471,9 +2508,10 @@ static int img_info(int argc, char **argv) {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":f:h", + c = getopt_long(argc, argv, ":f:hU", long_options, &option_index); if (c == -1) { break; @@ -2491,6 +2529,9 @@ static int img_info(int argc, char **argv) case 'f': fmt = optarg; break; + case 'U': + force_share = true; + break; case OPTION_OUTPUT: output = optarg; break; @@ -2530,7 +2571,8 @@ static int img_info(int argc, char **argv) return 1; } - list = collect_image_info_list(image_opts, filename, fmt, chain); + list = collect_image_info_list(image_opts, filename, fmt, chain, + force_share); if (!list) { return 1; } @@ -2676,6 +2718,7 @@ static int img_map(int argc, char **argv) MapEntry curr = { .length = 0 }, next; int ret = 0; bool image_opts = false; + bool force_share = false; fmt = NULL; output = NULL; @@ -2687,9 +2730,10 @@ static int img_map(int argc, char **argv) {"output", required_argument, 0, OPTION_OUTPUT}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":f:h", + c = getopt_long(argc, argv, ":f:hU", long_options, &option_index); if (c == -1) { break; @@ -2707,6 +2751,9 @@ static int img_map(int argc, char **argv) case 'f': fmt = optarg; break; + case 'U': + force_share = true; + break; case OPTION_OUTPUT: output = optarg; break; @@ -2743,7 +2790,7 @@ static int img_map(int argc, char **argv) return 1; } - blk = img_open(image_opts, filename, fmt, 0, false, false); + blk = img_open(image_opts, filename, fmt, 0, false, false, force_share); if (!blk) { return 1; } @@ -2806,6 +2853,7 @@ static int img_snapshot(int argc, char **argv) bool quiet = false; Error *err = NULL; bool image_opts = false; + bool force_share = false; bdrv_oflags = BDRV_O_RDWR; /* Parse commandline parameters */ @@ -2814,9 +2862,10 @@ static int img_snapshot(int argc, char **argv) {"help", no_argument, 0, 'h'}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":la:c:d:hq", + c = getopt_long(argc, argv, ":la:c:d:hqU", long_options, NULL); if (c == -1) { break; @@ -2866,6 +2915,9 @@ static int img_snapshot(int argc, char **argv) case 'q': quiet = true; break; + case 'U': + force_share = true; + break; case OPTION_OBJECT: { QemuOpts *opts; opts = qemu_opts_parse_noisily(&qemu_object_opts, @@ -2892,7 +2944,8 @@ static int img_snapshot(int argc, char **argv) } /* Open the image */ - blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet); + blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet, + force_share); if (!blk) { return 1; } @@ -2956,6 +3009,7 @@ static int img_rebase(int argc, char **argv) int c, flags, src_flags, ret; bool writethrough, src_writethrough; int unsafe = 0; + bool force_share = false; int progress = 0; bool quiet = false; Error *local_err = NULL; @@ -2972,9 +3026,10 @@ static int img_rebase(int argc, char **argv) {"help", no_argument, 0, 'h'}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":hf:F:b:upt:T:q", + c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU", long_options, NULL); if (c == -1) { break; @@ -3024,6 +3079,9 @@ static int img_rebase(int argc, char **argv) case OPTION_IMAGE_OPTS: image_opts = true; break; + case 'U': + force_share = true; + break; } } @@ -3072,7 +3130,8 @@ static int img_rebase(int argc, char **argv) * Ignore the old backing file for unsafe rebase in case we want to correct * the reference to a renamed or moved backing file. */ - blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet); + blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet, + false); if (!blk) { ret = -1; goto out; @@ -3097,6 +3156,13 @@ static int img_rebase(int argc, char **argv) qdict_put_str(options, "driver", bs->backing_format); } + if (force_share) { + if (!options) { + options = qdict_new(); + } + qdict_put(options, BDRV_OPT_FORCE_SHARE, + qbool_from_bool(true)); + } bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); blk_old_backing = blk_new_open(backing_name, NULL, options, src_flags, &local_err); @@ -3109,11 +3175,12 @@ static int img_rebase(int argc, char **argv) } if (out_baseimg[0]) { + options = qdict_new(); if (out_basefmt) { - options = qdict_new(); qdict_put_str(options, "driver", out_basefmt); - } else { - options = NULL; + } + if (force_share) { + qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); } blk_new_backing = blk_new_open(out_baseimg, NULL, @@ -3419,7 +3486,8 @@ static int img_resize(int argc, char **argv) qemu_opts_del(param); blk = img_open(image_opts, filename, fmt, - BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet); + BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet, + false); if (!blk) { ret = -1; goto out; @@ -3573,7 +3641,8 @@ static int img_amend(int argc, char **argv) goto out; } - blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet); + blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet, + false); if (!blk) { ret = -1; goto out; @@ -3741,6 +3810,7 @@ static int img_bench(int argc, char **argv) bool writethrough = false; struct timeval t1, t2; int i; + bool force_share = false; for (;;) { static const struct option long_options[] = { @@ -3749,9 +3819,10 @@ static int img_bench(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {"pattern", required_argument, 0, OPTION_PATTERN}, {"no-drain", no_argument, 0, OPTION_NO_DRAIN}, + {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:w", long_options, NULL); + c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:wU", long_options, NULL); if (c == -1) { break; } @@ -3845,6 +3916,9 @@ static int img_bench(int argc, char **argv) flags |= BDRV_O_RDWR; is_write = true; break; + case 'U': + force_share = true; + break; case OPTION_PATTERN: { unsigned long res; @@ -3892,7 +3966,8 @@ static int img_bench(int argc, char **argv) goto out; } - blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet); + blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet, + force_share); if (!blk) { ret = -1; goto out; @@ -4059,6 +4134,7 @@ static int img_dd(int argc, char **argv) const char *fmt = NULL; int64_t size = 0; int64_t block_count = 0, out_pos, in_pos; + bool force_share = false; struct DdInfo dd = { .flags = 0, .count = 0, @@ -4087,10 +4163,11 @@ static int img_dd(int argc, char **argv) const struct option long_options[] = { { "help", no_argument, 0, 'h'}, { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + { "force-share", no_argument, 0, 'U'}, { 0, 0, 0, 0 } }; - while ((c = getopt_long(argc, argv, ":hf:O:", long_options, NULL))) { + while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) { if (c == EOF) { break; } @@ -4110,6 +4187,9 @@ static int img_dd(int argc, char **argv) case 'h': help(); break; + case 'U': + force_share = true; + break; case OPTION_IMAGE_OPTS: image_opts = true; break; @@ -4154,7 +4234,8 @@ static int img_dd(int argc, char **argv) ret = -1; goto out; } - blk1 = img_open(image_opts, in.filename, fmt, 0, false, false); + blk1 = img_open(image_opts, in.filename, fmt, 0, false, false, + force_share); if (!blk1) { ret = -1; @@ -4222,7 +4303,7 @@ static int img_dd(int argc, char **argv) } blk2 = img_open(image_opts, out.filename, out_fmt, BDRV_O_RDWR, - false, false); + false, false, false); if (!blk2) { ret = -1; -- cgit v1.2.3 From a8d16f9ca289c102d74c74c2834209eeef1e625f Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:40 +0800 Subject: qemu-img: Update documentation for -U Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- qemu-img-cmds.hx | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index bf4ce59019..e5bc28fc3c 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -10,15 +10,15 @@ STEXI ETEXI DEF("bench", img_bench, - "bench [-c count] [-d depth] [-f fmt] [--flush-interval=flush_interval] [-n] [--no-drain] [-o offset] [--pattern=pattern] [-q] [-s buffer_size] [-S step_size] [-t cache] [-w] filename") + "bench [-c count] [-d depth] [-f fmt] [--flush-interval=flush_interval] [-n] [--no-drain] [-o offset] [--pattern=pattern] [-q] [-s buffer_size] [-S step_size] [-t cache] [-w] [-U] filename") STEXI -@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] @var{filename} +@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] [-U] @var{filename} ETEXI DEF("check", img_check, - "check [-q] [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [-r [leaks | all]] [-T src_cache] filename") + "check [-q] [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [-r [leaks | all]] [-T src_cache] [-U] filename") STEXI -@item check [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] [-T @var{src_cache}] @var{filename} +@item check [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] [-T @var{src_cache}] [-U] @var{filename} ETEXI DEF("create", img_create, @@ -34,45 +34,45 @@ STEXI ETEXI DEF("compare", img_compare, - "compare [--object objectdef] [--image-opts] [-f fmt] [-F fmt] [-T src_cache] [-p] [-q] [-s] filename1 filename2") + "compare [--object objectdef] [--image-opts] [-f fmt] [-F fmt] [-T src_cache] [-p] [-q] [-s] [-U] filename1 filename2") STEXI -@item compare [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [-F @var{fmt}] [-T @var{src_cache}] [-p] [-q] [-s] @var{filename1} @var{filename2} +@item compare [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [-F @var{fmt}] [-T @var{src_cache}] [-p] [-q] [-s] [-U] @var{filename1} @var{filename2} ETEXI DEF("convert", img_convert, - "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename") + "convert [--object objectdef] [--image-opts] [-U] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename") STEXI -@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-B @var{backing_file}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [--object @var{objectdef}] [--image-opts] [-U] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI DEF("dd", img_dd, - "dd [--image-opts] [-f fmt] [-O output_fmt] [bs=block_size] [count=blocks] [skip=blocks] if=input of=output") + "dd [--image-opts] [-U] [-f fmt] [-O output_fmt] [bs=block_size] [count=blocks] [skip=blocks] if=input of=output") STEXI -@item dd [--image-opts] [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output} +@item dd [--image-opts] [-U] [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output} ETEXI DEF("info", img_info, - "info [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [--backing-chain] filename") + "info [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [--backing-chain] [-U] filename") STEXI -@item info [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename} +@item info [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] [-U] @var{filename} ETEXI DEF("map", img_map, - "map [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] filename") + "map [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [-U] filename") STEXI -@item map [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] @var{filename} +@item map [--object @var{objectdef}] [--image-opts] [-f @var{fmt}] [--output=@var{ofmt}] [-U] @var{filename} ETEXI DEF("snapshot", img_snapshot, - "snapshot [--object objectdef] [--image-opts] [-q] [-l | -a snapshot | -c snapshot | -d snapshot] filename") + "snapshot [--object objectdef] [--image-opts] [-U] [-q] [-l | -a snapshot | -c snapshot | -d snapshot] filename") STEXI -@item snapshot [--object @var{objectdef}] [--image-opts] [-q] [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot}] @var{filename} +@item snapshot [--object @var{objectdef}] [--image-opts] [-U] [-q] [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot}] @var{filename} ETEXI DEF("rebase", img_rebase, - "rebase [--object objectdef] [--image-opts] [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] -b backing_file [-F backing_fmt] filename") + "rebase [--object objectdef] [--image-opts] [-U] [-q] [-f fmt] [-t cache] [-T src_cache] [-p] [-u] -b backing_file [-F backing_fmt] filename") STEXI -@item rebase [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-p] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename} +@item rebase [--object @var{objectdef}] [--image-opts] [-U] [-q] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-p] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename} ETEXI DEF("resize", img_resize, -- cgit v1.2.3 From 459571f7b2df957b8108eda06e923ddf60fec113 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:41 +0800 Subject: qemu-io: Add --force-share option Add --force-share/-U to program options and -U to open subcommand. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- qemu-io.c | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/qemu-io.c b/qemu-io.c index ed0e2dceaa..34fa8a10a4 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -20,6 +20,7 @@ #include "qemu/readline.h" #include "qemu/log.h" #include "qapi/qmp/qstring.h" +#include "qapi/qmp/qbool.h" #include "qom/object_interfaces.h" #include "sysemu/block-backend.h" #include "block/block_int.h" @@ -53,7 +54,8 @@ static const cmdinfo_t close_cmd = { .oneline = "close the current open file", }; -static int openfile(char *name, int flags, bool writethrough, QDict *opts) +static int openfile(char *name, int flags, bool writethrough, bool force_share, + QDict *opts) { Error *local_err = NULL; BlockDriverState *bs; @@ -64,6 +66,18 @@ static int openfile(char *name, int flags, bool writethrough, QDict *opts) return 1; } + if (force_share) { + if (!opts) { + opts = qdict_new(); + } + if (qdict_haskey(opts, BDRV_OPT_FORCE_SHARE) + && !qdict_get_bool(opts, BDRV_OPT_FORCE_SHARE)) { + error_report("-U conflicts with image options"); + QDECREF(opts); + return 1; + } + qdict_put(opts, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true)); + } qemuio_blk = blk_new_open(name, NULL, opts, flags, &local_err); if (!qemuio_blk) { error_reportf_err(local_err, "can't open%s%s: ", @@ -108,6 +122,7 @@ static void open_help(void) " -r, -- open file read-only\n" " -s, -- use snapshot file\n" " -n, -- disable host cache, short for -t none\n" +" -U, -- force shared permissions\n" " -k, -- use kernel AIO implementation (on Linux only)\n" " -t, -- use the given cache mode for the image\n" " -d, -- use the given discard mode for the image\n" @@ -124,7 +139,7 @@ static const cmdinfo_t open_cmd = { .argmin = 1, .argmax = -1, .flags = CMD_NOFILE_OK, - .args = "[-rsnk] [-t cache] [-d discard] [-o options] [path]", + .args = "[-rsnkU] [-t cache] [-d discard] [-o options] [path]", .oneline = "open the file specified by path", .help = open_help, }; @@ -147,8 +162,9 @@ static int open_f(BlockBackend *blk, int argc, char **argv) int c; QemuOpts *qopts; QDict *opts; + bool force_share = false; - while ((c = getopt(argc, argv, "snro:kt:d:")) != -1) { + while ((c = getopt(argc, argv, "snro:kt:d:U")) != -1) { switch (c) { case 's': flags |= BDRV_O_SNAPSHOT; @@ -188,6 +204,9 @@ static int open_f(BlockBackend *blk, int argc, char **argv) return 0; } break; + case 'U': + force_share = true; + break; default: qemu_opts_reset(&empty_opts); return qemuio_command_usage(&open_cmd); @@ -211,9 +230,9 @@ static int open_f(BlockBackend *blk, int argc, char **argv) qemu_opts_reset(&empty_opts); if (optind == argc - 1) { - return openfile(argv[optind], flags, writethrough, opts); + return openfile(argv[optind], flags, writethrough, force_share, opts); } else if (optind == argc) { - return openfile(NULL, flags, writethrough, opts); + return openfile(NULL, flags, writethrough, force_share, opts); } else { QDECREF(opts); return qemuio_command_usage(&open_cmd); @@ -257,6 +276,7 @@ static void usage(const char *name) " -T, --trace [[enable=]][,events=][,file=]\n" " specify tracing options\n" " see qemu-img(1) man page for full description\n" +" -U, --force-share force shared permissions\n" " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" "\n" @@ -436,7 +456,7 @@ static QemuOptsList file_opts = { int main(int argc, char **argv) { int readonly = 0; - const char *sopt = "hVc:d:f:rsnmkt:T:"; + const char *sopt = "hVc:d:f:rsnmkt:T:U"; const struct option lopt[] = { { "help", no_argument, NULL, 'h' }, { "version", no_argument, NULL, 'V' }, @@ -452,6 +472,7 @@ int main(int argc, char **argv) { "trace", required_argument, NULL, 'T' }, { "object", required_argument, NULL, OPTION_OBJECT }, { "image-opts", no_argument, NULL, OPTION_IMAGE_OPTS }, + { "force-share", no_argument, 0, 'U'}, { NULL, 0, NULL, 0 } }; int c; @@ -462,6 +483,7 @@ int main(int argc, char **argv) QDict *opts = NULL; const char *format = NULL; char *trace_file = NULL; + bool force_share = false; #ifdef CONFIG_POSIX signal(SIGPIPE, SIG_IGN); @@ -524,6 +546,9 @@ int main(int argc, char **argv) case 'h': usage(progname); exit(0); + case 'U': + force_share = true; + break; case OPTION_OBJECT: { QemuOpts *qopts; qopts = qemu_opts_parse_noisily(&qemu_object_opts, @@ -595,7 +620,7 @@ int main(int argc, char **argv) exit(1); } opts = qemu_opts_to_qdict(qopts, NULL); - if (openfile(NULL, flags, writethrough, opts)) { + if (openfile(NULL, flags, writethrough, force_share, opts)) { exit(1); } } else { @@ -603,7 +628,8 @@ int main(int argc, char **argv) opts = qdict_new(); qdict_put_str(opts, "driver", format); } - if (openfile(argv[optind], flags, writethrough, opts)) { + if (openfile(argv[optind], flags, writethrough, + force_share, opts)) { exit(1); } } -- cgit v1.2.3 From aca7063a5686b5551f5699ac70d9643bd48a6fa9 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:42 +0800 Subject: iotests: 030: Prepare for image locking qemu-img and qemu-io commands when guest is running need "-U" option, add it. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- tests/qemu-iotests/030 | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 0d472d5f27..e00c11b804 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -63,8 +63,8 @@ class TestSingleDrive(iotests.QMPTestCase): def test_stream_intermediate(self): self.assert_no_active_block_jobs() - self.assertNotEqual(qemu_io('-f', 'raw', '-c', 'map', backing_img), - qemu_io('-f', iotests.imgfmt, '-c', 'map', mid_img), + self.assertNotEqual(qemu_io('-f', 'raw', '-rU', '-c', 'map', backing_img), + qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', mid_img), 'image file map matches backing file before streaming') result = self.vm.qmp('block-stream', device='mid', job_id='stream-mid') @@ -114,7 +114,7 @@ class TestSingleDrive(iotests.QMPTestCase): self.assert_no_active_block_jobs() # The image map is empty before the operation - empty_map = qemu_io('-f', iotests.imgfmt, '-c', 'map', test_img) + empty_map = qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', test_img) # This is a no-op: no data should ever be copied from the base image result = self.vm.qmp('block-stream', device='drive0', base=mid_img) @@ -197,8 +197,8 @@ class TestParallelOps(iotests.QMPTestCase): # Check that the maps don't match before the streaming operations for i in range(2, self.num_imgs, 2): - self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i]), - qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i-1]), + self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.imgs[i]), + qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.imgs[i-1]), 'image file map matches backing file before streaming') # Create all streaming jobs @@ -351,8 +351,8 @@ class TestParallelOps(iotests.QMPTestCase): def test_stream_base_node_name(self): self.assert_no_active_block_jobs() - self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[4]), - qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[3]), + self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.imgs[4]), + qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.imgs[3]), 'image file map matches backing file before streaming') # Error: the base node does not exist @@ -422,8 +422,8 @@ class TestQuorum(iotests.QMPTestCase): if not iotests.supports_quorum(): return - self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.children[0]), - qemu_io('-f', iotests.imgfmt, '-c', 'map', self.backing[0]), + self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.children[0]), + qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.backing[0]), 'image file map matches backing file before streaming') self.assert_no_active_block_jobs() -- cgit v1.2.3 From 55e5a3b65e3e8bdf2fe5e22bbd8c0c4023cef1c2 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:43 +0800 Subject: iotests: 046: Prepare for image locking The qemu-img info command is executed while VM is running, add -U option to avoid the image locking error. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- tests/qemu-iotests/046 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/046 b/tests/qemu-iotests/046 index e528b67cc6..f2ebecf24c 100755 --- a/tests/qemu-iotests/046 +++ b/tests/qemu-iotests/046 @@ -192,7 +192,7 @@ echo "== Verify image content ==" function verify_io() { - if ($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep "compat: 0.10" > /dev/null); then + if ($QEMU_IMG info -U -f "$IMGFMT" "$TEST_IMG" | grep "compat: 0.10" > /dev/null); then # For v2 images, discarded clusters are read from the backing file # Keep the variable empty so that the backing file value can be used as # the default below -- cgit v1.2.3 From 4797aeabdc61d7bbf9ef0cb9e25fb539912bb7f3 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:44 +0800 Subject: iotests: 055: Don't attach the target image already for drive-backup Double attach is not a valid usage of the target image, drive-backup will open the blockdev itself so skip the add_drive call in this case. Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/qemu-iotests/055 | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 index aafcd249f6..ba4da65c77 100755 --- a/tests/qemu-iotests/055 +++ b/tests/qemu-iotests/055 @@ -458,17 +458,18 @@ class TestDriveCompression(iotests.QMPTestCase): except OSError: pass - def do_prepare_drives(self, fmt, args): + def do_prepare_drives(self, fmt, args, attach_target): self.vm = iotests.VM().add_drive(test_img) qemu_img('create', '-f', fmt, blockdev_target_img, str(TestDriveCompression.image_len), *args) - self.vm.add_drive(blockdev_target_img, format=fmt, interface="none") + if attach_target: + self.vm.add_drive(blockdev_target_img, format=fmt, interface="none") self.vm.launch() - def do_test_compress_complete(self, cmd, format, **args): - self.do_prepare_drives(format['type'], format['args']) + def do_test_compress_complete(self, cmd, format, attach_target, **args): + self.do_prepare_drives(format['type'], format['args'], attach_target) self.assert_no_active_block_jobs() @@ -484,15 +485,16 @@ class TestDriveCompression(iotests.QMPTestCase): def test_complete_compress_drive_backup(self): for format in TestDriveCompression.fmt_supports_compression: - self.do_test_compress_complete('drive-backup', format, + self.do_test_compress_complete('drive-backup', format, False, target=blockdev_target_img, mode='existing') def test_complete_compress_blockdev_backup(self): for format in TestDriveCompression.fmt_supports_compression: - self.do_test_compress_complete('blockdev-backup', format, target='drive1') + self.do_test_compress_complete('blockdev-backup', format, True, + target='drive1') - def do_test_compress_cancel(self, cmd, format, **args): - self.do_prepare_drives(format['type'], format['args']) + def do_test_compress_cancel(self, cmd, format, attach_target, **args): + self.do_prepare_drives(format['type'], format['args'], attach_target) self.assert_no_active_block_jobs() @@ -506,15 +508,16 @@ class TestDriveCompression(iotests.QMPTestCase): def test_compress_cancel_drive_backup(self): for format in TestDriveCompression.fmt_supports_compression: - self.do_test_compress_cancel('drive-backup', format, + self.do_test_compress_cancel('drive-backup', format, False, target=blockdev_target_img, mode='existing') def test_compress_cancel_blockdev_backup(self): for format in TestDriveCompression.fmt_supports_compression: - self.do_test_compress_cancel('blockdev-backup', format, target='drive1') + self.do_test_compress_cancel('blockdev-backup', format, True, + target='drive1') - def do_test_compress_pause(self, cmd, format, **args): - self.do_prepare_drives(format['type'], format['args']) + def do_test_compress_pause(self, cmd, format, attach_target, **args): + self.do_prepare_drives(format['type'], format['args'], attach_target) self.assert_no_active_block_jobs() @@ -546,12 +549,13 @@ class TestDriveCompression(iotests.QMPTestCase): def test_compress_pause_drive_backup(self): for format in TestDriveCompression.fmt_supports_compression: - self.do_test_compress_pause('drive-backup', format, + self.do_test_compress_pause('drive-backup', format, False, target=blockdev_target_img, mode='existing') def test_compress_pause_blockdev_backup(self): for format in TestDriveCompression.fmt_supports_compression: - self.do_test_compress_pause('blockdev-backup', format, target='drive1') + self.do_test_compress_pause('blockdev-backup', format, True, + target='drive1') if __name__ == '__main__': iotests.main(supported_fmts=['raw', 'qcow2']) -- cgit v1.2.3 From ecffa634218c05813c4b055f73826d09d7f07346 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:45 +0800 Subject: iotests: 085: Avoid image locking conflict In the case where we test the expected error when a blockdev-snapshot target already has a backing image, the backing chain is opened multiple times. This will be a problem when we use image locking, so use a different backing file that is not already open. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- tests/qemu-iotests/085 | 33 +++++++++++++++++++-------------- tests/qemu-iotests/085.out | 3 ++- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/tests/qemu-iotests/085 b/tests/qemu-iotests/085 index c53e97f067..b97adcd8db 100755 --- a/tests/qemu-iotests/085 +++ b/tests/qemu-iotests/085 @@ -45,7 +45,7 @@ _cleanup() rm -f "${TEST_DIR}/${i}-${snapshot_virt0}" rm -f "${TEST_DIR}/${i}-${snapshot_virt1}" done - rm -f "${TEST_IMG}.1" "${TEST_IMG}.2" + rm -f "${TEST_IMG}" "${TEST_IMG}.1" "${TEST_IMG}.2" "${TEST_IMG}.base" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -87,24 +87,26 @@ function create_group_snapshot() } # ${1}: unique identifier for the snapshot filename -# ${2}: true: open backing images; false: don't open them (default) +# ${2}: extra_params to the blockdev-add command +# ${3}: filename +function do_blockdev_add() +{ + cmd="{ 'execute': 'blockdev-add', 'arguments': + { 'driver': 'qcow2', 'node-name': 'snap_${1}', ${2} + 'file': + { 'driver': 'file', 'filename': '${3}', + 'node-name': 'file_${1}' } } }" + _send_qemu_cmd $h "${cmd}" "return" +} + +# ${1}: unique identifier for the snapshot filename function add_snapshot_image() { - if [ "${2}" = "true" ]; then - extra_params="" - else - extra_params="'backing': '', " - fi base_image="${TEST_DIR}/$((${1}-1))-${snapshot_virt0}" snapshot_file="${TEST_DIR}/${1}-${snapshot_virt0}" _make_test_img -b "${base_image}" "$size" mv "${TEST_IMG}" "${snapshot_file}" - cmd="{ 'execute': 'blockdev-add', 'arguments': - { 'driver': 'qcow2', 'node-name': 'snap_${1}', ${extra_params} - 'file': - { 'driver': 'file', 'filename': '${snapshot_file}', - 'node-name': 'file_${1}' } } }" - _send_qemu_cmd $h "${cmd}" "return" + do_blockdev_add "$1" "'backing': '', " "${snapshot_file}" } # ${1}: unique identifier for the snapshot filename @@ -222,7 +224,10 @@ echo === Invalid command - snapshot node has a backing image === echo SNAPSHOTS=$((${SNAPSHOTS}+1)) -add_snapshot_image ${SNAPSHOTS} true + +TEST_IMG="$TEST_IMG.base" _make_test_img "$size" +_make_test_img -b "${TEST_IMG}.base" "$size" +do_blockdev_add ${SNAPSHOTS} "" "${TEST_IMG}" blockdev_snapshot ${SNAPSHOTS} error echo diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out index 182acb42cf..a5d4cc3494 100644 --- a/tests/qemu-iotests/085.out +++ b/tests/qemu-iotests/085.out @@ -78,7 +78,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ === Invalid command - snapshot node has a backing image === -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/12-snapshot-v0.IMGFMT +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/t.IMGFMT.base {"return": {}} {"error": {"class": "GenericError", "desc": "The snapshot already has a backing image"}} -- cgit v1.2.3 From d5b8336a6284fa4d75eaae3ac0bfb974cb7f5f4a Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:46 +0800 Subject: iotests: 087: Don't attach test image twice The test scenario doesn't require the same image, instead it focuses on the duplicated node-name, so use null-co to avoid locking conflict. Reviewed-by: Max Reitz Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- tests/qemu-iotests/087 | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087 index 9de57ddf6d..6d52f7d1b7 100755 --- a/tests/qemu-iotests/087 +++ b/tests/qemu-iotests/087 @@ -82,8 +82,7 @@ run_qemu -drive driver=$IMGFMT,id=disk,node-name=test-node,file="$TEST_IMG" < Date: Wed, 3 May 2017 00:35:47 +0800 Subject: iotests: 091: Quit QEMU before checking image Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/qemu-iotests/091 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/qemu-iotests/091 b/tests/qemu-iotests/091 index 32bbd56975..10ac4a8d73 100755 --- a/tests/qemu-iotests/091 +++ b/tests/qemu-iotests/091 @@ -95,7 +95,9 @@ echo "vm2: qemu process running successfully" echo "vm2: flush io, and quit" _send_qemu_cmd $h2 'qemu-io disk flush' "(qemu)" _send_qemu_cmd $h2 'quit' "" +_send_qemu_cmd $h1 'quit' "" +wait echo "Check image pattern" ${QEMU_IO} -c "read -P 0x22 0 4M" "${TEST_IMG}" | _filter_testdir | _filter_qemu_io -- cgit v1.2.3 From 7ceb4fc1147ea34a50f1846db3782a8329a4efb4 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:48 +0800 Subject: iotests: 172: Use separate images for multiple devices To avoid image lock failures. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- tests/qemu-iotests/172 | 55 +++++++++++++++++++++++++--------------------- tests/qemu-iotests/172.out | 50 +++++++++++++++++++++-------------------- 2 files changed, 56 insertions(+), 49 deletions(-) diff --git a/tests/qemu-iotests/172 b/tests/qemu-iotests/172 index 1b7d3a194d..826d6fecd3 100755 --- a/tests/qemu-iotests/172 +++ b/tests/qemu-iotests/172 @@ -30,6 +30,8 @@ status=1 # failure is the default! _cleanup() { _cleanup_test_img + rm -f "$TEST_IMG.2" + rm -f "$TEST_IMG.3" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -86,6 +88,9 @@ size=720k _make_test_img $size +TEST_IMG="$TEST_IMG.2" _make_test_img $size +TEST_IMG="$TEST_IMG.3" _make_test_img $size + # Default drive semantics: # # By default you get a single empty floppy drive. You can override it with @@ -105,7 +110,7 @@ echo === Using -fda/-fdb options === check_floppy_qtree -fda "$TEST_IMG" check_floppy_qtree -fdb "$TEST_IMG" -check_floppy_qtree -fda "$TEST_IMG" -fdb "$TEST_IMG" +check_floppy_qtree -fda "$TEST_IMG" -fdb "$TEST_IMG.2" echo @@ -114,7 +119,7 @@ echo === Using -drive options === check_floppy_qtree -drive if=floppy,file="$TEST_IMG" check_floppy_qtree -drive if=floppy,file="$TEST_IMG",index=1 -check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=floppy,file="$TEST_IMG",index=1 +check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=floppy,file="$TEST_IMG.2",index=1 echo echo @@ -122,7 +127,7 @@ echo === Using -drive if=none and -global === check_floppy_qtree -drive if=none,file="$TEST_IMG" -global isa-fdc.driveA=none0 check_floppy_qtree -drive if=none,file="$TEST_IMG" -global isa-fdc.driveB=none0 -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1 echo @@ -131,7 +136,7 @@ echo === Using -drive if=none and -device === check_floppy_qtree -drive if=none,file="$TEST_IMG" -device floppy,drive=none0 check_floppy_qtree -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1 -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -device floppy,drive=none0 -device floppy,drive=none1,unit=1 echo @@ -139,58 +144,58 @@ echo echo === Mixing -fdX and -global === # Working -check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveB=none0 -check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveA=none0 +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -global isa-fdc.driveB=none0 +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -global isa-fdc.driveA=none0 # Conflicting (-fdX wins) -check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveA=none0 -check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveB=none0 +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -global isa-fdc.driveA=none0 +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -global isa-fdc.driveB=none0 echo echo echo === Mixing -fdX and -device === # Working -check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0 -check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1 +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0 +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=1 -check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0 -check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=0 +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0 +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=0 # Conflicting -check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=0 -check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1 +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=0 +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=1 echo echo echo === Mixing -drive and -device === # Working -check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0 -check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1 +check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0 +check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=1 # Conflicting -check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=0 +check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" -device floppy,drive=none0,unit=0 echo echo echo === Mixing -global and -device === # Working -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -global isa-fdc.driveA=none0 -device floppy,drive=none1 -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1 -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -global isa-fdc.driveB=none0 -device floppy,drive=none1 -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0 # Conflicting -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0 -check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG.2" \ -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=1 echo @@ -199,8 +204,8 @@ echo === Too many floppy drives === # Working check_floppy_qtree -drive if=floppy,file="$TEST_IMG" \ - -drive if=none,file="$TEST_IMG" \ - -drive if=none,file="$TEST_IMG" \ + -drive if=none,file="$TEST_IMG.2" \ + -drive if=none,file="$TEST_IMG.3" \ -global isa-fdc.driveB=none0 \ -device floppy,drive=none1 diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out index 54b53293d7..2732966166 100644 --- a/tests/qemu-iotests/172.out +++ b/tests/qemu-iotests/172.out @@ -1,5 +1,7 @@ QA output created by 172 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=737280 +Formatting 'TEST_DIR/t.IMGFMT.2', fmt=IMGFMT size=737280 +Formatting 'TEST_DIR/t.IMGFMT.3', fmt=IMGFMT size=737280 === Default === @@ -99,7 +101,7 @@ Testing: -fdb TEST_DIR/t.qcow2 share-rw = false drive-type = "288" -Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 +Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -205,7 +207,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 share-rw = false drive-type = "288" -Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2.2,index=1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -300,7 +302,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 share-rw = false drive-type = "144" -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -395,7 +397,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 share-rw = false drive-type = "144" -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -436,7 +438,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco === Mixing -fdX and -global === -Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 +Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -474,7 +476,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- share-rw = false drive-type = "144" -Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -512,7 +514,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- share-rw = false drive-type = "144" -Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 +Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -539,7 +541,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- share-rw = false drive-type = "144" -Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -569,7 +571,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- === Mixing -fdX and -device === -Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 +Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -607,7 +609,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop share-rw = false drive-type = "144" -Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 +Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -645,7 +647,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop share-rw = false drive-type = "144" -Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -683,7 +685,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop share-rw = false drive-type = "144" -Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -721,18 +723,18 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop share-rw = false drive-type = "144" -Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 +Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=0 QEMU_PROG: -device floppy,drive=none0,unit=0: Floppy unit 0 is in use QEMU_PROG: -device floppy,drive=none0,unit=0: Device initialization failed. -Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=1 QEMU_PROG: -device floppy,drive=none0,unit=1: Floppy unit 1 is in use QEMU_PROG: -device floppy,drive=none0,unit=1: Device initialization failed. === Mixing -drive and -device === -Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -770,7 +772,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q share-rw = false drive-type = "144" -Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -808,14 +810,14 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q share-rw = false drive-type = "144" -Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device floppy,drive=none0,unit=0 QEMU_PROG: -device floppy,drive=none0,unit=0: Floppy unit 0 is in use QEMU_PROG: -device floppy,drive=none0,unit=0: Device initialization failed. === Mixing -global and -device === -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 -device floppy,drive=none1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -853,7 +855,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco share-rw = false drive-type = "144" -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -891,7 +893,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco share-rw = false drive-type = "144" -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0 -device floppy,drive=none1 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -929,7 +931,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco share-rw = false drive-type = "144" -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0 dev: isa-fdc, id "" iobase = 1008 (0x3f0) @@ -967,18 +969,18 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco share-rw = false drive-type = "144" -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0 QEMU_PROG: -device floppy,drive=none1,unit=0: Floppy unit 0 is in use QEMU_PROG: -device floppy,drive=none1,unit=0: Device initialization failed. -Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=1 +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=1 QEMU_PROG: -device floppy,drive=none1,unit=1: Floppy unit 1 is in use QEMU_PROG: -device floppy,drive=none1,unit=1: Device initialization failed. === Too many floppy drives === -Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1 +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -drive if=none,file=TEST_DIR/t.qcow2.3 -global isa-fdc.driveB=none0 -device floppy,drive=none1 QEMU_PROG: -device floppy,drive=none1: Can't create floppy unit 2, bus supports only 2 units QEMU_PROG: -device floppy,drive=none1: Device initialization failed. -- cgit v1.2.3 From 2420d369a2c16867ee18fd363b617aa5a66ab933 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:49 +0800 Subject: tests: Use null-co:// instead of /dev/null as the dummy image Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/drive_del-test.c | 2 +- tests/nvme-test.c | 2 +- tests/usb-hcd-uhci-test.c | 2 +- tests/usb-hcd-xhci-test.c | 2 +- tests/virtio-blk-test.c | 2 +- tests/virtio-scsi-test.c | 5 +++-- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/drive_del-test.c b/tests/drive_del-test.c index 121b9c917e..2175139abb 100644 --- a/tests/drive_del-test.c +++ b/tests/drive_del-test.c @@ -92,7 +92,7 @@ static void test_after_failed_device_add(void) static void test_drive_del_device_del(void) { /* Start with a drive used by a device that unplugs instantaneously */ - qtest_start("-drive if=none,id=drive0,file=/dev/null,format=raw" + qtest_start("-drive if=none,id=drive0,file=null-co://,format=raw" " -device virtio-scsi-pci" " -device scsi-hd,drive=drive0,id=dev0"); diff --git a/tests/nvme-test.c b/tests/nvme-test.c index c8bece4434..7674a446e4 100644 --- a/tests/nvme-test.c +++ b/tests/nvme-test.c @@ -22,7 +22,7 @@ int main(int argc, char **argv) g_test_init(&argc, &argv, NULL); qtest_add_func("/nvme/nop", nop); - qtest_start("-drive id=drv0,if=none,file=/dev/null,format=raw " + qtest_start("-drive id=drv0,if=none,file=null-co://,format=raw " "-device nvme,drive=drv0,serial=foo"); ret = g_test_run(); diff --git a/tests/usb-hcd-uhci-test.c b/tests/usb-hcd-uhci-test.c index f25bae5e6c..5b500fedb0 100644 --- a/tests/usb-hcd-uhci-test.c +++ b/tests/usb-hcd-uhci-test.c @@ -79,7 +79,7 @@ int main(int argc, char **argv) { const char *arch = qtest_get_arch(); const char *cmd = "-device piix3-usb-uhci,id=uhci,addr=1d.0" - " -drive id=drive0,if=none,file=/dev/null,format=raw" + " -drive id=drive0,if=none,file=null-co://,format=raw" " -device usb-tablet,bus=uhci.0,port=1"; int ret; diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c index 22513e9eb5..031764da6d 100644 --- a/tests/usb-hcd-xhci-test.c +++ b/tests/usb-hcd-xhci-test.c @@ -89,7 +89,7 @@ int main(int argc, char **argv) qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); qtest_start("-device nec-usb-xhci,id=xhci" - " -drive id=drive0,if=none,file=/dev/null,format=raw"); + " -drive id=drive0,if=none,file=null-co://,format=raw"); ret = g_test_run(); qtest_end(); diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 1eee95df49..fd2078c9da 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -63,7 +63,7 @@ static QOSState *pci_test_start(void) const char *arch = qtest_get_arch(); char *tmp_path; const char *cmd = "-drive if=none,id=drive0,file=%s,format=raw " - "-drive if=none,id=drive1,file=/dev/null,format=raw " + "-drive if=none,id=drive1,file=null-co://,format=raw " "-device virtio-blk-pci,id=drv0,drive=drive0," "addr=%x.%x"; diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c index 0eabd56fd9..8b0f77a63e 100644 --- a/tests/virtio-scsi-test.c +++ b/tests/virtio-scsi-test.c @@ -35,7 +35,7 @@ typedef struct { static QOSState *qvirtio_scsi_start(const char *extra_opts) { const char *arch = qtest_get_arch(); - const char *cmd = "-drive id=drv0,if=none,file=/dev/null,format=raw " + const char *cmd = "-drive id=drv0,if=none,file=null-co://,format=raw " "-device virtio-scsi-pci,id=vs0 " "-device scsi-hd,bus=vs0.0,drive=drv0 %s"; @@ -195,7 +195,8 @@ static void hotplug(void) QDict *response; QOSState *qs; - qs = qvirtio_scsi_start("-drive id=drv1,if=none,file=/dev/null,format=raw"); + qs = qvirtio_scsi_start( + "-drive id=drv1,if=none,file=null-co://,format=raw"); response = qmp("{\"execute\": \"device_add\"," " \"arguments\": {" " \"driver\": \"scsi-hd\"," -- cgit v1.2.3 From 16b48d5d66d2fceb86114b28d693aad930f5070d Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:50 +0800 Subject: file-posix: Add 'locking' option Making this option available even before implementing it will let converting tests easier: in coming patches they can specify the option already when necessary, before we actually write code to lock the images. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/file-posix.c | 5 +++++ qapi/block-core.json | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/block/file-posix.c b/block/file-posix.c index 19c48a043e..d13e99c43f 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -392,6 +392,11 @@ static QemuOptsList raw_runtime_opts = { .type = QEMU_OPT_STRING, .help = "host AIO implementation (threads, native)", }, + { + .name = "locking", + .type = QEMU_OPT_STRING, + .help = "file locking mode (on/off/auto, default: auto)", + }, { /* end of list */ } }, }; diff --git a/qapi/block-core.json b/qapi/block-core.json index 30d5952321..52e3ecd505 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2127,11 +2127,15 @@ # # @filename: path to the image file # @aio: AIO backend (default: threads) (since: 2.8) +# @locking: whether to enable file locking. If set to 'auto', only enable +# when Open File Descriptor (OFD) locking API is available +# (default: auto, since 2.10) # # Since: 2.9 ## { 'struct': 'BlockdevOptionsFile', 'data': { 'filename': 'str', + '*locking': 'OnOffAuto', '*aio': 'BlockdevAioOptions' } } ## -- cgit v1.2.3 From 1c3a555c35f2124d45a295eb715e50512526810e Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:51 +0800 Subject: file-win32: Error out if locking=on We share the same set of QAPI options with file-posix, but locking is not supported here. So error out if it is specified as 'on' for now. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/file-win32.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/block/file-win32.c b/block/file-win32.c index d1eb0a14b2..1a35dbabf2 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -344,6 +344,11 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } + if (qdict_get_try_bool(options, "locking", false)) { + error_setg(errp, "locking=on is not supported on Windows"); + goto fail; + } + filename = qemu_opt_get(opts, "filename"); use_aio = get_aio_option(opts, flags, &local_err); -- cgit v1.2.3 From 9c77fec2d366a2fc7848e9b58b82504365a664ae Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:52 +0800 Subject: tests: Disable image lock in test-replication The COLO block replication architecture requires one disk to be shared between primary and secondary, in the test both processes use posix file protocol (instead of over NBD) so it is affected by image locking. Disable the lock. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- tests/test-replication.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test-replication.c b/tests/test-replication.c index 3016c6f2e0..cebeb793b0 100644 --- a/tests/test-replication.c +++ b/tests/test-replication.c @@ -179,7 +179,8 @@ static BlockBackend *start_primary(void) char *cmdline; cmdline = g_strdup_printf("driver=replication,mode=primary,node-name=xxx," - "file.driver=qcow2,file.file.filename=%s" + "file.driver=qcow2,file.file.filename=%s," + "file.file.locking=off" , p_local_disk); opts = qemu_opts_parse_noisily(&qemu_drive_opts, cmdline, false); g_free(cmdline); @@ -310,7 +311,9 @@ static BlockBackend *start_secondary(void) Error *local_err = NULL; /* add s_local_disk and forge S_LOCAL_DISK_ID */ - cmdline = g_strdup_printf("file.filename=%s,driver=qcow2", s_local_disk); + cmdline = g_strdup_printf("file.filename=%s,driver=qcow2," + "file.locking=off", + s_local_disk); opts = qemu_opts_parse_noisily(&qemu_drive_opts, cmdline, false); g_free(cmdline); @@ -331,8 +334,10 @@ static BlockBackend *start_secondary(void) /* add S_(ACTIVE/HIDDEN)_DISK and forge S_ID */ cmdline = g_strdup_printf("driver=replication,mode=secondary,top-id=%s," "file.driver=qcow2,file.file.filename=%s," + "file.file.locking=off," "file.backing.driver=qcow2," "file.backing.file.filename=%s," + "file.backing.file.locking=off," "file.backing.backing=%s" , S_ID, s_active_disk, s_hidden_disk , S_LOCAL_DISK_ID); -- cgit v1.2.3 From fc0932fdcfc3e5cafa3641e361b681c07f639812 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:53 +0800 Subject: block: Reuse bs as backing hd for drive-backup sync=none Opening the backing image for the second time is bad, especially here when it is also in use as the active image as the source. The drive-backup job itself doesn't read from target->backing for COW, instead it gets data from the write notifier, so it's not a big problem. However, exporting the target to NBD etc. won't work, because of the likely stale metadata cache. Use BDRV_O_NO_BACKING in this case and manually set up the backing BdrvChild. Cc: qemu-stable@nongnu.org Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- blockdev.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/blockdev.c b/blockdev.c index 0b38c3df71..0d3773bcb8 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3151,6 +3151,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, Error *local_err = NULL; int flags; int64_t size; + bool set_backing_hd = false; if (!backup->has_speed) { backup->speed = 0; @@ -3201,6 +3202,8 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, } if (backup->sync == MIRROR_SYNC_MODE_NONE) { source = bs; + flags |= BDRV_O_NO_BACKING; + set_backing_hd = true; } size = bdrv_getlength(bs); @@ -3227,7 +3230,9 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, } if (backup->format) { - options = qdict_new(); + if (!options) { + options = qdict_new(); + } qdict_put_str(options, "driver", backup->format); } @@ -3238,6 +3243,14 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, bdrv_set_aio_context(target_bs, aio_context); + if (set_backing_hd) { + bdrv_set_backing_hd(target_bs, source, &local_err); + if (local_err) { + bdrv_unref(target_bs); + goto out; + } + } + if (backup->has_bitmap) { bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap); if (!bmap) { -- cgit v1.2.3 From 13461fdba6588540c99bf1e32275e421da22c396 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:54 +0800 Subject: osdep: Add qemu_lock_fd and qemu_unlock_fd They are wrappers of POSIX fcntl "file private locking", with a convenient "try lock" wrapper implemented with F_OFD_GETLK. Signed-off-by: Fam Zheng Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- include/qemu/osdep.h | 3 +++ util/osdep.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 122ff06ff6..1c9f5e260c 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -341,6 +341,9 @@ int qemu_close(int fd); #ifndef _WIN32 int qemu_dup(int fd); #endif +int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive); +int qemu_unlock_fd(int fd, int64_t start, int64_t len); +int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive); #if defined(__HAIKU__) && defined(__i386__) #define FMT_pid "%ld" diff --git a/util/osdep.c b/util/osdep.c index 06fb1cfda6..3de4a1826c 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -140,6 +140,54 @@ static int qemu_parse_fdset(const char *param) { return qemu_parse_fd(param); } + +static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type) +{ +#ifdef F_OFD_SETLK + int ret; + struct flock fl = { + .l_whence = SEEK_SET, + .l_start = start, + .l_len = len, + .l_type = fl_type, + }; + ret = fcntl(fd, F_OFD_SETLK, &fl); + return ret == -1 ? -errno : 0; +#else + return -ENOTSUP; +#endif +} + +int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive) +{ + return qemu_lock_fcntl(fd, start, len, exclusive ? F_WRLCK : F_RDLCK); +} + +int qemu_unlock_fd(int fd, int64_t start, int64_t len) +{ + return qemu_lock_fcntl(fd, start, len, F_UNLCK); +} + +int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive) +{ +#ifdef F_OFD_SETLK + int ret; + struct flock fl = { + .l_whence = SEEK_SET, + .l_start = start, + .l_len = len, + .l_type = exclusive ? F_WRLCK : F_RDLCK, + }; + ret = fcntl(fd, F_OFD_GETLK, &fl); + if (ret == -1) { + return -errno; + } else { + return fl.l_type == F_UNLCK ? 0 : -EAGAIN; + } +#else + return -ENOTSUP; +#endif +} #endif /* -- cgit v1.2.3 From e8c1094a0ee081f30ec9716593e7a90ee88623a1 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:55 +0800 Subject: osdep: Fall back to posix lock when OFD lock is unavailable Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- util/osdep.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/util/osdep.c b/util/osdep.c index 3de4a1826c..a2863c8e53 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -38,6 +38,14 @@ extern int madvise(caddr_t, size_t, int); #include "qemu/error-report.h" #include "monitor/monitor.h" +#ifdef F_OFD_SETLK +#define QEMU_SETLK F_OFD_SETLK +#define QEMU_GETLK F_OFD_GETLK +#else +#define QEMU_SETLK F_SETLK +#define QEMU_GETLK F_GETLK +#endif + static bool fips_enabled = false; static const char *hw_version = QEMU_HW_VERSION; @@ -143,7 +151,6 @@ static int qemu_parse_fdset(const char *param) static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type) { -#ifdef F_OFD_SETLK int ret; struct flock fl = { .l_whence = SEEK_SET, @@ -151,11 +158,8 @@ static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type) .l_len = len, .l_type = fl_type, }; - ret = fcntl(fd, F_OFD_SETLK, &fl); + ret = fcntl(fd, QEMU_SETLK, &fl); return ret == -1 ? -errno : 0; -#else - return -ENOTSUP; -#endif } int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive) @@ -170,7 +174,6 @@ int qemu_unlock_fd(int fd, int64_t start, int64_t len) int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive) { -#ifdef F_OFD_SETLK int ret; struct flock fl = { .l_whence = SEEK_SET, @@ -178,15 +181,12 @@ int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive) .l_len = len, .l_type = exclusive ? F_WRLCK : F_RDLCK, }; - ret = fcntl(fd, F_OFD_GETLK, &fl); + ret = fcntl(fd, QEMU_GETLK, &fl); if (ret == -1) { return -errno; } else { return fl.l_type == F_UNLCK ? 0 : -EAGAIN; } -#else - return -ENOTSUP; -#endif } #endif -- cgit v1.2.3 From 244a5668106297378391b768e7288eb157616f64 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:56 +0800 Subject: file-posix: Add image locking to perm operations This extends the permission bits of op blocker API to external using Linux OFD locks. Each permission in @perm and @shared_perm is represented by a locked byte in the image file. Requesting a permission in @perm is translated to a shared lock of the corresponding byte; rejecting to share the same permission is translated to a shared lock of a separate byte. With that, we use 2x number of bytes of distinct permission types. virtlockd in libvirt locks the first byte, so we do locking from a higher offset. Suggested-by: Kevin Wolf Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/file-posix.c | 276 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 275 insertions(+), 1 deletion(-) diff --git a/block/file-posix.c b/block/file-posix.c index d13e99c43f..a09055b700 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -129,12 +129,23 @@ do { \ #define MAX_BLOCKSIZE 4096 +/* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes, + * leaving a few more bytes for its future use. */ +#define RAW_LOCK_PERM_BASE 100 +#define RAW_LOCK_SHARED_BASE 200 + typedef struct BDRVRawState { int fd; + int lock_fd; + bool use_lock; int type; int open_flags; size_t buf_align; + /* The current permissions. */ + uint64_t perm; + uint64_t shared_perm; + #ifdef CONFIG_XFS bool is_xfs:1; #endif @@ -411,6 +422,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, BlockdevAioOptions aio, aio_default; int fd, ret; struct stat st; + OnOffAuto locking; opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -440,6 +452,37 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, } s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE); + locking = qapi_enum_parse(OnOffAuto_lookup, qemu_opt_get(opts, "locking"), + ON_OFF_AUTO__MAX, ON_OFF_AUTO_AUTO, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + switch (locking) { + case ON_OFF_AUTO_ON: + s->use_lock = true; +#ifndef F_OFD_SETLK + fprintf(stderr, + "File lock requested but OFD locking syscall is unavailable, " + "falling back to POSIX file locks.\n" + "Due to the implementation, locks can be lost unexpectedly.\n"); +#endif + break; + case ON_OFF_AUTO_OFF: + s->use_lock = false; + break; + case ON_OFF_AUTO_AUTO: +#ifdef F_OFD_SETLK + s->use_lock = true; +#else + s->use_lock = false; +#endif + break; + default: + abort(); + } + s->open_flags = open_flags; raw_parse_flags(bdrv_flags, &s->open_flags); @@ -455,6 +498,21 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, } s->fd = fd; + s->lock_fd = -1; + if (s->use_lock) { + fd = qemu_open(filename, s->open_flags); + if (fd < 0) { + ret = -errno; + error_setg_errno(errp, errno, "Could not open '%s' for locking", + filename); + qemu_close(s->fd); + goto fail; + } + s->lock_fd = fd; + } + s->perm = 0; + s->shared_perm = BLK_PERM_ALL; + #ifdef CONFIG_LINUX_AIO /* Currently Linux does AIO only for files opened with O_DIRECT */ if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) { @@ -542,6 +600,161 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, return raw_open_common(bs, options, flags, 0, errp); } +typedef enum { + RAW_PL_PREPARE, + RAW_PL_COMMIT, + RAW_PL_ABORT, +} RawPermLockOp; + +#define PERM_FOREACH(i) \ + for ((i) = 0; (1ULL << (i)) <= BLK_PERM_ALL; i++) + +/* Lock bytes indicated by @perm_lock_bits and @shared_perm_lock_bits in the + * file; if @unlock == true, also unlock the unneeded bytes. + * @shared_perm_lock_bits is the mask of all permissions that are NOT shared. + */ +static int raw_apply_lock_bytes(BDRVRawState *s, + uint64_t perm_lock_bits, + uint64_t shared_perm_lock_bits, + bool unlock, Error **errp) +{ + int ret; + int i; + + PERM_FOREACH(i) { + int off = RAW_LOCK_PERM_BASE + i; + if (perm_lock_bits & (1ULL << i)) { + ret = qemu_lock_fd(s->lock_fd, off, 1, false); + if (ret) { + error_setg(errp, "Failed to lock byte %d", off); + return ret; + } + } else if (unlock) { + ret = qemu_unlock_fd(s->lock_fd, off, 1); + if (ret) { + error_setg(errp, "Failed to unlock byte %d", off); + return ret; + } + } + } + PERM_FOREACH(i) { + int off = RAW_LOCK_SHARED_BASE + i; + if (shared_perm_lock_bits & (1ULL << i)) { + ret = qemu_lock_fd(s->lock_fd, off, 1, false); + if (ret) { + error_setg(errp, "Failed to lock byte %d", off); + return ret; + } + } else if (unlock) { + ret = qemu_unlock_fd(s->lock_fd, off, 1); + if (ret) { + error_setg(errp, "Failed to unlock byte %d", off); + return ret; + } + } + } + return 0; +} + +/* Check "unshared" bytes implied by @perm and ~@shared_perm in the file. */ +static int raw_check_lock_bytes(BDRVRawState *s, + uint64_t perm, uint64_t shared_perm, + Error **errp) +{ + int ret; + int i; + + PERM_FOREACH(i) { + int off = RAW_LOCK_SHARED_BASE + i; + uint64_t p = 1ULL << i; + if (perm & p) { + ret = qemu_lock_fd_test(s->lock_fd, off, 1, true); + if (ret) { + char *perm_name = bdrv_perm_names(p); + error_setg(errp, + "Failed to get \"%s\" lock", + perm_name); + g_free(perm_name); + error_append_hint(errp, + "Is another process using the image?\n"); + return ret; + } + } + } + PERM_FOREACH(i) { + int off = RAW_LOCK_PERM_BASE + i; + uint64_t p = 1ULL << i; + if (!(shared_perm & p)) { + ret = qemu_lock_fd_test(s->lock_fd, off, 1, true); + if (ret) { + char *perm_name = bdrv_perm_names(p); + error_setg(errp, + "Failed to get shared \"%s\" lock", + perm_name); + g_free(perm_name); + error_append_hint(errp, + "Is another process using the image?\n"); + return ret; + } + } + } + return 0; +} + +static int raw_handle_perm_lock(BlockDriverState *bs, + RawPermLockOp op, + uint64_t new_perm, uint64_t new_shared, + Error **errp) +{ + BDRVRawState *s = bs->opaque; + int ret = 0; + Error *local_err = NULL; + + if (!s->use_lock) { + return 0; + } + + if (bdrv_get_flags(bs) & BDRV_O_INACTIVE) { + return 0; + } + + assert(s->lock_fd > 0); + + switch (op) { + case RAW_PL_PREPARE: + ret = raw_apply_lock_bytes(s, s->perm | new_perm, + ~s->shared_perm | ~new_shared, + false, errp); + if (!ret) { + ret = raw_check_lock_bytes(s, new_perm, new_shared, errp); + if (!ret) { + return 0; + } + } + op = RAW_PL_ABORT; + /* fall through to unlock bytes. */ + case RAW_PL_ABORT: + raw_apply_lock_bytes(s, s->perm, ~s->shared_perm, true, &local_err); + if (local_err) { + /* Theoretically the above call only unlocks bytes and it cannot + * fail. Something weird happened, report it. + */ + error_report_err(local_err); + } + break; + case RAW_PL_COMMIT: + raw_apply_lock_bytes(s, new_perm, ~new_shared, true, &local_err); + if (local_err) { + /* Theoretically the above call only unlocks bytes and it cannot + * fail. Something weird happened, report it. + */ + error_report_err(local_err); + } + break; + } + return ret; +} + static int raw_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { @@ -1410,6 +1623,10 @@ static void raw_close(BlockDriverState *bs) qemu_close(s->fd); s->fd = -1; } + if (s->lock_fd >= 0) { + qemu_close(s->lock_fd); + s->lock_fd = -1; + } } static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp) @@ -1954,6 +2171,54 @@ static QemuOptsList raw_create_opts = { } }; +static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared, + Error **errp) +{ + return raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp); +} + +static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared) +{ + BDRVRawState *s = bs->opaque; + raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL); + s->perm = perm; + s->shared_perm = shared; +} + +static void raw_abort_perm_update(BlockDriverState *bs) +{ + raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL); +} + +static int raw_inactivate(BlockDriverState *bs) +{ + int ret; + uint64_t perm = 0; + uint64_t shared = BLK_PERM_ALL; + + ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, NULL); + if (ret) { + return ret; + } + raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL); + return 0; +} + + +static void raw_invalidate_cache(BlockDriverState *bs, Error **errp) +{ + BDRVRawState *s = bs->opaque; + int ret; + + assert(!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)); + ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, s->perm, s->shared_perm, + errp); + if (ret) { + return; + } + raw_handle_perm_lock(bs, RAW_PL_COMMIT, s->perm, s->shared_perm, NULL); +} + BlockDriver bdrv_file = { .format_name = "file", .protocol_name = "file", @@ -1984,7 +2249,11 @@ BlockDriver bdrv_file = { .bdrv_get_info = raw_get_info, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, - + .bdrv_inactivate = raw_inactivate, + .bdrv_invalidate_cache = raw_invalidate_cache, + .bdrv_check_perm = raw_check_perm, + .bdrv_set_perm = raw_set_perm, + .bdrv_abort_perm_update = raw_abort_perm_update, .create_opts = &raw_create_opts, }; @@ -2443,6 +2712,11 @@ static BlockDriver bdrv_host_device = { .bdrv_get_info = raw_get_info, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, + .bdrv_inactivate = raw_inactivate, + .bdrv_invalidate_cache = raw_invalidate_cache, + .bdrv_check_perm = raw_check_perm, + .bdrv_set_perm = raw_set_perm, + .bdrv_abort_perm_update = raw_abort_perm_update, .bdrv_probe_blocksizes = hdev_probe_blocksizes, .bdrv_probe_geometry = hdev_probe_geometry, -- cgit v1.2.3 From ba8980784df8c02ecc88f221e086653fc12908a8 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:57 +0800 Subject: qemu-iotests: Add test case 153 for image locking Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- tests/qemu-iotests/153 | 233 +++++++++++++++++++++++++++ tests/qemu-iotests/153.out | 390 +++++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 624 insertions(+) create mode 100755 tests/qemu-iotests/153 create mode 100644 tests/qemu-iotests/153.out diff --git a/tests/qemu-iotests/153 b/tests/qemu-iotests/153 new file mode 100755 index 0000000000..0b45d78ea3 --- /dev/null +++ b/tests/qemu-iotests/153 @@ -0,0 +1,233 @@ +#!/bin/bash +# +# Test image locking +# +# Copyright 2016, 2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=famz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + rm -f "${TEST_IMG}.base" + rm -f "${TEST_IMG}.convert" + rm -f "${TEST_IMG}.a" + rm -f "${TEST_IMG}.b" + rm -f "${TEST_IMG}.lnk" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.qemu + +size=32M + +_check_ofd() +{ + _make_test_img $size >/dev/null + if $QEMU_IMG_PROG info --image-opts "driver=file,locking=on,filename=$TEST_IMG" 2>&1 | + grep -q 'falling back to POSIX file'; then + return 1 + else + return 0 + fi +} + +_check_ofd || _notrun "OFD lock not available" + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + +_run_cmd() +{ + echo + (echo "$@"; "$@" 2>&1 1>/dev/null) | _filter_testdir +} + +function _do_run_qemu() +{ + ( + if ! test -t 0; then + while read cmd; do + echo $cmd + done + fi + echo quit + ) | $QEMU -nographic -monitor stdio -serial none "$@" 1>/dev/null +} + +function _run_qemu_with_images() +{ + _do_run_qemu \ + $(for i in $@; do echo "-drive if=none,file=$i"; done) 2>&1 \ + | _filter_testdir | _filter_qemu +} + +echo "== readonly=off,force-share=on should be rejected ==" +_run_qemu_with_images null-co://,readonly=off,force-share=on + +for opts1 in "" "read-only=on" "read-only=on,force-share=on"; do + echo + echo "== Creating base image ==" + TEST_IMG="${TEST_IMG}.base" _make_test_img $size + + echo + echo "== Creating test image ==" + $QEMU_IMG create -f $IMGFMT "${TEST_IMG}" -b ${TEST_IMG}.base | _filter_img_create + + echo + echo "== Launching QEMU, opts: '$opts1' ==" + _launch_qemu -drive file="${TEST_IMG}",if=none,$opts1 + h=$QEMU_HANDLE + + for opts2 in "" "read-only=on" "read-only=on,force-share=on"; do + echo + echo "== Launching another QEMU, opts: '$opts2' ==" + echo "quit" | \ + $QEMU -nographic -monitor stdio \ + -drive file="${TEST_IMG}",if=none,$opts2 2>&1 1>/dev/null | \ + _filter_testdir | _filter_qemu + done + + for L in "" "-U"; do + + echo + echo "== Running utility commands $L ==" + _run_cmd $QEMU_IO $L -c "read 0 512" "${TEST_IMG}" + _run_cmd $QEMU_IO $L -r -c "read 0 512" "${TEST_IMG}" + _run_cmd $QEMU_IO -c "open $L ${TEST_IMG}" -c "read 0 512" + _run_cmd $QEMU_IO -c "open -r $L ${TEST_IMG}" -c "read 0 512" + _run_cmd $QEMU_IMG info $L "${TEST_IMG}" + _run_cmd $QEMU_IMG check $L "${TEST_IMG}" + _run_cmd $QEMU_IMG compare $L "${TEST_IMG}" "${TEST_IMG}" + _run_cmd $QEMU_IMG map $L "${TEST_IMG}" + _run_cmd $QEMU_IMG amend -o "" $L "${TEST_IMG}" + _run_cmd $QEMU_IMG commit $L "${TEST_IMG}" + _run_cmd $QEMU_IMG resize $L "${TEST_IMG}" $size + _run_cmd $QEMU_IMG rebase $L "${TEST_IMG}" -b "${TEST_IMG}.base" + _run_cmd $QEMU_IMG snapshot -l $L "${TEST_IMG}" + _run_cmd $QEMU_IMG convert $L "${TEST_IMG}" "${TEST_IMG}.convert" + _run_cmd $QEMU_IMG dd $L if="${TEST_IMG}" of="${TEST_IMG}.convert" bs=512 count=1 + _run_cmd $QEMU_IMG bench $L -c 1 "${TEST_IMG}" + _run_cmd $QEMU_IMG bench $L -w -c 1 "${TEST_IMG}" + done + _send_qemu_cmd $h "{ 'execute': 'quit', }" "" + echo + echo "Round done" + _cleanup_qemu +done + +for opt1 in $test_opts; do + for opt2 in $test_opts; do + echo + echo "== Two devices with the same image ($opt1 - $opt2) ==" + _run_qemu_with_images "${TEST_IMG},$opt1" "${TEST_IMG},$opt2" + done +done + +echo "== Creating ${TEST_IMG}.[abc] ==" | _filter_testdir +( + $QEMU_IMG create -f qcow2 "${TEST_IMG}.a" -b "${TEST_IMG}" + $QEMU_IMG create -f qcow2 "${TEST_IMG}.b" -b "${TEST_IMG}" + $QEMU_IMG create -f qcow2 "${TEST_IMG}.c" -b "${TEST_IMG}.b" +) | _filter_img_create + +echo +echo "== Two devices sharing the same file in backing chain ==" +_run_qemu_with_images "${TEST_IMG}.a" "${TEST_IMG}.b" +_run_qemu_with_images "${TEST_IMG}.a" "${TEST_IMG}.c" + +echo +echo "== Backing image also as an active device ==" +_run_qemu_with_images "${TEST_IMG}.a" "${TEST_IMG}" + +echo +echo "== Backing image also as an active device (ro) ==" +_run_qemu_with_images "${TEST_IMG}.a" "${TEST_IMG},readonly=on" + +echo +echo "== Symbolic link ==" +rm -f "${TEST_IMG}.lnk" &>/dev/null +ln -s ${TEST_IMG} "${TEST_IMG}.lnk" || echo "Failed to create link" +_run_qemu_with_images "${TEST_IMG}.lnk" "${TEST_IMG}" + +echo +echo "== Closing an image should unlock it ==" +_launch_qemu + +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'qmp_capabilities' }" \ + 'return' + +echo "Adding drive" +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': 'drive_add 0 if=none,id=d0,file=${TEST_IMG}' } }" \ + "" + +_run_cmd $QEMU_IO "${TEST_IMG}" -c 'write 0 512' + +echo "Closing drive" +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': 'drive_del d0' } }" \ + "" + +_run_cmd $QEMU_IO "${TEST_IMG}" -c 'write 0 512' + +echo "Adding two and closing one" +for d in d0 d1; do + _send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': 'drive_add 0 if=none,id=$d,file=${TEST_IMG},readonly=on' } }" \ + "" +done + +_run_cmd $QEMU_IMG info "${TEST_IMG}" + +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': 'drive_del d0' } }" \ + "" + +_run_cmd $QEMU_IO "${TEST_IMG}" -c 'write 0 512' + +echo "Closing the other" +_send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'human-monitor-command', + 'arguments': { 'command-line': 'drive_del d1' } }" \ + "" + +_run_cmd $QEMU_IO "${TEST_IMG}" -c 'write 0 512' + +_cleanup_qemu + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/153.out b/tests/qemu-iotests/153.out new file mode 100644 index 0000000000..5ba0b63867 --- /dev/null +++ b/tests/qemu-iotests/153.out @@ -0,0 +1,390 @@ +QA output created by 153 +== readonly=off,force-share=on should be rejected == +QEMU_PROG: -drive if=none,file=null-co://,readonly=off,force-share=on: force-share=on can only be used with read-only images + +== Creating base image == +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=33554432 + +== Creating test image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base + +== Launching QEMU, opts: '' == + +== Launching another QEMU, opts: '' == +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,: Failed to get "write" lock +Is another process using the image? + +== Launching another QEMU, opts: 'read-only=on' == +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,read-only=on: Failed to get shared "write" lock +Is another process using the image? + +== Launching another QEMU, opts: 'read-only=on,force-share=on' == + +== Running utility commands == + +_qemu_io_wrapper -c read 0 512 TEST_DIR/t.qcow2 +can't open device TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? + +_qemu_io_wrapper -r -c read 0 512 TEST_DIR/t.qcow2 +can't open device TEST_DIR/t.qcow2: Failed to get shared "write" lock +Is another process using the image? + +_qemu_io_wrapper -c open TEST_DIR/t.qcow2 -c read 0 512 +can't open device TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? + +_qemu_io_wrapper -c open -r TEST_DIR/t.qcow2 -c read 0 512 +can't open device TEST_DIR/t.qcow2: Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper info TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper check TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper compare TEST_DIR/t.qcow2 TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper map TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper amend -o TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper commit TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper resize TEST_DIR/t.qcow2 32M +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper rebase TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper snapshot -l TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper convert TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper dd if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper bench -c 1 TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get shared "write" lock +Is another process using the image? + +_qemu_img_wrapper bench -w -c 1 TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +== Running utility commands -U == + +_qemu_io_wrapper -U -c read 0 512 TEST_DIR/t.qcow2 +can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images + +_qemu_io_wrapper -U -r -c read 0 512 TEST_DIR/t.qcow2 + +_qemu_io_wrapper -c open -U TEST_DIR/t.qcow2 -c read 0 512 +can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images + +_qemu_io_wrapper -c open -r -U TEST_DIR/t.qcow2 -c read 0 512 + +_qemu_img_wrapper info -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper check -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper compare -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2 + +_qemu_img_wrapper map -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper amend -o -U TEST_DIR/t.qcow2 +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper commit -U TEST_DIR/t.qcow2 +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper resize -U TEST_DIR/t.qcow2 32M +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper rebase -U TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper snapshot -l -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper convert -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert + +_qemu_img_wrapper dd -U if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1 + +_qemu_img_wrapper bench -U -c 1 TEST_DIR/t.qcow2 + +_qemu_img_wrapper bench -U -w -c 1 TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': force-share=on can only be used with read-only images + +Round done + +== Creating base image == +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=33554432 + +== Creating test image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base + +== Launching QEMU, opts: 'read-only=on' == + +== Launching another QEMU, opts: '' == +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,: Failed to get "write" lock +Is another process using the image? + +== Launching another QEMU, opts: 'read-only=on' == + +== Launching another QEMU, opts: 'read-only=on,force-share=on' == + +== Running utility commands == + +_qemu_io_wrapper -c read 0 512 TEST_DIR/t.qcow2 +can't open device TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? + +_qemu_io_wrapper -r -c read 0 512 TEST_DIR/t.qcow2 + +_qemu_io_wrapper -c open TEST_DIR/t.qcow2 -c read 0 512 +can't open device TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? + +_qemu_io_wrapper -c open -r TEST_DIR/t.qcow2 -c read 0 512 + +_qemu_img_wrapper info TEST_DIR/t.qcow2 + +_qemu_img_wrapper check TEST_DIR/t.qcow2 + +_qemu_img_wrapper compare TEST_DIR/t.qcow2 TEST_DIR/t.qcow2 + +_qemu_img_wrapper map TEST_DIR/t.qcow2 + +_qemu_img_wrapper amend -o TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper commit TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper resize TEST_DIR/t.qcow2 32M +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper rebase TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper snapshot -l TEST_DIR/t.qcow2 + +_qemu_img_wrapper convert TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert + +_qemu_img_wrapper dd if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1 + +_qemu_img_wrapper bench -c 1 TEST_DIR/t.qcow2 + +_qemu_img_wrapper bench -w -c 1 TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +== Running utility commands -U == + +_qemu_io_wrapper -U -c read 0 512 TEST_DIR/t.qcow2 +can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images + +_qemu_io_wrapper -U -r -c read 0 512 TEST_DIR/t.qcow2 + +_qemu_io_wrapper -c open -U TEST_DIR/t.qcow2 -c read 0 512 +can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images + +_qemu_io_wrapper -c open -r -U TEST_DIR/t.qcow2 -c read 0 512 + +_qemu_img_wrapper info -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper check -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper compare -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2 + +_qemu_img_wrapper map -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper amend -o -U TEST_DIR/t.qcow2 +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper commit -U TEST_DIR/t.qcow2 +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper resize -U TEST_DIR/t.qcow2 32M +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper rebase -U TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base +qemu-img: Could not open 'TEST_DIR/t.qcow2': Failed to get "write" lock +Is another process using the image? + +_qemu_img_wrapper snapshot -l -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper convert -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert + +_qemu_img_wrapper dd -U if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1 + +_qemu_img_wrapper bench -U -c 1 TEST_DIR/t.qcow2 + +_qemu_img_wrapper bench -U -w -c 1 TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': force-share=on can only be used with read-only images + +Round done + +== Creating base image == +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=33554432 + +== Creating test image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.base + +== Launching QEMU, opts: 'read-only=on,force-share=on' == + +== Launching another QEMU, opts: '' == + +== Launching another QEMU, opts: 'read-only=on' == + +== Launching another QEMU, opts: 'read-only=on,force-share=on' == + +== Running utility commands == + +_qemu_io_wrapper -c read 0 512 TEST_DIR/t.qcow2 + +_qemu_io_wrapper -r -c read 0 512 TEST_DIR/t.qcow2 + +_qemu_io_wrapper -c open TEST_DIR/t.qcow2 -c read 0 512 + +_qemu_io_wrapper -c open -r TEST_DIR/t.qcow2 -c read 0 512 + +_qemu_img_wrapper info TEST_DIR/t.qcow2 + +_qemu_img_wrapper check TEST_DIR/t.qcow2 + +_qemu_img_wrapper compare TEST_DIR/t.qcow2 TEST_DIR/t.qcow2 + +_qemu_img_wrapper map TEST_DIR/t.qcow2 + +_qemu_img_wrapper amend -o TEST_DIR/t.qcow2 + +_qemu_img_wrapper commit TEST_DIR/t.qcow2 + +_qemu_img_wrapper resize TEST_DIR/t.qcow2 32M + +_qemu_img_wrapper rebase TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base + +_qemu_img_wrapper snapshot -l TEST_DIR/t.qcow2 + +_qemu_img_wrapper convert TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert + +_qemu_img_wrapper dd if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1 + +_qemu_img_wrapper bench -c 1 TEST_DIR/t.qcow2 + +_qemu_img_wrapper bench -w -c 1 TEST_DIR/t.qcow2 + +== Running utility commands -U == + +_qemu_io_wrapper -U -c read 0 512 TEST_DIR/t.qcow2 +can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images + +_qemu_io_wrapper -U -r -c read 0 512 TEST_DIR/t.qcow2 + +_qemu_io_wrapper -c open -U TEST_DIR/t.qcow2 -c read 0 512 +can't open device TEST_DIR/t.qcow2: force-share=on can only be used with read-only images + +_qemu_io_wrapper -c open -r -U TEST_DIR/t.qcow2 -c read 0 512 + +_qemu_img_wrapper info -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper check -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper compare -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2 + +_qemu_img_wrapper map -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper amend -o -U TEST_DIR/t.qcow2 +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper commit -U TEST_DIR/t.qcow2 +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper resize -U TEST_DIR/t.qcow2 32M +qemu-img: unrecognized option '-U' +Try 'qemu-img --help' for more information + +_qemu_img_wrapper rebase -U TEST_DIR/t.qcow2 -b TEST_DIR/t.qcow2.base + +_qemu_img_wrapper snapshot -l -U TEST_DIR/t.qcow2 + +_qemu_img_wrapper convert -U TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.convert + +_qemu_img_wrapper dd -U if=TEST_DIR/t.qcow2 of=TEST_DIR/t.qcow2.convert bs=512 count=1 + +_qemu_img_wrapper bench -U -c 1 TEST_DIR/t.qcow2 + +_qemu_img_wrapper bench -U -w -c 1 TEST_DIR/t.qcow2 +qemu-img: Could not open 'TEST_DIR/t.qcow2': force-share=on can only be used with read-only images + +Round done +== Creating TEST_DIR/t.qcow2.[abc] == +Formatting 'TEST_DIR/t.IMGFMT.a', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT +Formatting 'TEST_DIR/t.IMGFMT.b', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT +Formatting 'TEST_DIR/t.IMGFMT.c', fmt=IMGFMT size=33554432 backing_file=TEST_DIR/t.IMGFMT.b + +== Two devices sharing the same file in backing chain == + +== Backing image also as an active device == +QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? + +== Backing image also as an active device (ro) == + +== Symbolic link == +QEMU_PROG: -drive if=none,file=TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? + +== Closing an image should unlock it == +{"return": {}} +Adding drive + +_qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512 +can't open device TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? +Closing drive + +_qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512 +Adding two and closing one + +_qemu_img_wrapper info TEST_DIR/t.qcow2 + +_qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512 +can't open device TEST_DIR/t.qcow2: Failed to get "write" lock +Is another process using the image? +Closing the other + +_qemu_io_wrapper TEST_DIR/t.qcow2 -c write 0 512 +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 893962d41e..c8ae60ea36 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -154,6 +154,7 @@ 149 rw auto sudo 150 rw auto quick 152 rw auto quick +153 rw auto quick 154 rw auto backing quick 155 rw auto 156 rw auto quick -- cgit v1.2.3 From de9efdb33454ae952c72ed826c54981d498b6c10 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 3 May 2017 00:35:58 +0800 Subject: tests: Add POSIX image locking test case 182 Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- tests/qemu-iotests/182 | 68 ++++++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/182.out | 8 ++++++ tests/qemu-iotests/group | 1 + 3 files changed, 77 insertions(+) create mode 100755 tests/qemu-iotests/182 create mode 100644 tests/qemu-iotests/182.out diff --git a/tests/qemu-iotests/182 b/tests/qemu-iotests/182 new file mode 100755 index 0000000000..7ecbb22604 --- /dev/null +++ b/tests/qemu-iotests/182 @@ -0,0 +1,68 @@ +#!/bin/bash +# +# Test image locking for POSIX locks +# +# Copyright 2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=famz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.qemu + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + +size=32M + +_make_test_img $size + +echo "Starting QEMU" +_launch_qemu -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \ + -device virtio-blk-pci,drive=drive0 + +echo +echo "Starting a second QEMU using the same image should fail" +echo 'quit' | $QEMU -monitor stdio \ + -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \ + -device virtio-blk-pci,drive=drive0 2>&1 | _filter_testdir 2>&1 | + _filter_qemu | + sed -e '/falling back to POSIX file/d' \ + -e '/locks can be lost unexpectedly/d' + +_cleanup_qemu + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/182.out b/tests/qemu-iotests/182.out new file mode 100644 index 0000000000..23a4dbf809 --- /dev/null +++ b/tests/qemu-iotests/182.out @@ -0,0 +1,8 @@ +QA output created by 182 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=33554432 +Starting QEMU + +Starting a second QEMU using the same image should fail +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,if=none,id=drive0,file.locking=on: Failed to get "write" lock +Is another process using the image? +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index c8ae60ea36..a4549d82b2 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -171,3 +171,4 @@ 175 auto quick 176 rw auto backing 181 rw auto migration +182 rw auto quick -- cgit v1.2.3 From 92413c16be8eee53a3b60739f2a28a1463651f47 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Thu, 4 May 2017 01:11:17 +0200 Subject: qcow2: Fix preallocation size formula When calculating the number of reftable entries, we should actually use the number of refblocks and not (wrongly[1]) re-calculate it. [1] "Wrongly" means: Dividing the number of clusters by the number of entries per refblock and rounding down instead of up. Reported-by: Eric Blake Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 1c2697732b..8caf800529 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2139,7 +2139,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, * too, as long as the bulk is allocated here). Therefore, using * floating point arithmetic is fine. */ int64_t meta_size = 0; - uint64_t nreftablee, nrefblocke, nl1e, nl2e; + uint64_t nreftablee, nrefblocke, nl1e, nl2e, refblock_count; int64_t aligned_total_size = align_offset(total_size, cluster_size); int refblock_bits, refblock_size; /* refcount entry size in bytes */ @@ -2182,11 +2182,12 @@ static int qcow2_create2(const char *filename, int64_t total_size, nrefblocke = (aligned_total_size + meta_size + cluster_size) / (cluster_size - rces - rces * sizeof(uint64_t) / cluster_size); - meta_size += DIV_ROUND_UP(nrefblocke, refblock_size) * cluster_size; + refblock_count = DIV_ROUND_UP(nrefblocke, refblock_size); + meta_size += refblock_count * cluster_size; /* total size of refcount tables */ - nreftablee = nrefblocke / refblock_size; - nreftablee = align_offset(nreftablee, cluster_size / sizeof(uint64_t)); + nreftablee = align_offset(refblock_count, + cluster_size / sizeof(uint64_t)); meta_size += nreftablee * sizeof(uint64_t); qemu_opt_set_number(opts, BLOCK_OPT_SIZE, -- cgit v1.2.3 From 564a6b693828ac38f3f562eed9e8b1dc62f45253 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Thu, 4 May 2017 01:11:18 +0200 Subject: qcow2: Reuse preallocated zero clusters Instead of just freeing preallocated zero clusters and completely allocating them from scratch, reuse them. We cannot do this in handle_copied(), however, since this is a COW operation. Therefore, we have to add the new logic to handle_alloc() and simply return the existing offset if it exists. The only catch is that we have to convince qcow2_alloc_cluster_link_l2() not to free the old clusters (because we have reused them). Reported-by: Eric Blake Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 80 +++++++++++++++++++++++++++++++++++---------------- block/qcow2.h | 3 ++ 2 files changed, 59 insertions(+), 24 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 100398c565..fb91fd8979 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -309,14 +309,20 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, uint64_t *l2_table, uint64_t stop_flags) { int i; + int first_cluster_type; uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED; uint64_t first_entry = be64_to_cpu(l2_table[0]); uint64_t offset = first_entry & mask; - if (!offset) + if (!offset) { return 0; + } - assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL); + /* must be allocated */ + first_cluster_type = qcow2_get_cluster_type(first_entry); + assert(first_cluster_type == QCOW2_CLUSTER_NORMAL || + (first_cluster_type == QCOW2_CLUSTER_ZERO && + (first_entry & L2E_OFFSET_MASK) != 0)); for (i = 0; i < nb_clusters; i++) { uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask; @@ -835,7 +841,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) * Don't discard clusters that reach a refcount of 0 (e.g. compressed * clusters), the next write will reuse them anyway. */ - if (j != 0) { + if (!m->keep_old_clusters && j != 0) { for (i = 0; i < j; i++) { qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1, QCOW2_DISCARD_NEVER); @@ -1132,8 +1138,9 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, uint64_t entry; uint64_t nb_clusters; int ret; + bool keep_old_clusters = false; - uint64_t alloc_cluster_offset; + uint64_t alloc_cluster_offset = 0; trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset, *bytes); @@ -1170,31 +1177,54 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, * wrong with our code. */ assert(nb_clusters > 0); - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + if (qcow2_get_cluster_type(entry) == QCOW2_CLUSTER_ZERO && + (entry & L2E_OFFSET_MASK) != 0 && (entry & QCOW_OFLAG_COPIED) && + (!*host_offset || + start_of_cluster(s, *host_offset) == (entry & L2E_OFFSET_MASK))) + { + /* Try to reuse preallocated zero clusters; contiguous normal clusters + * would be fine, too, but count_cow_clusters() above has limited + * nb_clusters already to a range of COW clusters */ + int preallocated_nb_clusters = + count_contiguous_clusters(nb_clusters, s->cluster_size, + &l2_table[l2_index], QCOW_OFLAG_COPIED); + assert(preallocated_nb_clusters > 0); - /* Allocate, if necessary at a given offset in the image file */ - alloc_cluster_offset = start_of_cluster(s, *host_offset); - ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset, - &nb_clusters); - if (ret < 0) { - goto fail; - } + nb_clusters = preallocated_nb_clusters; + alloc_cluster_offset = entry & L2E_OFFSET_MASK; - /* Can't extend contiguous allocation */ - if (nb_clusters == 0) { - *bytes = 0; - return 0; + /* We want to reuse these clusters, so qcow2_alloc_cluster_link_l2() + * should not free them. */ + keep_old_clusters = true; } - /* !*host_offset would overwrite the image header and is reserved for "no - * host offset preferred". If 0 was a valid host offset, it'd trigger the - * following overlap check; do that now to avoid having an invalid value in - * *host_offset. */ + qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + if (!alloc_cluster_offset) { - ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset, - nb_clusters * s->cluster_size); - assert(ret < 0); - goto fail; + /* Allocate, if necessary at a given offset in the image file */ + alloc_cluster_offset = start_of_cluster(s, *host_offset); + ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset, + &nb_clusters); + if (ret < 0) { + goto fail; + } + + /* Can't extend contiguous allocation */ + if (nb_clusters == 0) { + *bytes = 0; + return 0; + } + + /* !*host_offset would overwrite the image header and is reserved for + * "no host offset preferred". If 0 was a valid host offset, it'd + * trigger the following overlap check; do that now to avoid having an + * invalid value in *host_offset. */ + if (!alloc_cluster_offset) { + ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset, + nb_clusters * s->cluster_size); + assert(ret < 0); + goto fail; + } } /* @@ -1225,6 +1255,8 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, .offset = start_of_cluster(s, guest_offset), .nb_clusters = nb_clusters, + .keep_old_clusters = keep_old_clusters, + .cow_start = { .offset = 0, .nb_bytes = offset_into_cluster(s, guest_offset), diff --git a/block/qcow2.h b/block/qcow2.h index f8aeb08794..8731f24b82 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -322,6 +322,9 @@ typedef struct QCowL2Meta /** Number of newly allocated clusters */ int nb_clusters; + /** Do not free the old clusters */ + bool keep_old_clusters; + /** * Requests that overlap with this allocation and wait to be restarted * when the allocating request has completed. -- cgit v1.2.3 From 293073a56c6c921c9902da17711914a0042f29ba Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Thu, 4 May 2017 01:11:19 +0200 Subject: qcow2: Discard preallocated zero clusters In discard_single_l2(), we completely discard normal clusters instead of simply turning them into preallocated zero clusters. That means we should probably do the same with such preallocated zero clusters: Discard them instead of keeping them allocated. Reported-by: Eric Blake Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index fb91fd8979..31077d8102 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1511,7 +1511,8 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, break; case QCOW2_CLUSTER_ZERO: - if (!full_discard) { + /* Preallocated zero clusters should be discarded in any case */ + if (!full_discard && (old_l2_entry & L2E_OFFSET_MASK) == 0) { continue; } break; -- cgit v1.2.3 From aa93c834f9c5b971ad3b54944a5dae97ca310225 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Thu, 4 May 2017 01:11:20 +0200 Subject: iotests: Extend test 066 066 was supposed to be a test "for discarding preallocated zero clusters", but it did so incompletely: While it did check the image file's integrity after the operation, it did not confirm that the clusters are indeed freed. This patch adds this test. In addition, new cases for writing to preallocated zero clusters are added. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/066 | 128 ++++++++++++++++++++++++++++++++++++++++++++- tests/qemu-iotests/066.out | 46 ++++++++++++++++ 2 files changed, 173 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/066 b/tests/qemu-iotests/066 index c2116a3088..8638217736 100755 --- a/tests/qemu-iotests/066 +++ b/tests/qemu-iotests/066 @@ -1,6 +1,6 @@ #!/bin/bash # -# Test case for discarding preallocated zero clusters in qcow2 +# Test case for preallocated zero clusters in qcow2 # # Copyright (C) 2013 Red Hat, Inc. # @@ -55,8 +55,134 @@ _make_test_img $IMG_SIZE $QEMU_IO -c "write 0 256k" -c "write -z 0 256k" -c "write 64M 512" \ -c "discard 0 $IMG_SIZE" -c "read -P 0 0 $IMG_SIZE" "$TEST_IMG" \ | _filter_qemu_io + # Check the image (there shouldn't be any leaks) _check_test_img +# Map the image (we want all clusters to be gone) +$QEMU_IMG map "$TEST_IMG" + +_cleanup_test_img + + +echo +echo '=== Writing to preallocated zero clusters ===' +echo + +_make_test_img $IMG_SIZE + +# Create data clusters (not aligned to an L2 table) +$QEMU_IO -c 'write -P 42 1M 256k' "$TEST_IMG" | _filter_qemu_io +orig_map=$($QEMU_IMG map --output=json "$TEST_IMG") + +# Convert the data clusters to preallocated zero clusters +$QEMU_IO -c 'write -z 1M 256k' "$TEST_IMG" | _filter_qemu_io + +# Now write to them (with a COW needed for the head and tail) +$QEMU_IO -c "write -P 23 $(((1024 + 32) * 1024)) 192k" "$TEST_IMG" \ + | _filter_qemu_io + +# Check metadata correctness +_check_test_img + +# Check data correctness +$QEMU_IO -c "read -P 0 $(( 1024 * 1024)) 32k" \ + -c "read -P 23 $(((1024 + 32) * 1024)) 192k" \ + -c "read -P 0 $(((1024 + 32 + 192) * 1024)) 32k" \ + "$TEST_IMG" \ + | _filter_qemu_io + +# Check that we have actually reused the original area +new_map=$($QEMU_IMG map --output=json "$TEST_IMG") +if [ "$new_map" = "$orig_map" ]; then + echo 'Successfully reused original clusters.' +else + echo 'Failed to reuse original clusters.' + echo 'Original map:' + echo "$orig_map" + echo 'New map:' + echo "$new_map" +fi + +_cleanup_test_img + + +echo +echo '=== Writing to a snapshotted preallocated zero cluster ===' +echo + +_make_test_img 64k + +# Create a preallocated zero cluster +$QEMU_IO -c 'write -P 42 0 64k' -c 'write -z 0 64k' "$TEST_IMG" \ + | _filter_qemu_io + +# Snapshot it +$QEMU_IMG snapshot -c foo "$TEST_IMG" + +# Write to the cluster +$QEMU_IO -c 'write -P 23 0 64k' "$TEST_IMG" | _filter_qemu_io + +# Check metadata correctness +_check_test_img + +# Check data correctness +$QEMU_IO -c 'read -P 23 0 64k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG snapshot -a foo "$TEST_IMG" +$QEMU_IO -c 'read -P 0 0 64k' "$TEST_IMG" | _filter_qemu_io + +_cleanup_test_img + + +echo +echo '=== Consecutive write to a preallocated zero cluster ===' +echo + +_make_test_img 192k + +# Create three normal clusters +$QEMU_IO -c 'write -P 42 0 192k' "$TEST_IMG" | _filter_qemu_io +orig_map=$($QEMU_IMG map --output=json "$TEST_IMG") + +# Make the middle cluster a preallocated zero cluster +$QEMU_IO -c 'write -z 64k 64k' "$TEST_IMG" | _filter_qemu_io + +# Try to overwrite everything: This should reuse the whole range. To test that +# this only issues a single continuous write request, use blkdebug. +$QEMU_IO -c 'write -P 42 0 192k' \ + "json:{ + 'driver': '$IMGFMT', + 'file': { + 'driver': 'blkdebug', + 'image.filename': '$TEST_IMG', + 'set-state': [{ + 'event': 'write_aio', + 'new_state': 2 + }], + 'inject-error': [{ + 'event': 'write_aio', + 'state': 2 + }] + } + }" \ + | _filter_qemu_io + +# Check metadata correctness +_check_test_img + +# Check that we have actually reused the original area +new_map=$($QEMU_IMG map --output=json "$TEST_IMG") +if [ "$new_map" = "$orig_map" ]; then + echo 'Successfully reused original clusters.' +else + echo 'Failed to reuse original clusters.' + echo 'Original map:' + echo "$orig_map" + echo 'New map:' + echo "$new_map" +fi + +_cleanup_test_img + # success, all done echo "*** done" diff --git a/tests/qemu-iotests/066.out b/tests/qemu-iotests/066.out index 7c1f31a1b1..3d9da9bd0b 100644 --- a/tests/qemu-iotests/066.out +++ b/tests/qemu-iotests/066.out @@ -14,4 +14,50 @@ discard 67109376/67109376 bytes at offset 0 read 67109376/67109376 bytes at offset 0 64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) No errors were found on the image. +Offset Length Mapped to File + +=== Writing to preallocated zero clusters === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67109376 +wrote 262144/262144 bytes at offset 1048576 +256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 262144/262144 bytes at offset 1048576 +256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 196608/196608 bytes at offset 1081344 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. +read 32768/32768 bytes at offset 1048576 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 196608/196608 bytes at offset 1081344 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 32768/32768 bytes at offset 1277952 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Successfully reused original clusters. + +=== Writing to a snapshotted preallocated zero cluster === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536 +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Consecutive write to a preallocated zero cluster === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=196608 +wrote 196608/196608 bytes at offset 0 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 196608/196608 bytes at offset 0 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. +Successfully reused original clusters. *** done -- cgit v1.2.3 From ace21a58751824f9a3d399e332317233e880de3a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2017 18:52:36 +0200 Subject: migration: Unify block node activation error handling Migration code activates all block driver nodes on the destination when the migration completes. It does so by calling bdrv_invalidate_cache_all() and blk_resume_after_migration(). There is one code path for precopy and one for postcopy migration, resulting in four function calls, which used to have three different failure modes. This patch unifies the behaviour so that failure to activate all block nodes is non-fatal, but the error message is logged and the VM isn't automatically started. 'cont' will retry activating the block nodes. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- migration/migration.c | 16 +++++----------- migration/savevm.c | 12 +++++------- qmp.c | 18 +++++++++--------- 3 files changed, 19 insertions(+), 27 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 799952ce99..04af71988d 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -338,20 +338,14 @@ static void process_incoming_migration_bh(void *opaque) Error *local_err = NULL; MigrationIncomingState *mis = opaque; - /* Make sure all file formats flush their mutable metadata */ + /* Make sure all file formats flush their mutable metadata. + * If we get an error here, just don't restart the VM yet. */ bdrv_invalidate_cache_all(&local_err); - if (local_err) { - migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_FAILED); - error_report_err(local_err); - migrate_decompress_threads_join(); - exit(EXIT_FAILURE); + if (!local_err) { + blk_resume_after_migration(&local_err); } - - /* If we get an error here, just don't restart the VM yet. */ - blk_resume_after_migration(&local_err); if (local_err) { - error_free(local_err); + error_report_err(local_err); local_err = NULL; autostart = false; } diff --git a/migration/savevm.c b/migration/savevm.c index 352a8f23b5..3ca8d11704 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1612,16 +1612,14 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) qemu_announce_self(); - /* Make sure all file formats flush their mutable metadata */ + /* Make sure all file formats flush their mutable metadata. + * If we get an error here, just don't restart the VM yet. */ bdrv_invalidate_cache_all(&local_err); - if (local_err) { - error_report_err(local_err); + if (!local_err) { + blk_resume_after_migration(&local_err); } - - /* If we get an error here, just don't restart the VM yet. */ - blk_resume_after_migration(&local_err); if (local_err) { - error_free(local_err); + error_report_err(local_err); local_err = NULL; autostart = false; } diff --git a/qmp.c b/qmp.c index ab74cd729d..25b5050f9f 100644 --- a/qmp.c +++ b/qmp.c @@ -196,15 +196,15 @@ void qmp_cont(Error **errp) } /* Continuing after completed migration. Images have been inactivated to - * allow the destination to take control. Need to get control back now. */ - if (runstate_check(RUN_STATE_FINISH_MIGRATE) || - runstate_check(RUN_STATE_POSTMIGRATE)) - { - bdrv_invalidate_cache_all(&local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + * allow the destination to take control. Need to get control back now. + * + * If there are no inactive block nodes (e.g. because the VM was just + * paused rather than completing a migration), bdrv_inactivate_all() simply + * doesn't do anything. */ + bdrv_invalidate_cache_all(&local_err); + if (local_err) { + error_propagate(errp, local_err); + return; } blk_resume_after_migration(&local_err); -- cgit v1.2.3 From 4417ab7adf1613799054be5afedf810fc2524ee8 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2017 18:52:37 +0200 Subject: block: New BdrvChildRole.activate() for blk_resume_after_migration() Instead of manually calling blk_resume_after_migration() in migration code after doing bdrv_invalidate_cache_all(), integrate the BlockBackend activation with cache invalidation into a single function. This is achieved with a new callback in BdrvChildRole that is called by bdrv_invalidate_cache_all(). Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block.c | 12 +++++++++- block/block-backend.c | 56 +++++++++++++++++++++++------------------------ include/block/block.h | 2 -- include/block/block_int.h | 5 +++++ migration/migration.c | 3 --- migration/savevm.c | 3 --- qmp.c | 6 ----- 7 files changed, 44 insertions(+), 43 deletions(-) diff --git a/block.c b/block.c index 1e00e313c3..c8e6de29f0 100644 --- a/block.c +++ b/block.c @@ -3949,7 +3949,7 @@ void bdrv_init_with_whitelist(void) void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) { - BdrvChild *child; + BdrvChild *child, *parent; Error *local_err = NULL; int ret; @@ -3985,6 +3985,16 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) error_setg_errno(errp, -ret, "Could not refresh total sector count"); return; } + + QLIST_FOREACH(parent, &bs->parents, next_parent) { + if (parent->role->activate) { + parent->role->activate(parent, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + } } void bdrv_invalidate_cache_all(Error **errp) diff --git a/block/block-backend.c b/block/block-backend.c index f5bf13eec9..a7ce72b325 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -130,6 +130,32 @@ static const char *blk_root_get_name(BdrvChild *child) return blk_name(child->opaque); } +/* + * Notifies the user of the BlockBackend that migration has completed. qdev + * devices can tighten their permissions in response (specifically revoke + * shared write permissions that we needed for storage migration). + * + * If an error is returned, the VM cannot be allowed to be resumed. + */ +static void blk_root_activate(BdrvChild *child, Error **errp) +{ + BlockBackend *blk = child->opaque; + Error *local_err = NULL; + + if (!blk->disable_perm) { + return; + } + + blk->disable_perm = false; + + blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err); + if (local_err) { + error_propagate(errp, local_err); + blk->disable_perm = true; + return; + } +} + static const BdrvChildRole child_root = { .inherit_options = blk_root_inherit_options, @@ -140,6 +166,8 @@ static const BdrvChildRole child_root = { .drained_begin = blk_root_drained_begin, .drained_end = blk_root_drained_end, + + .activate = blk_root_activate, }; /* @@ -601,34 +629,6 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) *shared_perm = blk->shared_perm; } -/* - * Notifies the user of all BlockBackends that migration has completed. qdev - * devices can tighten their permissions in response (specifically revoke - * shared write permissions that we needed for storage migration). - * - * If an error is returned, the VM cannot be allowed to be resumed. - */ -void blk_resume_after_migration(Error **errp) -{ - BlockBackend *blk; - Error *local_err = NULL; - - for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { - if (!blk->disable_perm) { - continue; - } - - blk->disable_perm = false; - - blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err); - if (local_err) { - error_propagate(errp, local_err); - blk->disable_perm = true; - return; - } - } -} - static int blk_do_attach_dev(BlockBackend *blk, void *dev) { if (blk->dev) { diff --git a/include/block/block.h b/include/block/block.h index 877fbb0d9a..80d51d8f12 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -369,8 +369,6 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp); void bdrv_invalidate_cache_all(Error **errp); int bdrv_inactivate_all(void); -void blk_resume_after_migration(Error **errp); - /* Ensure contents are flushed to disk. */ int bdrv_flush(BlockDriverState *bs); int coroutine_fn bdrv_co_flush(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index 1b4d08e8df..563792580c 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -473,6 +473,11 @@ struct BdrvChildRole { void (*drained_begin)(BdrvChild *child); void (*drained_end)(BdrvChild *child); + /* Notifies the parent that the child has been activated (e.g. when + * migration is completing) and it can start requesting permissions and + * doing I/O on it. */ + void (*activate)(BdrvChild *child, Error **errp); + void (*attach)(BdrvChild *child); void (*detach)(BdrvChild *child); }; diff --git a/migration/migration.c b/migration/migration.c index 04af71988d..a5ade23e24 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -341,9 +341,6 @@ static void process_incoming_migration_bh(void *opaque) /* Make sure all file formats flush their mutable metadata. * If we get an error here, just don't restart the VM yet. */ bdrv_invalidate_cache_all(&local_err); - if (!local_err) { - blk_resume_after_migration(&local_err); - } if (local_err) { error_report_err(local_err); local_err = NULL; diff --git a/migration/savevm.c b/migration/savevm.c index 3ca8d11704..7f66d58a7e 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1615,9 +1615,6 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) /* Make sure all file formats flush their mutable metadata. * If we get an error here, just don't restart the VM yet. */ bdrv_invalidate_cache_all(&local_err); - if (!local_err) { - blk_resume_after_migration(&local_err); - } if (local_err) { error_report_err(local_err); local_err = NULL; diff --git a/qmp.c b/qmp.c index 25b5050f9f..f656940769 100644 --- a/qmp.c +++ b/qmp.c @@ -207,12 +207,6 @@ void qmp_cont(Error **errp) return; } - blk_resume_after_migration(&local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - if (runstate_check(RUN_STATE_INMIGRATE)) { autostart = 1; } else { -- cgit v1.2.3 From cfa1a5723f0fc8eb6563fb1d19c206fd5e40cd41 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2017 18:52:38 +0200 Subject: block: Drop permissions when migration completes With image locking, permissions affect other qemu processes as well. We want to be sure that the destination can run, so let's drop permissions on the source when migration completes. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block.c | 12 +++++++++++- block/block-backend.c | 25 +++++++++++++++++++++++++ include/block/block_int.h | 7 ++++--- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/block.c b/block.c index c8e6de29f0..444a52e331 100644 --- a/block.c +++ b/block.c @@ -4019,7 +4019,7 @@ void bdrv_invalidate_cache_all(Error **errp) static int bdrv_inactivate_recurse(BlockDriverState *bs, bool setting_flag) { - BdrvChild *child; + BdrvChild *child, *parent; int ret; if (!setting_flag && bs->drv->bdrv_inactivate) { @@ -4038,6 +4038,16 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs, if (setting_flag) { bs->open_flags |= BDRV_O_INACTIVE; + + QLIST_FOREACH(parent, &bs->parents, next_parent) { + if (parent->role->inactivate) { + ret = parent->role->inactivate(parent); + if (ret < 0) { + bs->open_flags &= ~BDRV_O_INACTIVE; + return ret; + } + } + } } return 0; } diff --git a/block/block-backend.c b/block/block-backend.c index a7ce72b325..f3a60081a7 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -156,6 +156,30 @@ static void blk_root_activate(BdrvChild *child, Error **errp) } } +static int blk_root_inactivate(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + + if (blk->disable_perm) { + return 0; + } + + /* Only inactivate BlockBackends for guest devices (which are inactive at + * this point because the VM is stopped) and unattached monitor-owned + * BlockBackends. If there is still any other user like a block job, then + * we simply can't inactivate the image. */ + if (!blk->dev && !blk->name[0]) { + return -EPERM; + } + + blk->disable_perm = true; + if (blk->root) { + bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort); + } + + return 0; +} + static const BdrvChildRole child_root = { .inherit_options = blk_root_inherit_options, @@ -168,6 +192,7 @@ static const BdrvChildRole child_root = { .drained_end = blk_root_drained_end, .activate = blk_root_activate, + .inactivate = blk_root_inactivate, }; /* diff --git a/include/block/block_int.h b/include/block/block_int.h index 563792580c..5750a448a6 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -473,10 +473,11 @@ struct BdrvChildRole { void (*drained_begin)(BdrvChild *child); void (*drained_end)(BdrvChild *child); - /* Notifies the parent that the child has been activated (e.g. when - * migration is completing) and it can start requesting permissions and - * doing I/O on it. */ + /* Notifies the parent that the child has been activated/inactivated (e.g. + * when migration is completing) and it can start/stop requesting + * permissions and doing I/O on it. */ void (*activate)(BdrvChild *child, Error **errp); + int (*inactivate)(BdrvChild *child); void (*attach)(BdrvChild *child); void (*detach)(BdrvChild *child); -- cgit v1.2.3 From 38701b6aef5e934c5901905e8ca2c50453088a81 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2017 18:52:39 +0200 Subject: block: Inactivate parents before children The proper order for inactivating block nodes is that first the parents get inactivated and then the children. If we do things in this order, we can assert that we didn't accidentally leave a parent activated when one of its child nodes is inactive. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/block.c b/block.c index 444a52e331..170002e78e 100644 --- a/block.c +++ b/block.c @@ -762,6 +762,13 @@ static void bdrv_child_cb_drained_end(BdrvChild *child) bdrv_drained_end(bs); } +static int bdrv_child_cb_inactivate(BdrvChild *child) +{ + BlockDriverState *bs = child->opaque; + assert(bs->open_flags & BDRV_O_INACTIVE); + return 0; +} + /* * Returns the options and flags that a temporary snapshot should get, based on * the originally requested flags (the originally requested image will have @@ -822,6 +829,7 @@ const BdrvChildRole child_file = { .inherit_options = bdrv_inherited_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, + .inactivate = bdrv_child_cb_inactivate, }; /* @@ -843,6 +851,7 @@ const BdrvChildRole child_format = { .inherit_options = bdrv_inherited_fmt_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, + .inactivate = bdrv_child_cb_inactivate, }; static void bdrv_backing_attach(BdrvChild *c) @@ -928,6 +937,7 @@ const BdrvChildRole child_backing = { .inherit_options = bdrv_backing_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, + .inactivate = bdrv_child_cb_inactivate, }; static int bdrv_open_flags(BlockDriverState *bs, int flags) @@ -4029,13 +4039,6 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs, } } - QLIST_FOREACH(child, &bs->children, next) { - ret = bdrv_inactivate_recurse(child->bs, setting_flag); - if (ret < 0) { - return ret; - } - } - if (setting_flag) { bs->open_flags |= BDRV_O_INACTIVE; @@ -4049,6 +4052,14 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs, } } } + + QLIST_FOREACH(child, &bs->children, next) { + ret = bdrv_inactivate_recurse(child->bs, setting_flag); + if (ret < 0) { + return ret; + } + } + return 0; } -- cgit v1.2.3 From 9c5e6594f15b7364624a3ad40306c396c93a2145 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2017 18:52:40 +0200 Subject: block: Fix write/resize permissions for inactive images Format drivers for inactive nodes don't need write/resize permissions on their bs->file and can share write/resize with another VM (in fact, this is the whole point of keeping images inactive). Represent this fact in the op blocker system, so that image locking does the right thing without special-casing inactive images. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block.c | 35 +++++++++++++++++++++++++++++++++-- include/block/block.h | 1 + 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 170002e78e..50ba264143 100644 --- a/block.c +++ b/block.c @@ -192,11 +192,20 @@ void path_combine(char *dest, int dest_size, } } +/* Returns whether the image file is opened as read-only. Note that this can + * return false and writing to the image file is still not possible because the + * image is inactivated. */ bool bdrv_is_read_only(BlockDriverState *bs) { return bs->read_only; } +/* Returns whether the image file can be written to right now */ +bool bdrv_is_writable(BlockDriverState *bs) +{ + return !bdrv_is_read_only(bs) && !(bs->open_flags & BDRV_O_INACTIVE); +} + int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp) { /* Do not set read_only if copy_on_read is enabled */ @@ -1510,7 +1519,7 @@ static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, /* Write permissions never work with read-only images */ if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && - bdrv_is_read_only(bs)) + !bdrv_is_writable(bs)) { error_setg(errp, "Block node is read-only"); return -EPERM; @@ -1795,7 +1804,7 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared); /* Format drivers may touch metadata even if the guest doesn't write */ - if (!bdrv_is_read_only(bs)) { + if (bdrv_is_writable(bs)) { perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; } @@ -1821,6 +1830,10 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, BLK_PERM_WRITE_UNCHANGED; } + if (bs->open_flags & BDRV_O_INACTIVE) { + shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; + } + *nperm = perm; *nshared = shared; } @@ -3960,6 +3973,7 @@ void bdrv_init_with_whitelist(void) void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) { BdrvChild *child, *parent; + uint64_t perm, shared_perm; Error *local_err = NULL; int ret; @@ -3996,6 +4010,16 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) return; } + /* Update permissions, they may differ for inactive nodes */ + bdrv_get_cumulative_perm(bs, &perm, &shared_perm); + ret = bdrv_check_perm(bs, perm, shared_perm, NULL, &local_err); + if (ret < 0) { + bs->open_flags |= BDRV_O_INACTIVE; + error_propagate(errp, local_err); + return; + } + bdrv_set_perm(bs, perm, shared_perm); + QLIST_FOREACH(parent, &bs->parents, next_parent) { if (parent->role->activate) { parent->role->activate(parent, &local_err); @@ -4040,6 +4064,8 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs, } if (setting_flag) { + uint64_t perm, shared_perm; + bs->open_flags |= BDRV_O_INACTIVE; QLIST_FOREACH(parent, &bs->parents, next_parent) { @@ -4051,6 +4077,11 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs, } } } + + /* Update permissions, they may differ for inactive nodes */ + bdrv_get_cumulative_perm(bs, &perm, &shared_perm); + bdrv_check_perm(bs, perm, shared_perm, NULL, &error_abort); + bdrv_set_perm(bs, perm, shared_perm); } QLIST_FOREACH(child, &bs->children, next) { diff --git a/include/block/block.h b/include/block/block.h index 80d51d8f12..90932b4709 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -435,6 +435,7 @@ int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, int64_t sector_num, int nb_sectors, int *pnum); bool bdrv_is_read_only(BlockDriverState *bs); +bool bdrv_is_writable(BlockDriverState *bs); int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp); int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp); bool bdrv_is_sg(BlockDriverState *bs); -- cgit v1.2.3 From 22d5cd82e98b61b1dbd791fab9f4ae0f77c0ed14 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 4 May 2017 18:52:41 +0200 Subject: file-posix: Remove .bdrv_inactivate/invalidate_cache Now that the block layer takes care to request a lot less permissions for inactive nodes, the special-casing in file-posix isn't necessary any more. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/file-posix.c | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index a09055b700..4354d49642 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2190,35 +2190,6 @@ static void raw_abort_perm_update(BlockDriverState *bs) raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL); } -static int raw_inactivate(BlockDriverState *bs) -{ - int ret; - uint64_t perm = 0; - uint64_t shared = BLK_PERM_ALL; - - ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, NULL); - if (ret) { - return ret; - } - raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL); - return 0; -} - - -static void raw_invalidate_cache(BlockDriverState *bs, Error **errp) -{ - BDRVRawState *s = bs->opaque; - int ret; - - assert(!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)); - ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, s->perm, s->shared_perm, - errp); - if (ret) { - return; - } - raw_handle_perm_lock(bs, RAW_PL_COMMIT, s->perm, s->shared_perm, NULL); -} - BlockDriver bdrv_file = { .format_name = "file", .protocol_name = "file", @@ -2249,8 +2220,6 @@ BlockDriver bdrv_file = { .bdrv_get_info = raw_get_info, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, - .bdrv_inactivate = raw_inactivate, - .bdrv_invalidate_cache = raw_invalidate_cache, .bdrv_check_perm = raw_check_perm, .bdrv_set_perm = raw_set_perm, .bdrv_abort_perm_update = raw_abort_perm_update, @@ -2712,8 +2681,6 @@ static BlockDriver bdrv_host_device = { .bdrv_get_info = raw_get_info, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, - .bdrv_inactivate = raw_inactivate, - .bdrv_invalidate_cache = raw_invalidate_cache, .bdrv_check_perm = raw_check_perm, .bdrv_set_perm = raw_set_perm, .bdrv_abort_perm_update = raw_abort_perm_update, -- cgit v1.2.3 From b91127edd0ff96f27f1e58e47f4e9f9d6a0fed02 Mon Sep 17 00:00:00 2001 From: Anton Nefedov Date: Wed, 26 Apr 2017 11:33:15 +0300 Subject: qemu-img: wait for convert coroutines to complete On error path (like i/o error in one of the coroutines), it's required to - wait for coroutines completion before cleaning the common structures - reenter dependent coroutines so they ever finish Introduced in 2d9187bc65. Cc: qemu-stable@nongnu.org Signed-off-by: Anton Nefedov Reviewed-by: Peter Lieven Signed-off-by: Kevin Wolf --- qemu-img.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 418f061736..b506839ef0 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1761,13 +1761,13 @@ static void coroutine_fn convert_co_do_copy(void *opaque) qemu_co_mutex_lock(&s->lock); if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) { qemu_co_mutex_unlock(&s->lock); - goto out; + break; } n = convert_iteration_sectors(s, s->sector_num); if (n < 0) { qemu_co_mutex_unlock(&s->lock); s->ret = n; - goto out; + break; } /* save current sector and allocation status to local variables */ sector_num = s->sector_num; @@ -1792,7 +1792,6 @@ static void coroutine_fn convert_co_do_copy(void *opaque) error_report("error while reading sector %" PRId64 ": %s", sector_num, strerror(-ret)); s->ret = ret; - goto out; } } else if (!s->min_sparse && status == BLK_ZERO) { status = BLK_DATA; @@ -1801,22 +1800,20 @@ static void coroutine_fn convert_co_do_copy(void *opaque) if (s->wr_in_order) { /* keep writes in order */ - while (s->wr_offs != sector_num) { - if (s->ret != -EINPROGRESS) { - goto out; - } + while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) { s->wait_sector_num[index] = sector_num; qemu_coroutine_yield(); } s->wait_sector_num[index] = -1; } - ret = convert_co_write(s, sector_num, n, buf, status); - if (ret < 0) { - error_report("error while writing sector %" PRId64 - ": %s", sector_num, strerror(-ret)); - s->ret = ret; - goto out; + if (s->ret == -EINPROGRESS) { + ret = convert_co_write(s, sector_num, n, buf, status); + if (ret < 0) { + error_report("error while writing sector %" PRId64 + ": %s", sector_num, strerror(-ret)); + s->ret = ret; + } } if (s->wr_in_order) { @@ -1837,7 +1834,6 @@ static void coroutine_fn convert_co_do_copy(void *opaque) } } -out: qemu_vfree(buf); s->co[index] = NULL; s->running_coroutines--; @@ -1899,7 +1895,7 @@ static int convert_do_copy(ImgConvertState *s) qemu_coroutine_enter(s->co[i]); } - while (s->ret == -EINPROGRESS) { + while (s->running_coroutines) { main_loop_wait(false); } -- cgit v1.2.3 From c03e7ef12a954f715a4b56e3a9595f8253987701 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 May 2017 11:58:07 +0200 Subject: nvme: Implement Write Zeroes Signed-off-by: Keith Busch [hch: ported over from qemu-nvme.git to mainline] Signed-off-by: Christoph Hellwig Acked-by: Keith Busch Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 26 ++++++++++++++++++++++++++ hw/block/nvme.h | 1 + 2 files changed, 27 insertions(+) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index ae303d44e5..7428db9f0c 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -227,6 +227,29 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, return NVME_NO_COMPLETE; } +static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, + NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)cmd; + const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); + const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS); + uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS); + + if (slba + nlb > ns->id_ns.nsze) { + return NVME_LBA_RANGE | NVME_DNR; + } + + req->has_sg = false; + block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0, + BLOCK_ACCT_WRITE); + req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, aio_slba, aio_nlb, + BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req); + return NVME_NO_COMPLETE; +} + static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, NvmeRequest *req) { @@ -279,6 +302,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) switch (cmd->opcode) { case NVME_CMD_FLUSH: return nvme_flush(n, ns, cmd, req); + case NVME_CMD_WRITE_ZEROS: + return nvme_write_zeros(n, ns, cmd, req); case NVME_CMD_WRITE: case NVME_CMD_READ: return nvme_rw(n, ns, cmd, req); @@ -895,6 +920,7 @@ static int nvme_init(PCIDevice *pci_dev) id->sqes = (0x6 << 4) | 0x6; id->cqes = (0x4 << 4) | 0x4; id->nn = cpu_to_le32(n->num_namespaces); + id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS); id->psd[0].mp = cpu_to_le16(0x9c4); id->psd[0].enlat = cpu_to_le32(0x10); id->psd[0].exlat = cpu_to_le32(0x4); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 8fb0c10756..a0d15649f9 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -179,6 +179,7 @@ enum NvmeIoCommands { NVME_CMD_READ = 0x02, NVME_CMD_WRITE_UNCOR = 0x04, NVME_CMD_COMPARE = 0x05, + NVME_CMD_WRITE_ZEROS = 0x08, NVME_CMD_DSM = 0x09, }; -- cgit v1.2.3 From 698bdfa07d66b5ec218a60229e58eae1dcde00e5 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 10 May 2017 13:39:45 -0400 Subject: blockdev: use drained_begin/end for qmp_block_resize Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1447551 If one tries to issue a block_resize while a guest is busy accessing the disk, it is possible that qemu may deadlock when invoking aio_poll from both the main loop and the iothread. Replace another instance of bdrv_drain_all that doesn't quite belong. Cc: qemu-stable@nongnu.org Suggested-by: Paolo Bonzini Signed-off-by: John Snow Reviewed-by: Eric Blake Reviewed-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- blockdev.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/blockdev.c b/blockdev.c index 0d3773bcb8..c63f4e82c7 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2923,10 +2923,9 @@ void qmp_block_resize(bool has_device, const char *device, goto out; } - /* complete all in-flight operations before resizing the device */ - bdrv_drain_all(); - + bdrv_drained_begin(bs); ret = blk_truncate(blk, size, errp); + bdrv_drained_end(bs); out: blk_unref(blk); -- cgit v1.2.3 From 1bce6b4ce3ba03015bdf16d40c1bbe33d8b6031b Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 29 Apr 2017 14:14:11 -0500 Subject: qemu-io: Improve alignment checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several copy-and-pasted alignment checks exist in qemu-io, which could use some minor improvements: - Manual comparison against 0x1ff is not as clean as using our alignment macros (QEMU_IS_ALIGNED) from osdep.h. - The error messages aren't quite grammatically correct. Suggested-by: Philippe Mathieu-Daudé Suggested-by: Max Reitz Signed-off-by: Eric Blake Message-id: 20170429191419.30051-2-eblake@redhat.com Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- qemu-io-cmds.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 21af9e65b2..6a0024b484 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -740,13 +740,13 @@ static int read_f(BlockBackend *blk, int argc, char **argv) } if (bflag) { - if (offset & 0x1ff) { - printf("offset %" PRId64 " is not sector aligned\n", + if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) { + printf("%" PRId64 " is not a sector-aligned value for 'offset'\n", offset); return 0; } - if (count & 0x1ff) { - printf("count %"PRId64" is not sector aligned\n", + if (!QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)) { + printf("%"PRId64" is not a sector-aligned value for 'count'\n", count); return 0; } @@ -1050,14 +1050,14 @@ static int write_f(BlockBackend *blk, int argc, char **argv) } if (bflag || cflag) { - if (offset & 0x1ff) { - printf("offset %" PRId64 " is not sector aligned\n", + if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) { + printf("%" PRId64 " is not a sector-aligned value for 'offset'\n", offset); return 0; } - if (count & 0x1ff) { - printf("count %"PRId64" is not sector aligned\n", + if (!QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)) { + printf("%"PRId64" is not a sector-aligned value for 'count'\n", count); return 0; } @@ -1769,8 +1769,8 @@ static int alloc_f(BlockBackend *blk, int argc, char **argv) if (offset < 0) { print_cvtnum_err(offset, argv[1]); return 0; - } else if (offset & 0x1ff) { - printf("offset %" PRId64 " is not sector aligned\n", + } else if (!QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)) { + printf("%" PRId64 " is not a sector-aligned value for 'offset'\n", offset); return 0; } -- cgit v1.2.3 From 4401fdc77cb6d79e53ce2fc2193444cad8b95749 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 29 Apr 2017 14:14:12 -0500 Subject: qemu-io: Switch 'alloc' command to byte-based length For the 'alloc' command, accepting an offset in bytes but a length in sectors, and reporting output in sectors, is confusing. Do everything in bytes, and adjust the expected output accordingly. Signed-off-by: Eric Blake Message-id: 20170429191419.30051-3-eblake@redhat.com Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- qemu-io-cmds.c | 30 ++++++++++++++++++------------ tests/qemu-iotests/019.out | 8 ++++---- tests/qemu-iotests/common.pattern | 2 +- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 6a0024b484..1e0ebb4575 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -1760,7 +1760,7 @@ out: static int alloc_f(BlockBackend *blk, int argc, char **argv) { BlockDriverState *bs = blk_bs(blk); - int64_t offset, sector_num, nb_sectors, remaining; + int64_t offset, sector_num, nb_sectors, remaining, count; char s1[64]; int num, ret; int64_t sum_alloc; @@ -1776,18 +1776,24 @@ static int alloc_f(BlockBackend *blk, int argc, char **argv) } if (argc == 3) { - nb_sectors = cvtnum(argv[2]); - if (nb_sectors < 0) { - print_cvtnum_err(nb_sectors, argv[2]); + count = cvtnum(argv[2]); + if (count < 0) { + print_cvtnum_err(count, argv[2]); return 0; - } else if (nb_sectors > INT_MAX) { - printf("length argument cannot exceed %d, given %s\n", - INT_MAX, argv[2]); + } else if (count > INT_MAX * BDRV_SECTOR_SIZE) { + printf("length argument cannot exceed %llu, given %s\n", + INT_MAX * BDRV_SECTOR_SIZE, argv[2]); return 0; } } else { - nb_sectors = 1; + count = BDRV_SECTOR_SIZE; + } + if (!QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE)) { + printf("%" PRId64 " is not a sector-aligned value for 'count'\n", + count); + return 0; } + nb_sectors = count >> BDRV_SECTOR_BITS; remaining = nb_sectors; sum_alloc = 0; @@ -1811,8 +1817,8 @@ static int alloc_f(BlockBackend *blk, int argc, char **argv) cvtstr(offset, s1, sizeof(s1)); - printf("%"PRId64"/%"PRId64" sectors allocated at offset %s\n", - sum_alloc, nb_sectors, s1); + printf("%"PRId64"/%"PRId64" bytes allocated at offset %s\n", + sum_alloc << BDRV_SECTOR_BITS, nb_sectors << BDRV_SECTOR_BITS, s1); return 0; } @@ -1822,8 +1828,8 @@ static const cmdinfo_t alloc_cmd = { .argmin = 1, .argmax = 2, .cfunc = alloc_f, - .args = "off [sectors]", - .oneline = "checks if a sector is present in the file", + .args = "offset [count]", + .oneline = "checks if offset is allocated in the file", }; diff --git a/tests/qemu-iotests/019.out b/tests/qemu-iotests/019.out index 0124264975..17a7c036b9 100644 --- a/tests/qemu-iotests/019.out +++ b/tests/qemu-iotests/019.out @@ -542,8 +542,8 @@ Testing conversion with -B TEST_DIR/t.IMGFMT.base Checking if backing clusters are allocated when they shouldn't -0/128 sectors allocated at offset 1 MiB -0/128 sectors allocated at offset 4.001 GiB +0/65536 bytes allocated at offset 1 MiB +0/65536 bytes allocated at offset 4.001 GiB Reading === IO: pattern 42 @@ -1086,8 +1086,8 @@ Testing conversion with -o backing_file=TEST_DIR/t.IMGFMT.base Checking if backing clusters are allocated when they shouldn't -0/128 sectors allocated at offset 1 MiB -0/128 sectors allocated at offset 4.001 GiB +0/65536 bytes allocated at offset 1 MiB +0/65536 bytes allocated at offset 4.001 GiB Reading === IO: pattern 42 diff --git a/tests/qemu-iotests/common.pattern b/tests/qemu-iotests/common.pattern index ddfbca1b76..34f4a8dc9b 100644 --- a/tests/qemu-iotests/common.pattern +++ b/tests/qemu-iotests/common.pattern @@ -18,7 +18,7 @@ function do_is_allocated() { local start=$1 - local size=$(( $2 / 512)) + local size=$2 local step=$3 local count=$4 -- cgit v1.2.3 From 6f3c90af3c50d4f839849c8ba9b6ba4e9a548c28 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 29 Apr 2017 14:14:13 -0500 Subject: qemu-io: Switch 'map' output to byte-based reporting Mixing byte offset and sector allocation counts is a bit confusing. Also, reporting n/m sectors, where m decreases according to the remaining size of the file, isn't really adding any useful information; and reporting an offset at both the front and end of the line, with large amounts of whitespace, is pointless. Update the output to use byte counts and shorter lines, then adjust the affected tests (./check -qcow2 102, ./check -vpc 146). Note that 'qemu-io map' is MUCH weaker than 'qemu-img map'; the former only shows which regions of the active layer are allocated, without regards to where the allocation comes from or whether the allocated portion is known to read as zero (because it is using the weaker bdrv_is_allocated()); while the latter (especially in --output=json mode) reports more details from bdrv_get_block_status(). Signed-off-by: Eric Blake Message-id: 20170429191419.30051-4-eblake@redhat.com Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- qemu-io-cmds.c | 11 ++++++----- tests/qemu-iotests/102.out | 4 ++-- tests/qemu-iotests/146.out | 30 +++++++++++++++--------------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 1e0ebb4575..4b2278f040 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -1868,7 +1868,7 @@ static int map_f(BlockBackend *blk, int argc, char **argv) { int64_t offset; int64_t nb_sectors, total_sectors; - char s1[64]; + char s1[64], s2[64]; int64_t num; int ret; const char *retstr; @@ -1894,10 +1894,11 @@ static int map_f(BlockBackend *blk, int argc, char **argv) } retstr = ret ? " allocated" : "not allocated"; - cvtstr(offset << 9ULL, s1, sizeof(s1)); - printf("[% 24" PRId64 "] % 8" PRId64 "/% 8" PRId64 " sectors %s " - "at offset %s (%d)\n", - offset << 9ULL, num, nb_sectors, retstr, s1, ret); + cvtstr(num << BDRV_SECTOR_BITS, s1, sizeof(s1)); + cvtstr(offset << BDRV_SECTOR_BITS, s2, sizeof(s2)); + printf("%s (0x%" PRIx64 ") bytes %s at offset %s (0x%" PRIx64 ")\n", + s1, num << BDRV_SECTOR_BITS, retstr, + s2, offset << BDRV_SECTOR_BITS); offset += num; nb_sectors -= num; diff --git a/tests/qemu-iotests/102.out b/tests/qemu-iotests/102.out index eecde16ad5..ccf172abd9 100644 --- a/tests/qemu-iotests/102.out +++ b/tests/qemu-iotests/102.out @@ -6,7 +6,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536 wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Image resized. -[ 0] 128/ 128 sectors allocated at offset 0 bytes (1) +64 KiB (0x10000) bytes allocated at offset 0 bytes (0x0) Offset Length Mapped to File === Testing map on an image file truncated outside of qemu === @@ -17,5 +17,5 @@ wrote 65536/65536 bytes at offset 0 Image resized. QEMU X.Y.Z monitor - type 'help' for more information (qemu) qemu-io drv0 map -[ 0] 128/ 128 sectors allocated at offset 0 bytes (1) +64 KiB (0x10000) bytes allocated at offset 0 bytes (0x0) *** done diff --git a/tests/qemu-iotests/146.out b/tests/qemu-iotests/146.out index 4f334d86bc..db6b296b9e 100644 --- a/tests/qemu-iotests/146.out +++ b/tests/qemu-iotests/146.out @@ -2,39 +2,39 @@ QA output created by 146 === Testing VPC Autodetect === -[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0) +126.998 GiB (0x1fbfe04000) bytes not allocated at offset 0 bytes (0x0) === Testing VPC with current_size force === -[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0) +127 GiB (0x1fc0000000) bytes not allocated at offset 0 bytes (0x0) === Testing VPC with chs force === -[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0) +126.998 GiB (0x1fbfe04000) bytes not allocated at offset 0 bytes (0x0) === Testing Hyper-V Autodetect === -[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0) +127 GiB (0x1fc0000000) bytes not allocated at offset 0 bytes (0x0) === Testing Hyper-V with current_size force === -[ 0] 266338304/ 266338304 sectors not allocated at offset 0 bytes (0) +127 GiB (0x1fc0000000) bytes not allocated at offset 0 bytes (0x0) === Testing Hyper-V with chs force === -[ 0] 266334240/ 266334240 sectors not allocated at offset 0 bytes (0) +126.998 GiB (0x1fbfe04000) bytes not allocated at offset 0 bytes (0x0) === Testing d2v Autodetect === -[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1) +251.250 MiB (0xfb40000) bytes allocated at offset 0 bytes (0x0) === Testing d2v with current_size force === -[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1) +251.250 MiB (0xfb40000) bytes allocated at offset 0 bytes (0x0) === Testing d2v with chs force === -[ 0] 514560/ 514560 sectors allocated at offset 0 bytes (1) +251.250 MiB (0xfb40000) bytes allocated at offset 0 bytes (0x0) === Testing Image create, default === @@ -42,15 +42,15 @@ Formatting 'TEST_DIR/IMGFMT-create-test.IMGFMT', fmt=IMGFMT size=4294967296 === Read created image, default opts ==== -[ 0] 8389584/ 8389584 sectors not allocated at offset 0 bytes (0) +4 GiB (0x10007a000) bytes not allocated at offset 0 bytes (0x0) === Read created image, force_size_calc=chs ==== -[ 0] 8389584/ 8389584 sectors not allocated at offset 0 bytes (0) +4 GiB (0x10007a000) bytes not allocated at offset 0 bytes (0x0) === Read created image, force_size_calc=current_size ==== -[ 0] 8389584/ 8389584 sectors not allocated at offset 0 bytes (0) +4 GiB (0x10007a000) bytes not allocated at offset 0 bytes (0x0) === Testing Image create, force_size === @@ -58,13 +58,13 @@ Formatting 'TEST_DIR/IMGFMT-create-test.IMGFMT', fmt=IMGFMT size=4294967296 forc === Read created image, default opts ==== -[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0) +4 GiB (0x100000000) bytes not allocated at offset 0 bytes (0x0) === Read created image, force_size_calc=chs ==== -[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0) +4 GiB (0x100000000) bytes not allocated at offset 0 bytes (0x0) === Read created image, force_size_calc=current_size ==== -[ 0] 8388608/ 8388608 sectors not allocated at offset 0 bytes (0) +4 GiB (0x100000000) bytes not allocated at offset 0 bytes (0x0) *** done -- cgit v1.2.3 From e0ef439588ce1ede747f82b77d893190c1cc9f4d Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 29 Apr 2017 14:14:14 -0500 Subject: blkdebug: Sanity check block layer guarantees Commits 04ed95f4 and 1a62d0ac updated the block layer to auto-fragment any I/O to fit within device boundaries. Additionally, when using a minimum alignment of 4k, we want to ensure the block layer does proper read-modify-write rather than requesting I/O on a slice of a sector. Let's enforce that the contract is obeyed when using blkdebug. For now, blkdebug only allows alignment overrides, and just inherits other limits from whatever device it is wrapping, but a future patch will further enhance things. Signed-off-by: Eric Blake Reviewed-by: Kevin Wolf Reviewed-by: Max Reitz Message-id: 20170429191419.30051-5-eblake@redhat.com Signed-off-by: Max Reitz --- block/blkdebug.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/block/blkdebug.c b/block/blkdebug.c index 3c088934db..a562e2e32f 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -429,6 +429,13 @@ blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, BDRVBlkdebugState *s = bs->opaque; BlkdebugRule *rule = NULL; + /* Sanity check block layer guarantees */ + assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); + assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment)); + if (bs->bl.max_transfer) { + assert(bytes <= bs->bl.max_transfer); + } + QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { uint64_t inject_offset = rule->options.inject.offset; @@ -453,6 +460,13 @@ blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, BDRVBlkdebugState *s = bs->opaque; BlkdebugRule *rule = NULL; + /* Sanity check block layer guarantees */ + assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); + assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment)); + if (bs->bl.max_transfer) { + assert(bytes <= bs->bl.max_transfer); + } + QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { uint64_t inject_offset = rule->options.inject.offset; -- cgit v1.2.3 From d157ed5f7235f3d2d5596a514ad7507b18e24b88 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 29 Apr 2017 14:14:15 -0500 Subject: blkdebug: Refactor error injection Rather than repeat the logic at each caller of checking if a Rule exists that warrants an error injection, fold that logic into inject_error(); and rename it to rule_check() for legibility. This will help the next patch, which adds two more callers that need to check rules for the potential of injecting errors. Signed-off-by: Eric Blake Reviewed-by: Max Reitz Message-id: 20170429191419.30051-6-eblake@redhat.com Signed-off-by: Max Reitz --- block/blkdebug.c | 74 +++++++++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index a562e2e32f..554573fd14 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -403,11 +403,30 @@ out: return ret; } -static int inject_error(BlockDriverState *bs, BlkdebugRule *rule) +static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes) { BDRVBlkdebugState *s = bs->opaque; - int error = rule->options.inject.error; - bool immediately = rule->options.inject.immediately; + BlkdebugRule *rule = NULL; + int error; + bool immediately; + + QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { + uint64_t inject_offset = rule->options.inject.offset; + + if (inject_offset == -1 || + (bytes && inject_offset >= offset && + inject_offset < offset + bytes)) + { + break; + } + } + + if (!rule || !rule->options.inject.error) { + return 0; + } + + immediately = rule->options.inject.immediately; + error = rule->options.inject.error; if (rule->options.inject.once) { QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next); @@ -426,8 +445,7 @@ static int coroutine_fn blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { - BDRVBlkdebugState *s = bs->opaque; - BlkdebugRule *rule = NULL; + int err; /* Sanity check block layer guarantees */ assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); @@ -436,18 +454,9 @@ blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, assert(bytes <= bs->bl.max_transfer); } - QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { - uint64_t inject_offset = rule->options.inject.offset; - - if (inject_offset == -1 || - (inject_offset >= offset && inject_offset < offset + bytes)) - { - break; - } - } - - if (rule && rule->options.inject.error) { - return inject_error(bs, rule); + err = rule_check(bs, offset, bytes); + if (err) { + return err; } return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); @@ -457,8 +466,7 @@ static int coroutine_fn blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { - BDRVBlkdebugState *s = bs->opaque; - BlkdebugRule *rule = NULL; + int err; /* Sanity check block layer guarantees */ assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); @@ -467,18 +475,9 @@ blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, assert(bytes <= bs->bl.max_transfer); } - QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { - uint64_t inject_offset = rule->options.inject.offset; - - if (inject_offset == -1 || - (inject_offset >= offset && inject_offset < offset + bytes)) - { - break; - } - } - - if (rule && rule->options.inject.error) { - return inject_error(bs, rule); + err = rule_check(bs, offset, bytes); + if (err) { + return err; } return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); @@ -486,17 +485,10 @@ blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, static int blkdebug_co_flush(BlockDriverState *bs) { - BDRVBlkdebugState *s = bs->opaque; - BlkdebugRule *rule = NULL; - - QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { - if (rule->options.inject.offset == -1) { - break; - } - } + int err = rule_check(bs, 0, 0); - if (rule && rule->options.inject.error) { - return inject_error(bs, rule); + if (err) { + return err; } return bdrv_co_flush(bs->file->bs); -- cgit v1.2.3 From 63188c245013dbe383e8b031e665f813e2452ea5 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 29 Apr 2017 14:14:16 -0500 Subject: blkdebug: Add pass-through write_zero and discard support In order to test the effects of artificial geometry constraints on operations like write zero or discard, we first need blkdebug to manage these actions. It also allows us to inject errors on those operations, just like we can for read/write/flush. We can also test the contract promised by the block layer; namely, if a device has specified limits on alignment or maximum size, then those limits must be obeyed (for now, the blkdebug driver merely inherits limits from whatever it is wrapping, but the next patch will further enhance it to allow specific limit overrides). This patch intentionally refuses to service requests smaller than the requested alignments; this is because an upcoming patch adds a qemu-iotest to prove that the block layer is correctly handling fragmentation, but the test only works if there is a way to tell the difference at artificial alignment boundaries when blkdebug is using a larger-than-default alignment. If we let the blkdebug layer always defer to the underlying layer, which potentially has a smaller granularity, the iotest will be thwarted. Tested by setting up an NBD server with export 'foo', then invoking: $ ./qemu-io qemu-io> open -o driver=blkdebug blkdebug::nbd://localhost:10809/foo qemu-io> d 0 15M qemu-io> w -z 0 15M Pre-patch, the server never sees the discard (it was silently eaten by the block layer); post-patch it is passed across the wire. Likewise, pre-patch the write is always passed with NBD_WRITE (with 15M of zeroes on the wire), while post-patch it can utilize NBD_WRITE_ZEROES (for less traffic). Signed-off-by: Eric Blake Reviewed-by: Max Reitz Message-id: 20170429191419.30051-7-eblake@redhat.com Signed-off-by: Max Reitz --- block/blkdebug.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/block/blkdebug.c b/block/blkdebug.c index 554573fd14..b8cc87617e 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -1,6 +1,7 @@ /* * Block protocol for I/O error injection * + * Copyright (C) 2016-2017 Red Hat, Inc. * Copyright (c) 2010 Kevin Wolf * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -382,6 +383,11 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, goto out; } + bs->supported_write_flags = BDRV_REQ_FUA & + bs->file->bs->supported_write_flags; + bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & + bs->file->bs->supported_zero_flags; + /* Set request alignment */ align = qemu_opt_get_size(opts, "align", 0); if (align < INT_MAX && is_power_of_2(align)) { @@ -494,6 +500,72 @@ static int blkdebug_co_flush(BlockDriverState *bs) return bdrv_co_flush(bs->file->bs); } +static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int count, + BdrvRequestFlags flags) +{ + uint32_t align = MAX(bs->bl.request_alignment, + bs->bl.pwrite_zeroes_alignment); + int err; + + /* Only pass through requests that are larger than requested + * preferred alignment (so that we test the fallback to writes on + * unaligned portions), and check that the block layer never hands + * us anything unaligned that crosses an alignment boundary. */ + if (count < align) { + assert(QEMU_IS_ALIGNED(offset, align) || + QEMU_IS_ALIGNED(offset + count, align) || + DIV_ROUND_UP(offset, align) == + DIV_ROUND_UP(offset + count, align)); + return -ENOTSUP; + } + assert(QEMU_IS_ALIGNED(offset, align)); + assert(QEMU_IS_ALIGNED(count, align)); + if (bs->bl.max_pwrite_zeroes) { + assert(count <= bs->bl.max_pwrite_zeroes); + } + + err = rule_check(bs, offset, count); + if (err) { + return err; + } + + return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags); +} + +static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs, + int64_t offset, int count) +{ + uint32_t align = bs->bl.pdiscard_alignment; + int err; + + /* Only pass through requests that are larger than requested + * minimum alignment, and ensure that unaligned requests do not + * cross optimum discard boundaries. */ + if (count < bs->bl.request_alignment) { + assert(QEMU_IS_ALIGNED(offset, align) || + QEMU_IS_ALIGNED(offset + count, align) || + DIV_ROUND_UP(offset, align) == + DIV_ROUND_UP(offset + count, align)); + return -ENOTSUP; + } + assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); + assert(QEMU_IS_ALIGNED(count, bs->bl.request_alignment)); + if (align && count >= align) { + assert(QEMU_IS_ALIGNED(offset, align)); + assert(QEMU_IS_ALIGNED(count, align)); + } + if (bs->bl.max_pdiscard) { + assert(count <= bs->bl.max_pdiscard); + } + + err = rule_check(bs, offset, count); + if (err) { + return err; + } + + return bdrv_co_pdiscard(bs->file->bs, offset, count); +} static void blkdebug_close(BlockDriverState *bs) { @@ -748,6 +820,8 @@ static BlockDriver bdrv_blkdebug = { .bdrv_co_preadv = blkdebug_co_preadv, .bdrv_co_pwritev = blkdebug_co_pwritev, .bdrv_co_flush_to_disk = blkdebug_co_flush, + .bdrv_co_pwrite_zeroes = blkdebug_co_pwrite_zeroes, + .bdrv_co_pdiscard = blkdebug_co_pdiscard, .bdrv_debug_event = blkdebug_debug_event, .bdrv_debug_breakpoint = blkdebug_debug_breakpoint, -- cgit v1.2.3 From 3dc834f8795a46f919d90b1e5369308628858601 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 29 Apr 2017 14:14:17 -0500 Subject: blkdebug: Simplify override logic Rather than store into a local variable, then copy to the struct if the value is valid, then reporting errors otherwise, it is simpler to just store into the struct and report errors if the value is invalid. This however requires that the struct store a 64-bit number, rather than a narrower type. Likewise, setting a sane errno value in ret prior to the sequence of parsing and jumping to out: on error makes it easier for the next patch to add a chain of similar checks. Signed-off-by: Eric Blake Message-id: 20170429191419.30051-8-eblake@redhat.com Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/blkdebug.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index b8cc87617e..29ab7a3a8b 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -38,7 +38,7 @@ typedef struct BDRVBlkdebugState { int state; int new_state; - int align; + uint64_t align; /* For blkdebug_refresh_filename() */ char *config_file; @@ -353,7 +353,6 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, BDRVBlkdebugState *s = bs->opaque; QemuOpts *opts; Error *local_err = NULL; - uint64_t align; int ret; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); @@ -387,20 +386,17 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, bs->file->bs->supported_write_flags; bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & bs->file->bs->supported_zero_flags; + ret = -EINVAL; /* Set request alignment */ - align = qemu_opt_get_size(opts, "align", 0); - if (align < INT_MAX && is_power_of_2(align)) { - s->align = align; - } else if (align) { - error_setg(errp, "Invalid alignment"); - ret = -EINVAL; + s->align = qemu_opt_get_size(opts, "align", 0); + if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) { + error_setg(errp, "Cannot meet constraints with align %" PRIu64, + s->align); goto out; } ret = 0; - goto out; - out: if (ret < 0) { g_free(s->config_file); -- cgit v1.2.3 From 430b26a82da61876c4eaf559ae02332582968043 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 29 Apr 2017 14:14:18 -0500 Subject: blkdebug: Add ability to override unmap geometries Make it easier to simulate various unusual hardware setups (for example, recent commits 3482b9b and b8d0a98 affect the Dell Equallogic iSCSI with its 15M preferred and maximum unmap and write zero sizing, or b2f95fe deals with the Linux loopback block device having a max_transfer of 64k), by allowing blkdebug to wrap any other device with further restrictions on various alignments. Signed-off-by: Eric Blake Reviewed-by: Max Reitz Message-id: 20170429191419.30051-9-eblake@redhat.com Signed-off-by: Max Reitz --- block/blkdebug.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++- qapi/block-core.json | 33 ++++++++++++++++-- 2 files changed, 125 insertions(+), 4 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index 29ab7a3a8b..a5196e889d 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -39,6 +39,11 @@ typedef struct BDRVBlkdebugState { int state; int new_state; uint64_t align; + uint64_t max_transfer; + uint64_t opt_write_zero; + uint64_t max_write_zero; + uint64_t opt_discard; + uint64_t max_discard; /* For blkdebug_refresh_filename() */ char *config_file; @@ -343,6 +348,31 @@ static QemuOptsList runtime_opts = { .type = QEMU_OPT_SIZE, .help = "Required alignment in bytes", }, + { + .name = "max-transfer", + .type = QEMU_OPT_SIZE, + .help = "Maximum transfer size in bytes", + }, + { + .name = "opt-write-zero", + .type = QEMU_OPT_SIZE, + .help = "Optimum write zero alignment in bytes", + }, + { + .name = "max-write-zero", + .type = QEMU_OPT_SIZE, + .help = "Maximum write zero size in bytes", + }, + { + .name = "opt-discard", + .type = QEMU_OPT_SIZE, + .help = "Optimum discard alignment in bytes", + }, + { + .name = "max-discard", + .type = QEMU_OPT_SIZE, + .help = "Maximum discard size in bytes", + }, { /* end of list */ } }, }; @@ -354,6 +384,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, QemuOpts *opts; Error *local_err = NULL; int ret; + uint64_t align; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -388,13 +419,61 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, bs->file->bs->supported_zero_flags; ret = -EINVAL; - /* Set request alignment */ + /* Set alignment overrides */ s->align = qemu_opt_get_size(opts, "align", 0); if (s->align && (s->align >= INT_MAX || !is_power_of_2(s->align))) { error_setg(errp, "Cannot meet constraints with align %" PRIu64, s->align); goto out; } + align = MAX(s->align, bs->file->bs->bl.request_alignment); + + s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0); + if (s->max_transfer && + (s->max_transfer >= INT_MAX || + !QEMU_IS_ALIGNED(s->max_transfer, align))) { + error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64, + s->max_transfer); + goto out; + } + + s->opt_write_zero = qemu_opt_get_size(opts, "opt-write-zero", 0); + if (s->opt_write_zero && + (s->opt_write_zero >= INT_MAX || + !QEMU_IS_ALIGNED(s->opt_write_zero, align))) { + error_setg(errp, "Cannot meet constraints with opt-write-zero %" PRIu64, + s->opt_write_zero); + goto out; + } + + s->max_write_zero = qemu_opt_get_size(opts, "max-write-zero", 0); + if (s->max_write_zero && + (s->max_write_zero >= INT_MAX || + !QEMU_IS_ALIGNED(s->max_write_zero, + MAX(s->opt_write_zero, align)))) { + error_setg(errp, "Cannot meet constraints with max-write-zero %" PRIu64, + s->max_write_zero); + goto out; + } + + s->opt_discard = qemu_opt_get_size(opts, "opt-discard", 0); + if (s->opt_discard && + (s->opt_discard >= INT_MAX || + !QEMU_IS_ALIGNED(s->opt_discard, align))) { + error_setg(errp, "Cannot meet constraints with opt-discard %" PRIu64, + s->opt_discard); + goto out; + } + + s->max_discard = qemu_opt_get_size(opts, "max-discard", 0); + if (s->max_discard && + (s->max_discard >= INT_MAX || + !QEMU_IS_ALIGNED(s->max_discard, + MAX(s->opt_discard, align)))) { + error_setg(errp, "Cannot meet constraints with max-discard %" PRIu64, + s->max_discard); + goto out; + } ret = 0; out: @@ -789,6 +868,21 @@ static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp) if (s->align) { bs->bl.request_alignment = s->align; } + if (s->max_transfer) { + bs->bl.max_transfer = s->max_transfer; + } + if (s->opt_write_zero) { + bs->bl.pwrite_zeroes_alignment = s->opt_write_zero; + } + if (s->max_write_zero) { + bs->bl.max_pwrite_zeroes = s->max_write_zero; + } + if (s->opt_discard) { + bs->bl.pdiscard_alignment = s->opt_discard; + } + if (s->max_discard) { + bs->bl.max_pdiscard = s->max_discard; + } } static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state, diff --git a/qapi/block-core.json b/qapi/block-core.json index 52e3ecd505..6b974b952f 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2434,8 +2434,33 @@ # # @config: filename of the configuration file # -# @align: required alignment for requests in bytes, -# must be power of 2, or 0 for default +# @align: required alignment for requests in bytes, must be +# positive power of 2, or 0 for default +# +# @max-transfer: maximum size for I/O transfers in bytes, must be +# positive multiple of @align and of the underlying +# file's request alignment (but need not be a power of +# 2), or 0 for default (since 2.10) +# +# @opt-write-zero: preferred alignment for write zero requests in bytes, +# must be positive multiple of @align and of the +# underlying file's request alignment (but need not be a +# power of 2), or 0 for default (since 2.10) +# +# @max-write-zero: maximum size for write zero requests in bytes, must be +# positive multiple of @align, of @opt-write-zero, and of +# the underlying file's request alignment (but need not +# be a power of 2), or 0 for default (since 2.10) +# +# @opt-discard: preferred alignment for discard requests in bytes, must +# be positive multiple of @align and of the underlying +# file's request alignment (but need not be a power of +# 2), or 0 for default (since 2.10) +# +# @max-discard: maximum size for discard requests in bytes, must be +# positive multiple of @align, of @opt-discard, and of +# the underlying file's request alignment (but need not +# be a power of 2), or 0 for default (since 2.10) # # @inject-error: array of error injection descriptions # @@ -2446,7 +2471,9 @@ { 'struct': 'BlockdevOptionsBlkdebug', 'data': { 'image': 'BlockdevRef', '*config': 'str', - '*align': 'int', + '*align': 'int', '*max-transfer': 'int32', + '*opt-write-zero': 'int32', '*max-write-zero': 'int32', + '*opt-discard': 'int32', '*max-discard': 'int32', '*inject-error': ['BlkdebugInjectErrorOptions'], '*set-state': ['BlkdebugSetStateOptions'] } } -- cgit v1.2.3 From 40812d937392fddc11f72a668aef251039cc15ce Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 29 Apr 2017 14:14:19 -0500 Subject: tests: Add coverage for recent block geometry fixes Use blkdebug's new geometry constraints to emulate setups that have needed past regression fixes: write zeroes asserting when running through a loopback block device with max-transfer smaller than cluster size, and discard rounding away portions of requests not aligned to preferred boundaries. Also, add coverage that the block layer is honoring max transfer limits. For now, a single iotest performs all actions, with the idea that we can add future blkdebug constraint test cases in the same file; but it can be split into multiple iotests if we find reason to run one portion of the test in more setups than what are possible in the other. For reference, the final portion of the test (checking whether discard passes as much as possible to the lowest layers of the stack) works as follows: qemu-io: discard 30M at 80000001, passed to blkdebug blkdebug: discard 511 bytes at 80000001, -ENOTSUP (smaller than blkdebug's 512 align) blkdebug: discard 14371328 bytes at 80000512, passed to qcow2 qcow2: discard 739840 bytes at 80000512, -ENOTSUP (smaller than qcow2's 1M align) qcow2: discard 13M bytes at 77M, succeeds blkdebug: discard 15M bytes at 90M, passed to qcow2 qcow2: discard 15M bytes at 90M, succeeds blkdebug: discard 1356800 bytes at 105M, passed to qcow2 qcow2: discard 1M at 105M, succeeds qcow2: discard 308224 bytes at 106M, -ENOTSUP (smaller than qcow2's 1M align) blkdebug: discard 1 byte at 111457280, -ENOTSUP (smaller than blkdebug's 512 align) Signed-off-by: Eric Blake Reviewed-by: Max Reitz Message-id: 20170429191419.30051-10-eblake@redhat.com [mreitz: For cooperation with image locking, add -r to the qemu-io invocation which verifies the image content] Signed-off-by: Max Reitz --- tests/qemu-iotests/177 | 114 +++++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/177.out | 49 +++++++++++++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 164 insertions(+) create mode 100755 tests/qemu-iotests/177 create mode 100644 tests/qemu-iotests/177.out diff --git a/tests/qemu-iotests/177 b/tests/qemu-iotests/177 new file mode 100755 index 0000000000..2005c174f2 --- /dev/null +++ b/tests/qemu-iotests/177 @@ -0,0 +1,114 @@ +#!/bin/bash +# +# Test corner cases with unusual block geometries +# +# Copyright (C) 2016-2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=eblake@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qcow2 +_supported_proto file + +CLUSTER_SIZE=1M +size=128M +options=driver=blkdebug,image.driver=qcow2 + +echo +echo "== setting up files ==" + +TEST_IMG="$TEST_IMG.base" _make_test_img $size +$QEMU_IO -c "write -P 11 0 $size" "$TEST_IMG.base" | _filter_qemu_io +_make_test_img -b "$TEST_IMG.base" +$QEMU_IO -c "write -P 22 0 $size" "$TEST_IMG" | _filter_qemu_io + +# Limited to 64k max-transfer +echo +echo "== constrained alignment and max-transfer ==" +limits=align=4k,max-transfer=64k +$QEMU_IO -c "open -o $options,$limits blkdebug::$TEST_IMG" \ + -c "write -P 33 1000 128k" -c "read -P 33 1000 128k" | _filter_qemu_io + +echo +echo "== write zero with constrained max-transfer ==" +limits=align=512,max-transfer=64k,opt-write-zero=$CLUSTER_SIZE +$QEMU_IO -c "open -o $options,$limits blkdebug::$TEST_IMG" \ + -c "write -z 8003584 2093056" | _filter_qemu_io + +# non-power-of-2 write-zero/discard alignments +echo +echo "== non-power-of-2 write zeroes limits ==" + +limits=align=512,opt-write-zero=15M,max-write-zero=15M,opt-discard=15M,max-discard=15M +$QEMU_IO -c "open -o $options,$limits blkdebug::$TEST_IMG" \ + -c "write -z 32M 32M" | _filter_qemu_io + +echo +echo "== non-power-of-2 discard limits ==" + +limits=align=512,opt-write-zero=15M,max-write-zero=15M,opt-discard=15M,max-discard=15M +$QEMU_IO -c "open -o $options,$limits blkdebug::$TEST_IMG" \ + -c "discard 80000001 30M" | _filter_qemu_io + +echo +echo "== verify image content ==" + +function verify_io() +{ + if ($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | + grep "compat: 0.10" > /dev/null); then + # For v2 images, discarded clusters are read from the backing file + discarded=11 + else + # Discarded clusters are zeroed for v3 or later + discarded=0 + fi + + echo read -P 22 0 1000 + echo read -P 33 1000 128k + echo read -P 22 132072 7871512 + echo read -P 0 8003584 2093056 + echo read -P 22 10096640 23457792 + echo read -P 0 32M 32M + echo read -P 22 64M 13M + echo read -P $discarded 77M 29M + echo read -P 22 106M 22M +} + +verify_io | $QEMU_IO -r "$TEST_IMG" | _filter_qemu_io + +_check_test_img + +# success, all done +echo "*** done" +status=0 diff --git a/tests/qemu-iotests/177.out b/tests/qemu-iotests/177.out new file mode 100644 index 0000000000..e887542678 --- /dev/null +++ b/tests/qemu-iotests/177.out @@ -0,0 +1,49 @@ +QA output created by 177 + +== setting up files == +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728 +wrote 134217728/134217728 bytes at offset 0 +128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/t.IMGFMT.base +wrote 134217728/134217728 bytes at offset 0 +128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== constrained alignment and max-transfer == +wrote 131072/131072 bytes at offset 1000 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 1000 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== write zero with constrained max-transfer == +wrote 2093056/2093056 bytes at offset 8003584 +1.996 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== non-power-of-2 write zeroes limits == +wrote 33554432/33554432 bytes at offset 33554432 +32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== non-power-of-2 discard limits == +discard 31457280/31457280 bytes at offset 80000001 +30 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verify image content == +read 1000/1000 bytes at offset 0 +1000 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 1000 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 7871512/7871512 bytes at offset 132072 +7.507 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 2093056/2093056 bytes at offset 8003584 +1.996 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 23457792/23457792 bytes at offset 10096640 +22.371 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 33554432/33554432 bytes at offset 33554432 +32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 13631488/13631488 bytes at offset 67108864 +13 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 30408704/30408704 bytes at offset 80740352 +29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 23068672/23068672 bytes at offset 111149056 +22 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index a4549d82b2..bb6df8f2d7 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -170,5 +170,6 @@ 174 auto 175 auto quick 176 rw auto backing +177 rw auto quick 181 rw auto migration 182 rw auto quick -- cgit v1.2.3 From b32cbae11107e9e172e5c58425a2a8362e7382ed Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 6 May 2017 19:05:41 -0500 Subject: qcow2: Nicer variable names in qcow2_update_snapshot_refcount() In order to keep checkpatch happy when the next patch changes indentation, we first have to shorten some long lines. The easiest approach is to use a new variable in place of 'offset & L2E_OFFSET_MASK', except that 'offset' is the best name for that variable. Change '[old_]offset' to '[old_]entry' to make room. While touching things, also fix checkpatch warnings about unusual 'for' statements. Suggested by Max Reitz Signed-off-by: Eric Blake Message-id: 20170507000552.20847-2-eblake@redhat.com Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/qcow2-refcount.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 4efca7ebdb..db0af2ce27 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1059,9 +1059,9 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, int64_t l1_table_offset, int l1_size, int addend) { BDRVQcow2State *s = bs->opaque; - uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount; + uint64_t *l1_table, *l2_table, l2_offset, entry, l1_size2, refcount; bool l1_allocated = false; - int64_t old_offset, old_l2_offset; + int64_t old_entry, old_l2_offset; int i, j, l1_modified = 0, nb_csectors; int ret; @@ -1089,15 +1089,16 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, goto fail; } - for(i = 0;i < l1_size; i++) + for (i = 0; i < l1_size; i++) { be64_to_cpus(&l1_table[i]); + } } else { assert(l1_size == s->l1_size); l1_table = s->l1_table; l1_allocated = false; } - for(i = 0; i < l1_size; i++) { + for (i = 0; i < l1_size; i++) { l2_offset = l1_table[i]; if (l2_offset) { old_l2_offset = l2_offset; @@ -1117,20 +1118,22 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, goto fail; } - for(j = 0; j < s->l2_size; j++) { + for (j = 0; j < s->l2_size; j++) { uint64_t cluster_index; + uint64_t offset; - offset = be64_to_cpu(l2_table[j]); - old_offset = offset; - offset &= ~QCOW_OFLAG_COPIED; + entry = be64_to_cpu(l2_table[j]); + old_entry = entry; + entry &= ~QCOW_OFLAG_COPIED; + offset = entry & L2E_OFFSET_MASK; - switch (qcow2_get_cluster_type(offset)) { + switch (qcow2_get_cluster_type(entry)) { case QCOW2_CLUSTER_COMPRESSED: - nb_csectors = ((offset >> s->csize_shift) & + nb_csectors = ((entry >> s->csize_shift) & s->csize_mask) + 1; if (addend != 0) { ret = update_refcount(bs, - (offset & s->cluster_offset_mask) & ~511, + (entry & s->cluster_offset_mask) & ~511, nb_csectors * 512, abs(addend), addend < 0, QCOW2_DISCARD_SNAPSHOT); if (ret < 0) { @@ -1143,18 +1146,17 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, case QCOW2_CLUSTER_NORMAL: case QCOW2_CLUSTER_ZERO: - if (offset_into_cluster(s, offset & L2E_OFFSET_MASK)) { + if (offset_into_cluster(s, offset)) { qcow2_signal_corruption(bs, true, -1, -1, "Data " - "cluster offset %#llx " - "unaligned (L2 offset: %#" + "cluster offset %#" PRIx64 + " unaligned (L2 offset: %#" PRIx64 ", L2 index: %#x)", - offset & L2E_OFFSET_MASK, - l2_offset, j); + offset, l2_offset, j); ret = -EIO; goto fail; } - cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits; + cluster_index = offset >> s->cluster_bits; if (!cluster_index) { /* unallocated */ refcount = 0; @@ -1184,14 +1186,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, } if (refcount == 1) { - offset |= QCOW_OFLAG_COPIED; + entry |= QCOW_OFLAG_COPIED; } - if (offset != old_offset) { + if (entry != old_entry) { if (addend > 0) { qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache); } - l2_table[j] = cpu_to_be64(offset); + l2_table[j] = cpu_to_be64(entry); qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); } -- cgit v1.2.3 From bbd995d8307e4a15a7724f11adf104b6e07a0849 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 6 May 2017 19:05:42 -0500 Subject: qcow2: Use consistent switch indentation Fix a couple of inconsistent indentations, before an upcoming patch further tweaks the switch statements. (best viewed with 'git diff -b'). Signed-off-by: Eric Blake Reviewed-by: Max Reitz Message-id: 20170507000552.20847-3-eblake@redhat.com Signed-off-by: Max Reitz --- block/qcow2-cluster.c | 32 +++++++++---------- block/qcow2-refcount.c | 84 +++++++++++++++++++++++++------------------------- 2 files changed, 58 insertions(+), 58 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 31077d8102..335a50512f 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1504,25 +1504,25 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, * but rather fall through to the backing file. */ switch (qcow2_get_cluster_type(old_l2_entry)) { - case QCOW2_CLUSTER_UNALLOCATED: - if (full_discard || !bs->backing) { - continue; - } - break; + case QCOW2_CLUSTER_UNALLOCATED: + if (full_discard || !bs->backing) { + continue; + } + break; - case QCOW2_CLUSTER_ZERO: - /* Preallocated zero clusters should be discarded in any case */ - if (!full_discard && (old_l2_entry & L2E_OFFSET_MASK) == 0) { - continue; - } - break; + case QCOW2_CLUSTER_ZERO: + /* Preallocated zero clusters should be discarded in any case */ + if (!full_discard && (old_l2_entry & L2E_OFFSET_MASK) == 0) { + continue; + } + break; - case QCOW2_CLUSTER_NORMAL: - case QCOW2_CLUSTER_COMPRESSED: - break; + case QCOW2_CLUSTER_NORMAL: + case QCOW2_CLUSTER_COMPRESSED: + break; - default: - abort(); + default: + abort(); } /* First remove L2 entries */ diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index db0af2ce27..908dbe5314 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1128,61 +1128,61 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, offset = entry & L2E_OFFSET_MASK; switch (qcow2_get_cluster_type(entry)) { - case QCOW2_CLUSTER_COMPRESSED: - nb_csectors = ((entry >> s->csize_shift) & - s->csize_mask) + 1; - if (addend != 0) { - ret = update_refcount(bs, + case QCOW2_CLUSTER_COMPRESSED: + nb_csectors = ((entry >> s->csize_shift) & + s->csize_mask) + 1; + if (addend != 0) { + ret = update_refcount(bs, (entry & s->cluster_offset_mask) & ~511, nb_csectors * 512, abs(addend), addend < 0, QCOW2_DISCARD_SNAPSHOT); - if (ret < 0) { - goto fail; - } - } - /* compressed clusters are never modified */ - refcount = 2; - break; - - case QCOW2_CLUSTER_NORMAL: - case QCOW2_CLUSTER_ZERO: - if (offset_into_cluster(s, offset)) { - qcow2_signal_corruption(bs, true, -1, -1, "Data " - "cluster offset %#" PRIx64 - " unaligned (L2 offset: %#" - PRIx64 ", L2 index: %#x)", - offset, l2_offset, j); - ret = -EIO; + if (ret < 0) { goto fail; } + } + /* compressed clusters are never modified */ + refcount = 2; + break; + + case QCOW2_CLUSTER_NORMAL: + case QCOW2_CLUSTER_ZERO: + if (offset_into_cluster(s, offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "Data " + "cluster offset %#" PRIx64 + " unaligned (L2 offset: %#" + PRIx64 ", L2 index: %#x)", + offset, l2_offset, j); + ret = -EIO; + goto fail; + } - cluster_index = offset >> s->cluster_bits; - if (!cluster_index) { - /* unallocated */ - refcount = 0; - break; - } - if (addend != 0) { - ret = qcow2_update_cluster_refcount(bs, + cluster_index = offset >> s->cluster_bits; + if (!cluster_index) { + /* unallocated */ + refcount = 0; + break; + } + if (addend != 0) { + ret = qcow2_update_cluster_refcount(bs, cluster_index, abs(addend), addend < 0, QCOW2_DISCARD_SNAPSHOT); - if (ret < 0) { - goto fail; - } - } - - ret = qcow2_get_refcount(bs, cluster_index, &refcount); if (ret < 0) { goto fail; } - break; + } - case QCOW2_CLUSTER_UNALLOCATED: - refcount = 0; - break; + ret = qcow2_get_refcount(bs, cluster_index, &refcount); + if (ret < 0) { + goto fail; + } + break; + + case QCOW2_CLUSTER_UNALLOCATED: + refcount = 0; + break; - default: - abort(); + default: + abort(); } if (refcount == 1) { -- cgit v1.2.3 From 4c41cb4955effff3447bd07a09e0af9c6e0453e3 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 6 May 2017 19:05:43 -0500 Subject: block: Update comments on BDRV_BLOCK_* meanings We had some conflicting documentation: a nice 8-way table that described all possible combinations of DATA, ZERO, and OFFSET_VALID, contrasted with text that implied that OFFSET_VALID always meant raw data could be read directly. Furthermore, the text refers a lot to bs->file, even though the interface was updated back in 67a0fd2a to let the driver pass back a specific BDS (not necessarily bs->file). As the 8-way table is the intended semantics, simplify the rest of the text to get rid of the confusion. ALLOCATED is always set by the block layer for convenience (drivers do not have to worry about it). RAW is used only internally, but by more than the raw driver. Document these additional items on the driver callback. Suggested-by: Max Reitz Signed-off-by: Eric Blake Reviewed-by: Max Reitz Message-id: 20170507000552.20847-4-eblake@redhat.com Signed-off-by: Max Reitz --- include/block/block.h | 35 +++++++++++++++++++---------------- include/block/block_int.h | 7 +++++++ 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/include/block/block.h b/include/block/block.h index 90932b4709..9b355e92d8 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -121,29 +121,32 @@ typedef struct HDGeometry { #define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) /* - * Allocation status flags - * BDRV_BLOCK_DATA: data is read from a file returned by bdrv_get_block_status. - * BDRV_BLOCK_ZERO: sectors read as zero - * BDRV_BLOCK_OFFSET_VALID: sector stored as raw data in a file returned by - * bdrv_get_block_status. + * Allocation status flags for bdrv_get_block_status() and friends. + * + * Public flags: + * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer + * BDRV_BLOCK_ZERO: offset reads as zero + * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this - * layer (as opposed to the backing file) - * BDRV_BLOCK_RAW: used internally to indicate that the request - * was answered by the raw driver and that one - * should look in bs->file directly. + * layer (short for DATA || ZERO), set by block layer * - * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 represent the offset in - * bs->file where sector data can be read from as raw data. + * Internal flag: + * BDRV_BLOCK_RAW: used internally to indicate that the request was + * answered by a passthrough driver such as raw and that the + * block layer should recompute the answer from bs->file. * - * DATA == 0 && ZERO == 0 means that data is read from backing_hd if present. + * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) + * represent the offset in the returned BDS that is allocated for the + * corresponding raw data; however, whether that offset actually contains + * data also depends on BDRV_BLOCK_DATA and BDRV_BLOCK_ZERO, as follows: * * DATA ZERO OFFSET_VALID - * t t t sectors read as zero, bs->file is zero at offset - * t f t sectors read as valid from bs->file at offset - * f t t sectors preallocated, read as zero, bs->file not + * t t t sectors read as zero, returned file is zero at offset + * t f t sectors read as valid from file at offset + * f t t sectors preallocated, read as zero, returned file not * necessarily zero at offset * f f t sectors preallocated but read from backing_hd, - * bs->file contains garbage at offset + * returned file contains garbage at offset * t t f sectors preallocated, read as zero, unknown offset * t f f sectors read from unknown file or offset * f t f not allocated or unknown offset, read as zero diff --git a/include/block/block_int.h b/include/block/block_int.h index 5750a448a6..8d3724cce6 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -165,6 +165,13 @@ struct BlockDriver { int64_t offset, int count, BdrvRequestFlags flags); int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, int64_t offset, int count); + + /* + * Building block for bdrv_block_status[_above]. The driver should + * answer only according to the current layer, and should not + * set BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW. See block.h + * for the meaning of _DATA, _ZERO, and _OFFSET_VALID. + */ int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file); -- cgit v1.2.3 From 4341df8a83d6a528a1e2855735f87fc3aab42b70 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 6 May 2017 19:05:44 -0500 Subject: qcow2: Correctly report status of preallocated zero clusters We were throwing away the preallocation information associated with zero clusters. But we should be matching the well-defined semantics in bdrv_get_block_status(), where (BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID) informs the user which offset is reserved, while still reminding the user that reading from that offset is likely to read garbage. count_contiguous_clusters_by_type() is now used only for unallocated cluster runs, hence it gets renamed and tightened. Making this change lets us see which portions of an image are zero but preallocated, when using qemu-img map --output=json. The --output=human side intentionally ignores all zero clusters, whether or not they are preallocated. The fact that there is no change to qemu-iotests './check -qcow2' merely means that we aren't yet testing this aspect of qemu-img; a later patch will add a test. Signed-off-by: Eric Blake Reviewed-by: Max Reitz Message-id: 20170507000552.20847-5-eblake@redhat.com Signed-off-by: Max Reitz --- block/qcow2-cluster.c | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 335a50512f..f3bfce65f7 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -334,16 +334,23 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, return i; } -static int count_contiguous_clusters_by_type(int nb_clusters, - uint64_t *l2_table, - int wanted_type) +/* + * Checks how many consecutive unallocated clusters in a given L2 + * table have the same cluster type. + */ +static int count_contiguous_clusters_unallocated(int nb_clusters, + uint64_t *l2_table, + int wanted_type) { int i; + assert(wanted_type == QCOW2_CLUSTER_ZERO || + wanted_type == QCOW2_CLUSTER_UNALLOCATED); for (i = 0; i < nb_clusters; i++) { - int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i])); + uint64_t entry = be64_to_cpu(l2_table[i]); + int type = qcow2_get_cluster_type(entry); - if (type != wanted_type) { + if (type != wanted_type || entry & L2E_OFFSET_MASK) { break; } } @@ -565,14 +572,32 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, ret = -EIO; goto fail; } - c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index], - QCOW2_CLUSTER_ZERO); - *cluster_offset = 0; + /* Distinguish between pure zero clusters and pre-allocated ones */ + if (*cluster_offset & L2E_OFFSET_MASK) { + c = count_contiguous_clusters(nb_clusters, s->cluster_size, + &l2_table[l2_index], QCOW_OFLAG_ZERO); + *cluster_offset &= L2E_OFFSET_MASK; + if (offset_into_cluster(s, *cluster_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, + "Preallocated zero cluster offset %#" + PRIx64 " unaligned (L2 offset: %#" + PRIx64 ", L2 index: %#x)", + *cluster_offset, l2_offset, l2_index); + ret = -EIO; + goto fail; + } + } else { + c = count_contiguous_clusters_unallocated(nb_clusters, + &l2_table[l2_index], + QCOW2_CLUSTER_ZERO); + *cluster_offset = 0; + } break; case QCOW2_CLUSTER_UNALLOCATED: /* how many empty clusters ? */ - c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index], - QCOW2_CLUSTER_UNALLOCATED); + c = count_contiguous_clusters_unallocated(nb_clusters, + &l2_table[l2_index], + QCOW2_CLUSTER_UNALLOCATED); *cluster_offset = 0; break; case QCOW2_CLUSTER_NORMAL: -- cgit v1.2.3 From 3ef9521893c41638148d4452ee4da75fd8ef71a9 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 6 May 2017 19:05:45 -0500 Subject: qcow2: Name typedef for cluster type Although it doesn't add all that much type safety (this is C, after all), it does add a bit of legibility to use the name QCow2ClusterType instead of a plain int. In particular, qcow2_get_cluster_offset() has an overloaded return type; a QCow2ClusterType on success, and -errno on failure; keeping the cluster type in a separate variable makes it slightly easier for the next patch to make further computations based on the type. Suggested-by: Max Reitz Signed-off-by: Eric Blake Message-id: 20170507000552.20847-6-eblake@redhat.com [mreitz: Use the new type in two more places (one of them pulled from the next patch)] Signed-off-by: Max Reitz --- block/qcow2-cluster.c | 21 +++++++++++---------- block/qcow2-refcount.c | 2 +- block/qcow2.h | 6 +++--- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index f3bfce65f7..26f9e0e4b8 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -309,7 +309,7 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, uint64_t *l2_table, uint64_t stop_flags) { int i; - int first_cluster_type; + QCow2ClusterType first_cluster_type; uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED; uint64_t first_entry = be64_to_cpu(l2_table[0]); uint64_t offset = first_entry & mask; @@ -340,7 +340,7 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, */ static int count_contiguous_clusters_unallocated(int nb_clusters, uint64_t *l2_table, - int wanted_type) + QCow2ClusterType wanted_type) { int i; @@ -348,7 +348,7 @@ static int count_contiguous_clusters_unallocated(int nb_clusters, wanted_type == QCOW2_CLUSTER_UNALLOCATED); for (i = 0; i < nb_clusters; i++) { uint64_t entry = be64_to_cpu(l2_table[i]); - int type = qcow2_get_cluster_type(entry); + QCow2ClusterType type = qcow2_get_cluster_type(entry); if (type != wanted_type || entry & L2E_OFFSET_MASK) { break; @@ -500,6 +500,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, int l1_bits, c; unsigned int offset_in_cluster; uint64_t bytes_available, bytes_needed, nb_clusters; + QCow2ClusterType type; int ret; offset_in_cluster = offset_into_cluster(s, offset); @@ -522,13 +523,13 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, l1_index = offset >> l1_bits; if (l1_index >= s->l1_size) { - ret = QCOW2_CLUSTER_UNALLOCATED; + type = QCOW2_CLUSTER_UNALLOCATED; goto out; } l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; if (!l2_offset) { - ret = QCOW2_CLUSTER_UNALLOCATED; + type = QCOW2_CLUSTER_UNALLOCATED; goto out; } @@ -557,8 +558,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, * true */ assert(nb_clusters <= INT_MAX); - ret = qcow2_get_cluster_type(*cluster_offset); - switch (ret) { + type = qcow2_get_cluster_type(*cluster_offset); + switch (type) { case QCOW2_CLUSTER_COMPRESSED: /* Compressed clusters can only be processed one by one */ c = 1; @@ -633,7 +634,7 @@ out: assert(bytes_available - offset_in_cluster <= UINT_MAX); *bytes = bytes_available - offset_in_cluster; - return ret; + return type; fail: qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table); @@ -891,7 +892,7 @@ static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters, for (i = 0; i < nb_clusters; i++) { uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]); - int cluster_type = qcow2_get_cluster_type(l2_entry); + QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry); switch(cluster_type) { case QCOW2_CLUSTER_NORMAL: @@ -1757,7 +1758,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, for (j = 0; j < s->l2_size; j++) { uint64_t l2_entry = be64_to_cpu(l2_table[j]); int64_t offset = l2_entry & L2E_OFFSET_MASK; - int cluster_type = qcow2_get_cluster_type(l2_entry); + QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry); bool preallocated = offset != 0; if (cluster_type != QCOW2_CLUSTER_ZERO) { diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 908dbe5314..e639b341e5 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1640,7 +1640,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, for (j = 0; j < s->l2_size; j++) { uint64_t l2_entry = be64_to_cpu(l2_table[j]); uint64_t data_offset = l2_entry & L2E_OFFSET_MASK; - int cluster_type = qcow2_get_cluster_type(l2_entry); + QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry); if ((cluster_type == QCOW2_CLUSTER_NORMAL) || ((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) { diff --git a/block/qcow2.h b/block/qcow2.h index 8731f24b82..c148bbcdb6 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -349,12 +349,12 @@ typedef struct QCowL2Meta QLIST_ENTRY(QCowL2Meta) next_in_flight; } QCowL2Meta; -enum { +typedef enum QCow2ClusterType { QCOW2_CLUSTER_UNALLOCATED, QCOW2_CLUSTER_NORMAL, QCOW2_CLUSTER_COMPRESSED, QCOW2_CLUSTER_ZERO -}; +} QCow2ClusterType; typedef enum QCow2MetadataOverlap { QCOW2_OL_MAIN_HEADER_BITNR = 0, @@ -443,7 +443,7 @@ static inline uint64_t qcow2_max_refcount_clusters(BDRVQcow2State *s) return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits; } -static inline int qcow2_get_cluster_type(uint64_t l2_entry) +static inline QCow2ClusterType qcow2_get_cluster_type(uint64_t l2_entry) { if (l2_entry & QCOW_OFLAG_COMPRESSED) { return QCOW2_CLUSTER_COMPRESSED; -- cgit v1.2.3 From fdfab37dfeffefbd4533b4158055c9b82d7c3e69 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 6 May 2017 19:05:46 -0500 Subject: qcow2: Make distinction between zero cluster types obvious Treat plain zero clusters differently from allocated ones, so that we can simplify the logic of checking whether an offset is present. Do this by splitting QCOW2_CLUSTER_ZERO into two new enums, QCOW2_CLUSTER_ZERO_PLAIN and QCOW2_CLUSTER_ZERO_ALLOC. I tried to arrange the enum so that we could use 'ret <= QCOW2_CLUSTER_ZERO_PLAIN' for all unallocated types, and 'ret >= QCOW2_CLUSTER_ZERO_ALLOC' for allocated types, although I didn't actually end up taking advantage of the layout. In many cases, this leads to simpler code, by properly combining cases (sometimes, both zero types pair together, other times, plain zero is more like unallocated while allocated zero is more like normal). Signed-off-by: Eric Blake Message-id: 20170507000552.20847-7-eblake@redhat.com Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/qcow2-cluster.c | 79 ++++++++++++++++++---------------------------- block/qcow2-refcount.c | 44 +++++++++++--------------- block/qcow2.c | 9 ++++-- block/qcow2.h | 8 +++-- tests/qemu-iotests/060.out | 6 ++-- 5 files changed, 63 insertions(+), 83 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 26f9e0e4b8..558c23957f 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -321,8 +321,7 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, /* must be allocated */ first_cluster_type = qcow2_get_cluster_type(first_entry); assert(first_cluster_type == QCOW2_CLUSTER_NORMAL || - (first_cluster_type == QCOW2_CLUSTER_ZERO && - (first_entry & L2E_OFFSET_MASK) != 0)); + first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC); for (i = 0; i < nb_clusters; i++) { uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask; @@ -344,13 +343,13 @@ static int count_contiguous_clusters_unallocated(int nb_clusters, { int i; - assert(wanted_type == QCOW2_CLUSTER_ZERO || + assert(wanted_type == QCOW2_CLUSTER_ZERO_PLAIN || wanted_type == QCOW2_CLUSTER_UNALLOCATED); for (i = 0; i < nb_clusters; i++) { uint64_t entry = be64_to_cpu(l2_table[i]); QCow2ClusterType type = qcow2_get_cluster_type(entry); - if (type != wanted_type || entry & L2E_OFFSET_MASK) { + if (type != wanted_type) { break; } } @@ -559,55 +558,36 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, assert(nb_clusters <= INT_MAX); type = qcow2_get_cluster_type(*cluster_offset); + if (s->qcow_version < 3 && (type == QCOW2_CLUSTER_ZERO_PLAIN || + type == QCOW2_CLUSTER_ZERO_ALLOC)) { + qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found" + " in pre-v3 image (L2 offset: %#" PRIx64 + ", L2 index: %#x)", l2_offset, l2_index); + ret = -EIO; + goto fail; + } switch (type) { case QCOW2_CLUSTER_COMPRESSED: /* Compressed clusters can only be processed one by one */ c = 1; *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK; break; - case QCOW2_CLUSTER_ZERO: - if (s->qcow_version < 3) { - qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found" - " in pre-v3 image (L2 offset: %#" PRIx64 - ", L2 index: %#x)", l2_offset, l2_index); - ret = -EIO; - goto fail; - } - /* Distinguish between pure zero clusters and pre-allocated ones */ - if (*cluster_offset & L2E_OFFSET_MASK) { - c = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], QCOW_OFLAG_ZERO); - *cluster_offset &= L2E_OFFSET_MASK; - if (offset_into_cluster(s, *cluster_offset)) { - qcow2_signal_corruption(bs, true, -1, -1, - "Preallocated zero cluster offset %#" - PRIx64 " unaligned (L2 offset: %#" - PRIx64 ", L2 index: %#x)", - *cluster_offset, l2_offset, l2_index); - ret = -EIO; - goto fail; - } - } else { - c = count_contiguous_clusters_unallocated(nb_clusters, - &l2_table[l2_index], - QCOW2_CLUSTER_ZERO); - *cluster_offset = 0; - } - break; + case QCOW2_CLUSTER_ZERO_PLAIN: case QCOW2_CLUSTER_UNALLOCATED: /* how many empty clusters ? */ c = count_contiguous_clusters_unallocated(nb_clusters, - &l2_table[l2_index], - QCOW2_CLUSTER_UNALLOCATED); + &l2_table[l2_index], type); *cluster_offset = 0; break; + case QCOW2_CLUSTER_ZERO_ALLOC: case QCOW2_CLUSTER_NORMAL: /* how many allocated clusters ? */ c = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], QCOW_OFLAG_ZERO); + &l2_table[l2_index], QCOW_OFLAG_ZERO); *cluster_offset &= L2E_OFFSET_MASK; if (offset_into_cluster(s, *cluster_offset)) { - qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#" + qcow2_signal_corruption(bs, true, -1, -1, + "Cluster allocation offset %#" PRIx64 " unaligned (L2 offset: %#" PRIx64 ", L2 index: %#x)", *cluster_offset, l2_offset, l2_index); @@ -902,7 +882,8 @@ static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters, break; case QCOW2_CLUSTER_UNALLOCATED: case QCOW2_CLUSTER_COMPRESSED: - case QCOW2_CLUSTER_ZERO: + case QCOW2_CLUSTER_ZERO_PLAIN: + case QCOW2_CLUSTER_ZERO_ALLOC: break; default: abort(); @@ -1203,8 +1184,8 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, * wrong with our code. */ assert(nb_clusters > 0); - if (qcow2_get_cluster_type(entry) == QCOW2_CLUSTER_ZERO && - (entry & L2E_OFFSET_MASK) != 0 && (entry & QCOW_OFLAG_COPIED) && + if (qcow2_get_cluster_type(entry) == QCOW2_CLUSTER_ZERO_ALLOC && + (entry & QCOW_OFLAG_COPIED) && (!*host_offset || start_of_cluster(s, *host_offset) == (entry & L2E_OFFSET_MASK))) { @@ -1536,13 +1517,13 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, } break; - case QCOW2_CLUSTER_ZERO: - /* Preallocated zero clusters should be discarded in any case */ - if (!full_discard && (old_l2_entry & L2E_OFFSET_MASK) == 0) { + case QCOW2_CLUSTER_ZERO_PLAIN: + if (!full_discard) { continue; } break; + case QCOW2_CLUSTER_ZERO_ALLOC: case QCOW2_CLUSTER_NORMAL: case QCOW2_CLUSTER_COMPRESSED: break; @@ -1759,13 +1740,13 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, uint64_t l2_entry = be64_to_cpu(l2_table[j]); int64_t offset = l2_entry & L2E_OFFSET_MASK; QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry); - bool preallocated = offset != 0; - if (cluster_type != QCOW2_CLUSTER_ZERO) { + if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN && + cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) { continue; } - if (!preallocated) { + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { if (!bs->backing) { /* not backed; therefore we can simply deallocate the * cluster */ @@ -1800,7 +1781,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, "%#" PRIx64 " unaligned (L2 offset: %#" PRIx64 ", L2 index: %#x)", offset, l2_offset, j); - if (!preallocated) { + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { qcow2_free_clusters(bs, offset, s->cluster_size, QCOW2_DISCARD_ALWAYS); } @@ -1810,7 +1791,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size); if (ret < 0) { - if (!preallocated) { + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { qcow2_free_clusters(bs, offset, s->cluster_size, QCOW2_DISCARD_ALWAYS); } @@ -1819,7 +1800,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0); if (ret < 0) { - if (!preallocated) { + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { qcow2_free_clusters(bs, offset, s->cluster_size, QCOW2_DISCARD_ALWAYS); } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index e639b341e5..7c06061aae 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1028,18 +1028,17 @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry, } break; case QCOW2_CLUSTER_NORMAL: - case QCOW2_CLUSTER_ZERO: - if (l2_entry & L2E_OFFSET_MASK) { - if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) { - qcow2_signal_corruption(bs, false, -1, -1, - "Cannot free unaligned cluster %#llx", - l2_entry & L2E_OFFSET_MASK); - } else { - qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK, - nb_clusters << s->cluster_bits, type); - } + case QCOW2_CLUSTER_ZERO_ALLOC: + if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) { + qcow2_signal_corruption(bs, false, -1, -1, + "Cannot free unaligned cluster %#llx", + l2_entry & L2E_OFFSET_MASK); + } else { + qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK, + nb_clusters << s->cluster_bits, type); } break; + case QCOW2_CLUSTER_ZERO_PLAIN: case QCOW2_CLUSTER_UNALLOCATED: break; default: @@ -1145,10 +1144,10 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, break; case QCOW2_CLUSTER_NORMAL: - case QCOW2_CLUSTER_ZERO: + case QCOW2_CLUSTER_ZERO_ALLOC: if (offset_into_cluster(s, offset)) { - qcow2_signal_corruption(bs, true, -1, -1, "Data " - "cluster offset %#" PRIx64 + qcow2_signal_corruption(bs, true, -1, -1, "Cluster " + "allocation offset %#" PRIx64 " unaligned (L2 offset: %#" PRIx64 ", L2 index: %#x)", offset, l2_offset, j); @@ -1157,11 +1156,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, } cluster_index = offset >> s->cluster_bits; - if (!cluster_index) { - /* unallocated */ - refcount = 0; - break; - } + assert(cluster_index); if (addend != 0) { ret = qcow2_update_cluster_refcount(bs, cluster_index, abs(addend), addend < 0, @@ -1177,6 +1172,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, } break; + case QCOW2_CLUSTER_ZERO_PLAIN: case QCOW2_CLUSTER_UNALLOCATED: refcount = 0; break; @@ -1443,12 +1439,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, } break; - case QCOW2_CLUSTER_ZERO: - if ((l2_entry & L2E_OFFSET_MASK) == 0) { - break; - } - /* fall through */ - + case QCOW2_CLUSTER_ZERO_ALLOC: case QCOW2_CLUSTER_NORMAL: { uint64_t offset = l2_entry & L2E_OFFSET_MASK; @@ -1478,6 +1469,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, break; } + case QCOW2_CLUSTER_ZERO_PLAIN: case QCOW2_CLUSTER_UNALLOCATED: break; @@ -1642,8 +1634,8 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, uint64_t data_offset = l2_entry & L2E_OFFSET_MASK; QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry); - if ((cluster_type == QCOW2_CLUSTER_NORMAL) || - ((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) { + if (cluster_type == QCOW2_CLUSTER_NORMAL || + cluster_type == QCOW2_CLUSTER_ZERO_ALLOC) { ret = qcow2_get_refcount(bs, data_offset >> s->cluster_bits, &refcount); diff --git a/block/qcow2.c b/block/qcow2.c index 8caf800529..3fa62b0096 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1385,7 +1385,7 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, *file = bs->file->bs; status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset; } - if (ret == QCOW2_CLUSTER_ZERO) { + if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) { status |= BDRV_BLOCK_ZERO; } else if (ret != QCOW2_CLUSTER_UNALLOCATED) { status |= BDRV_BLOCK_DATA; @@ -1482,7 +1482,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, } break; - case QCOW2_CLUSTER_ZERO: + case QCOW2_CLUSTER_ZERO_PLAIN: + case QCOW2_CLUSTER_ZERO_ALLOC: qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); break; @@ -2491,7 +2492,9 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, count = s->cluster_size; nr = s->cluster_size; ret = qcow2_get_cluster_offset(bs, offset, &nr, &off); - if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) { + if (ret != QCOW2_CLUSTER_UNALLOCATED && + ret != QCOW2_CLUSTER_ZERO_PLAIN && + ret != QCOW2_CLUSTER_ZERO_ALLOC) { qemu_co_mutex_unlock(&s->lock); return -ENOTSUP; } diff --git a/block/qcow2.h b/block/qcow2.h index c148bbcdb6..810ceb84c8 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -351,9 +351,10 @@ typedef struct QCowL2Meta typedef enum QCow2ClusterType { QCOW2_CLUSTER_UNALLOCATED, + QCOW2_CLUSTER_ZERO_PLAIN, + QCOW2_CLUSTER_ZERO_ALLOC, QCOW2_CLUSTER_NORMAL, QCOW2_CLUSTER_COMPRESSED, - QCOW2_CLUSTER_ZERO } QCow2ClusterType; typedef enum QCow2MetadataOverlap { @@ -448,7 +449,10 @@ static inline QCow2ClusterType qcow2_get_cluster_type(uint64_t l2_entry) if (l2_entry & QCOW_OFLAG_COMPRESSED) { return QCOW2_CLUSTER_COMPRESSED; } else if (l2_entry & QCOW_OFLAG_ZERO) { - return QCOW2_CLUSTER_ZERO; + if (l2_entry & L2E_OFFSET_MASK) { + return QCOW2_CLUSTER_ZERO_ALLOC; + } + return QCOW2_CLUSTER_ZERO_PLAIN; } else if (!(l2_entry & L2E_OFFSET_MASK)) { return QCOW2_CLUSTER_UNALLOCATED; } else { diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index 5d40206ef8..9e8f5b9d79 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -135,7 +135,7 @@ qemu-img: Error while amending options: Input/output error Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -qcow2: Marking image as corrupt: Data cluster offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further corruption events will be suppressed +qcow2: Marking image as corrupt: Cluster allocation offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further corruption events will be suppressed read failed: Input/output error === Testing unaligned pre-allocated zero cluster === @@ -166,7 +166,7 @@ discard 65536/65536 bytes at offset 0 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -qcow2: Image is corrupt: Data cluster offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further non-fatal corruption events will be suppressed +qcow2: Image is corrupt: Cluster allocation offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further non-fatal corruption events will be suppressed read failed: Input/output error read failed: Input/output error @@ -176,7 +176,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 wrote 131072/131072 bytes at offset 0 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) qcow2: Image is corrupt: Cannot free unaligned cluster 0x52a00; further non-fatal corruption events will be suppressed -qcow2: Marking image as corrupt: Data cluster offset 0x62a00 unaligned (L2 offset: 0x40000, L2 index: 0x1); further corruption events will be suppressed +qcow2: Marking image as corrupt: Cluster allocation offset 0x62a00 unaligned (L2 offset: 0x40000, L2 index: 0x1); further corruption events will be suppressed discard 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read failed: Input/output error -- cgit v1.2.3 From 06cc5e2b2d01cb778c966e1b4135062556b3b054 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 6 May 2017 19:05:47 -0500 Subject: qcow2: Optimize zero_single_l2() to minimize L2 churn Similar to discard_single_l2(), we should try to avoid dirtying the L2 cache when the cluster we are changing already has the right characteristics. Note that by the time we get to zero_single_l2(), BDRV_REQ_MAY_UNMAP is a requirement to unallocate a cluster (this is because the block layer clears that flag if discard.* flags during open requested that we never punch holes - see the conversation around commit 170f4b2e, https://lists.gnu.org/archive/html/qemu-devel/2016-09/msg07306.html). Therefore, this patch can only reuse a zero cluster as-is if either unmapping is not requested, or if the zero cluster was not associated with an allocation. Technically, there are some cases where an unallocated cluster already reads as all zeroes (namely, when there is no backing file [easy: check bs->backing], or when the backing file also reads as zeroes [harder: we can't check bdrv_get_block_status since we are already holding the lock]), where the guest would not immediately see a difference if we left that cluster unallocated. But if the user did not request unmapping, leaving an unallocated cluster is wrong; and even if the user DID request unmapping, keeping a cluster unallocated risks a subtle semantic change of guest-visible contents if a backing file is later added, and it is not worth auditing whether all internal uses such as mirror properly avoid an unmap request. Thus, this patch is intentionally limited to just clusters that are already marked as zero. Signed-off-by: Eric Blake Reviewed-by: Max Reitz Message-id: 20170507000552.20847-8-eblake@redhat.com Signed-off-by: Max Reitz --- block/qcow2-cluster.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 558c23957f..e2c5759d94 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1601,6 +1601,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, int l2_index; int ret; int i; + bool unmap = !!(flags & BDRV_REQ_MAY_UNMAP); ret = get_cluster_table(bs, offset, &l2_table, &l2_index); if (ret < 0) { @@ -1613,12 +1614,22 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, for (i = 0; i < nb_clusters; i++) { uint64_t old_offset; + QCow2ClusterType cluster_type; old_offset = be64_to_cpu(l2_table[l2_index + i]); - /* Update L2 entries */ + /* + * Minimize L2 changes if the cluster already reads back as + * zeroes with correct allocation. + */ + cluster_type = qcow2_get_cluster_type(old_offset); + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN || + (cluster_type == QCOW2_CLUSTER_ZERO_ALLOC && !unmap)) { + continue; + } + qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); - if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) { + if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) { l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); } else { -- cgit v1.2.3 From d9ca2214bd058cfb4371b8373123c0444e4a5ac8 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 6 May 2017 19:05:48 -0500 Subject: iotests: Improve _filter_qemu_img_map Although _filter_qemu_img_map documents that it scrubs offsets, it was only doing so for human mode. Of the existing tests using the filter (97, 122, 150, 154, 176), two of them are affected, but it does not hurt the validity of the tests to not require particular mappings (another test, 66, uses offsets but intentionally does not pass through _filter_qemu_img_map, because it checks that offsets are unchanged before and after an operation). Another justification for this patch is that it will allow a future patch to utilize 'qemu-img map --output=json' to check the status of preallocated zero clusters without regards to the mapping (since the qcow2 mapping can be very sensitive to the chosen cluster size, when preallocation is not in use). Signed-off-by: Eric Blake Reviewed-by: Max Reitz Message-id: 20170507000552.20847-9-eblake@redhat.com Signed-off-by: Max Reitz --- tests/qemu-iotests/122.out | 16 ++++++++-------- tests/qemu-iotests/154.out | 30 +++++++++++++++--------------- tests/qemu-iotests/common.filter | 4 +++- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/tests/qemu-iotests/122.out b/tests/qemu-iotests/122.out index 9317d801ad..47d8656db8 100644 --- a/tests/qemu-iotests/122.out +++ b/tests/qemu-iotests/122.out @@ -112,7 +112,7 @@ read 3145728/3145728 bytes at offset 0 3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 63963136/63963136 bytes at offset 3145728 61 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": 327680}] +[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] convert -c -S 0: read 3145728/3145728 bytes at offset 0 @@ -134,7 +134,7 @@ read 30408704/30408704 bytes at offset 3145728 29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 33554432/33554432 bytes at offset 33554432 32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": 327680}] +[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] convert -c -S 0 with source backing file: read 3145728/3145728 bytes at offset 0 @@ -152,7 +152,7 @@ read 30408704/30408704 bytes at offset 3145728 29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 33554432/33554432 bytes at offset 33554432 32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": 327680}] +[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] convert -c -S 0 -B ... read 3145728/3145728 bytes at offset 0 @@ -176,11 +176,11 @@ wrote 1024/1024 bytes at offset 17408 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) convert -S 4k -[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 8192}, +[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 1024, "length": 7168, "depth": 0, "zero": true, "data": false}, -{ "start": 8192, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 9216}, +{ "start": 8192, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false}, -{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 10240}, +{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}] convert -c -S 4k @@ -192,9 +192,9 @@ convert -c -S 4k { "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}] convert -S 8k -[{ "start": 0, "length": 9216, "depth": 0, "zero": false, "data": true, "offset": 8192}, +[{ "start": 0, "length": 9216, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false}, -{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 17408}, +{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}] convert -c -S 8k diff --git a/tests/qemu-iotests/154.out b/tests/qemu-iotests/154.out index da9eabdda8..d3b68e7729 100644 --- a/tests/qemu-iotests/154.out +++ b/tests/qemu-iotests/154.out @@ -42,9 +42,9 @@ read 1024/1024 bytes at offset 65536 read 2048/2048 bytes at offset 67584 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) [{ "start": 0, "length": 32768, "depth": 1, "zero": true, "data": false}, -{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480}, +{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 36864, "length": 28672, "depth": 1, "zero": true, "data": false}, -{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576}, +{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 69632, "length": 134148096, "depth": 1, "zero": true, "data": false}] == backing file contains non-zero data after write_zeroes == @@ -69,9 +69,9 @@ read 1024/1024 bytes at offset 44032 read 3072/3072 bytes at offset 40960 3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) [{ "start": 0, "length": 32768, "depth": 1, "zero": true, "data": false}, -{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480}, +{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 36864, "length": 4096, "depth": 1, "zero": true, "data": false}, -{ "start": 40960, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576}, +{ "start": 40960, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 45056, "length": 134172672, "depth": 1, "zero": true, "data": false}] == write_zeroes covers non-zero data == @@ -143,13 +143,13 @@ read 1024/1024 bytes at offset 67584 read 5120/5120 bytes at offset 68608 5 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) [{ "start": 0, "length": 32768, "depth": 1, "zero": true, "data": false}, -{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480}, +{ "start": 32768, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 36864, "length": 4096, "depth": 0, "zero": true, "data": false}, { "start": 40960, "length": 8192, "depth": 1, "zero": true, "data": false}, -{ "start": 49152, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576}, +{ "start": 49152, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 53248, "length": 4096, "depth": 0, "zero": true, "data": false}, { "start": 57344, "length": 8192, "depth": 1, "zero": true, "data": false}, -{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 28672}, +{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 69632, "length": 4096, "depth": 0, "zero": true, "data": false}, { "start": 73728, "length": 134144000, "depth": 1, "zero": true, "data": false}] @@ -186,13 +186,13 @@ read 1024/1024 bytes at offset 72704 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) [{ "start": 0, "length": 32768, "depth": 1, "zero": true, "data": false}, { "start": 32768, "length": 4096, "depth": 0, "zero": true, "data": false}, -{ "start": 36864, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480}, +{ "start": 36864, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 40960, "length": 8192, "depth": 1, "zero": true, "data": false}, { "start": 49152, "length": 4096, "depth": 0, "zero": true, "data": false}, -{ "start": 53248, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576}, +{ "start": 53248, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 57344, "length": 8192, "depth": 1, "zero": true, "data": false}, { "start": 65536, "length": 4096, "depth": 0, "zero": true, "data": false}, -{ "start": 69632, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 28672}, +{ "start": 69632, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 73728, "length": 134144000, "depth": 1, "zero": true, "data": false}] == spanning two clusters, partially overwriting backing file == @@ -212,7 +212,7 @@ read 1024/1024 bytes at offset 5120 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 2048/2048 bytes at offset 6144 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -[{ "start": 0, "length": 8192, "depth": 0, "zero": false, "data": true, "offset": 20480}, +[{ "start": 0, "length": 8192, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 8192, "length": 134209536, "depth": 1, "zero": true, "data": false}] == spanning multiple clusters, non-zero in first cluster == @@ -227,7 +227,7 @@ read 2048/2048 bytes at offset 65536 read 10240/10240 bytes at offset 67584 10 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) [{ "start": 0, "length": 65536, "depth": 1, "zero": true, "data": false}, -{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480}, +{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 69632, "length": 8192, "depth": 0, "zero": true, "data": false}, { "start": 77824, "length": 134139904, "depth": 1, "zero": true, "data": false}] @@ -257,7 +257,7 @@ read 2048/2048 bytes at offset 75776 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) [{ "start": 0, "length": 65536, "depth": 1, "zero": true, "data": false}, { "start": 65536, "length": 8192, "depth": 0, "zero": true, "data": false}, -{ "start": 73728, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480}, +{ "start": 73728, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 77824, "length": 134139904, "depth": 1, "zero": true, "data": false}] == spanning multiple clusters, partially overwriting backing file == @@ -278,8 +278,8 @@ read 2048/2048 bytes at offset 74752 read 1024/1024 bytes at offset 76800 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) [{ "start": 0, "length": 65536, "depth": 1, "zero": true, "data": false}, -{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 20480}, +{ "start": 65536, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 69632, "length": 4096, "depth": 0, "zero": true, "data": false}, -{ "start": 73728, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 24576}, +{ "start": 73728, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 77824, "length": 134139904, "depth": 1, "zero": true, "data": false}] *** done diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index f58548dc44..2f595b2ce2 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -152,10 +152,12 @@ _filter_img_info() -e "/log_size: [0-9]\\+/d" } -# filter out offsets and file names from qemu-img map +# filter out offsets and file names from qemu-img map; good for both +# human and json output _filter_qemu_img_map() { sed -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \ + -e 's/"offset": [0-9]\+/"offset": OFFSET/g' \ -e 's/Mapped to *//' | _filter_testdir | _filter_imgfmt } -- cgit v1.2.3 From e249d5195279cfadb7f15e2ad55dd4fe1d67c105 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 6 May 2017 19:05:49 -0500 Subject: iotests: Add test 179 to cover write zeroes with unmap No tests were covering write zeroes with unmap. Additionally, I needed to prove that my previous patches for correct status reporting and write zeroes optimizations actually had an impact. The test works for cluster_size between 8k and 2M (for smaller sizes, it fails because our allocation patterns are not contiguous with small clusters - in part, the largest consecutive allocation we tend to get is often bounded by the size covered by one L2 table). Note that testing for zero clusters is tricky: 'qemu-io map' reports whether data comes from the current layer of the image (useful for sniffing out which regions of the file have QCOW_OFLAG_ZERO) - but doesn't show which clusters have mappings; while 'qemu-img map' sees "zero":true for both unallocated and zero clusters for any qcow2 with no backing layer (so less useful at detecting true zero clusters), but reliably shows mappings. So we have to rely on both queries side-by-side at each point of the test. Signed-off-by: Eric Blake Reviewed-by: Max Reitz Message-id: 20170507000552.20847-10-eblake@redhat.com Signed-off-by: Max Reitz --- tests/qemu-iotests/179 | 130 +++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/179.out | 156 +++++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 287 insertions(+) create mode 100755 tests/qemu-iotests/179 create mode 100644 tests/qemu-iotests/179.out diff --git a/tests/qemu-iotests/179 b/tests/qemu-iotests/179 new file mode 100755 index 0000000000..7bc8db8fe0 --- /dev/null +++ b/tests/qemu-iotests/179 @@ -0,0 +1,130 @@ +#!/bin/bash +# +# Test case for write zeroes with unmap +# +# Copyright (C) 2017 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=eblake@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + +# v2 images can't mark clusters as zero +_unsupported_imgopts compat=0.10 + +echo +echo '=== Testing write zeroes with unmap ===' +echo + +TEST_IMG="$TEST_IMG.base" _make_test_img 64M +_make_test_img -b "$TEST_IMG.base" + +# Offsets chosen at or near 2M boundaries so test works at all cluster sizes +# 8k and larger (smaller clusters fail due to non-contiguous allocations) + +# Aligned writes to unallocated cluster should not allocate mapping, but must +# mark cluster as zero, whether or not unmap was requested +$QEMU_IO -c "write -z -u 2M 2M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z 6M 2M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map + +# Unaligned writes need not allocate mapping if the cluster already reads +# as zero, but must mark cluster as zero, whether or not unmap was requested +$QEMU_IO -c "write -z -u 10485761 2097150" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z 14680065 2097150" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map + +# Requesting unmap of normal data must deallocate; omitting unmap should +# preserve the mapping +$QEMU_IO -c "write 18M 14M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z -u 20M 2M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z 24M 6M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map + +# Likewise when writing on already-mapped zero data +$QEMU_IO -c "write -z -u 26M 2M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z 28M 2M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map + +# Writing on unmapped zeroes does not allocate +$QEMU_IO -c "write -z 32M 8M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z -u 34M 2M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z 36M 2M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map + +# Writing zero overrides a backing file, regardless of backing cluster type +$QEMU_IO -c "write -z 40M 8M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write 48M 8M" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z -u 42M 2M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z 44M 2M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z -u 50M 2M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z 52M 2M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z -u 58M 2M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z 60M 2M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "map" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Final check that mappings are correct and images are still sane +TEST_IMG="$TEST_IMG.base" _check_test_img +_check_test_img + +echo +echo '=== Testing cache optimization ===' +echo + +BLKDBG_TEST_IMG="blkdebug:$TEST_DIR/blkdebug.conf:$TEST_IMG.base" + +cat > "$TEST_DIR/blkdebug.conf" < Date: Sat, 6 May 2017 19:05:50 -0500 Subject: qcow2: Optimize write zero of unaligned tail cluster We've already improved discards to operate efficiently on the tail of an unaligned qcow2 image; it's time to make a similar improvement to write zeroes. The special case is only valid at the tail cluster of a file, where we must recognize that any sectors beyond the image end would implicitly read as zero, and therefore should not penalize our logic for widening a partial cluster into writing the whole cluster as zero. However, note that for now, the special case of end-of-file is only recognized if there is no backing file, or if the backing file has the same length; that's because when the backing file is shorter than the active layer, we don't have code in place to recognize that reads of a sector unallocated at the top and beyond the backing end-of-file are implicitly zero. It's not much of a real loss, because most people don't use images that aren't cluster-aligned, or where the active layer is a different size than the backing layer (especially where the difference falls within a single cluster). Update test 154 to cover the new scenarios, using two images of intentionally differing length. While at it, fix the test to gracefully skip when run as ./check -qcow2 -o compat=0.10 154 since the older format lacks zero clusters already required earlier in the test. Signed-off-by: Eric Blake Reviewed-by: Max Reitz Message-id: 20170507000552.20847-11-eblake@redhat.com Signed-off-by: Max Reitz --- block/qcow2.c | 7 ++ tests/qemu-iotests/154 | 160 ++++++++++++++++++++++++++++++++++++++++++++- tests/qemu-iotests/154.out | 128 ++++++++++++++++++++++++++++++++++++ 3 files changed, 293 insertions(+), 2 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 3fa62b0096..72b1650953 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2451,6 +2451,10 @@ static bool is_zero_sectors(BlockDriverState *bs, int64_t start, BlockDriverState *file; int64_t res; + if (start + count > bs->total_sectors) { + count = bs->total_sectors - start; + } + if (!count) { return true; } @@ -2469,6 +2473,9 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, uint32_t tail = (offset + count) % s->cluster_size; trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, count); + if (offset + count == bs->total_sectors * BDRV_SECTOR_SIZE) { + tail = 0; + } if (head || tail) { int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS; diff --git a/tests/qemu-iotests/154 b/tests/qemu-iotests/154 index 7ca7219f08..dd8a426dad 100755 --- a/tests/qemu-iotests/154 +++ b/tests/qemu-iotests/154 @@ -2,7 +2,7 @@ # # qcow2 specific bdrv_pwrite_zeroes tests with backing files (complements 034) # -# Copyright (C) 2016 Red Hat, Inc. +# Copyright (C) 2016-2017 Red Hat, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -42,7 +42,10 @@ _supported_proto file _supported_os Linux CLUSTER_SIZE=4k -size=128M +size=$((128 * 1024 * 1024)) + +# This test requires zero clusters, added in v3 images +_unsupported_imgopts compat=0.10 echo echo == backing file contains zeros == @@ -299,6 +302,159 @@ $QEMU_IO -c "read -P 0 75k 1k" "$TEST_IMG" | _filter_qemu_io $QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map +echo +echo == unaligned image tail cluster, no allocation needed == + +# With no backing file, write to all or part of unallocated partial cluster +# will mark the cluster as zero, but does not allocate. +# Re-create the image each time to get back to unallocated clusters. + +# Write at the front: sector-wise, the request is: 128m... | 00 -- -- -- +_make_test_img $((size + 2048)) +$QEMU_IO -c "write -z $size 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write at the back: sector-wise, the request is: 128m... | -- -- -- 00 +_make_test_img $((size + 2048)) +$QEMU_IO -c "write -z $((size + 1536)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write at middle: sector-wise, the request is: 128m... | -- 00 00 -- +_make_test_img $((size + 2048)) +$QEMU_IO -c "write -z $((size + 512)) 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write entire cluster: sector-wise, the request is: 128m... | 00 00 00 00 +_make_test_img $((size + 2048)) +$QEMU_IO -c "write -z $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Repeat with backing file holding unallocated cluster. +# TODO: Note that this forces an allocation, because we aren't yet able to +# quickly detect that reads beyond EOF of the backing file are always zero +CLUSTER_SIZE=2048 TEST_IMG="$TEST_IMG.base" _make_test_img $((size + 1024)) + +# Write at the front: sector-wise, the request is: +# backing: 128m... | -- -- +# active: 128m... | 00 -- -- -- +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $size 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write at the back: sector-wise, the request is: +# backing: 128m... | -- -- +# active: 128m... | -- -- -- 00 +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $((size + 1536)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write at middle: sector-wise, the request is: +# backing: 128m... | -- -- +# active: 128m... | -- 00 00 -- +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $((size + 512)) 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write entire cluster: sector-wise, the request is: +# backing: 128m... | -- -- +# active: 128m... | 00 00 00 00 +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Repeat with backing file holding zero'd cluster +# TODO: Note that this forces an allocation, because we aren't yet able to +# quickly detect that reads beyond EOF of the backing file are always zero +$QEMU_IO -c "write -z $size 512" "$TEST_IMG.base" | _filter_qemu_io + +# Write at the front: sector-wise, the request is: +# backing: 128m... | 00 00 +# active: 128m... | 00 -- -- -- +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $size 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write at the back: sector-wise, the request is: +# backing: 128m... | 00 00 +# active: 128m... | -- -- -- 00 +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $((size + 1536)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write at middle: sector-wise, the request is: +# backing: 128m... | 00 00 +# active: 128m... | -- 00 00 -- +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $((size + 512)) 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Write entire cluster: sector-wise, the request is: +# backing: 128m... | 00 00 +# active: 128m... | 00 00 00 00 +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -z $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# A preallocated cluster maintains its allocation, whether it stays as +# data due to a partial write: +# Convert 128m... | XX XX => ... | XX 00 +_make_test_img $((size + 1024)) +$QEMU_IO -c "write -P 1 $((size)) 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z $((size + 512)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 1 $((size)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $((size + 512)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# or because it is the entire cluster and can use the zero flag: +# Convert 128m... | XX XX => ... | 00 00 +$QEMU_IO -c "write -z $((size)) 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "alloc $size 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $size 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +echo +echo == unaligned image tail cluster, allocation required == + +# Write beyond backing file must COW +# Backing file: 128m... | XX -- +# Active layer: 128m... | -- -- 00 -- +CLUSTER_SIZE=512 TEST_IMG="$TEST_IMG.base" _make_test_img $((size + 1024)) +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -P 1 $((size)) 512" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z $((size + 1024)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 1 $((size)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $((size + 512)) 1536" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + +# Writes at boundaries of (partial) cluster must not lose mid-cluster data +# Backing file: 128m: ... | -- XX +# Active layer: 128m: ... | 00 -- -- 00 +CLUSTER_SIZE=512 TEST_IMG="$TEST_IMG.base" _make_test_img $((size + 1024)) +_make_test_img -b "$TEST_IMG.base" $((size + 2048)) +$QEMU_IO -c "write -P 1 $((size + 512)) 512" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -z $((size)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $((size)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 1 $((size + 512)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $((size + 1024)) 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z $((size + 1536)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $((size)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 1 $((size + 512)) 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $((size + 1024)) 1024" "$TEST_IMG" | _filter_qemu_io +$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/154.out b/tests/qemu-iotests/154.out index d3b68e7729..d8485eeff2 100644 --- a/tests/qemu-iotests/154.out +++ b/tests/qemu-iotests/154.out @@ -282,4 +282,132 @@ read 1024/1024 bytes at offset 76800 { "start": 69632, "length": 4096, "depth": 0, "zero": true, "data": false}, { "start": 73728, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, { "start": 77824, "length": 134139904, "depth": 1, "zero": true, "data": false}] + +== unaligned image tail cluster, no allocation needed == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 +wrote 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134219776, "depth": 0, "zero": true, "data": false}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 +wrote 512/512 bytes at offset 134219264 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134219776, "depth": 0, "zero": true, "data": false}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 +wrote 1024/1024 bytes at offset 134218240 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134219776, "depth": 0, "zero": true, "data": false}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 +wrote 2048/2048 bytes at offset 134217728 +2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134219776, "depth": 0, "zero": true, "data": false}] +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134218752 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 512/512 bytes at offset 134219264 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 1024/1024 bytes at offset 134218240 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 2048/2048 bytes at offset 134217728 +2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}] +wrote 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 512/512 bytes at offset 134219264 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 1024/1024 bytes at offset 134218240 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 2048/2048 bytes at offset 134217728 +2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +2048/2048 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}] +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134218752 +wrote 1024/1024 bytes at offset 134217728 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 134218240 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 134218240 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +1024/1024 bytes allocated at offset 128 MiB +[{ "start": 0, "length": 134217728, "depth": 0, "zero": true, "data": false}, +{ "start": 134217728, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +wrote 1024/1024 bytes at offset 134217728 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +1024/1024 bytes allocated at offset 128 MiB +read 1024/1024 bytes at offset 134217728 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +[{ "start": 0, "length": 134217728, "depth": 0, "zero": true, "data": false}, +{ "start": 134217728, "length": 1024, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] + +== unaligned image tail cluster, allocation required == +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134218752 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 134218752 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 134218240 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134218752 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base +wrote 512/512 bytes at offset 134218240 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 134218240 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 134218752 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 134219264 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 134217728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 134218240 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 134218752 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +[{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, +{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] *** done -- cgit v1.2.3 From f10ee139adee1c18d399dc57a7ee22a03fa59513 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 6 May 2017 19:05:51 -0500 Subject: qcow2: Assert that cluster operations are aligned We already audited (in commit 0c1bd469) that qcow2_discard_clusters() is only passed cluster-aligned start values; but we can further tighten the assertion that the only unaligned end value is at EOF. Recent commits have taken advantage of an unaligned tail cluster, for both discard and write zeroes. Signed-off-by: Eric Blake Reviewed-by: Max Reitz Message-id: 20170507000552.20847-12-eblake@redhat.com Signed-off-by: Max Reitz --- block/qcow2-cluster.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index e2c5759d94..43bde569a7 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1559,11 +1559,10 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS); - /* The caller must cluster-align start; round end down except at EOF */ + /* Caller must pass aligned values, except at image end */ assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); - if (end_offset != bs->total_sectors * BDRV_SECTOR_SIZE) { - end_offset = start_of_cluster(s, end_offset); - } + assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || + end_offset == bs->total_sectors << BDRV_SECTOR_BITS); nb_clusters = size_to_clusters(s, end_offset - offset); @@ -1646,9 +1645,17 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, int flags) { BDRVQcow2State *s = bs->opaque; + uint64_t end_offset; uint64_t nb_clusters; int ret; + end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS); + + /* Caller must pass aligned values, except at image end */ + assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); + assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || + end_offset == bs->total_sectors << BDRV_SECTOR_BITS); + /* The zero flag is only supported by version 3 and newer */ if (s->qcow_version < 3) { return -ENOTSUP; -- cgit v1.2.3 From d2cb36af2b0040d421b347e6e4e803e07220f78d Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 6 May 2017 19:05:52 -0500 Subject: qcow2: Discard/zero clusters by byte count Passing a byte offset, but sector count, when we ultimately want to operate on cluster granularity, is madness. Clean up the external interfaces to take both offset and count as bytes, while still keeping the assertion added previously that the caller must align the values to a cluster. Then rename things to make sure backports don't get confused by changed units: instead of qcow2_discard_clusters() and qcow2_zero_clusters(), we now have qcow2_cluster_discard() and qcow2_cluster_zeroize(). The internal functions still operate on clusters at a time, and return an int for number of cleared clusters; but on an image with 2M clusters, a single L2 table holds 256k entries that each represent a 2M cluster, totalling well over INT_MAX bytes if we ever had a request for that many bytes at once. All our callers currently limit themselves to 32-bit bytes (and therefore fewer clusters), but by making this function 64-bit clean, we have one less place to clean up if we later improve the block layer to support 64-bit bytes through all operations (with the block layer auto-fragmenting on behalf of more-limited drivers), rather than the current state where some interfaces are artificially limited to INT_MAX at a time. Signed-off-by: Eric Blake Reviewed-by: Max Reitz Message-id: 20170507000552.20847-13-eblake@redhat.com Signed-off-by: Max Reitz --- block/qcow2-cluster.c | 42 ++++++++++++++++++++++-------------------- block/qcow2-snapshot.c | 7 +++---- block/qcow2.c | 22 +++++++++------------- block/qcow2.h | 9 +++++---- 4 files changed, 39 insertions(+), 41 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 43bde569a7..347d94b0d2 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1549,34 +1549,36 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, return nb_clusters; } -int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, - int nb_sectors, enum qcow2_discard_type type, bool full_discard) +int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, enum qcow2_discard_type type, + bool full_discard) { BDRVQcow2State *s = bs->opaque; - uint64_t end_offset; + uint64_t end_offset = offset + bytes; uint64_t nb_clusters; + int64_t cleared; int ret; - end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS); - /* Caller must pass aligned values, except at image end */ assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || end_offset == bs->total_sectors << BDRV_SECTOR_BITS); - nb_clusters = size_to_clusters(s, end_offset - offset); + nb_clusters = size_to_clusters(s, bytes); s->cache_discards = true; /* Each L2 table is handled by its own loop iteration */ while (nb_clusters > 0) { - ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard); - if (ret < 0) { + cleared = discard_single_l2(bs, offset, nb_clusters, type, + full_discard); + if (cleared < 0) { + ret = cleared; goto fail; } - nb_clusters -= ret; - offset += (ret * s->cluster_size); + nb_clusters -= cleared; + offset += (cleared * s->cluster_size); } ret = 0; @@ -1641,16 +1643,15 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, return nb_clusters; } -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, - int flags) +int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, int flags) { BDRVQcow2State *s = bs->opaque; - uint64_t end_offset; + uint64_t end_offset = offset + bytes; uint64_t nb_clusters; + int64_t cleared; int ret; - end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS); - /* Caller must pass aligned values, except at image end */ assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || @@ -1662,18 +1663,19 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, } /* Each L2 table is handled by its own loop iteration */ - nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS); + nb_clusters = size_to_clusters(s, bytes); s->cache_discards = true; while (nb_clusters > 0) { - ret = zero_single_l2(bs, offset, nb_clusters, flags); - if (ret < 0) { + cleared = zero_single_l2(bs, offset, nb_clusters, flags); + if (cleared < 0) { + ret = cleared; goto fail; } - nb_clusters -= ret; - offset += (ret * s->cluster_size); + nb_clusters -= cleared; + offset += (cleared * s->cluster_size); } ret = 0; diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 032424322a..44243e0e95 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -440,10 +440,9 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) /* The VM state isn't needed any more in the active L1 table; in fact, it * hurts by causing expensive COW for the next snapshot. */ - qcow2_discard_clusters(bs, qcow2_vm_state_offset(s), - align_offset(sn->vm_state_size, s->cluster_size) - >> BDRV_SECTOR_BITS, - QCOW2_DISCARD_NEVER, false); + qcow2_cluster_discard(bs, qcow2_vm_state_offset(s), + align_offset(sn->vm_state_size, s->cluster_size), + QCOW2_DISCARD_NEVER, false); #ifdef DEBUG_ALLOC { diff --git a/block/qcow2.c b/block/qcow2.c index 72b1650953..a8d61f0981 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2512,7 +2512,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count); /* Whatever is left can use real zero clusters */ - ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS, flags); + ret = qcow2_cluster_zeroize(bs, offset, count, flags); qemu_co_mutex_unlock(&s->lock); return ret; @@ -2535,8 +2535,8 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, } qemu_co_mutex_lock(&s->lock); - ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS, - QCOW2_DISCARD_REQUEST, false); + ret = qcow2_cluster_discard(bs, offset, count, QCOW2_DISCARD_REQUEST, + false); qemu_co_mutex_unlock(&s->lock); return ret; } @@ -2843,9 +2843,8 @@ fail: static int qcow2_make_empty(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; - uint64_t start_sector; - int sector_step = (QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size) / - BDRV_SECTOR_SIZE); + uint64_t offset, end_offset; + int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size); int l1_clusters, ret = 0; l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); @@ -2862,18 +2861,15 @@ static int qcow2_make_empty(BlockDriverState *bs) /* This fallback code simply discards every active cluster; this is slow, * but works in all cases */ - for (start_sector = 0; start_sector < bs->total_sectors; - start_sector += sector_step) - { + end_offset = bs->total_sectors * BDRV_SECTOR_SIZE; + for (offset = 0; offset < end_offset; offset += step) { /* As this function is generally used after committing an external * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the * default action for this kind of discard is to pass the discard, * which will ideally result in an actually smaller image file, as * is probably desired. */ - ret = qcow2_discard_clusters(bs, start_sector * BDRV_SECTOR_SIZE, - MIN(sector_step, - bs->total_sectors - start_sector), - QCOW2_DISCARD_SNAPSHOT, true); + ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset), + QCOW2_DISCARD_SNAPSHOT, true); if (ret < 0) { break; } diff --git a/block/qcow2.h b/block/qcow2.h index 810ceb84c8..1801dc30dc 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -551,10 +551,11 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, int compressed_size); int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); -int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, - int nb_sectors, enum qcow2_discard_type type, bool full_discard); -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, - int flags); +int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, enum qcow2_discard_type type, + bool full_discard); +int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, int flags); int qcow2_expand_zero_clusters(BlockDriverState *bs, BlockDriverAmendStatusCB *status_cb, -- cgit v1.2.3 From 8dd30c86dd41a727f726a086fff97c7ce06cbe1d Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 28 Apr 2017 18:55:17 +0200 Subject: MAINTAINERS: Add qemu-progress to the block layer util/qemu-progress.c is currently unmaintained. The only user of its functionality is qemu-img, so it effectively is part of the block layer. Suggested-by: Fam Zheng Signed-off-by: Max Reitz Message-id: 20170428165517.30341-1-mreitz@redhat.com Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Signed-off-by: Max Reitz --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8224be0d75..145be0f45f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1170,6 +1170,7 @@ F: include/block/ F: qemu-img* F: qemu-io* F: tests/qemu-iotests/ +F: util/qemu-progress.c T: git git://repo.or.cz/qemu/kevin.git block Block I/O path -- cgit v1.2.3