/* * QEMU disk image utility * * Copyright (c) 2003-2008 Fabrice Bellard * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/

#include "qemu/osdep.h"
/*
 * NOTE(review): the next directive has no header name in this copy of the
 * file. The getopt_long()/struct option users further down suggest that
 * <getopt.h> was intended -- confirm against the upstream source.
 */
#include
#include "qemu-version.h"
#include "qapi/error.h"
#include "qapi/qapi-visit-block-core.h"
#include "qapi/qobject-output-visitor.h"
#include "qapi/qmp/qjson.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qstring.h"
#include "qemu/cutils.h"
#include "qemu/config-file.h"
#include "qemu/option.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/units.h"
#include "qom/object_interfaces.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "block/qapi.h"
#include "crypto/init.h"
#include "trace/control.h"

/* Banner printed at the top of the help text. */
#define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
    "\n" QEMU_COPYRIGHT "\n"

/* One sub-command: its name and the function that implements it. */
typedef struct img_cmd_t {
    const char *name;
    int (*handler)(int argc, char **argv);
} img_cmd_t;

/*
 * Return values used for long-only options in the getopt_long() tables.
 * They start at 256 so they cannot collide with single-character options.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN = 257,
    OPTION_OBJECT = 258,
    OPTION_IMAGE_OPTS = 259,
    OPTION_PATTERN = 260,
    OPTION_FLUSH_INTERVAL = 261,
    OPTION_NO_DRAIN = 262,
    OPTION_TARGET_IMAGE_OPTS = 263,
    OPTION_SIZE = 264,
    OPTION_PREALLOCATION = 265,
    OPTION_SHRINK = 266,
};

/* Output styles selectable with --output. */
typedef enum OutputFormat {
    OFORMAT_JSON,
    OFORMAT_HUMAN,
} OutputFormat;

/* Default to cache=writeback as data integrity is not important for qemu-img */
#define BDRV_DEFAULT_CACHE "writeback"

/*
 * Callback passed to bdrv_iterate_format() by help(): prints one format
 * name preceded by a space. 'opaque' is not used.
 */
static void format_print(void *opaque, const char *name)
{
    printf(" %s", name);
}

/*
 * Report a printf-style error message through error_vreport(), print a hint
 * pointing at --help and terminate the process with EXIT_FAILURE.
 */
static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    error_vreport(fmt, ap);
    va_end(ap);

    error_printf("Try 'qemu-img --help' for more information\n");
    exit(EXIT_FAILURE);
}

/* Fatal error helper: 'option' was given without its required argument. */
static void QEMU_NORETURN missing_argument(const char *option)
{
    error_exit("missing argument for option '%s'", option);
}

/* Fatal error helper: 'option' is not known to the current sub-command. */
static void QEMU_NORETURN unrecognized_option(const char *option)
{
    error_exit("unrecognized option '%s'", option);
}

/*
 * Print the complete usage text followed by the list of supported formats,
 * then exit with EXIT_SUCCESS. The per-command syntax lines are generated by
 * including qemu-img-cmds.h with DEF() expanding to a string literal.
 */
/* Please keep in synch with qemu-img.texi */
static void QEMU_NORETURN help(void)
{
    const char *help_msg =
        QEMU_IMG_VERSION
        "usage: qemu-img [standard options] command [command options]\n"
        "QEMU disk image utility\n"
        "\n"
        " '-h', '--help' display this help and exit\n"
        " '-V', '--version' output version information and exit\n"
        " '-T', '--trace' [[enable=]][,events=][,file=]\n"
        " specify tracing options\n"
        "\n"
        "Command syntax:\n"
#define DEF(option, callback, arg_string) \
        " " arg_string "\n"
#include "qemu-img-cmds.h"
#undef DEF
        "\n"
        "Command parameters:\n"
        " 'filename' is a disk image filename\n"
        " 'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
        " manual page for a description of the object properties. The most common\n"
        " object type is a 'secret', which is used to supply passwords and/or\n"
        " encryption keys.\n"
        " 'fmt' is the disk image format. It is guessed automatically in most cases\n"
        " 'cache' is the cache mode used to write the output disk image, the valid\n"
        " options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
        " 'directsync' and 'unsafe' (default for convert)\n"
        " 'src_cache' is the cache mode used to read input disk images, the valid\n"
        " options are the same as for the 'cache' option\n"
        " 'size' is the disk image size in bytes. Optional suffixes\n"
        " 'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
        " 'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P) are\n"
        " supported. 'b' is ignored.\n"
        " 'output_filename' is the destination disk image filename\n"
        " 'output_fmt' is the destination format\n"
        " 'options' is a comma separated list of format specific options in a\n"
        " name=value format. Use -o ? for an overview of the options supported by the\n"
        " used format\n"
        " 'snapshot_param' is param used for internal snapshot, format\n"
        " is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
        " '[ID_OR_NAME]'\n"
        " '-c' indicates that target image must be compressed (qcow format only)\n"
        " '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
        " new backing file match exactly. The image doesn't need a working\n"
        " backing file before rebasing in this case (useful for renaming the\n"
        " backing file). For image creation, allow creating without attempting\n"
        " to open the backing file.\n"
        " '-h' with or without a command shows this help and lists the supported formats\n"
        " '-p' show progress of command (only certain commands)\n"
        " '-q' use Quiet mode - do not print any output (except errors)\n"
        " '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
        " contain only zeros for qemu-img to create a sparse image during\n"
        " conversion. If the number of bytes is 0, the source will not be scanned for\n"
        " unallocated or zero sectors, and the destination image will always be\n"
        " fully allocated\n"
        " '--output' takes the format in which the output must be done (human or json)\n"
        " '-n' skips the target volume creation (useful if the volume is created\n"
        " prior to running qemu-img)\n"
        "\n"
        "Parameters to check subcommand:\n"
        " '-r' tries to repair any inconsistencies that are found during the check.\n"
        " '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
        " kinds of errors, with a higher risk of choosing the wrong fix or\n"
        " hiding corruption that has already occurred.\n"
        "\n"
        "Parameters to convert subcommand:\n"
        " '-m' specifies how many coroutines work in parallel during the convert\n"
        " process (defaults to 8)\n"
        " '-W' allow to write to the target out of order rather than sequential\n"
        "\n"
        "Parameters to snapshot subcommand:\n"
        " 'snapshot' is the name of the snapshot to create, apply or delete\n"
        " '-a' applies a snapshot (revert disk to saved state)\n"
        " '-c' creates a snapshot\n"
        " '-d' deletes a snapshot\n"
        " '-l' lists all snapshots in the given image\n"
        "\n"
        "Parameters to compare subcommand:\n"
        " '-f' first image format\n"
        " '-F' second image format\n"
        " '-s' run in Strict mode - fail on different image size or sector allocation\n"
        "\n"
        "Parameters to dd subcommand:\n"
        " 'bs=BYTES' read and write up to BYTES bytes at a time "
        "(default: 512)\n"
        " 'count=N' copy only N input blocks\n"
        " 'if=FILE' read from FILE\n"
        " 'of=FILE' write to FILE\n"
        " 'skip=N' skip N bs-sized blocks at the start of input\n";

    printf("%s\nSupported formats:", help_msg);
    bdrv_iterate_format(format_print, NULL, false);
    printf("\n\n" QEMU_HELP_BOTTOM "\n");
    exit(EXIT_SUCCESS);
}

/*
 * Option list that collects every --object definition given on the command
 * line; the first bare value is taken as "qom-type". The empty .desc means
 * no fixed set of option names is declared here.
 */
static QemuOptsList qemu_object_opts = {
    .name = "object",
    .implied_opt_name = "qom-type",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
    .desc = {
        { }
    },
};

/*
 * Option list used to parse an --image-opts style image description; the
 * first bare value is taken as "file".
 */
static QemuOptsList qemu_source_opts = {
    .name = "source",
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
    .desc = {
        { }
    },
};

/*
 * printf() that is suppressed when 'quiet' is true.
 *
 * Returns the value returned by vprintf(), or 0 when nothing was printed.
 */
static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
{
    int ret = 0;
    if (!quiet) {
        va_list args;
        va_start(args, fmt);
        ret = vprintf(fmt, args);
        va_end(args);
    }
    return ret;
}

/*
 * Print the image creation options supported by format 'fmt' and, when
 * 'filename' is given, also those of the protocol driver found for that
 * filename.
 *
 * Returns 0 on success, or 1 (after reporting an error) when a driver cannot
 * be found or does not support image creation.
 */
static int print_block_option_help(const char *filename, const char *fmt)
{
    BlockDriver *drv, *proto_drv;
    QemuOptsList *create_opts = NULL;
    Error *local_err = NULL;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_report("Unknown file format '%s'", fmt);
        return 1;
    }

    if (!drv->create_opts) {
        error_report("Format driver '%s' does not support image creation", fmt);
        return 1;
    }

    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    if (filename) {
        proto_drv = bdrv_find_protocol(filename, true, &local_err);
        if (!proto_drv) {
            error_report_err(local_err);
            qemu_opts_free(create_opts);
            return 1;
        }
        if (!proto_drv->create_opts) {
            error_report("Protocol driver '%s' does not support image creation",
                         proto_drv->format_name);
            qemu_opts_free(create_opts);
            return 1;
        }
        create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
    }

    if (filename) {
        printf("Supported options:\n");
    } else {
        printf("Supported %s options:\n", fmt);
    }
    qemu_opts_print_help(create_opts, false);
    qemu_opts_free(create_opts);

    if (!filename) {
        /* Without a filename only the format-level options could be listed. */
        printf("\n"
               "The protocol level may support further options.\n"
               "Specify the target filename to include those options.\n");
    }

    return 0;
}

/*
 * Open an image described by the already parsed option set 'opts'.
 * 'optstr' is only used in the error message. With 'force_share' set, an
 * explicit force-share option whose value is not "on" is rejected as a
 * conflict; otherwise force-share is switched on. 'quiet' is not used.
 *
 * Returns the new BlockBackend, or NULL after reporting an error.
 */
static BlockBackend *img_open_opts(const char *optstr,
                                   QemuOpts *opts, int flags,
                                   bool writethrough, bool quiet,
                                   bool force_share)
{
    QDict *options;
    Error *local_err = NULL;
    BlockBackend *blk;
    options = qemu_opts_to_qdict(opts, NULL);
    if (force_share) {
        if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
            && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
            error_report("--force-share/-U conflicts with image options");
            qobject_unref(options);
            return NULL;
        }
qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on"); } blk = blk_new_open(NULL, NULL, options, flags, &local_err); if (!blk) { error_reportf_err(local_err, "Could not open '%s': ", optstr); return NULL; } blk_set_enable_write_cache(blk, !writethrough); return blk; } static BlockBackend *img_open_file(const char *filename, QDict *options, const char *fmt, int flags, bool writethrough, bool quiet, bool force_share) { BlockBackend *blk; Error *local_err = NULL; if (!options) { options = qdict_new(); } if (fmt) { qdict_put_str(options, "driver", fmt); } if (force_share) { qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); } blk = blk_new_open(filename, NULL, options, flags, &local_err); if (!blk) { error_reportf_err(local_err, "Could not open '%s': ", filename); return NULL; } blk_set_enable_write_cache(blk, !writethrough); return blk; } static int img_add_key_secrets(void *opaque, const char *name, const char *value, Error **errp) { QDict *options = opaque; if (g_str_has_suffix(name, "key-secret")) { qdict_put_str(options, name, value); } return 0; } static BlockBackend *img_open(bool image_opts, const char *filename, const char *fmt, int flags, bool writethrough, bool quiet, bool force_share) { BlockBackend *blk; if (image_opts) { QemuOpts *opts; if (fmt) { error_report("--image-opts and --format are mutually exclusive"); return NULL; } opts = qemu_opts_parse_noisily(qemu_find_opts("source"), filename, true); if (!opts) { return NULL; } blk = img_open_opts(filename, opts, flags, writethrough, quiet, force_share); } else { blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet, force_share); } return blk; } static int add_old_style_options(const char *fmt, QemuOpts *opts, const char *base_filename, const char *base_fmt) { Error *err = NULL; if (base_filename) { qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err); if (err) { error_report("Backing file not supported for file format '%s'", fmt); error_free(err); return -1; } } if (base_fmt) { 
qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
        if (err) {
            error_report("Backing file format not supported for file "
                         "format '%s'", fmt);
            error_free(err);
            return -1;
        }
    }
    return 0;
}

/*
 * Parse a size string with qemu_strtosz().
 *
 * Returns the parsed value, the negative error code from qemu_strtosz() when
 * parsing fails, or -ERANGE when the value does not fit into an int64_t.
 */
static int64_t cvtnum(const char *s)
{
    int err;
    uint64_t value;

    err = qemu_strtosz(s, NULL, &value);
    if (err < 0) {
        return err;
    }
    if (value > INT64_MAX) {
        return -ERANGE;
    }
    return value;
}

/*
 * 'create' sub-command: parse the options, then hand the collected
 * parameters to bdrv_img_create().
 *
 * Returns 0 on success and 1 on failure; usage errors end the process via
 * error_exit(). When the -o string contains a help request, the supported
 * options are printed instead and the result of print_block_option_help()
 * is returned.
 */
static int img_create(int argc, char **argv)
{
    int c;
    uint64_t img_size = -1; /* stays at -1 when no size argument is given */
    const char *fmt = "raw";
    const char *base_fmt = NULL;
    const char *filename;
    const char *base_filename = NULL;
    char *options = NULL; /* heap string, freed on every return path below */
    Error *local_err = NULL;
    bool quiet = false;
    int flags = 0;

    for(;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {0, 0, 0, 0}
        };
        c = getopt_long(argc, argv, ":F:b:f:ho:qu",
                        long_options, NULL);
        if (c == -1) {
            break;
        }
        switch(c) {
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
            help();
            break;
        case 'F':
            base_fmt = optarg;
            break;
        case 'b':
            base_filename = optarg;
            break;
        case 'f':
            fmt = optarg;
            break;
        case 'o':
            if (!is_valid_option_list(optarg)) {
                error_report("Invalid option list: %s", optarg);
                goto fail;
            }
            /* Repeated -o arguments are joined with commas. */
            if (!options) {
                options = g_strdup(optarg);
            } else {
                char *old_options = options;
                options = g_strdup_printf("%s,%s", options, optarg);
                g_free(old_options);
            }
            break;
        case 'q':
            quiet = true;
            break;
        case 'u':
            flags |= BDRV_O_NO_BACKING;
            break;
        case OPTION_OBJECT: {
            QemuOpts *opts;
            opts = qemu_opts_parse_noisily(&qemu_object_opts,
                                           optarg, true);
            if (!opts) {
                goto fail;
            }
        } break;
        }
    }

    /* Get the filename */
    filename = (optind < argc) ? argv[optind] : NULL;
    /* A help request is answered even when no filename was given. */
    if (options && has_help_option(options)) {
        g_free(options);
        return print_block_option_help(filename, fmt);
    }

    if (optind >= argc) {
        error_exit("Expecting image file name");
    }
    optind++;

    if (qemu_opts_foreach(&qemu_object_opts,
                          user_creatable_add_opts_foreach,
                          NULL, &error_fatal)) {
        goto fail;
    }

    /* Get image size, if specified */
    if (optind < argc) {
        int64_t sval;

        sval = cvtnum(argv[optind++]);
        if (sval < 0) {
            if (sval == -ERANGE) {
                error_report("Image size must be less than 8 EiB!");
            } else {
                error_report("Invalid image size specified! You may use k, M, "
                      "G, T, P or E suffixes for ");
                error_report("kilobytes, megabytes, gigabytes, terabytes, "
                             "petabytes and exabytes.");
            }
            goto fail;
        }
        img_size = (uint64_t)sval;
    }
    if (optind != argc) {
        error_exit("Unexpected argument: %s", argv[optind]);
    }

    bdrv_img_create(filename, fmt, base_filename, base_fmt,
                    options, img_size, flags, quiet, &local_err);
    if (local_err) {
        error_reportf_err(local_err, "%s: ", filename);
        goto fail;
    }

    g_free(options);
    return 0;

fail:
    g_free(options);
    return 1;
}

/*
 * Print 'check' as pretty-printed JSON (suppressed when 'quiet' is set).
 * The structure is converted to a QObject with the QAPI output visitor and
 * then serialised.
 */
static void dump_json_image_check(ImageCheck *check, bool quiet)
{
    QString *str;
    QObject *obj;
    Visitor *v = qobject_output_visitor_new(&obj);

    visit_type_ImageCheck(v, NULL, &check, &error_abort);
    visit_complete(v, &obj);
    str = qobject_to_json_pretty(obj);
    assert(str != NULL);
    qprintf(quiet, "%s\n", qstring_get_str(str));
    qobject_unref(obj);
    visit_free(v);
    qobject_unref(str);
}

/*
 * Print 'check' as human readable text (suppressed when 'quiet' is set):
 * the error, leak and internal-error counters, followed by allocation
 * statistics and the image end offset when those are non-zero.
 */
static void dump_human_image_check(ImageCheck *check, bool quiet)
{
    if (!(check->corruptions || check->leaks || check->check_errors)) {
        qprintf(quiet, "No errors were found on the image.\n");
    } else {
        if (check->corruptions) {
            qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
                    "Data may be corrupted, or further writes to the image "
                    "may corrupt it.\n",
                    check->corruptions);
        }

        if (check->leaks) {
            qprintf(quiet,
                    "\n%" PRId64 " leaked clusters were found on the image.\n"
                    "This means waste of disk space, but no harm to data.\n",
check->leaks); } if (check->check_errors) { qprintf(quiet, "\n%" PRId64 " internal errors have occurred during the check.\n", check->check_errors); } } if (check->total_clusters != 0 && check->allocated_clusters != 0) { qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, " "%0.2f%% fragmented, %0.2f%% compressed clusters\n", check->allocated_clusters, check->total_clusters, check->allocated_clusters * 100.0 / check->total_clusters, check->fragmented_clusters * 100.0 / check->allocated_clusters, check->compressed_clusters * 100.0 / check->allocated_clusters); } if (check->image_end_offset) { qprintf(quiet, "Image end offset: %" PRId64 "\n", check->image_end_offset); } } static int collect_image_check(BlockDriverState *bs, ImageCheck *check, const char *filename, const char *fmt, int fix) { int ret; BdrvCheckResult result; ret = bdrv_check(bs, &result, fix); if (ret < 0) { return ret; } check->filename = g_strdup(filename); check->format = g_strdup(bdrv_get_format_name(bs)); check->check_errors = result.check_errors; check->corruptions = result.corruptions; check->has_corruptions = result.corruptions != 0; check->leaks = result.leaks; check->has_leaks = result.leaks != 0; check->corruptions_fixed = result.corruptions_fixed; check->has_corruptions_fixed = result.corruptions != 0; check->leaks_fixed = result.leaks_fixed; check->has_leaks_fixed = result.leaks != 0; check->image_end_offset = result.image_end_offset; check->has_image_end_offset = result.image_end_offset != 0; check->total_clusters = result.bfi.total_clusters; check->has_total_clusters = result.bfi.total_clusters != 0; check->allocated_clusters = result.bfi.allocated_clusters; check->has_allocated_clusters = result.bfi.allocated_clusters != 0; check->fragmented_clusters = result.bfi.fragmented_clusters; check->has_fragmented_clusters = result.bfi.fragmented_clusters != 0; check->compressed_clusters = result.bfi.compressed_clusters; check->has_compressed_clusters = result.bfi.compressed_clusters != 
0; return 0; } /* * Checks an image for consistency. Exit codes: * * 0 - Check completed, image is good * 1 - Check not completed because of internal errors * 2 - Check completed, image is corrupted * 3 - Check completed, image has leaked clusters, but is good otherwise * 63 - Checks are not supported by the image format */ static int img_check(int argc, char **argv) { int c, ret; OutputFormat output_format = OFORMAT_HUMAN; const char *filename, *fmt, *output, *cache; BlockBackend *blk; BlockDriverState *bs; int fix = 0; int flags = BDRV_O_CHECK; bool writethrough; ImageCheck *check; bool quiet = false; bool image_opts = false; bool force_share = false; fmt = NULL; output = NULL; cache = BDRV_DEFAULT_CACHE; for(;;) { int option_index = 0; static const struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"format", required_argument, 0, 'f'}, {"repair", required_argument, 0, 'r'}, {"output", required_argument, 0, OPTION_OUTPUT}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; c = getopt_long(argc, argv, ":hf:r:T:qU", long_options, &option_index); if (c == -1) { break; } switch(c) { case ':': missing_argument(argv[optind - 1]); break; case '?': unrecognized_option(argv[optind - 1]); break; case 'h': help(); break; case 'f': fmt = optarg; break; case 'r': flags |= BDRV_O_RDWR; if (!strcmp(optarg, "leaks")) { fix = BDRV_FIX_LEAKS; } else if (!strcmp(optarg, "all")) { fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS; } else { error_exit("Unknown option value for -r " "(expecting 'leaks' or 'all'): %s", optarg); } break; case OPTION_OUTPUT: output = optarg; break; case 'T': cache = optarg; break; case 'q': quiet = true; break; case 'U': force_share = true; break; case OPTION_OBJECT: { QemuOpts *opts; opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true); if (!opts) { return 1; } } break; case OPTION_IMAGE_OPTS: image_opts = true; break; } } if (optind != 
argc - 1) { error_exit("Expecting one image file name"); } filename = argv[optind++]; if (output && !strcmp(output, "json")) { output_format = OFORMAT_JSON; } else if (output && !strcmp(output, "human")) { output_format = OFORMAT_HUMAN; } else if (output) { error_report("--output must be used with human or json as argument."); return 1; } if (qemu_opts_foreach(&qemu_object_opts, user_creatable_add_opts_foreach, NULL, &error_fatal)) { return 1; } ret = bdrv_parse_cache_mode(cache, &flags, &writethrough); if (ret < 0) { error_report("Invalid source cache option: %s", cache); return 1; } blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet, force_share); if (!blk) { return 1; } bs = blk_bs(blk); check = g_new0(ImageCheck, 1); ret = collect_image_check(bs, check, filename, fmt, fix); if (ret == -ENOTSUP) { error_report("This image format does not support checks"); ret = 63; goto fail; } if (check->corruptions_fixed || check->leaks_fixed) { int corruptions_fixed, leaks_fixed; leaks_fixed = check->leaks_fixed; corruptions_fixed = check->corruptions_fixed; if (output_format == OFORMAT_HUMAN) { qprintf(quiet, "The following inconsistencies were found and repaired:\n\n" " %" PRId64 " leaked clusters\n" " %" PRId64 " corruptions\n\n" "Double checking the fixed image now...\n", check->leaks_fixed, check->corruptions_fixed); } ret = collect_image_check(bs, check, filename, fmt, 0); check->leaks_fixed = leaks_fixed; check->corruptions_fixed = corruptions_fixed; } if (!ret) { switch (output_format) { case OFORMAT_HUMAN: dump_human_image_check(check, quiet); break; case OFORMAT_JSON: dump_json_image_check(check, quiet); break; } } if (ret || check->check_errors) { if (ret) { error_report("Check failed: %s", strerror(-ret)); } else { error_report("Check failed"); } ret = 1; goto fail; } if (check->corruptions) { ret = 2; } else if (check->leaks) { ret = 3; } else { ret = 0; } fail: qapi_free_ImageCheck(check); blk_unref(blk); return ret; } typedef struct 
CommonBlockJobCBInfo {
    BlockDriverState *bs;
    Error **errp;
} CommonBlockJobCBInfo;

/*
 * Completion callback for block jobs started by this file. 'opaque' is a
 * CommonBlockJobCBInfo; a negative 'ret' is turned into an error stored
 * through its errp member.
 */
static void common_block_job_cb(void *opaque, int ret)
{
    CommonBlockJobCBInfo *cbi = opaque;

    if (ret < 0) {
        error_setg_errno(cbi->errp, -ret, "Block job failed");
    }
}

/*
 * Drive 'job' until it is ready or completed, printing progress after every
 * aio_poll() iteration. A job that became ready without completing is
 * finished with job_complete_sync(), which may set 'errp'. The final 100%
 * progress is only printed when the job's result is zero.
 */
static void run_block_job(BlockJob *job, Error **errp)
{
    AioContext *aio_context = blk_get_aio_context(job->blk);
    int ret = 0;

    aio_context_acquire(aio_context);
    /* Hold a reference so the job can still be inspected after the loop. */
    job_ref(&job->job);
    do {
        float progress = 0.0f;
        aio_poll(aio_context, true);
        /* Guard against dividing by a zero total. */
        if (job->job.progress_total) {
            progress = (float)job->job.progress_current /
                       job->job.progress_total * 100.f;
        }
        qemu_progress_print(progress, 0);
    } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));

    if (!job_is_completed(&job->job)) {
        ret = job_complete_sync(&job->job, errp);
    } else {
        ret = job->job.ret;
    }
    job_unref(&job->job);
    aio_context_release(aio_context);

    /* publish completion progress only when success */
    if (!ret) {
        qemu_progress_print(100.f, 0);
    }
}

/*
 * 'commit' sub-command: commit the image into the backing image named with
 * -b, or into its immediate backing file when -b is not given.
 *
 * Unless -d (or -b, which implies it) was given, the image is emptied
 * afterwards if its driver provides bdrv_make_empty.
 *
 * Returns 0 on success and 1 on failure; usage errors end the process via
 * error_exit().
 */
static int img_commit(int argc, char **argv)
{
    int c, ret, flags;
    const char *filename, *fmt, *cache, *base;
    BlockBackend *blk;
    BlockDriverState *bs, *base_bs;
    BlockJob *job;
    bool progress = false, quiet = false, drop = false;
    bool writethrough;
    Error *local_err = NULL;
    CommonBlockJobCBInfo cbi;
    bool image_opts = false;
    AioContext *aio_context;

    fmt = NULL;
    cache = BDRV_DEFAULT_CACHE;
    base = NULL;
    for(;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {0, 0, 0, 0}
        };
        c = getopt_long(argc, argv, ":f:ht:b:dpq",
                        long_options, NULL);
        if (c == -1) {
            break;
        }
        switch(c) {
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
            help();
            break;
        case 'f':
            fmt = optarg;
            break;
        case 't':
            cache = optarg;
            break;
        case 'b':
            base = optarg;
            /* -b implies -d */
            drop = true;
            break;
        case 'd':
            drop = true;
            break;
        case 'p':
            progress = true;
            break;
        case 'q':
            quiet = true;
            break;
        case OPTION_OBJECT: {
            QemuOpts *opts;
            opts = qemu_opts_parse_noisily(&qemu_object_opts,
                                           optarg, true);
            if (!opts) {
                return 1;
            }
        } break;
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        }
    }

    /* Progress is not shown in Quiet mode */
    if (quiet) {
        progress = false;
    }

    if (optind != argc - 1) {
        error_exit("Expecting one image file name");
    }
    filename = argv[optind++];

    if (qemu_opts_foreach(&qemu_object_opts,
                          user_creatable_add_opts_foreach,
                          NULL, &error_fatal)) {
        return 1;
    }

    flags = BDRV_O_RDWR | BDRV_O_UNMAP;
    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
    if (ret < 0) {
        error_report("Invalid cache option: %s", cache);
        return 1;
    }

    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
                   false);
    if (!blk) {
        return 1;
    }
    bs = blk_bs(blk);

    qemu_progress_init(progress, 1.f);
    qemu_progress_print(0.f, 100);

    if (base) {
        base_bs = bdrv_find_backing_image(bs, base);
        if (!base_bs) {
            error_setg(&local_err,
                       "Did not find '%s' in the backing chain of '%s'",
                       base, filename);
            goto done;
        }
    } else {
        /* This is different from QMP, which by default uses the deepest file in
         * the backing chain (i.e., the very base); however, the traditional
         * behavior of qemu-img commit is using the immediate backing file. */
        base_bs = backing_bs(bs);
        if (!base_bs) {
            error_setg(&local_err, "Image does not have a backing file");
            goto done;
        }
    }

    /* Errors reported by the job's completion callback land in local_err. */
    cbi = (CommonBlockJobCBInfo){
        .errp = &local_err,
        .bs = bs,
    };

    aio_context = bdrv_get_aio_context(bs);
    aio_context_acquire(aio_context);
    commit_active_start("commit", bs, base_bs, JOB_DEFAULT, 0,
                        BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
                        &cbi, false, &local_err);
    aio_context_release(aio_context);
    if (local_err) {
        goto done;
    }

    /* When the block job completes, the BlockBackend reference will point to
     * the old backing file. In order to avoid that the top image is already
     * deleted, so we can still empty it afterwards, increment the reference
     * counter here preemptively. */
    if (!drop) {
        bdrv_ref(bs);
    }

    job = block_job_get("commit");
    assert(job);
    run_block_job(job, &local_err);
    if (local_err) {
        goto unref_backing;
    }

    if (!drop && bs->drv->bdrv_make_empty) {
        ret = bs->drv->bdrv_make_empty(bs);
        if (ret) {
            error_setg_errno(&local_err, -ret, "Could not empty %s",
                             filename);
            goto unref_backing;
        }
    }

unref_backing:
    /* Drop the extra reference taken before the job was run. */
    if (!drop) {
        bdrv_unref(bs);
    }

done:
    qemu_progress_end();

    blk_unref(blk);

    if (local_err) {
        error_report_err(local_err);
        return 1;
    }

    qprintf(quiet, "Image committed.\n");
    return 0;
}

/*
 * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
 * of the first sector boundary within buf where the sector contains a
 * non-zero byte. This function is robust to a buffer that is not
 * sector-aligned.
 *
 * 'n' is the length of 'buf' in bytes. A trailing partial sector is checked
 * separately after the whole sectors.
 */
static int64_t find_nonzero(const uint8_t *buf, int64_t n)
{
    int64_t i;
    int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);

    for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
        if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
            return i;
        }
    }
    /* Here i == end, so this covers the bytes after the last full sector. */
    if (i < n && !buffer_is_zero(buf + i, n - end)) {
        return i;
    }
    return -1;
}

/*
 * Returns true iff the first sector pointed to by 'buf' contains at least
 * a non-NUL byte.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the first one) that are known to be in the same allocated/unallocated state.
 * The function will try to align the end offset to alignment boundaries so
 * that the request will at least end aligned and consecutive requests will
 * also start at an aligned offset.
 *
 * Note that a zero run which is no longer than the unaligned tail is
 * reported as allocated, so the return value can be non-zero for a first
 * sector that is all zeroes.
 */
static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
                                int64_t sector_num, int alignment)
{
    bool is_zero;
    int i, tail;

    if (n <= 0) {
        *pnum = 0;
        return 0;
    }
    is_zero = buffer_is_zero(buf, 512);
    /* Extend the run while the following sectors have the same state. */
    for(i = 1; i < n; i++) {
        buf += 512;
        if (is_zero != buffer_is_zero(buf, 512)) {
            break;
        }
    }

    /* The mask assumes 'alignment' is a power of two -- TODO confirm. */
    tail = (sector_num + i) & (alignment - 1);
    if (tail) {
        if (is_zero && i <= tail) {
            /* treat unallocated areas which only consist
             * of a small tail as allocated.
*/ is_zero = false; } if (!is_zero) { /* align up end offset of allocated areas. */ i += alignment - tail; i = MIN(i, n); } else { /* align down end offset of zero areas. */ i -= tail; } } *pnum = i; return !is_zero; } /* * Like is_allocated_sectors, but if the buffer starts with a used sector, * up to 'min' consecutive sectors containing zeros are ignored. This avoids * breaking up write requests for only small sparse areas. */ static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum, int min, int64_t sector_num, int alignment) { int ret; int num_checked, num_used; if (n < min) { min = n; } ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment); if (!ret) { return ret; } num_used = *pnum; buf += BDRV_SECTOR_SIZE * *pnum; n -= *pnum; sector_num += *pnum; num_checked = num_used; while (n > 0) { ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment); buf += BDRV_SECTOR_SIZE * *pnum; n -= *pnum; sector_num += *pnum; num_checked += *pnum; if (ret) { num_used = num_checked; } else if (*pnum >= min) { break; } } *pnum = num_used; return 1; } /* * Compares two buffers sector by sector. Returns 0 if the first * sector of each buffer matches, non-zero otherwise. * * pnum is set to the sector-aligned size of the buffer prefix that * has the same matching status as the first sector. 
*/ static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2, int64_t bytes, int64_t *pnum) { bool res; int64_t i = MIN(bytes, BDRV_SECTOR_SIZE); assert(bytes > 0); res = !!memcmp(buf1, buf2, i); while (i < bytes) { int64_t len = MIN(bytes - i, BDRV_SECTOR_SIZE); if (!!memcmp(buf1 + i, buf2 + i, len) != res) { break; } i += len; } *pnum = i; return res; } #define IO_BUF_SIZE (2 * MiB) /* * Check if passed sectors are empty (not allocated or contain only 0 bytes) * * Intended for use by 'qemu-img compare': Returns 0 in case sectors are * filled with 0, 1 if sectors contain non-zero data (this is a comparison * failure), and 4 on error (the exit status for read errors), after emitting * an error message. * * @param blk: BlockBackend for the image * @param offset: Starting offset to check * @param bytes: Number of bytes to check * @param filename: Name of disk file we are checking (logging purpose) * @param buffer: Allocated buffer for storing read data * @param quiet: Flag for quiet mode */ static int check_empty_sectors(BlockBackend *blk, int64_t offset, int64_t bytes, const char *filename, uint8_t *buffer, bool quiet) { int ret = 0; int64_t idx; ret = blk_pread(blk, offset, buffer, bytes); if (ret < 0) { error_report("Error while reading offset %" PRId64 " of %s: %s", offset, filename, strerror(-ret)); return 4; } idx = find_nonzero(buffer, bytes); if (idx >= 0) { qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n", offset + idx); return 1; } return 0; } /* * Compares two images. 
Exit codes:
 *
 * 0 - Images are identical
 * 1 - Images differ
 * >1 - Error occurred (2: bad options or an image could not be opened,
 *      3: block status query failed, 4: size query or read failed)
 */
static int img_compare(int argc, char **argv)
{
    const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
    BlockBackend *blk1, *blk2;
    BlockDriverState *bs1, *bs2;
    int64_t total_size1, total_size2;
    uint8_t *buf1 = NULL, *buf2 = NULL;
    int64_t pnum1, pnum2;
    int allocated1, allocated2;
    int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
    bool progress = false, quiet = false, strict = false;
    int flags;
    bool writethrough;
    int64_t total_size;
    int64_t offset = 0;
    int64_t chunk;
    int c;
    uint64_t progress_base;
    bool image_opts = false;
    bool force_share = false;

    cache = BDRV_DEFAULT_CACHE;
    for (;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {"force-share", no_argument, 0, 'U'},
            {0, 0, 0, 0}
        };
        c = getopt_long(argc, argv, ":hf:F:T:pqsU",
                        long_options, NULL);
        if (c == -1) {
            break;
        }
        switch (c) {
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
            help();
            break;
        case 'f':
            fmt1 = optarg;
            break;
        case 'F':
            fmt2 = optarg;
            break;
        case 'T':
            cache = optarg;
            break;
        case 'p':
            progress = true;
            break;
        case 'q':
            quiet = true;
            break;
        case 's':
            strict = true;
            break;
        case 'U':
            force_share = true;
            break;
        case OPTION_OBJECT: {
            QemuOpts *opts;
            opts = qemu_opts_parse_noisily(&qemu_object_opts,
                                           optarg, true);
            if (!opts) {
                ret = 2;
                goto out4;
            }
        }   break;
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        }
    }

    /* Progress is not shown in Quiet mode */
    if (quiet) {
        progress = false;
    }


    if (optind != argc - 2) {
        error_exit("Expecting two image file names");
    }
    filename1 = argv[optind++];
    filename2 = argv[optind++];

    if (qemu_opts_foreach(&qemu_object_opts,
                          user_creatable_add_opts_foreach,
                          NULL, &error_fatal)) {
        ret = 2;
        goto out4;
    }

    /* Initialize before goto out */
    qemu_progress_init(progress, 2.0);

    /* The same cache mode is applied to both images. */
    flags = 0;
    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
    if (ret < 0) {
        error_report("Invalid source cache option: %s", cache);
        ret = 2;
        goto out3;
    }

    blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
                    force_share);
    if (!blk1) {
        ret = 2;
        goto out3;
    }

    blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
                    force_share);
    if (!blk2) {
        ret = 2;
        goto out2;
    }
    bs1 = blk_bs(blk1);
    bs2 = blk_bs(blk2);

    buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
    buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
    total_size1 = blk_getlength(blk1);
    if (total_size1 < 0) {
        error_report("Can't get size of %s: %s",
                     filename1, strerror(-total_size1));
        ret = 4;
        goto out;
    }
    total_size2 = blk_getlength(blk2);
    if (total_size2 < 0) {
        error_report("Can't get size of %s: %s",
                     filename2, strerror(-total_size2));
        ret = 4;
        goto out;
    }
    /* total_size: range present in both images; progress_base: longer one */
    total_size = MIN(total_size1, total_size2);
    progress_base = MAX(total_size1, total_size2);

    qemu_progress_print(0, 100);

    if (strict && total_size1 != total_size2) {
        ret = 1;
        qprintf(quiet, "Strict mode: Image size mismatch!\n");
        goto out;
    }

    /* Phase 1: walk the range that exists in both images. */
    while (offset < total_size) {
        int status1, status2;

        status1 = bdrv_block_status_above(bs1, NULL, offset,
                                          total_size1 - offset, &pnum1, NULL,
                                          NULL);
        if (status1 < 0) {
            ret = 3;
            error_report("Sector allocation test failed for %s", filename1);
            goto out;
        }
        allocated1 = status1 & BDRV_BLOCK_ALLOCATED;

        status2 = bdrv_block_status_above(bs2, NULL, offset,
                                          total_size2 - offset, &pnum2, NULL,
                                          NULL);
        if (status2 < 0) {
            ret = 3;
            error_report("Sector allocation test failed for %s", filename2);
            goto out;
        }
        allocated2 = status2 & BDRV_BLOCK_ALLOCATED;

        assert(pnum1 && pnum2);
        /* Only advance over the part where both status results apply. */
        chunk = MIN(pnum1, pnum2);

        if (strict) {
            if (status1 != status2) {
                ret = 1;
                qprintf(quiet, "Strict mode: Offset %" PRId64
                        " block status mismatch!\n", offset);
                goto out;
            }
        }
        if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
            /* nothing to do */
        } else if (allocated1 == allocated2) {
            if (allocated1) {
                int64_t pnum;

                /* Both allocated: read and compare at most one buffer. */
                chunk = MIN(chunk, IO_BUF_SIZE);
                ret = blk_pread(blk1, offset, buf1, chunk);
                if (ret < 0) {
                    error_report("Error while reading offset %" PRId64
                                 " of %s: %s",
                                 offset, filename1, strerror(-ret));
                    ret = 4;
                    goto out;
                }
                ret = blk_pread(blk2, offset, buf2, chunk);
                if (ret < 0) {
                    error_report("Error while reading offset %" PRId64
                                 " of %s: %s",
                                 offset, filename2, strerror(-ret));
                    ret = 4;
                    goto out;
                }
                ret = compare_buffers(buf1, buf2, chunk, &pnum);
                if (ret || pnum != chunk) {
                    /*
                     * ret != 0: the very first sector differs; otherwise the
                     * first pnum bytes matched and the mismatch starts there.
                     */
                    qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
                            offset + (ret ? 0 : pnum));
                    ret = 1;
                    goto out;
                }
            }
        } else {
            /*
             * Allocated in only one image: that image must read as zeroes.
             * buf1 serves as scratch space for whichever image is read.
             */
            chunk = MIN(chunk, IO_BUF_SIZE);
            if (allocated1) {
                ret = check_empty_sectors(blk1, offset, chunk,
                                          filename1, buf1, quiet);
            } else {
                ret = check_empty_sectors(blk2, offset, chunk,
                                          filename2, buf1, quiet);
            }
            if (ret) {
                goto out;
            }
        }
        offset += chunk;
        qemu_progress_print(((float) chunk / progress_base) * 100, 100);
    }

    /* Phase 2: the tail of the longer image must read as zeroes. */
    if (total_size1 != total_size2) {
        BlockBackend *blk_over;
        const char *filename_over;

        qprintf(quiet, "Warning: Image size mismatch!\n");
        if (total_size1 > total_size2) {
            blk_over = blk1;
            filename_over = filename1;
        } else {
            blk_over = blk2;
            filename_over = filename2;
        }

        while (offset < progress_base) {
            ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
                                          progress_base - offset, &chunk,
                                          NULL, NULL);
            if (ret < 0) {
                ret = 3;
                error_report("Sector allocation test failed for %s",
                             filename_over);
                goto out;

            }
            /* Only allocated ranges not already flagged zero need a read. */
            if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
                chunk = MIN(chunk, IO_BUF_SIZE);
                ret = check_empty_sectors(blk_over, offset, chunk,
                                          filename_over, buf1, quiet);
                if (ret) {
                    goto out;
                }
            }
            offset += chunk;
            qemu_progress_print(((float) chunk / progress_base) * 100, 100);
        }
    }

    qprintf(quiet, "Images are identical.\n");
    ret = 0;

    /* Cleanup labels unwind in reverse order of acquisition. */
out:
    qemu_vfree(buf1);
    qemu_vfree(buf2);
    blk_unref(blk2);
out2:
    blk_unref(blk1);
out3:
    qemu_progress_end();
out4:
    return ret;
}

/* Classification of a source range as decided by convert_iteration_sectors() */
enum ImgConvertBlockStatus {
    BLK_DATA,
    BLK_ZERO,
    BLK_BACKING_FILE,
};

/* Size of the per-coroutine arrays in ImgConvertState; upper bound for -m */
#define MAX_COROUTINES 16

typedef struct
ImgConvertState {
    BlockBackend **src;             /* source images, concatenated in order */
    int64_t *src_sectors;           /* length of each source, in sectors */
    int src_num;                    /* number of entries in src[] */
    int64_t total_sectors;          /* sum of all src_sectors[] */
    int64_t allocated_sectors;      /* progress denominator */
    int64_t allocated_done;         /* progress numerator */
    int64_t sector_num;             /* next sector to hand to a coroutine */
    int64_t wr_offs;                /* next sector to write if wr_in_order */
    enum ImgConvertBlockStatus status; /* status of last examined range */
    int64_t sector_next_status;     /* first sector 'status' doesn't cover */
    BlockBackend *target;
    bool has_zero_init;
    bool compressed;
    bool unallocated_blocks_are_zero;
    bool target_has_backing;
    int64_t target_backing_sectors; /* negative if unknown */
    bool wr_in_order;
    bool copy_range;
    int min_sparse;                 /* in sectors */
    int alignment;                  /* in sectors */
    size_t cluster_sectors;
    size_t buf_sectors;
    long num_coroutines;
    int running_coroutines;
    Coroutine *co[MAX_COROUTINES];
    int64_t wait_sector_num[MAX_COROUTINES]; /* -1 when not waiting */
    CoMutex lock;
    int ret;                        /* -EINPROGRESS while the copy runs */
} ImgConvertState;

/*
 * Map @sector_num, a position in the concatenation of all source images,
 * onto the source that contains it.
 *
 * On return *src_cur is the index into s->src[] and *src_cur_offset is the
 * position (in sectors, within the concatenation) of that source's first
 * sector. Asserts that @sector_num lies inside the concatenation.
 */
static void convert_select_part(ImgConvertState *s, int64_t sector_num,
                                int *src_cur, int64_t *src_cur_offset)
{
    *src_cur = 0;
    *src_cur_offset = 0;
    while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
        *src_cur_offset += s->src_sectors[*src_cur];
        (*src_cur)++;
        assert(*src_cur < s->src_num);
    }
}

/*
 * Decide how many sectors starting at @sector_num can be handled as one
 * request and store their classification in s->status.
 *
 * The block status is queried again only once @sector_num has reached
 * s->sector_next_status; until then the cached s->status is reused.
 *
 * Returns the number of sectors, or a negative errno value if the block
 * status query failed (an error has been reported in that case).
 */
static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
{
    int64_t src_cur_offset;
    int ret, n, src_cur;
    bool post_backing_zero = false;

    convert_select_part(s, sector_num, &src_cur, &src_cur_offset);

    assert(s->total_sectors > sector_num);
    n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);

    if (s->target_backing_sectors >= 0) {
        if (sector_num >= s->target_backing_sectors) {
            post_backing_zero = s->unallocated_blocks_are_zero;
        } else if (sector_num + n > s->target_backing_sectors) {
            /* Split requests around target_backing_sectors (because
             * starting from there, zeros are handled differently) */
            n = s->target_backing_sectors - sector_num;
        }
    }

    if (s->sector_next_status <= sector_num) {
        /* bytes in, bytes out: the status call updates count in place */
        int64_t count = n * BDRV_SECTOR_SIZE;

        if (s->target_has_backing) {

            ret = bdrv_block_status(blk_bs(s->src[src_cur]),
                                    (sector_num - src_cur_offset) *
                                    BDRV_SECTOR_SIZE,
                                    count, &count, NULL, NULL);
        } else {
            ret = bdrv_block_status_above(blk_bs(s->src[src_cur]), NULL,
                                          (sector_num - src_cur_offset) *
                                          BDRV_SECTOR_SIZE,
                                          count, &count, NULL, NULL);
        }
        if (ret < 0) {
            error_report("error while reading block status of sector %" PRId64
                         ": %s", sector_num, strerror(-ret));
            return ret;
        }
        n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);

        if (ret & BDRV_BLOCK_ZERO) {
            s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
        } else if (ret & BDRV_BLOCK_DATA) {
            s->status = BLK_DATA;
        } else {
            /* Neither ZERO nor DATA was reported for this range. */
            s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
        }

        s->sector_next_status = sector_num + n;
    }

    n = MIN(n, s->sector_next_status - sector_num);
    if (s->status == BLK_DATA) {
        /* Data has to pass through the copy buffer. */
        n = MIN(n, s->buf_sectors);
    }

    /* We need to write complete clusters for compressed images, so if an
     * unallocated area is shorter than that, we must consider the whole
     * cluster allocated. */
    if (s->compressed) {
        if (n < s->cluster_sectors) {
            n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
            s->status = BLK_DATA;
        } else {
            n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
        }
    }

    return n;
}

/*
 * Read @nb_sectors sectors starting at @sector_num of the concatenated
 * source into @buf. Returns 0 on success or the negative value returned by
 * the failing read.
 */
static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
                                        int nb_sectors, uint8_t *buf)
{
    int n, ret;

    assert(nb_sectors <= s->buf_sectors);
    while (nb_sectors > 0) {
        BlockBackend *blk;
        int src_cur;
        int64_t bs_sectors, src_cur_offset;

        /* In the case of compression with multiple source files, we can get a
         * nb_sectors that spreads into the next part. So we must be able to
         * read across multiple BDSes for one convert_read() call.
*/
        convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
        blk = s->src[src_cur];
        bs_sectors = s->src_sectors[src_cur];

        /* Do not read past the end of the current source image. */
        n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));

        ret = blk_co_pread(
                blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS,
                n << BDRV_SECTOR_BITS, buf, 0);
        if (ret < 0) {
            return ret;
        }

        sector_num += n;
        nb_sectors -= n;
        buf += n * BDRV_SECTOR_SIZE;
    }

    return 0;
}


/*
 * Write @nb_sectors sectors starting at @sector_num to the target according
 * to @status: data from @buf is written, zero ranges are written as zeroes
 * (or skipped when s->has_zero_init is set), and BLK_BACKING_FILE ranges are
 * left untouched. Returns 0 on success or the negative value returned by the
 * failing write.
 */
static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
                                         int nb_sectors, uint8_t *buf,
                                         enum ImgConvertBlockStatus status)
{
    int ret;

    while (nb_sectors > 0) {
        int n = nb_sectors;
        BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;

        switch (status) {
        case BLK_BACKING_FILE:
            /* If we have a backing file, leave clusters unallocated that are
             * unallocated in the source image, so that the backing file is
             * visible at the respective offset. */
            assert(s->target_has_backing);
            break;

        case BLK_DATA:
            /* If we're told to keep the target fully allocated (-S 0) or there
             * is real non-zero data, we must write it. Otherwise we can treat
             * it as zero sectors.
             * Compressed clusters need to be written as a whole, so in that
             * case we can only save the write if the buffer is completely
             * zeroed.
*/
            if (!s->min_sparse ||
                (!s->compressed &&
                 is_allocated_sectors_min(buf, n, &n, s->min_sparse,
                                          sector_num, s->alignment)) ||
                (s->compressed &&
                 !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
            {
                ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
                                    n << BDRV_SECTOR_BITS, buf, flags);
                if (ret < 0) {
                    return ret;
                }
                break;
            }
            /* fall-through */

        case BLK_ZERO:
            if (s->has_zero_init) {
                /* Nothing needs to be written for this range. */
                assert(!s->target_has_backing);
                break;
            }
            ret = blk_co_pwrite_zeroes(s->target,
                                       sector_num << BDRV_SECTOR_BITS,
                                       n << BDRV_SECTOR_BITS,
                                       BDRV_REQ_MAY_UNMAP);
            if (ret < 0) {
                return ret;
            }
            break;
        }

        /* n may have been shortened by is_allocated_sectors_min() above. */
        sector_num += n;
        nb_sectors -= n;
        buf += n * BDRV_SECTOR_SIZE;
    }

    return 0;
}

/*
 * Copy @nb_sectors sectors starting at @sector_num from the concatenated
 * source to the target with blk_co_copy_range(), one source image at a time.
 * Returns 0 on success or the negative value of the failing copy.
 */
static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
                                              int nb_sectors)
{
    int n, ret;

    while (nb_sectors > 0) {
        BlockBackend *blk;
        int src_cur;
        int64_t bs_sectors, src_cur_offset;
        int64_t offset;

        convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
        offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
        blk = s->src[src_cur];
        bs_sectors = s->src_sectors[src_cur];

        n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));

        ret = blk_co_copy_range(blk, offset, s->target,
                                sector_num << BDRV_SECTOR_BITS,
                                n << BDRV_SECTOR_BITS, 0, 0);
        if (ret < 0) {
            return ret;
        }

        sector_num += n;
        nb_sectors -= n;
    }
    return 0;
}

/*
 * Coroutine entry point of one copy worker; @opaque is the ImgConvertState.
 *
 * Each worker repeatedly claims the next range under s->lock, reads it (or
 * synthesizes zeroes), optionally waits for its turn when s->wr_in_order is
 * set, and writes it to the target. The first error is stored in s->ret;
 * the last worker to finish turns -EINPROGRESS into 0.
 */
static void coroutine_fn convert_co_do_copy(void *opaque)
{
    ImgConvertState *s = opaque;
    uint8_t *buf = NULL;
    int ret, i;
    int index = -1;

    /* Find our own slot in s->co[]. */
    for (i = 0; i < s->num_coroutines; i++) {
        if (s->co[i] == qemu_coroutine_self()) {
            index = i;
            break;
        }
    }
    assert(index >= 0);

    s->running_coroutines++;
    buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);

    while (1) {
        int n;
        int64_t sector_num;
        enum ImgConvertBlockStatus status;
        bool copy_range;

        qemu_co_mutex_lock(&s->lock);
        if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
            qemu_co_mutex_unlock(&s->lock);
            break;
        }
        n = convert_iteration_sectors(s, s->sector_num);
        if (n < 0) {
            qemu_co_mutex_unlock(&s->lock);
            s->ret = n;
            break;
        }
        /* save current sector and allocation status to local variables */
        sector_num = s->sector_num;
        status = s->status;
        if (!s->min_sparse && s->status == BLK_ZERO) {
            /* Zeroes will be written from buf, so they must fit into it. */
            n = MIN(n, s->buf_sectors);
        }
        /* increment global sector counter so that other coroutines can
         * already continue reading beyond this request */
        s->sector_num += n;
        qemu_co_mutex_unlock(&s->lock);

        if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
            s->allocated_done += n;
            qemu_progress_print(100.0 * s->allocated_done /
                                        s->allocated_sectors, 0);
        }

retry:
        /*
         * NOTE(review): this reads the shared s->status rather than the
         * local 'status' saved under the lock above - confirm that this is
         * intended.
         */
        copy_range = s->copy_range && s->status == BLK_DATA;
        if (status == BLK_DATA && !copy_range) {
            ret = convert_co_read(s, sector_num, n, buf);
            if (ret < 0) {
                error_report("error while reading sector %" PRId64
                             ": %s", sector_num, strerror(-ret));
                s->ret = ret;
            }
        } else if (!s->min_sparse && status == BLK_ZERO) {
            /* Sparse detection is off: write explicit zeroes as data. */
            status = BLK_DATA;
            memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
        }

        if (s->wr_in_order) {
            /* keep writes in order */
            while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
                s->wait_sector_num[index] = sector_num;
                qemu_coroutine_yield();
            }
            s->wait_sector_num[index] = -1;
        }

        if (s->ret == -EINPROGRESS) {
            if (copy_range) {
                ret = convert_co_copy_range(s, sector_num, n);
                if (ret) {
                    /* Disable copy offloading and redo this range. */
                    s->copy_range = false;
                    goto retry;
                }
            } else {
                ret = convert_co_write(s, sector_num, n, buf, status);
            }
            if (ret < 0) {
                error_report("error while writing sector %" PRId64
                             ": %s", sector_num, strerror(-ret));
                s->ret = ret;
            }
        }

        if (s->wr_in_order) {
            /* reenter the coroutine that might have waited
             * for this write to complete */
            s->wr_offs = sector_num + n;
            for (i = 0; i < s->num_coroutines; i++) {
                if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
                    /*
                     * A -> B -> A cannot occur because A has
                     * s->wait_sector_num[i] == -1 during A -> B.  Therefore
                     * B will never enter A during this time window.
                     */
                    qemu_coroutine_enter(s->co[i]);
                    break;
                }
            }
        }
    }

    qemu_vfree(buf);
    s->co[index] = NULL;
    s->running_coroutines--;
    if (!s->running_coroutines && s->ret == -EINPROGRESS) {
        /* the convert job finished successfully */
        s->ret = 0;
    }
}

/*
 * Run the conversion described by @s: determine whether the target reads as
 * zeroes, count the sectors to copy for progress reporting, then start
 * s->num_coroutines workers and wait until all of them have finished.
 *
 * Returns 0 on success or a negative errno value.
 */
static int convert_do_copy(ImgConvertState *s)
{
    int ret, i, n;
    int64_t sector_num = 0;

    /* Check whether we have zero initialisation or can get it efficiently */
    s->has_zero_init = s->min_sparse && !s->target_has_backing
                     ? bdrv_has_zero_init(blk_bs(s->target))
                     : false;

    if (!s->has_zero_init && !s->target_has_backing &&
        bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
    {
        ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
        if (ret == 0) {
            s->has_zero_init = true;
        }
    }

    /* Allocate buffer for copied data. For compressed images, only one cluster
     * can be copied at a time. */
    if (s->compressed) {
        if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
            error_report("invalid cluster size");
            return -EINVAL;
        }
        s->buf_sectors = s->cluster_sectors;
    }

    /* First pass: only count the sectors used as the progress total. */
    while (sector_num < s->total_sectors) {
        n = convert_iteration_sectors(s, sector_num);
        if (n < 0) {
            return n;
        }
        if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
        {
            s->allocated_sectors += n;
        }
        sector_num += n;
    }

    /* Do the copy */
    s->sector_next_status = 0;
    s->ret = -EINPROGRESS;

    qemu_co_mutex_init(&s->lock);
    for (i = 0; i < s->num_coroutines; i++) {
        s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
        s->wait_sector_num[i] = -1;
        qemu_coroutine_enter(s->co[i]);
    }

    while (s->running_coroutines) {
        main_loop_wait(false);
    }

    if (s->compressed && !s->ret) {
        /* signal EOF to align */
        ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return s->ret;
}

/* Upper bound for ImgConvertState.buf_sectors and for the -S argument */
#define MAX_BUF_SECTORS 32768

/*
 * Implementation of 'qemu-img convert'. Returns 0 on success and 1 on any
 * failure (the internal result is collapsed with !!ret).
 */
static int img_convert(int argc, char **argv)
{
    int c, bs_i, flags, src_flags = 0;
    const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
               *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
               *out_filename, *out_baseimg_param,
*snapshot_name = NULL;
    BlockDriver *drv = NULL, *proto_drv = NULL;
    BlockDriverInfo bdi;
    BlockDriverState *out_bs;
    QemuOpts *opts = NULL, *sn_opts = NULL;
    QemuOptsList *create_opts = NULL;
    QDict *open_opts = NULL;
    char *options = NULL;
    Error *local_err = NULL;
    bool writethrough, src_writethrough, quiet = false, image_opts = false,
         skip_create = false, progress = false, tgt_image_opts = false;
    int64_t ret = -EINVAL;
    bool force_share = false;
    bool explict_min_sparse = false;

    ImgConvertState s = (ImgConvertState) {
        /* Need at least 4k of zeros for sparse detection */
        .min_sparse         = 8,
        .copy_range         = false,
        .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
        .wr_in_order        = true,
        .num_coroutines     = 8,
    };

    for(;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {"force-share", no_argument, 0, 'U'},
            {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
            {0, 0, 0, 0}
        };
        c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WU",
                        long_options, NULL);
        if (c == -1) {
            break;
        }
        switch(c) {
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
            help();
            break;
        case 'f':
            fmt = optarg;
            break;
        case 'O':
            out_fmt = optarg;
            break;
        case 'B':
            out_baseimg = optarg;
            break;
        case 'C':
            s.copy_range = true;
            break;
        case 'c':
            s.compressed = true;
            break;
        case 'o':
            if (!is_valid_option_list(optarg)) {
                error_report("Invalid option list: %s", optarg);
                goto fail_getopt;
            }
            /* Repeated -o arguments are joined with commas. */
            if (!options) {
                options = g_strdup(optarg);
            } else {
                char *old_options = options;
                options = g_strdup_printf("%s,%s", options, optarg);
                g_free(old_options);
            }
            break;
        case 'l':
            /* Either a snapshot option string or a plain snapshot name. */
            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
                                                  optarg, false);
                if (!sn_opts) {
                    error_report("Failed in parsing snapshot param '%s'",
                                 optarg);
                    goto fail_getopt;
                }
            } else {
                snapshot_name = optarg;
            }
            break;
        case 'S':
        {
            int64_t sval;

            sval = cvtnum(optarg);
            if (sval < 0 || sval & (BDRV_SECTOR_SIZE - 1) ||
                sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
                error_report("Invalid buffer size for sparse output specified. "
                    "Valid sizes are multiples of %llu up to %llu. Select "
                    "0 to disable sparse detection (fully allocates output).",
                    BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
                goto fail_getopt;
            }

            /* min_sparse is kept in sectors. */
            s.min_sparse = sval / BDRV_SECTOR_SIZE;
            explict_min_sparse = true;
            break;
        }
        case 'p':
            progress = true;
            break;
        case 't':
            cache = optarg;
            break;
        case 'T':
            src_cache = optarg;
            break;
        case 'q':
            quiet = true;
            break;
        case 'n':
            skip_create = true;
            break;
        case 'm':
            if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
                s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
                error_report("Invalid number of coroutines. Allowed number of"
                             " coroutines is between 1 and %d", MAX_COROUTINES);
                goto fail_getopt;
            }
            break;
        case 'W':
            s.wr_in_order = false;
            break;
        case 'U':
            force_share = true;
            break;
        case OPTION_OBJECT: {
            QemuOpts *object_opts;
            object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
                                                  optarg, true);
            if (!object_opts) {
                goto fail_getopt;
            }
            break;
        }
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        case OPTION_TARGET_IMAGE_OPTS:
            tgt_image_opts = true;
            break;
        }
    }

    if (!out_fmt && !tgt_image_opts) {
        out_fmt = "raw";
    }

    if (qemu_opts_foreach(&qemu_object_opts,
                          user_creatable_add_opts_foreach,
                          NULL, &error_fatal)) {
        goto fail_getopt;
    }

    /* Reject option combinations that cannot work together. */
    if (s.compressed && s.copy_range) {
        error_report("Cannot enable copy offloading when -c is used");
        goto fail_getopt;
    }

    if (explict_min_sparse && s.copy_range) {
        error_report("Cannot enable copy offloading when -S is used");
        goto fail_getopt;
    }

    if (tgt_image_opts && !skip_create) {
        error_report("--target-image-opts requires use of -n flag");
        goto fail_getopt;
    }

    /* The last positional argument is the target, all others are sources. */
    s.src_num = argc - optind - 1;
    out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;

    if (options && has_help_option(options)) {
        if (out_fmt) {
            ret = print_block_option_help(out_filename, out_fmt);
            goto fail_getopt;
        } else {
            error_report("Option help requires a format be specified");
            goto fail_getopt;
        }
    }

    if (s.src_num < 1) {
        error_report("Must specify image file name");
        goto fail_getopt;
    }


    /* ret is still -EINVAL until here */
    ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
    if (ret < 0) {
        error_report("Invalid source cache option: %s", src_cache);
        goto fail_getopt;
    }

    /* Initialize before goto out */
    if (quiet) {
        progress = false;
    }
    qemu_progress_init(progress, 1.0);
    qemu_progress_print(0, 100);

    s.src = g_new0(BlockBackend *, s.src_num);
    s.src_sectors = g_new(int64_t, s.src_num);

    /* Open every source and accumulate the total length. */
    for (bs_i = 0; bs_i < s.src_num; bs_i++) {
        s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
                               fmt, src_flags, src_writethrough, quiet,
                               force_share);
        if (!s.src[bs_i]) {
            ret = -1;
            goto out;
        }
        s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
        if (s.src_sectors[bs_i] < 0) {
            error_report("Could not get size of %s: %s",
                         argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
            ret = -1;
            goto out;
        }
        s.total_sectors += s.src_sectors[bs_i];
    }

    /* A snapshot, if requested, is loaded on the first source only. */
    if (sn_opts) {
        bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
                               qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
                               qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
                               &local_err);
    } else if (snapshot_name != NULL) {
        if (s.src_num > 1) {
            error_report("No support for concatenating multiple snapshot");
            ret = -1;
            goto out;
        }

        bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
                                             &local_err);
    }
    if (local_err) {
        error_reportf_err(local_err, "Failed to load snapshot: ");
        ret = -1;
        goto out;
    }

    if (!skip_create) {
        /* Find driver and parse its options */
        drv = bdrv_find_format(out_fmt);
        if (!drv) {
            error_report("Unknown file format '%s'", out_fmt);
            ret = -1;
            goto out;
        }

        proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
        if (!proto_drv) {
            error_report_err(local_err);
            ret = -1;
            goto out;
        }

        if (!drv->create_opts) {
            error_report("Format driver '%s' does not support image creation",
                         drv->format_name);
            ret = -1;
            goto out;
        }

        if (!proto_drv->create_opts) {
            error_report("Protocol driver '%s' does not support image creation",
                         proto_drv->format_name);
            ret = -1;
            goto out;
        }

        /* Creation options are the union of format and protocol options. */
        create_opts = qemu_opts_append(create_opts, drv->create_opts);
        create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

        opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
        if (options) {
            qemu_opts_do_parse(opts, options, NULL, &local_err);
            if (local_err) {
                error_report_err(local_err);
                ret = -1;
                goto out;
            }
        }

        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s.total_sectors * 512,
                            &error_abort);
        ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
        if (ret < 0) {
            goto out;
        }
    }

    /* Get backing file name if -o backing_file was used */
    out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (out_baseimg_param) {
        out_baseimg = out_baseimg_param;
    }
    s.target_has_backing = (bool) out_baseimg;

    if (s.src_num > 1 && out_baseimg) {
        error_report("Having a backing file for the target makes no sense when "
                     "concatenating multiple input images");
        ret = -1;
        goto out;
    }

    /* Check if compression is supported */
    if (s.compressed) {
        bool encryption =
            qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
        const char *encryptfmt =
            qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
        const char *preallocation =
            qemu_opt_get(opts, BLOCK_OPT_PREALLOC);

        /* drv is only set when the target is created here (no -n). */
        if (drv && !drv->bdrv_co_pwritev_compressed) {
            error_report("Compression not supported for this file format");
            ret = -1;
            goto out;
        }

        if (encryption || encryptfmt) {
            error_report("Compression and encryption not supported at "
                         "the same time");
            ret = -1;
            goto out;
        }

        if (preallocation
            && strcmp(preallocation, "off"))
        {
            error_report("Compression and preallocation not supported at "
                         "the same time");
            ret = -1;
            goto out;
        }
    }

    /*
     * The later open call will need any decryption secrets, and
     * bdrv_create() will purge "opts", so extract them now before
     * they are lost.
     */
    if (!skip_create) {
        open_opts = qdict_new();
        qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
    }

    if (!skip_create) {
        /* Create the new image */
        ret = bdrv_create(drv, out_filename, opts, &local_err);
        if (ret < 0) {
            error_reportf_err(local_err, "%s: error while converting %s: ",
                              out_filename, out_fmt);
            goto out;
        }
    }

    /* Unmapping is only requested when sparse detection is enabled. */
    flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
    if (ret < 0) {
        error_report("Invalid cache option: %s", cache);
        goto out;
    }

    if (skip_create) {
        s.target = img_open(tgt_image_opts, out_filename, out_fmt,
                            flags, writethrough, quiet, false);
    } else {
        /* TODO ultimately we should allow --target-image-opts
         * to be used even when -n is not given.
         * That has to wait for bdrv_create to be improved
         * to allow filenames in option syntax
         */
        s.target = img_open_file(out_filename, open_opts, out_fmt,
                                 flags, writethrough, quiet, false);
        open_opts = NULL; /* blk_new_open will have freed it */
    }
    if (!s.target) {
        ret = -1;
        goto out;
    }
    out_bs = blk_bs(s.target);

    /* With -n the driver is only known now that the target is open. */
    if (s.compressed && !out_bs->drv->bdrv_co_pwritev_compressed) {
        error_report("Compression not supported for this file format");
        ret = -1;
        goto out;
    }

    /* increase bufsectors from the default 4096 (2M) if opt_transfer
     * or discard_alignment of the out_bs is greater. Limit to
     * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
    s.buf_sectors = MIN(MAX_BUF_SECTORS,
                        MAX(s.buf_sectors,
                            MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
                                out_bs->bl.pdiscard_alignment >>
                                BDRV_SECTOR_BITS)));

    /* try to align the write requests to the destination to avoid unnecessary
     * RMW cycles.
*/ s.alignment = MAX(pow2floor(s.min_sparse), DIV_ROUND_UP(out_bs->bl.request_alignment, BDRV_SECTOR_SIZE)); assert(is_power_of_2(s.alignment)); if (skip_create) { int64_t output_sectors = blk_nb_sectors(s.target); if (output_sectors < 0) { error_report("unable to get output image length: %s", strerror(-output_sectors)); ret = -1; goto out; } else if (output_sectors < s.total_sectors) { error_report("output file is smaller than input file"); ret = -1; goto out; } } if (s.target_has_backing) { /* Errors are treated as "backing length unknown" (which means * s.target_backing_sectors has to be negative, which it will * be automatically). The backing file length is used only * for optimizations, so such a case is not fatal. */ s.target_backing_sectors = bdrv_nb_sectors(out_bs->backing->bs); } else { s.target_backing_sectors = -1; } ret = bdrv_get_info(out_bs, &bdi); if (ret < 0) { if (s.compressed) { error_report("could not get block driver info"); goto out; } } else { s.compressed = s.compressed || bdi.needs_compressed_writes; s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE; s.unallocated_blocks_are_zero = bdi.unallocated_blocks_are_zero; } ret = convert_do_copy(&s); out: if (!ret) { qemu_progress_print(100, 0); } qemu_progress_end(); qemu_opts_del(opts); qemu_opts_free(create_opts); qemu_opts_del(sn_opts); qobject_unref(open_opts); blk_unref(s.target); if (s.src) { for (bs_i = 0; bs_i < s.src_num; bs_i++) { blk_unref(s.src[bs_i]); } g_free(s.src); } g_free(s.src_sectors); fail_getopt: g_free(options); return !!ret; } static void dump_snapshots(BlockDriverState *bs) { QEMUSnapshotInfo *sn_tab, *sn; int nb_sns, i; nb_sns = bdrv_snapshot_list(bs, &sn_tab); if (nb_sns <= 0) return; printf("Snapshot list:\n"); bdrv_snapshot_dump(NULL); printf("\n"); for(i = 0; i < nb_sns; i++) { sn = &sn_tab[i]; bdrv_snapshot_dump(sn); printf("\n"); } g_free(sn_tab); } static void dump_json_image_info_list(ImageInfoList *list) { QString *str; QObject *obj; Visitor *v = 
qobject_output_visitor_new(&obj); visit_type_ImageInfoList(v, NULL, &list, &error_abort); visit_complete(v, &obj); str = qobject_to_json_pretty(obj); assert(str != NULL); printf("%s\n", qstring_get_str(str)); qobject_unref(obj); visit_free(v); qobject_unref(str); } static void dump_json_image_info(ImageInfo *info) { QString *str; QObject *obj; Visitor *v = qobject_output_visitor_new(&obj); visit_type_ImageInfo(v, NULL, &info, &error_abort); visit_complete(v, &obj); str = qobject_to_json_pretty(obj); assert(str != NULL); printf("%s\n", qstring_get_str(str)); qobject_unref(obj); visit_free(v); qobject_unref(str); } static void dump_human_image_info_list(ImageInfoList *list) { ImageInfoList *elem; bool delim = false; for (elem = list; elem; elem = elem->next) { if (delim) { printf("\n"); } delim = true; bdrv_image_info_dump(elem->value); } } static gboolean str_equal_func(gconstpointer a, gconstpointer b) { return strcmp(a, b) == 0; } /** * Open an image file chain and return an ImageInfoList * * @filename: topmost image filename * @fmt: topmost image format (may be NULL to autodetect) * @chain: true - enumerate entire backing file chain * false - only topmost image file * * Returns a list of ImageInfo objects or NULL if there was an error opening an * image file. If there was an error a message will have been printed to * stderr. 
*/
static ImageInfoList *collect_image_info_list(bool image_opts,
                                              const char *filename,
                                              const char *fmt,
                                              bool chain, bool force_share)
{
    ImageInfoList *head = NULL;
    ImageInfoList **last = &head;   /* where the next element gets linked */
    GHashTable *filenames;          /* names already visited (loop guard) */
    Error *err = NULL;

    filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);

    while (filename) {
        BlockBackend *blk;
        BlockDriverState *bs;
        ImageInfo *info;
        ImageInfoList *elem;

        /* Seeing the same name twice means the chain refers to itself. */
        if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
            error_report("Backing file '%s' creates an infinite loop.",
                         filename);
            goto err;
        }
        g_hash_table_insert(filenames, (gpointer)filename, NULL);

        /* Each image is opened on its own, without its backing file. */
        blk = img_open(image_opts, filename, fmt,
                       BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
                       force_share);
        if (!blk) {
            goto err;
        }
        bs = blk_bs(blk);

        bdrv_query_image_info(bs, &info, &err);
        if (err) {
            error_report_err(err);
            blk_unref(blk);
            goto err;
        }

        elem = g_new0(ImageInfoList, 1);
        elem->value = info;
        *last = elem;
        last = &elem->next;

        blk_unref(blk);

        /* Continue with the backing file, if the whole chain was requested. */
        filename = fmt = NULL;
        if (chain) {
            if (info->has_full_backing_filename) {
                filename = info->full_backing_filename;
            } else if (info->has_backing_filename) {
                error_report("Could not determine absolute backing filename,"
                             " but backing filename '%s' present",
                             info->backing_filename);
                goto err;
            }
            if (info->has_backing_filename_format) {
                fmt = info->backing_filename_format;
            }
        }
    }
    g_hash_table_destroy(filenames);
    return head;

err:
    qapi_free_ImageInfoList(head);
    g_hash_table_destroy(filenames);
    return NULL;
}

/*
 * Implementation of 'qemu-img info'. Prints information about one image, or
 * about its whole backing chain with --backing-chain, in human-readable or
 * JSON form. Returns 0 on success and 1 on error.
 */
static int img_info(int argc, char **argv)
{
    int c;
    OutputFormat output_format = OFORMAT_HUMAN;
    bool chain = false;
    const char *filename, *fmt, *output;
    ImageInfoList *list;
    bool image_opts = false;
    bool force_share = false;

    fmt = NULL;
    output = NULL;
    for(;;) {
        int option_index = 0;
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"format", required_argument, 0, 'f'},
            {"output", required_argument, 0, OPTION_OUTPUT},
            {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
            {"object", required_argument, 0, OPTION_OBJECT},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {"force-share", no_argument, 0, 'U'},
            {0, 0, 0, 0}
        };
        c = getopt_long(argc, argv, ":f:hU",
                        long_options, &option_index);
        if (c == -1) {
            break;
        }
        switch(c) {
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
            help();
            break;
        case 'f':
            fmt = optarg;
            break;
        case 'U':
            force_share = true;
            break;
        case OPTION_OUTPUT:
            output = optarg;
            break;
        case OPTION_BACKING_CHAIN:
            chain = true;
            break;
        case OPTION_OBJECT: {
            QemuOpts *opts;
            opts = qemu_opts_parse_noisily(&qemu_object_opts,
                                           optarg, true);
            if (!opts) {
                return 1;
            }
        }   break;
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        }
    }
    if (optind != argc - 1) {
        error_exit("Expecting one image file name");
    }
    filename = argv[optind++];

    /* Without --output the human-readable format stays selected. */
    if (output && !strcmp(output, "json")) {
        output_format = OFORMAT_JSON;
    } else if (output && !strcmp(output, "human")) {
        output_format = OFORMAT_HUMAN;
    } else if (output) {
        error_report("--output must be used with human or json as argument.");
        return 1;
    }

    if (qemu_opts_foreach(&qemu_object_opts,
                          user_creatable_add_opts_foreach,
                          NULL, &error_fatal)) {
        return 1;
    }

    list = collect_image_info_list(image_opts, filename, fmt, chain,
                                   force_share);
    if (!list) {
        return 1;
    }

    switch (output_format) {
    case OFORMAT_HUMAN:
        dump_human_image_info_list(list);
        break;
    case OFORMAT_JSON:
        /* A JSON array is only emitted when the chain was requested. */
        if (chain) {
            dump_json_image_info_list(list);
        } else {
            dump_json_image_info(list->value);
        }
        break;
    }

    qapi_free_ImageInfoList(list);
    return 0;
}

/*
 * Print one map entry @e in the requested format. @next is the entry that
 * follows (NULL for the last one); in human mode its flags may be rewritten
 * to allow more coalescing, in JSON mode it decides whether the array is
 * closed. Returns 0 on success, -1 if @e cannot be shown in human format.
 */
static int dump_map_entry(OutputFormat output_format, MapEntry *e,
                          MapEntry *next)
{
    switch (output_format) {
    case OFORMAT_HUMAN:
        if (e->data && !e->has_offset) {
            error_report("File contains external, encrypted or compressed clusters.");
            return -1;
        }
        /* Only ranges holding data that is not zero are listed. */
        if (e->data && !e->zero) {
            printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
                   e->start, e->length,
                   e->has_offset ? e->offset : 0,
                   e->has_filename ?
e->filename : "");
        }
        /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
         * Modify the flags here to allow more coalescing.
         */
        if (next && (!next->data || next->zero)) {
            next->data = false;
            next->zero = true;
        }
        break;
    case OFORMAT_JSON:
        /* The entry starting at 0 opens the array; the others are appended. */
        printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
               " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
               (e->start == 0 ? "[" : ",\n"),
               e->start, e->length, e->depth,
               e->zero ? "true" : "false",
               e->data ? "true" : "false");
        if (e->has_offset) {
            printf(", \"offset\": %"PRId64"", e->offset);
        }
        putchar('}');

        /* The last entry closes the array. */
        if (!next) {
            printf("]\n");
        }
        break;
    }
    return 0;
}

/*
 * Fill @e with the status of the range starting at @offset, at most @bytes
 * long, walking down the backing chain of @bs until a layer reports ZERO or
 * DATA. e->depth is the number of backing links followed; e->length is the
 * length of the range the reported status applies to.
 *
 * Returns 0 on success or the negative value returned by the failing block
 * status query.
 */
static int get_block_status(BlockDriverState *bs, int64_t offset,
                            int64_t bytes, MapEntry *e)
{
    int ret;
    int depth;
    BlockDriverState *file;
    bool has_offset;
    int64_t map;
    char *filename = NULL;

    /* As an optimization, we could cache the current range of unallocated
     * clusters in each file of the chain, and avoid querying the same
     * range repeatedly.
     */

    depth = 0;
    for (;;) {
        /* bytes is narrowed to the range this layer's answer covers. */
        ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
        if (ret < 0) {
            return ret;
        }
        assert(bytes);
        if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
            break;
        }
        bs = backing_bs(bs);
        if (bs == NULL) {
            /* End of the chain reached: report neither data nor zero. */
            ret = 0;
            break;
        }

        depth++;
    }

    has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);

    /* A file name is only reported together with a valid offset. */
    if (file && has_offset) {
        bdrv_refresh_filename(file);
        filename = file->filename;
    }

    *e = (MapEntry) {
        .start = offset,
        .length = bytes,
        .data = !!(ret & BDRV_BLOCK_DATA),
        .zero = !!(ret & BDRV_BLOCK_ZERO),
        .offset = map,
        .has_offset = has_offset,
        .depth = depth,
        .has_filename = filename,
        .filename = filename,
    };

    return 0;
}

/*
 * Tell whether @next can be merged into @curr: @curr must be non-empty, all
 * flags, depth and file name must agree, and if offsets are valid @next
 * must start exactly where @curr ends in the file.
 */
static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
{
    /* An empty current entry has nothing to merge with. */
    if (curr->length == 0) {
        return false;
    }
    if (curr->zero != next->zero ||
        curr->data != next->data ||
        curr->depth != next->depth ||
        curr->has_filename != next->has_filename ||
        curr->has_offset != next->has_offset) {
        return false;
    }
    if (curr->has_filename && strcmp(curr->filename, next->filename)) {
        return false;
    }
    if
(curr->has_offset && curr->offset + curr->length != next->offset) { return false; } return true; } static int img_map(int argc, char **argv) { int c; OutputFormat output_format = OFORMAT_HUMAN; BlockBackend *blk; BlockDriverState *bs; const char *filename, *fmt, *output; int64_t length; MapEntry curr = { .length = 0 }, next; int ret = 0; bool image_opts = false; bool force_share = false; fmt = NULL; output = NULL; for (;;) { int option_index = 0; static const struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"format", required_argument, 0, 'f'}, {"output", required_argument, 0, OPTION_OUTPUT}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; c = getopt_long(argc, argv, ":f:hU", long_options, &option_index); if (c == -1) { break; } switch (c) { case ':': missing_argument(argv[optind - 1]); break; case '?': unrecognized_option(argv[optind - 1]); break; case 'h': help(); break; case 'f': fmt = optarg; break; case 'U': force_share = true; break; case OPTION_OUTPUT: output = optarg; break; case OPTION_OBJECT: { QemuOpts *opts; opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true); if (!opts) { return 1; } } break; case OPTION_IMAGE_OPTS: image_opts = true; break; } } if (optind != argc - 1) { error_exit("Expecting one image file name"); } filename = argv[optind]; if (output && !strcmp(output, "json")) { output_format = OFORMAT_JSON; } else if (output && !strcmp(output, "human")) { output_format = OFORMAT_HUMAN; } else if (output) { error_report("--output must be used with human or json as argument."); return 1; } if (qemu_opts_foreach(&qemu_object_opts, user_creatable_add_opts_foreach, NULL, &error_fatal)) { return 1; } blk = img_open(image_opts, filename, fmt, 0, false, false, force_share); if (!blk) { return 1; } bs = blk_bs(blk); if (output_format == OFORMAT_HUMAN) { printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File"); } 
length = blk_getlength(blk); while (curr.start + curr.length < length) { int64_t offset = curr.start + curr.length; int64_t n; /* Probe up to 1 GiB at a time. */ n = MIN(1 * GiB, length - offset); ret = get_block_status(bs, offset, n, &next); if (ret < 0) { error_report("Could not read file metadata: %s", strerror(-ret)); goto out; } if (entry_mergeable(&curr, &next)) { curr.length += next.length; continue; } if (curr.length > 0) { ret = dump_map_entry(output_format, &curr, &next); if (ret < 0) { goto out; } } curr = next; } ret = dump_map_entry(output_format, &curr, NULL); out: blk_unref(blk); return ret < 0; } #define SNAPSHOT_LIST 1 #define SNAPSHOT_CREATE 2 #define SNAPSHOT_APPLY 3 #define SNAPSHOT_DELETE 4 static int img_snapshot(int argc, char **argv) { BlockBackend *blk; BlockDriverState *bs; QEMUSnapshotInfo sn; char *filename, *snapshot_name = NULL; int c, ret = 0, bdrv_oflags; int action = 0; qemu_timeval tv; bool quiet = false; Error *err = NULL; bool image_opts = false; bool force_share = false; bdrv_oflags = BDRV_O_RDWR; /* Parse commandline parameters */ for(;;) { static const struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; c = getopt_long(argc, argv, ":la:c:d:hqU", long_options, NULL); if (c == -1) { break; } switch(c) { case ':': missing_argument(argv[optind - 1]); break; case '?': unrecognized_option(argv[optind - 1]); break; case 'h': help(); return 0; case 'l': if (action) { error_exit("Cannot mix '-l', '-a', '-c', '-d'"); return 0; } action = SNAPSHOT_LIST; bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */ break; case 'a': if (action) { error_exit("Cannot mix '-l', '-a', '-c', '-d'"); return 0; } action = SNAPSHOT_APPLY; snapshot_name = optarg; break; case 'c': if (action) { error_exit("Cannot mix '-l', '-a', '-c', '-d'"); return 0; } action = SNAPSHOT_CREATE; snapshot_name = 
optarg; break; case 'd': if (action) { error_exit("Cannot mix '-l', '-a', '-c', '-d'"); return 0; } action = SNAPSHOT_DELETE; snapshot_name = optarg; break; case 'q': quiet = true; break; case 'U': force_share = true; break; case OPTION_OBJECT: { QemuOpts *opts; opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true); if (!opts) { return 1; } } break; case OPTION_IMAGE_OPTS: image_opts = true; break; } } if (optind != argc - 1) { error_exit("Expecting one image file name"); } filename = argv[optind++]; if (qemu_opts_foreach(&qemu_object_opts, user_creatable_add_opts_foreach, NULL, &error_fatal)) { return 1; } /* Open the image */ blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet, force_share); if (!blk) { return 1; } bs = blk_bs(blk); /* Perform the requested action */ switch(action) { case SNAPSHOT_LIST: dump_snapshots(bs); break; case SNAPSHOT_CREATE: memset(&sn, 0, sizeof(sn)); pstrcpy(sn.name, sizeof(sn.name), snapshot_name); qemu_gettimeofday(&tv); sn.date_sec = tv.tv_sec; sn.date_nsec = tv.tv_usec * 1000; ret = bdrv_snapshot_create(bs, &sn); if (ret) { error_report("Could not create snapshot '%s': %d (%s)", snapshot_name, ret, strerror(-ret)); } break; case SNAPSHOT_APPLY: ret = bdrv_snapshot_goto(bs, snapshot_name, &err); if (ret) { error_reportf_err(err, "Could not apply snapshot '%s': ", snapshot_name); } break; case SNAPSHOT_DELETE: ret = bdrv_snapshot_find(bs, &sn, snapshot_name); if (ret < 0) { error_report("Could not delete snapshot '%s': snapshot not " "found", snapshot_name); ret = 1; } else { ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err); if (ret < 0) { error_reportf_err(err, "Could not delete snapshot '%s': ", snapshot_name); ret = 1; } } break; } /* Cleanup */ blk_unref(blk); if (ret) { return 1; } return 0; } static int img_rebase(int argc, char **argv) { BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL; uint8_t *buf_old = NULL; uint8_t *buf_new = NULL; BlockDriverState *bs = NULL, 
*prefix_chain_bs = NULL; char *filename; const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg; int c, flags, src_flags, ret; bool writethrough, src_writethrough; int unsafe = 0; bool force_share = false; int progress = 0; bool quiet = false; Error *local_err = NULL; bool image_opts = false; /* Parse commandline parameters */ fmt = NULL; cache = BDRV_DEFAULT_CACHE; src_cache = BDRV_DEFAULT_CACHE; out_baseimg = NULL; out_basefmt = NULL; for(;;) { static const struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU", long_options, NULL); if (c == -1) { break; } switch(c) { case ':': missing_argument(argv[optind - 1]); break; case '?': unrecognized_option(argv[optind - 1]); break; case 'h': help(); return 0; case 'f': fmt = optarg; break; case 'F': out_basefmt = optarg; break; case 'b': out_baseimg = optarg; break; case 'u': unsafe = 1; break; case 'p': progress = 1; break; case 't': cache = optarg; break; case 'T': src_cache = optarg; break; case 'q': quiet = true; break; case OPTION_OBJECT: { QemuOpts *opts; opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true); if (!opts) { return 1; } } break; case OPTION_IMAGE_OPTS: image_opts = true; break; case 'U': force_share = true; break; } } if (quiet) { progress = 0; } if (optind != argc - 1) { error_exit("Expecting one image file name"); } if (!unsafe && !out_baseimg) { error_exit("Must specify backing file (-b) or use unsafe mode (-u)"); } filename = argv[optind++]; if (qemu_opts_foreach(&qemu_object_opts, user_creatable_add_opts_foreach, NULL, &error_fatal)) { return 1; } qemu_progress_init(progress, 2.0); qemu_progress_print(0, 100); flags = BDRV_O_RDWR | (unsafe ? 
BDRV_O_NO_BACKING : 0); ret = bdrv_parse_cache_mode(cache, &flags, &writethrough); if (ret < 0) { error_report("Invalid cache option: %s", cache); goto out; } src_flags = 0; ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough); if (ret < 0) { error_report("Invalid source cache option: %s", src_cache); goto out; } /* The source files are opened read-only, don't care about WCE */ assert((src_flags & BDRV_O_RDWR) == 0); (void) src_writethrough; /* * Open the images. * * Ignore the old backing file for unsafe rebase in case we want to correct * the reference to a renamed or moved backing file. */ blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet, false); if (!blk) { ret = -1; goto out; } bs = blk_bs(blk); if (out_basefmt != NULL) { if (bdrv_find_format(out_basefmt) == NULL) { error_report("Invalid format name: '%s'", out_basefmt); ret = -1; goto out; } } /* For safe rebasing we need to compare old and new backing file */ if (!unsafe) { QDict *options = NULL; BlockDriverState *base_bs = backing_bs(bs); if (base_bs) { blk_old_backing = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); ret = blk_insert_bs(blk_old_backing, base_bs, &local_err); if (ret < 0) { error_reportf_err(local_err, "Could not reuse old backing file '%s': ", base_bs->filename); goto out; } } else { blk_old_backing = NULL; } if (out_baseimg[0]) { const char *overlay_filename; char *out_real_path; options = qdict_new(); if (out_basefmt) { qdict_put_str(options, "driver", out_basefmt); } if (force_share) { qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); } bdrv_refresh_filename(bs); overlay_filename = bs->exact_filename[0] ? bs->exact_filename : bs->filename; out_real_path = bdrv_get_full_backing_filename_from_filename(overlay_filename, out_baseimg, &local_err); if (local_err) { error_reportf_err(local_err, "Could not resolve backing filename: "); ret = -1; goto out; } /* * Find out whether we rebase an image on top of a previous image * in its chain. 
*/ prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path); blk_new_backing = blk_new_open(out_real_path, NULL, options, src_flags, &local_err); g_free(out_real_path); if (!blk_new_backing) { error_reportf_err(local_err, "Could not open new backing file '%s': ", out_baseimg); ret = -1; goto out; } } } /* * Check each unallocated cluster in the COW file. If it is unallocated, * accesses go to the backing file. We must therefore compare this cluster * in the old and new backing file, and if they differ we need to copy it * from the old backing file into the COW file. * * If qemu-img crashes during this step, no harm is done. The content of * the image is the same as the original one at any time. */ if (!unsafe) { int64_t size; int64_t old_backing_size = 0; int64_t new_backing_size = 0; uint64_t offset; int64_t n; float local_progress = 0; buf_old = blk_blockalign(blk, IO_BUF_SIZE); buf_new = blk_blockalign(blk, IO_BUF_SIZE); size = blk_getlength(blk); if (size < 0) { error_report("Could not get size of '%s': %s", filename, strerror(-size)); ret = -1; goto out; } if (blk_old_backing) { old_backing_size = blk_getlength(blk_old_backing); if (old_backing_size < 0) { char backing_name[PATH_MAX]; bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); error_report("Could not get size of '%s': %s", backing_name, strerror(-old_backing_size)); ret = -1; goto out; } } if (blk_new_backing) { new_backing_size = blk_getlength(blk_new_backing); if (new_backing_size < 0) { error_report("Could not get size of '%s': %s", out_baseimg, strerror(-new_backing_size)); ret = -1; goto out; } } if (size != 0) { local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE)); } for (offset = 0; offset < size; offset += n) { bool buf_old_is_zero = false; /* How many bytes can we handle with the next read? 
*/ n = MIN(IO_BUF_SIZE, size - offset); /* If the cluster is allocated, we don't need to take action */ ret = bdrv_is_allocated(bs, offset, n, &n); if (ret < 0) { error_report("error while reading image metadata: %s", strerror(-ret)); goto out; } if (ret) { continue; } if (prefix_chain_bs) { /* * If cluster wasn't changed since prefix_chain, we don't need * to take action */ ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs, offset, n, &n); if (ret < 0) { error_report("error while reading image metadata: %s", strerror(-ret)); goto out; } if (!ret) { continue; } } /* * Read old and new backing file and take into consideration that * backing files may be smaller than the COW image. */ if (offset >= old_backing_size) { memset(buf_old, 0, n); buf_old_is_zero = true; } else { if (offset + n > old_backing_size) { n = old_backing_size - offset; } ret = blk_pread(blk_old_backing, offset, buf_old, n); if (ret < 0) { error_report("error while reading from old backing file"); goto out; } } if (offset >= new_backing_size || !blk_new_backing) { memset(buf_new, 0, n); } else { if (offset + n > new_backing_size) { n = new_backing_size - offset; } ret = blk_pread(blk_new_backing, offset, buf_new, n); if (ret < 0) { error_report("error while reading from new backing file"); goto out; } } /* If they differ, we need to write to the COW file */ uint64_t written = 0; while (written < n) { int64_t pnum; if (compare_buffers(buf_old + written, buf_new + written, n - written, &pnum)) { if (buf_old_is_zero) { ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0); } else { ret = blk_pwrite(blk, offset + written, buf_old + written, pnum, 0); } if (ret < 0) { error_report("Error while writing to COW image: %s", strerror(-ret)); goto out; } } written += pnum; } qemu_progress_print(local_progress, 100); } } /* * Change the backing file. 
All clusters that are different from the old * backing file are overwritten in the COW file now, so the visible content * doesn't change when we switch the backing file. */ if (out_baseimg && *out_baseimg) { ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt); } else { ret = bdrv_change_backing_file(bs, NULL, NULL); } if (ret == -ENOSPC) { error_report("Could not change the backing file to '%s': No " "space left in the file header", out_baseimg); } else if (ret < 0) { error_report("Could not change the backing file to '%s': %s", out_baseimg, strerror(-ret)); } qemu_progress_print(100, 0); /* * TODO At this point it is possible to check if any clusters that are * allocated in the COW file are the same in the backing file. If so, they * could be dropped from the COW file. Don't do this before switching the * backing file, in case of a crash this would lead to corruption. */ out: qemu_progress_end(); /* Cleanup */ if (!unsafe) { blk_unref(blk_old_backing); blk_unref(blk_new_backing); } qemu_vfree(buf_old); qemu_vfree(buf_new); blk_unref(blk); if (ret) { return 1; } return 0; } static int img_resize(int argc, char **argv) { Error *err = NULL; int c, ret, relative; const char *filename, *fmt, *size; int64_t n, total_size, current_size, new_size; bool quiet = false; BlockBackend *blk = NULL; PreallocMode prealloc = PREALLOC_MODE_OFF; QemuOpts *param; static QemuOptsList resize_options = { .name = "resize_options", .head = QTAILQ_HEAD_INITIALIZER(resize_options.head), .desc = { { .name = BLOCK_OPT_SIZE, .type = QEMU_OPT_SIZE, .help = "Virtual disk size" }, { /* end of list */ } }, }; bool image_opts = false; bool shrink = false; /* Remove size from argv manually so that negative numbers are not treated * as options by getopt. 
*/ if (argc < 3) { error_exit("Not enough arguments"); return 1; } size = argv[--argc]; /* Parse getopt arguments */ fmt = NULL; for(;;) { static const struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"object", required_argument, 0, OPTION_OBJECT}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {"preallocation", required_argument, 0, OPTION_PREALLOCATION}, {"shrink", no_argument, 0, OPTION_SHRINK}, {0, 0, 0, 0} }; c = getopt_long(argc, argv, ":f:hq", long_options, NULL); if (c == -1) { break; } switch(c) { case ':': missing_argument(argv[optind - 1]); break; case '?': unrecognized_option(argv[optind - 1]); break; case 'h': help(); break; case 'f': fmt = optarg; break; case 'q': quiet = true; break; case OPTION_OBJECT: { QemuOpts *opts; opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true); if (!opts) { return 1; } } break; case OPTION_IMAGE_OPTS: image_opts = true; break; case OPTION_PREALLOCATION: prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg, PREALLOC_MODE__MAX, NULL); if (prealloc == PREALLOC_MODE__MAX) { error_report("Invalid preallocation mode '%s'", optarg); return 1; } break; case OPTION_SHRINK: shrink = true; break; } } if (optind != argc - 1) { error_exit("Expecting image file name and size"); } filename = argv[optind++]; if (qemu_opts_foreach(&qemu_object_opts, user_creatable_add_opts_foreach, NULL, &error_fatal)) { return 1; } /* Choose grow, shrink, or absolute resize mode */ switch (size[0]) { case '+': relative = 1; size++; break; case '-': relative = -1; size++; break; default: relative = 0; break; } /* Parse size */ param = qemu_opts_create(&resize_options, NULL, 0, &error_abort); qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err); if (err) { error_report_err(err); ret = -1; qemu_opts_del(param); goto out; } n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0); qemu_opts_del(param); blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet, false); if (!blk) { ret = -1; goto out; } 
current_size = blk_getlength(blk);
    if (current_size < 0) {
        error_report("Failed to inquire current image length: %s",
                     strerror(-current_size));
        ret = -1;
        goto out;
    }

    /* relative is +1 / -1 for "+N" / "-N" sizes and 0 for an absolute size */
    if (relative) {
        total_size = current_size + n * relative;
    } else {
        total_size = n;
    }
    if (total_size <= 0) {
        error_report("New image size must be positive");
        ret = -1;
        goto out;
    }

    if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
        error_report("Preallocation can only be used for growing images");
        ret = -1;
        goto out;
    }

    /* Shrinking without --shrink: refused for everything except "raw" */
    if (total_size < current_size && !shrink) {
        warn_report("Shrinking an image will delete all data beyond the "
                    "shrunken image's end. Before performing such an "
                    "operation, make sure there is no important data there.");

        if (g_strcmp0(bdrv_get_format_name(blk_bs(blk)), "raw") != 0) {
            error_report(
              "Use the --shrink option to perform a shrink operation.");
            ret = -1;
            goto out;
        } else {
            warn_report("Using the --shrink option will suppress this message. "
                        "Note that future versions of qemu-img may refuse to "
                        "shrink images without this option.");
        }
    }

    ret = blk_truncate(blk, total_size, prealloc, &err);
    if (ret < 0) {
        error_report_err(err);
        goto out;
    }

    /* Re-read the size to verify that the truncation took effect */
    new_size = blk_getlength(blk);
    if (new_size < 0) {
        error_report("Failed to verify truncated image length: %s",
                     strerror(-new_size));
        ret = -1;
        goto out;
    }

    /* Some block drivers implement a truncation method, but only so
     * the user can cause qemu to refresh the image's size from disk.
     * The idea is that the user resizes the image outside of qemu and
     * then invokes block_resize to inform qemu about it.
     * (This includes iscsi and file-posix for device files.)
     * Of course, that is not the behavior someone invoking
     * qemu-img resize would find useful, so we catch that behavior
     * here and tell the user. */
    if (new_size != total_size && new_size == current_size) {
        error_report("Image was not resized; resizing may not be supported "
                     "for this image");
        ret = -1;
        goto out;
    }

    if (new_size != total_size) {
        warn_report("Image should have been resized to %" PRIi64
                    " bytes, but was resized to %" PRIi64 " bytes",
                    total_size, new_size);
    }

    qprintf(quiet, "Image resized.\n");

out:
    blk_unref(blk);
    if (ret) {
        return 1;
    }
    return 0;
}

/*
 * Progress callback handed to bdrv_amend_options() by img_amend():
 * forwards offset / total_work_size as a percentage to
 * qemu_progress_print().  @bs and @opaque are unused.
 */
static void amend_status_cb(BlockDriverState *bs,
                            int64_t offset, int64_t total_work_size,
                            void *opaque)
{
    qemu_progress_print(100.f * offset / total_work_size, 0);
}

/*
 * Print the creation options of the format driver named @format as help
 * for "qemu-img amend".
 *
 * Returns 0 on success, or 1 (after reporting an error) if the format is
 * unknown or its driver has no bdrv_amend_options callback.
 */
static int print_amend_option_help(const char *format)
{
    BlockDriver *drv;

    /* Find driver and parse its options */
    drv = bdrv_find_format(format);
    if (!drv) {
        error_report("Unknown file format '%s'", format);
        return 1;
    }

    if (!drv->bdrv_amend_options) {
        error_report("Format driver '%s' does not support option amendment",
                     format);
        return 1;
    }

    /* Every driver supporting amendment must have create_opts */
    assert(drv->create_opts);

    printf("Creation options for '%s':\n", format);
    qemu_opts_print_help(drv->create_opts, false);
    printf("\nNote that not all of these options may be amendable.\n");
    return 0;
}

static int img_amend(int argc, char **argv)
{
    Error *err = NULL;
    int c, ret = 0;
    char *options = NULL;
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *fmt = NULL, *filename, *cache;
    int flags;
    bool writethrough;
    bool quiet = false, progress = false;
    BlockBackend *blk = NULL;
    BlockDriverState *bs = NULL;
    bool image_opts = false;

    cache = BDRV_DEFAULT_CACHE;
    for (;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {0, 0, 0, 0}
        };
        c = getopt_long(argc, argv, ":ho:f:t:pq",
                        long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
help();
            break;
        case 'o':
            if (!is_valid_option_list(optarg)) {
                error_report("Invalid option list: %s", optarg);
                ret = -1;
                goto out_no_progress;
            }
            /* Multiple -o arguments are joined into one comma-separated list */
            if (!options) {
                options = g_strdup(optarg);
            } else {
                char *old_options = options;
                options = g_strdup_printf("%s,%s", options, optarg);
                g_free(old_options);
            }
            break;
        case 'f':
            fmt = optarg;
            break;
        case 't':
            cache = optarg;
            break;
        case 'p':
            progress = true;
            break;
        case 'q':
            quiet = true;
            break;
        case OPTION_OBJECT:
            /*
             * NOTE(review): the parsed --object opts are stored in the same
             * 'opts' variable that is passed to qemu_opts_del() at
             * out_no_progress -- confirm that deleting them there is intended.
             */
            opts = qemu_opts_parse_noisily(&qemu_object_opts,
                                           optarg, true);
            if (!opts) {
                ret = -1;
                goto out_no_progress;
            }
            break;
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        }
    }

    if (!options) {
        error_exit("Must specify options (-o)");
    }

    if (qemu_opts_foreach(&qemu_object_opts,
                          user_creatable_add_opts_foreach,
                          NULL, &error_fatal)) {
        ret = -1;
        goto out_no_progress;
    }

    if (quiet) {
        progress = false;
    }
    qemu_progress_init(progress, 1.0);

    /* filename stays NULL unless exactly one positional argument is left */
    filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
    if (fmt && has_help_option(options)) {
        /* If a format is explicitly specified (and possibly no filename is
         * given), print option help here */
        ret = print_amend_option_help(fmt);
        goto out;
    }

    if (optind != argc - 1) {
        error_report("Expecting one image file name");
        ret = -1;
        goto out;
    }

    flags = BDRV_O_RDWR;
    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
    if (ret < 0) {
        error_report("Invalid cache option: %s", cache);
        goto out;
    }

    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
                   false);
    if (!blk) {
        ret = -1;
        goto out;
    }
    bs = blk_bs(blk);

    fmt = bs->drv->format_name;

    if (has_help_option(options)) {
        /* If the format was auto-detected, print option help here */
        ret = print_amend_option_help(fmt);
        goto out;
    }

    if (!bs->drv->bdrv_amend_options) {
        error_report("Format driver '%s' does not support option amendment",
                     fmt);
        ret = -1;
        goto out;
    }

    /* Every driver supporting amendment must have create_opts */
    assert(bs->drv->create_opts);

    create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opts_do_parse(opts, options, NULL, &err);
    if (err) {
        error_report_err(err);
        ret = -1;
        goto out;
    }

    /* In case the driver does not call amend_status_cb() */
    qemu_progress_print(0.f, 0);
    ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, &err);
    qemu_progress_print(100.f, 0);
    if (ret < 0) {
        error_report_err(err);
        goto out;
    }

out:
    qemu_progress_end();

out_no_progress:
    /* Entered directly by failures that happen before qemu_progress_init() */
    blk_unref(blk);
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    g_free(options);

    if (ret) {
        return 1;
    }
    return 0;
}

/* State shared between img_bench() and its AIO completion callbacks */
typedef struct BenchData {
    BlockBackend *blk;
    uint64_t image_size;    /* request offsets wrap around at this size */
    bool write;             /* issue writes instead of reads */
    int bufsize;            /* bytes per request */
    int step;               /* offset increment between requests */
    int nrreq;              /* upper bound for in_flight */
    int n;                  /* requests that have not completed yet */
    int flush_interval;     /* flush every this many requests; 0 = never */
    bool drain_on_flush;    /* wait for in_flight == 0 before flushing */
    uint8_t *buf;
    QEMUIOVector *qiov;
    int in_flight;          /* requests submitted but not yet completed */
    bool in_flush;          /* a drained flush is currently pending */
    uint64_t offset;        /* offset of the next request to submit */
} BenchData;

/*
 * Completion callback for flushes issued without draining the queue:
 * only checks for errors and terminates the process on failure.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret < 0) {
        error_report("Failed flush request: %s", strerror(-ret));
        exit(EXIT_FAILURE);
    }
}

/*
 * Completion callback for benchmark requests and drained flushes; also
 * called directly by img_bench() with ret == 0 to submit the first batch.
 *
 * Accounts for the finished request, issues a flush when flush_interval
 * is reached, and then submits new requests until nrreq are in flight or
 * no unsubmitted requests remain.  Any failure terminates the process.
 */
static void bench_cb(void *opaque, int ret)
{
    BenchData *b = opaque;
    BlockAIOCB *acb;

    if (ret < 0) {
        error_report("Failed request: %s", strerror(-ret));
        exit(EXIT_FAILURE);
    }

    if (b->in_flush) {
        /* Just finished a flush with drained queue: Start next requests */
        assert(b->in_flight == 0);
        b->in_flush = false;
    } else if (b->in_flight > 0) {
        /* remaining must be computed before the counters are decremented */
        int remaining = b->n - b->in_flight;

        b->n--;
        b->in_flight--;

        /* Time for flush? Drain queue if requested, then flush */
        if (b->flush_interval && remaining % b->flush_interval == 0) {
            if (!b->in_flight || !b->drain_on_flush) {
                BlockCompletionFunc *cb;

                if (b->drain_on_flush) {
                    b->in_flush = true;
                    cb = bench_cb;
                } else {
                    cb = bench_undrained_flush_cb;
                }

                acb = blk_aio_flush(b->blk, cb, b);
                if (!acb) {
                    error_report("Failed to issue flush request");
                    exit(EXIT_FAILURE);
                }
            }
            /* While draining, do not submit anything new */
            if (b->drain_on_flush) {
                return;
            }
        }
    }

    while (b->n > b->in_flight && b->in_flight < b->nrreq) {
        int64_t offset = b->offset;
        /* blk_aio_* might look for completed I/Os and kick bench_cb
         * again, so make sure this operation is counted by in_flight
         * and b->offset is ready for the next submission.
         */
        b->in_flight++;
        b->offset += b->step;
        b->offset %= b->image_size;
        if (b->write) {
            acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
        } else {
            acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
        }
        if (!acb) {
            error_report("Failed to issue request");
            exit(EXIT_FAILURE);
        }
    }
}

static int img_bench(int argc, char **argv)
{
    int c, ret = 0;
    const char *fmt = NULL, *filename;
    bool quiet = false;
    bool image_opts = false;
    bool is_write = false;
    int count = 75000;
    int depth = 64;
    int64_t offset = 0;
    size_t bufsize = 4096;
    int pattern = 0;
    size_t step = 0;
    int flush_interval = 0;
    bool drain_on_flush = true;
    int64_t image_size;
    BlockBackend *blk = NULL;
    BenchData data = {};
    int flags = 0;
    bool writethrough = false;
    struct timeval t1, t2;
    int i;
    bool force_share = false;
    size_t buf_size;

    for (;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {"pattern", required_argument, 0, OPTION_PATTERN},
            {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
            {"force-share", no_argument, 0, 'U'},
            {0, 0, 0, 0}
        };
        c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:wU", long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        case ':':
missing_argument(argv[optind - 1]); break; case '?': unrecognized_option(argv[optind - 1]); break; case 'h': help(); break; case 'c': { unsigned long res; if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) { error_report("Invalid request count specified"); return 1; } count = res; break; } case 'd': { unsigned long res; if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) { error_report("Invalid queue depth specified"); return 1; } depth = res; break; } case 'f': fmt = optarg; break; case 'n': flags |= BDRV_O_NATIVE_AIO; break; case 'o': { offset = cvtnum(optarg); if (offset < 0) { error_report("Invalid offset specified"); return 1; } break; } break; case 'q': quiet = true; break; case 's': { int64_t sval; sval = cvtnum(optarg); if (sval < 0 || sval > INT_MAX) { error_report("Invalid buffer size specified"); return 1; } bufsize = sval; break; } case 'S': { int64_t sval; sval = cvtnum(optarg); if (sval < 0 || sval > INT_MAX) { error_report("Invalid step size specified"); return 1; } step = sval; break; } case 't': ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough); if (ret < 0) { error_report("Invalid cache mode"); ret = -1; goto out; } break; case 'w': flags |= BDRV_O_RDWR; is_write = true; break; case 'U': force_share = true; break; case OPTION_PATTERN: { unsigned long res; if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) { error_report("Invalid pattern byte specified"); return 1; } pattern = res; break; } case OPTION_FLUSH_INTERVAL: { unsigned long res; if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) { error_report("Invalid flush interval specified"); return 1; } flush_interval = res; break; } case OPTION_NO_DRAIN: drain_on_flush = false; break; case OPTION_IMAGE_OPTS: image_opts = true; break; } } if (optind != argc - 1) { error_exit("Expecting one image file name"); } filename = argv[argc - 1]; if (!is_write && flush_interval) { error_report("--flush-interval is only available in write tests"); ret = -1; goto 
out; } if (flush_interval && flush_interval < depth) { error_report("Flush interval can't be smaller than depth"); ret = -1; goto out; } blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet, force_share); if (!blk) { ret = -1; goto out; } image_size = blk_getlength(blk); if (image_size < 0) { ret = image_size; goto out; } data = (BenchData) { .blk = blk, .image_size = image_size, .bufsize = bufsize, .step = step ?: bufsize, .nrreq = depth, .n = count, .offset = offset, .write = is_write, .flush_interval = flush_interval, .drain_on_flush = drain_on_flush, }; printf("Sending %d %s requests, %d bytes each, %d in parallel " "(starting at offset %" PRId64 ", step size %d)\n", data.n, data.write ? "write" : "read", data.bufsize, data.nrreq, data.offset, data.step); if (flush_interval) { printf("Sending flush every %d requests\n", flush_interval); } buf_size = data.nrreq * data.bufsize; data.buf = blk_blockalign(blk, buf_size); memset(data.buf, pattern, data.nrreq * data.bufsize); blk_register_buf(blk, data.buf, buf_size); data.qiov = g_new(QEMUIOVector, data.nrreq); for (i = 0; i < data.nrreq; i++) { qemu_iovec_init(&data.qiov[i], 1); qemu_iovec_add(&data.qiov[i], data.buf + i * data.bufsize, data.bufsize); } gettimeofday(&t1, NULL); bench_cb(&data, 0); while (data.n > 0) { main_loop_wait(false); } gettimeofday(&t2, NULL); printf("Run completed in %3.3f seconds.\n", (t2.tv_sec - t1.tv_sec) + ((double)(t2.tv_usec - t1.tv_usec) / 1000000)); out: if (data.buf) { blk_unregister_buf(blk, data.buf); } qemu_vfree(data.buf); blk_unref(blk); if (ret) { return 1; } return 0; } #define C_BS 01 #define C_COUNT 02 #define C_IF 04 #define C_OF 010 #define C_SKIP 020 struct DdInfo { unsigned int flags; int64_t count; }; struct DdIo { int bsz; /* Block size */ char *filename; uint8_t *buf; int64_t offset; }; struct DdOpts { const char *name; int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *); unsigned int flag; }; static int img_dd_bs(const char *arg, 
struct DdIo *in, struct DdIo *out, struct DdInfo *dd) { int64_t res; res = cvtnum(arg); if (res <= 0 || res > INT_MAX) { error_report("invalid number: '%s'", arg); return 1; } in->bsz = out->bsz = res; return 0; } static int img_dd_count(const char *arg, struct DdIo *in, struct DdIo *out, struct DdInfo *dd) { dd->count = cvtnum(arg); if (dd->count < 0) { error_report("invalid number: '%s'", arg); return 1; } return 0; } static int img_dd_if(const char *arg, struct DdIo *in, struct DdIo *out, struct DdInfo *dd) { in->filename = g_strdup(arg); return 0; } static int img_dd_of(const char *arg, struct DdIo *in, struct DdIo *out, struct DdInfo *dd) { out->filename = g_strdup(arg); return 0; } static int img_dd_skip(const char *arg, struct DdIo *in, struct DdIo *out, struct DdInfo *dd) { in->offset = cvtnum(arg); if (in->offset < 0) { error_report("invalid number: '%s'", arg); return 1; } return 0; } static int img_dd(int argc, char **argv) { int ret = 0; char *arg = NULL; char *tmp; BlockDriver *drv = NULL, *proto_drv = NULL; BlockBackend *blk1 = NULL, *blk2 = NULL; QemuOpts *opts = NULL; QemuOptsList *create_opts = NULL; Error *local_err = NULL; bool image_opts = false; int c, i; const char *out_fmt = "raw"; const char *fmt = NULL; int64_t size = 0; int64_t block_count = 0, out_pos, in_pos; bool force_share = false; struct DdInfo dd = { .flags = 0, .count = 0, }; struct DdIo in = { .bsz = 512, /* Block size is by default 512 bytes */ .filename = NULL, .buf = NULL, .offset = 0 }; struct DdIo out = { .bsz = 512, .filename = NULL, .buf = NULL, .offset = 0 }; const struct DdOpts options[] = { { "bs", img_dd_bs, C_BS }, { "count", img_dd_count, C_COUNT }, { "if", img_dd_if, C_IF }, { "of", img_dd_of, C_OF }, { "skip", img_dd_skip, C_SKIP }, { NULL, NULL, 0 } }; const struct option long_options[] = { { "help", no_argument, 0, 'h'}, { "object", required_argument, 0, OPTION_OBJECT}, { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, { "force-share", no_argument, 0, 'U'}, { 0, 
0, 0, 0 } }; while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) { if (c == EOF) { break; } switch (c) { case 'O': out_fmt = optarg; break; case 'f': fmt = optarg; break; case ':': missing_argument(argv[optind - 1]); break; case '?': unrecognized_option(argv[optind - 1]); break; case 'h': help(); break; case 'U': force_share = true; break; case OPTION_OBJECT: if (!qemu_opts_parse_noisily(&qemu_object_opts, optarg, true)) { ret = -1; goto out; } break; case OPTION_IMAGE_OPTS: image_opts = true; break; } } for (i = optind; i < argc; i++) { int j; arg = g_strdup(argv[i]); tmp = strchr(arg, '='); if (tmp == NULL) { error_report("unrecognized operand %s", arg); ret = -1; goto out; } *tmp++ = '\0'; for (j = 0; options[j].name != NULL; j++) { if (!strcmp(arg, options[j].name)) { break; } } if (options[j].name == NULL) { error_report("unrecognized operand %s", arg); ret = -1; goto out; } if (options[j].f(tmp, &in, &out, &dd) != 0) { ret = -1; goto out; } dd.flags |= options[j].flag; g_free(arg); arg = NULL; } if (!(dd.flags & C_IF && dd.flags & C_OF)) { error_report("Must specify both input and output files"); ret = -1; goto out; } if (qemu_opts_foreach(&qemu_object_opts, user_creatable_add_opts_foreach, NULL, &error_fatal)) { ret = -1; goto out; } blk1 = img_open(image_opts, in.filename, fmt, 0, false, false, force_share); if (!blk1) { ret = -1; goto out; } drv = bdrv_find_format(out_fmt); if (!drv) { error_report("Unknown file format"); ret = -1; goto out; } proto_drv = bdrv_find_protocol(out.filename, true, &local_err); if (!proto_drv) { error_report_err(local_err); ret = -1; goto out; } if (!drv->create_opts) { error_report("Format driver '%s' does not support image creation", drv->format_name); ret = -1; goto out; } if (!proto_drv->create_opts) { error_report("Protocol driver '%s' does not support image creation", proto_drv->format_name); ret = -1; goto out; } create_opts = qemu_opts_append(create_opts, drv->create_opts); create_opts = 
qemu_opts_append(create_opts, proto_drv->create_opts); opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); size = blk_getlength(blk1); if (size < 0) { error_report("Failed to get size for '%s'", in.filename); ret = -1; goto out; } if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz && dd.count * in.bsz < size) { size = dd.count * in.bsz; } /* Overflow means the specified offset is beyond input image's size */ if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz || size < in.bsz * in.offset)) { qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort); } else { qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size - in.bsz * in.offset, &error_abort); } ret = bdrv_create(drv, out.filename, opts, &local_err); if (ret < 0) { error_reportf_err(local_err, "%s: error while creating output image: ", out.filename); ret = -1; goto out; } /* TODO, we can't honour --image-opts for the target, * since it needs to be given in a format compatible * with the bdrv_create() call above which does not * support image-opts style. */ blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR, false, false, false); if (!blk2) { ret = -1; goto out; } if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz || size < in.offset * in.bsz)) { /* We give a warning if the skip option is bigger than the input * size and create an empty output disk image (i.e. like dd(1)). 
*/ error_report("%s: cannot skip to specified offset", in.filename); in_pos = size; } else { in_pos = in.offset * in.bsz; } in.buf = g_new(uint8_t, in.bsz); for (out_pos = 0; in_pos < size; block_count++) { int in_ret, out_ret; if (in_pos + in.bsz > size) { in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos); } else { in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz); } if (in_ret < 0) { error_report("error while reading from input image file: %s", strerror(-in_ret)); ret = -1; goto out; } in_pos += in_ret; out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0); if (out_ret < 0) { error_report("error while writing to output image file: %s", strerror(-out_ret)); ret = -1; goto out; } out_pos += out_ret; } out: g_free(arg); qemu_opts_del(opts); qemu_opts_free(create_opts); blk_unref(blk1); blk_unref(blk2); g_free(in.filename); g_free(out.filename); g_free(in.buf); g_free(out.buf); if (ret) { return 1; } return 0; } static void dump_json_block_measure_info(BlockMeasureInfo *info) { QString *str; QObject *obj; Visitor *v = qobject_output_visitor_new(&obj); visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort); visit_complete(v, &obj); str = qobject_to_json_pretty(obj); assert(str != NULL); printf("%s\n", qstring_get_str(str)); qobject_unref(obj); visit_free(v); qobject_unref(str); } static int img_measure(int argc, char **argv) { static const struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {"object", required_argument, 0, OPTION_OBJECT}, {"output", required_argument, 0, OPTION_OUTPUT}, {"size", required_argument, 0, OPTION_SIZE}, {"force-share", no_argument, 0, 'U'}, {0, 0, 0, 0} }; OutputFormat output_format = OFORMAT_HUMAN; BlockBackend *in_blk = NULL; BlockDriver *drv; const char *filename = NULL; const char *fmt = NULL; const char *out_fmt = "raw"; char *options = NULL; char *snapshot_name = NULL; bool force_share = false; QemuOpts *opts = NULL; QemuOpts *object_opts = NULL; QemuOpts *sn_opts 
= NULL; QemuOptsList *create_opts = NULL; bool image_opts = false; uint64_t img_size = UINT64_MAX; BlockMeasureInfo *info = NULL; Error *local_err = NULL; int ret = 1; int c; while ((c = getopt_long(argc, argv, "hf:O:o:l:U", long_options, NULL)) != -1) { switch (c) { case '?': case 'h': help(); break; case 'f': fmt = optarg; break; case 'O': out_fmt = optarg; break; case 'o': if (!is_valid_option_list(optarg)) { error_report("Invalid option list: %s", optarg); goto out; } if (!options) { options = g_strdup(optarg); } else { char *old_options = options; options = g_strdup_printf("%s,%s", options, optarg); g_free(old_options); } break; case 'l': if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) { sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts, optarg, false); if (!sn_opts) { error_report("Failed in parsing snapshot param '%s'", optarg); goto out; } } else { snapshot_name = optarg; } break; case 'U': force_share = true; break; case OPTION_OBJECT: object_opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true); if (!object_opts) { goto out; } break; case OPTION_IMAGE_OPTS: image_opts = true; break; case OPTION_OUTPUT: if (!strcmp(optarg, "json")) { output_format = OFORMAT_JSON; } else if (!strcmp(optarg, "human")) { output_format = OFORMAT_HUMAN; } else { error_report("--output must be used with human or json " "as argument."); goto out; } break; case OPTION_SIZE: { int64_t sval; sval = cvtnum(optarg); if (sval < 0) { if (sval == -ERANGE) { error_report("Image size must be less than 8 EiB!"); } else { error_report("Invalid image size specified! 
You may use " "k, M, G, T, P or E suffixes for "); error_report("kilobytes, megabytes, gigabytes, terabytes, " "petabytes and exabytes."); } goto out; } img_size = (uint64_t)sval; } break; } } if (qemu_opts_foreach(&qemu_object_opts, user_creatable_add_opts_foreach, NULL, &error_fatal)) { goto out; } if (argc - optind > 1) { error_report("At most one filename argument is allowed."); goto out; } else if (argc - optind == 1) { filename = argv[optind]; } if (!filename && (object_opts || image_opts || fmt || snapshot_name || sn_opts)) { error_report("--object, --image-opts, -f, and -l " "require a filename argument."); goto out; } if (filename && img_size != UINT64_MAX) { error_report("--size N cannot be used together with a filename."); goto out; } if (!filename && img_size == UINT64_MAX) { error_report("Either --size N or one filename must be specified."); goto out; } if (filename) { in_blk = img_open(image_opts, filename, fmt, 0, false, false, force_share); if (!in_blk) { goto out; } if (sn_opts) { bdrv_snapshot_load_tmp(blk_bs(in_blk), qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID), qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME), &local_err); } else if (snapshot_name != NULL) { bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk), snapshot_name, &local_err); } if (local_err) { error_reportf_err(local_err, "Failed to load snapshot: "); goto out; } } drv = bdrv_find_format(out_fmt); if (!drv) { error_report("Unknown file format '%s'", out_fmt); goto out; } if (!drv->create_opts) { error_report("Format driver '%s' does not support image creation", drv->format_name); goto out; } create_opts = qemu_opts_append(create_opts, drv->create_opts); create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts); opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); if (options) { qemu_opts_do_parse(opts, options, NULL, &local_err); if (local_err) { error_report_err(local_err); error_report("Invalid options for file format '%s'", out_fmt); goto out; } } if (img_size != UINT64_MAX) { 
qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); } info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err); if (local_err) { error_report_err(local_err); goto out; } if (output_format == OFORMAT_HUMAN) { printf("required size: %" PRIu64 "\n", info->required); printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated); } else { dump_json_block_measure_info(info); } ret = 0; out: qapi_free_BlockMeasureInfo(info); qemu_opts_del(object_opts); qemu_opts_del(opts); qemu_opts_del(sn_opts); qemu_opts_free(create_opts); g_free(options); blk_unref(in_blk); return ret; } static const img_cmd_t img_cmds[] = { #define DEF(option, callback, arg_string) \ { option, callback }, #include "qemu-img-cmds.h" #undef DEF { NULL, NULL, }, }; int main(int argc, char **argv) { const img_cmd_t *cmd; const char *cmdname; Error *local_error = NULL; char *trace_file = NULL; int c; static const struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'V'}, {"trace", required_argument, NULL, 'T'}, {0, 0, 0, 0} }; #ifdef CONFIG_POSIX signal(SIGPIPE, SIG_IGN); #endif error_init(argv[0]); module_call_init(MODULE_INIT_TRACE); qemu_init_exec_dir(argv[0]); if (qemu_init_main_loop(&local_error)) { error_report_err(local_error); exit(EXIT_FAILURE); } qcrypto_init(&error_fatal); module_call_init(MODULE_INIT_QOM); bdrv_init(); if (argc < 2) { error_exit("Not enough arguments"); } qemu_add_opts(&qemu_object_opts); qemu_add_opts(&qemu_source_opts); qemu_add_opts(&qemu_trace_opts); while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) { switch (c) { case ':': missing_argument(argv[optind - 1]); return 0; case '?': unrecognized_option(argv[optind - 1]); return 0; case 'h': help(); return 0; case 'V': printf(QEMU_IMG_VERSION); return 0; case 'T': g_free(trace_file); trace_file = trace_opt_parse(optarg); break; } } cmdname = argv[optind]; /* reset getopt_long scanning */ argc -= optind; if (argc < 1) { return 
0; } argv += optind; qemu_reset_optind(); if (!trace_init_backends()) { exit(1); } trace_init_file(trace_file); qemu_set_log(LOG_TRACE); /* find the command */ for (cmd = img_cmds; cmd->name != NULL; cmd++) { if (!strcmp(cmdname, cmd->name)) { return cmd->handler(argc, argv); } } /* not found */ error_exit("Command not found: %s", cmdname); }