-rw-r--r--   qemu-img.c   516
1 file changed, 310 insertions(+), 206 deletions(-)
diff --git a/qemu-img.c b/qemu-img.c
index 9dddfbefce..8d30e43b53 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1305,20 +1305,312 @@ out3:
return ret;
}
+enum ImgConvertBlockStatus {
+ BLK_DATA,
+ BLK_ZERO,
+ BLK_BACKING_FILE,
+};
+
+typedef struct ImgConvertState {
+ BlockBackend **src;
+ int64_t *src_sectors;
+ int src_cur, src_num;
+ int64_t src_cur_offset;
+ int64_t total_sectors;
+ int64_t allocated_sectors;
+ enum ImgConvertBlockStatus status;
+ int64_t sector_next_status;
+ BlockBackend *target;
+ bool has_zero_init;
+ bool compressed;
+ bool target_has_backing;
+ int min_sparse;
+ size_t cluster_sectors;
+ size_t buf_sectors;
+} ImgConvertState;
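/*
 * Illustrative note (annotation, not from qemu-img.c): src, src_sectors and
 * src_num describe the concatenated source images and their sizes in
 * sectors; src_cur and src_cur_offset track which source the current sector
 * falls into; status and sector_next_status cache the most recent
 * bdrv_get_block_status() result; buf_sectors is the I/O buffer size, which
 * convert_do_copy() shrinks to one cluster when writing compressed output.
 */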
+
+static void convert_select_part(ImgConvertState *s, int64_t sector_num)
+{
+ assert(sector_num >= s->src_cur_offset);
+ while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) {
+ s->src_cur_offset += s->src_sectors[s->src_cur];
+ s->src_cur++;
+ assert(s->src_cur < s->src_num);
+ }
+}
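/*
 * Illustrative sketch (hypothetical names, not from qemu-img.c): the same
 * part-selection arithmetic as convert_select_part(), written against plain
 * arrays so it can be read on its own.  Given the sector counts of the
 * concatenated source images, it returns the index of the image that holds
 * 'sector_num' and, via 'part_start', the global sector at which that image
 * begins.  E.g. with sizes {100, 50, 200}, sector 120 lies in part 1, which
 * starts at sector 100.
 */
#include <assert.h>
#include <stdint.h>

static int select_part(const int64_t *sizes, int num_parts,
                       int64_t sector_num, int64_t *part_start)
{
    int64_t start = 0;
    int i = 0;

    while (sector_num - start >= sizes[i]) {
        start += sizes[i];
        i++;
        assert(i < num_parts);
    }
    *part_start = start;
    return i;
}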
+
+static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
+{
+ int64_t ret;
+ int n;
+
+ convert_select_part(s, sector_num);
+
+ assert(s->total_sectors > sector_num);
+ n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
+
+ if (s->sector_next_status <= sector_num) {
+ ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]),
+ sector_num - s->src_cur_offset,
+ n, &n);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (ret & BDRV_BLOCK_ZERO) {
+ s->status = BLK_ZERO;
+ } else if (ret & BDRV_BLOCK_DATA) {
+ s->status = BLK_DATA;
+ } else if (!s->target_has_backing) {
+ /* Without a target backing file we must copy over the contents of
+ * the backing file as well. */
+ /* TODO Check block status of the backing file chain to avoid
+ * needlessly reading zeroes and limiting the iteration to the
+ * buffer size */
+ s->status = BLK_DATA;
+ } else {
+ s->status = BLK_BACKING_FILE;
+ }
+
+ s->sector_next_status = sector_num + n;
+ }
+
+ n = MIN(n, s->sector_next_status - sector_num);
+ if (s->status == BLK_DATA) {
+ n = MIN(n, s->buf_sectors);
+ }
+
+ /* We need to write complete clusters for compressed images, so if an
+ * unallocated area is shorter than that, we must consider the whole
+ * cluster allocated. */
+ if (s->compressed) {
+ if (n < s->cluster_sectors) {
+ n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
+ s->status = BLK_DATA;
+ } else {
+ n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
+ }
+ }
+
+ return n;
+}
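/*
 * Illustrative sketch (hypothetical names, not from qemu-img.c): the cluster
 * rounding that convert_iteration_sectors() applies when the output is
 * compressed.  Compressed writes must cover whole clusters, so a chunk that
 * is shorter than one cluster is widened to a full cluster (capped at the
 * end of the image) and treated as data, while longer chunks are trimmed
 * down to a cluster boundary.
 */
static int64_t round_for_compression(int64_t n, int64_t cluster_sectors,
                                     int64_t sectors_left_in_image)
{
    if (n < cluster_sectors) {
        return cluster_sectors < sectors_left_in_image
               ? cluster_sectors : sectors_left_in_image;
    }
    return n - n % cluster_sectors;  /* like QEMU_ALIGN_DOWN(n, cluster_sectors) */
}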
+
+static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
+ uint8_t *buf)
+{
+ int n;
+ int ret;
+
+ if (s->status == BLK_ZERO || s->status == BLK_BACKING_FILE) {
+ return 0;
+ }
+
+ assert(nb_sectors <= s->buf_sectors);
+ while (nb_sectors > 0) {
+ BlockBackend *blk;
+ int64_t bs_sectors;
+
+ /* In the case of compression with multiple source files, we can get a
+ * nb_sectors that spreads into the next part. So we must be able to
+ * read across multiple BDSes for one convert_read() call. */
+ convert_select_part(s, sector_num);
+ blk = s->src[s->src_cur];
+ bs_sectors = s->src_sectors[s->src_cur];
+
+ n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset));
+ ret = blk_read(blk, sector_num - s->src_cur_offset, buf, n);
+ if (ret < 0) {
+ return ret;
+ }
+
+ sector_num += n;
+ nb_sectors -= n;
+ buf += n * BDRV_SECTOR_SIZE;
+ }
+
+ return 0;
+}
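/*
 * Illustrative sketch (hypothetical names, not from qemu-img.c): how a single
 * logical read is split at source-image boundaries, mirroring the loop in
 * convert_read().  It reuses the select_part() sketch above; the actual read
 * is left as a comment because it would go through the QEMU block layer.
 */
static void split_read(const int64_t *sizes, int num_parts,
                       int64_t sector_num, int nb_sectors)
{
    while (nb_sectors > 0) {
        int64_t part_start;
        int part = select_part(sizes, num_parts, sector_num, &part_start);
        int64_t left_in_part = sizes[part] - (sector_num - part_start);
        int n = nb_sectors < left_in_part ? nb_sectors : (int) left_in_part;

        /* read 'n' sectors from source 'part' at offset sector_num - part_start */

        sector_num += n;
        nb_sectors -= n;
    }
}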
+
+static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
+ const uint8_t *buf)
+{
+ int ret;
+
+ while (nb_sectors > 0) {
+ int n = nb_sectors;
+
+ switch (s->status) {
+ case BLK_BACKING_FILE:
+ /* If we have a backing file, leave clusters unallocated that are
+ * unallocated in the source image, so that the backing file is
+ * visible at the respective offset. */
+ assert(s->target_has_backing);
+ break;
+
+ case BLK_DATA:
+ /* We must always write compressed clusters as a whole, so don't
+ * try to find zeroed parts in the buffer. We can only save the
+ * write if the buffer is completely zeroed and we're allowed to
+ * keep the target sparse. */
+ if (s->compressed) {
+ if (s->has_zero_init && s->min_sparse &&
+ buffer_is_zero(buf, n * BDRV_SECTOR_SIZE))
+ {
+ assert(!s->target_has_backing);
+ break;
+ }
+
+ ret = blk_write_compressed(s->target, sector_num, buf, n);
+ if (ret < 0) {
+ return ret;
+ }
+ break;
+ }
+
+ /* If there is real non-zero data or we're told to keep the target
+ * fully allocated (-S 0), we must write it. Otherwise we can treat
+ * it as zero sectors. */
+ if (!s->min_sparse ||
+ is_allocated_sectors_min(buf, n, &n, s->min_sparse))
+ {
+ ret = blk_write(s->target, sector_num, buf, n);
+ if (ret < 0) {
+ return ret;
+ }
+ break;
+ }
+ /* fall-through */
+
+ case BLK_ZERO:
+ if (s->has_zero_init) {
+ break;
+ }
+ ret = blk_write_zeroes(s->target, sector_num, n, 0);
+ if (ret < 0) {
+ return ret;
+ }
+ break;
+ }
+
+ sector_num += n;
+ nb_sectors -= n;
+ buf += n * BDRV_SECTOR_SIZE;
+ }
+
+ return 0;
+}
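/*
 * Illustrative sketch (hypothetical names, not from qemu-img.c): the write
 * policy of convert_write(), reduced to a pure decision function.  It uses
 * the ImgConvertBlockStatus values from the patch; the per-run splitting
 * done by is_allocated_sectors_min() is simplified to a whole-buffer zero
 * check.  BLK_BACKING_FILE chunks are skipped so the target's backing file
 * shows through; BLK_ZERO chunks are skipped when the target already reads
 * back as zeroes and written with write_zeroes otherwise; BLK_DATA chunks
 * are written (compressed or plain) unless sparse writing is allowed and the
 * buffer is entirely zero.
 */
enum WriteAction { SKIP, WRITE_DATA, WRITE_COMPRESSED, WRITE_ZEROES };

static enum WriteAction pick_write_action(enum ImgConvertBlockStatus status,
                                          bool compressed, int min_sparse,
                                          bool has_zero_init, bool buf_is_zero)
{
    if (status == BLK_BACKING_FILE) {
        return SKIP;
    }
    if (status == BLK_DATA) {
        if (compressed) {
            return (has_zero_init && min_sparse && buf_is_zero)
                   ? SKIP : WRITE_COMPRESSED;
        }
        if (!min_sparse || !buf_is_zero) {
            return WRITE_DATA;
        }
        /* all-zero buffer with -S > 0: handle it like a BLK_ZERO chunk */
    }
    return has_zero_init ? SKIP : WRITE_ZEROES;
}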
+
+static int convert_do_copy(ImgConvertState *s)
+{
+ uint8_t *buf = NULL;
+ int64_t sector_num, allocated_done;
+ int ret;
+ int n;
+
+ /* Check whether we have zero initialisation or can get it efficiently */
+ s->has_zero_init = s->min_sparse && !s->target_has_backing
+ ? bdrv_has_zero_init(blk_bs(s->target))
+ : false;
+
+ if (!s->has_zero_init && !s->target_has_backing &&
+ bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
+ {
+ ret = bdrv_make_zero(blk_bs(s->target), BDRV_REQ_MAY_UNMAP);
+ if (ret == 0) {
+ s->has_zero_init = true;
+ }
+ }
+
+ /* Allocate buffer for copied data. For compressed images, only one cluster
+ * can be copied at a time. */
+ if (s->compressed) {
+ if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
+ error_report("invalid cluster size");
+ ret = -EINVAL;
+ goto fail;
+ }
+ s->buf_sectors = s->cluster_sectors;
+ }
+ buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
+
+ /* Calculate allocated sectors for progress */
+ s->allocated_sectors = 0;
+ sector_num = 0;
+ while (sector_num < s->total_sectors) {
+ n = convert_iteration_sectors(s, sector_num);
+ if (n < 0) {
+ ret = n;
+ goto fail;
+ }
+ if (s->status == BLK_DATA) {
+ s->allocated_sectors += n;
+ }
+ sector_num += n;
+ }
+
+ /* Do the copy */
+ s->src_cur = 0;
+ s->src_cur_offset = 0;
+ s->sector_next_status = 0;
+
+ sector_num = 0;
+ allocated_done = 0;
+
+ while (sector_num < s->total_sectors) {
+ n = convert_iteration_sectors(s, sector_num);
+ if (n < 0) {
+ ret = n;
+ goto fail;
+ }
+ if (s->status == BLK_DATA) {
+ allocated_done += n;
+ qemu_progress_print(100.0 * allocated_done / s->allocated_sectors,
+ 0);
+ }
+
+ ret = convert_read(s, sector_num, n, buf);
+ if (ret < 0) {
+ error_report("error while reading sector %" PRId64
+ ": %s", sector_num, strerror(-ret));
+ goto fail;
+ }
+
+ ret = convert_write(s, sector_num, n, buf);
+ if (ret < 0) {
+ error_report("error while writing sector %" PRId64
+ ": %s", sector_num, strerror(-ret));
+ goto fail;
+ }
+
+ sector_num += n;
+ }
+
+ if (s->compressed) {
+ /* signal EOF to align */
+ ret = blk_write_compressed(s->target, 0, NULL, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ ret = 0;
+fail:
+ qemu_vfree(buf);
+ return ret;
+}
+
static int img_convert(int argc, char **argv)
{
- int c, n, n1, bs_n, bs_i, compress, cluster_sectors, skip_create;
+ int c, bs_n, bs_i, compress, cluster_sectors, skip_create;
int64_t ret = 0;
int progress = 0, flags, src_flags;
const char *fmt, *out_fmt, *cache, *src_cache, *out_baseimg, *out_filename;
BlockDriver *drv, *proto_drv;
BlockBackend **blk = NULL, *out_blk = NULL;
BlockDriverState **bs = NULL, *out_bs = NULL;
- int64_t total_sectors, nb_sectors, sector_num, bs_offset;
+ int64_t total_sectors;
int64_t *bs_sectors = NULL;
- uint8_t * buf = NULL;
size_t bufsectors = IO_BUF_SIZE / BDRV_SECTOR_SIZE;
- const uint8_t *buf1;
BlockDriverInfo bdi;
QemuOpts *opts = NULL;
QemuOptsList *create_opts = NULL;
@@ -1329,6 +1621,7 @@ static int img_convert(int argc, char **argv)
bool quiet = false;
Error *local_err = NULL;
QemuOpts *sn_opts = NULL;
+ ImgConvertState state;
fmt = NULL;
out_fmt = "raw";
@@ -1627,9 +1920,6 @@ static int img_convert(int argc, char **argv)
}
out_bs = blk_bs(out_blk);
- bs_i = 0;
- bs_offset = 0;
-
/* increase bufsectors from the default 4096 (2M) if opt_transfer_length
* or discard_alignment of the out_bs is greater. Limit to 32768 (16MB)
* as maximum. */
@@ -1638,8 +1928,6 @@ static int img_convert(int argc, char **argv)
out_bs->bl.discard_alignment))
);
- buf = blk_blockalign(out_blk, bufsectors * BDRV_SECTOR_SIZE);
-
if (skip_create) {
int64_t output_sectors = blk_nb_sectors(out_blk);
if (output_sectors < 0) {
@@ -1666,203 +1954,20 @@ static int img_convert(int argc, char **argv)
cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
}
- if (compress) {
- if (cluster_sectors <= 0 || cluster_sectors > bufsectors) {
- error_report("invalid cluster size");
- ret = -1;
- goto out;
- }
- sector_num = 0;
-
- nb_sectors = total_sectors;
-
- for(;;) {
- int64_t bs_num;
- int remainder;
- uint8_t *buf2;
-
- nb_sectors = total_sectors - sector_num;
- if (nb_sectors <= 0)
- break;
- if (nb_sectors >= cluster_sectors)
- n = cluster_sectors;
- else
- n = nb_sectors;
-
- bs_num = sector_num - bs_offset;
- assert (bs_num >= 0);
- remainder = n;
- buf2 = buf;
- while (remainder > 0) {
- int nlow;
- while (bs_num == bs_sectors[bs_i]) {
- bs_offset += bs_sectors[bs_i];
- bs_i++;
- assert (bs_i < bs_n);
- bs_num = 0;
- /* printf("changing part: sector_num=%" PRId64 ", "
- "bs_i=%d, bs_offset=%" PRId64 ", bs_sectors=%" PRId64
- "\n", sector_num, bs_i, bs_offset, bs_sectors[bs_i]); */
- }
- assert (bs_num < bs_sectors[bs_i]);
-
- nlow = remainder > bs_sectors[bs_i] - bs_num
- ? bs_sectors[bs_i] - bs_num : remainder;
-
- ret = blk_read(blk[bs_i], bs_num, buf2, nlow);
- if (ret < 0) {
- error_report("error while reading sector %" PRId64 ": %s",
- bs_num, strerror(-ret));
- goto out;
- }
-
- buf2 += nlow * 512;
- bs_num += nlow;
-
- remainder -= nlow;
- }
- assert (remainder == 0);
-
- if (!buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)) {
- ret = blk_write_compressed(out_blk, sector_num, buf, n);
- if (ret != 0) {
- error_report("error while compressing sector %" PRId64
- ": %s", sector_num, strerror(-ret));
- goto out;
- }
- }
- sector_num += n;
- qemu_progress_print(100.0 * sector_num / total_sectors, 0);
- }
- /* signal EOF to align */
- blk_write_compressed(out_blk, 0, NULL, 0);
- } else {
- int64_t sectors_to_read, sectors_read, sector_num_next_status;
- bool count_allocated_sectors;
- int has_zero_init = min_sparse ? bdrv_has_zero_init(out_bs) : 0;
-
- if (!has_zero_init && bdrv_can_write_zeroes_with_unmap(out_bs)) {
- ret = bdrv_make_zero(out_bs, BDRV_REQ_MAY_UNMAP);
- if (ret < 0) {
- goto out;
- }
- has_zero_init = 1;
- }
-
- sectors_to_read = total_sectors;
- count_allocated_sectors = progress && (out_baseimg || has_zero_init);
-restart:
- sector_num = 0; // total number of sectors converted so far
- sectors_read = 0;
- sector_num_next_status = 0;
-
- for(;;) {
- nb_sectors = total_sectors - sector_num;
- if (nb_sectors <= 0) {
- if (count_allocated_sectors) {
- sectors_to_read = sectors_read;
- count_allocated_sectors = false;
- goto restart;
- }
- ret = 0;
- break;
- }
-
- while (sector_num - bs_offset >= bs_sectors[bs_i]) {
- bs_offset += bs_sectors[bs_i];
- bs_i ++;
- assert (bs_i < bs_n);
- /* printf("changing part: sector_num=%" PRId64 ", bs_i=%d, "
- "bs_offset=%" PRId64 ", bs_sectors=%" PRId64 "\n",
- sector_num, bs_i, bs_offset, bs_sectors[bs_i]); */
- }
-
- if ((out_baseimg || has_zero_init) &&
- sector_num >= sector_num_next_status) {
- n = nb_sectors > INT_MAX ? INT_MAX : nb_sectors;
- ret = bdrv_get_block_status(bs[bs_i], sector_num - bs_offset,
- n, &n1);
- if (ret < 0) {
- error_report("error while reading block status of sector %"
- PRId64 ": %s", sector_num - bs_offset,
- strerror(-ret));
- goto out;
- }
- /* If the output image is zero initialized, we are not working
- * on a shared base and the input is zero we can skip the next
- * n1 sectors */
- if (has_zero_init && !out_baseimg && (ret & BDRV_BLOCK_ZERO)) {
- sector_num += n1;
- continue;
- }
- /* If the output image is being created as a copy on write
- * image, assume that sectors which are unallocated in the
- * input image are present in both the output's and input's
- * base images (no need to copy them). */
- if (out_baseimg) {
- if (!(ret & BDRV_BLOCK_DATA)) {
- sector_num += n1;
- continue;
- }
- /* The next 'n1' sectors are allocated in the input image.
- * Copy only those as they may be followed by unallocated
- * sectors. */
- nb_sectors = n1;
- }
- /* avoid redundant callouts to get_block_status */
- sector_num_next_status = sector_num + n1;
- }
-
- n = MIN(nb_sectors, bufsectors);
-
- /* round down request length to an aligned sector, but
- * do not bother doing this on short requests. They happen
- * when we found an all-zero area, and the next sector to
- * write will not be sector_num + n. */
- if (cluster_sectors > 0 && n >= cluster_sectors) {
- int64_t next_aligned_sector = (sector_num + n);
- next_aligned_sector -= next_aligned_sector % cluster_sectors;
- if (sector_num + n > next_aligned_sector) {
- n = next_aligned_sector - sector_num;
- }
- }
-
- n = MIN(n, bs_sectors[bs_i] - (sector_num - bs_offset));
-
- sectors_read += n;
- if (count_allocated_sectors) {
- sector_num += n;
- continue;
- }
+ state = (ImgConvertState) {
+ .src = blk,
+ .src_sectors = bs_sectors,
+ .src_num = bs_n,
+ .total_sectors = total_sectors,
+ .target = out_blk,
+ .compressed = compress,
+ .target_has_backing = (bool) out_baseimg,
+ .min_sparse = min_sparse,
+ .cluster_sectors = cluster_sectors,
+ .buf_sectors = bufsectors,
+ };
+ ret = convert_do_copy(&state);
- n1 = n;
- ret = blk_read(blk[bs_i], sector_num - bs_offset, buf, n);
- if (ret < 0) {
- error_report("error while reading sector %" PRId64 ": %s",
- sector_num - bs_offset, strerror(-ret));
- goto out;
- }
- /* NOTE: at the same time we convert, we do not write zero
- sectors to have a chance to compress the image. Ideally, we
- should add a specific call to have the info to go faster */
- buf1 = buf;
- while (n > 0) {
- if (!has_zero_init ||
- is_allocated_sectors_min(buf1, n, &n1, min_sparse)) {
- ret = blk_write(out_blk, sector_num, buf1, n1);
- if (ret < 0) {
- error_report("error while writing sector %" PRId64
- ": %s", sector_num, strerror(-ret));
- goto out;
- }
- }
- sector_num += n1;
- n -= n1;
- buf1 += n1 * 512;
- }
- qemu_progress_print(100.0 * sectors_read / sectors_to_read, 0);
- }
- }
out:
if (!ret) {
qemu_progress_print(100, 0);
@@ -1870,7 +1975,6 @@ out:
qemu_progress_end();
qemu_opts_del(opts);
qemu_opts_free(create_opts);
- qemu_vfree(buf);
qemu_opts_del(sn_opts);
blk_unref(out_blk);
g_free(bs);