aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block.c46
-rw-r--r--block/Makefile.objs2
-rw-r--r--block/backup.c243
-rw-r--r--block/block-backend.c3
-rw-r--r--block/qcow2-bitmap.c3
-rw-r--r--block/qcow2-cache.c1
-rw-r--r--block/qcow2-cluster.c10
-rw-r--r--block/qcow2-refcount.c1
-rw-r--r--block/qcow2-snapshot.c1
-rw-r--r--block/qcow2-threads.c268
-rw-r--r--block/qcow2.c320
-rw-r--r--block/qcow2.h26
-rw-r--r--block/quorum.c1
-rw-r--r--block/trace-events1
-rw-r--r--blockdev.c7
-rw-r--r--blockjob.c2
-rw-r--r--qapi/block-core.json4
-rw-r--r--qemu-img.c85
-rwxr-xr-xtests/qemu-iotests/0562
-rwxr-xr-xtests/qemu-iotests/0607
-rw-r--r--tests/qemu-iotests/060.out5
-rw-r--r--tests/test-bdrv-drain.c6
-rw-r--r--tests/test-bdrv-graph-mod.c1
23 files changed, 615 insertions, 430 deletions
diff --git a/block.c b/block.c
index cb11537029..1a73e310c1 100644
--- a/block.c
+++ b/block.c
@@ -2243,6 +2243,13 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
}
}
+/*
+ * This function steals the reference to child_bs from the caller.
+ * That reference is later dropped by bdrv_root_unref_child().
+ *
+ * On failure NULL is returned, errp is set and the reference to
+ * child_bs is also dropped.
+ */
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
const BdrvChildRole *child_role,
@@ -2255,6 +2262,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp);
if (ret < 0) {
bdrv_abort_perm_update(child_bs);
+ bdrv_unref(child_bs);
return NULL;
}
@@ -2274,6 +2282,14 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
return child;
}
+/*
+ * This function transfers the reference to child_bs from the caller
+ * to parent_bs. That reference is later dropped by parent_bs on
+ * bdrv_close() or if someone calls bdrv_unref_child().
+ *
+ * On failure NULL is returned, errp is set and the reference to
+ * child_bs is also dropped.
+ */
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const char *child_name,
@@ -2401,12 +2417,9 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
/* If backing_hd was already part of bs's backing chain, and
* inherits_from pointed recursively to bs then let's update it to
* point directly to bs (else it will become NULL). */
- if (update_inherits_from) {
+ if (bs->backing && update_inherits_from) {
backing_hd->inherits_from = bs;
}
- if (!bs->backing) {
- bdrv_unref(backing_hd);
- }
out:
bdrv_refresh_limits(bs, NULL);
@@ -2594,7 +2607,6 @@ BdrvChild *bdrv_open_child(const char *filename,
const BdrvChildRole *child_role,
bool allow_none, Error **errp)
{
- BdrvChild *c;
BlockDriverState *bs;
bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role,
@@ -2603,13 +2615,7 @@ BdrvChild *bdrv_open_child(const char *filename,
return NULL;
}
- c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
- if (!c) {
- bdrv_unref(bs);
- return NULL;
- }
-
- return c;
+ return bdrv_attach_child(parent, bs, bdref_key, child_role, errp);
}
/* TODO Future callers may need to specify parent/child_role in order for
@@ -3877,22 +3883,12 @@ static void bdrv_close(BlockDriverState *bs)
bs->drv = NULL;
}
- bdrv_set_backing_hd(bs, NULL, &error_abort);
-
- if (bs->file != NULL) {
- bdrv_unref_child(bs, bs->file);
- bs->file = NULL;
- }
-
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
- /* TODO Remove bdrv_unref() from drivers' close function and use
- * bdrv_unref_child() here */
- if (child->bs->inherits_from == bs) {
- child->bs->inherits_from = NULL;
- }
- bdrv_detach_child(child);
+ bdrv_unref_child(bs, child);
}
+ bs->backing = NULL;
+ bs->file = NULL;
g_free(bs->opaque);
bs->opaque = NULL;
atomic_set(&bs->copy_on_read, 0);
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 7a81892a52..ae11605c9f 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -6,7 +6,7 @@ block-obj-$(CONFIG_BOCHS) += bochs.o
block-obj-$(CONFIG_VVFAT) += vvfat.o
block-obj-$(CONFIG_DMG) += dmg.o
-block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o
+block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o qcow2-threads.o
block-obj-$(CONFIG_QED) += qed.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-$(CONFIG_QED) += qed-check.o
block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
diff --git a/block/backup.c b/block/backup.c
index 916817d8b1..00f4f8af53 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -112,7 +112,8 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;
- hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
+ assert(QEMU_IS_ALIGNED(start, job->cluster_size));
+ hbitmap_reset(job->copy_bitmap, start, job->cluster_size);
nbytes = MIN(job->cluster_size, job->len - start);
if (!*bounce_buffer) {
*bounce_buffer = blk_blockalign(blk, job->cluster_size);
@@ -145,7 +146,7 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
return nbytes;
fail:
- hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
+ hbitmap_set(job->copy_bitmap, start, job->cluster_size);
return ret;
}
@@ -165,16 +166,15 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;
assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
+ assert(QEMU_IS_ALIGNED(start, job->cluster_size));
nbytes = MIN(job->copy_range_size, end - start);
nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
- hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
- nr_clusters);
+ hbitmap_reset(job->copy_bitmap, start, job->cluster_size * nr_clusters);
ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
read_flags, write_flags);
if (ret < 0) {
trace_backup_do_cow_copy_range_fail(job, start, ret);
- hbitmap_set(job->copy_bitmap, start / job->cluster_size,
- nr_clusters);
+ hbitmap_set(job->copy_bitmap, start, job->cluster_size * nr_clusters);
return ret;
}
@@ -202,7 +202,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
cow_request_begin(&cow_request, job, start, end);
while (start < end) {
- if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
+ if (!hbitmap_get(job->copy_bitmap, start)) {
trace_backup_do_cow_skip(job, start);
start += job->cluster_size;
continue; /* already copied */
@@ -298,12 +298,16 @@ static void backup_clean(Job *job)
assert(s->target);
blk_unref(s->target);
s->target = NULL;
+
+ if (s->copy_bitmap) {
+ hbitmap_free(s->copy_bitmap);
+ s->copy_bitmap = NULL;
+ }
}
void backup_do_checkpoint(BlockJob *job, Error **errp)
{
BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
- int64_t len;
assert(block_job_driver(job) == &backup_job_driver);
@@ -313,8 +317,7 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
return;
}
- len = DIV_ROUND_UP(backup_job->len, backup_job->cluster_size);
- hbitmap_set(backup_job->copy_bitmap, 0, len);
+ hbitmap_set(backup_job->copy_bitmap, 0, backup_job->len);
}
static void backup_drain(BlockJob *job)
@@ -365,20 +368,44 @@ static bool coroutine_fn yield_and_check(BackupBlockJob *job)
return false;
}
-static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
+static bool bdrv_is_unallocated_range(BlockDriverState *bs,
+ int64_t offset, int64_t bytes)
+{
+ int64_t end = offset + bytes;
+
+ while (offset < end && !bdrv_is_allocated(bs, offset, bytes, &bytes)) {
+ if (bytes == 0) {
+ return true;
+ }
+ offset += bytes;
+ bytes = end - offset;
+ }
+
+ return offset >= end;
+}
+
+static int coroutine_fn backup_loop(BackupBlockJob *job)
{
int ret;
bool error_is_read;
- int64_t cluster;
+ int64_t offset;
HBitmapIter hbi;
+ BlockDriverState *bs = blk_bs(job->common.blk);
hbitmap_iter_init(&hbi, job->copy_bitmap, 0);
- while ((cluster = hbitmap_iter_next(&hbi)) != -1) {
+ while ((offset = hbitmap_iter_next(&hbi)) != -1) {
+ if (job->sync_mode == MIRROR_SYNC_MODE_TOP &&
+ bdrv_is_unallocated_range(bs, offset, job->cluster_size))
+ {
+ hbitmap_reset(job->copy_bitmap, offset, job->cluster_size);
+ continue;
+ }
+
do {
if (yield_and_check(job)) {
return 0;
}
- ret = backup_do_cow(job, cluster * job->cluster_size,
+ ret = backup_do_cow(job, offset,
job->cluster_size, &error_is_read, false);
if (ret < 0 && backup_error_action(job, error_is_read, -ret) ==
BLOCK_ERROR_ACTION_REPORT)
@@ -394,66 +421,43 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
/* init copy_bitmap from sync_bitmap */
static void backup_incremental_init_copy_bitmap(BackupBlockJob *job)
{
- BdrvDirtyBitmapIter *dbi;
- int64_t offset;
- int64_t end = DIV_ROUND_UP(bdrv_dirty_bitmap_size(job->sync_bitmap),
- job->cluster_size);
+ uint64_t offset = 0;
+ uint64_t bytes = job->len;
- dbi = bdrv_dirty_iter_new(job->sync_bitmap);
- while ((offset = bdrv_dirty_iter_next(dbi)) != -1) {
- int64_t cluster = offset / job->cluster_size;
- int64_t next_cluster;
-
- offset += bdrv_dirty_bitmap_granularity(job->sync_bitmap);
- if (offset >= bdrv_dirty_bitmap_size(job->sync_bitmap)) {
- hbitmap_set(job->copy_bitmap, cluster, end - cluster);
- break;
- }
+ while (bdrv_dirty_bitmap_next_dirty_area(job->sync_bitmap,
+ &offset, &bytes))
+ {
+ hbitmap_set(job->copy_bitmap, offset, bytes);
- offset = bdrv_dirty_bitmap_next_zero(job->sync_bitmap, offset,
- UINT64_MAX);
- if (offset == -1) {
- hbitmap_set(job->copy_bitmap, cluster, end - cluster);
+ offset += bytes;
+ if (offset >= job->len) {
break;
}
-
- next_cluster = DIV_ROUND_UP(offset, job->cluster_size);
- hbitmap_set(job->copy_bitmap, cluster, next_cluster - cluster);
- if (next_cluster >= end) {
- break;
- }
-
- bdrv_set_dirty_iter(dbi, next_cluster * job->cluster_size);
+ bytes = job->len - offset;
}
/* TODO job_progress_set_remaining() would make more sense */
job_progress_update(&job->common.job,
- job->len - hbitmap_count(job->copy_bitmap) * job->cluster_size);
-
- bdrv_dirty_iter_free(dbi);
+ job->len - hbitmap_count(job->copy_bitmap));
}
static int coroutine_fn backup_run(Job *job, Error **errp)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
BlockDriverState *bs = blk_bs(s->common.blk);
- int64_t offset, nb_clusters;
int ret = 0;
QLIST_INIT(&s->inflight_reqs);
qemu_co_rwlock_init(&s->flush_rwlock);
- nb_clusters = DIV_ROUND_UP(s->len, s->cluster_size);
job_progress_set_remaining(job, s->len);
- s->copy_bitmap = hbitmap_alloc(nb_clusters, 0);
if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
backup_incremental_init_copy_bitmap(s);
} else {
- hbitmap_set(s->copy_bitmap, 0, nb_clusters);
+ hbitmap_set(s->copy_bitmap, 0, s->len);
}
-
s->before_write.notify = backup_before_write_notify;
bdrv_add_before_write_notifier(bs, &s->before_write);
@@ -465,68 +469,8 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
* notify callback service CoW requests. */
job_yield(job);
}
- } else if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
- ret = backup_run_incremental(s);
} else {
- /* Both FULL and TOP SYNC_MODE's require copying.. */
- for (offset = 0; offset < s->len;
- offset += s->cluster_size) {
- bool error_is_read;
- int alloced = 0;
-
- if (yield_and_check(s)) {
- break;
- }
-
- if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
- int i;
- int64_t n;
-
- /* Check to see if these blocks are already in the
- * backing file. */
-
- for (i = 0; i < s->cluster_size;) {
- /* bdrv_is_allocated() only returns true/false based
- * on the first set of sectors it comes across that
- * are are all in the same state.
- * For that reason we must verify each sector in the
- * backup cluster length. We end up copying more than
- * needed but at some point that is always the case. */
- alloced =
- bdrv_is_allocated(bs, offset + i,
- s->cluster_size - i, &n);
- i += n;
-
- if (alloced || n == 0) {
- break;
- }
- }
-
- /* If the above loop never found any sectors that are in
- * the topmost image, skip this backup. */
- if (alloced == 0) {
- continue;
- }
- }
- /* FULL sync mode we copy the whole drive. */
- if (alloced < 0) {
- ret = alloced;
- } else {
- ret = backup_do_cow(s, offset, s->cluster_size,
- &error_is_read, false);
- }
- if (ret < 0) {
- /* Depending on error action, fail now or retry cluster */
- BlockErrorAction action =
- backup_error_action(s, error_is_read, -ret);
- if (action == BLOCK_ERROR_ACTION_REPORT) {
- break;
- } else {
- offset -= s->cluster_size;
- continue;
- }
- }
- }
+ ret = backup_loop(s);
}
notifier_with_return_remove(&s->before_write);
@@ -534,7 +478,6 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
/* wait until pending backup_do_cow() calls have completed */
qemu_co_rwlock_wrlock(&s->flush_rwlock);
qemu_co_rwlock_unlock(&s->flush_rwlock);
- hbitmap_free(s->copy_bitmap);
return ret;
}
@@ -554,6 +497,42 @@ static const BlockJobDriver backup_job_driver = {
.drain = backup_drain,
};
+static int64_t backup_calculate_cluster_size(BlockDriverState *target,
+ Error **errp)
+{
+ int ret;
+ BlockDriverInfo bdi;
+
+ /*
+ * If there is no backing file on the target, we cannot rely on COW if our
+ * backup cluster size is smaller than the target cluster size. Even for
+ * targets with a backing file, try to avoid COW if possible.
+ */
+ ret = bdrv_get_info(target, &bdi);
+ if (ret == -ENOTSUP && !target->backing) {
+ /* Cluster size is not defined */
+ warn_report("The target block device doesn't provide "
+ "information about the block size and it doesn't have a "
+ "backing file. The default block size of %u bytes is "
+ "used. If the actual block size of the target exceeds "
+ "this default, the backup may be unusable",
+ BACKUP_CLUSTER_SIZE_DEFAULT);
+ return BACKUP_CLUSTER_SIZE_DEFAULT;
+ } else if (ret < 0 && !target->backing) {
+ error_setg_errno(errp, -ret,
+ "Couldn't determine the cluster size of the target image, "
+ "which has no backing file");
+ error_append_hint(errp,
+ "Aborting, since this may create an unusable destination image\n");
+ return ret;
+ } else if (ret < 0 && target->backing) {
+ /* Not fatal; just trudge on ahead. */
+ return BACKUP_CLUSTER_SIZE_DEFAULT;
+ }
+
+ return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+}
+
BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, int64_t speed,
MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
@@ -565,9 +544,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
JobTxn *txn, Error **errp)
{
int64_t len;
- BlockDriverInfo bdi;
BackupBlockJob *job = NULL;
int ret;
+ int64_t cluster_size;
+ HBitmap *copy_bitmap = NULL;
assert(bs);
assert(target);
@@ -629,6 +609,13 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
goto error;
}
+ cluster_size = backup_calculate_cluster_size(target, errp);
+ if (cluster_size < 0) {
+ goto error;
+ }
+
+ copy_bitmap = hbitmap_alloc(len, ctz32(cluster_size));
+
/* job->len is fixed, so we can't allow resize */
job = block_job_create(job_id, &backup_job_driver, txn, bs,
BLK_PERM_CONSISTENT_READ,
@@ -657,33 +644,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
/* Detect image-fleecing (and similar) schemes */
job->serialize_target_writes = bdrv_chain_contains(target, bs);
-
- /* If there is no backing file on the target, we cannot rely on COW if our
- * backup cluster size is smaller than the target cluster size. Even for
- * targets with a backing file, try to avoid COW if possible. */
- ret = bdrv_get_info(target, &bdi);
- if (ret == -ENOTSUP && !target->backing) {
- /* Cluster size is not defined */
- warn_report("The target block device doesn't provide "
- "information about the block size and it doesn't have a "
- "backing file. The default block size of %u bytes is "
- "used. If the actual block size of the target exceeds "
- "this default, the backup may be unusable",
- BACKUP_CLUSTER_SIZE_DEFAULT);
- job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
- } else if (ret < 0 && !target->backing) {
- error_setg_errno(errp, -ret,
- "Couldn't determine the cluster size of the target image, "
- "which has no backing file");
- error_append_hint(errp,
- "Aborting, since this may create an unusable destination image\n");
- goto error;
- } else if (ret < 0 && target->backing) {
- /* Not fatal; just trudge on ahead. */
- job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
- } else {
- job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
- }
+ job->cluster_size = cluster_size;
+ job->copy_bitmap = copy_bitmap;
+ copy_bitmap = NULL;
job->use_copy_range = true;
job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
blk_get_max_transfer(job->target));
@@ -699,6 +662,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
return &job->common;
error:
+ if (copy_bitmap) {
+ assert(!job || !job->copy_bitmap);
+ hbitmap_free(copy_bitmap);
+ }
if (sync_bitmap) {
bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
}
diff --git a/block/block-backend.c b/block/block-backend.c
index 4c0a8ef88d..ad3e1c882d 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -392,7 +392,6 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
perm, BLK_PERM_ALL, blk, errp);
if (!blk->root) {
- bdrv_unref(bs);
blk_unref(blk);
return NULL;
}
@@ -800,12 +799,12 @@ void blk_remove_bs(BlockBackend *blk)
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
+ bdrv_ref(bs);
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
blk->perm, blk->shared_perm, blk, errp);
if (blk->root == NULL) {
return -EPERM;
}
- bdrv_ref(bs);
notifier_list_notify(&blk->insert_bs_notifiers, blk);
if (tgm->throttle_state) {
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 8a75366c92..b2487101ed 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -29,7 +29,6 @@
#include "qapi/error.h"
#include "qemu/cutils.h"
-#include "block/block_int.h"
#include "qcow2.h"
/* NOTICE: BME here means Bitmaps Extension and used as a namespace for
@@ -754,7 +753,7 @@ static int bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list,
dir_offset = *offset;
}
- dir = g_try_malloc(dir_size);
+ dir = g_try_malloc0(dir_size);
if (dir == NULL) {
return -ENOMEM;
}
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index df02e7b20a..b33bcbc984 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -23,7 +23,6 @@
*/
#include "qemu/osdep.h"
-#include "block/block_int.h"
#include "qemu-common.h"
#include "qcow2.h"
#include "trace.h"
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index b36f4aa84a..cf892f37a8 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -27,7 +27,6 @@
#include "qapi/error.h"
#include "qemu-common.h"
-#include "block/block_int.h"
#include "qcow2.h"
#include "qemu/bswap.h"
#include "trace.h"
@@ -472,13 +471,12 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs,
{
if (bytes && bs->encrypted) {
BDRVQcow2State *s = bs->opaque;
- int64_t offset = (s->crypt_physical_offset ?
- (cluster_offset + offset_in_cluster) :
- (src_cluster_offset + offset_in_cluster));
assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
assert((bytes & ~BDRV_SECTOR_MASK) == 0);
assert(s->crypto);
- if (qcrypto_block_encrypt(s->crypto, offset, buffer, bytes, NULL) < 0) {
+ if (qcow2_co_encrypt(bs, cluster_offset,
+ src_cluster_offset + offset_in_cluster,
+ buffer, bytes) < 0) {
return false;
}
}
@@ -833,7 +831,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
assert(start->offset + start->nb_bytes <= end->offset);
assert(!m->data_qiov || m->data_qiov->size == data_bytes);
- if (start->nb_bytes == 0 && end->nb_bytes == 0) {
+ if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) {
return 0;
}
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 0b09d6838b..4c1794f9af 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -25,7 +25,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
-#include "block/block_int.h"
#include "qcow2.h"
#include "qemu/range.h"
#include "qemu/bswap.h"
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index a6ffae89a6..d0e7fa9311 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -24,7 +24,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "block/block_int.h"
#include "qcow2.h"
#include "qemu/bswap.h"
#include "qemu/error-report.h"
diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
new file mode 100644
index 0000000000..3b1e63fe41
--- /dev/null
+++ b/block/qcow2-threads.c
@@ -0,0 +1,268 @@
+/*
+ * Threaded data processing for Qcow2: compression, encryption
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ * Copyright (c) 2018 Virtuozzo International GmbH. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+
+#define ZLIB_CONST
+#include <zlib.h>
+
+#include "qcow2.h"
+#include "block/thread-pool.h"
+#include "crypto.h"
+
+static int coroutine_fn
+qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, void *arg)
+{
+ int ret;
+ BDRVQcow2State *s = bs->opaque;
+ ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+
+ qemu_co_mutex_lock(&s->lock);
+ while (s->nb_threads >= QCOW2_MAX_THREADS) {
+ qemu_co_queue_wait(&s->thread_task_queue, &s->lock);
+ }
+ s->nb_threads++;
+ qemu_co_mutex_unlock(&s->lock);
+
+ ret = thread_pool_submit_co(pool, func, arg);
+
+ qemu_co_mutex_lock(&s->lock);
+ s->nb_threads--;
+ qemu_co_queue_next(&s->thread_task_queue);
+ qemu_co_mutex_unlock(&s->lock);
+
+ return ret;
+}
+
+
+/*
+ * Compression
+ */
+
+typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size,
+ const void *src, size_t src_size);
+typedef struct Qcow2CompressData {
+ void *dest;
+ size_t dest_size;
+ const void *src;
+ size_t src_size;
+ ssize_t ret;
+
+ Qcow2CompressFunc func;
+} Qcow2CompressData;
+
+/*
+ * qcow2_compress()
+ *
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: compressed size on success
+ * -ENOMEM destination buffer is not enough to store compressed data
+ * -EIO on any other error
+ */
+static ssize_t qcow2_compress(void *dest, size_t dest_size,
+ const void *src, size_t src_size)
+{
+ ssize_t ret;
+ z_stream strm;
+
+ /* best compression, small window, no zlib header */
+ memset(&strm, 0, sizeof(strm));
+ ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+ -12, 9, Z_DEFAULT_STRATEGY);
+ if (ret != Z_OK) {
+ return -EIO;
+ }
+
+ /*
+ * strm.next_in is not const in old zlib versions, such as those used on
+ * OpenBSD/NetBSD, so cast the const away
+ */
+ strm.avail_in = src_size;
+ strm.next_in = (void *) src;
+ strm.avail_out = dest_size;
+ strm.next_out = dest;
+
+ ret = deflate(&strm, Z_FINISH);
+ if (ret == Z_STREAM_END) {
+ ret = dest_size - strm.avail_out;
+ } else {
+ ret = (ret == Z_OK ? -ENOMEM : -EIO);
+ }
+
+ deflateEnd(&strm);
+
+ return ret;
+}
+
+/*
+ * qcow2_decompress()
+ *
+ * Decompress some data (not more than @src_size bytes) to produce exactly
+ * @dest_size bytes.
+ *
+ * @dest - destination buffer, @dest_size bytes
+ * @src - source buffer, @src_size bytes
+ *
+ * Returns: 0 on success
+ * -1 on fail
+ */
+static ssize_t qcow2_decompress(void *dest, size_t dest_size,
+ const void *src, size_t src_size)
+{
+ int ret = 0;
+ z_stream strm;
+
+ memset(&strm, 0, sizeof(strm));
+ strm.avail_in = src_size;
+ strm.next_in = (void *) src;
+ strm.avail_out = dest_size;
+ strm.next_out = dest;
+
+ ret = inflateInit2(&strm, -12);
+ if (ret != Z_OK) {
+ return -1;
+ }
+
+ ret = inflate(&strm, Z_FINISH);
+ if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) {
+ /*
+ * We approve Z_BUF_ERROR because we need @dest buffer to be filled, but
+ * @src buffer may be processed partly (because in qcow2 we know size of
+ * compressed data with precision of one sector)
+ */
+ ret = -1;
+ }
+
+ inflateEnd(&strm);
+
+ return ret;
+}
+
+static int qcow2_compress_pool_func(void *opaque)
+{
+ Qcow2CompressData *data = opaque;
+
+ data->ret = data->func(data->dest, data->dest_size,
+ data->src, data->src_size);
+
+ return 0;
+}
+
+static ssize_t coroutine_fn
+qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size,
+ const void *src, size_t src_size, Qcow2CompressFunc func)
+{
+ Qcow2CompressData arg = {
+ .dest = dest,
+ .dest_size = dest_size,
+ .src = src,
+ .src_size = src_size,
+ .func = func,
+ };
+
+ qcow2_co_process(bs, qcow2_compress_pool_func, &arg);
+
+ return arg.ret;
+}
+
+ssize_t coroutine_fn
+qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
+ const void *src, size_t src_size)
+{
+ return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
+ qcow2_compress);
+}
+
+ssize_t coroutine_fn
+qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
+ const void *src, size_t src_size)
+{
+ return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
+ qcow2_decompress);
+}
+
+
+/*
+ * Cryptography
+ */
+
+/*
+ * Qcow2EncDecFunc: common prototype of qcrypto_block_encrypt() and
+ * qcrypto_block_decrypt() functions.
+ */
+typedef int (*Qcow2EncDecFunc)(QCryptoBlock *block, uint64_t offset,
+ uint8_t *buf, size_t len, Error **errp);
+
+typedef struct Qcow2EncDecData {
+ QCryptoBlock *block;
+ uint64_t offset;
+ uint8_t *buf;
+ size_t len;
+
+ Qcow2EncDecFunc func;
+} Qcow2EncDecData;
+
+static int qcow2_encdec_pool_func(void *opaque)
+{
+ Qcow2EncDecData *data = opaque;
+
+ return data->func(data->block, data->offset, data->buf, data->len, NULL);
+}
+
+static int coroutine_fn
+qcow2_co_encdec(BlockDriverState *bs, uint64_t file_cluster_offset,
+ uint64_t offset, void *buf, size_t len, Qcow2EncDecFunc func)
+{
+ BDRVQcow2State *s = bs->opaque;
+ Qcow2EncDecData arg = {
+ .block = s->crypto,
+ .offset = s->crypt_physical_offset ?
+ file_cluster_offset + offset_into_cluster(s, offset) :
+ offset,
+ .buf = buf,
+ .len = len,
+ .func = func,
+ };
+
+ return qcow2_co_process(bs, qcow2_encdec_pool_func, &arg);
+}
+
+int coroutine_fn
+qcow2_co_encrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
+ uint64_t offset, void *buf, size_t len)
+{
+ return qcow2_co_encdec(bs, file_cluster_offset, offset, buf, len,
+ qcrypto_block_encrypt);
+}
+
+int coroutine_fn
+qcow2_co_decrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
+ uint64_t offset, void *buf, size_t len)
+{
+ return qcow2_co_encdec(bs, file_cluster_offset, offset, buf, len,
+ qcrypto_block_decrypt);
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index d39882785d..f2cb131048 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -24,10 +24,6 @@
#include "qemu/osdep.h"
-#define ZLIB_CONST
-#include <zlib.h>
-
-#include "block/block_int.h"
#include "block/qdict.h"
#include "sysemu/block-backend.h"
#include "qemu/module.h"
@@ -44,7 +40,6 @@
#include "qapi/qobject-input-visitor.h"
#include "qapi/qapi-visit-block-core.h"
#include "crypto.h"
-#include "block/thread-pool.h"
/*
Differences with QCOW:
@@ -302,7 +297,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
}
s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
qcow2_crypto_hdr_read_func,
- bs, cflags, 1, errp);
+ bs, cflags, QCOW2_MAX_THREADS, errp);
if (!s->crypto) {
return -EINVAL;
}
@@ -1543,7 +1538,8 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
}
s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
- NULL, NULL, cflags, 1, errp);
+ NULL, NULL, cflags,
+ QCOW2_MAX_THREADS, errp);
if (!s->crypto) {
ret = -EINVAL;
goto fail;
@@ -1698,7 +1694,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
}
#endif
- qemu_co_queue_init(&s->compress_wait_queue);
+ qemu_co_queue_init(&s->thread_task_queue);
return ret;
@@ -1974,8 +1970,6 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
qemu_iovec_init(&hd_qiov, qiov->niov);
- qemu_co_mutex_lock(&s->lock);
-
while (bytes != 0) {
/* prepare next request */
@@ -1985,7 +1979,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
}
+ qemu_co_mutex_lock(&s->lock);
ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
+ qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
goto fail;
}
@@ -2000,10 +1996,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
if (bs->backing) {
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
- qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
&hd_qiov, 0);
- qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
}
@@ -2019,11 +2013,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
break;
case QCOW2_CLUSTER_COMPRESSED:
- qemu_co_mutex_unlock(&s->lock);
ret = qcow2_co_preadv_compressed(bs, cluster_offset,
offset, cur_bytes,
&hd_qiov);
- qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
}
@@ -2060,11 +2052,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_preadv(s->data_file,
cluster_offset + offset_in_cluster,
cur_bytes, &hd_qiov, 0);
- qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
}
@@ -2072,13 +2062,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
assert(s->crypto);
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
- if (qcrypto_block_decrypt(s->crypto,
- (s->crypt_physical_offset ?
- cluster_offset + offset_in_cluster :
- offset),
- cluster_data,
- cur_bytes,
- NULL) < 0) {
+ if (qcow2_co_decrypt(bs, cluster_offset, offset,
+ cluster_data, cur_bytes) < 0) {
ret = -EIO;
goto fail;
}
@@ -2099,8 +2084,6 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
ret = 0;
fail:
- qemu_co_mutex_unlock(&s->lock);
-
qemu_iovec_destroy(&hd_qiov);
qemu_vfree(cluster_data);
@@ -2120,6 +2103,11 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
continue;
}
+ /* If COW regions are handled already, skip this too */
+ if (m->skip_cow) {
+ continue;
+ }
+
/* The data (middle) region must be immediately after the
* start region */
if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
@@ -2145,6 +2133,80 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
return false;
}
+static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+ int64_t nr;
+ return !bytes ||
+ (!bdrv_is_allocated_above(bs, NULL, offset, bytes, &nr) && nr == bytes);
+}
+
+static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
+{
+ /*
+ * This check is designed for optimization shortcut so it must be
+ * efficient.
+ * Instead of is_zero(), use is_unallocated() as it is faster (but not
+ * as accurate and can result in false negatives).
+ */
+ return is_unallocated(bs, m->offset + m->cow_start.offset,
+ m->cow_start.nb_bytes) &&
+ is_unallocated(bs, m->offset + m->cow_end.offset,
+ m->cow_end.nb_bytes);
+}
+
+static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
+{
+ BDRVQcow2State *s = bs->opaque;
+ QCowL2Meta *m;
+
+ if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) {
+ return 0;
+ }
+
+ if (bs->encrypted) {
+ return 0;
+ }
+
+ for (m = l2meta; m != NULL; m = m->next) {
+ int ret;
+
+ if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) {
+ continue;
+ }
+
+ if (!is_zero_cow(bs, m)) {
+ continue;
+ }
+
+ /*
+ * instead of writing zero COW buffers,
+ * efficiently zero out the whole clusters
+ */
+
+ ret = qcow2_pre_write_overlap_check(bs, 0, m->alloc_offset,
+ m->nb_clusters * s->cluster_size,
+ true);
+ if (ret < 0) {
+ return ret;
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
+ ret = bdrv_co_pwrite_zeroes(s->data_file, m->alloc_offset,
+ m->nb_clusters * s->cluster_size,
+ BDRV_REQ_NO_FALLBACK);
+ if (ret < 0) {
+ if (ret != -ENOTSUP && ret != -EAGAIN) {
+ return ret;
+ }
+ continue;
+ }
+
+ trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, m->nb_clusters);
+ m->skip_cow = true;
+ }
+ return 0;
+}
+
static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
@@ -2181,11 +2243,20 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
&cluster_offset, &l2meta);
if (ret < 0) {
- goto fail;
+ goto out_locked;
}
assert((cluster_offset & 511) == 0);
+ ret = qcow2_pre_write_overlap_check(bs, 0,
+ cluster_offset + offset_in_cluster,
+ cur_bytes, true);
+ if (ret < 0) {
+ goto out_locked;
+ }
+
+ qemu_co_mutex_unlock(&s->lock);
+
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
@@ -2197,7 +2268,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
* s->cluster_size);
if (cluster_data == NULL) {
ret = -ENOMEM;
- goto fail;
+ goto out_unlocked;
}
}
@@ -2205,24 +2276,20 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
- if (qcrypto_block_encrypt(s->crypto,
- (s->crypt_physical_offset ?
- cluster_offset + offset_in_cluster :
- offset),
- cluster_data,
- cur_bytes, NULL) < 0) {
+ if (qcow2_co_encrypt(bs, cluster_offset, offset,
+ cluster_data, cur_bytes) < 0) {
ret = -EIO;
- goto fail;
+ goto out_unlocked;
}
qemu_iovec_reset(&hd_qiov);
qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
}
- ret = qcow2_pre_write_overlap_check(bs, 0,
- cluster_offset + offset_in_cluster, cur_bytes, true);
+ /* Try to efficiently initialize the physical space with zeroes */
+ ret = handle_alloc_space(bs, l2meta);
if (ret < 0) {
- goto fail;
+ goto out_unlocked;
}
/* If we need to do COW, check if it's possible to merge the
@@ -2230,22 +2297,22 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
* If it's not possible (or not necessary) then write the
* guest data now. */
if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) {
- qemu_co_mutex_unlock(&s->lock);
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
trace_qcow2_writev_data(qemu_coroutine_self(),
cluster_offset + offset_in_cluster);
ret = bdrv_co_pwritev(s->data_file,
cluster_offset + offset_in_cluster,
cur_bytes, &hd_qiov, 0);
- qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
- goto fail;
+ goto out_unlocked;
}
}
+ qemu_co_mutex_lock(&s->lock);
+
ret = qcow2_handle_l2meta(bs, &l2meta, true);
if (ret) {
- goto fail;
+ goto out_locked;
}
bytes -= cur_bytes;
@@ -2254,8 +2321,12 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
}
ret = 0;
+ goto out_locked;
-fail:
+out_unlocked:
+ qemu_co_mutex_lock(&s->lock);
+
+out_locked:
qcow2_handle_l2meta(bs, &l2meta, false);
qemu_co_mutex_unlock(&s->lock);
@@ -3921,171 +3992,6 @@ fail:
return ret;
}
-/*
- * qcow2_compress()
- *
- * @dest - destination buffer, @dest_size bytes
- * @src - source buffer, @src_size bytes
- *
- * Returns: compressed size on success
- * -ENOMEM destination buffer is not enough to store compressed data
- * -EIO on any other error
- */
-static ssize_t qcow2_compress(void *dest, size_t dest_size,
- const void *src, size_t src_size)
-{
- ssize_t ret;
- z_stream strm;
-
- /* best compression, small window, no zlib header */
- memset(&strm, 0, sizeof(strm));
- ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
- -12, 9, Z_DEFAULT_STRATEGY);
- if (ret != Z_OK) {
- return -EIO;
- }
-
- /* strm.next_in is not const in old zlib versions, such as those used on
- * OpenBSD/NetBSD, so cast the const away */
- strm.avail_in = src_size;
- strm.next_in = (void *) src;
- strm.avail_out = dest_size;
- strm.next_out = dest;
-
- ret = deflate(&strm, Z_FINISH);
- if (ret == Z_STREAM_END) {
- ret = dest_size - strm.avail_out;
- } else {
- ret = (ret == Z_OK ? -ENOMEM : -EIO);
- }
-
- deflateEnd(&strm);
-
- return ret;
-}
-
-/*
- * qcow2_decompress()
- *
- * Decompress some data (not more than @src_size bytes) to produce exactly
- * @dest_size bytes.
- *
- * @dest - destination buffer, @dest_size bytes
- * @src - source buffer, @src_size bytes
- *
- * Returns: 0 on success
- * -1 on fail
- */
-static ssize_t qcow2_decompress(void *dest, size_t dest_size,
- const void *src, size_t src_size)
-{
- int ret = 0;
- z_stream strm;
-
- memset(&strm, 0, sizeof(strm));
- strm.avail_in = src_size;
- strm.next_in = (void *) src;
- strm.avail_out = dest_size;
- strm.next_out = dest;
-
- ret = inflateInit2(&strm, -12);
- if (ret != Z_OK) {
- return -1;
- }
-
- ret = inflate(&strm, Z_FINISH);
- if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) {
- /* We approve Z_BUF_ERROR because we need @dest buffer to be filled, but
- * @src buffer may be processed partly (because in qcow2 we know size of
- * compressed data with precision of one sector) */
- ret = -1;
- }
-
- inflateEnd(&strm);
-
- return ret;
-}
-
-#define MAX_COMPRESS_THREADS 4
-
-typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size,
- const void *src, size_t src_size);
-typedef struct Qcow2CompressData {
- void *dest;
- size_t dest_size;
- const void *src;
- size_t src_size;
- ssize_t ret;
-
- Qcow2CompressFunc func;
-} Qcow2CompressData;
-
-static int qcow2_compress_pool_func(void *opaque)
-{
- Qcow2CompressData *data = opaque;
-
- data->ret = data->func(data->dest, data->dest_size,
- data->src, data->src_size);
-
- return 0;
-}
-
-static void qcow2_compress_complete(void *opaque, int ret)
-{
- qemu_coroutine_enter(opaque);
-}
-
-static ssize_t coroutine_fn
-qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size,
- const void *src, size_t src_size, Qcow2CompressFunc func)
-{
- BDRVQcow2State *s = bs->opaque;
- BlockAIOCB *acb;
- ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
- Qcow2CompressData arg = {
- .dest = dest,
- .dest_size = dest_size,
- .src = src,
- .src_size = src_size,
- .func = func,
- };
-
- while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) {
- qemu_co_queue_wait(&s->compress_wait_queue, NULL);
- }
-
- s->nb_compress_threads++;
- acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg,
- qcow2_compress_complete,
- qemu_coroutine_self());
-
- if (!acb) {
- s->nb_compress_threads--;
- return -EINVAL;
- }
- qemu_coroutine_yield();
- s->nb_compress_threads--;
- qemu_co_queue_next(&s->compress_wait_queue);
-
- return arg.ret;
-}
-
-static ssize_t coroutine_fn
-qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
- const void *src, size_t src_size)
-{
- return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
- qcow2_compress);
-}
-
-static ssize_t coroutine_fn
-qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
- const void *src, size_t src_size)
-{
- return qcow2_co_do_compress(bs, dest, dest_size, src, src_size,
- qcow2_decompress);
-}
-
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static coroutine_fn int
diff --git a/block/qcow2.h b/block/qcow2.h
index 8d92ef1fee..567375e56c 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -28,6 +28,7 @@
#include "crypto/block.h"
#include "qemu/coroutine.h"
#include "qemu/units.h"
+#include "block/block_int.h"
//#define DEBUG_ALLOC
//#define DEBUG_ALLOC2
@@ -267,6 +268,8 @@ typedef struct Qcow2BitmapHeaderExt {
uint64_t bitmap_directory_offset;
} QEMU_PACKED Qcow2BitmapHeaderExt;
+#define QCOW2_MAX_THREADS 4
+
typedef struct BDRVQcow2State {
int cluster_bits;
int cluster_size;
@@ -349,8 +352,8 @@ typedef struct BDRVQcow2State {
char *image_backing_format;
char *image_data_file;
- CoQueue compress_wait_queue;
- int nb_compress_threads;
+ CoQueue thread_task_queue;
+ int nb_threads;
BdrvChild *data_file;
} BDRVQcow2State;
@@ -402,6 +405,12 @@ typedef struct QCowL2Meta
*/
Qcow2COWRegion cow_end;
+ /*
+ * Indicates that COW regions are already handled and do not require
+ * any more processing.
+ */
+ bool skip_cow;
+
/**
* The I/O vector with the data from the actual guest write request.
* If non-NULL, this is meant to be merged together with the data
@@ -737,4 +746,17 @@ void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs,
const char *name,
Error **errp);
+ssize_t coroutine_fn
+qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size,
+ const void *src, size_t src_size);
+ssize_t coroutine_fn
+qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size,
+ const void *src, size_t src_size);
+int coroutine_fn
+qcow2_co_encrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
+ uint64_t offset, void *buf, size_t len);
+int coroutine_fn
+qcow2_co_decrypt(BlockDriverState *bs, uint64_t file_cluster_offset,
+ uint64_t offset, void *buf, size_t len);
+
#endif
diff --git a/block/quorum.c b/block/quorum.c
index 352f729136..133ee18204 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1019,7 +1019,6 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp);
if (child == NULL) {
s->next_child_index--;
- bdrv_unref(child_bs);
goto out;
}
s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
diff --git a/block/trace-events b/block/trace-events
index 79ccd8d824..1e0653ce6d 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -68,6 +68,7 @@ qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d"
qcow2_writev_data(void *co, uint64_t offset) "co %p offset 0x%" PRIx64
qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d"
qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d"
+qcow2_skip_cow(void *co, uint64_t offset, int nb_clusters) "co %p offset 0x%" PRIx64 " nb_clusters %d"
# qcow2-cluster.c
qcow2_alloc_clusters_offset(void *co, uint64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d"
diff --git a/blockdev.c b/blockdev.c
index 64ccef735b..17c2d801d7 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3466,11 +3466,16 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
backup->compress = false;
}
- bs = qmp_get_root_bs(backup->device, errp);
+ bs = bdrv_lookup_bs(backup->device, backup->device, errp);
if (!bs) {
return NULL;
}
+ if (!bs->drv) {
+ error_setg(errp, "Device has no medium");
+ return NULL;
+ }
+
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
diff --git a/blockjob.c b/blockjob.c
index 9ca942ba01..cc5f18e7cd 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -204,6 +204,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
{
BdrvChild *c;
+ bdrv_ref(bs);
c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
job, errp);
if (c == NULL) {
@@ -211,7 +212,6 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
}
job->nodes = g_slist_prepend(job->nodes, c);
- bdrv_ref(bs);
bdrv_op_block_all(bs, job->blocker);
return 0;
diff --git a/qapi/block-core.json b/qapi/block-core.json
index dcc935d655..1defcde048 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3231,6 +3231,8 @@
#
# @cor_write: a write due to copy-on-read (since 2.11)
#
+# @cluster_alloc_space: an allocation of file space for a cluster (since 4.1)
+#
# Since: 2.9
##
{ 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG',
@@ -3249,7 +3251,7 @@
'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev',
'pwritev_zero', 'pwritev_done', 'empty_image_prepare',
'l1_shrink_write_table', 'l1_shrink_free_l2_clusters',
- 'cor_write'] }
+ 'cor_write', 'cluster_alloc_space'] }
##
# @BlkdebugInjectErrorOptions:
diff --git a/qemu-img.c b/qemu-img.c
index 28fba1e7a7..b0535919b1 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3164,7 +3164,7 @@ static int img_rebase(int argc, char **argv)
BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
uint8_t *buf_old = NULL;
uint8_t *buf_new = NULL;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
char *filename;
const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
int c, flags, src_flags, ret;
@@ -3309,29 +3309,18 @@ static int img_rebase(int argc, char **argv)
/* For safe rebasing we need to compare old and new backing file */
if (!unsafe) {
- char backing_name[PATH_MAX];
QDict *options = NULL;
+ BlockDriverState *base_bs = backing_bs(bs);
- if (bs->backing) {
- if (bs->backing_format[0] != '\0') {
- options = qdict_new();
- qdict_put_str(options, "driver", bs->backing_format);
- }
-
- if (force_share) {
- if (!options) {
- options = qdict_new();
- }
- qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
- }
- bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
- blk_old_backing = blk_new_open(backing_name, NULL,
- options, src_flags, &local_err);
- if (!blk_old_backing) {
+ if (base_bs) {
+ blk_old_backing = blk_new(BLK_PERM_CONSISTENT_READ,
+ BLK_PERM_ALL);
+ ret = blk_insert_bs(blk_old_backing, base_bs,
+ &local_err);
+ if (ret < 0) {
error_reportf_err(local_err,
- "Could not open old backing file '%s': ",
- backing_name);
- ret = -1;
+ "Could not reuse old backing file '%s': ",
+ base_bs->filename);
goto out;
}
} else {
@@ -3364,15 +3353,34 @@ static int img_rebase(int argc, char **argv)
goto out;
}
- blk_new_backing = blk_new_open(out_real_path, NULL,
- options, src_flags, &local_err);
- g_free(out_real_path);
- if (!blk_new_backing) {
- error_reportf_err(local_err,
- "Could not open new backing file '%s': ",
- out_baseimg);
- ret = -1;
- goto out;
+ /*
+ * Find out whether we rebase an image on top of a previous image
+ * in its chain.
+ */
+ prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
+ if (prefix_chain_bs) {
+ g_free(out_real_path);
+ blk_new_backing = blk_new(BLK_PERM_CONSISTENT_READ,
+ BLK_PERM_ALL);
+ ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
+ &local_err);
+ if (ret < 0) {
+ error_reportf_err(local_err,
+ "Could not reuse backing file '%s': ",
+ out_baseimg);
+ goto out;
+ }
+ } else {
+ blk_new_backing = blk_new_open(out_real_path, NULL,
+ options, src_flags, &local_err);
+ g_free(out_real_path);
+ if (!blk_new_backing) {
+ error_reportf_err(local_err,
+ "Could not open new backing file '%s': ",
+ out_baseimg);
+ ret = -1;
+ goto out;
+ }
}
}
}
@@ -3448,6 +3456,23 @@ static int img_rebase(int argc, char **argv)
continue;
}
+ if (prefix_chain_bs) {
+ /*
+ * If cluster wasn't changed since prefix_chain, we don't need
+ * to take action
+ */
+ ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs,
+ offset, n, &n);
+ if (ret < 0) {
+ error_report("error while reading image metadata: %s",
+ strerror(-ret));
+ goto out;
+ }
+ if (!ret) {
+ continue;
+ }
+ }
+
/*
* Read old and new backing file and take into consideration that
* backing files may be smaller than the COW image.
diff --git a/tests/qemu-iotests/056 b/tests/qemu-iotests/056
index 3df323984d..f40fc11a09 100755
--- a/tests/qemu-iotests/056
+++ b/tests/qemu-iotests/056
@@ -214,7 +214,7 @@ class BackupTest(iotests.QMPTestCase):
res = self.vm.qmp('query-block-jobs')
self.assert_qmp(res, 'return[0]/status', 'concluded')
# Leave zombie job un-dismissed, observe a failure:
- res = self.qmp_backup_and_wait(serror='Need a root block node',
+ res = self.qmp_backup_and_wait(serror="Node 'drive0' is busy: block device is in use by block job: backup",
device='drive0', format=iotests.imgfmt,
sync='full', target=self.dest_img,
auto_dismiss=False)
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index 89e911400c..b91d8321bb 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -150,10 +150,15 @@ $QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io
echo
echo "=== Testing overlap while COW is in flight ==="
echo
+BACKING_IMG=$TEST_IMG.base
+TEST_IMG=$BACKING_IMG _make_test_img 1G
+
+$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io
+
# compat=0.10 is required in order to make the following discard actually
# unallocate the sector rather than make it a zero sector - we want COW, after
# all.
-IMGOPTS='compat=0.10' _make_test_img 1G
+IMGOPTS='compat=0.10' _make_test_img -b "$BACKING_IMG" 1G
# Write two clusters, the second one enforces creation of an L2 table after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index e42bf8c5a9..0f6b0658a1 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -97,7 +97,10 @@ read 512/512 bytes at offset 0
=== Testing overlap while COW is in flight ===
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=1073741824
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 backing_file=TEST_DIR/t.IMGFMT.base
wrote 65536/65536 bytes at offset 0
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 65536/65536 bytes at offset 536870912
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index eda90750eb..5534c2adf9 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -1436,12 +1436,6 @@ static void test_detach_indirect(bool by_parent_cb)
bdrv_unref(parent_b);
blk_unref(blk);
- /* XXX Once bdrv_close() unref's children instead of just detaching them,
- * this won't be necessary any more. */
- bdrv_unref(a);
- bdrv_unref(a);
- bdrv_unref(c);
-
g_assert_cmpint(a->refcnt, ==, 1);
g_assert_cmpint(b->refcnt, ==, 1);
g_assert_cmpint(c->refcnt, ==, 1);
diff --git a/tests/test-bdrv-graph-mod.c b/tests/test-bdrv-graph-mod.c
index 283dc84869..747c0bf8fc 100644
--- a/tests/test-bdrv-graph-mod.c
+++ b/tests/test-bdrv-graph-mod.c
@@ -116,7 +116,6 @@ static void test_update_perm_tree(void)
g_assert_nonnull(local_err);
error_free(local_err);
- bdrv_unref(bs);
blk_unref(root);
}