diff options
-rw-r--r-- | block.c | 46 | ||||
-rw-r--r-- | block/Makefile.objs | 2 | ||||
-rw-r--r-- | block/backup.c | 243 | ||||
-rw-r--r-- | block/block-backend.c | 3 | ||||
-rw-r--r-- | block/qcow2-bitmap.c | 3 | ||||
-rw-r--r-- | block/qcow2-cache.c | 1 | ||||
-rw-r--r-- | block/qcow2-cluster.c | 10 | ||||
-rw-r--r-- | block/qcow2-refcount.c | 1 | ||||
-rw-r--r-- | block/qcow2-snapshot.c | 1 | ||||
-rw-r--r-- | block/qcow2-threads.c | 268 | ||||
-rw-r--r-- | block/qcow2.c | 320 | ||||
-rw-r--r-- | block/qcow2.h | 26 | ||||
-rw-r--r-- | block/quorum.c | 1 | ||||
-rw-r--r-- | block/trace-events | 1 | ||||
-rw-r--r-- | blockdev.c | 7 | ||||
-rw-r--r-- | blockjob.c | 2 | ||||
-rw-r--r-- | qapi/block-core.json | 4 | ||||
-rw-r--r-- | qemu-img.c | 85 | ||||
-rwxr-xr-x | tests/qemu-iotests/056 | 2 | ||||
-rwxr-xr-x | tests/qemu-iotests/060 | 7 | ||||
-rw-r--r-- | tests/qemu-iotests/060.out | 5 | ||||
-rw-r--r-- | tests/test-bdrv-drain.c | 6 | ||||
-rw-r--r-- | tests/test-bdrv-graph-mod.c | 1 |
23 files changed, 615 insertions, 430 deletions
@@ -2243,6 +2243,13 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) } } +/* + * This function steals the reference to child_bs from the caller. + * That reference is later dropped by bdrv_root_unref_child(). + * + * On failure NULL is returned, errp is set and the reference to + * child_bs is also dropped. + */ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, @@ -2255,6 +2262,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp); if (ret < 0) { bdrv_abort_perm_update(child_bs); + bdrv_unref(child_bs); return NULL; } @@ -2274,6 +2282,14 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, return child; } +/* + * This function transfers the reference to child_bs from the caller + * to parent_bs. That reference is later dropped by parent_bs on + * bdrv_close() or if someone calls bdrv_unref_child(). + * + * On failure NULL is returned, errp is set and the reference to + * child_bs is also dropped. + */ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, @@ -2401,12 +2417,9 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, /* If backing_hd was already part of bs's backing chain, and * inherits_from pointed recursively to bs then let's update it to * point directly to bs (else it will become NULL). */ - if (update_inherits_from) { + if (bs->backing && update_inherits_from) { backing_hd->inherits_from = bs; } - if (!bs->backing) { - bdrv_unref(backing_hd); - } out: bdrv_refresh_limits(bs, NULL); @@ -2594,7 +2607,6 @@ BdrvChild *bdrv_open_child(const char *filename, const BdrvChildRole *child_role, bool allow_none, Error **errp) { - BdrvChild *c; BlockDriverState *bs; bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role, @@ -2603,13 +2615,7 @@ BdrvChild *bdrv_open_child(const char *filename, return NULL; } - c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp); - if (!c) { - bdrv_unref(bs); - return NULL; - } - - return c; + return bdrv_attach_child(parent, bs, bdref_key, child_role, errp); } /* TODO Future callers may need to specify parent/child_role in order for @@ -3877,22 +3883,12 @@ static void bdrv_close(BlockDriverState *bs) bs->drv = NULL; } - bdrv_set_backing_hd(bs, NULL, &error_abort); - - if (bs->file != NULL) { - bdrv_unref_child(bs, bs->file); - bs->file = NULL; - } - QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - /* TODO Remove bdrv_unref() from drivers' close function and use - * bdrv_unref_child() here */ - if (child->bs->inherits_from == bs) { - child->bs->inherits_from = NULL; - } - bdrv_detach_child(child); + bdrv_unref_child(bs, child); } + bs->backing = NULL; + bs->file = NULL; g_free(bs->opaque); bs->opaque = NULL; atomic_set(&bs->copy_on_read, 0); diff --git a/block/Makefile.objs b/block/Makefile.objs index 7a81892a52..ae11605c9f 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -6,7 +6,7 @@ block-obj-$(CONFIG_BOCHS) += bochs.o block-obj-$(CONFIG_VVFAT) += vvfat.o block-obj-$(CONFIG_DMG) += dmg.o -block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o +block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o qcow2-threads.o block-obj-$(CONFIG_QED) += qed.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-$(CONFIG_QED) += qed-check.o block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o diff --git a/block/backup.c b/block/backup.c index 916817d8b1..00f4f8af53 100644 --- a/block/backup.c +++ b/block/backup.c @@ -112,7 +112,8 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0; int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0; - hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1); + assert(QEMU_IS_ALIGNED(start, job->cluster_size)); + hbitmap_reset(job->copy_bitmap, start, job->cluster_size); nbytes = MIN(job->cluster_size, job->len - start); if (!*bounce_buffer) { *bounce_buffer = blk_blockalign(blk, job->cluster_size); @@ -145,7 +146,7 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, return nbytes; fail: - hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1); + hbitmap_set(job->copy_bitmap, start, job->cluster_size); return ret; } @@ -165,16 +166,15 @@ static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job, int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0; assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size)); + assert(QEMU_IS_ALIGNED(start, job->cluster_size)); nbytes = MIN(job->copy_range_size, end - start); nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size); - hbitmap_reset(job->copy_bitmap, start / job->cluster_size, - nr_clusters); + hbitmap_reset(job->copy_bitmap, start, job->cluster_size * nr_clusters); ret = blk_co_copy_range(blk, start, job->target, start, nbytes, read_flags, write_flags); if (ret < 0) { trace_backup_do_cow_copy_range_fail(job, start, ret); - hbitmap_set(job->copy_bitmap, start / job->cluster_size, - nr_clusters); + hbitmap_set(job->copy_bitmap, start, job->cluster_size * nr_clusters); return ret; } @@ -202,7 +202,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, cow_request_begin(&cow_request, job, start, end); while (start < end) { - if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) { + if (!hbitmap_get(job->copy_bitmap, start)) { trace_backup_do_cow_skip(job, start); start += job->cluster_size; continue; /* already copied */ @@ -298,12 +298,16 @@ static void backup_clean(Job *job) assert(s->target); blk_unref(s->target); s->target = NULL; + + if (s->copy_bitmap) { + hbitmap_free(s->copy_bitmap); + s->copy_bitmap = NULL; + } } void backup_do_checkpoint(BlockJob *job, Error **errp) { BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); - int64_t len; assert(block_job_driver(job) == &backup_job_driver); @@ -313,8 +317,7 @@ void backup_do_checkpoint(BlockJob *job, Error **errp) return; } - len = DIV_ROUND_UP(backup_job->len, backup_job->cluster_size); - hbitmap_set(backup_job->copy_bitmap, 0, len); + hbitmap_set(backup_job->copy_bitmap, 0, backup_job->len); } static void backup_drain(BlockJob *job) @@ -365,20 +368,44 @@ static bool coroutine_fn yield_and_check(BackupBlockJob *job) return false; } -static int coroutine_fn backup_run_incremental(BackupBlockJob *job) +static bool bdrv_is_unallocated_range(BlockDriverState *bs, + int64_t offset, int64_t bytes) +{ + int64_t end = offset + bytes; + + while (offset < end && !bdrv_is_allocated(bs, offset, bytes, &bytes)) { + if (bytes == 0) { + return true; + } + offset += bytes; + bytes = end - offset; + } + + return offset >= end; +} + +static int coroutine_fn backup_loop(BackupBlockJob *job) { int ret; bool error_is_read; - int64_t cluster; + int64_t offset; HBitmapIter hbi; + BlockDriverState *bs = blk_bs(job->common.blk); hbitmap_iter_init(&hbi, job->copy_bitmap, 0); - while ((cluster = hbitmap_iter_next(&hbi)) != -1) { + while ((offset = hbitmap_iter_next(&hbi)) != -1) { + if (job->sync_mode == MIRROR_SYNC_MODE_TOP && + bdrv_is_unallocated_range(bs, offset, job->cluster_size)) + { + hbitmap_reset(job->copy_bitmap, offset, job->cluster_size); + continue; + } + do { if (yield_and_check(job)) { return 0; } - ret = backup_do_cow(job, cluster * job->cluster_size, + ret = backup_do_cow(job, offset, job->cluster_size, &error_is_read, false); if (ret < 0 && backup_error_action(job, error_is_read, -ret) == BLOCK_ERROR_ACTION_REPORT) @@ -394,66 +421,43 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) /* init copy_bitmap from sync_bitmap */ static void backup_incremental_init_copy_bitmap(BackupBlockJob *job) { - BdrvDirtyBitmapIter *dbi; - int64_t offset; - int64_t end = DIV_ROUND_UP(bdrv_dirty_bitmap_size(job->sync_bitmap), - job->cluster_size); + uint64_t offset = 0; + uint64_t bytes = job->len; - dbi = bdrv_dirty_iter_new(job->sync_bitmap); - while ((offset = bdrv_dirty_iter_next(dbi)) != -1) { - int64_t cluster = offset / job->cluster_size; - int64_t next_cluster; - - offset += bdrv_dirty_bitmap_granularity(job->sync_bitmap); - if (offset >= bdrv_dirty_bitmap_size(job->sync_bitmap)) { - hbitmap_set(job->copy_bitmap, cluster, end - cluster); - break; - } + while (bdrv_dirty_bitmap_next_dirty_area(job->sync_bitmap, + &offset, &bytes)) + { + hbitmap_set(job->copy_bitmap, offset, bytes); - offset = bdrv_dirty_bitmap_next_zero(job->sync_bitmap, offset, - UINT64_MAX); - if (offset == -1) { - hbitmap_set(job->copy_bitmap, cluster, end - cluster); + offset += bytes; + if (offset >= job->len) { break; } - - next_cluster = DIV_ROUND_UP(offset, job->cluster_size); - hbitmap_set(job->copy_bitmap, cluster, next_cluster - cluster); - if (next_cluster >= end) { - break; - } - - bdrv_set_dirty_iter(dbi, next_cluster * job->cluster_size); + bytes = job->len - offset; } /* TODO job_progress_set_remaining() would make more sense */ job_progress_update(&job->common.job, - job->len - hbitmap_count(job->copy_bitmap) * job->cluster_size); - - bdrv_dirty_iter_free(dbi); + job->len - hbitmap_count(job->copy_bitmap)); } static int coroutine_fn backup_run(Job *job, Error **errp) { BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); BlockDriverState *bs = blk_bs(s->common.blk); - int64_t offset, nb_clusters; int ret = 0; QLIST_INIT(&s->inflight_reqs); qemu_co_rwlock_init(&s->flush_rwlock); - nb_clusters = DIV_ROUND_UP(s->len, s->cluster_size); job_progress_set_remaining(job, s->len); - s->copy_bitmap = hbitmap_alloc(nb_clusters, 0); if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { backup_incremental_init_copy_bitmap(s); } else { - hbitmap_set(s->copy_bitmap, 0, nb_clusters); + hbitmap_set(s->copy_bitmap, 0, s->len); } - s->before_write.notify = backup_before_write_notify; bdrv_add_before_write_notifier(bs, &s->before_write); @@ -465,68 +469,8 @@ static int coroutine_fn backup_run(Job *job, Error **errp) * notify callback service CoW requests. */ job_yield(job); } - } else if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { - ret = backup_run_incremental(s); } else { - /* Both FULL and TOP SYNC_MODE's require copying.. */ - for (offset = 0; offset < s->len; - offset += s->cluster_size) { - bool error_is_read; - int alloced = 0; - - if (yield_and_check(s)) { - break; - } - - if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { - int i; - int64_t n; - - /* Check to see if these blocks are already in the - * backing file. */ - - for (i = 0; i < s->cluster_size;) { - /* bdrv_is_allocated() only returns true/false based - * on the first set of sectors it comes across that - * are are all in the same state. - * For that reason we must verify each sector in the - * backup cluster length. We end up copying more than - * needed but at some point that is always the case. */ - alloced = - bdrv_is_allocated(bs, offset + i, - s->cluster_size - i, &n); - i += n; - - if (alloced || n == 0) { - break; - } - } - - /* If the above loop never found any sectors that are in - * the topmost image, skip this backup. */ - if (alloced == 0) { - continue; - } - } - /* FULL sync mode we copy the whole drive. */ - if (alloced < 0) { - ret = alloced; - } else { - ret = backup_do_cow(s, offset, s->cluster_size, - &error_is_read, false); - } - if (ret < 0) { - /* Depending on error action, fail now or retry cluster */ - BlockErrorAction action = - backup_error_action(s, error_is_read, -ret); - if (action == BLOCK_ERROR_ACTION_REPORT) { - break; - } else { - offset -= s->cluster_size; - continue; - } - } - } + ret = backup_loop(s); } notifier_with_return_remove(&s->before_write); @@ -534,7 +478,6 @@ static int coroutine_fn backup_run(Job *job, Error **errp) /* wait until pending backup_do_cow() calls have completed */ qemu_co_rwlock_wrlock(&s->flush_rwlock); qemu_co_rwlock_unlock(&s->flush_rwlock); - hbitmap_free(s->copy_bitmap); return ret; } @@ -554,6 +497,42 @@ static const BlockJobDriver backup_job_driver = { .drain = backup_drain, }; +static int64_t backup_calculate_cluster_size(BlockDriverState *target, + Error **errp) +{ + int ret; + BlockDriverInfo bdi; + + /* + * If there is no backing file on the target, we cannot rely on COW if our + * backup cluster size is smaller than the target cluster size. Even for + * targets with a backing file, try to avoid COW if possible. + */ + ret = bdrv_get_info(target, &bdi); + if (ret == -ENOTSUP && !target->backing) { + /* Cluster size is not defined */ + warn_report("The target block device doesn't provide " + "information about the block size and it doesn't have a " + "backing file. The default block size of %u bytes is " + "used. If the actual block size of the target exceeds " + "this default, the backup may be unusable", + BACKUP_CLUSTER_SIZE_DEFAULT); + return BACKUP_CLUSTER_SIZE_DEFAULT; + } else if (ret < 0 && !target->backing) { + error_setg_errno(errp, -ret, + "Couldn't determine the cluster size of the target image, " + "which has no backing file"); + error_append_hint(errp, + "Aborting, since this may create an unusable destination image\n"); + return ret; + } else if (ret < 0 && target->backing) { + /* Not fatal; just trudge on ahead. */ + return BACKUP_CLUSTER_SIZE_DEFAULT; + } + + return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); +} + BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, BlockDriverState *target, int64_t speed, MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, @@ -565,9 +544,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, JobTxn *txn, Error **errp) { int64_t len; - BlockDriverInfo bdi; BackupBlockJob *job = NULL; int ret; + int64_t cluster_size; + HBitmap *copy_bitmap = NULL; assert(bs); assert(target); @@ -629,6 +609,13 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, goto error; } + cluster_size = backup_calculate_cluster_size(target, errp); + if (cluster_size < 0) { + goto error; + } + + copy_bitmap = hbitmap_alloc(len, ctz32(cluster_size)); + /* job->len is fixed, so we can't allow resize */ job = block_job_create(job_id, &backup_job_driver, txn, bs, BLK_PERM_CONSISTENT_READ, @@ -657,33 +644,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, /* Detect image-fleecing (and similar) schemes */ job->serialize_target_writes = bdrv_chain_contains(target, bs); - - /* If there is no backing file on the target, we cannot rely on COW if our - * backup cluster size is smaller than the target cluster size. Even for - * targets with a backing file, try to avoid COW if possible. */ - ret = bdrv_get_info(target, &bdi); - if (ret == -ENOTSUP && !target->backing) { - /* Cluster size is not defined */ - warn_report("The target block device doesn't provide " - "information about the block size and it doesn't have a " - "backing file. The default block size of %u bytes is " - "used. If the actual block size of the target exceeds " - "this default, the backup may be unusable", - BACKUP_CLUSTER_SIZE_DEFAULT); - job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT; - } else if (ret < 0 && !target->backing) { - error_setg_errno(errp, -ret, - "Couldn't determine the cluster size of the target image, " - "which has no backing file"); - error_append_hint(errp, - "Aborting, since this may create an unusable destination image\n"); - goto error; - } else if (ret < 0 && target->backing) { - /* Not fatal; just trudge on ahead. */ - job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT; - } else { - job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); - } + job->cluster_size = cluster_size; + job->copy_bitmap = copy_bitmap; + copy_bitmap = NULL; job->use_copy_range = true; job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk), blk_get_max_transfer(job->target)); @@ -699,6 +662,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, return &job->common; error: + if (copy_bitmap) { + assert(!job || !job->copy_bitmap); + hbitmap_free(copy_bitmap); + } if (sync_bitmap) { bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL); } diff --git a/block/block-backend.c b/block/block-backend.c index 4c0a8ef88d..ad3e1c882d 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -392,7 +392,6 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, blk->root = bdrv_root_attach_child(bs, "root", &child_root, perm, BLK_PERM_ALL, blk, errp); if (!blk->root) { - bdrv_unref(bs); blk_unref(blk); return NULL; } @@ -800,12 +799,12 @@ void blk_remove_bs(BlockBackend *blk) int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) { ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + bdrv_ref(bs); blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk->perm, blk->shared_perm, blk, errp); if (blk->root == NULL) { return -EPERM; } - bdrv_ref(bs); notifier_list_notify(&blk->insert_bs_notifiers, blk); if (tgm->throttle_state) { diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 8a75366c92..b2487101ed 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -29,7 +29,6 @@ #include "qapi/error.h" #include "qemu/cutils.h" -#include "block/block_int.h" #include "qcow2.h" /* NOTICE: BME here means Bitmaps Extension and used as a namespace for @@ -754,7 +753,7 @@ static int bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list, dir_offset = *offset; } - dir = g_try_malloc(dir_size); + dir = g_try_malloc0(dir_size); if (dir == NULL) { return -ENOMEM; } diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index df02e7b20a..b33bcbc984 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -23,7 +23,6 @@ */ #include "qemu/osdep.h" -#include "block/block_int.h" #include "qemu-common.h" #include "qcow2.h" #include "trace.h" diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index b36f4aa84a..cf892f37a8 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -27,7 +27,6 @@ #include "qapi/error.h" #include "qemu-common.h" -#include "block/block_int.h" #include "qcow2.h" #include "qemu/bswap.h" #include "trace.h" @@ -472,13 +471,12 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, { if (bytes && bs->encrypted) { BDRVQcow2State *s = bs->opaque; - int64_t offset = (s->crypt_physical_offset ? - (cluster_offset + offset_in_cluster) : - (src_cluster_offset + offset_in_cluster)); assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0); assert((bytes & ~BDRV_SECTOR_MASK) == 0); assert(s->crypto); - if (qcrypto_block_encrypt(s->crypto, offset, buffer, bytes, NULL) < 0) { + if (qcow2_co_encrypt(bs, cluster_offset, + src_cluster_offset + offset_in_cluster, + buffer, bytes) < 0) { return false; } } @@ -833,7 +831,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) assert(start->offset + start->nb_bytes <= end->offset); assert(!m->data_qiov || m->data_qiov->size == data_bytes); - if (start->nb_bytes == 0 && end->nb_bytes == 0) { + if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) { return 0; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 0b09d6838b..4c1794f9af 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -25,7 +25,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" -#include "block/block_int.h" #include "qcow2.h" #include "qemu/range.h" #include "qemu/bswap.h" diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index a6ffae89a6..d0e7fa9311 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -24,7 +24,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "block/block_int.h" #include "qcow2.h" #include "qemu/bswap.h" #include "qemu/error-report.h" diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c new file mode 100644 index 0000000000..3b1e63fe41 --- /dev/null +++ b/block/qcow2-threads.c @@ -0,0 +1,268 @@ +/* + * Threaded data processing for Qcow2: compression, encryption + * + * Copyright (c) 2004-2006 Fabrice Bellard + * Copyright (c) 2018 Virtuozzo International GmbH. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#define ZLIB_CONST +#include <zlib.h> + +#include "qcow2.h" +#include "block/thread-pool.h" +#include "crypto.h" + +static int coroutine_fn +qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, void *arg) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); + + qemu_co_mutex_lock(&s->lock); + while (s->nb_threads >= QCOW2_MAX_THREADS) { + qemu_co_queue_wait(&s->thread_task_queue, &s->lock); + } + s->nb_threads++; + qemu_co_mutex_unlock(&s->lock); + + ret = thread_pool_submit_co(pool, func, arg); + + qemu_co_mutex_lock(&s->lock); + s->nb_threads--; + qemu_co_queue_next(&s->thread_task_queue); + qemu_co_mutex_unlock(&s->lock); + + return ret; +} + + +/* + * Compression + */ + +typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size, + const void *src, size_t src_size); +typedef struct Qcow2CompressData { + void *dest; + size_t dest_size; + const void *src; + size_t src_size; + ssize_t ret; + + Qcow2CompressFunc func; +} Qcow2CompressData; + +/* + * qcow2_compress() + * + * @dest - destination buffer, @dest_size bytes + * @src - source buffer, @src_size bytes + * + * Returns: compressed size on success + * -ENOMEM destination buffer is not enough to store compressed data + * -EIO on any other error + */ +static ssize_t qcow2_compress(void *dest, size_t dest_size, + const void *src, size_t src_size) +{ + ssize_t ret; + z_stream strm; + + /* best compression, small window, no zlib header */ + memset(&strm, 0, sizeof(strm)); + ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, + -12, 9, Z_DEFAULT_STRATEGY); + if (ret != Z_OK) { + return -EIO; + } + + /* + * strm.next_in is not const in old zlib versions, such as those used on + * OpenBSD/NetBSD, so cast the const away + */ + strm.avail_in = src_size; + strm.next_in = (void *) src; + strm.avail_out = dest_size; + strm.next_out = dest; + + ret = deflate(&strm, Z_FINISH); + if (ret == Z_STREAM_END) { + ret = dest_size - strm.avail_out; + } else { + ret = (ret == Z_OK ? -ENOMEM : -EIO); + } + + deflateEnd(&strm); + + return ret; +} + +/* + * qcow2_decompress() + * + * Decompress some data (not more than @src_size bytes) to produce exactly + * @dest_size bytes. + * + * @dest - destination buffer, @dest_size bytes + * @src - source buffer, @src_size bytes + * + * Returns: 0 on success + * -1 on fail + */ +static ssize_t qcow2_decompress(void *dest, size_t dest_size, + const void *src, size_t src_size) +{ + int ret = 0; + z_stream strm; + + memset(&strm, 0, sizeof(strm)); + strm.avail_in = src_size; + strm.next_in = (void *) src; + strm.avail_out = dest_size; + strm.next_out = dest; + + ret = inflateInit2(&strm, -12); + if (ret != Z_OK) { + return -1; + } + + ret = inflate(&strm, Z_FINISH); + if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) { + /* + * We approve Z_BUF_ERROR because we need @dest buffer to be filled, but + * @src buffer may be processed partly (because in qcow2 we know size of + * compressed data with precision of one sector) + */ + ret = -1; + } + + inflateEnd(&strm); + + return ret; +} + +static int qcow2_compress_pool_func(void *opaque) +{ + Qcow2CompressData *data = opaque; + + data->ret = data->func(data->dest, data->dest_size, + data->src, data->src_size); + + return 0; +} + +static ssize_t coroutine_fn +qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size, + const void *src, size_t src_size, Qcow2CompressFunc func) +{ + Qcow2CompressData arg = { + .dest = dest, + .dest_size = dest_size, + .src = src, + .src_size = src_size, + .func = func, + }; + + qcow2_co_process(bs, qcow2_compress_pool_func, &arg); + + return arg.ret; +} + +ssize_t coroutine_fn +qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size, + const void *src, size_t src_size) +{ + return qcow2_co_do_compress(bs, dest, dest_size, src, src_size, + qcow2_compress); +} + +ssize_t coroutine_fn +qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size, + const void *src, size_t src_size) +{ + return qcow2_co_do_compress(bs, dest, dest_size, src, src_size, + qcow2_decompress); +} + + +/* + * Cryptography + */ + +/* + * Qcow2EncDecFunc: common prototype of qcrypto_block_encrypt() and + * qcrypto_block_decrypt() functions. + */ +typedef int (*Qcow2EncDecFunc)(QCryptoBlock *block, uint64_t offset, + uint8_t *buf, size_t len, Error **errp); + +typedef struct Qcow2EncDecData { + QCryptoBlock *block; + uint64_t offset; + uint8_t *buf; + size_t len; + + Qcow2EncDecFunc func; +} Qcow2EncDecData; + +static int qcow2_encdec_pool_func(void *opaque) +{ + Qcow2EncDecData *data = opaque; + + return data->func(data->block, data->offset, data->buf, data->len, NULL); +} + +static int coroutine_fn +qcow2_co_encdec(BlockDriverState *bs, uint64_t file_cluster_offset, + uint64_t offset, void *buf, size_t len, Qcow2EncDecFunc func) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2EncDecData arg = { + .block = s->crypto, + .offset = s->crypt_physical_offset ? + file_cluster_offset + offset_into_cluster(s, offset) : + offset, + .buf = buf, + .len = len, + .func = func, + }; + + return qcow2_co_process(bs, qcow2_encdec_pool_func, &arg); +} + +int coroutine_fn +qcow2_co_encrypt(BlockDriverState *bs, uint64_t file_cluster_offset, + uint64_t offset, void *buf, size_t len) +{ + return qcow2_co_encdec(bs, file_cluster_offset, offset, buf, len, + qcrypto_block_encrypt); +} + +int coroutine_fn +qcow2_co_decrypt(BlockDriverState *bs, uint64_t file_cluster_offset, + uint64_t offset, void *buf, size_t len) +{ + return qcow2_co_encdec(bs, file_cluster_offset, offset, buf, len, + qcrypto_block_decrypt); +} diff --git a/block/qcow2.c b/block/qcow2.c index d39882785d..f2cb131048 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -24,10 +24,6 @@ #include "qemu/osdep.h" -#define ZLIB_CONST -#include <zlib.h> - -#include "block/block_int.h" #include "block/qdict.h" #include "sysemu/block-backend.h" #include "qemu/module.h" @@ -44,7 +40,6 @@ #include "qapi/qobject-input-visitor.h" #include "qapi/qapi-visit-block-core.h" #include "crypto.h" -#include "block/thread-pool.h" /* Differences with QCOW: @@ -302,7 +297,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, } s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", qcow2_crypto_hdr_read_func, - bs, cflags, 1, errp); + bs, cflags, QCOW2_MAX_THREADS, errp); if (!s->crypto) { return -EINVAL; } @@ -1543,7 +1538,8 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; } s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", - NULL, NULL, cflags, 1, errp); + NULL, NULL, cflags, + QCOW2_MAX_THREADS, errp); if (!s->crypto) { ret = -EINVAL; goto fail; @@ -1698,7 +1694,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, } #endif - qemu_co_queue_init(&s->compress_wait_queue); + qemu_co_queue_init(&s->thread_task_queue); return ret; @@ -1974,8 +1970,6 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, qemu_iovec_init(&hd_qiov, qiov->niov); - qemu_co_mutex_lock(&s->lock); - while (bytes != 0) { /* prepare next request */ @@ -1985,7 +1979,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); } + qemu_co_mutex_lock(&s->lock); ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset); + qemu_co_mutex_unlock(&s->lock); if (ret < 0) { goto fail; } @@ -2000,10 +1996,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, if (bs->backing) { BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); - qemu_co_mutex_unlock(&s->lock); ret = bdrv_co_preadv(bs->backing, offset, cur_bytes, &hd_qiov, 0); - qemu_co_mutex_lock(&s->lock); if (ret < 0) { goto fail; } @@ -2019,11 +2013,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, break; case QCOW2_CLUSTER_COMPRESSED: - qemu_co_mutex_unlock(&s->lock); ret = qcow2_co_preadv_compressed(bs, cluster_offset, offset, cur_bytes, &hd_qiov); - qemu_co_mutex_lock(&s->lock); if (ret < 0) { goto fail; } @@ -2060,11 +2052,9 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, } BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - qemu_co_mutex_unlock(&s->lock); ret = bdrv_co_preadv(s->data_file, cluster_offset + offset_in_cluster, cur_bytes, &hd_qiov, 0); - qemu_co_mutex_lock(&s->lock); if (ret < 0) { goto fail; } @@ -2072,13 +2062,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, assert(s->crypto); assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0); - if (qcrypto_block_decrypt(s->crypto, - (s->crypt_physical_offset ? - cluster_offset + offset_in_cluster : - offset), - cluster_data, - cur_bytes, - NULL) < 0) { + if (qcow2_co_decrypt(bs, cluster_offset, offset, + cluster_data, cur_bytes) < 0) { ret = -EIO; goto fail; } @@ -2099,8 +2084,6 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, ret = 0; fail: - qemu_co_mutex_unlock(&s->lock); - qemu_iovec_destroy(&hd_qiov); qemu_vfree(cluster_data); @@ -2120,6 +2103,11 @@ static bool merge_cow(uint64_t offset, unsigned bytes, continue; } + /* If COW regions are handled already, skip this too */ + if (m->skip_cow) { + continue; + } + /* The data (middle) region must be immediately after the * start region */ if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) { @@ -2145,6 +2133,80 @@ static bool merge_cow(uint64_t offset, unsigned bytes, return false; } +static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t bytes) +{ + int64_t nr; + return !bytes || + (!bdrv_is_allocated_above(bs, NULL, offset, bytes, &nr) && nr == bytes); +} + +static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m) +{ + /* + * This check is designed for optimization shortcut so it must be + * efficient. + * Instead of is_zero(), use is_unallocated() as it is faster (but not + * as accurate and can result in false negatives). + */ + return is_unallocated(bs, m->offset + m->cow_start.offset, + m->cow_start.nb_bytes) && + is_unallocated(bs, m->offset + m->cow_end.offset, + m->cow_end.nb_bytes); +} + +static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta) +{ + BDRVQcow2State *s = bs->opaque; + QCowL2Meta *m; + + if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) { + return 0; + } + + if (bs->encrypted) { + return 0; + } + + for (m = l2meta; m != NULL; m = m->next) { + int ret; + + if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) { + continue; + } + + if (!is_zero_cow(bs, m)) { + continue; + } + + /* + * instead of writing zero COW buffers, + * efficiently zero out the whole clusters + */ + + ret = qcow2_pre_write_overlap_check(bs, 0, m->alloc_offset, + m->nb_clusters * s->cluster_size, + true); + if (ret < 0) { + return ret; + } + + BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE); + ret = bdrv_co_pwrite_zeroes(s->data_file, m->alloc_offset, + m->nb_clusters * s->cluster_size, + BDRV_REQ_NO_FALLBACK); + if (ret < 0) { + if (ret != -ENOTSUP && ret != -EAGAIN) { + return ret; + } + continue; + } + + trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset, m->nb_clusters); + m->skip_cow = true; + } + return 0; +} + static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) @@ -2181,11 +2243,20 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, &cluster_offset, &l2meta); if (ret < 0) { - goto fail; + goto out_locked; } assert((cluster_offset & 511) == 0); + ret = qcow2_pre_write_overlap_check(bs, 0, + cluster_offset + offset_in_cluster, + cur_bytes, true); + if (ret < 0) { + goto out_locked; + } + + qemu_co_mutex_unlock(&s->lock); + qemu_iovec_reset(&hd_qiov); qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); @@ -2197,7 +2268,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, * s->cluster_size); if (cluster_data == NULL) { ret = -ENOMEM; - goto fail; + goto out_unlocked; } } @@ -2205,24 +2276,20 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); - if (qcrypto_block_encrypt(s->crypto, - (s->crypt_physical_offset ? - cluster_offset + offset_in_cluster : - offset), - cluster_data, - cur_bytes, NULL) < 0) { + if (qcow2_co_encrypt(bs, cluster_offset, offset, + cluster_data, cur_bytes) < 0) { ret = -EIO; - goto fail; + goto out_unlocked; } qemu_iovec_reset(&hd_qiov); qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); } - ret = qcow2_pre_write_overlap_check(bs, 0, - cluster_offset + offset_in_cluster, cur_bytes, true); + /* Try to efficiently initialize the physical space with zeroes */ + ret = handle_alloc_space(bs, l2meta); if (ret < 0) { - goto fail; + goto out_unlocked; } /* If we need to do COW, check if it's possible to merge the @@ -2230,22 +2297,22 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, * If it's not possible (or not necessary) then write the * guest data now. */ if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) { - qemu_co_mutex_unlock(&s->lock); BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); trace_qcow2_writev_data(qemu_coroutine_self(), cluster_offset + offset_in_cluster); ret = bdrv_co_pwritev(s->data_file, cluster_offset + offset_in_cluster, cur_bytes, &hd_qiov, 0); - qemu_co_mutex_lock(&s->lock); if (ret < 0) { - goto fail; + goto out_unlocked; } } + qemu_co_mutex_lock(&s->lock); + ret = qcow2_handle_l2meta(bs, &l2meta, true); if (ret) { - goto fail; + goto out_locked; } bytes -= cur_bytes; @@ -2254,8 +2321,12 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes); } ret = 0; + goto out_locked; -fail: +out_unlocked: + qemu_co_mutex_lock(&s->lock); + +out_locked: qcow2_handle_l2meta(bs, &l2meta, false); qemu_co_mutex_unlock(&s->lock); @@ -3921,171 +3992,6 @@ fail: return ret; } -/* - * qcow2_compress() - * - * @dest - destination buffer, @dest_size bytes - * @src - source buffer, @src_size bytes - * - * Returns: compressed size on success - * -ENOMEM destination buffer is not enough to store compressed data - * -EIO on any other error - */ -static ssize_t qcow2_compress(void *dest, size_t dest_size, - const void *src, size_t src_size) -{ - ssize_t ret; - z_stream strm; - - /* best compression, small window, no zlib header */ - memset(&strm, 0, sizeof(strm)); - ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, - -12, 9, Z_DEFAULT_STRATEGY); - if (ret != Z_OK) { - return -EIO; - } - - /* strm.next_in is not const in old zlib versions, such as those used on - * OpenBSD/NetBSD, so cast the const away */ - strm.avail_in = src_size; - strm.next_in = (void *) src; - strm.avail_out = dest_size; - strm.next_out = dest; - - ret = deflate(&strm, Z_FINISH); - if (ret == Z_STREAM_END) { - ret = dest_size - strm.avail_out; - } else { - ret = (ret == Z_OK ? -ENOMEM : -EIO); - } - - deflateEnd(&strm); - - return ret; -} - -/* - * qcow2_decompress() - * - * Decompress some data (not more than @src_size bytes) to produce exactly - * @dest_size bytes. - * - * @dest - destination buffer, @dest_size bytes - * @src - source buffer, @src_size bytes - * - * Returns: 0 on success - * -1 on fail - */ -static ssize_t qcow2_decompress(void *dest, size_t dest_size, - const void *src, size_t src_size) -{ - int ret = 0; - z_stream strm; - - memset(&strm, 0, sizeof(strm)); - strm.avail_in = src_size; - strm.next_in = (void *) src; - strm.avail_out = dest_size; - strm.next_out = dest; - - ret = inflateInit2(&strm, -12); - if (ret != Z_OK) { - return -1; - } - - ret = inflate(&strm, Z_FINISH); - if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) { - /* We approve Z_BUF_ERROR because we need @dest buffer to be filled, but - * @src buffer may be processed partly (because in qcow2 we know size of - * compressed data with precision of one sector) */ - ret = -1; - } - - inflateEnd(&strm); - - return ret; -} - -#define MAX_COMPRESS_THREADS 4 - -typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size, - const void *src, size_t src_size); -typedef struct Qcow2CompressData { - void *dest; - size_t dest_size; - const void *src; - size_t src_size; - ssize_t ret; - - Qcow2CompressFunc func; -} Qcow2CompressData; - -static int qcow2_compress_pool_func(void *opaque) -{ - Qcow2CompressData *data = opaque; - - data->ret = data->func(data->dest, data->dest_size, - data->src, data->src_size); - - return 0; -} - -static void qcow2_compress_complete(void *opaque, int ret) -{ - qemu_coroutine_enter(opaque); -} - -static ssize_t coroutine_fn -qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size, - const void *src, size_t src_size, Qcow2CompressFunc func) -{ - BDRVQcow2State *s = bs->opaque; - BlockAIOCB *acb; - ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); - Qcow2CompressData arg = { - .dest = dest, - .dest_size = dest_size, - .src = src, - .src_size = src_size, - .func = func, - }; - - while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) { - qemu_co_queue_wait(&s->compress_wait_queue, NULL); - } - - s->nb_compress_threads++; - acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg, - qcow2_compress_complete, - qemu_coroutine_self()); - - if (!acb) { - s->nb_compress_threads--; - return -EINVAL; - } - qemu_coroutine_yield(); - s->nb_compress_threads--; - qemu_co_queue_next(&s->compress_wait_queue); - - return arg.ret; -} - -static ssize_t coroutine_fn -qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size, - const void *src, size_t src_size) -{ - return qcow2_co_do_compress(bs, dest, dest_size, src, src_size, - qcow2_compress); -} - -static ssize_t coroutine_fn -qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size, - const void *src, size_t src_size) -{ - return qcow2_co_do_compress(bs, dest, dest_size, src, src_size, - qcow2_decompress); -} - /* XXX: put compressed sectors first, then all the cluster aligned tables to avoid losing bytes in alignment */ static coroutine_fn int diff --git a/block/qcow2.h b/block/qcow2.h index 8d92ef1fee..567375e56c 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -28,6 +28,7 @@ #include "crypto/block.h" #include "qemu/coroutine.h" #include "qemu/units.h" +#include "block/block_int.h" //#define DEBUG_ALLOC //#define DEBUG_ALLOC2 @@ -267,6 +268,8 @@ typedef struct Qcow2BitmapHeaderExt { uint64_t bitmap_directory_offset; } QEMU_PACKED Qcow2BitmapHeaderExt; +#define QCOW2_MAX_THREADS 4 + typedef struct BDRVQcow2State { int cluster_bits; int cluster_size; @@ -349,8 +352,8 @@ typedef struct BDRVQcow2State { char *image_backing_format; char *image_data_file; - CoQueue compress_wait_queue; - int nb_compress_threads; + CoQueue thread_task_queue; + int nb_threads; BdrvChild *data_file; } BDRVQcow2State; @@ -402,6 +405,12 @@ typedef struct QCowL2Meta */ Qcow2COWRegion cow_end; + /* + * Indicates that COW regions are already handled and do not require + * any more processing. + */ + bool skip_cow; + /** * The I/O vector with the data from the actual guest write request. * If non-NULL, this is meant to be merged together with the data @@ -737,4 +746,17 @@ void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name, Error **errp); +ssize_t coroutine_fn +qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size, + const void *src, size_t src_size); +ssize_t coroutine_fn +qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size, + const void *src, size_t src_size); +int coroutine_fn +qcow2_co_encrypt(BlockDriverState *bs, uint64_t file_cluster_offset, + uint64_t offset, void *buf, size_t len); +int coroutine_fn +qcow2_co_decrypt(BlockDriverState *bs, uint64_t file_cluster_offset, + uint64_t offset, void *buf, size_t len); + #endif diff --git a/block/quorum.c b/block/quorum.c index 352f729136..133ee18204 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1019,7 +1019,6 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp); if (child == NULL) { s->next_child_index--; - bdrv_unref(child_bs); goto out; } s->children = g_renew(BdrvChild *, s->children, s->num_children + 1); diff --git a/block/trace-events b/block/trace-events index 79ccd8d824..1e0653ce6d 100644 --- a/block/trace-events +++ b/block/trace-events @@ -68,6 +68,7 @@ qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d" qcow2_writev_data(void *co, uint64_t offset) "co %p offset 0x%" PRIx64 qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d" qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d" +qcow2_skip_cow(void *co, uint64_t offset, int nb_clusters) "co %p offset 0x%" PRIx64 " nb_clusters %d" # qcow2-cluster.c qcow2_alloc_clusters_offset(void *co, uint64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d" diff --git a/blockdev.c b/blockdev.c index 64ccef735b..17c2d801d7 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3466,11 +3466,16 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, backup->compress = false; } - bs = qmp_get_root_bs(backup->device, errp); + bs = bdrv_lookup_bs(backup->device, backup->device, errp); if (!bs) { return NULL; } + if (!bs->drv) { + error_setg(errp, "Device has no medium"); + return NULL; + } + aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); diff --git a/blockjob.c b/blockjob.c index 9ca942ba01..cc5f18e7cd 100644 --- a/blockjob.c +++ b/blockjob.c @@ -204,6 +204,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, { BdrvChild *c; + bdrv_ref(bs); c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm, job, errp); if (c == NULL) { @@ -211,7 +212,6 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, } job->nodes = g_slist_prepend(job->nodes, c); - bdrv_ref(bs); bdrv_op_block_all(bs, job->blocker); return 0; diff --git a/qapi/block-core.json b/qapi/block-core.json index dcc935d655..1defcde048 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -3231,6 +3231,8 @@ # # @cor_write: a write due to copy-on-read (since 2.11) # +# @cluster_alloc_space: an allocation of file space for a cluster (since 4.1) +# # Since: 2.9 ## { 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG', @@ -3249,7 +3251,7 @@ 'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev', 'pwritev_zero', 'pwritev_done', 'empty_image_prepare', 'l1_shrink_write_table', 'l1_shrink_free_l2_clusters', - 'cor_write'] } + 'cor_write', 'cluster_alloc_space'] } ## # @BlkdebugInjectErrorOptions: diff --git a/qemu-img.c b/qemu-img.c index 28fba1e7a7..b0535919b1 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3164,7 +3164,7 @@ static int img_rebase(int argc, char **argv) BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL; uint8_t *buf_old = NULL; uint8_t *buf_new = NULL; - BlockDriverState *bs = NULL; + BlockDriverState *bs = NULL, *prefix_chain_bs = NULL; char *filename; const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg; int c, flags, src_flags, ret; @@ -3309,29 +3309,18 @@ static int img_rebase(int argc, char **argv) /* For safe rebasing we need to compare old and new backing file */ if (!unsafe) { - char backing_name[PATH_MAX]; QDict *options = NULL; + BlockDriverState *base_bs = backing_bs(bs); - if (bs->backing) { - if (bs->backing_format[0] != '\0') { - options = qdict_new(); - qdict_put_str(options, "driver", bs->backing_format); - } - - if (force_share) { - if (!options) { - options = qdict_new(); - } - qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true); - } - bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name)); - blk_old_backing = blk_new_open(backing_name, NULL, - options, src_flags, &local_err); - if (!blk_old_backing) { + if (base_bs) { + blk_old_backing = blk_new(BLK_PERM_CONSISTENT_READ, + BLK_PERM_ALL); + ret = blk_insert_bs(blk_old_backing, base_bs, + &local_err); + if (ret < 0) { error_reportf_err(local_err, - "Could not open old backing file '%s': ", - backing_name); - ret = -1; + "Could not reuse old backing file '%s': ", + base_bs->filename); goto out; } } else { @@ -3364,15 +3353,34 @@ static int img_rebase(int argc, char **argv) goto out; } - blk_new_backing = blk_new_open(out_real_path, NULL, - options, src_flags, &local_err); - g_free(out_real_path); - if (!blk_new_backing) { - error_reportf_err(local_err, - "Could not open new backing file '%s': ", - out_baseimg); - ret = -1; - goto out; + /* + * Find out whether we rebase an image on top of a previous image + * in its chain. + */ + prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path); + if (prefix_chain_bs) { + g_free(out_real_path); + blk_new_backing = blk_new(BLK_PERM_CONSISTENT_READ, + BLK_PERM_ALL); + ret = blk_insert_bs(blk_new_backing, prefix_chain_bs, + &local_err); + if (ret < 0) { + error_reportf_err(local_err, + "Could not reuse backing file '%s': ", + out_baseimg); + goto out; + } + } else { + blk_new_backing = blk_new_open(out_real_path, NULL, + options, src_flags, &local_err); + g_free(out_real_path); + if (!blk_new_backing) { + error_reportf_err(local_err, + "Could not open new backing file '%s': ", + out_baseimg); + ret = -1; + goto out; + } } } } @@ -3448,6 +3456,23 @@ static int img_rebase(int argc, char **argv) continue; } + if (prefix_chain_bs) { + /* + * If cluster wasn't changed since prefix_chain, we don't need + * to take action + */ + ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs, + offset, n, &n); + if (ret < 0) { + error_report("error while reading image metadata: %s", + strerror(-ret)); + goto out; + } + if (!ret) { + continue; + } + } + /* * Read old and new backing file and take into consideration that * backing files may be smaller than the COW image. diff --git a/tests/qemu-iotests/056 b/tests/qemu-iotests/056 index 3df323984d..f40fc11a09 100755 --- a/tests/qemu-iotests/056 +++ b/tests/qemu-iotests/056 @@ -214,7 +214,7 @@ class BackupTest(iotests.QMPTestCase): res = self.vm.qmp('query-block-jobs') self.assert_qmp(res, 'return[0]/status', 'concluded') # Leave zombie job un-dismissed, observe a failure: - res = self.qmp_backup_and_wait(serror='Need a root block node', + res = self.qmp_backup_and_wait(serror="Node 'drive0' is busy: block device is in use by block job: backup", device='drive0', format=iotests.imgfmt, sync='full', target=self.dest_img, auto_dismiss=False) diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060 index 89e911400c..b91d8321bb 100755 --- a/tests/qemu-iotests/060 +++ b/tests/qemu-iotests/060 @@ -150,10 +150,15 @@ $QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io echo echo "=== Testing overlap while COW is in flight ===" echo +BACKING_IMG=$TEST_IMG.base +TEST_IMG=$BACKING_IMG _make_test_img 1G + +$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io + # compat=0.10 is required in order to make the following discard actually # unallocate the sector rather than make it a zero sector - we want COW, after # all. -IMGOPTS='compat=0.10' _make_test_img 1G +IMGOPTS='compat=0.10' _make_test_img -b "$BACKING_IMG" 1G # Write two clusters, the second one enforces creation of an L2 table after # the first data cluster. $QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index e42bf8c5a9..0f6b0658a1 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -97,7 +97,10 @@ read 512/512 bytes at offset 0 === Testing overlap while COW is in flight === -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=1073741824 +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 backing_file=TEST_DIR/t.IMGFMT.base wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 65536/65536 bytes at offset 536870912 diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index eda90750eb..5534c2adf9 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -1436,12 +1436,6 @@ static void test_detach_indirect(bool by_parent_cb) bdrv_unref(parent_b); blk_unref(blk); - /* XXX Once bdrv_close() unref's children instead of just detaching them, - * this won't be necessary any more. */ - bdrv_unref(a); - bdrv_unref(a); - bdrv_unref(c); - g_assert_cmpint(a->refcnt, ==, 1); g_assert_cmpint(b->refcnt, ==, 1); g_assert_cmpint(c->refcnt, ==, 1); diff --git a/tests/test-bdrv-graph-mod.c b/tests/test-bdrv-graph-mod.c index 283dc84869..747c0bf8fc 100644 --- a/tests/test-bdrv-graph-mod.c +++ b/tests/test-bdrv-graph-mod.c @@ -116,7 +116,6 @@ static void test_update_perm_tree(void) g_assert_nonnull(local_err); error_free(local_err); - bdrv_unref(bs); blk_unref(root); } |