diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/Makefile.objs | 2 | ||||
-rw-r--r-- | block/blkdebug.c | 5 | ||||
-rw-r--r-- | block/block-backend.c | 5 | ||||
-rw-r--r-- | block/commit.c | 4 | ||||
-rw-r--r-- | block/crypto.c | 101 | ||||
-rw-r--r-- | block/crypto.h | 101 | ||||
-rw-r--r-- | block/dirty-bitmap.c | 154 | ||||
-rw-r--r-- | block/file-posix.c | 201 | ||||
-rw-r--r-- | block/file-win32.c | 9 | ||||
-rw-r--r-- | block/gluster.c | 8 | ||||
-rw-r--r-- | block/io.c | 8 | ||||
-rw-r--r-- | block/iscsi.c | 9 | ||||
-rw-r--r-- | block/mirror.c | 3 | ||||
-rw-r--r-- | block/nfs.c | 9 | ||||
-rw-r--r-- | block/parallels.c | 13 | ||||
-rw-r--r-- | block/qapi.c | 2 | ||||
-rw-r--r-- | block/qcow.c | 277 | ||||
-rw-r--r-- | block/qcow2-bitmap.c | 1482 | ||||
-rw-r--r-- | block/qcow2-cluster.c | 66 | ||||
-rw-r--r-- | block/qcow2-refcount.c | 346 | ||||
-rw-r--r-- | block/qcow2.c | 1231 | ||||
-rw-r--r-- | block/qcow2.h | 69 | ||||
-rw-r--r-- | block/qed.c | 11 | ||||
-rw-r--r-- | block/raw-format.c | 31 | ||||
-rw-r--r-- | block/rbd.c | 9 | ||||
-rw-r--r-- | block/sheepdog.c | 11 | ||||
-rw-r--r-- | block/vdi.c | 3 | ||||
-rw-r--r-- | block/vhdx-log.c | 2 | ||||
-rw-r--r-- | block/vhdx.c | 8 | ||||
-rw-r--r-- | block/vmdk.c | 7 | ||||
-rw-r--r-- | block/vpc.c | 2 |
31 files changed, 3509 insertions, 680 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs index f9368b52b8..2aaede4ae1 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -1,5 +1,5 @@ block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o -block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o +block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o block-obj-y += qed.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o diff --git a/block/blkdebug.c b/block/blkdebug.c index b25856c49c..c19ab28f07 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -821,9 +821,10 @@ static int64_t blkdebug_getlength(BlockDriverState *bs) return bdrv_getlength(bs->file->bs); } -static int blkdebug_truncate(BlockDriverState *bs, int64_t offset, Error **errp) +static int blkdebug_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { - return bdrv_truncate(bs->file, offset, errp); + return bdrv_truncate(bs->file, offset, prealloc, errp); } static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options) diff --git a/block/block-backend.c b/block/block-backend.c index 0df3457a09..fe3542b3f8 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1773,14 +1773,15 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, BDRV_REQ_WRITE_COMPRESSED); } -int blk_truncate(BlockBackend *blk, int64_t offset, Error **errp) +int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc, + Error **errp) { if (!blk_is_available(blk)) { error_setg(errp, "No medium inserted"); return -ENOMEDIUM; } - return bdrv_truncate(blk->root, offset, errp); + return bdrv_truncate(blk->root, offset, prealloc, errp); } static void blk_pdiscard_entry(void *opaque) diff --git a/block/commit.c b/block/commit.c index 774a8a599c..13143608f8 100644 --- a/block/commit.c +++ b/block/commit.c @@ -163,7 +163,7 @@ static void coroutine_fn commit_run(void *opaque) } if (base_len < s->common.len) { - ret = blk_truncate(s->base, s->common.len, NULL); + ret = blk_truncate(s->base, s->common.len, PREALLOC_MODE_OFF, NULL); if (ret) { goto out; } @@ -521,7 +521,7 @@ int bdrv_commit(BlockDriverState *bs) * grow the backing file image if possible. If not possible, * we must return an error */ if (length > backing_length) { - ret = blk_truncate(backing, length, &local_err); + ret = blk_truncate(backing, length, PREALLOC_MODE_OFF, &local_err); if (ret < 0) { error_report_err(local_err); goto ro_cleanup; diff --git a/block/crypto.c b/block/crypto.c index 10e5ddccaa..58ef6f2f52 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -24,16 +24,10 @@ #include "sysemu/block-backend.h" #include "crypto/block.h" #include "qapi/opts-visitor.h" +#include "qapi/qobject-input-visitor.h" #include "qapi-visit.h" #include "qapi/error.h" - -#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret" -#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg" -#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode" -#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg" -#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg" -#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg" -#define BLOCK_CRYPTO_OPT_LUKS_ITER_TIME "iter-time" +#include "block/crypto.h" typedef struct BlockCrypto BlockCrypto; @@ -135,11 +129,7 @@ static QemuOptsList block_crypto_runtime_opts_luks = { .name = "crypto", .head = QTAILQ_HEAD_INITIALIZER(block_crypto_runtime_opts_luks.head), .desc = { - { - .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET, - .type = QEMU_OPT_STRING, - .help = "ID of the secret that provides the encryption key", - }, + BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(""), { /* end of list */ } }, }; @@ -154,49 +144,21 @@ static QemuOptsList block_crypto_create_opts_luks = { .type = QEMU_OPT_SIZE, .help = "Virtual disk size" }, - { - .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET, - .type = QEMU_OPT_STRING, - .help = "ID of the secret that provides the encryption key", - }, - { - .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG, - .type = QEMU_OPT_STRING, - .help = "Name of encryption cipher algorithm", - }, - { - .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE, - .type = QEMU_OPT_STRING, - .help = "Name of encryption cipher mode", - }, - { - .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG, - .type = QEMU_OPT_STRING, - .help = "Name of IV generator algorithm", - }, - { - .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG, - .type = QEMU_OPT_STRING, - .help = "Name of IV generator hash algorithm", - }, - { - .name = BLOCK_CRYPTO_OPT_LUKS_HASH_ALG, - .type = QEMU_OPT_STRING, - .help = "Name of encryption hash algorithm", - }, - { - .name = BLOCK_CRYPTO_OPT_LUKS_ITER_TIME, - .type = QEMU_OPT_NUMBER, - .help = "Time to spend in PBKDF in milliseconds", - }, + BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG(""), + BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(""), { /* end of list */ } }, }; -static QCryptoBlockOpenOptions * +QCryptoBlockOpenOptions * block_crypto_open_opts_init(QCryptoBlockFormat format, - QemuOpts *opts, + QDict *opts, Error **errp) { Visitor *v; @@ -206,7 +168,7 @@ block_crypto_open_opts_init(QCryptoBlockFormat format, ret = g_new0(QCryptoBlockOpenOptions, 1); ret->format = format; - v = opts_visitor_new(opts); + v = qobject_input_visitor_new_keyval(QOBJECT(opts)); visit_start_struct(v, NULL, NULL, 0, &local_err); if (local_err) { @@ -219,6 +181,11 @@ block_crypto_open_opts_init(QCryptoBlockFormat format, v, &ret->u.luks, &local_err); break; + case Q_CRYPTO_BLOCK_FORMAT_QCOW: + visit_type_QCryptoBlockOptionsQCow_members( + v, &ret->u.qcow, &local_err); + break; + default: error_setg(&local_err, "Unsupported block format %d", format); break; @@ -240,9 +207,9 @@ block_crypto_open_opts_init(QCryptoBlockFormat format, } -static QCryptoBlockCreateOptions * +QCryptoBlockCreateOptions * block_crypto_create_opts_init(QCryptoBlockFormat format, - QemuOpts *opts, + QDict *opts, Error **errp) { Visitor *v; @@ -252,7 +219,7 @@ block_crypto_create_opts_init(QCryptoBlockFormat format, ret = g_new0(QCryptoBlockCreateOptions, 1); ret->format = format; - v = opts_visitor_new(opts); + v = qobject_input_visitor_new_keyval(QOBJECT(opts)); visit_start_struct(v, NULL, NULL, 0, &local_err); if (local_err) { @@ -265,6 +232,11 @@ block_crypto_create_opts_init(QCryptoBlockFormat format, v, &ret->u.luks, &local_err); break; + case Q_CRYPTO_BLOCK_FORMAT_QCOW: + visit_type_QCryptoBlockOptionsQCow_members( + v, &ret->u.qcow, &local_err); + break; + default: error_setg(&local_err, "Unsupported block format %d", format); break; @@ -299,6 +271,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, int ret = -EINVAL; QCryptoBlockOpenOptions *open_opts = NULL; unsigned int cflags = 0; + QDict *cryptoopts = NULL; bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false, errp); @@ -313,7 +286,9 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, goto cleanup; } - open_opts = block_crypto_open_opts_init(format, opts, errp); + cryptoopts = qemu_opts_to_qdict(opts, NULL); + + open_opts = block_crypto_open_opts_init(format, cryptoopts, errp); if (!open_opts) { goto cleanup; } @@ -321,7 +296,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, if (flags & BDRV_O_NO_IO) { cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; } - crypto->block = qcrypto_block_open(open_opts, + crypto->block = qcrypto_block_open(open_opts, NULL, block_crypto_read_func, bs, cflags, @@ -333,10 +308,10 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, } bs->encrypted = true; - bs->valid_key = true; ret = 0; cleanup: + QDECREF(cryptoopts); qapi_free_QCryptoBlockOpenOptions(open_opts); return ret; } @@ -356,13 +331,16 @@ static int block_crypto_create_generic(QCryptoBlockFormat format, .opts = opts, .filename = filename, }; + QDict *cryptoopts; + + cryptoopts = qemu_opts_to_qdict(opts, NULL); - create_opts = block_crypto_create_opts_init(format, opts, errp); + create_opts = block_crypto_create_opts_init(format, cryptoopts, errp); if (!create_opts) { return -1; } - crypto = qcrypto_block_create(create_opts, + crypto = qcrypto_block_create(create_opts, NULL, block_crypto_init_func, block_crypto_write_func, &data, @@ -375,6 +353,7 @@ static int block_crypto_create_generic(QCryptoBlockFormat format, ret = 0; cleanup: + QDECREF(cryptoopts); qcrypto_block_free(crypto); blk_unref(data.blk); qapi_free_QCryptoBlockCreateOptions(create_opts); @@ -382,7 +361,7 @@ static int block_crypto_create_generic(QCryptoBlockFormat format, } static int block_crypto_truncate(BlockDriverState *bs, int64_t offset, - Error **errp) + PreallocMode prealloc, Error **errp) { BlockCrypto *crypto = bs->opaque; size_t payload_offset = @@ -390,7 +369,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset, offset += payload_offset; - return bdrv_truncate(bs->file, offset, errp); + return bdrv_truncate(bs->file, offset, prealloc, errp); } static void block_crypto_close(BlockDriverState *bs) diff --git a/block/crypto.h b/block/crypto.h new file mode 100644 index 0000000000..0f985ea4e2 --- /dev/null +++ b/block/crypto.h @@ -0,0 +1,101 @@ +/* + * QEMU block full disk encryption + * + * Copyright (c) 2015-2017 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#ifndef BLOCK_CRYPTO_H__ +#define BLOCK_CRYPTO_H__ + +#define BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix, helpstr) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET, \ + .type = QEMU_OPT_STRING, \ + .help = helpstr, \ + } + +#define BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET "key-secret" + +#define BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET(prefix) \ + BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix, \ + "ID of the secret that provides the AES encryption key") + +#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret" +#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg" +#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode" +#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg" +#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg" +#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg" +#define BLOCK_CRYPTO_OPT_LUKS_ITER_TIME "iter-time" + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(prefix) \ + BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix, \ + "ID of the secret that provides the keyslot passphrase") + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG, \ + .type = QEMU_OPT_STRING, \ + .help = "Name of encryption cipher algorithm", \ + } + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE, \ + .type = QEMU_OPT_STRING, \ + .help = "Name of encryption cipher mode", \ + } + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG, \ + .type = QEMU_OPT_STRING, \ + .help = "Name of IV generator algorithm", \ + } + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG, \ + .type = QEMU_OPT_STRING, \ + .help = "Name of IV generator hash algorithm", \ + } + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_HASH_ALG, \ + .type = QEMU_OPT_STRING, \ + .help = "Name of encryption hash algorithm", \ + } + +#define BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(prefix) \ + { \ + .name = prefix BLOCK_CRYPTO_OPT_LUKS_ITER_TIME, \ + .type = QEMU_OPT_NUMBER, \ + .help = "Time to spend in PBKDF in milliseconds", \ + } + +QCryptoBlockCreateOptions * +block_crypto_create_opts_init(QCryptoBlockFormat format, + QDict *opts, + Error **errp); + +QCryptoBlockOpenOptions * +block_crypto_open_opts_init(QCryptoBlockFormat format, + QDict *opts, + Error **errp); + +#endif /* BLOCK_CRYPTO_H__ */ diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index a04c6e4154..543bddb9b5 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -43,8 +43,18 @@ struct BdrvDirtyBitmap { BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ char *name; /* Optional non-empty unique ID */ int64_t size; /* Size of the bitmap (Number of sectors) */ - bool disabled; /* Bitmap is read-only */ + bool disabled; /* Bitmap is disabled. It ignores all writes to + the device */ int active_iterators; /* How many iterators are active */ + bool readonly; /* Bitmap is read-only. This field also + prevents the respective image from being + modified (i.e. blocks writes and discards). + Such operations must fail and both the image + and this bitmap must remain unchanged while + this flag is set. */ + bool autoload; /* For persistent bitmaps: bitmap must be + autoloaded on image opening */ + bool persistent; /* bitmap must be saved to owner disk image */ QLIST_ENTRY(BdrvDirtyBitmap) list; }; @@ -93,6 +103,8 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) assert(!bdrv_dirty_bitmap_frozen(bitmap)); g_free(bitmap->name); bitmap->name = NULL; + bitmap->persistent = false; + bitmap->autoload = false; } /* Called with BQL taken. */ @@ -289,6 +301,10 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, bitmap->name = NULL; successor->name = name; bitmap->successor = NULL; + successor->persistent = bitmap->persistent; + bitmap->persistent = false; + successor->autoload = bitmap->autoload; + bitmap->autoload = false; bdrv_release_dirty_bitmap(bs, bitmap); return successor; @@ -340,15 +356,20 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) bdrv_dirty_bitmaps_unlock(bs); } +static bool bdrv_dirty_bitmap_has_name(BdrvDirtyBitmap *bitmap) +{ + return !!bdrv_dirty_bitmap_name(bitmap); +} + /* Called with BQL taken. */ -static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, - bool only_named) +static void bdrv_do_release_matching_dirty_bitmap( + BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + bool (*cond)(BdrvDirtyBitmap *bitmap)) { BdrvDirtyBitmap *bm, *next; bdrv_dirty_bitmaps_lock(bs); QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { - if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) { + if ((!bitmap || bm == bitmap) && (!cond || cond(bm))) { assert(!bm->active_iterators); assert(!bdrv_dirty_bitmap_frozen(bm)); assert(!bm->meta); @@ -373,17 +394,47 @@ out: /* Called with BQL taken. */ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) { - bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false); + bdrv_do_release_matching_dirty_bitmap(bs, bitmap, NULL); } /** * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()). * There must not be any frozen bitmaps attached. + * This function does not remove persistent bitmaps from the storage. * Called with BQL taken. */ void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs) { - bdrv_do_release_matching_dirty_bitmap(bs, NULL, true); + bdrv_do_release_matching_dirty_bitmap(bs, NULL, bdrv_dirty_bitmap_has_name); +} + +/** + * Release all persistent dirty bitmaps attached to a BDS (for use in + * bdrv_inactivate_recurse()). + * There must not be any frozen bitmaps attached. + * This function does not remove persistent bitmaps from the storage. + */ +void bdrv_release_persistent_dirty_bitmaps(BlockDriverState *bs) +{ + bdrv_do_release_matching_dirty_bitmap(bs, NULL, + bdrv_dirty_bitmap_get_persistance); +} + +/** + * Remove persistent dirty bitmap from the storage if it exists. + * Absence of bitmap is not an error, because we have the following scenario: + * BdrvDirtyBitmap can have .persistent = true but not yet saved and have no + * stored version. For such bitmap bdrv_remove_persistent_dirty_bitmap() should + * not fail. + * This function doesn't release corresponding BdrvDirtyBitmap. + */ +void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, + const char *name, + Error **errp) +{ + if (bs->drv && bs->drv->bdrv_remove_persistent_dirty_bitmap) { + bs->drv->bdrv_remove_persistent_dirty_bitmap(bs, name, errp); + } } /* Called with BQL taken. */ @@ -455,7 +506,7 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) return granularity; } -uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) +uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap) { return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); } @@ -504,6 +555,7 @@ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors) { assert(bdrv_dirty_bitmap_enabled(bitmap)); + assert(!bdrv_dirty_bitmap_readonly(bitmap)); hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); } @@ -520,6 +572,7 @@ void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors) { assert(bdrv_dirty_bitmap_enabled(bitmap)); + assert(!bdrv_dirty_bitmap_readonly(bitmap)); hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); } @@ -534,6 +587,7 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) { assert(bdrv_dirty_bitmap_enabled(bitmap)); + assert(!bdrv_dirty_bitmap_readonly(bitmap)); bdrv_dirty_bitmap_lock(bitmap); if (!out) { hbitmap_reset_all(bitmap->bitmap); @@ -550,6 +604,7 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) { HBitmap *tmp = bitmap->bitmap; assert(bdrv_dirty_bitmap_enabled(bitmap)); + assert(!bdrv_dirty_bitmap_readonly(bitmap)); bitmap->bitmap = in; hbitmap_free(tmp); } @@ -586,6 +641,13 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish); } +void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap, + uint64_t start, uint64_t count, + bool finish) +{ + hbitmap_deserialize_ones(bitmap->bitmap, start, count, finish); +} + void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) { hbitmap_deserialize_finish(bitmap->bitmap); @@ -605,6 +667,7 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, if (!bdrv_dirty_bitmap_enabled(bitmap)) { continue; } + assert(!bdrv_dirty_bitmap_readonly(bitmap)); hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); } bdrv_dirty_bitmaps_unlock(bs); @@ -627,3 +690,78 @@ int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap) { return hbitmap_count(bitmap->meta); } + +bool bdrv_dirty_bitmap_readonly(const BdrvDirtyBitmap *bitmap) +{ + return bitmap->readonly; +} + +/* Called with BQL taken. */ +void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value) +{ + qemu_mutex_lock(bitmap->mutex); + bitmap->readonly = value; + qemu_mutex_unlock(bitmap->mutex); +} + +bool bdrv_has_readonly_bitmaps(BlockDriverState *bs) +{ + BdrvDirtyBitmap *bm; + QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { + if (bm->readonly) { + return true; + } + } + + return false; +} + +/* Called with BQL taken. */ +void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload) +{ + qemu_mutex_lock(bitmap->mutex); + bitmap->autoload = autoload; + qemu_mutex_unlock(bitmap->mutex); +} + +bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap) +{ + return bitmap->autoload; +} + +/* Called with BQL taken. */ +void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, bool persistent) +{ + qemu_mutex_lock(bitmap->mutex); + bitmap->persistent = persistent; + qemu_mutex_unlock(bitmap->mutex); +} + +bool bdrv_dirty_bitmap_get_persistance(BdrvDirtyBitmap *bitmap) +{ + return bitmap->persistent; +} + +bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs) +{ + BdrvDirtyBitmap *bm; + QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { + if (bm->persistent && !bm->readonly) { + return true; + } + } + + return false; +} + +BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap) +{ + return bitmap == NULL ? QLIST_FIRST(&bs->dirty_bitmaps) : + QLIST_NEXT(bitmap, list); +} + +char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp) +{ + return hbitmap_sha256(bitmap->bitmap, errp); +} diff --git a/block/file-posix.c b/block/file-posix.c index 3927fabf06..cfbb236f6f 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1624,7 +1624,122 @@ static void raw_close(BlockDriverState *bs) } } -static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp) +/** + * Truncates the given regular file @fd to @offset and, when growing, fills the + * new space according to @prealloc. + * + * Returns: 0 on success, -errno on failure. + */ +static int raw_regular_truncate(int fd, int64_t offset, PreallocMode prealloc, + Error **errp) +{ + int result = 0; + int64_t current_length = 0; + char *buf = NULL; + struct stat st; + + if (fstat(fd, &st) < 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not stat file"); + return result; + } + + current_length = st.st_size; + if (current_length > offset && prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Cannot use preallocation for shrinking files"); + return -ENOTSUP; + } + + switch (prealloc) { +#ifdef CONFIG_POSIX_FALLOCATE + case PREALLOC_MODE_FALLOC: + /* + * Truncating before posix_fallocate() makes it about twice slower on + * file systems that do not support fallocate(), trying to check if a + * block is allocated before allocating it, so don't do that here. + */ + result = -posix_fallocate(fd, current_length, offset - current_length); + if (result != 0) { + /* posix_fallocate() doesn't set errno. */ + error_setg_errno(errp, -result, + "Could not preallocate new data"); + } + goto out; +#endif + case PREALLOC_MODE_FULL: + { + int64_t num = 0, left = offset - current_length; + + /* + * Knowing the final size from the beginning could allow the file + * system driver to do less allocations and possibly avoid + * fragmentation of the file. + */ + if (ftruncate(fd, offset) != 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not resize file"); + goto out; + } + + buf = g_malloc0(65536); + + result = lseek(fd, current_length, SEEK_SET); + if (result < 0) { + result = -errno; + error_setg_errno(errp, -result, + "Failed to seek to the old end of file"); + goto out; + } + + while (left > 0) { + num = MIN(left, 65536); + result = write(fd, buf, num); + if (result < 0) { + result = -errno; + error_setg_errno(errp, -result, + "Could not write zeros for preallocation"); + goto out; + } + left -= result; + } + if (result >= 0) { + result = fsync(fd); + if (result < 0) { + result = -errno; + error_setg_errno(errp, -result, + "Could not flush file to disk"); + goto out; + } + } + goto out; + } + case PREALLOC_MODE_OFF: + if (ftruncate(fd, offset) != 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not resize file"); + } + return result; + default: + result = -ENOTSUP; + error_setg(errp, "Unsupported preallocation mode: %s", + PreallocMode_lookup[prealloc]); + return result; + } + +out: + if (result < 0) { + if (ftruncate(fd, current_length) < 0) { + error_report("Failed to restore old file length: %s", + strerror(errno)); + } + } + + g_free(buf); + return result; +} + +static int raw_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVRawState *s = bs->opaque; struct stat st; @@ -1637,12 +1752,16 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp) } if (S_ISREG(st.st_mode)) { - if (ftruncate(s->fd, offset) < 0) { - ret = -errno; - error_setg_errno(errp, -ret, "Failed to resize the file"); - return ret; - } - } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) { + return raw_regular_truncate(s->fd, offset, prealloc, errp); + } + + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Preallocation mode '%s' unsupported for this " + "non-regular file", PreallocMode_lookup[prealloc]); + return -ENOTSUP; + } + + if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) { if (offset > raw_getlength(bs)) { error_setg(errp, "Cannot grow device files"); return -EINVAL; @@ -1885,71 +2004,9 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) #endif } - switch (prealloc) { -#ifdef CONFIG_POSIX_FALLOCATE - case PREALLOC_MODE_FALLOC: - /* - * Truncating before posix_fallocate() makes it about twice slower on - * file systems that do not support fallocate(), trying to check if a - * block is allocated before allocating it, so don't do that here. - */ - result = -posix_fallocate(fd, 0, total_size); - if (result != 0) { - /* posix_fallocate() doesn't set errno. */ - error_setg_errno(errp, -result, - "Could not preallocate data for the new file"); - } - break; -#endif - case PREALLOC_MODE_FULL: - { - /* - * Knowing the final size from the beginning could allow the file - * system driver to do less allocations and possibly avoid - * fragmentation of the file. - */ - if (ftruncate(fd, total_size) != 0) { - result = -errno; - error_setg_errno(errp, -result, "Could not resize file"); - goto out_close; - } - - int64_t num = 0, left = total_size; - buf = g_malloc0(65536); - - while (left > 0) { - num = MIN(left, 65536); - result = write(fd, buf, num); - if (result < 0) { - result = -errno; - error_setg_errno(errp, -result, - "Could not write to the new file"); - break; - } - left -= result; - } - if (result >= 0) { - result = fsync(fd); - if (result < 0) { - result = -errno; - error_setg_errno(errp, -result, - "Could not flush new file to disk"); - } - } - g_free(buf); - break; - } - case PREALLOC_MODE_OFF: - if (ftruncate(fd, total_size) != 0) { - result = -errno; - error_setg_errno(errp, -result, "Could not resize file"); - } - break; - default: - result = -EINVAL; - error_setg(errp, "Unsupported preallocation mode: %s", - PreallocMode_lookup[prealloc]); - break; + result = raw_regular_truncate(fd, total_size, prealloc, errp); + if (result < 0) { + goto out_close; } out_close: diff --git a/block/file-win32.c b/block/file-win32.c index ef2910b03f..4706335cff 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -461,12 +461,19 @@ static void raw_close(BlockDriverState *bs) } } -static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp) +static int raw_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVRawState *s = bs->opaque; LONG low, high; DWORD dwPtrLow; + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_lookup[prealloc]); + return -ENOTSUP; + } + low = offset; high = offset >> 32; diff --git a/block/gluster.c b/block/gluster.c index c2a524140d..3064a45047 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1095,11 +1095,17 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, } static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset, - Error **errp) + PreallocMode prealloc, Error **errp) { int ret; BDRVGlusterState *s = bs->opaque; + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_lookup[prealloc]); + return -ENOTSUP; + } + ret = glfs_ftruncate(s->fd, offset); if (ret < 0) { ret = -errno; diff --git a/block/io.c b/block/io.c index 23170a57ee..b413727524 100644 --- a/block/io.c +++ b/block/io.c @@ -1315,6 +1315,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, uint64_t bytes_remaining = bytes; int max_transfer; + if (bdrv_has_readonly_bitmaps(bs)) { + return -EPERM; + } + assert(is_power_of_2(align)); assert((offset & (align - 1)) == 0); assert((bytes & (align - 1)) == 0); @@ -2287,6 +2291,10 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, return -ENOMEDIUM; } + if (bdrv_has_readonly_bitmaps(bs)) { + return -EPERM; + } + ret = bdrv_check_byte_request(bs, offset, bytes); if (ret < 0) { return ret; diff --git a/block/iscsi.c b/block/iscsi.c index 3aa438a0b7..d557c99668 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -2079,11 +2079,18 @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state) } } -static int iscsi_truncate(BlockDriverState *bs, int64_t offset, Error **errp) +static int iscsi_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { IscsiLun *iscsilun = bs->opaque; Error *local_err = NULL; + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_lookup[prealloc]); + return -ENOTSUP; + } + if (iscsilun->type != TYPE_DISK) { error_setg(errp, "Cannot resize non-disk iSCSI devices"); return -ENOTSUP; diff --git a/block/mirror.c b/block/mirror.c index eaf0fe7858..8583b764a0 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -739,7 +739,8 @@ static void coroutine_fn mirror_run(void *opaque) } if (s->bdev_length > base_length) { - ret = blk_truncate(s->target, s->bdev_length, NULL); + ret = blk_truncate(s->target, s->bdev_length, PREALLOC_MODE_OFF, + NULL); if (ret < 0) { goto immediate_exit; } diff --git a/block/nfs.c b/block/nfs.c index 5e985282f4..d8db419957 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -759,11 +759,18 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) return (task.ret < 0 ? task.ret : st.st_blocks * 512); } -static int nfs_file_truncate(BlockDriverState *bs, int64_t offset, Error **errp) +static int nfs_file_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { NFSClient *client = bs->opaque; int ret; + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_lookup[prealloc]); + return -ENOTSUP; + } + ret = nfs_ftruncate(client->context, client->fh, offset); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to truncate file"); diff --git a/block/parallels.c b/block/parallels.c index 8be46a7d48..5bbdfabb7a 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -224,7 +224,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, } else { ret = bdrv_truncate(bs->file, (s->data_end + space) << BDRV_SECTOR_BITS, - NULL); + PREALLOC_MODE_OFF, NULL); } if (ret < 0) { return ret; @@ -458,7 +458,8 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res, res->leaks += count; if (fix & BDRV_FIX_LEAKS) { Error *local_err = NULL; - ret = bdrv_truncate(bs->file, res->image_end_offset, &local_err); + ret = bdrv_truncate(bs->file, res->image_end_offset, + PREALLOC_MODE_OFF, &local_err); if (ret < 0) { error_report_err(local_err); res->check_errors++; @@ -507,7 +508,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) blk_set_allow_write_beyond_eof(file, true); - ret = blk_truncate(file, 0, errp); + ret = blk_truncate(file, 0, PREALLOC_MODE_OFF, errp); if (ret < 0) { goto exit; } @@ -699,7 +700,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } if (!(flags & BDRV_O_RESIZE) || !bdrv_has_zero_init(bs->file->bs) || - bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs), NULL) != 0) { + bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs), + PREALLOC_MODE_OFF, NULL) != 0) { s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE; } @@ -742,7 +744,8 @@ static void parallels_close(BlockDriverState *bs) } if (bs->open_flags & BDRV_O_RDWR) { - bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, NULL); + bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, + PREALLOC_MODE_OFF, NULL); } g_free(s->bat_dirty_bmap); diff --git a/block/qapi.c b/block/qapi.c index 0a41d59bf3..080eb8f115 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -45,7 +45,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, info->ro = bs->read_only; info->drv = g_strdup(bs->drv->format_name); info->encrypted = bs->encrypted; - info->encryption_key_missing = bdrv_key_required(bs); + info->encryption_key_missing = false; info->cache = g_new(BlockdevCacheInfo, 1); *info->cache = (BlockdevCacheInfo) { diff --git a/block/qcow.c b/block/qcow.c index 7bd94dcd46..66827d6f24 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -31,8 +31,10 @@ #include "qemu/bswap.h" #include <zlib.h> #include "qapi/qmp/qerror.h" -#include "crypto/cipher.h" +#include "qapi/qmp/qstring.h" +#include "crypto/block.h" #include "migration/blocker.h" +#include "block/crypto.h" /**************************************************************/ /* QEMU COW block driver with compression and encryption support */ @@ -77,7 +79,7 @@ typedef struct BDRVQcowState { uint8_t *cluster_cache; uint8_t *cluster_data; uint64_t cluster_cache_offset; - QCryptoCipher *cipher; /* NULL if no key yet */ + QCryptoBlock *crypto; /* Disk encryption format driver */ uint32_t crypt_method_header; CoMutex lock; Error *migration_blocker; @@ -97,6 +99,15 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) return 0; } +static QemuOptsList qcow_runtime_opts = { + .name = "qcow", + .head = QTAILQ_HEAD_INITIALIZER(qcow_runtime_opts.head), + .desc = { + BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET("encrypt."), + { /* end of list */ } + }, +}; + static int qcow_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -105,11 +116,19 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, int ret; QCowHeader header; Error *local_err = NULL; + QCryptoBlockOpenOptions *crypto_opts = NULL; + unsigned int cflags = 0; + QDict *encryptopts = NULL; + const char *encryptfmt; + + qdict_extract_subqdict(options, &encryptopts, "encrypt."); + encryptfmt = qdict_get_try_str(encryptopts, "format"); bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false, errp); if (!bs->file) { - return -EINVAL; + ret = -EINVAL; + goto fail; } ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); @@ -155,17 +174,6 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - if (header.crypt_method > QCOW_CRYPT_AES) { - error_setg(errp, "invalid encryption method in qcow header"); - ret = -EINVAL; - goto fail; - } - if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128, - QCRYPTO_CIPHER_MODE_CBC)) { - error_setg(errp, "AES cipher not available"); - ret = -EINVAL; - goto fail; - } s->crypt_method_header = header.crypt_method; if (s->crypt_method_header) { if (bdrv_uses_whitelist() && @@ -181,8 +189,44 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, ret = -ENOSYS; goto fail; } + if (s->crypt_method_header == QCOW_CRYPT_AES) { + if (encryptfmt && !g_str_equal(encryptfmt, "aes")) { + error_setg(errp, + "Header reported 'aes' encryption format but " + "options specify '%s'", encryptfmt); + ret = -EINVAL; + goto fail; + } + qdict_del(encryptopts, "format"); + crypto_opts = block_crypto_open_opts_init( + Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp); + if (!crypto_opts) { + ret = -EINVAL; + goto fail; + } + if (flags & BDRV_O_NO_IO) { + cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; + } + s->crypto = qcrypto_block_open(crypto_opts, "encrypt.", + NULL, NULL, cflags, errp); + if (!s->crypto) { + ret = -EINVAL; + goto fail; + } + } else { + error_setg(errp, "invalid encryption method in qcow header"); + ret = -EINVAL; + goto fail; + } bs->encrypted = true; + } else { + if (encryptfmt) { + error_setg(errp, "No encryption in image header, but options " + "specified format '%s'", encryptfmt); + ret = -EINVAL; + goto fail; + } } s->cluster_bits = header.cluster_bits; s->cluster_size = 1 << s->cluster_bits; @@ -266,6 +310,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } + QDECREF(encryptopts); + qapi_free_QCryptoBlockOpenOptions(crypto_opts); qemu_co_mutex_init(&s->lock); return 0; @@ -274,6 +320,9 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, qemu_vfree(s->l2_cache); g_free(s->cluster_cache); g_free(s->cluster_data); + qcrypto_block_free(s->crypto); + QDECREF(encryptopts); + qapi_free_QCryptoBlockOpenOptions(crypto_opts); return ret; } @@ -286,85 +335,6 @@ static int qcow_reopen_prepare(BDRVReopenState *state, return 0; } -static int qcow_set_key(BlockDriverState *bs, const char *key) -{ - BDRVQcowState *s = bs->opaque; - uint8_t keybuf[16]; - int len, i; - Error *err; - - memset(keybuf, 0, 16); - len = strlen(key); - if (len > 16) - len = 16; - /* XXX: we could compress the chars to 7 bits to increase - entropy */ - for(i = 0;i < len;i++) { - keybuf[i] = key[i]; - } - assert(bs->encrypted); - - qcrypto_cipher_free(s->cipher); - s->cipher = qcrypto_cipher_new( - QCRYPTO_CIPHER_ALG_AES_128, - QCRYPTO_CIPHER_MODE_CBC, - keybuf, G_N_ELEMENTS(keybuf), - &err); - - if (!s->cipher) { - /* XXX would be nice if errors in this method could - * be properly propagate to the caller. Would need - * the bdrv_set_key() API signature to be fixed. */ - error_free(err); - return -1; - } - return 0; -} - -/* The crypt function is compatible with the linux cryptoloop - algorithm for < 4 GB images. NOTE: out_buf == in_buf is - supported */ -static int encrypt_sectors(BDRVQcowState *s, int64_t sector_num, - uint8_t *out_buf, const uint8_t *in_buf, - int nb_sectors, bool enc, Error **errp) -{ - union { - uint64_t ll[2]; - uint8_t b[16]; - } ivec; - int i; - int ret; - - for(i = 0; i < nb_sectors; i++) { - ivec.ll[0] = cpu_to_le64(sector_num); - ivec.ll[1] = 0; - if (qcrypto_cipher_setiv(s->cipher, - ivec.b, G_N_ELEMENTS(ivec.b), - errp) < 0) { - return -1; - } - if (enc) { - ret = qcrypto_cipher_encrypt(s->cipher, - in_buf, - out_buf, - 512, - errp); - } else { - ret = qcrypto_cipher_decrypt(s->cipher, - in_buf, - out_buf, - 512, - errp); - } - if (ret < 0) { - return -1; - } - sector_num++; - in_buf += 512; - out_buf += 512; - } - return 0; -} /* 'allocate' is: * @@ -473,22 +443,23 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, /* round to cluster size */ cluster_offset = (cluster_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); - bdrv_truncate(bs->file, cluster_offset + s->cluster_size, NULL); + bdrv_truncate(bs->file, cluster_offset + s->cluster_size, + PREALLOC_MODE_OFF, NULL); /* if encrypted, we must initialize the cluster content which won't be written */ if (bs->encrypted && (n_end - n_start) < s->cluster_sectors) { uint64_t start_sect; - assert(s->cipher); + assert(s->crypto); start_sect = (offset & ~(s->cluster_size - 1)) >> 9; - memset(s->cluster_data + 512, 0x00, 512); for(i = 0; i < s->cluster_sectors; i++) { if (i < n_start || i >= n_end) { Error *err = NULL; - if (encrypt_sectors(s, start_sect + i, - s->cluster_data, - s->cluster_data + 512, 1, - true, &err) < 0) { + memset(s->cluster_data, 0x00, 512); + if (qcrypto_block_encrypt(s->crypto, start_sect + i, + s->cluster_data, + BDRV_SECTOR_SIZE, + &err) < 0) { error_free(err); errno = EIO; return -1; @@ -533,7 +504,7 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs, if (!cluster_offset) { return 0; } - if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->cipher) { + if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypto) { return BDRV_BLOCK_DATA; } cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); @@ -664,9 +635,9 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, break; } if (bs->encrypted) { - assert(s->cipher); - if (encrypt_sectors(s, sector_num, buf, buf, - n, false, &err) < 0) { + assert(s->crypto); + if (qcrypto_block_decrypt(s->crypto, sector_num, buf, + n * BDRV_SECTOR_SIZE, &err) < 0) { goto fail; } } @@ -700,9 +671,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, BDRVQcowState *s = bs->opaque; int index_in_cluster; uint64_t cluster_offset; - const uint8_t *src_buf; int ret = 0, n; - uint8_t *cluster_data = NULL; struct iovec hd_iov; QEMUIOVector hd_qiov; uint8_t *buf; @@ -710,7 +679,9 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, s->cluster_cache_offset = -1; /* disable compressed cache */ - if (qiov->niov > 1) { + /* We must always copy the iov when encrypting, so we + * don't modify the original data buffer during encryption */ + if (bs->encrypted || qiov->niov > 1) { buf = orig_buf = qemu_try_blockalign(bs, qiov->size); if (buf == NULL) { return -ENOMEM; @@ -739,22 +710,16 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, } if (bs->encrypted) { Error *err = NULL; - assert(s->cipher); - if (!cluster_data) { - cluster_data = g_malloc0(s->cluster_size); - } - if (encrypt_sectors(s, sector_num, cluster_data, buf, - n, true, &err) < 0) { + assert(s->crypto); + if (qcrypto_block_encrypt(s->crypto, sector_num, buf, + n * BDRV_SECTOR_SIZE, &err) < 0) { error_free(err); ret = -EIO; break; } - src_buf = cluster_data; - } else { - src_buf = buf; } - hd_iov.iov_base = (void *)src_buf; + hd_iov.iov_base = (void *)buf; hd_iov.iov_len = n * 512; qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); qemu_co_mutex_unlock(&s->lock); @@ -773,10 +738,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, } qemu_co_mutex_unlock(&s->lock); - if (qiov->niov > 1) { - qemu_vfree(orig_buf); - } - g_free(cluster_data); + qemu_vfree(orig_buf); return ret; } @@ -785,8 +747,8 @@ static void qcow_close(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; - qcrypto_cipher_free(s->cipher); - s->cipher = NULL; + qcrypto_block_free(s->crypto); + s->crypto = NULL; g_free(s->l1_table); qemu_vfree(s->l2_cache); g_free(s->cluster_cache); @@ -803,17 +765,35 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) uint8_t *tmp; int64_t total_size = 0; char *backing_file = NULL; - int flags = 0; Error *local_err = NULL; int ret; BlockBackend *qcow_blk; + const char *encryptfmt = NULL; + QDict *options; + QDict *encryptopts = NULL; + QCryptoBlockCreateOptions *crypto_opts = NULL; + QCryptoBlock *crypto = NULL; /* Read out options */ total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), BDRV_SECTOR_SIZE); + if (total_size == 0) { + error_setg(errp, "Image size is too small, cannot be zero length"); + ret = -EINVAL; + goto cleanup; + } + backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); - if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) { - flags |= BLOCK_FLAG_ENCRYPT; + encryptfmt = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT); + if (encryptfmt) { + if (qemu_opt_get(opts, BLOCK_OPT_ENCRYPT)) { + error_setg(errp, "Options " BLOCK_OPT_ENCRYPT " and " + BLOCK_OPT_ENCRYPT_FORMAT " are mutually exclusive"); + ret = -EINVAL; + goto cleanup; + } + } else if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) { + encryptfmt = "aes"; } ret = bdrv_create_file(filename, opts, &local_err); @@ -833,7 +813,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) blk_set_allow_write_beyond_eof(qcow_blk, true); - ret = blk_truncate(qcow_blk, 0, errp); + ret = blk_truncate(qcow_blk, 0, PREALLOC_MODE_OFF, errp); if (ret < 0) { goto exit; } @@ -867,8 +847,32 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) l1_size = (total_size + (1LL << shift) - 1) >> shift; header.l1_table_offset = cpu_to_be64(header_size); - if (flags & BLOCK_FLAG_ENCRYPT) { + + options = qemu_opts_to_qdict(opts, NULL); + qdict_extract_subqdict(options, &encryptopts, "encrypt."); + QDECREF(options); + if (encryptfmt) { + if (!g_str_equal(encryptfmt, "aes")) { + error_setg(errp, "Unknown encryption format '%s', expected 'aes'", + encryptfmt); + ret = -EINVAL; + goto exit; + } header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); + + crypto_opts = block_crypto_create_opts_init( + Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp); + if (!crypto_opts) { + ret = -EINVAL; + goto exit; + } + + crypto = qcrypto_block_create(crypto_opts, "encrypt.", + NULL, NULL, NULL, errp); + if (!crypto) { + ret = -EINVAL; + goto exit; + } } else { header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); } @@ -903,6 +907,9 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) exit: blk_unref(qcow_blk); cleanup: + QDECREF(encryptopts); + qcrypto_block_free(crypto); + qapi_free_QCryptoBlockCreateOptions(crypto_opts); g_free(backing_file); return ret; } @@ -917,7 +924,8 @@ static int qcow_make_empty(BlockDriverState *bs) if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0) return -1; - ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, NULL); + ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, + PREALLOC_MODE_OFF, NULL); if (ret < 0) return ret; @@ -1041,9 +1049,15 @@ static QemuOptsList qcow_create_opts = { { .name = BLOCK_OPT_ENCRYPT, .type = QEMU_OPT_BOOL, - .help = "Encrypt the image", - .def_value_str = "off" + .help = "Encrypt the image with format 'aes'. (Deprecated " + "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)", + }, + { + .name = BLOCK_OPT_ENCRYPT_FORMAT, + .type = QEMU_OPT_STRING, + .help = "Encrypt the image, format choices: 'aes'", }, + BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET("encrypt."), { /* end of list */ } } }; @@ -1064,7 +1078,6 @@ static BlockDriver bdrv_qcow = { .bdrv_co_writev = qcow_co_writev, .bdrv_co_get_block_status = qcow_co_get_block_status, - .bdrv_set_key = qcow_set_key, .bdrv_make_empty = qcow_make_empty, .bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed, .bdrv_get_info = qcow_get_info, diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c new file mode 100644 index 0000000000..3e8735a20d --- /dev/null +++ b/block/qcow2-bitmap.c @@ -0,0 +1,1482 @@ +/* + * Bitmaps for the QCOW version 2 format + * + * Copyright (c) 2014-2017 Vladimir Sementsov-Ogievskiy + * + * This file is derived from qcow2-snapshot.c, original copyright: + * Copyright (c) 2004-2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/cutils.h" + +#include "block/block_int.h" +#include "block/qcow2.h" + +/* NOTICE: BME here means Bitmaps Extension and used as a namespace for + * _internal_ constants. Please do not use this _internal_ abbreviation for + * other needs and/or outside of this file. */ + +/* Bitmap directory entry constraints */ +#define BME_MAX_TABLE_SIZE 0x8000000 +#define BME_MAX_PHYS_SIZE 0x20000000 /* restrict BdrvDirtyBitmap size in RAM */ +#define BME_MAX_GRANULARITY_BITS 31 +#define BME_MIN_GRANULARITY_BITS 9 +#define BME_MAX_NAME_SIZE 1023 + +#if BME_MAX_TABLE_SIZE * 8ULL > INT_MAX +#error In the code bitmap table physical size assumed to fit into int +#endif + +/* Bitmap directory entry flags */ +#define BME_RESERVED_FLAGS 0xfffffffcU +#define BME_FLAG_IN_USE (1U << 0) +#define BME_FLAG_AUTO (1U << 1) + +/* bits [1, 8] U [56, 63] are reserved */ +#define BME_TABLE_ENTRY_RESERVED_MASK 0xff000000000001feULL +#define BME_TABLE_ENTRY_OFFSET_MASK 0x00fffffffffffe00ULL +#define BME_TABLE_ENTRY_FLAG_ALL_ONES (1ULL << 0) + +typedef struct QEMU_PACKED Qcow2BitmapDirEntry { + /* header is 8 byte aligned */ + uint64_t bitmap_table_offset; + + uint32_t bitmap_table_size; + uint32_t flags; + + uint8_t type; + uint8_t granularity_bits; + uint16_t name_size; + uint32_t extra_data_size; + /* extra data follows */ + /* name follows */ +} Qcow2BitmapDirEntry; + +typedef struct Qcow2BitmapTable { + uint64_t offset; + uint32_t size; /* number of 64bit entries */ + QSIMPLEQ_ENTRY(Qcow2BitmapTable) entry; +} Qcow2BitmapTable; +typedef QSIMPLEQ_HEAD(Qcow2BitmapTableList, Qcow2BitmapTable) + Qcow2BitmapTableList; + +typedef struct Qcow2Bitmap { + Qcow2BitmapTable table; + uint32_t flags; + uint8_t granularity_bits; + char *name; + + BdrvDirtyBitmap *dirty_bitmap; + + QSIMPLEQ_ENTRY(Qcow2Bitmap) entry; +} Qcow2Bitmap; +typedef QSIMPLEQ_HEAD(Qcow2BitmapList, Qcow2Bitmap) Qcow2BitmapList; + +typedef enum BitmapType { + BT_DIRTY_TRACKING_BITMAP = 1 +} BitmapType; + +static inline bool can_write(BlockDriverState *bs) +{ + return !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE); +} + +static int update_header_sync(BlockDriverState *bs) +{ + int ret; + + ret = qcow2_update_header(bs); + if (ret < 0) { + return ret; + } + + return bdrv_flush(bs); +} + +static inline void bitmap_table_to_be(uint64_t *bitmap_table, size_t size) +{ + size_t i; + + for (i = 0; i < size; ++i) { + cpu_to_be64s(&bitmap_table[i]); + } +} + +static int check_table_entry(uint64_t entry, int cluster_size) +{ + uint64_t offset; + + if (entry & BME_TABLE_ENTRY_RESERVED_MASK) { + return -EINVAL; + } + + offset = entry & BME_TABLE_ENTRY_OFFSET_MASK; + if (offset != 0) { + /* if offset specified, bit 0 is reserved */ + if (entry & BME_TABLE_ENTRY_FLAG_ALL_ONES) { + return -EINVAL; + } + + if (offset % cluster_size != 0) { + return -EINVAL; + } + } + + return 0; +} + +static int check_constraints_on_bitmap(BlockDriverState *bs, + const char *name, + uint32_t granularity, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + int granularity_bits = ctz32(granularity); + int64_t len = bdrv_getlength(bs); + + assert(granularity > 0); + assert((granularity & (granularity - 1)) == 0); + + if (len < 0) { + error_setg_errno(errp, -len, "Failed to get size of '%s'", + bdrv_get_device_or_node_name(bs)); + return len; + } + + if (granularity_bits > BME_MAX_GRANULARITY_BITS) { + error_setg(errp, "Granularity exceeds maximum (%llu bytes)", + 1ULL << BME_MAX_GRANULARITY_BITS); + return -EINVAL; + } + if (granularity_bits < BME_MIN_GRANULARITY_BITS) { + error_setg(errp, "Granularity is under minimum (%llu bytes)", + 1ULL << BME_MIN_GRANULARITY_BITS); + return -EINVAL; + } + + if ((len > (uint64_t)BME_MAX_PHYS_SIZE << granularity_bits) || + (len > (uint64_t)BME_MAX_TABLE_SIZE * s->cluster_size << + granularity_bits)) + { + error_setg(errp, "Too much space will be occupied by the bitmap. " + "Use larger granularity"); + return -EINVAL; + } + + if (strlen(name) > BME_MAX_NAME_SIZE) { + error_setg(errp, "Name length exceeds maximum (%u characters)", + BME_MAX_NAME_SIZE); + return -EINVAL; + } + + return 0; +} + +static void clear_bitmap_table(BlockDriverState *bs, uint64_t *bitmap_table, + uint32_t bitmap_table_size) +{ + BDRVQcow2State *s = bs->opaque; + int i; + + for (i = 0; i < bitmap_table_size; ++i) { + uint64_t addr = bitmap_table[i] & BME_TABLE_ENTRY_OFFSET_MASK; + if (!addr) { + continue; + } + + qcow2_free_clusters(bs, addr, s->cluster_size, QCOW2_DISCARD_OTHER); + bitmap_table[i] = 0; + } +} + +static int bitmap_table_load(BlockDriverState *bs, Qcow2BitmapTable *tb, + uint64_t **bitmap_table) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + uint32_t i; + uint64_t *table; + + assert(tb->size != 0); + table = g_try_new(uint64_t, tb->size); + if (table == NULL) { + return -ENOMEM; + } + + assert(tb->size <= BME_MAX_TABLE_SIZE); + ret = bdrv_pread(bs->file, tb->offset, + table, tb->size * sizeof(uint64_t)); + if (ret < 0) { + goto fail; + } + + for (i = 0; i < tb->size; ++i) { + be64_to_cpus(&table[i]); + ret = check_table_entry(table[i], s->cluster_size); + if (ret < 0) { + goto fail; + } + } + + *bitmap_table = table; + return 0; + +fail: + g_free(table); + + return ret; +} + +static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb) +{ + int ret; + uint64_t *bitmap_table; + + ret = bitmap_table_load(bs, tb, &bitmap_table); + if (ret < 0) { + assert(bitmap_table == NULL); + return ret; + } + + clear_bitmap_table(bs, bitmap_table, tb->size); + qcow2_free_clusters(bs, tb->offset, tb->size * sizeof(uint64_t), + QCOW2_DISCARD_OTHER); + g_free(bitmap_table); + + tb->offset = 0; + tb->size = 0; + + return 0; +} + +/* This function returns the number of disk sectors covered by a single qcow2 + * cluster of bitmap data. */ +static uint64_t sectors_covered_by_bitmap_cluster(const BDRVQcow2State *s, + const BdrvDirtyBitmap *bitmap) +{ + uint32_t sector_granularity = + bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS; + + return (uint64_t)sector_granularity * (s->cluster_size << 3); +} + +/* load_bitmap_data + * @bitmap_table entries must satisfy specification constraints. + * @bitmap must be cleared */ +static int load_bitmap_data(BlockDriverState *bs, + const uint64_t *bitmap_table, + uint32_t bitmap_table_size, + BdrvDirtyBitmap *bitmap) +{ + int ret = 0; + BDRVQcow2State *s = bs->opaque; + uint64_t sector, sbc; + uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); + uint8_t *buf = NULL; + uint64_t i, tab_size = + size_to_clusters(s, + bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size)); + + if (tab_size != bitmap_table_size || tab_size > BME_MAX_TABLE_SIZE) { + return -EINVAL; + } + + buf = g_malloc(s->cluster_size); + sbc = sectors_covered_by_bitmap_cluster(s, bitmap); + for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) { + uint64_t count = MIN(bm_size - sector, sbc); + uint64_t entry = bitmap_table[i]; + uint64_t offset = entry & BME_TABLE_ENTRY_OFFSET_MASK; + + assert(check_table_entry(entry, s->cluster_size) == 0); + + if (offset == 0) { + if (entry & BME_TABLE_ENTRY_FLAG_ALL_ONES) { + bdrv_dirty_bitmap_deserialize_ones(bitmap, sector, count, + false); + } else { + /* No need to deserialize zeros because the dirty bitmap is + * already cleared */ + } + } else { + ret = bdrv_pread(bs->file, offset, buf, s->cluster_size); + if (ret < 0) { + goto finish; + } + bdrv_dirty_bitmap_deserialize_part(bitmap, buf, sector, count, + false); + } + } + ret = 0; + + bdrv_dirty_bitmap_deserialize_finish(bitmap); + +finish: + g_free(buf); + + return ret; +} + +static BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs, + Qcow2Bitmap *bm, Error **errp) +{ + int ret; + uint64_t *bitmap_table = NULL; + uint32_t granularity; + BdrvDirtyBitmap *bitmap = NULL; + + if (bm->flags & BME_FLAG_IN_USE) { + error_setg(errp, "Bitmap '%s' is in use", bm->name); + goto fail; + } + + ret = bitmap_table_load(bs, &bm->table, &bitmap_table); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Could not read bitmap_table table from image for " + "bitmap '%s'", bm->name); + goto fail; + } + + granularity = 1U << bm->granularity_bits; + bitmap = bdrv_create_dirty_bitmap(bs, granularity, bm->name, errp); + if (bitmap == NULL) { + goto fail; + } + + ret = load_bitmap_data(bs, bitmap_table, bm->table.size, bitmap); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read bitmap '%s' from image", + bm->name); + goto fail; + } + + g_free(bitmap_table); + return bitmap; + +fail: + g_free(bitmap_table); + if (bitmap != NULL) { + bdrv_release_dirty_bitmap(bs, bitmap); + } + + return NULL; +} + +/* + * Bitmap List + */ + +/* + * Bitmap List private functions + * Only Bitmap List knows about bitmap directory structure in Qcow2. + */ + +static inline void bitmap_dir_entry_to_cpu(Qcow2BitmapDirEntry *entry) +{ + be64_to_cpus(&entry->bitmap_table_offset); + be32_to_cpus(&entry->bitmap_table_size); + be32_to_cpus(&entry->flags); + be16_to_cpus(&entry->name_size); + be32_to_cpus(&entry->extra_data_size); +} + +static inline void bitmap_dir_entry_to_be(Qcow2BitmapDirEntry *entry) +{ + cpu_to_be64s(&entry->bitmap_table_offset); + cpu_to_be32s(&entry->bitmap_table_size); + cpu_to_be32s(&entry->flags); + cpu_to_be16s(&entry->name_size); + cpu_to_be32s(&entry->extra_data_size); +} + +static inline int calc_dir_entry_size(size_t name_size, size_t extra_data_size) +{ + return align_offset(sizeof(Qcow2BitmapDirEntry) + + name_size + extra_data_size, 8); +} + +static inline int dir_entry_size(Qcow2BitmapDirEntry *entry) +{ + return calc_dir_entry_size(entry->name_size, entry->extra_data_size); +} + +static inline const char *dir_entry_name_field(Qcow2BitmapDirEntry *entry) +{ + return (const char *)(entry + 1) + entry->extra_data_size; +} + +static inline char *dir_entry_copy_name(Qcow2BitmapDirEntry *entry) +{ + const char *name_field = dir_entry_name_field(entry); + return g_strndup(name_field, entry->name_size); +} + +static inline Qcow2BitmapDirEntry *next_dir_entry(Qcow2BitmapDirEntry *entry) +{ + return (Qcow2BitmapDirEntry *)((uint8_t *)entry + dir_entry_size(entry)); +} + +static int check_dir_entry(BlockDriverState *bs, Qcow2BitmapDirEntry *entry) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t phys_bitmap_bytes; + int64_t len; + + bool fail = (entry->bitmap_table_size == 0) || + (entry->bitmap_table_offset == 0) || + (entry->bitmap_table_offset % s->cluster_size) || + (entry->bitmap_table_size > BME_MAX_TABLE_SIZE) || + (entry->granularity_bits > BME_MAX_GRANULARITY_BITS) || + (entry->granularity_bits < BME_MIN_GRANULARITY_BITS) || + (entry->flags & BME_RESERVED_FLAGS) || + (entry->name_size > BME_MAX_NAME_SIZE) || + (entry->type != BT_DIRTY_TRACKING_BITMAP); + + if (fail) { + return -EINVAL; + } + + phys_bitmap_bytes = (uint64_t)entry->bitmap_table_size * s->cluster_size; + len = bdrv_getlength(bs); + + if (len < 0) { + return len; + } + + fail = (phys_bitmap_bytes > BME_MAX_PHYS_SIZE) || + (len > ((phys_bitmap_bytes * 8) << entry->granularity_bits)); + + return fail ? -EINVAL : 0; +} + +static inline void bitmap_directory_to_be(uint8_t *dir, size_t size) +{ + uint8_t *end = dir + size; + while (dir < end) { + Qcow2BitmapDirEntry *e = (Qcow2BitmapDirEntry *)dir; + dir += dir_entry_size(e); + + bitmap_dir_entry_to_be(e); + } +} + +/* + * Bitmap List public functions + */ + +static void bitmap_free(Qcow2Bitmap *bm) +{ + g_free(bm->name); + g_free(bm); +} + +static void bitmap_list_free(Qcow2BitmapList *bm_list) +{ + Qcow2Bitmap *bm; + + if (bm_list == NULL) { + return; + } + + while ((bm = QSIMPLEQ_FIRST(bm_list)) != NULL) { + QSIMPLEQ_REMOVE_HEAD(bm_list, entry); + bitmap_free(bm); + } + + g_free(bm_list); +} + +static Qcow2BitmapList *bitmap_list_new(void) +{ + Qcow2BitmapList *bm_list = g_new(Qcow2BitmapList, 1); + QSIMPLEQ_INIT(bm_list); + + return bm_list; +} + +static uint32_t bitmap_list_count(Qcow2BitmapList *bm_list) +{ + Qcow2Bitmap *bm; + uint32_t nb_bitmaps = 0; + + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + nb_bitmaps++; + } + + return nb_bitmaps; +} + +/* bitmap_list_load + * Get bitmap list from qcow2 image. Actually reads bitmap directory, + * checks it and convert to bitmap list. + */ +static Qcow2BitmapList *bitmap_list_load(BlockDriverState *bs, uint64_t offset, + uint64_t size, Error **errp) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + uint8_t *dir, *dir_end; + Qcow2BitmapDirEntry *e; + uint32_t nb_dir_entries = 0; + Qcow2BitmapList *bm_list = NULL; + + if (size == 0) { + error_setg(errp, "Requested bitmap directory size is zero"); + return NULL; + } + + if (size > QCOW2_MAX_BITMAP_DIRECTORY_SIZE) { + error_setg(errp, "Requested bitmap directory size is too big"); + return NULL; + } + + dir = g_try_malloc(size); + if (dir == NULL) { + error_setg(errp, "Failed to allocate space for bitmap directory"); + return NULL; + } + dir_end = dir + size; + + ret = bdrv_pread(bs->file, offset, dir, size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to read bitmap directory"); + goto fail; + } + + bm_list = bitmap_list_new(); + for (e = (Qcow2BitmapDirEntry *)dir; + e < (Qcow2BitmapDirEntry *)dir_end; + e = next_dir_entry(e)) + { + Qcow2Bitmap *bm; + + if ((uint8_t *)(e + 1) > dir_end) { + goto broken_dir; + } + + if (++nb_dir_entries > s->nb_bitmaps) { + error_setg(errp, "More bitmaps found than specified in header" + " extension"); + goto fail; + } + bitmap_dir_entry_to_cpu(e); + + if ((uint8_t *)next_dir_entry(e) > dir_end) { + goto broken_dir; + } + + if (e->extra_data_size != 0) { + error_setg(errp, "Bitmap extra data is not supported"); + goto fail; + } + + ret = check_dir_entry(bs, e); + if (ret < 0) { + error_setg(errp, "Bitmap '%.*s' doesn't satisfy the constraints", + e->name_size, dir_entry_name_field(e)); + goto fail; + } + + bm = g_new(Qcow2Bitmap, 1); + bm->table.offset = e->bitmap_table_offset; + bm->table.size = e->bitmap_table_size; + bm->flags = e->flags; + bm->granularity_bits = e->granularity_bits; + bm->name = dir_entry_copy_name(e); + QSIMPLEQ_INSERT_TAIL(bm_list, bm, entry); + } + + if (nb_dir_entries != s->nb_bitmaps) { + error_setg(errp, "Less bitmaps found than specified in header" + " extension"); + goto fail; + } + + if ((uint8_t *)e != dir_end) { + goto broken_dir; + } + + g_free(dir); + return bm_list; + +broken_dir: + ret = -EINVAL; + error_setg(errp, "Broken bitmap directory"); + +fail: + g_free(dir); + bitmap_list_free(bm_list); + + return NULL; +} + +int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res, + void **refcount_table, + int64_t *refcount_table_size) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + Qcow2BitmapList *bm_list; + Qcow2Bitmap *bm; + + if (s->nb_bitmaps == 0) { + return 0; + } + + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size, + s->bitmap_directory_offset, + s->bitmap_directory_size); + if (ret < 0) { + return ret; + } + + bm_list = bitmap_list_load(bs, s->bitmap_directory_offset, + s->bitmap_directory_size, NULL); + if (bm_list == NULL) { + res->corruptions++; + return -EINVAL; + } + + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + uint64_t *bitmap_table = NULL; + int i; + + ret = qcow2_inc_refcounts_imrt(bs, res, + refcount_table, refcount_table_size, + bm->table.offset, + bm->table.size * sizeof(uint64_t)); + if (ret < 0) { + goto out; + } + + ret = bitmap_table_load(bs, &bm->table, &bitmap_table); + if (ret < 0) { + res->corruptions++; + goto out; + } + + for (i = 0; i < bm->table.size; ++i) { + uint64_t entry = bitmap_table[i]; + uint64_t offset = entry & BME_TABLE_ENTRY_OFFSET_MASK; + + if (check_table_entry(entry, s->cluster_size) < 0) { + res->corruptions++; + continue; + } + + if (offset == 0) { + continue; + } + + ret = qcow2_inc_refcounts_imrt(bs, res, + refcount_table, refcount_table_size, + offset, s->cluster_size); + if (ret < 0) { + g_free(bitmap_table); + goto out; + } + } + + g_free(bitmap_table); + } + +out: + bitmap_list_free(bm_list); + + return ret; +} + +/* bitmap_list_store + * Store bitmap list to qcow2 image as a bitmap directory. + * Everything is checked. + */ +static int bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list, + uint64_t *offset, uint64_t *size, bool in_place) +{ + int ret; + uint8_t *dir; + int64_t dir_offset = 0; + uint64_t dir_size = 0; + Qcow2Bitmap *bm; + Qcow2BitmapDirEntry *e; + + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + dir_size += calc_dir_entry_size(strlen(bm->name), 0); + } + + if (dir_size == 0 || dir_size > QCOW2_MAX_BITMAP_DIRECTORY_SIZE) { + return -EINVAL; + } + + if (in_place) { + if (*size != dir_size || *offset == 0) { + return -EINVAL; + } + + dir_offset = *offset; + } + + dir = g_try_malloc(dir_size); + if (dir == NULL) { + return -ENOMEM; + } + + e = (Qcow2BitmapDirEntry *)dir; + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + e->bitmap_table_offset = bm->table.offset; + e->bitmap_table_size = bm->table.size; + e->flags = bm->flags; + e->type = BT_DIRTY_TRACKING_BITMAP; + e->granularity_bits = bm->granularity_bits; + e->name_size = strlen(bm->name); + e->extra_data_size = 0; + memcpy(e + 1, bm->name, e->name_size); + + if (check_dir_entry(bs, e) < 0) { + ret = -EINVAL; + goto fail; + } + + e = next_dir_entry(e); + } + + bitmap_directory_to_be(dir, dir_size); + + if (!in_place) { + dir_offset = qcow2_alloc_clusters(bs, dir_size); + if (dir_offset < 0) { + ret = dir_offset; + goto fail; + } + } + + ret = qcow2_pre_write_overlap_check(bs, 0, dir_offset, dir_size); + if (ret < 0) { + goto fail; + } + + ret = bdrv_pwrite(bs->file, dir_offset, dir, dir_size); + if (ret < 0) { + goto fail; + } + + g_free(dir); + + if (!in_place) { + *size = dir_size; + *offset = dir_offset; + } + + return 0; + +fail: + g_free(dir); + + if (!in_place && dir_offset > 0) { + qcow2_free_clusters(bs, dir_offset, dir_size, QCOW2_DISCARD_OTHER); + } + + return ret; +} + +/* + * Bitmap List end + */ + +static int update_ext_header_and_dir_in_place(BlockDriverState *bs, + Qcow2BitmapList *bm_list) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + + if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS) || + bm_list == NULL || QSIMPLEQ_EMPTY(bm_list) || + bitmap_list_count(bm_list) != s->nb_bitmaps) + { + return -EINVAL; + } + + s->autoclear_features &= ~(uint64_t)QCOW2_AUTOCLEAR_BITMAPS; + ret = update_header_sync(bs); + if (ret < 0) { + /* Two variants are possible here: + * 1. Autoclear flag is dropped, all bitmaps will be lost. + * 2. Autoclear flag is not dropped, old state is left. + */ + return ret; + } + + /* autoclear bit is not set, so we can safely update bitmap directory */ + + ret = bitmap_list_store(bs, bm_list, &s->bitmap_directory_offset, + &s->bitmap_directory_size, true); + if (ret < 0) { + /* autoclear bit is cleared, so all leaked clusters would be removed on + * qemu-img check */ + return ret; + } + + ret = update_header_sync(bs); + if (ret < 0) { + /* autoclear bit is cleared, so all leaked clusters would be removed on + * qemu-img check */ + return ret; + } + + s->autoclear_features |= QCOW2_AUTOCLEAR_BITMAPS; + return update_header_sync(bs); + /* If final update_header_sync() fails, two variants are possible: + * 1. Autoclear flag is not set, all bitmaps will be lost. + * 2. Autoclear flag is set, header and directory are successfully updated. + */ +} + +static int update_ext_header_and_dir(BlockDriverState *bs, + Qcow2BitmapList *bm_list) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + uint64_t new_offset = 0; + uint64_t new_size = 0; + uint32_t new_nb_bitmaps = 0; + uint64_t old_offset = s->bitmap_directory_offset; + uint64_t old_size = s->bitmap_directory_size; + uint32_t old_nb_bitmaps = s->nb_bitmaps; + uint64_t old_autocl = s->autoclear_features; + + if (bm_list != NULL && !QSIMPLEQ_EMPTY(bm_list)) { + new_nb_bitmaps = bitmap_list_count(bm_list); + + if (new_nb_bitmaps > QCOW2_MAX_BITMAPS) { + return -EINVAL; + } + + ret = bitmap_list_store(bs, bm_list, &new_offset, &new_size, false); + if (ret < 0) { + return ret; + } + + ret = bdrv_flush(bs->file->bs); + if (ret < 0) { + goto fail; + } + + s->autoclear_features |= QCOW2_AUTOCLEAR_BITMAPS; + } else { + s->autoclear_features &= ~(uint64_t)QCOW2_AUTOCLEAR_BITMAPS; + } + + s->bitmap_directory_offset = new_offset; + s->bitmap_directory_size = new_size; + s->nb_bitmaps = new_nb_bitmaps; + + ret = update_header_sync(bs); + if (ret < 0) { + goto fail; + } + + if (old_size > 0) { + qcow2_free_clusters(bs, old_offset, old_size, QCOW2_DISCARD_OTHER); + } + + return 0; + +fail: + if (new_offset > 0) { + qcow2_free_clusters(bs, new_offset, new_size, QCOW2_DISCARD_OTHER); + } + + s->bitmap_directory_offset = old_offset; + s->bitmap_directory_size = old_size; + s->nb_bitmaps = old_nb_bitmaps; + s->autoclear_features = old_autocl; + + return ret; +} + +/* for g_slist_foreach for GSList of BdrvDirtyBitmap* elements */ +static void release_dirty_bitmap_helper(gpointer bitmap, + gpointer bs) +{ + bdrv_release_dirty_bitmap(bs, bitmap); +} + +/* for g_slist_foreach for GSList of BdrvDirtyBitmap* elements */ +static void set_readonly_helper(gpointer bitmap, gpointer value) +{ + bdrv_dirty_bitmap_set_readonly(bitmap, (bool)value); +} + +/* qcow2_load_autoloading_dirty_bitmaps() + * Return value is a hint for caller: true means that the Qcow2 header was + * updated. (false doesn't mean that the header should be updated by the + * caller, it just means that updating was not needed or the image cannot be + * written to). + * On failure the function returns false. + */ +bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2BitmapList *bm_list; + Qcow2Bitmap *bm; + GSList *created_dirty_bitmaps = NULL; + bool header_updated = false; + + if (s->nb_bitmaps == 0) { + /* No bitmaps - nothing to do */ + return false; + } + + bm_list = bitmap_list_load(bs, s->bitmap_directory_offset, + s->bitmap_directory_size, errp); + if (bm_list == NULL) { + return false; + } + + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + if ((bm->flags & BME_FLAG_AUTO) && !(bm->flags & BME_FLAG_IN_USE)) { + BdrvDirtyBitmap *bitmap = load_bitmap(bs, bm, errp); + if (bitmap == NULL) { + goto fail; + } + + bdrv_dirty_bitmap_set_persistance(bitmap, true); + bdrv_dirty_bitmap_set_autoload(bitmap, true); + bm->flags |= BME_FLAG_IN_USE; + created_dirty_bitmaps = + g_slist_append(created_dirty_bitmaps, bitmap); + } + } + + if (created_dirty_bitmaps != NULL) { + if (can_write(bs)) { + /* in_use flags must be updated */ + int ret = update_ext_header_and_dir_in_place(bs, bm_list); + if (ret < 0) { + error_setg_errno(errp, -ret, "Can't update bitmap directory"); + goto fail; + } + header_updated = true; + } else { + g_slist_foreach(created_dirty_bitmaps, set_readonly_helper, + (gpointer)true); + } + } + + g_slist_free(created_dirty_bitmaps); + bitmap_list_free(bm_list); + + return header_updated; + +fail: + g_slist_foreach(created_dirty_bitmaps, release_dirty_bitmap_helper, bs); + g_slist_free(created_dirty_bitmaps); + bitmap_list_free(bm_list); + + return false; +} + +int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2BitmapList *bm_list; + Qcow2Bitmap *bm; + GSList *ro_dirty_bitmaps = NULL; + int ret = 0; + + if (s->nb_bitmaps == 0) { + /* No bitmaps - nothing to do */ + return 0; + } + + if (!can_write(bs)) { + error_setg(errp, "Can't write to the image on reopening bitmaps rw"); + return -EINVAL; + } + + bm_list = bitmap_list_load(bs, s->bitmap_directory_offset, + s->bitmap_directory_size, errp); + if (bm_list == NULL) { + return -EINVAL; + } + + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + if (!(bm->flags & BME_FLAG_IN_USE)) { + BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(bs, bm->name); + if (bitmap == NULL) { + continue; + } + + if (!bdrv_dirty_bitmap_readonly(bitmap)) { + error_setg(errp, "Bitmap %s is not readonly but not marked" + "'IN_USE' in the image. Something went wrong," + "all the bitmaps may be corrupted", bm->name); + ret = -EINVAL; + goto out; + } + + bm->flags |= BME_FLAG_IN_USE; + ro_dirty_bitmaps = g_slist_append(ro_dirty_bitmaps, bitmap); + } + } + + if (ro_dirty_bitmaps != NULL) { + /* in_use flags must be updated */ + ret = update_ext_header_and_dir_in_place(bs, bm_list); + if (ret < 0) { + error_setg_errno(errp, -ret, "Can't update bitmap directory"); + goto out; + } + g_slist_foreach(ro_dirty_bitmaps, set_readonly_helper, false); + } + +out: + g_slist_free(ro_dirty_bitmaps); + bitmap_list_free(bm_list); + + return ret; +} + +/* store_bitmap_data() + * Store bitmap to image, filling bitmap table accordingly. + */ +static uint64_t *store_bitmap_data(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + uint32_t *bitmap_table_size, Error **errp) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + int64_t sector; + uint64_t sbc; + uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap); + const char *bm_name = bdrv_dirty_bitmap_name(bitmap); + uint8_t *buf = NULL; + BdrvDirtyBitmapIter *dbi; + uint64_t *tb; + uint64_t tb_size = + size_to_clusters(s, + bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size)); + + if (tb_size > BME_MAX_TABLE_SIZE || + tb_size * s->cluster_size > BME_MAX_PHYS_SIZE) + { + error_setg(errp, "Bitmap '%s' is too big", bm_name); + return NULL; + } + + tb = g_try_new0(uint64_t, tb_size); + if (tb == NULL) { + error_setg(errp, "No memory"); + return NULL; + } + + dbi = bdrv_dirty_iter_new(bitmap, 0); + buf = g_malloc(s->cluster_size); + sbc = sectors_covered_by_bitmap_cluster(s, bitmap); + assert(DIV_ROUND_UP(bm_size, sbc) == tb_size); + + while ((sector = bdrv_dirty_iter_next(dbi)) != -1) { + uint64_t cluster = sector / sbc; + uint64_t end, write_size; + int64_t off; + + sector = cluster * sbc; + end = MIN(bm_size, sector + sbc); + write_size = + bdrv_dirty_bitmap_serialization_size(bitmap, sector, end - sector); + assert(write_size <= s->cluster_size); + + off = qcow2_alloc_clusters(bs, s->cluster_size); + if (off < 0) { + error_setg_errno(errp, -off, + "Failed to allocate clusters for bitmap '%s'", + bm_name); + goto fail; + } + tb[cluster] = off; + + bdrv_dirty_bitmap_serialize_part(bitmap, buf, sector, end - sector); + if (write_size < s->cluster_size) { + memset(buf + write_size, 0, s->cluster_size - write_size); + } + + ret = qcow2_pre_write_overlap_check(bs, 0, off, s->cluster_size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Qcow2 overlap check failed"); + goto fail; + } + + ret = bdrv_pwrite(bs->file, off, buf, s->cluster_size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file", + bm_name); + goto fail; + } + + if (end >= bm_size) { + break; + } + + bdrv_set_dirty_iter(dbi, end); + } + + *bitmap_table_size = tb_size; + g_free(buf); + bdrv_dirty_iter_free(dbi); + + return tb; + +fail: + clear_bitmap_table(bs, tb, tb_size); + g_free(buf); + bdrv_dirty_iter_free(dbi); + g_free(tb); + + return NULL; +} + +/* store_bitmap() + * Store bm->dirty_bitmap to qcow2. + * Set bm->table_offset and bm->table_size accordingly. + */ +static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp) +{ + int ret; + uint64_t *tb; + int64_t tb_offset; + uint32_t tb_size; + BdrvDirtyBitmap *bitmap = bm->dirty_bitmap; + const char *bm_name; + + assert(bitmap != NULL); + + bm_name = bdrv_dirty_bitmap_name(bitmap); + + tb = store_bitmap_data(bs, bitmap, &tb_size, errp); + if (tb == NULL) { + return -EINVAL; + } + + assert(tb_size <= BME_MAX_TABLE_SIZE); + tb_offset = qcow2_alloc_clusters(bs, tb_size * sizeof(tb[0])); + if (tb_offset < 0) { + error_setg_errno(errp, -tb_offset, + "Failed to allocate clusters for bitmap '%s'", + bm_name); + ret = tb_offset; + goto fail; + } + + ret = qcow2_pre_write_overlap_check(bs, 0, tb_offset, + tb_size * sizeof(tb[0])); + if (ret < 0) { + error_setg_errno(errp, -ret, "Qcow2 overlap check failed"); + goto fail; + } + + bitmap_table_to_be(tb, tb_size); + ret = bdrv_pwrite(bs->file, tb_offset, tb, tb_size * sizeof(tb[0])); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file", + bm_name); + goto fail; + } + + g_free(tb); + + bm->table.offset = tb_offset; + bm->table.size = tb_size; + + return 0; + +fail: + clear_bitmap_table(bs, tb, tb_size); + + if (tb_offset > 0) { + qcow2_free_clusters(bs, tb_offset, tb_size * sizeof(tb[0]), + QCOW2_DISCARD_OTHER); + } + + g_free(tb); + + return ret; +} + +static Qcow2Bitmap *find_bitmap_by_name(Qcow2BitmapList *bm_list, + const char *name) +{ + Qcow2Bitmap *bm; + + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + if (strcmp(name, bm->name) == 0) { + return bm; + } + } + + return NULL; +} + +void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs, + const char *name, + Error **errp) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + Qcow2Bitmap *bm; + Qcow2BitmapList *bm_list; + + if (s->nb_bitmaps == 0) { + /* Absence of the bitmap is not an error: see explanation above + * bdrv_remove_persistent_dirty_bitmap() definition. */ + return; + } + + bm_list = bitmap_list_load(bs, s->bitmap_directory_offset, + s->bitmap_directory_size, errp); + if (bm_list == NULL) { + return; + } + + bm = find_bitmap_by_name(bm_list, name); + if (bm == NULL) { + goto fail; + } + + QSIMPLEQ_REMOVE(bm_list, bm, Qcow2Bitmap, entry); + + ret = update_ext_header_and_dir(bs, bm_list); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to update bitmap extension"); + goto fail; + } + + free_bitmap_clusters(bs, &bm->table); + +fail: + bitmap_free(bm); + bitmap_list_free(bm_list); +} + +void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp) +{ + BdrvDirtyBitmap *bitmap; + BDRVQcow2State *s = bs->opaque; + uint32_t new_nb_bitmaps = s->nb_bitmaps; + uint64_t new_dir_size = s->bitmap_directory_size; + int ret; + Qcow2BitmapList *bm_list; + Qcow2Bitmap *bm; + Qcow2BitmapTableList drop_tables; + Qcow2BitmapTable *tb, *tb_next; + + if (!bdrv_has_changed_persistent_bitmaps(bs)) { + /* nothing to do */ + return; + } + + if (!can_write(bs)) { + error_setg(errp, "No write access"); + return; + } + + QSIMPLEQ_INIT(&drop_tables); + + if (s->nb_bitmaps == 0) { + bm_list = bitmap_list_new(); + } else { + bm_list = bitmap_list_load(bs, s->bitmap_directory_offset, + s->bitmap_directory_size, errp); + if (bm_list == NULL) { + return; + } + } + + /* check constraints and names */ + for (bitmap = bdrv_dirty_bitmap_next(bs, NULL); bitmap != NULL; + bitmap = bdrv_dirty_bitmap_next(bs, bitmap)) + { + const char *name = bdrv_dirty_bitmap_name(bitmap); + uint32_t granularity = bdrv_dirty_bitmap_granularity(bitmap); + Qcow2Bitmap *bm; + + if (!bdrv_dirty_bitmap_get_persistance(bitmap) || + bdrv_dirty_bitmap_readonly(bitmap)) + { + continue; + } + + if (check_constraints_on_bitmap(bs, name, granularity, errp) < 0) { + error_prepend(errp, "Bitmap '%s' doesn't satisfy the constraints: ", + name); + goto fail; + } + + bm = find_bitmap_by_name(bm_list, name); + if (bm == NULL) { + if (++new_nb_bitmaps > QCOW2_MAX_BITMAPS) { + error_setg(errp, "Too many persistent bitmaps"); + goto fail; + } + + new_dir_size += calc_dir_entry_size(strlen(name), 0); + if (new_dir_size > QCOW2_MAX_BITMAP_DIRECTORY_SIZE) { + error_setg(errp, "Bitmap directory is too large"); + goto fail; + } + + bm = g_new0(Qcow2Bitmap, 1); + bm->name = g_strdup(name); + QSIMPLEQ_INSERT_TAIL(bm_list, bm, entry); + } else { + if (!(bm->flags & BME_FLAG_IN_USE)) { + error_setg(errp, "Bitmap '%s' already exists in the image", + name); + goto fail; + } + tb = g_memdup(&bm->table, sizeof(bm->table)); + bm->table.offset = 0; + bm->table.size = 0; + QSIMPLEQ_INSERT_TAIL(&drop_tables, tb, entry); + } + bm->flags = bdrv_dirty_bitmap_get_autoload(bitmap) ? BME_FLAG_AUTO : 0; + bm->granularity_bits = ctz32(bdrv_dirty_bitmap_granularity(bitmap)); + bm->dirty_bitmap = bitmap; + } + + /* allocate clusters and store bitmaps */ + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + if (bm->dirty_bitmap == NULL) { + continue; + } + + ret = store_bitmap(bs, bm, errp); + if (ret < 0) { + goto fail; + } + } + + ret = update_ext_header_and_dir(bs, bm_list); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to update bitmap extension"); + goto fail; + } + + /* Bitmap directory was successfully updated, so, old data can be dropped. + * TODO it is better to reuse these clusters */ + QSIMPLEQ_FOREACH_SAFE(tb, &drop_tables, entry, tb_next) { + free_bitmap_clusters(bs, tb); + g_free(tb); + } + + bitmap_list_free(bm_list); + return; + +fail: + QSIMPLEQ_FOREACH(bm, bm_list, entry) { + if (bm->dirty_bitmap == NULL || bm->table.offset == 0) { + continue; + } + + free_bitmap_clusters(bs, &bm->table); + } + + QSIMPLEQ_FOREACH_SAFE(tb, &drop_tables, entry, tb_next) { + g_free(tb); + } + + bitmap_list_free(bm_list); +} + +int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp) +{ + BdrvDirtyBitmap *bitmap; + Error *local_err = NULL; + + qcow2_store_persistent_dirty_bitmaps(bs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return -EINVAL; + } + + for (bitmap = bdrv_dirty_bitmap_next(bs, NULL); bitmap != NULL; + bitmap = bdrv_dirty_bitmap_next(bs, bitmap)) + { + if (bdrv_dirty_bitmap_get_persistance(bitmap)) { + bdrv_dirty_bitmap_set_readonly(bitmap, true); + } + } + + return 0; +} + +bool qcow2_can_store_new_dirty_bitmap(BlockDriverState *bs, + const char *name, + uint32_t granularity, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + bool found; + Qcow2BitmapList *bm_list; + + if (check_constraints_on_bitmap(bs, name, granularity, errp) != 0) { + goto fail; + } + + if (s->nb_bitmaps == 0) { + return true; + } + + if (s->nb_bitmaps >= QCOW2_MAX_BITMAPS) { + error_setg(errp, + "Maximum number of persistent bitmaps is already reached"); + goto fail; + } + + if (s->bitmap_directory_size + calc_dir_entry_size(strlen(name), 0) > + QCOW2_MAX_BITMAP_DIRECTORY_SIZE) + { + error_setg(errp, "Not enough space in the bitmap directory"); + goto fail; + } + + bm_list = bitmap_list_load(bs, s->bitmap_directory_offset, + s->bitmap_directory_size, errp); + if (bm_list == NULL) { + goto fail; + } + + found = find_bitmap_by_name(bm_list, name); + bitmap_list_free(bm_list); + if (found) { + error_setg(errp, "Bitmap with the same name is already stored"); + goto fail; + } + + return true; + +fail: + error_prepend(errp, "Can't make bitmap '%s' persistent in '%s': ", + name, bdrv_get_device_or_node_name(bs)); + return false; +} diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 3d341fd9cb..f06c08f64c 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -357,52 +357,6 @@ static int count_contiguous_clusters_unallocated(int nb_clusters, return i; } -/* The crypt function is compatible with the linux cryptoloop - algorithm for < 4 GB images. NOTE: out_buf == in_buf is - supported */ -int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, - uint8_t *out_buf, const uint8_t *in_buf, - int nb_sectors, bool enc, - Error **errp) -{ - union { - uint64_t ll[2]; - uint8_t b[16]; - } ivec; - int i; - int ret; - - for(i = 0; i < nb_sectors; i++) { - ivec.ll[0] = cpu_to_le64(sector_num); - ivec.ll[1] = 0; - if (qcrypto_cipher_setiv(s->cipher, - ivec.b, G_N_ELEMENTS(ivec.b), - errp) < 0) { - return -1; - } - if (enc) { - ret = qcrypto_cipher_encrypt(s->cipher, - in_buf, - out_buf, - 512, - errp); - } else { - ret = qcrypto_cipher_decrypt(s->cipher, - in_buf, - out_buf, - 512, - errp); - } - if (ret < 0) { - return -1; - } - sector_num++; - in_buf += 512; - out_buf += 512; - } - return 0; -} - static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, uint64_t src_cluster_offset, unsigned offset_in_cluster, @@ -435,19 +389,22 @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, uint64_t src_cluster_offset, + uint64_t cluster_offset, unsigned offset_in_cluster, uint8_t *buffer, unsigned bytes) { if (bytes && bs->encrypted) { BDRVQcow2State *s = bs->opaque; - int64_t sector = (src_cluster_offset + offset_in_cluster) + int64_t sector = (s->crypt_physical_offset ? + (cluster_offset + offset_in_cluster) : + (src_cluster_offset + offset_in_cluster)) >> BDRV_SECTOR_BITS; - assert(s->cipher); assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0); assert((bytes & ~BDRV_SECTOR_MASK) == 0); - if (qcow2_encrypt_sectors(s, sector, buffer, buffer, - bytes >> BDRV_SECTOR_BITS, true, NULL) < 0) { + assert(s->crypto); + if (qcrypto_block_encrypt(s->crypto, sector, buffer, + bytes, NULL) < 0) { return false; } } @@ -834,10 +791,11 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) /* Encrypt the data if necessary before writing it */ if (bs->encrypted) { - if (!do_perform_cow_encrypt(bs, m->offset, start->offset, - start_buffer, start->nb_bytes) || - !do_perform_cow_encrypt(bs, m->offset, end->offset, - end_buffer, end->nb_bytes)) { + if (!do_perform_cow_encrypt(bs, m->offset, m->alloc_offset, + start->offset, start_buffer, + start->nb_bytes) || + !do_perform_cow_encrypt(bs, m->offset, m->alloc_offset, + end->offset, end_buffer, end->nb_bytes)) { ret = -EIO; goto fail; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 7c06061aae..c9b0dcb4f3 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -281,25 +281,6 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, return 0; } -/* - * Rounds the refcount table size up to avoid growing the table for each single - * refcount block that is allocated. - */ -static unsigned int next_refcount_table_size(BDRVQcow2State *s, - unsigned int min_size) -{ - unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1; - unsigned int refcount_table_clusters = - MAX(1, s->refcount_table_size >> (s->cluster_bits - 3)); - - while (min_clusters > refcount_table_clusters) { - refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2; - } - - return refcount_table_clusters << (s->cluster_bits - 3); -} - - /* Checks if two offsets are described by the same refcount block */ static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a, uint64_t offset_b) @@ -321,7 +302,7 @@ static int alloc_refcount_block(BlockDriverState *bs, { BDRVQcow2State *s = bs->opaque; unsigned int refcount_table_index; - int ret; + int64_t ret; BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC); @@ -396,7 +377,7 @@ static int alloc_refcount_block(BlockDriverState *bs, ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block, refcount_block); if (ret < 0) { - goto fail_block; + goto fail; } memset(*refcount_block, 0, s->cluster_size); @@ -411,12 +392,12 @@ static int alloc_refcount_block(BlockDriverState *bs, ret = update_refcount(bs, new_block, s->cluster_size, 1, false, QCOW2_DISCARD_NEVER); if (ret < 0) { - goto fail_block; + goto fail; } ret = qcow2_cache_flush(bs, s->refcount_block_cache); if (ret < 0) { - goto fail_block; + goto fail; } /* Initialize the new refcount block only after updating its refcount, @@ -424,7 +405,7 @@ static int alloc_refcount_block(BlockDriverState *bs, ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block, refcount_block); if (ret < 0) { - goto fail_block; + goto fail; } memset(*refcount_block, 0, s->cluster_size); @@ -435,7 +416,7 @@ static int alloc_refcount_block(BlockDriverState *bs, qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, *refcount_block); ret = qcow2_cache_flush(bs, s->refcount_block_cache); if (ret < 0) { - goto fail_block; + goto fail; } /* If the refcount table is big enough, just hook the block up there */ @@ -446,7 +427,7 @@ static int alloc_refcount_block(BlockDriverState *bs, s->refcount_table_offset + refcount_table_index * sizeof(uint64_t), &data64, sizeof(data64)); if (ret < 0) { - goto fail_block; + goto fail; } s->refcount_table[refcount_table_index] = new_block; @@ -490,74 +471,201 @@ static int alloc_refcount_block(BlockDriverState *bs, (new_block >> s->cluster_bits) + 1), s->refcount_block_size); - if (blocks_used > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) { - return -EFBIG; + /* Create the new refcount table and blocks */ + uint64_t meta_offset = (blocks_used * s->refcount_block_size) * + s->cluster_size; + + ret = qcow2_refcount_area(bs, meta_offset, 0, false, + refcount_table_index, new_block); + if (ret < 0) { + return ret; } - /* And now we need at least one block more for the new metadata */ - uint64_t table_size = next_refcount_table_size(s, blocks_used + 1); - uint64_t last_table_size; - uint64_t blocks_clusters; - do { - uint64_t table_clusters = - size_to_clusters(s, table_size * sizeof(uint64_t)); - blocks_clusters = 1 + - DIV_ROUND_UP(table_clusters, s->refcount_block_size); - uint64_t meta_clusters = table_clusters + blocks_clusters; + ret = load_refcount_block(bs, new_block, refcount_block); + if (ret < 0) { + return ret; + } - last_table_size = table_size; - table_size = next_refcount_table_size(s, blocks_used + - DIV_ROUND_UP(meta_clusters, s->refcount_block_size)); + /* If we were trying to do the initial refcount update for some cluster + * allocation, we might have used the same clusters to store newly + * allocated metadata. Make the caller search some new space. */ + return -EAGAIN; - } while (last_table_size != table_size); +fail: + if (*refcount_block != NULL) { + qcow2_cache_put(bs, s->refcount_block_cache, refcount_block); + } + return ret; +} -#ifdef DEBUG_ALLOC2 - fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n", - s->refcount_table_size, table_size); -#endif +/* + * Starting at @start_offset, this function creates new self-covering refcount + * structures: A new refcount table and refcount blocks which cover all of + * themselves, and a number of @additional_clusters beyond their end. + * @start_offset must be at the end of the image file, that is, there must be + * only empty space beyond it. + * If @exact_size is false, the refcount table will have 50 % more entries than + * necessary so it will not need to grow again soon. + * If @new_refblock_offset is not zero, it contains the offset of a refcount + * block that should be entered into the new refcount table at index + * @new_refblock_index. + * + * Returns: The offset after the new refcount structures (i.e. where the + * @additional_clusters may be placed) on success, -errno on error. + */ +int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset, + uint64_t additional_clusters, bool exact_size, + int new_refblock_index, + uint64_t new_refblock_offset) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t total_refblock_count_u64, additional_refblock_count; + int total_refblock_count, table_size, area_reftable_index, table_clusters; + int i; + uint64_t table_offset, block_offset, end_offset; + int ret; + uint64_t *new_table; - /* Create the new refcount table and blocks */ - uint64_t meta_offset = (blocks_used * s->refcount_block_size) * - s->cluster_size; - uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size; - uint64_t *new_table = g_try_new0(uint64_t, table_size); - void *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size); + assert(!(start_offset % s->cluster_size)); + + qcow2_refcount_metadata_size(start_offset / s->cluster_size + + additional_clusters, + s->cluster_size, s->refcount_order, + !exact_size, &total_refblock_count_u64); + if (total_refblock_count_u64 > QCOW_MAX_REFTABLE_SIZE) { + return -EFBIG; + } + total_refblock_count = total_refblock_count_u64; + + /* Index in the refcount table of the first refcount block to cover the area + * of refcount structures we are about to create; we know that + * @total_refblock_count can cover @start_offset, so this will definitely + * fit into an int. */ + area_reftable_index = (start_offset / s->cluster_size) / + s->refcount_block_size; - assert(table_size > 0 && blocks_clusters > 0); - if (new_table == NULL || new_blocks == NULL) { + if (exact_size) { + table_size = total_refblock_count; + } else { + table_size = total_refblock_count + + DIV_ROUND_UP(total_refblock_count, 2); + } + /* The qcow2 file can only store the reftable size in number of clusters */ + table_size = ROUND_UP(table_size, s->cluster_size / sizeof(uint64_t)); + table_clusters = (table_size * sizeof(uint64_t)) / s->cluster_size; + + if (table_size > QCOW_MAX_REFTABLE_SIZE) { + return -EFBIG; + } + + new_table = g_try_new0(uint64_t, table_size); + + assert(table_size > 0); + if (new_table == NULL) { ret = -ENOMEM; - goto fail_table; + goto fail; } /* Fill the new refcount table */ - memcpy(new_table, s->refcount_table, - s->refcount_table_size * sizeof(uint64_t)); - new_table[refcount_table_index] = new_block; + if (table_size > s->max_refcount_table_index) { + /* We're actually growing the reftable */ + memcpy(new_table, s->refcount_table, + (s->max_refcount_table_index + 1) * sizeof(uint64_t)); + } else { + /* Improbable case: We're shrinking the reftable. However, the caller + * has assured us that there is only empty space beyond @start_offset, + * so we can simply drop all of the refblocks that won't fit into the + * new reftable. */ + memcpy(new_table, s->refcount_table, table_size * sizeof(uint64_t)); + } - int i; - for (i = 0; i < blocks_clusters; i++) { - new_table[blocks_used + i] = meta_offset + (i * s->cluster_size); + if (new_refblock_offset) { + assert(new_refblock_index < total_refblock_count); + new_table[new_refblock_index] = new_refblock_offset; } - /* Fill the refcount blocks */ - uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t)); - int block = 0; - for (i = 0; i < table_clusters + blocks_clusters; i++) { - s->set_refcount(new_blocks, block++, 1); + /* Count how many new refblocks we have to create */ + additional_refblock_count = 0; + for (i = area_reftable_index; i < total_refblock_count; i++) { + if (!new_table[i]) { + additional_refblock_count++; + } + } + + table_offset = start_offset + additional_refblock_count * s->cluster_size; + end_offset = table_offset + table_clusters * s->cluster_size; + + /* Fill the refcount blocks, and create new ones, if necessary */ + block_offset = start_offset; + for (i = area_reftable_index; i < total_refblock_count; i++) { + void *refblock_data; + uint64_t first_offset_covered; + + /* Reuse an existing refblock if possible, create a new one otherwise */ + if (new_table[i]) { + ret = qcow2_cache_get(bs, s->refcount_block_cache, new_table[i], + &refblock_data); + if (ret < 0) { + goto fail; + } + } else { + ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, + block_offset, &refblock_data); + if (ret < 0) { + goto fail; + } + memset(refblock_data, 0, s->cluster_size); + qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, + refblock_data); + + new_table[i] = block_offset; + block_offset += s->cluster_size; + } + + /* First host offset covered by this refblock */ + first_offset_covered = (uint64_t)i * s->refcount_block_size * + s->cluster_size; + if (first_offset_covered < end_offset) { + int j, end_index; + + /* Set the refcount of all of the new refcount structures to 1 */ + + if (first_offset_covered < start_offset) { + assert(i == area_reftable_index); + j = (start_offset - first_offset_covered) / s->cluster_size; + assert(j < s->refcount_block_size); + } else { + j = 0; + } + + end_index = MIN((end_offset - first_offset_covered) / + s->cluster_size, + s->refcount_block_size); + + for (; j < end_index; j++) { + /* The caller guaranteed us this space would be empty */ + assert(s->get_refcount(refblock_data, j) == 0); + s->set_refcount(refblock_data, j, 1); + } + + qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, + refblock_data); + } + + qcow2_cache_put(bs, s->refcount_block_cache, &refblock_data); } + assert(block_offset == table_offset); + /* Write refcount blocks to disk */ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS); - ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks, - blocks_clusters * s->cluster_size); - g_free(new_blocks); - new_blocks = NULL; + ret = qcow2_cache_flush(bs, s->refcount_block_cache); if (ret < 0) { - goto fail_table; + goto fail; } /* Write refcount table to disk */ - for(i = 0; i < table_size; i++) { + for (i = 0; i < total_refblock_count; i++) { cpu_to_be64s(&new_table[i]); } @@ -565,10 +673,10 @@ static int alloc_refcount_block(BlockDriverState *bs, ret = bdrv_pwrite_sync(bs->file, table_offset, new_table, table_size * sizeof(uint64_t)); if (ret < 0) { - goto fail_table; + goto fail; } - for(i = 0; i < table_size; i++) { + for (i = 0; i < total_refblock_count; i++) { be64_to_cpus(&new_table[i]); } @@ -584,7 +692,7 @@ static int alloc_refcount_block(BlockDriverState *bs, offsetof(QCowHeader, refcount_table_offset), &data, sizeof(data)); if (ret < 0) { - goto fail_table; + goto fail; } /* And switch it in memory */ @@ -601,23 +709,10 @@ static int alloc_refcount_block(BlockDriverState *bs, qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t), QCOW2_DISCARD_OTHER); - ret = load_refcount_block(bs, new_block, refcount_block); - if (ret < 0) { - return ret; - } + return end_offset; - /* If we were trying to do the initial refcount update for some cluster - * allocation, we might have used the same clusters to store newly - * allocated metadata. Make the caller search some new space. */ - return -EAGAIN; - -fail_table: - g_free(new_blocks); +fail: g_free(new_table); -fail_block: - if (*refcount_block != NULL) { - qcow2_cache_put(bs, s->refcount_block_cache, refcount_block); - } return ret; } @@ -1323,11 +1418,10 @@ static int realloc_refcount_array(BDRVQcow2State *s, void **array, * * Modifies the number of errors in res. */ -static int inc_refcounts(BlockDriverState *bs, - BdrvCheckResult *res, - void **refcount_table, - int64_t *refcount_table_size, - int64_t offset, int64_t size) +int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res, + void **refcount_table, + int64_t *refcount_table_size, + int64_t offset, int64_t size) { BDRVQcow2State *s = bs->opaque; uint64_t start, last, cluster_offset, k, refcount; @@ -1420,8 +1514,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1; l2_entry &= s->cluster_offset_mask; - ret = inc_refcounts(bs, res, refcount_table, refcount_table_size, - l2_entry & ~511, nb_csectors * 512); + ret = qcow2_inc_refcounts_imrt(bs, res, + refcount_table, refcount_table_size, + l2_entry & ~511, nb_csectors * 512); if (ret < 0) { goto fail; } @@ -1454,8 +1549,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, } /* Mark cluster as used */ - ret = inc_refcounts(bs, res, refcount_table, refcount_table_size, - offset, s->cluster_size); + ret = qcow2_inc_refcounts_imrt(bs, res, + refcount_table, refcount_table_size, + offset, s->cluster_size); if (ret < 0) { goto fail; } @@ -1508,8 +1604,8 @@ static int check_refcounts_l1(BlockDriverState *bs, l1_size2 = l1_size * sizeof(uint64_t); /* Mark L1 table as used */ - ret = inc_refcounts(bs, res, refcount_table, refcount_table_size, - l1_table_offset, l1_size2); + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size, + l1_table_offset, l1_size2); if (ret < 0) { goto fail; } @@ -1538,8 +1634,9 @@ static int check_refcounts_l1(BlockDriverState *bs, if (l2_offset) { /* Mark L2 table as used */ l2_offset &= L1E_OFFSET_MASK; - ret = inc_refcounts(bs, res, refcount_table, refcount_table_size, - l2_offset, s->cluster_size); + ret = qcow2_inc_refcounts_imrt(bs, res, + refcount_table, refcount_table_size, + l2_offset, s->cluster_size); if (ret < 0) { goto fail; } @@ -1730,7 +1827,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, } ret = bdrv_truncate(bs->file, offset + s->cluster_size, - &local_err); + PREALLOC_MODE_OFF, &local_err); if (ret < 0) { error_report_err(local_err); goto resize_fail; @@ -1757,14 +1854,15 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, } res->corruptions_fixed++; - ret = inc_refcounts(bs, res, refcount_table, nb_clusters, - offset, s->cluster_size); + ret = qcow2_inc_refcounts_imrt(bs, res, + refcount_table, nb_clusters, + offset, s->cluster_size); if (ret < 0) { return ret; } /* No need to check whether the refcount is now greater than 1: * This area was just allocated and zeroed, so it can only be - * exactly 1 after inc_refcounts() */ + * exactly 1 after qcow2_inc_refcounts_imrt() */ continue; resize_fail: @@ -1779,8 +1877,8 @@ resize_fail: } if (offset != 0) { - ret = inc_refcounts(bs, res, refcount_table, nb_clusters, - offset, s->cluster_size); + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters, + offset, s->cluster_size); if (ret < 0) { return ret; } @@ -1820,8 +1918,8 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } /* header */ - ret = inc_refcounts(bs, res, refcount_table, nb_clusters, - 0, s->cluster_size); + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters, + 0, s->cluster_size); if (ret < 0) { return ret; } @@ -1842,16 +1940,32 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res, return ret; } } - ret = inc_refcounts(bs, res, refcount_table, nb_clusters, - s->snapshots_offset, s->snapshots_size); + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters, + s->snapshots_offset, s->snapshots_size); if (ret < 0) { return ret; } /* refcount data */ - ret = inc_refcounts(bs, res, refcount_table, nb_clusters, - s->refcount_table_offset, - s->refcount_table_size * sizeof(uint64_t)); + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters, + s->refcount_table_offset, + s->refcount_table_size * sizeof(uint64_t)); + if (ret < 0) { + return ret; + } + + /* encryption */ + if (s->crypto_header.length) { + ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters, + s->crypto_header.offset, + s->crypto_header.length); + if (ret < 0) { + return ret; + } + } + + /* bitmaps */ + ret = qcow2_check_bitmaps_refcounts(bs, res, refcount_table, nb_clusters); if (ret < 0) { return ret; } diff --git a/block/qcow2.c b/block/qcow2.c index 2f94f0326e..c144ea5620 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -37,6 +37,9 @@ #include "qemu/option_int.h" #include "qemu/cutils.h" #include "qemu/bswap.h" +#include "qapi/opts-visitor.h" +#include "qapi-visit.h" +#include "block/crypto.h" /* Differences with QCOW: @@ -63,6 +66,8 @@ typedef struct { #define QCOW2_EXT_MAGIC_END 0 #define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA #define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 +#define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77 +#define QCOW2_EXT_MAGIC_BITMAPS 0x23852875 static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) { @@ -77,6 +82,86 @@ static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) } +static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset, + uint8_t *buf, size_t buflen, + void *opaque, Error **errp) +{ + BlockDriverState *bs = opaque; + BDRVQcow2State *s = bs->opaque; + ssize_t ret; + + if ((offset + buflen) > s->crypto_header.length) { + error_setg(errp, "Request for data outside of extension header"); + return -1; + } + + ret = bdrv_pread(bs->file, + s->crypto_header.offset + offset, buf, buflen); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read encryption header"); + return -1; + } + return ret; +} + + +static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, + void *opaque, Error **errp) +{ + BlockDriverState *bs = opaque; + BDRVQcow2State *s = bs->opaque; + int64_t ret; + int64_t clusterlen; + + ret = qcow2_alloc_clusters(bs, headerlen); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Cannot allocate cluster for LUKS header size %zu", + headerlen); + return -1; + } + + s->crypto_header.length = headerlen; + s->crypto_header.offset = ret; + + /* Zero fill remaining space in cluster so it has predictable + * content in case of future spec changes */ + clusterlen = size_to_clusters(s, headerlen) * s->cluster_size; + ret = bdrv_pwrite_zeroes(bs->file, + ret + headerlen, + clusterlen - headerlen, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not zero fill encryption header"); + return -1; + } + + return ret; +} + + +static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset, + const uint8_t *buf, size_t buflen, + void *opaque, Error **errp) +{ + BlockDriverState *bs = opaque; + BDRVQcow2State *s = bs->opaque; + ssize_t ret; + + if ((offset + buflen) > s->crypto_header.length) { + error_setg(errp, "Request for data outside of extension header"); + return -1; + } + + ret = bdrv_pwrite(bs->file, + s->crypto_header.offset + offset, buf, buflen); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read encryption header"); + return -1; + } + return ret; +} + + /* * read qcow2 extension and fill bs * start reading from start_offset @@ -86,12 +171,18 @@ static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) */ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, uint64_t end_offset, void **p_feature_table, + int flags, bool *need_update_header, Error **errp) { BDRVQcow2State *s = bs->opaque; QCowExtension ext; uint64_t offset; int ret; + Qcow2BitmapHeaderExt bitmaps_ext; + + if (need_update_header != NULL) { + *need_update_header = false; + } #ifdef DEBUG_EXT printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); @@ -162,6 +253,126 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, } break; + case QCOW2_EXT_MAGIC_CRYPTO_HEADER: { + unsigned int cflags = 0; + if (s->crypt_method_header != QCOW_CRYPT_LUKS) { + error_setg(errp, "CRYPTO header extension only " + "expected with LUKS encryption method"); + return -EINVAL; + } + if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) { + error_setg(errp, "CRYPTO header extension size %u, " + "but expected size %zu", ext.len, + sizeof(Qcow2CryptoHeaderExtension)); + return -EINVAL; + } + + ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Unable to read CRYPTO header extension"); + return ret; + } + be64_to_cpus(&s->crypto_header.offset); + be64_to_cpus(&s->crypto_header.length); + + if ((s->crypto_header.offset % s->cluster_size) != 0) { + error_setg(errp, "Encryption header offset '%" PRIu64 "' is " + "not a multiple of cluster size '%u'", + s->crypto_header.offset, s->cluster_size); + return -EINVAL; + } + + if (flags & BDRV_O_NO_IO) { + cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; + } + s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", + qcow2_crypto_hdr_read_func, + bs, cflags, errp); + if (!s->crypto) { + return -EINVAL; + } + } break; + + case QCOW2_EXT_MAGIC_BITMAPS: + if (ext.len != sizeof(bitmaps_ext)) { + error_setg_errno(errp, -ret, "bitmaps_ext: " + "Invalid extension length"); + return -EINVAL; + } + + if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) { + error_report("WARNING: a program lacking bitmap support " + "modified this file, so all bitmaps are now " + "considered inconsistent. Some clusters may be " + "leaked, run 'qemu-img check -r' on the image " + "file to fix."); + if (need_update_header != NULL) { + /* Updating is needed to drop invalid bitmap extension. */ + *need_update_header = true; + } + break; + } + + ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len); + if (ret < 0) { + error_setg_errno(errp, -ret, "bitmaps_ext: " + "Could not read ext header"); + return ret; + } + + if (bitmaps_ext.reserved32 != 0) { + error_setg_errno(errp, -ret, "bitmaps_ext: " + "Reserved field is not zero"); + return -EINVAL; + } + + be32_to_cpus(&bitmaps_ext.nb_bitmaps); + be64_to_cpus(&bitmaps_ext.bitmap_directory_size); + be64_to_cpus(&bitmaps_ext.bitmap_directory_offset); + + if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) { + error_setg(errp, + "bitmaps_ext: Image has %" PRIu32 " bitmaps, " + "exceeding the QEMU supported maximum of %d", + bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS); + return -EINVAL; + } + + if (bitmaps_ext.nb_bitmaps == 0) { + error_setg(errp, "found bitmaps extension with zero bitmaps"); + return -EINVAL; + } + + if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) { + error_setg(errp, "bitmaps_ext: " + "invalid bitmap directory offset"); + return -EINVAL; + } + + if (bitmaps_ext.bitmap_directory_size > + QCOW2_MAX_BITMAP_DIRECTORY_SIZE) { + error_setg(errp, "bitmaps_ext: " + "bitmap directory size (%" PRIu64 ") exceeds " + "the maximum supported size (%d)", + bitmaps_ext.bitmap_directory_size, + QCOW2_MAX_BITMAP_DIRECTORY_SIZE); + return -EINVAL; + } + + s->nb_bitmaps = bitmaps_ext.nb_bitmaps; + s->bitmap_directory_offset = + bitmaps_ext.bitmap_directory_offset; + s->bitmap_directory_size = + bitmaps_ext.bitmap_directory_size; + +#ifdef DEBUG_EXT + printf("Qcow2: Got bitmaps extension: " + "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n", + s->bitmap_directory_offset, s->nb_bitmaps); +#endif + break; + default: /* unknown magic - save it in case we need to rewrite the header */ { @@ -461,6 +672,8 @@ static QemuOptsList qcow2_runtime_opts = { .type = QEMU_OPT_NUMBER, .help = "Clean unused cache entries after this time (in seconds)", }, + BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", + "ID of secret providing qcow2 AES key or LUKS passphrase"), { /* end of list */ } }, }; @@ -585,6 +798,7 @@ typedef struct Qcow2ReopenState { int overlap_check; bool discard_passthrough[QCOW2_DISCARD_MAX]; uint64_t cache_clean_interval; + QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */ } Qcow2ReopenState; static int qcow2_update_options_prepare(BlockDriverState *bs, @@ -598,9 +812,14 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, int overlap_check_template = 0; uint64_t l2_cache_size, refcount_cache_size; int i; + const char *encryptfmt; + QDict *encryptopts = NULL; Error *local_err = NULL; int ret; + qdict_extract_subqdict(options, &encryptopts, "encrypt."); + encryptfmt = qdict_get_try_str(encryptopts, "format"); + opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); if (local_err) { @@ -751,8 +970,55 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, r->discard_passthrough[QCOW2_DISCARD_OTHER] = qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); + switch (s->crypt_method_header) { + case QCOW_CRYPT_NONE: + if (encryptfmt) { + error_setg(errp, "No encryption in image header, but options " + "specified format '%s'", encryptfmt); + ret = -EINVAL; + goto fail; + } + break; + + case QCOW_CRYPT_AES: + if (encryptfmt && !g_str_equal(encryptfmt, "aes")) { + error_setg(errp, + "Header reported 'aes' encryption format but " + "options specify '%s'", encryptfmt); + ret = -EINVAL; + goto fail; + } + qdict_del(encryptopts, "format"); + r->crypto_opts = block_crypto_open_opts_init( + Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp); + break; + + case QCOW_CRYPT_LUKS: + if (encryptfmt && !g_str_equal(encryptfmt, "luks")) { + error_setg(errp, + "Header reported 'luks' encryption format but " + "options specify '%s'", encryptfmt); + ret = -EINVAL; + goto fail; + } + qdict_del(encryptopts, "format"); + r->crypto_opts = block_crypto_open_opts_init( + Q_CRYPTO_BLOCK_FORMAT_LUKS, encryptopts, errp); + break; + + default: + error_setg(errp, "Unsupported encryption method %d", + s->crypt_method_header); + break; + } + if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) { + ret = -EINVAL; + goto fail; + } + ret = 0; fail: + QDECREF(encryptopts); qemu_opts_del(opts); opts = NULL; return ret; @@ -785,6 +1051,9 @@ static void qcow2_update_options_commit(BlockDriverState *bs, s->cache_clean_interval = r->cache_clean_interval; cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); } + + qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); + s->crypto_opts = r->crypto_opts; } static void qcow2_update_options_abort(BlockDriverState *bs, @@ -796,6 +1065,7 @@ static void qcow2_update_options_abort(BlockDriverState *bs, if (r->refcount_block_cache) { qcow2_cache_destroy(bs, r->refcount_block_cache); } + qapi_free_QCryptoBlockOpenOptions(r->crypto_opts); } static int qcow2_update_options(BlockDriverState *bs, QDict *options, @@ -824,6 +1094,7 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; uint64_t ext_end; uint64_t l1_vm_state_index; + bool update_header = false; ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); if (ret < 0) { @@ -929,7 +1200,7 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) { void *feature_table = NULL; qcow2_read_extensions(bs, header.header_length, ext_end, - &feature_table, NULL); + &feature_table, flags, NULL, NULL); report_unsupported_feature(errp, feature_table, s->incompatible_features & ~QCOW2_INCOMPAT_MASK); @@ -961,18 +1232,6 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1); s->refcount_max += s->refcount_max - 1; - if (header.crypt_method > QCOW_CRYPT_AES) { - error_setg(errp, "Unsupported encryption method: %" PRIu32, - header.crypt_method); - ret = -EINVAL; - goto fail; - } - if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128, - QCRYPTO_CIPHER_MODE_CBC)) { - error_setg(errp, "AES cipher not available"); - ret = -EINVAL; - goto fail; - } s->crypt_method_header = header.crypt_method; if (s->crypt_method_header) { if (bdrv_uses_whitelist() && @@ -989,6 +1248,15 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } + if (s->crypt_method_header == QCOW_CRYPT_AES) { + s->crypt_physical_offset = false; + } else { + /* Assuming LUKS and any future crypt methods we + * add will all use physical offsets, due to the + * fact that the alternative is insecure... */ + s->crypt_physical_offset = true; + } + bs->encrypted = true; } @@ -1116,12 +1384,36 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, /* read qcow2 extensions */ if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL, - &local_err)) { + flags, &update_header, &local_err)) { error_propagate(errp, local_err); ret = -EINVAL; goto fail; } + /* qcow2_read_extension may have set up the crypto context + * if the crypt method needs a header region, some methods + * don't need header extensions, so must check here + */ + if (s->crypt_method_header && !s->crypto) { + if (s->crypt_method_header == QCOW_CRYPT_AES) { + unsigned int cflags = 0; + if (flags & BDRV_O_NO_IO) { + cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; + } + s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", + NULL, NULL, cflags, errp); + if (!s->crypto) { + ret = -EINVAL; + goto fail; + } + } else if (!(flags & BDRV_O_NO_IO)) { + error_setg(errp, "Missing CRYPTO header for crypt method %d", + s->crypt_method_header); + ret = -EINVAL; + goto fail; + } + } + /* read the backing file name */ if (header.backing_file_offset != 0) { len = header.backing_file_size; @@ -1152,8 +1444,23 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, } /* Clear unknown autoclear feature bits */ - if (!bs->read_only && !(flags & BDRV_O_INACTIVE) && s->autoclear_features) { - s->autoclear_features = 0; + update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK; + update_header = + update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE); + if (update_header) { + s->autoclear_features &= QCOW2_AUTOCLEAR_MASK; + } + + if (qcow2_load_autoloading_dirty_bitmaps(bs, &local_err)) { + update_header = false; + } + if (local_err != NULL) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + if (update_header) { ret = qcow2_update_header(bs); if (ret < 0) { error_setg_errno(errp, -ret, "Could not update qcow2 header"); @@ -1202,6 +1509,8 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, } g_free(s->cluster_cache); qemu_vfree(s->cluster_data); + qcrypto_block_free(s->crypto); + qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); return ret; } @@ -1229,41 +1538,6 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.pdiscard_alignment = s->cluster_size; } -static int qcow2_set_key(BlockDriverState *bs, const char *key) -{ - BDRVQcow2State *s = bs->opaque; - uint8_t keybuf[16]; - int len, i; - Error *err = NULL; - - memset(keybuf, 0, 16); - len = strlen(key); - if (len > 16) - len = 16; - /* XXX: we could compress the chars to 7 bits to increase - entropy */ - for(i = 0;i < len;i++) { - keybuf[i] = key[i]; - } - assert(bs->encrypted); - - qcrypto_cipher_free(s->cipher); - s->cipher = qcrypto_cipher_new( - QCRYPTO_CIPHER_ALG_AES_128, - QCRYPTO_CIPHER_MODE_CBC, - keybuf, G_N_ELEMENTS(keybuf), - &err); - - if (!s->cipher) { - /* XXX would be nice if errors in this method could - * be properly propagate to the caller. Would need - * the bdrv_set_key() API signature to be fixed. */ - error_free(err); - return -1; - } - return 0; -} - static int qcow2_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { @@ -1281,6 +1555,11 @@ static int qcow2_reopen_prepare(BDRVReopenState *state, /* We need to write out any unwritten data if we reopen read-only. */ if ((state->flags & BDRV_O_RDWR) == 0) { + ret = qcow2_reopen_bitmaps_ro(state->bs, errp); + if (ret < 0) { + goto fail; + } + ret = bdrv_flush(state->bs); if (ret < 0) { goto fail; @@ -1379,7 +1658,7 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, *pnum = bytes >> BDRV_SECTOR_BITS; if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED && - !s->cipher) { + !s->crypto) { index_in_cluster = sector_num & (s->cluster_sectors - 1); cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); *file = bs->file->bs; @@ -1436,7 +1715,7 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, /* prepare next request */ cur_bytes = MIN(bytes, INT_MAX); - if (s->cipher) { + if (s->crypto) { cur_bytes = MIN(cur_bytes, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); } @@ -1506,7 +1785,7 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, } if (bs->encrypted) { - assert(s->cipher); + assert(s->crypto); /* * For encrypted images, read everything into a temporary @@ -1538,14 +1817,17 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, goto fail; } if (bs->encrypted) { - assert(s->cipher); + assert(s->crypto); assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0); Error *err = NULL; - if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS, - cluster_data, cluster_data, - cur_bytes >> BDRV_SECTOR_BITS, - false, &err) < 0) { + if (qcrypto_block_decrypt(s->crypto, + (s->crypt_physical_offset ? + cluster_offset + offset_in_cluster : + offset) >> BDRV_SECTOR_BITS, + cluster_data, + cur_bytes, + &err) < 0) { error_free(err); ret = -EIO; goto fail; @@ -1661,7 +1943,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, if (bs->encrypted) { Error *err = NULL; - assert(s->cipher); + assert(s->crypto); if (!cluster_data) { cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS @@ -1676,10 +1958,12 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); - if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS, - cluster_data, cluster_data, - cur_bytes >>BDRV_SECTOR_BITS, - true, &err) < 0) { + if (qcrypto_block_encrypt(s->crypto, + (s->crypt_physical_offset ? + cluster_offset + offset_in_cluster : + offset) >> BDRV_SECTOR_BITS, + cluster_data, + cur_bytes, &err) < 0) { error_free(err); ret = -EIO; goto fail; @@ -1767,6 +2051,7 @@ static int qcow2_inactivate(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; int ret, result = 0; + Error *local_err = NULL; ret = qcow2_cache_flush(bs, s->l2_table_cache); if (ret) { @@ -1782,6 +2067,14 @@ static int qcow2_inactivate(BlockDriverState *bs) strerror(-ret)); } + qcow2_store_persistent_dirty_bitmaps(bs, &local_err); + if (local_err != NULL) { + result = -EINVAL; + error_report_err(local_err); + error_report("Persistent bitmaps are lost for node '%s'", + bdrv_get_device_or_node_name(bs)); + } + if (result == 0) { qcow2_mark_clean(bs); } @@ -1804,8 +2097,8 @@ static void qcow2_close(BlockDriverState *bs) qcow2_cache_destroy(bs, s->l2_table_cache); qcow2_cache_destroy(bs, s->refcount_block_cache); - qcrypto_cipher_free(s->cipher); - s->cipher = NULL; + qcrypto_block_free(s->crypto); + s->crypto = NULL; g_free(s->unknown_header_fields); cleanup_unknown_header_ext(bs); @@ -1823,7 +2116,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) { BDRVQcow2State *s = bs->opaque; int flags = s->flags; - QCryptoCipher *cipher = NULL; + QCryptoBlock *crypto = NULL; QDict *options; Error *local_err = NULL; int ret; @@ -1833,8 +2126,8 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) * that means we don't have to worry about reopening them here. */ - cipher = s->cipher; - s->cipher = NULL; + crypto = s->crypto; + s->crypto = NULL; qcow2_close(bs); @@ -1855,7 +2148,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) return; } - s->cipher = cipher; + s->crypto = crypto; } static size_t header_ext_add(char *buf, uint32_t magic, const void *s, @@ -1981,6 +2274,22 @@ int qcow2_update_header(BlockDriverState *bs) buflen -= ret; } + /* Full disk encryption header pointer extension */ + if (s->crypto_header.offset != 0) { + cpu_to_be64s(&s->crypto_header.offset); + cpu_to_be64s(&s->crypto_header.length); + ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER, + &s->crypto_header, sizeof(s->crypto_header), + buflen); + be64_to_cpus(&s->crypto_header.offset); + be64_to_cpus(&s->crypto_header.length); + if (ret < 0) { + goto fail; + } + buf += ret; + buflen -= ret; + } + /* Feature table */ if (s->qcow_version >= 3) { Qcow2Feature features[] = { @@ -2010,6 +2319,25 @@ int qcow2_update_header(BlockDriverState *bs) buflen -= ret; } + /* Bitmap extension */ + if (s->nb_bitmaps > 0) { + Qcow2BitmapHeaderExt bitmaps_header = { + .nb_bitmaps = cpu_to_be32(s->nb_bitmaps), + .bitmap_directory_size = + cpu_to_be64(s->bitmap_directory_size), + .bitmap_directory_offset = + cpu_to_be64(s->bitmap_directory_offset) + }; + ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS, + &bitmaps_header, sizeof(bitmaps_header), + buflen); + if (ret < 0) { + goto fail; + } + buf += ret; + buflen -= ret; + } + /* Keep unknown header extensions */ QLIST_FOREACH(uext, &s->unknown_header_ext, next) { ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen); @@ -2079,24 +2407,105 @@ static int qcow2_change_backing_file(BlockDriverState *bs, return qcow2_update_header(bs); } -static int preallocate(BlockDriverState *bs) +static int qcow2_crypt_method_from_format(const char *encryptfmt) +{ + if (g_str_equal(encryptfmt, "luks")) { + return QCOW_CRYPT_LUKS; + } else if (g_str_equal(encryptfmt, "aes")) { + return QCOW_CRYPT_AES; + } else { + return -EINVAL; + } +} + +static int qcow2_set_up_encryption(BlockDriverState *bs, const char *encryptfmt, + QemuOpts *opts, Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + QCryptoBlockCreateOptions *cryptoopts = NULL; + QCryptoBlock *crypto = NULL; + int ret = -EINVAL; + QDict *options, *encryptopts; + int fmt; + + options = qemu_opts_to_qdict(opts, NULL); + qdict_extract_subqdict(options, &encryptopts, "encrypt."); + QDECREF(options); + + fmt = qcow2_crypt_method_from_format(encryptfmt); + + switch (fmt) { + case QCOW_CRYPT_LUKS: + cryptoopts = block_crypto_create_opts_init( + Q_CRYPTO_BLOCK_FORMAT_LUKS, encryptopts, errp); + break; + case QCOW_CRYPT_AES: + cryptoopts = block_crypto_create_opts_init( + Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp); + break; + default: + error_setg(errp, "Unknown encryption format '%s'", encryptfmt); + break; + } + if (!cryptoopts) { + ret = -EINVAL; + goto out; + } + s->crypt_method_header = fmt; + + crypto = qcrypto_block_create(cryptoopts, "encrypt.", + qcow2_crypto_hdr_init_func, + qcow2_crypto_hdr_write_func, + bs, errp); + if (!crypto) { + ret = -EINVAL; + goto out; + } + + ret = qcow2_update_header(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not write encryption header"); + goto out; + } + + out: + QDECREF(encryptopts); + qcrypto_block_free(crypto); + qapi_free_QCryptoBlockCreateOptions(cryptoopts); + return ret; +} + + +/** + * Preallocates metadata structures for data clusters between @offset (in the + * guest disk) and @new_length (which is thus generally the new guest disk + * size). + * + * Returns: 0 on success, -errno on failure. + */ +static int preallocate(BlockDriverState *bs, + uint64_t offset, uint64_t new_length) { + BDRVQcow2State *s = bs->opaque; uint64_t bytes; - uint64_t offset; uint64_t host_offset = 0; unsigned int cur_bytes; int ret; QCowL2Meta *meta; - bytes = bdrv_getlength(bs); - offset = 0; + if (qemu_in_coroutine()) { + qemu_co_mutex_lock(&s->lock); + } + + assert(offset <= new_length); + bytes = new_length - offset; while (bytes) { cur_bytes = MIN(bytes, INT_MAX); ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, &host_offset, &meta); if (ret < 0) { - return ret; + goto done; } while (meta) { @@ -2106,7 +2515,7 @@ static int preallocate(BlockDriverState *bs) if (ret < 0) { qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters, QCOW2_DISCARD_NEVER); - return ret; + goto done; } /* There are no dependent requests, but we need to remove our @@ -2133,32 +2542,174 @@ static int preallocate(BlockDriverState *bs) ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1, &data, 1); if (ret < 0) { - return ret; + goto done; } } - return 0; + ret = 0; + +done: + if (qemu_in_coroutine()) { + qemu_co_mutex_unlock(&s->lock); + } + return ret; } -static int qcow2_create2(const char *filename, int64_t total_size, - const char *backing_file, const char *backing_format, - int flags, size_t cluster_size, PreallocMode prealloc, - QemuOpts *opts, int version, int refcount_order, - Error **errp) +/* qcow2_refcount_metadata_size: + * @clusters: number of clusters to refcount (including data and L1/L2 tables) + * @cluster_size: size of a cluster, in bytes + * @refcount_order: refcount bits power-of-2 exponent + * @generous_increase: allow for the refcount table to be 1.5x as large as it + * needs to be + * + * Returns: Number of bytes required for refcount blocks and table metadata. + */ +int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, + int refcount_order, bool generous_increase, + uint64_t *refblock_count) { + /* + * Every host cluster is reference-counted, including metadata (even + * refcount metadata is recursively included). + * + * An accurate formula for the size of refcount metadata size is difficult + * to derive. An easier method of calculation is finding the fixed point + * where no further refcount blocks or table clusters are required to + * reference count every cluster. + */ + int64_t blocks_per_table_cluster = cluster_size / sizeof(uint64_t); + int64_t refcounts_per_block = cluster_size * 8 / (1 << refcount_order); + int64_t table = 0; /* number of refcount table clusters */ + int64_t blocks = 0; /* number of refcount block clusters */ + int64_t last; + int64_t n = 0; + + do { + last = n; + blocks = DIV_ROUND_UP(clusters + table + blocks, refcounts_per_block); + table = DIV_ROUND_UP(blocks, blocks_per_table_cluster); + n = clusters + blocks + table; + + if (n == last && generous_increase) { + clusters += DIV_ROUND_UP(table, 2); + n = 0; /* force another loop */ + generous_increase = false; + } + } while (n != last); + + if (refblock_count) { + *refblock_count = blocks; + } + + return (blocks + table) * cluster_size; +} + +/** + * qcow2_calc_prealloc_size: + * @total_size: virtual disk size in bytes + * @cluster_size: cluster size in bytes + * @refcount_order: refcount bits power-of-2 exponent + * + * Returns: Total number of bytes required for the fully allocated image + * (including metadata). + */ +static int64_t qcow2_calc_prealloc_size(int64_t total_size, + size_t cluster_size, + int refcount_order) +{ + int64_t meta_size = 0; + uint64_t nl1e, nl2e; + int64_t aligned_total_size = align_offset(total_size, cluster_size); + + /* header: 1 cluster */ + meta_size += cluster_size; + + /* total size of L2 tables */ + nl2e = aligned_total_size / cluster_size; + nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t)); + meta_size += nl2e * sizeof(uint64_t); + + /* total size of L1 tables */ + nl1e = nl2e * sizeof(uint64_t) / cluster_size; + nl1e = align_offset(nl1e, cluster_size / sizeof(uint64_t)); + meta_size += nl1e * sizeof(uint64_t); + + /* total size of refcount table and blocks */ + meta_size += qcow2_refcount_metadata_size( + (meta_size + aligned_total_size) / cluster_size, + cluster_size, refcount_order, false, NULL); + + return meta_size + aligned_total_size; +} + +static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp) +{ + size_t cluster_size; int cluster_bits; - QDict *options; - /* Calculate cluster_bits */ + cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, + DEFAULT_CLUSTER_SIZE); cluster_bits = ctz32(cluster_size); if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || (1 << cluster_bits) != cluster_size) { error_setg(errp, "Cluster size must be a power of two between %d and " "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); - return -EINVAL; + return 0; + } + return cluster_size; +} + +static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp) +{ + char *buf; + int ret; + + buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL); + if (!buf) { + ret = 3; /* default */ + } else if (!strcmp(buf, "0.10")) { + ret = 2; + } else if (!strcmp(buf, "1.1")) { + ret = 3; + } else { + error_setg(errp, "Invalid compatibility level: '%s'", buf); + ret = -EINVAL; + } + g_free(buf); + return ret; +} + +static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version, + Error **errp) +{ + uint64_t refcount_bits; + + refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16); + if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) { + error_setg(errp, "Refcount width must be a power of two and may not " + "exceed 64 bits"); + return 0; } + if (version < 3 && refcount_bits != 16) { + error_setg(errp, "Different refcount widths than 16 bits require " + "compatibility level 1.1 or above (use compat=1.1 or " + "greater)"); + return 0; + } + + return refcount_bits; +} + +static int qcow2_create2(const char *filename, int64_t total_size, + const char *backing_file, const char *backing_format, + int flags, size_t cluster_size, PreallocMode prealloc, + QemuOpts *opts, int version, int refcount_order, + const char *encryptfmt, Error **errp) +{ + QDict *options; + /* * Open the image file and write a minimal qcow2 header. * @@ -2178,65 +2729,9 @@ static int qcow2_create2(const char *filename, int64_t total_size, int ret; if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) { - /* Note: The following calculation does not need to be exact; if it is a - * bit off, either some bytes will be "leaked" (which is fine) or we - * will need to increase the file size by some bytes (which is fine, - * too, as long as the bulk is allocated here). Therefore, using - * floating point arithmetic is fine. */ - int64_t meta_size = 0; - uint64_t nreftablee, nrefblocke, nl1e, nl2e, refblock_count; - int64_t aligned_total_size = align_offset(total_size, cluster_size); - int refblock_bits, refblock_size; - /* refcount entry size in bytes */ - double rces = (1 << refcount_order) / 8.; - - /* see qcow2_open() */ - refblock_bits = cluster_bits - (refcount_order - 3); - refblock_size = 1 << refblock_bits; - - /* header: 1 cluster */ - meta_size += cluster_size; - - /* total size of L2 tables */ - nl2e = aligned_total_size / cluster_size; - nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t)); - meta_size += nl2e * sizeof(uint64_t); - - /* total size of L1 tables */ - nl1e = nl2e * sizeof(uint64_t) / cluster_size; - nl1e = align_offset(nl1e, cluster_size / sizeof(uint64_t)); - meta_size += nl1e * sizeof(uint64_t); - - /* total size of refcount blocks - * - * note: every host cluster is reference-counted, including metadata - * (even refcount blocks are recursively included). - * Let: - * a = total_size (this is the guest disk size) - * m = meta size not including refcount blocks and refcount tables - * c = cluster size - * y1 = number of refcount blocks entries - * y2 = meta size including everything - * rces = refcount entry size in bytes - * then, - * y1 = (y2 + a)/c - * y2 = y1 * rces + y1 * rces * sizeof(u64) / c + m - * we can get y1: - * y1 = (a + m) / (c - rces - rces * sizeof(u64) / c) - */ - nrefblocke = (aligned_total_size + meta_size + cluster_size) - / (cluster_size - rces - rces * sizeof(uint64_t) - / cluster_size); - refblock_count = DIV_ROUND_UP(nrefblocke, refblock_size); - meta_size += refblock_count * cluster_size; - - /* total size of refcount tables */ - nreftablee = align_offset(refblock_count, - cluster_size / sizeof(uint64_t)); - meta_size += nreftablee * sizeof(uint64_t); - - qemu_opt_set_number(opts, BLOCK_OPT_SIZE, - aligned_total_size + meta_size, &error_abort); + int64_t prealloc_size = + qcow2_calc_prealloc_size(total_size, cluster_size, refcount_order); + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, prealloc_size, &error_abort); qemu_opt_set(opts, BLOCK_OPT_PREALLOC, PreallocMode_lookup[prealloc], &error_abort); } @@ -2263,7 +2758,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, *header = (QCowHeader) { .magic = cpu_to_be32(QCOW_MAGIC), .version = cpu_to_be32(version), - .cluster_bits = cpu_to_be32(cluster_bits), + .cluster_bits = cpu_to_be32(ctz32(cluster_size)), .size = cpu_to_be64(0), .l1_table_offset = cpu_to_be64(0), .l1_size = cpu_to_be32(0), @@ -2273,11 +2768,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, .header_length = cpu_to_be32(sizeof(*header)), }; - if (flags & BLOCK_FLAG_ENCRYPT) { - header->crypt_method = cpu_to_be32(QCOW_CRYPT_AES); - } else { - header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); - } + /* We'll update this to correct value later */ + header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) { header->compatible_features |= @@ -2340,7 +2832,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, } /* Okay, now that we have a valid image, let's give it the right size */ - ret = blk_truncate(blk, total_size, errp); + ret = blk_truncate(blk, total_size, PREALLOC_MODE_OFF, errp); if (ret < 0) { error_prepend(errp, "Could not resize image: "); goto out; @@ -2356,12 +2848,17 @@ static int qcow2_create2(const char *filename, int64_t total_size, } } + /* Want encryption? There you go. */ + if (encryptfmt) { + ret = qcow2_set_up_encryption(blk_bs(blk), encryptfmt, opts, errp); + if (ret < 0) { + goto out; + } + } + /* And if we're supposed to preallocate metadata, do that now */ if (prealloc != PREALLOC_MODE_OFF) { - BDRVQcow2State *s = blk_bs(blk)->opaque; - qemu_co_mutex_lock(&s->lock); - ret = preallocate(blk_bs(blk)); - qemu_co_mutex_unlock(&s->lock); + ret = preallocate(blk_bs(blk), 0, total_size); if (ret < 0) { error_setg_errno(errp, -ret, "Could not preallocate metadata"); goto out; @@ -2371,11 +2868,17 @@ static int qcow2_create2(const char *filename, int64_t total_size, blk_unref(blk); blk = NULL; - /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */ + /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning. + * Using BDRV_O_NO_IO, since encryption is now setup we don't want to + * have to setup decryption context. We're not doing any I/O on the top + * level BlockDriverState, only lower layers, where BDRV_O_NO_IO does + * not have effect. + */ options = qdict_new(); qdict_put_str(options, "driver", "qcow2"); blk = blk_new_open(filename, NULL, options, - BDRV_O_RDWR | BDRV_O_NO_BACKING, &local_err); + BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -2399,9 +2902,10 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) int flags = 0; size_t cluster_size = DEFAULT_CLUSTER_SIZE; PreallocMode prealloc; - int version = 3; - uint64_t refcount_bits = 16; + int version; + uint64_t refcount_bits; int refcount_order; + const char *encryptfmt = NULL; Error *local_err = NULL; int ret; @@ -2410,11 +2914,23 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) BDRV_SECTOR_SIZE); backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT); - if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) { - flags |= BLOCK_FLAG_ENCRYPT; + encryptfmt = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT); + if (encryptfmt) { + if (qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT)) { + error_setg(errp, "Options " BLOCK_OPT_ENCRYPT " and " + BLOCK_OPT_ENCRYPT_FORMAT " are mutually exclusive"); + ret = -EINVAL; + goto finish; + } + } else if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) { + encryptfmt = "aes"; + } + cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto finish; } - cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, - DEFAULT_CLUSTER_SIZE); buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); prealloc = qapi_enum_parse(PreallocMode_lookup, buf, PREALLOC_MODE__MAX, PREALLOC_MODE_OFF, @@ -2424,16 +2940,10 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) ret = -EINVAL; goto finish; } - g_free(buf); - buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL); - if (!buf) { - /* keep the default */ - } else if (!strcmp(buf, "0.10")) { - version = 2; - } else if (!strcmp(buf, "1.1")) { - version = 3; - } else { - error_setg(errp, "Invalid compatibility level: '%s'", buf); + + version = qcow2_opt_get_version_del(opts, &local_err); + if (local_err) { + error_propagate(errp, local_err); ret = -EINVAL; goto finish; } @@ -2456,19 +2966,9 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) goto finish; } - refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, - refcount_bits); - if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) { - error_setg(errp, "Refcount width must be a power of two and may not " - "exceed 64 bits"); - ret = -EINVAL; - goto finish; - } - - if (version < 3 && refcount_bits != 16) { - error_setg(errp, "Different refcount widths than 16 bits require " - "compatibility level 1.1 or above (use compat=1.1 or " - "greater)"); + refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err); + if (local_err) { + error_propagate(errp, local_err); ret = -EINVAL; goto finish; } @@ -2477,7 +2977,7 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags, cluster_size, prealloc, opts, version, refcount_order, - &local_err); + encryptfmt, &local_err); error_propagate(errp, local_err); finish: @@ -2585,12 +3085,22 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, return ret; } -static int qcow2_truncate(BlockDriverState *bs, int64_t offset, Error **errp) +static int qcow2_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVQcow2State *s = bs->opaque; + uint64_t old_length; int64_t new_l1_size; int ret; + if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA && + prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL) + { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_lookup[prealloc]); + return -ENOTSUP; + } + if (offset & 511) { error_setg(errp, "The new size must be a multiple of 512"); return -EINVAL; @@ -2602,8 +3112,17 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, Error **errp) return -ENOTSUP; } + /* cannot proceed if image has bitmaps */ + if (s->nb_bitmaps) { + /* TODO: resize bitmaps in the image */ + error_setg(errp, "Can't resize an image which has bitmaps"); + return -ENOTSUP; + } + + old_length = bs->total_sectors * 512; + /* shrinking is currently not supported */ - if (offset < bs->total_sectors * 512) { + if (offset < old_length) { error_setg(errp, "qcow2 doesn't support shrinking images yet"); return -ENOTSUP; } @@ -2615,6 +3134,128 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, Error **errp) return ret; } + switch (prealloc) { + case PREALLOC_MODE_OFF: + break; + + case PREALLOC_MODE_METADATA: + ret = preallocate(bs, old_length, offset); + if (ret < 0) { + error_setg_errno(errp, -ret, "Preallocation failed"); + return ret; + } + break; + + case PREALLOC_MODE_FALLOC: + case PREALLOC_MODE_FULL: + { + int64_t allocation_start, host_offset, guest_offset; + int64_t clusters_allocated; + int64_t old_file_size, new_file_size; + uint64_t nb_new_data_clusters, nb_new_l2_tables; + + old_file_size = bdrv_getlength(bs->file->bs); + if (old_file_size < 0) { + error_setg_errno(errp, -old_file_size, + "Failed to inquire current file length"); + return ret; + } + + nb_new_data_clusters = DIV_ROUND_UP(offset - old_length, + s->cluster_size); + + /* This is an overestimation; we will not actually allocate space for + * these in the file but just make sure the new refcount structures are + * able to cover them so we will not have to allocate new refblocks + * while entering the data blocks in the potentially new L2 tables. + * (We do not actually care where the L2 tables are placed. Maybe they + * are already allocated or they can be placed somewhere before + * @old_file_size. It does not matter because they will be fully + * allocated automatically, so they do not need to be covered by the + * preallocation. All that matters is that we will not have to allocate + * new refcount structures for them.) */ + nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters, + s->cluster_size / sizeof(uint64_t)); + /* The cluster range may not be aligned to L2 boundaries, so add one L2 + * table for a potential head/tail */ + nb_new_l2_tables++; + + allocation_start = qcow2_refcount_area(bs, old_file_size, + nb_new_data_clusters + + nb_new_l2_tables, + true, 0, 0); + if (allocation_start < 0) { + error_setg_errno(errp, -allocation_start, + "Failed to resize refcount structures"); + return -allocation_start; + } + + clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start, + nb_new_data_clusters); + if (clusters_allocated < 0) { + error_setg_errno(errp, -clusters_allocated, + "Failed to allocate data clusters"); + return -clusters_allocated; + } + + assert(clusters_allocated == nb_new_data_clusters); + + /* Allocate the data area */ + new_file_size = allocation_start + + nb_new_data_clusters * s->cluster_size; + ret = bdrv_truncate(bs->file, new_file_size, prealloc, errp); + if (ret < 0) { + error_prepend(errp, "Failed to resize underlying file: "); + qcow2_free_clusters(bs, allocation_start, + nb_new_data_clusters * s->cluster_size, + QCOW2_DISCARD_OTHER); + return ret; + } + + /* Create the necessary L2 entries */ + host_offset = allocation_start; + guest_offset = old_length; + while (nb_new_data_clusters) { + int64_t guest_cluster = guest_offset >> s->cluster_bits; + int64_t nb_clusters = MIN(nb_new_data_clusters, + s->l2_size - guest_cluster % s->l2_size); + QCowL2Meta allocation = { + .offset = guest_offset, + .alloc_offset = host_offset, + .nb_clusters = nb_clusters, + }; + qemu_co_queue_init(&allocation.dependent_requests); + + ret = qcow2_alloc_cluster_link_l2(bs, &allocation); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to update L2 tables"); + qcow2_free_clusters(bs, host_offset, + nb_new_data_clusters * s->cluster_size, + QCOW2_DISCARD_OTHER); + return ret; + } + + guest_offset += nb_clusters * s->cluster_size; + host_offset += nb_clusters * s->cluster_size; + nb_new_data_clusters -= nb_clusters; + } + break; + } + + default: + g_assert_not_reached(); + } + + if (prealloc != PREALLOC_MODE_OFF) { + /* Flush metadata before actually changing the image size */ + ret = bdrv_flush(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, + "Failed to flush the preallocated area to disk"); + return ret; + } + } + /* write updated header.size */ offset = cpu_to_be64(offset); ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), @@ -2646,7 +3287,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, /* align end of file to a sector boundary to ease reading with sector based I/Os */ cluster_offset = bdrv_getlength(bs->file->bs); - return bdrv_truncate(bs->file, cluster_offset, NULL); + return bdrv_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, NULL); } buf = qemu_blockalign(bs, s->cluster_size); @@ -2862,7 +3503,7 @@ static int make_completely_empty(BlockDriverState *bs) } ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, - &local_err); + PREALLOC_MODE_OFF, &local_err); if (ret < 0) { error_report_err(local_err); goto fail; @@ -2946,6 +3587,142 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) return 0; } +static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, + Error **errp) +{ + Error *local_err = NULL; + BlockMeasureInfo *info; + uint64_t required = 0; /* bytes that contribute to required size */ + uint64_t virtual_size; /* disk size as seen by guest */ + uint64_t refcount_bits; + uint64_t l2_tables; + size_t cluster_size; + int version; + char *optstr; + PreallocMode prealloc; + bool has_backing_file; + + /* Parse image creation options */ + cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err); + if (local_err) { + goto err; + } + + version = qcow2_opt_get_version_del(opts, &local_err); + if (local_err) { + goto err; + } + + refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err); + if (local_err) { + goto err; + } + + optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); + prealloc = qapi_enum_parse(PreallocMode_lookup, optstr, + PREALLOC_MODE__MAX, PREALLOC_MODE_OFF, + &local_err); + g_free(optstr); + if (local_err) { + goto err; + } + + optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); + has_backing_file = !!optstr; + g_free(optstr); + + virtual_size = align_offset(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + cluster_size); + + /* Check that virtual disk size is valid */ + l2_tables = DIV_ROUND_UP(virtual_size / cluster_size, + cluster_size / sizeof(uint64_t)); + if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) { + error_setg(&local_err, "The image size is too large " + "(try using a larger cluster size)"); + goto err; + } + + /* Account for input image */ + if (in_bs) { + int64_t ssize = bdrv_getlength(in_bs); + if (ssize < 0) { + error_setg_errno(&local_err, -ssize, + "Unable to get image virtual_size"); + goto err; + } + + virtual_size = align_offset(ssize, cluster_size); + + if (has_backing_file) { + /* We don't how much of the backing chain is shared by the input + * image and the new image file. In the worst case the new image's + * backing file has nothing in common with the input image. Be + * conservative and assume all clusters need to be written. + */ + required = virtual_size; + } else { + int cluster_sectors = cluster_size / BDRV_SECTOR_SIZE; + int64_t sector_num; + int pnum = 0; + + for (sector_num = 0; + sector_num < ssize / BDRV_SECTOR_SIZE; + sector_num += pnum) { + int nb_sectors = MAX(ssize / BDRV_SECTOR_SIZE - sector_num, + INT_MAX); + BlockDriverState *file; + int64_t ret; + + ret = bdrv_get_block_status_above(in_bs, NULL, + sector_num, nb_sectors, + &pnum, &file); + if (ret < 0) { + error_setg_errno(&local_err, -ret, + "Unable to get block status"); + goto err; + } + + if (ret & BDRV_BLOCK_ZERO) { + /* Skip zero regions (safe with no backing file) */ + } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) == + (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) { + /* Extend pnum to end of cluster for next iteration */ + pnum = ROUND_UP(sector_num + pnum, cluster_sectors) - + sector_num; + + /* Count clusters we've seen */ + required += (sector_num % cluster_sectors + pnum) * + BDRV_SECTOR_SIZE; + } + } + } + } + + /* Take into account preallocation. Nothing special is needed for + * PREALLOC_MODE_METADATA since metadata is always counted. + */ + if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) { + required = virtual_size; + } + + info = g_new(BlockMeasureInfo, 1); + info->fully_allocated = + qcow2_calc_prealloc_size(virtual_size, cluster_size, + ctz32(refcount_bits)); + + /* Remove data clusters that are not required. This overestimates the + * required size because metadata needed for the fully allocated file is + * still counted. + */ + info->required = info->fully_allocated - virtual_size + required; + return info; + +err: + error_propagate(errp, local_err); + return NULL; +} + static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) { BDRVQcow2State *s = bs->opaque; @@ -2959,8 +3736,14 @@ static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; - ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1); + ImageInfoSpecific *spec_info; + QCryptoBlockInfo *encrypt_info = NULL; + if (s->crypto != NULL) { + encrypt_info = qcrypto_block_get_info(s->crypto, &error_abort); + } + + spec_info = g_new(ImageInfoSpecific, 1); *spec_info = (ImageInfoSpecific){ .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2, .u.qcow2.data = g_new(ImageInfoSpecificQCow2, 1), @@ -2987,6 +3770,30 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) assert(false); } + if (encrypt_info) { + ImageInfoSpecificQCow2Encryption *qencrypt = + g_new(ImageInfoSpecificQCow2Encryption, 1); + switch (encrypt_info->format) { + case Q_CRYPTO_BLOCK_FORMAT_QCOW: + qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES; + qencrypt->u.aes = encrypt_info->u.qcow; + break; + case Q_CRYPTO_BLOCK_FORMAT_LUKS: + qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS; + qencrypt->u.luks = encrypt_info->u.luks; + break; + default: + abort(); + } + /* Since we did shallow copy above, erase any pointers + * in the original info */ + memset(&encrypt_info->u, 0, sizeof(encrypt_info->u)); + qapi_free_QCryptoBlockInfo(encrypt_info); + + spec_info->u.qcow2.data->has_encrypt = true; + spec_info->u.qcow2.data->encrypt = qencrypt; + } + return spec_info; } @@ -3175,6 +3982,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, const char *compat = NULL; uint64_t cluster_size = s->cluster_size; bool encrypt; + int encformat; int refcount_bits = s->refcount_bits; Error *local_err = NULL; int ret; @@ -3211,12 +4019,20 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) { encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, - !!s->cipher); + !!s->crypto); - if (encrypt != !!s->cipher) { + if (encrypt != !!s->crypto) { error_report("Changing the encryption flag is not supported"); return -ENOTSUP; } + } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) { + encformat = qcow2_crypt_method_from_format( + qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT)); + + if (encformat != s->crypt_method_header) { + error_report("Changing the encryption format is not supported"); + return -ENOTSUP; + } } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) { cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, cluster_size); @@ -3333,7 +4149,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, return ret; } - ret = blk_truncate(blk, new_size, &local_err); + ret = blk_truncate(blk, new_size, PREALLOC_MODE_OFF, &local_err); blk_unref(blk); if (ret < 0) { error_report_err(local_err); @@ -3431,10 +4247,23 @@ static QemuOptsList qcow2_create_opts = { { .name = BLOCK_OPT_ENCRYPT, .type = QEMU_OPT_BOOL, - .help = "Encrypt the image", - .def_value_str = "off" + .help = "Encrypt the image with format 'aes'. (Deprecated " + "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)", }, { + .name = BLOCK_OPT_ENCRYPT_FORMAT, + .type = QEMU_OPT_STRING, + .help = "Encrypt the image, format choices: 'aes', 'luks'", + }, + BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", + "ID of secret providing qcow AES key or LUKS passphrase"), + BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."), + BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."), + BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."), + BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."), + BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."), + BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."), + { .name = BLOCK_OPT_CLUSTER_SIZE, .type = QEMU_OPT_SIZE, .help = "qcow2 cluster size", @@ -3476,7 +4305,6 @@ BlockDriver bdrv_qcow2 = { .bdrv_create = qcow2_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = qcow2_co_get_block_status, - .bdrv_set_key = qcow2_set_key, .bdrv_co_preadv = qcow2_co_preadv, .bdrv_co_pwritev = qcow2_co_pwritev, @@ -3493,6 +4321,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_snapshot_delete = qcow2_snapshot_delete, .bdrv_snapshot_list = qcow2_snapshot_list, .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp, + .bdrv_measure = qcow2_measure, .bdrv_get_info = qcow2_get_info, .bdrv_get_specific_info = qcow2_get_specific_info, @@ -3512,6 +4341,10 @@ BlockDriver bdrv_qcow2 = { .bdrv_detach_aio_context = qcow2_detach_aio_context, .bdrv_attach_aio_context = qcow2_attach_aio_context, + + .bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw, + .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap, + .bdrv_remove_persistent_dirty_bitmap = qcow2_remove_persistent_dirty_bitmap, }; static void bdrv_qcow2_init(void) diff --git a/block/qcow2.h b/block/qcow2.h index 87b15eb4aa..96a8d43c17 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -25,7 +25,7 @@ #ifndef BLOCK_QCOW2_H #define BLOCK_QCOW2_H -#include "crypto/cipher.h" +#include "crypto/block.h" #include "qemu/coroutine.h" //#define DEBUG_ALLOC @@ -36,6 +36,7 @@ #define QCOW_CRYPT_NONE 0 #define QCOW_CRYPT_AES 1 +#define QCOW_CRYPT_LUKS 2 #define QCOW_MAX_CRYPT_CLUSTERS 32 #define QCOW_MAX_SNAPSHOTS 65536 @@ -52,6 +53,10 @@ * space for snapshot names and IDs */ #define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS) +/* Bitmap header extension constraints */ +#define QCOW2_MAX_BITMAPS 65535 +#define QCOW2_MAX_BITMAP_DIRECTORY_SIZE (1024 * QCOW2_MAX_BITMAPS) + /* indicate that the refcount of the referenced cluster is exactly one. */ #define QCOW_OFLAG_COPIED (1ULL << 63) /* indicate that the cluster is compressed (they never have the copied flag) */ @@ -163,6 +168,11 @@ typedef struct QCowSnapshot { struct Qcow2Cache; typedef struct Qcow2Cache Qcow2Cache; +typedef struct Qcow2CryptoHeaderExtension { + uint64_t offset; + uint64_t length; +} QEMU_PACKED Qcow2CryptoHeaderExtension; + typedef struct Qcow2UnknownHeaderExtension { uint32_t magic; uint32_t len; @@ -195,6 +205,14 @@ enum { QCOW2_COMPAT_FEAT_MASK = QCOW2_COMPAT_LAZY_REFCOUNTS, }; +/* Autoclear feature bits */ +enum { + QCOW2_AUTOCLEAR_BITMAPS_BITNR = 0, + QCOW2_AUTOCLEAR_BITMAPS = 1 << QCOW2_AUTOCLEAR_BITMAPS_BITNR, + + QCOW2_AUTOCLEAR_MASK = QCOW2_AUTOCLEAR_BITMAPS, +}; + enum qcow2_discard_type { QCOW2_DISCARD_NEVER = 0, QCOW2_DISCARD_ALWAYS, @@ -222,6 +240,13 @@ typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array, typedef void Qcow2SetRefcountFunc(void *refcount_array, uint64_t index, uint64_t value); +typedef struct Qcow2BitmapHeaderExt { + uint32_t nb_bitmaps; + uint32_t reserved32; + uint64_t bitmap_directory_size; + uint64_t bitmap_directory_offset; +} QEMU_PACKED Qcow2BitmapHeaderExt; + typedef struct BDRVQcow2State { int cluster_bits; int cluster_size; @@ -257,13 +282,21 @@ typedef struct BDRVQcow2State { CoMutex lock; - QCryptoCipher *cipher; /* current cipher, NULL if no key yet */ + Qcow2CryptoHeaderExtension crypto_header; /* QCow2 header extension */ + QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */ + QCryptoBlock *crypto; /* Disk encryption format driver */ + bool crypt_physical_offset; /* Whether to use virtual or physical offset + for encryption initialization vector tweak */ uint32_t crypt_method_header; uint64_t snapshots_offset; int snapshots_size; unsigned int nb_snapshots; QCowSnapshot *snapshots; + uint32_t nb_bitmaps; + uint64_t bitmap_directory_size; + uint64_t bitmap_directory_offset; + int flags; int qcow_version; bool use_lazy_refcounts; @@ -492,6 +525,10 @@ static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2) int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, int64_t sector_num, int nb_sectors); +int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, + int refcount_order, bool generous_increase, + uint64_t *refblock_count); + int qcow2_mark_dirty(BlockDriverState *bs); int qcow2_mark_corrupt(BlockDriverState *bs); int qcow2_mark_consistent(BlockDriverState *bs); @@ -512,6 +549,11 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index, uint64_t addend, bool decrease, enum qcow2_discard_type type); +int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t offset, + uint64_t additional_clusters, bool exact_size, + int new_refblock_index, + uint64_t new_refblock_offset); + int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size); int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, int64_t nb_clusters); @@ -534,6 +576,10 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, int64_t size); int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset, int64_t size); +int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res, + void **refcount_table, + int64_t *refcount_table_size, + int64_t offset, int64_t size); int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, BlockDriverAmendStatusCB *status_cb, @@ -545,8 +591,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index); int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, - uint8_t *out_buf, const uint8_t *in_buf, - int nb_sectors, bool enc, Error **errp); + uint8_t *buf, int nb_sectors, bool enc, Error **errp); int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, unsigned int *bytes, uint64_t *cluster_offset); @@ -605,4 +650,20 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, void **table); void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table); +/* qcow2-bitmap.c functions */ +int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res, + void **refcount_table, + int64_t *refcount_table_size); +bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp); +int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp); +void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp); +int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp); +bool qcow2_can_store_new_dirty_bitmap(BlockDriverState *bs, + const char *name, + uint32_t granularity, + Error **errp); +void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs, + const char *name, + Error **errp); + #endif diff --git a/block/qed.c b/block/qed.c index 385381a78a..86cad2188c 100644 --- a/block/qed.c +++ b/block/qed.c @@ -583,7 +583,7 @@ static int qed_create(const char *filename, uint32_t cluster_size, blk_set_allow_write_beyond_eof(blk, true); /* File must start empty and grow, check truncate is supported */ - ret = blk_truncate(blk, 0, errp); + ret = blk_truncate(blk, 0, PREALLOC_MODE_OFF, errp); if (ret < 0) { goto out; } @@ -1342,12 +1342,19 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, QED_AIOCB_WRITE | QED_AIOCB_ZERO); } -static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset, Error **errp) +static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVQEDState *s = bs->opaque; uint64_t old_image_size; int ret; + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_lookup[prealloc]); + return -ENOTSUP; + } + if (!qed_is_image_size_valid(offset, s->header.cluster_size, s->header.table_size)) { error_setg(errp, "Invalid image size specified"); diff --git a/block/raw-format.c b/block/raw-format.c index 1ea8c2d7ff..142649ed56 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -312,6 +312,31 @@ static int64_t raw_getlength(BlockDriverState *bs) return s->size; } +static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs, + Error **errp) +{ + BlockMeasureInfo *info; + int64_t required; + + if (in_bs) { + required = bdrv_getlength(in_bs); + if (required < 0) { + error_setg_errno(errp, -required, "Unable to get image size"); + return NULL; + } + } else { + required = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + BDRV_SECTOR_SIZE); + } + + info = g_new(BlockMeasureInfo, 1); + info->required = required; + + /* Unallocated sectors count towards the file size in raw images */ + info->fully_allocated = info->required; + return info; +} + static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) { return bdrv_get_info(bs->file->bs, bdi); @@ -327,7 +352,8 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) } } -static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp) +static int raw_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVRawState *s = bs->opaque; @@ -343,7 +369,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp) s->size = offset; offset += s->offset; - return bdrv_truncate(bs->file, offset, errp); + return bdrv_truncate(bs->file, offset, prealloc, errp); } static int raw_media_changed(BlockDriverState *bs) @@ -479,6 +505,7 @@ BlockDriver bdrv_raw = { .bdrv_truncate = &raw_truncate, .bdrv_getlength = &raw_getlength, .has_variable_length = true, + .bdrv_measure = &raw_measure, .bdrv_get_info = &raw_get_info, .bdrv_refresh_limits = &raw_refresh_limits, .bdrv_probe_blocksizes = &raw_probe_blocksizes, diff --git a/block/rbd.c b/block/rbd.c index d461f7dc87..9c3aa638e7 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -936,11 +936,18 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs) return info.size; } -static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset, Error **errp) +static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVRBDState *s = bs->opaque; int r; + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_lookup[prealloc]); + return -ENOTSUP; + } + r = rbd_resize(s->image, offset); if (r < 0) { error_setg_errno(errp, -r, "Failed to resize file"); diff --git a/block/sheepdog.c b/block/sheepdog.c index 08d7b11e9d..b7b7e6bbe5 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -2153,13 +2153,20 @@ static int64_t sd_getlength(BlockDriverState *bs) return s->inode.vdi_size; } -static int sd_truncate(BlockDriverState *bs, int64_t offset, Error **errp) +static int sd_truncate(BlockDriverState *bs, int64_t offset, + PreallocMode prealloc, Error **errp) { BDRVSheepdogState *s = bs->opaque; int ret, fd; unsigned int datalen; uint64_t max_vdi_size; + if (prealloc != PREALLOC_MODE_OFF) { + error_setg(errp, "Unsupported preallocation mode '%s'", + PreallocMode_lookup[prealloc]); + return -ENOTSUP; + } + max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS; if (offset < s->inode.vdi_size) { error_setg(errp, "shrinking is not supported"); @@ -2448,7 +2455,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, BDRVSheepdogState *s = bs->opaque; if (offset > s->inode.vdi_size) { - ret = sd_truncate(bs, offset, NULL); + ret = sd_truncate(bs, offset, PREALLOC_MODE_OFF, NULL); if (ret < 0) { return ret; } diff --git a/block/vdi.c b/block/vdi.c index 79af47763b..2b6e8fa1ed 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -832,7 +832,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) } if (image_type == VDI_TYPE_STATIC) { - ret = blk_truncate(blk, offset + blocks * block_size, errp); + ret = blk_truncate(blk, offset + blocks * block_size, + PREALLOC_MODE_OFF, errp); if (ret < 0) { error_prepend(errp, "Failed to statically allocate %s", filename); goto exit; diff --git a/block/vhdx-log.c b/block/vhdx-log.c index 3f4c2aa095..01278f3fc9 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -548,7 +548,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, if (new_file_size % (1024*1024)) { /* round up to nearest 1MB boundary */ new_file_size = ((new_file_size >> 20) + 1) << 20; - bdrv_truncate(bs->file, new_file_size, NULL); + bdrv_truncate(bs->file, new_file_size, PREALLOC_MODE_OFF, NULL); } } qemu_vfree(desc_entries); diff --git a/block/vhdx.c b/block/vhdx.c index 8b270b57c9..a9cecd2773 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1171,7 +1171,8 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, /* per the spec, the address for a block is in units of 1MB */ *new_offset = ROUND_UP(*new_offset, 1024 * 1024); - return bdrv_truncate(bs->file, *new_offset + s->block_size, NULL); + return bdrv_truncate(bs->file, *new_offset + s->block_size, + PREALLOC_MODE_OFF, NULL); } /* @@ -1607,12 +1608,13 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s, if (type == VHDX_TYPE_DYNAMIC) { /* All zeroes, so we can just extend the file - the end of the BAT * is the furthest thing we have written yet */ - ret = blk_truncate(blk, data_file_offset, errp); + ret = blk_truncate(blk, data_file_offset, PREALLOC_MODE_OFF, errp); if (ret < 0) { goto exit; } } else if (type == VHDX_TYPE_FIXED) { - ret = blk_truncate(blk, data_file_offset + image_size, errp); + ret = blk_truncate(blk, data_file_offset + image_size, + PREALLOC_MODE_OFF, errp); if (ret < 0) { goto exit; } diff --git a/block/vmdk.c b/block/vmdk.c index 55581b03fe..24d71b5982 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1714,7 +1714,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, blk_set_allow_write_beyond_eof(blk, true); if (flat) { - ret = blk_truncate(blk, filesize, errp); + ret = blk_truncate(blk, filesize, PREALLOC_MODE_OFF, errp); goto exit; } magic = cpu_to_be32(VMDK4_MAGIC); @@ -1777,7 +1777,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, goto exit; } - ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, errp); + ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, + PREALLOC_MODE_OFF, errp); if (ret < 0) { goto exit; } @@ -2086,7 +2087,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) /* bdrv_pwrite write padding zeros to align to sector, we don't need that * for description file */ if (desc_offset == 0) { - ret = blk_truncate(new_blk, desc_len, errp); + ret = blk_truncate(new_blk, desc_len, PREALLOC_MODE_OFF, errp); } exit: if (new_blk) { diff --git a/block/vpc.c b/block/vpc.c index b313c68148..9a6f8173a5 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -858,7 +858,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, /* Add footer to total size */ total_size += HEADER_SIZE; - ret = blk_truncate(blk, total_size, errp); + ret = blk_truncate(blk, total_size, PREALLOC_MODE_OFF, errp); if (ret < 0) { return ret; } |