diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/blkdebug.c | 24 | ||||
-rw-r--r-- | block/blkverify.c | 2 | ||||
-rw-r--r-- | block/io.c | 7 | ||||
-rw-r--r-- | block/mirror.c | 30 | ||||
-rw-r--r-- | block/nbd.c | 10 | ||||
-rw-r--r-- | block/qapi.c | 19 | ||||
-rw-r--r-- | block/qcow2-cluster.c | 14 | ||||
-rw-r--r-- | block/qcow2-refcount.c | 450 | ||||
-rw-r--r-- | block/qcow2.c | 229 | ||||
-rw-r--r-- | block/qcow2.h | 7 | ||||
-rw-r--r-- | block/quorum.c | 2 | ||||
-rw-r--r-- | block/raw-posix.c | 20 | ||||
-rw-r--r-- | block/snapshot.c | 23 |
13 files changed, 743 insertions, 94 deletions
diff --git a/block/blkdebug.c b/block/blkdebug.c index 59c61eb6b2..86b143dc2d 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -674,17 +674,15 @@ static int blkdebug_truncate(BlockDriverState *bs, int64_t offset) return bdrv_truncate(bs->file->bs, offset); } -static void blkdebug_refresh_filename(BlockDriverState *bs) +static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options) { QDict *opts; const QDictEntry *e; bool force_json = false; - for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) { + for (e = qdict_first(options); e; e = qdict_next(options, e)) { if (strcmp(qdict_entry_key(e), "config") && - strcmp(qdict_entry_key(e), "x-image") && - strcmp(qdict_entry_key(e), "image") && - strncmp(qdict_entry_key(e), "image.", strlen("image."))) + strcmp(qdict_entry_key(e), "x-image")) { force_json = true; break; @@ -700,7 +698,7 @@ static void blkdebug_refresh_filename(BlockDriverState *bs) if (!force_json && bs->file->bs->exact_filename[0]) { snprintf(bs->exact_filename, sizeof(bs->exact_filename), "blkdebug:%s:%s", - qdict_get_try_str(bs->options, "config") ?: "", + qdict_get_try_str(options, "config") ?: "", bs->file->bs->exact_filename); } @@ -710,11 +708,8 @@ static void blkdebug_refresh_filename(BlockDriverState *bs) QINCREF(bs->file->bs->full_open_options); qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options)); - for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) { - if (strcmp(qdict_entry_key(e), "x-image") && - strcmp(qdict_entry_key(e), "image") && - strncmp(qdict_entry_key(e), "image.", strlen("image."))) - { + for (e = qdict_first(options); e; e = qdict_next(options, e)) { + if (strcmp(qdict_entry_key(e), "x-image")) { qobject_incref(qdict_entry_value(e)); qdict_put_obj(opts, qdict_entry_key(e), qdict_entry_value(e)); } @@ -723,6 +718,12 @@ static void blkdebug_refresh_filename(BlockDriverState *bs) bs->full_open_options = opts; } +static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static BlockDriver bdrv_blkdebug = { .format_name = "blkdebug", .protocol_name = "blkdebug", @@ -731,6 +732,7 @@ static BlockDriver bdrv_blkdebug = { .bdrv_parse_filename = blkdebug_parse_filename, .bdrv_file_open = blkdebug_open, .bdrv_close = blkdebug_close, + .bdrv_reopen_prepare = blkdebug_reopen_prepare, .bdrv_getlength = blkdebug_getlength, .bdrv_truncate = blkdebug_truncate, .bdrv_refresh_filename = blkdebug_refresh_filename, diff --git a/block/blkverify.c b/block/blkverify.c index c5f8e8dcba..1d754496bc 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -307,7 +307,7 @@ static void blkverify_attach_aio_context(BlockDriverState *bs, bdrv_attach_aio_context(s->test_file->bs, new_context); } -static void blkverify_refresh_filename(BlockDriverState *bs) +static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options) { BDRVBlkverifyState *s = bs->opaque; diff --git a/block/io.c b/block/io.c index e00fb5d690..841f5b503f 100644 --- a/block/io.c +++ b/block/io.c @@ -2614,10 +2614,11 @@ int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) bdrv_co_ioctl_entry(&data); } else { Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry); + qemu_coroutine_enter(co, &data); - } - while (data.ret == -EINPROGRESS) { - aio_poll(bdrv_get_aio_context(bs), true); + while (data.ret == -EINPROGRESS) { + aio_poll(bdrv_get_aio_context(bs), true); + } } return data.ret; } diff --git a/block/mirror.c b/block/mirror.c index 0e8f5565a5..fc34a9c491 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -18,6 +18,7 @@ #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" #include "qemu/bitmap.h" +#include "qemu/error-report.h" #define SLICE_TIME 100000000ULL /* ns */ #define MAX_IN_FLIGHT 16 @@ -370,11 +371,22 @@ static void mirror_exit(BlockJob *job, void *opaque) if (s->to_replace) { to_replace = s->to_replace; } + + /* This was checked in mirror_start_job(), but meanwhile one of the + * nodes could have been newly attached to a BlockBackend. */ + if (to_replace->blk && s->target->blk) { + error_report("block job: Can't create node with two BlockBackends"); + data->ret = -EINVAL; + goto out; + } + if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); } bdrv_replace_in_backing_chain(to_replace, s->target); } + +out: if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); error_free(s->replace_blocker); @@ -640,7 +652,7 @@ static void mirror_complete(BlockJob *job, Error **errp) Error *local_err = NULL; int ret; - ret = bdrv_open_backing_file(s->target, NULL, &local_err); + ret = bdrv_open_backing_file(s->target, NULL, "backing", &local_err); if (ret < 0) { error_propagate(errp, local_err); return; @@ -705,6 +717,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, bool is_none_mode, BlockDriverState *base) { MirrorBlockJob *s; + BlockDriverState *replaced_bs; if (granularity == 0) { granularity = bdrv_get_default_bitmap_granularity(target); @@ -728,6 +741,21 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, buf_size = DEFAULT_MIRROR_BUF_SIZE; } + /* We can't support this case as long as the block layer can't handle + * multiple BlockBackends per BlockDriverState. */ + if (replaces) { + replaced_bs = bdrv_lookup_bs(replaces, replaces, errp); + if (replaced_bs == NULL) { + return; + } + } else { + replaced_bs = bs; + } + if (replaced_bs->blk && target->blk) { + error_setg(errp, "Can't create node with two BlockBackends"); + return; + } + s = block_job_create(driver, bs, speed, cb, opaque, errp); if (!s) { return; diff --git a/block/nbd.c b/block/nbd.c index cd6a587776..416f42b903 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -342,13 +342,13 @@ static void nbd_attach_aio_context(BlockDriverState *bs, nbd_client_attach_aio_context(bs, new_context); } -static void nbd_refresh_filename(BlockDriverState *bs) +static void nbd_refresh_filename(BlockDriverState *bs, QDict *options) { QDict *opts = qdict_new(); - const char *path = qdict_get_try_str(bs->options, "path"); - const char *host = qdict_get_try_str(bs->options, "host"); - const char *port = qdict_get_try_str(bs->options, "port"); - const char *export = qdict_get_try_str(bs->options, "export"); + const char *path = qdict_get_try_str(options, "path"); + const char *host = qdict_get_try_str(options, "host"); + const char *port = qdict_get_try_str(options, "port"); + const char *export = qdict_get_try_str(options, "export"); qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd"))); diff --git a/block/qapi.c b/block/qapi.c index c0e877e07e..fecac253de 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -245,15 +245,17 @@ void bdrv_query_image_info(BlockDriverState *bs, info->has_backing_filename = true; bdrv_get_full_backing_filename(bs, backing_filename2, PATH_MAX, &err); if (err) { - error_propagate(errp, err); - qapi_free_ImageInfo(info); + /* Can't reconstruct the full backing filename, so we must omit + * this field and apply a Best Effort to this query. */ g_free(backing_filename2); - return; + backing_filename2 = NULL; + error_free(err); } - if (strcmp(backing_filename, backing_filename2) != 0) { - info->full_backing_filename = - g_strdup(backing_filename2); + /* Always report the full_backing_filename if present, even if it's the + * same as backing_filename. That they are same is useful info. */ + if (backing_filename2) { + info->full_backing_filename = g_strdup(backing_filename2); info->has_full_backing_filename = true; } @@ -676,7 +678,10 @@ void bdrv_image_info_dump(fprintf_function func_fprintf, void *f, if (info->has_backing_filename) { func_fprintf(f, "backing file: %s", info->backing_filename); - if (info->has_full_backing_filename) { + if (!info->has_full_backing_filename) { + func_fprintf(f, " (cannot determine actual path)"); + } else if (strcmp(info->backing_filename, + info->full_backing_filename) != 0) { func_fprintf(f, " (actual path: %s)", info->full_backing_filename); } func_fprintf(f, "\n"); diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 24a60e2236..34112c3abb 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1641,7 +1641,8 @@ fail: static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, int l1_size, int64_t *visited_l1_entries, int64_t l1_entries, - BlockDriverAmendStatusCB *status_cb) + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque) { BDRVQcow2State *s = bs->opaque; bool is_active_l1 = (l1_table == s->l1_table); @@ -1667,7 +1668,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, /* unallocated */ (*visited_l1_entries)++; if (status_cb) { - status_cb(bs, *visited_l1_entries, l1_entries); + status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque); } continue; } @@ -1804,7 +1805,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, (*visited_l1_entries)++; if (status_cb) { - status_cb(bs, *visited_l1_entries, l1_entries); + status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque); } } @@ -1828,7 +1829,8 @@ fail: * qcow2 version which doesn't yet support metadata zero clusters. */ int qcow2_expand_zero_clusters(BlockDriverState *bs, - BlockDriverAmendStatusCB *status_cb) + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque) { BDRVQcow2State *s = bs->opaque; uint64_t *l1_table = NULL; @@ -1845,7 +1847,7 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs, ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size, &visited_l1_entries, l1_entries, - status_cb); + status_cb, cb_opaque); if (ret < 0) { goto fail; } @@ -1881,7 +1883,7 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs, ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size, &visited_l1_entries, l1_entries, - status_cb); + status_cb, cb_opaque); if (ret < 0) { goto fail; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 820f412ab6..af493f8bfe 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1345,6 +1345,9 @@ static int inc_refcounts(BlockDriverState *bs, if (refcount == s->refcount_max) { fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64 "\n", cluster_offset); + fprintf(stderr, "Use qemu-img amend to increase the refcount entry " + "width or qemu-img convert to create a clean copy if the " + "image cannot be opened for writing\n"); res->corruptions++; continue; } @@ -2467,3 +2470,450 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset, return 0; } + +/* A pointer to a function of this type is given to walk_over_reftable(). That + * function will create refblocks and pass them to a RefblockFinishOp once they + * are completed (@refblock). @refblock_empty is set if the refblock is + * completely empty. + * + * Along with the refblock, a corresponding reftable entry is passed, in the + * reftable @reftable (which may be reallocated) at @reftable_index. + * + * @allocated should be set to true if a new cluster has been allocated. + */ +typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable, + uint64_t reftable_index, uint64_t *reftable_size, + void *refblock, bool refblock_empty, + bool *allocated, Error **errp); + +/** + * This "operation" for walk_over_reftable() allocates the refblock on disk (if + * it is not empty) and inserts its offset into the new reftable. The size of + * this new reftable is increased as required. + */ +static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable, + uint64_t reftable_index, uint64_t *reftable_size, + void *refblock, bool refblock_empty, bool *allocated, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + int64_t offset; + + if (!refblock_empty && reftable_index >= *reftable_size) { + uint64_t *new_reftable; + uint64_t new_reftable_size; + + new_reftable_size = ROUND_UP(reftable_index + 1, + s->cluster_size / sizeof(uint64_t)); + if (new_reftable_size > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) { + error_setg(errp, + "This operation would make the refcount table grow " + "beyond the maximum size supported by QEMU, aborting"); + return -ENOTSUP; + } + + new_reftable = g_try_realloc(*reftable, new_reftable_size * + sizeof(uint64_t)); + if (!new_reftable) { + error_setg(errp, "Failed to increase reftable buffer size"); + return -ENOMEM; + } + + memset(new_reftable + *reftable_size, 0, + (new_reftable_size - *reftable_size) * sizeof(uint64_t)); + + *reftable = new_reftable; + *reftable_size = new_reftable_size; + } + + if (!refblock_empty && !(*reftable)[reftable_index]) { + offset = qcow2_alloc_clusters(bs, s->cluster_size); + if (offset < 0) { + error_setg_errno(errp, -offset, "Failed to allocate refblock"); + return offset; + } + (*reftable)[reftable_index] = offset; + *allocated = true; + } + + return 0; +} + +/** + * This "operation" for walk_over_reftable() writes the refblock to disk at the + * offset specified by the new reftable's entry. It does not modify the new + * reftable or change any refcounts. + */ +static int flush_refblock(BlockDriverState *bs, uint64_t **reftable, + uint64_t reftable_index, uint64_t *reftable_size, + void *refblock, bool refblock_empty, bool *allocated, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + int64_t offset; + int ret; + + if (reftable_index < *reftable_size && (*reftable)[reftable_index]) { + offset = (*reftable)[reftable_index]; + + ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Overlap check failed"); + return ret; + } + + ret = bdrv_pwrite(bs->file->bs, offset, refblock, s->cluster_size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write refblock"); + return ret; + } + } else { + assert(refblock_empty); + } + + return 0; +} + +/** + * This function walks over the existing reftable and every referenced refblock; + * if @new_set_refcount is non-NULL, it is called for every refcount entry to + * create an equal new entry in the passed @new_refblock. Once that + * @new_refblock is completely filled, @operation will be called. + * + * @status_cb and @cb_opaque are used for the amend operation's status callback. + * @index is the index of the walk_over_reftable() calls and @total is the total + * number of walk_over_reftable() calls per amend operation. Both are used for + * calculating the parameters for the status callback. + * + * @allocated is set to true if a new cluster has been allocated. + */ +static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable, + uint64_t *new_reftable_index, + uint64_t *new_reftable_size, + void *new_refblock, int new_refblock_size, + int new_refcount_bits, + RefblockFinishOp *operation, bool *allocated, + Qcow2SetRefcountFunc *new_set_refcount, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, int index, int total, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t reftable_index; + bool new_refblock_empty = true; + int refblock_index; + int new_refblock_index = 0; + int ret; + + for (reftable_index = 0; reftable_index < s->refcount_table_size; + reftable_index++) + { + uint64_t refblock_offset = s->refcount_table[reftable_index] + & REFT_OFFSET_MASK; + + status_cb(bs, (uint64_t)index * s->refcount_table_size + reftable_index, + (uint64_t)total * s->refcount_table_size, cb_opaque); + + if (refblock_offset) { + void *refblock; + + if (offset_into_cluster(s, refblock_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" + PRIx64 " unaligned (reftable index: %#" + PRIx64 ")", refblock_offset, + reftable_index); + error_setg(errp, + "Image is corrupt (unaligned refblock offset)"); + return -EIO; + } + + ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offset, + &refblock); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to retrieve refblock"); + return ret; + } + + for (refblock_index = 0; refblock_index < s->refcount_block_size; + refblock_index++) + { + uint64_t refcount; + + if (new_refblock_index >= new_refblock_size) { + /* new_refblock is now complete */ + ret = operation(bs, new_reftable, *new_reftable_index, + new_reftable_size, new_refblock, + new_refblock_empty, allocated, errp); + if (ret < 0) { + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + return ret; + } + + (*new_reftable_index)++; + new_refblock_index = 0; + new_refblock_empty = true; + } + + refcount = s->get_refcount(refblock, refblock_index); + if (new_refcount_bits < 64 && refcount >> new_refcount_bits) { + uint64_t offset; + + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + + offset = ((reftable_index << s->refcount_block_bits) + + refblock_index) << s->cluster_bits; + + error_setg(errp, "Cannot decrease refcount entry width to " + "%i bits: Cluster at offset %#" PRIx64 " has a " + "refcount of %" PRIu64, new_refcount_bits, + offset, refcount); + return -EINVAL; + } + + if (new_set_refcount) { + new_set_refcount(new_refblock, new_refblock_index++, + refcount); + } else { + new_refblock_index++; + } + new_refblock_empty = new_refblock_empty && refcount == 0; + } + + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + } else { + /* No refblock means every refcount is 0 */ + for (refblock_index = 0; refblock_index < s->refcount_block_size; + refblock_index++) + { + if (new_refblock_index >= new_refblock_size) { + /* new_refblock is now complete */ + ret = operation(bs, new_reftable, *new_reftable_index, + new_reftable_size, new_refblock, + new_refblock_empty, allocated, errp); + if (ret < 0) { + return ret; + } + + (*new_reftable_index)++; + new_refblock_index = 0; + new_refblock_empty = true; + } + + if (new_set_refcount) { + new_set_refcount(new_refblock, new_refblock_index++, 0); + } else { + new_refblock_index++; + } + } + } + } + + if (new_refblock_index > 0) { + /* Complete the potentially existing partially filled final refblock */ + if (new_set_refcount) { + for (; new_refblock_index < new_refblock_size; + new_refblock_index++) + { + new_set_refcount(new_refblock, new_refblock_index, 0); + } + } + + ret = operation(bs, new_reftable, *new_reftable_index, + new_reftable_size, new_refblock, new_refblock_empty, + allocated, errp); + if (ret < 0) { + return ret; + } + + (*new_reftable_index)++; + } + + status_cb(bs, (uint64_t)(index + 1) * s->refcount_table_size, + (uint64_t)total * s->refcount_table_size, cb_opaque); + + return 0; +} + +int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2GetRefcountFunc *new_get_refcount; + Qcow2SetRefcountFunc *new_set_refcount; + void *new_refblock = qemu_blockalign(bs->file->bs, s->cluster_size); + uint64_t *new_reftable = NULL, new_reftable_size = 0; + uint64_t *old_reftable, old_reftable_size, old_reftable_offset; + uint64_t new_reftable_index = 0; + uint64_t i; + int64_t new_reftable_offset = 0, allocated_reftable_size = 0; + int new_refblock_size, new_refcount_bits = 1 << refcount_order; + int old_refcount_order; + int walk_index = 0; + int ret; + bool new_allocation; + + assert(s->qcow_version >= 3); + assert(refcount_order >= 0 && refcount_order <= 6); + + /* see qcow2_open() */ + new_refblock_size = 1 << (s->cluster_bits - (refcount_order - 3)); + + new_get_refcount = get_refcount_funcs[refcount_order]; + new_set_refcount = set_refcount_funcs[refcount_order]; + + + do { + int total_walks; + + new_allocation = false; + + /* At least we have to do this walk and the one which writes the + * refblocks; also, at least we have to do this loop here at least + * twice (normally), first to do the allocations, and second to + * determine that everything is correctly allocated, this then makes + * three walks in total */ + total_walks = MAX(walk_index + 2, 3); + + /* First, allocate the structures so they are present in the refcount + * structures */ + ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index, + &new_reftable_size, NULL, new_refblock_size, + new_refcount_bits, &alloc_refblock, + &new_allocation, NULL, status_cb, cb_opaque, + walk_index++, total_walks, errp); + if (ret < 0) { + goto done; + } + + new_reftable_index = 0; + + if (new_allocation) { + if (new_reftable_offset) { + qcow2_free_clusters(bs, new_reftable_offset, + allocated_reftable_size * sizeof(uint64_t), + QCOW2_DISCARD_NEVER); + } + + new_reftable_offset = qcow2_alloc_clusters(bs, new_reftable_size * + sizeof(uint64_t)); + if (new_reftable_offset < 0) { + error_setg_errno(errp, -new_reftable_offset, + "Failed to allocate the new reftable"); + ret = new_reftable_offset; + goto done; + } + allocated_reftable_size = new_reftable_size; + } + } while (new_allocation); + + /* Second, write the new refblocks */ + ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index, + &new_reftable_size, new_refblock, + new_refblock_size, new_refcount_bits, + &flush_refblock, &new_allocation, new_set_refcount, + status_cb, cb_opaque, walk_index, walk_index + 1, + errp); + if (ret < 0) { + goto done; + } + assert(!new_allocation); + + + /* Write the new reftable */ + ret = qcow2_pre_write_overlap_check(bs, 0, new_reftable_offset, + new_reftable_size * sizeof(uint64_t)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Overlap check failed"); + goto done; + } + + for (i = 0; i < new_reftable_size; i++) { + cpu_to_be64s(&new_reftable[i]); + } + + ret = bdrv_pwrite(bs->file->bs, new_reftable_offset, new_reftable, + new_reftable_size * sizeof(uint64_t)); + + for (i = 0; i < new_reftable_size; i++) { + be64_to_cpus(&new_reftable[i]); + } + + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write the new reftable"); + goto done; + } + + + /* Empty the refcount cache */ + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to flush the refblock cache"); + goto done; + } + + /* Update the image header to point to the new reftable; this only updates + * the fields which are relevant to qcow2_update_header(); other fields + * such as s->refcount_table or s->refcount_bits stay stale for now + * (because we have to restore everything if qcow2_update_header() fails) */ + old_refcount_order = s->refcount_order; + old_reftable_size = s->refcount_table_size; + old_reftable_offset = s->refcount_table_offset; + + s->refcount_order = refcount_order; + s->refcount_table_size = new_reftable_size; + s->refcount_table_offset = new_reftable_offset; + + ret = qcow2_update_header(bs); + if (ret < 0) { + s->refcount_order = old_refcount_order; + s->refcount_table_size = old_reftable_size; + s->refcount_table_offset = old_reftable_offset; + error_setg_errno(errp, -ret, "Failed to update the qcow2 header"); + goto done; + } + + /* Now update the rest of the in-memory information */ + old_reftable = s->refcount_table; + s->refcount_table = new_reftable; + + s->refcount_bits = 1 << refcount_order; + s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1); + s->refcount_max += s->refcount_max - 1; + + s->refcount_block_bits = s->cluster_bits - (refcount_order - 3); + s->refcount_block_size = 1 << s->refcount_block_bits; + + s->get_refcount = new_get_refcount; + s->set_refcount = new_set_refcount; + + /* For cleaning up all old refblocks and the old reftable below the "done" + * label */ + new_reftable = old_reftable; + new_reftable_size = old_reftable_size; + new_reftable_offset = old_reftable_offset; + +done: + if (new_reftable) { + /* On success, new_reftable actually points to the old reftable (and + * new_reftable_size is the old reftable's size); but that is just + * fine */ + for (i = 0; i < new_reftable_size; i++) { + uint64_t offset = new_reftable[i] & REFT_OFFSET_MASK; + if (offset) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_OTHER); + } + } + g_free(new_reftable); + + if (new_reftable_offset > 0) { + qcow2_free_clusters(bs, new_reftable_offset, + new_reftable_size * sizeof(uint64_t), + QCOW2_DISCARD_OTHER); + } + } + + qemu_vfree(new_refblock); + return ret; +} diff --git a/block/qcow2.c b/block/qcow2.c index 5b59fa3d7f..1789af43d2 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1282,6 +1282,52 @@ static void qcow2_reopen_abort(BDRVReopenState *state) g_free(state->opaque); } +static void qcow2_join_options(QDict *options, QDict *old_options) +{ + bool has_new_overlap_template = + qdict_haskey(options, QCOW2_OPT_OVERLAP) || + qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE); + bool has_new_total_cache_size = + qdict_haskey(options, QCOW2_OPT_CACHE_SIZE); + bool has_all_cache_options; + + /* New overlap template overrides all old overlap options */ + if (has_new_overlap_template) { + qdict_del(old_options, QCOW2_OPT_OVERLAP); + qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE); + qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER); + qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1); + qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2); + qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE); + qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK); + qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE); + qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1); + qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2); + } + + /* New total cache size overrides all old options */ + if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) { + qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE); + qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); + } + + qdict_join(options, old_options, false); + + /* + * If after merging all cache size options are set, an old total size is + * overwritten. Do keep all options, however, if all three are new. The + * resulting error message is what we want to happen. + */ + has_all_cache_options = + qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) || + qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) || + qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); + + if (has_all_cache_options && !has_new_total_cache_size) { + qdict_del(options, QCOW2_OPT_CACHE_SIZE); + } +} + static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { @@ -2757,6 +2803,10 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) .has_corrupt = true, .refcount_bits = s->refcount_bits, }; + } else { + /* if this assertion fails, this probably means a new version was + * added without having it covered here */ + assert(false); } return spec_info; @@ -2824,7 +2874,7 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, * have to be removed. */ static int qcow2_downgrade(BlockDriverState *bs, int target_version, - BlockDriverAmendStatusCB *status_cb) + BlockDriverAmendStatusCB *status_cb, void *cb_opaque) { BDRVQcow2State *s = bs->opaque; int current_version = s->qcow_version; @@ -2839,13 +2889,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, } if (s->refcount_order != 4) { - /* we would have to convert the image to a refcount_order == 4 image - * here; however, since qemu (at the time of writing this) does not - * support anything different than 4 anyway, there is no point in doing - * so right now; however, we should error out (if qemu supports this in - * the future and this code has not been adapted) */ - error_report("qcow2_downgrade: Image refcount orders other than 4 are " - "currently not supported."); + error_report("compat=0.10 requires refcount_bits=16"); return -ENOTSUP; } @@ -2873,7 +2917,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, /* clearing autoclear features is trivial */ s->autoclear_features = 0; - ret = qcow2_expand_zero_clusters(bs, status_cb); + ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque); if (ret < 0) { return ret; } @@ -2887,8 +2931,79 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, return 0; } +typedef enum Qcow2AmendOperation { + /* This is the value Qcow2AmendHelperCBInfo::last_operation will be + * statically initialized to so that the helper CB can discern the first + * invocation from an operation change */ + QCOW2_NO_OPERATION = 0, + + QCOW2_CHANGING_REFCOUNT_ORDER, + QCOW2_DOWNGRADING, +} Qcow2AmendOperation; + +typedef struct Qcow2AmendHelperCBInfo { + /* The code coordinating the amend operations should only modify + * these four fields; the rest will be managed by the CB */ + BlockDriverAmendStatusCB *original_status_cb; + void *original_cb_opaque; + + Qcow2AmendOperation current_operation; + + /* Total number of operations to perform (only set once) */ + int total_operations; + + /* The following fields are managed by the CB */ + + /* Number of operations completed */ + int operations_completed; + + /* Cumulative offset of all completed operations */ + int64_t offset_completed; + + Qcow2AmendOperation last_operation; + int64_t last_work_size; +} Qcow2AmendHelperCBInfo; + +static void qcow2_amend_helper_cb(BlockDriverState *bs, + int64_t operation_offset, + int64_t operation_work_size, void *opaque) +{ + Qcow2AmendHelperCBInfo *info = opaque; + int64_t current_work_size; + int64_t projected_work_size; + + if (info->current_operation != info->last_operation) { + if (info->last_operation != QCOW2_NO_OPERATION) { + info->offset_completed += info->last_work_size; + info->operations_completed++; + } + + info->last_operation = info->current_operation; + } + + assert(info->total_operations > 0); + assert(info->operations_completed < info->total_operations); + + info->last_work_size = operation_work_size; + + current_work_size = info->offset_completed + operation_work_size; + + /* current_work_size is the total work size for (operations_completed + 1) + * operations (which includes this one), so multiply it by the number of + * operations not covered and divide it by the number of operations + * covered to get a projection for the operations not covered */ + projected_work_size = current_work_size * (info->total_operations - + info->operations_completed - 1) + / (info->operations_completed + 1); + + info->original_status_cb(bs, info->offset_completed + operation_offset, + current_work_size + projected_work_size, + info->original_cb_opaque); +} + static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, - BlockDriverAmendStatusCB *status_cb) + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque) { BDRVQcow2State *s = bs->opaque; int old_version = s->qcow_version, new_version = old_version; @@ -2898,8 +3013,10 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, const char *compat = NULL; uint64_t cluster_size = s->cluster_size; bool encrypt; + int refcount_bits = s->refcount_bits; int ret; QemuOptDesc *desc = opts->list->desc; + Qcow2AmendHelperCBInfo helper_cb_info; while (desc && desc->name) { if (!qemu_opt_find(opts, desc->name)) { @@ -2917,11 +3034,11 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } else if (!strcmp(compat, "1.1")) { new_version = 3; } else { - fprintf(stderr, "Unknown compatibility level %s.\n", compat); + error_report("Unknown compatibility level %s", compat); return -EINVAL; } } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) { - fprintf(stderr, "Cannot change preallocation mode.\n"); + error_report("Cannot change preallocation mode"); return -ENOTSUP; } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) { new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); @@ -2934,47 +3051,74 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, !!s->cipher); if (encrypt != !!s->cipher) { - fprintf(stderr, "Changing the encryption flag is not " - "supported.\n"); + error_report("Changing the encryption flag is not supported"); return -ENOTSUP; } } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) { cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, cluster_size); if (cluster_size != s->cluster_size) { - fprintf(stderr, "Changing the cluster size is not " - "supported.\n"); + error_report("Changing the cluster size is not supported"); return -ENOTSUP; } } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) { lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS, lazy_refcounts); } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) { - error_report("Cannot change refcount entry width"); - return -ENOTSUP; + refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS, + refcount_bits); + + if (refcount_bits <= 0 || refcount_bits > 64 || + !is_power_of_2(refcount_bits)) + { + error_report("Refcount width must be a power of two and may " + "not exceed 64 bits"); + return -EINVAL; + } } else { - /* if this assertion fails, this probably means a new option was + /* if this point is reached, this probably means a new option was * added without having it covered here */ - assert(false); + abort(); } desc++; } - if (new_version != old_version) { - if (new_version > old_version) { - /* Upgrade */ - s->qcow_version = new_version; - ret = qcow2_update_header(bs); - if (ret < 0) { - s->qcow_version = old_version; - return ret; - } - } else { - ret = qcow2_downgrade(bs, new_version, status_cb); - if (ret < 0) { - return ret; - } + helper_cb_info = (Qcow2AmendHelperCBInfo){ + .original_status_cb = status_cb, + .original_cb_opaque = cb_opaque, + .total_operations = (new_version < old_version) + + (s->refcount_bits != refcount_bits) + }; + + /* Upgrade first (some features may require compat=1.1) */ + if (new_version > old_version) { + s->qcow_version = new_version; + ret = qcow2_update_header(bs); + if (ret < 0) { + s->qcow_version = old_version; + return ret; + } + } + + if (s->refcount_bits != refcount_bits) { + int refcount_order = ctz32(refcount_bits); + Error *local_error = NULL; + + if (new_version < 3 && refcount_bits != 16) { + error_report("Different refcount widths than 16 bits require " + "compatibility level 1.1 or above (use compat=1.1 or " + "greater)"); + return -EINVAL; + } + + helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER; + ret = qcow2_change_refcount_order(bs, refcount_order, + &qcow2_amend_helper_cb, + &helper_cb_info, &local_error); + if (ret < 0) { + error_report_err(local_error); + return ret; } } @@ -2989,9 +3133,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, if (s->use_lazy_refcounts != lazy_refcounts) { if (lazy_refcounts) { - if (s->qcow_version < 3) { - fprintf(stderr, "Lazy refcounts only supported with compatibility " - "level 1.1 and above (use compat=1.1 or greater)\n"); + if (new_version < 3) { + error_report("Lazy refcounts only supported with compatibility " + "level 1.1 and above (use compat=1.1 or greater)"); return -EINVAL; } s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; @@ -3025,6 +3169,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } } + /* Downgrade last (so unsupported features can be removed before) */ + if (new_version < old_version) { + helper_cb_info.current_operation = QCOW2_DOWNGRADING; + ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb, + &helper_cb_info); + if (ret < 0) { + return ret; + } + } + return 0; } @@ -3145,6 +3299,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_reopen_prepare = qcow2_reopen_prepare, .bdrv_reopen_commit = qcow2_reopen_commit, .bdrv_reopen_abort = qcow2_reopen_abort, + .bdrv_join_options = qcow2_join_options, .bdrv_create = qcow2_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = qcow2_co_get_block_status, diff --git a/block/qcow2.h b/block/qcow2.h index b8c500b9dc..a063a3c1a1 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -529,6 +529,10 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset, int64_t size); +int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, Error **errp); + /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size); @@ -553,7 +557,8 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors); int qcow2_expand_zero_clusters(BlockDriverState *bs, - BlockDriverAmendStatusCB *status_cb); + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque); /* qcow2-snapshot.c functions */ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); diff --git a/block/quorum.c b/block/quorum.c index d162459bd9..6793f126c5 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -997,7 +997,7 @@ static void quorum_attach_aio_context(BlockDriverState *bs, } } -static void quorum_refresh_filename(BlockDriverState *bs) +static void quorum_refresh_filename(BlockDriverState *bs, QDict *options) { BDRVQuorumState *s = bs->opaque; QDict *opts; diff --git a/block/raw-posix.c b/block/raw-posix.c index ffeebe1a4c..076d0708a7 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -500,21 +500,17 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, goto fail; } if (!s->use_aio && (bdrv_flags & BDRV_O_NATIVE_AIO)) { - error_printf("WARNING: aio=native was specified for '%s', but " - "it requires cache.direct=on, which was not " - "specified. Falling back to aio=threads.\n" - " This will become an error condition in " - "future QEMU versions.\n", - bs->filename); + error_setg(errp, "aio=native was specified, but it requires " + "cache.direct=on, which was not specified."); + ret = -EINVAL; + goto fail; } #else if (bdrv_flags & BDRV_O_NATIVE_AIO) { - error_printf("WARNING: aio=native was specified for '%s', but " - "is not supported in this build. Falling back to " - "aio=threads.\n" - " This will become an error condition in " - "future QEMU versions.\n", - bs->filename); + error_setg(errp, "aio=native was specified, but is not supported " + "in this build."); + ret = -EINVAL; + goto fail; } #endif /* !defined(CONFIG_LINUX_AIO) */ diff --git a/block/snapshot.c b/block/snapshot.c index 6e9fa8da98..2d86b88a28 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -229,6 +229,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs, Error **errp) { BlockDriver *drv = bs->drv; + int ret; + if (!drv) { error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs)); return -ENOMEDIUM; @@ -239,18 +241,21 @@ int bdrv_snapshot_delete(BlockDriverState *bs, } /* drain all pending i/o before deleting snapshot */ - bdrv_drain(bs); + bdrv_drained_begin(bs); if (drv->bdrv_snapshot_delete) { - return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp); + ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp); + } else if (bs->file) { + ret = bdrv_snapshot_delete(bs->file->bs, snapshot_id, name, errp); + } else { + error_setg(errp, "Block format '%s' used by device '%s' " + "does not support internal snapshot deletion", + drv->format_name, bdrv_get_device_name(bs)); + ret = -ENOTSUP; } - if (bs->file) { - return bdrv_snapshot_delete(bs->file->bs, snapshot_id, name, errp); - } - error_setg(errp, "Block format '%s' used by device '%s' " - "does not support internal snapshot deletion", - drv->format_name, bdrv_get_device_name(bs)); - return -ENOTSUP; + + bdrv_drained_end(bs); + return ret; } int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, |