diff options
-rw-r--r-- | block/dirty-bitmap.c | 18 | ||||
-rw-r--r-- | block/qcow2-bitmap.c | 12 | ||||
-rw-r--r-- | block/qcow2-cache.c | 80 | ||||
-rw-r--r-- | block/qcow2-cluster.c | 509 | ||||
-rw-r--r-- | block/qcow2-refcount.c | 206 | ||||
-rw-r--r-- | block/qcow2.c | 63 | ||||
-rw-r--r-- | block/qcow2.h | 33 | ||||
-rw-r--r-- | blockdev.c | 10 | ||||
-rw-r--r-- | include/block/dirty-bitmap.h | 1 | ||||
-rw-r--r-- | qapi/block-core.json | 12 | ||||
-rw-r--r-- | qemu-doc.texi | 7 | ||||
-rwxr-xr-x | tests/qemu-iotests/061 | 16 | ||||
-rw-r--r-- | tests/qemu-iotests/061.out | 61 | ||||
-rwxr-xr-x | tests/qemu-iotests/103 | 17 | ||||
-rw-r--r-- | tests/qemu-iotests/103.out | 3 | ||||
-rwxr-xr-x | tests/qemu-iotests/137 | 5 | ||||
-rw-r--r-- | tests/qemu-iotests/137.out | 2 | ||||
-rwxr-xr-x | tests/qemu-iotests/165 | 2 | ||||
-rwxr-xr-x | tests/qemu-iotests/176 | 2 |
19 files changed, 610 insertions, 449 deletions
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 7879d13ddb..909f0517f8 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -52,8 +52,6 @@ struct BdrvDirtyBitmap { Such operations must fail and both the image and this bitmap must remain unchanged while this flag is set. */ - bool autoload; /* For persistent bitmaps: bitmap must be - autoloaded on image opening */ bool persistent; /* bitmap must be saved to owner disk image */ QLIST_ENTRY(BdrvDirtyBitmap) list; }; @@ -104,7 +102,6 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) g_free(bitmap->name); bitmap->name = NULL; bitmap->persistent = false; - bitmap->autoload = false; } /* Called with BQL taken. */ @@ -261,8 +258,6 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, bitmap->successor = NULL; successor->persistent = bitmap->persistent; bitmap->persistent = false; - successor->autoload = bitmap->autoload; - bitmap->autoload = false; bdrv_release_dirty_bitmap(bs, bitmap); return successor; @@ -667,19 +662,6 @@ bool bdrv_has_readonly_bitmaps(BlockDriverState *bs) } /* Called with BQL taken. */ -void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload) -{ - qemu_mutex_lock(bitmap->mutex); - bitmap->autoload = autoload; - qemu_mutex_unlock(bitmap->mutex); -} - -bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap) -{ - return bitmap->autoload; -} - -/* Called with BQL taken. */ void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, bool persistent) { qemu_mutex_lock(bitmap->mutex); diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index efa10c6663..4f6fd863ea 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -933,14 +933,14 @@ static void set_readonly_helper(gpointer bitmap, gpointer value) bdrv_dirty_bitmap_set_readonly(bitmap, (bool)value); } -/* qcow2_load_autoloading_dirty_bitmaps() +/* qcow2_load_dirty_bitmaps() * Return value is a hint for caller: true means that the Qcow2 header was * updated. (false doesn't mean that the header should be updated by the * caller, it just means that updating was not needed or the image cannot be * written to). * On failure the function returns false. */ -bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp) +bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp) { BDRVQcow2State *s = bs->opaque; Qcow2BitmapList *bm_list; @@ -960,14 +960,16 @@ bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp) } QSIMPLEQ_FOREACH(bm, bm_list, entry) { - if ((bm->flags & BME_FLAG_AUTO) && !(bm->flags & BME_FLAG_IN_USE)) { + if (!(bm->flags & BME_FLAG_IN_USE)) { BdrvDirtyBitmap *bitmap = load_bitmap(bs, bm, errp); if (bitmap == NULL) { goto fail; } + if (!(bm->flags & BME_FLAG_AUTO)) { + bdrv_disable_dirty_bitmap(bitmap); + } bdrv_dirty_bitmap_set_persistance(bitmap, true); - bdrv_dirty_bitmap_set_autoload(bitmap, true); bm->flags |= BME_FLAG_IN_USE; created_dirty_bitmaps = g_slist_append(created_dirty_bitmaps, bitmap); @@ -1369,7 +1371,7 @@ void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp) bm->table.size = 0; QSIMPLEQ_INSERT_TAIL(&drop_tables, tb, entry); } - bm->flags = bdrv_dirty_bitmap_get_autoload(bitmap) ? BME_FLAG_AUTO : 0; + bm->flags = bdrv_dirty_bitmap_enabled(bitmap) ? BME_FLAG_AUTO : 0; bm->granularity_bits = ctz32(bdrv_dirty_bitmap_granularity(bitmap)); bm->dirty_bitmap = bitmap; } diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index c48ffebd8f..d9dafa31e5 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -39,26 +39,23 @@ struct Qcow2Cache { Qcow2CachedTable *entries; struct Qcow2Cache *depends; int size; + int table_size; bool depends_on_flush; void *table_array; uint64_t lru_counter; uint64_t cache_clean_lru_counter; }; -static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs, - Qcow2Cache *c, int table) +static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, int table) { - BDRVQcow2State *s = bs->opaque; - return (uint8_t *) c->table_array + (size_t) table * s->cluster_size; + return (uint8_t *) c->table_array + (size_t) table * c->table_size; } -static inline int qcow2_cache_get_table_idx(BlockDriverState *bs, - Qcow2Cache *c, void *table) +static inline int qcow2_cache_get_table_idx(Qcow2Cache *c, void *table) { - BDRVQcow2State *s = bs->opaque; ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array; - int idx = table_offset / s->cluster_size; - assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0); + int idx = table_offset / c->table_size; + assert(idx >= 0 && idx < c->size && table_offset % c->table_size == 0); return idx; } @@ -74,15 +71,13 @@ static inline const char *qcow2_cache_get_name(BDRVQcow2State *s, Qcow2Cache *c) } } -static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c, - int i, int num_tables) +static void qcow2_cache_table_release(Qcow2Cache *c, int i, int num_tables) { /* Using MADV_DONTNEED to discard memory is a Linux-specific feature */ #ifdef CONFIG_LINUX - BDRVQcow2State *s = bs->opaque; - void *t = qcow2_cache_get_table_addr(bs, c, i); + void *t = qcow2_cache_get_table_addr(c, i); int align = getpagesize(); - size_t mem_size = (size_t) s->cluster_size * num_tables; + size_t mem_size = (size_t) c->table_size * num_tables; size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t; size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align); if (mem_size > offset && length > 0) { @@ -98,7 +93,7 @@ static inline bool can_clean_entry(Qcow2Cache *c, int i) t->lru_counter <= c->cache_clean_lru_counter; } -void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c) +void qcow2_cache_clean_unused(Qcow2Cache *c) { int i = 0; while (i < c->size) { @@ -118,23 +113,30 @@ void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c) } if (to_clean > 0) { - qcow2_cache_table_release(bs, c, i - to_clean, to_clean); + qcow2_cache_table_release(c, i - to_clean, to_clean); } } c->cache_clean_lru_counter = c->lru_counter; } -Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) +Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables, + unsigned table_size) { BDRVQcow2State *s = bs->opaque; Qcow2Cache *c; + assert(num_tables > 0); + assert(is_power_of_2(table_size)); + assert(table_size >= (1 << MIN_CLUSTER_BITS)); + assert(table_size <= s->cluster_size); + c = g_new0(Qcow2Cache, 1); c->size = num_tables; + c->table_size = table_size; c->entries = g_try_new0(Qcow2CachedTable, num_tables); c->table_array = qemu_try_blockalign(bs->file->bs, - (size_t) num_tables * s->cluster_size); + (size_t) num_tables * c->table_size); if (!c->entries || !c->table_array) { qemu_vfree(c->table_array); @@ -146,7 +148,7 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) return c; } -int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c) +int qcow2_cache_destroy(Qcow2Cache *c) { int i; @@ -203,13 +205,13 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) if (c == s->refcount_block_cache) { ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK, - c->entries[i].offset, s->cluster_size); + c->entries[i].offset, c->table_size); } else if (c == s->l2_table_cache) { ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2, - c->entries[i].offset, s->cluster_size); + c->entries[i].offset, c->table_size); } else { ret = qcow2_pre_write_overlap_check(bs, 0, - c->entries[i].offset, s->cluster_size); + c->entries[i].offset, c->table_size); } if (ret < 0) { @@ -223,7 +225,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) } ret = bdrv_pwrite(bs->file, c->entries[i].offset, - qcow2_cache_get_table_addr(bs, c, i), s->cluster_size); + qcow2_cache_get_table_addr(c, i), c->table_size); if (ret < 0) { return ret; } @@ -309,7 +311,7 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c) c->entries[i].lru_counter = 0; } - qcow2_cache_table_release(bs, c, 0, c->size); + qcow2_cache_table_release(c, 0, c->size); c->lru_counter = 0; @@ -331,7 +333,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, offset, read_from_disk); - if (offset_into_cluster(s, offset)) { + if (!QEMU_IS_ALIGNED(offset, c->table_size)) { qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s " "cache: Offset %#" PRIx64 " is unaligned", qcow2_cache_get_name(s, c), offset); @@ -339,7 +341,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, } /* Check if the table is already cached */ - i = lookup_index = (offset / s->cluster_size * 4) % c->size; + i = lookup_index = (offset / c->table_size * 4) % c->size; do { const Qcow2CachedTable *t = &c->entries[i]; if (t->offset == offset) { @@ -379,8 +381,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, } ret = bdrv_pread(bs->file, offset, - qcow2_cache_get_table_addr(bs, c, i), - s->cluster_size); + qcow2_cache_get_table_addr(c, i), + c->table_size); if (ret < 0) { return ret; } @@ -391,7 +393,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, /* And return the right table */ found: c->entries[i].ref++; - *table = qcow2_cache_get_table_addr(bs, c, i); + *table = qcow2_cache_get_table_addr(c, i); trace_qcow2_cache_get_done(qemu_coroutine_self(), c == s->l2_table_cache, i); @@ -411,9 +413,9 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, return qcow2_cache_do_get(bs, c, offset, table, false); } -void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table) +void qcow2_cache_put(Qcow2Cache *c, void **table) { - int i = qcow2_cache_get_table_idx(bs, c, *table); + int i = qcow2_cache_get_table_idx(c, *table); c->entries[i].ref--; *table = NULL; @@ -425,30 +427,28 @@ void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table) assert(c->entries[i].ref >= 0); } -void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c, - void *table) +void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table) { - int i = qcow2_cache_get_table_idx(bs, c, table); + int i = qcow2_cache_get_table_idx(c, table); assert(c->entries[i].offset != 0); c->entries[i].dirty = true; } -void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c, - uint64_t offset) +void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset) { int i; for (i = 0; i < c->size; i++) { if (c->entries[i].offset == offset) { - return qcow2_cache_get_table_addr(bs, c, i); + return qcow2_cache_get_table_addr(c, i); } } return NULL; } -void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table) +void qcow2_cache_discard(Qcow2Cache *c, void *table) { - int i = qcow2_cache_get_table_idx(bs, c, table); + int i = qcow2_cache_get_table_idx(c, table); assert(c->entries[i].ref == 0); @@ -456,5 +456,5 @@ void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table) c->entries[i].lru_counter = 0; c->entries[i].dirty = false; - qcow2_cache_table_release(bs, c, i, 1); + qcow2_cache_table_release(c, i, 1); } diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index f077cd3ac5..e406b0f3b9 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -195,20 +195,26 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, /* * l2_load * - * Loads a L2 table into memory. If the table is in the cache, the cache - * is used; otherwise the L2 table is loaded from the image file. + * @bs: The BlockDriverState + * @offset: A guest offset, used to calculate what slice of the L2 + * table to load. + * @l2_offset: Offset to the L2 table in the image file. + * @l2_slice: Location to store the pointer to the L2 slice. * - * Returns a pointer to the L2 table on success, or NULL if the read from - * the image file failed. + * Loads a L2 slice into memory (L2 slices are the parts of L2 tables + * that are loaded by the qcow2 cache). If the slice is in the cache, + * the cache is used; otherwise the L2 slice is loaded from the image + * file. */ - -static int l2_load(BlockDriverState *bs, uint64_t l2_offset, - uint64_t **l2_table) +static int l2_load(BlockDriverState *bs, uint64_t offset, + uint64_t l2_offset, uint64_t **l2_slice) { BDRVQcow2State *s = bs->opaque; + int start_of_slice = sizeof(uint64_t) * + (offset_to_l2_index(s, offset) - offset_to_l2_slice_index(s, offset)); - return qcow2_cache_get(bs, s->l2_table_cache, l2_offset, - (void **)l2_table); + return qcow2_cache_get(bs, s->l2_table_cache, l2_offset + start_of_slice, + (void **)l2_slice); } /* @@ -257,11 +263,12 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index) * */ -static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) +static int l2_allocate(BlockDriverState *bs, int l1_index) { BDRVQcow2State *s = bs->opaque; uint64_t old_l2_offset; - uint64_t *l2_table = NULL; + uint64_t *l2_slice = NULL; + unsigned slice, slice_size2, n_slices; int64_t l2_offset; int ret; @@ -292,39 +299,47 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) /* allocate a new entry in the l2 cache */ + slice_size2 = s->l2_slice_size * sizeof(uint64_t); + n_slices = s->cluster_size / slice_size2; + trace_qcow2_l2_allocate_get_empty(bs, l1_index); - ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table); - if (ret < 0) { - goto fail; - } + for (slice = 0; slice < n_slices; slice++) { + ret = qcow2_cache_get_empty(bs, s->l2_table_cache, + l2_offset + slice * slice_size2, + (void **) &l2_slice); + if (ret < 0) { + goto fail; + } - l2_table = *table; + if ((old_l2_offset & L1E_OFFSET_MASK) == 0) { + /* if there was no old l2 table, clear the new slice */ + memset(l2_slice, 0, slice_size2); + } else { + uint64_t *old_slice; + uint64_t old_l2_slice_offset = + (old_l2_offset & L1E_OFFSET_MASK) + slice * slice_size2; + + /* if there was an old l2 table, read a slice from the disk */ + BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); + ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_slice_offset, + (void **) &old_slice); + if (ret < 0) { + goto fail; + } - if ((old_l2_offset & L1E_OFFSET_MASK) == 0) { - /* if there was no old l2 table, clear the new table */ - memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); - } else { - uint64_t* old_table; + memcpy(l2_slice, old_slice, slice_size2); - /* if there was an old l2 table, read it from the disk */ - BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); - ret = qcow2_cache_get(bs, s->l2_table_cache, - old_l2_offset & L1E_OFFSET_MASK, - (void**) &old_table); - if (ret < 0) { - goto fail; + qcow2_cache_put(s->l2_table_cache, (void **) &old_slice); } - memcpy(l2_table, old_table, s->cluster_size); + /* write the l2 slice to the file */ + BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); - qcow2_cache_put(bs, s->l2_table_cache, (void **) &old_table); + trace_qcow2_l2_allocate_write_l2(bs, l1_index); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); } - /* write the l2 table to the file */ - BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); - - trace_qcow2_l2_allocate_write_l2(bs, l1_index); - qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); ret = qcow2_cache_flush(bs, s->l2_table_cache); if (ret < 0) { goto fail; @@ -338,14 +353,13 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) goto fail; } - *table = l2_table; trace_qcow2_l2_allocate_done(bs, l1_index, 0); return 0; fail: trace_qcow2_l2_allocate_done(bs, l1_index, ret); - if (l2_table != NULL) { - qcow2_cache_put(bs, s->l2_table_cache, (void**) table); + if (l2_slice != NULL) { + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); } s->l1_table[l1_index] = old_l2_offset; if (l2_offset > 0) { @@ -356,19 +370,19 @@ fail: } /* - * Checks how many clusters in a given L2 table are contiguous in the image + * Checks how many clusters in a given L2 slice are contiguous in the image * file. As soon as one of the flags in the bitmask stop_flags changes compared * to the first cluster, the search is stopped and the cluster is not counted * as contiguous. (This allows it, for example, to stop at the first compressed * cluster which may require a different handling) */ static int count_contiguous_clusters(int nb_clusters, int cluster_size, - uint64_t *l2_table, uint64_t stop_flags) + uint64_t *l2_slice, uint64_t stop_flags) { int i; QCow2ClusterType first_cluster_type; uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED; - uint64_t first_entry = be64_to_cpu(l2_table[0]); + uint64_t first_entry = be64_to_cpu(l2_slice[0]); uint64_t offset = first_entry & mask; if (!offset) { @@ -381,7 +395,7 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC); for (i = 0; i < nb_clusters; i++) { - uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask; + uint64_t l2_entry = be64_to_cpu(l2_slice[i]) & mask; if (offset + (uint64_t) i * cluster_size != l2_entry) { break; } @@ -392,10 +406,10 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, /* * Checks how many consecutive unallocated clusters in a given L2 - * table have the same cluster type. + * slice have the same cluster type. */ static int count_contiguous_clusters_unallocated(int nb_clusters, - uint64_t *l2_table, + uint64_t *l2_slice, QCow2ClusterType wanted_type) { int i; @@ -403,7 +417,7 @@ static int count_contiguous_clusters_unallocated(int nb_clusters, assert(wanted_type == QCOW2_CLUSTER_ZERO_PLAIN || wanted_type == QCOW2_CLUSTER_UNALLOCATED); for (i = 0; i < nb_clusters; i++) { - uint64_t entry = be64_to_cpu(l2_table[i]); + uint64_t entry = be64_to_cpu(l2_slice[i]); QCow2ClusterType type = qcow2_get_cluster_type(entry); if (type != wanted_type) { @@ -515,8 +529,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, { BDRVQcow2State *s = bs->opaque; unsigned int l2_index; - uint64_t l1_index, l2_offset, *l2_table; - int l1_bits, c; + uint64_t l1_index, l2_offset, *l2_slice; + int c; unsigned int offset_in_cluster; uint64_t bytes_available, bytes_needed, nb_clusters; QCow2ClusterType type; @@ -525,12 +539,12 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, offset_in_cluster = offset_into_cluster(s, offset); bytes_needed = (uint64_t) *bytes + offset_in_cluster; - l1_bits = s->l2_bits + s->cluster_bits; - /* compute how many bytes there are between the start of the cluster - * containing offset and the end of the l1 entry */ - bytes_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1)) - + offset_in_cluster; + * containing offset and the end of the l2 slice that contains + * the entry pointing to it */ + bytes_available = + ((uint64_t) (s->l2_slice_size - offset_to_l2_slice_index(s, offset))) + << s->cluster_bits; if (bytes_needed > bytes_available) { bytes_needed = bytes_available; @@ -540,7 +554,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, /* seek to the l2 offset in the l1 table */ - l1_index = offset >> l1_bits; + l1_index = offset_to_l1_index(s, offset); if (l1_index >= s->l1_size) { type = QCOW2_CLUSTER_UNALLOCATED; goto out; @@ -559,17 +573,17 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, return -EIO; } - /* load the l2 table in memory */ + /* load the l2 slice in memory */ - ret = l2_load(bs, l2_offset, &l2_table); + ret = l2_load(bs, offset, l2_offset, &l2_slice); if (ret < 0) { return ret; } /* find the cluster offset for the given disk offset */ - l2_index = offset_to_l2_index(s, offset); - *cluster_offset = be64_to_cpu(l2_table[l2_index]); + l2_index = offset_to_l2_slice_index(s, offset); + *cluster_offset = be64_to_cpu(l2_slice[l2_index]); nb_clusters = size_to_clusters(s, bytes_needed); /* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned @@ -596,14 +610,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, case QCOW2_CLUSTER_UNALLOCATED: /* how many empty clusters ? */ c = count_contiguous_clusters_unallocated(nb_clusters, - &l2_table[l2_index], type); + &l2_slice[l2_index], type); *cluster_offset = 0; break; case QCOW2_CLUSTER_ZERO_ALLOC: case QCOW2_CLUSTER_NORMAL: /* how many allocated clusters ? */ c = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], QCOW_OFLAG_ZERO); + &l2_slice[l2_index], QCOW_OFLAG_ZERO); *cluster_offset &= L2E_OFFSET_MASK; if (offset_into_cluster(s, *cluster_offset)) { qcow2_signal_corruption(bs, true, -1, -1, @@ -619,7 +633,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, abort(); } - qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); bytes_available = (int64_t)c * s->cluster_size; @@ -637,7 +651,7 @@ out: return type; fail: - qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table); + qcow2_cache_put(s->l2_table_cache, (void **)&l2_slice); return ret; } @@ -645,26 +659,25 @@ fail: * get_cluster_table * * for a given disk offset, load (and allocate if needed) - * the l2 table. + * the appropriate slice of its l2 table. * - * the l2 table offset in the qcow2 file and the cluster index - * in the l2 table are given to the caller. + * the cluster index in the l2 slice is given to the caller. * * Returns 0 on success, -errno in failure case */ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, - uint64_t **new_l2_table, + uint64_t **new_l2_slice, int *new_l2_index) { BDRVQcow2State *s = bs->opaque; unsigned int l2_index; uint64_t l1_index, l2_offset; - uint64_t *l2_table = NULL; + uint64_t *l2_slice = NULL; int ret; /* seek to the l2 offset in the l1 table */ - l1_index = offset >> (s->l2_bits + s->cluster_bits); + l1_index = offset_to_l1_index(s, offset); if (l1_index >= s->l1_size) { ret = qcow2_grow_l1_table(bs, l1_index + 1, false); if (ret < 0) { @@ -681,17 +694,9 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, return -EIO; } - /* seek the l2 table of the given l2 offset */ - - if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) { - /* load the l2 table in memory */ - ret = l2_load(bs, l2_offset, &l2_table); - if (ret < 0) { - return ret; - } - } else { + if (!(s->l1_table[l1_index] & QCOW_OFLAG_COPIED)) { /* First allocate a new L2 table (and do COW if needed) */ - ret = l2_allocate(bs, l1_index, &l2_table); + ret = l2_allocate(bs, l1_index); if (ret < 0) { return ret; } @@ -701,13 +706,23 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t), QCOW2_DISCARD_OTHER); } + + /* Get the offset of the newly-allocated l2 table */ + l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; + assert(offset_into_cluster(s, l2_offset) == 0); + } + + /* load the l2 slice in memory */ + ret = l2_load(bs, offset, l2_offset, &l2_slice); + if (ret < 0) { + return ret; } /* find the cluster offset for the given disk offset */ - l2_index = offset_to_l2_index(s, offset); + l2_index = offset_to_l2_slice_index(s, offset); - *new_l2_table = l2_table; + *new_l2_slice = l2_slice; *new_l2_index = l2_index; return 0; @@ -732,26 +747,26 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, { BDRVQcow2State *s = bs->opaque; int l2_index, ret; - uint64_t *l2_table; + uint64_t *l2_slice; int64_t cluster_offset; int nb_csectors; - ret = get_cluster_table(bs, offset, &l2_table, &l2_index); + ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); if (ret < 0) { return 0; } /* Compression can't overwrite anything. Fail if the cluster was already * allocated. */ - cluster_offset = be64_to_cpu(l2_table[l2_index]); + cluster_offset = be64_to_cpu(l2_slice[l2_index]); if (cluster_offset & L2E_OFFSET_MASK) { - qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); return 0; } cluster_offset = qcow2_alloc_bytes(bs, compressed_size); if (cluster_offset < 0) { - qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); return 0; } @@ -766,9 +781,9 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, /* compressed clusters never have the copied flag */ BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED); - qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); - l2_table[l2_index] = cpu_to_be64(cluster_offset); - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); + l2_slice[l2_index] = cpu_to_be64(cluster_offset); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); return cluster_offset; } @@ -907,7 +922,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) { BDRVQcow2State *s = bs->opaque; int i, j = 0, l2_index, ret; - uint64_t *old_cluster, *l2_table; + uint64_t *old_cluster, *l2_slice; uint64_t cluster_offset = m->alloc_offset; trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); @@ -934,13 +949,13 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) s->refcount_block_cache); } - ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index); + ret = get_cluster_table(bs, m->offset, &l2_slice, &l2_index); if (ret < 0) { goto err; } - qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); - assert(l2_index + m->nb_clusters <= s->l2_size); + assert(l2_index + m->nb_clusters <= s->l2_slice_size); for (i = 0; i < m->nb_clusters; i++) { /* if two concurrent writes happen to the same unallocated cluster * each write allocates separate cluster and writes data concurrently. @@ -948,16 +963,16 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) * cluster the second one has to do RMW (which is done above by * perform_cow()), update l2 table with its cluster pointer and free * old cluster. This is what this loop does */ - if (l2_table[l2_index + i] != 0) { - old_cluster[j++] = l2_table[l2_index + i]; + if (l2_slice[l2_index + i] != 0) { + old_cluster[j++] = l2_slice[l2_index + i]; } - l2_table[l2_index + i] = cpu_to_be64((cluster_offset + + l2_slice[l2_index + i] = cpu_to_be64((cluster_offset + (i << s->cluster_bits)) | QCOW_OFLAG_COPIED); } - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); /* * If this was a COW, we need to decrease the refcount of the old cluster. @@ -984,12 +999,12 @@ err: * which must copy from the backing file) */ static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters, - uint64_t *l2_table, int l2_index) + uint64_t *l2_slice, int l2_index) { int i; for (i = 0; i < nb_clusters; i++) { - uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]); + uint64_t l2_entry = be64_to_cpu(l2_slice[l2_index + i]); QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry); switch(cluster_type) { @@ -1104,7 +1119,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, BDRVQcow2State *s = bs->opaque; int l2_index; uint64_t cluster_offset; - uint64_t *l2_table; + uint64_t *l2_slice; uint64_t nb_clusters; unsigned int keep_clusters; int ret; @@ -1116,23 +1131,23 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, == offset_into_cluster(s, *host_offset)); /* - * Calculate the number of clusters to look for. We stop at L2 table + * Calculate the number of clusters to look for. We stop at L2 slice * boundaries to keep things simple. */ nb_clusters = size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); - l2_index = offset_to_l2_index(s, guest_offset); - nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); + l2_index = offset_to_l2_slice_index(s, guest_offset); + nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); assert(nb_clusters <= INT_MAX); /* Find L2 entry for the first involved cluster */ - ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index); + ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index); if (ret < 0) { return ret; } - cluster_offset = be64_to_cpu(l2_table[l2_index]); + cluster_offset = be64_to_cpu(l2_slice[l2_index]); /* Check how many clusters are already allocated and don't need COW */ if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL @@ -1160,7 +1175,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, /* We keep all QCOW_OFLAG_COPIED clusters */ keep_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], + &l2_slice[l2_index], QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO); assert(keep_clusters <= nb_clusters); @@ -1175,7 +1190,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, /* Cleanup */ out: - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); /* Only return a host offset if we actually made progress. Otherwise we * would make requirements for handle_alloc() that it can't fulfill */ @@ -1259,7 +1274,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, { BDRVQcow2State *s = bs->opaque; int l2_index; - uint64_t *l2_table; + uint64_t *l2_slice; uint64_t entry; uint64_t nb_clusters; int ret; @@ -1272,29 +1287,29 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, assert(*bytes > 0); /* - * Calculate the number of clusters to look for. We stop at L2 table + * Calculate the number of clusters to look for. We stop at L2 slice * boundaries to keep things simple. */ nb_clusters = size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); - l2_index = offset_to_l2_index(s, guest_offset); - nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); + l2_index = offset_to_l2_slice_index(s, guest_offset); + nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); assert(nb_clusters <= INT_MAX); /* Find L2 entry for the first involved cluster */ - ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index); + ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index); if (ret < 0) { return ret; } - entry = be64_to_cpu(l2_table[l2_index]); + entry = be64_to_cpu(l2_slice[l2_index]); /* For the moment, overwrite compressed clusters one by one */ if (entry & QCOW_OFLAG_COMPRESSED) { nb_clusters = 1; } else { - nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index); + nb_clusters = count_cow_clusters(s, nb_clusters, l2_slice, l2_index); } /* This function is only called when there were no non-COW clusters, so if @@ -1323,7 +1338,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, * nb_clusters already to a range of COW clusters */ preallocated_nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], QCOW_OFLAG_COPIED); + &l2_slice[l2_index], QCOW_OFLAG_COPIED); assert(preallocated_nb_clusters > 0); nb_clusters = preallocated_nb_clusters; @@ -1334,7 +1349,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, keep_old_clusters = true; } - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); if (!alloc_cluster_offset) { /* Allocate, if necessary at a given offset in the image file */ @@ -1616,32 +1631,32 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) /* * This discards as many clusters of nb_clusters as possible at once (i.e. - * all clusters in the same L2 table) and returns the number of discarded + * all clusters in the same L2 slice) and returns the number of discarded * clusters. */ -static int discard_single_l2(BlockDriverState *bs, uint64_t offset, - uint64_t nb_clusters, enum qcow2_discard_type type, - bool full_discard) +static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, + uint64_t nb_clusters, + enum qcow2_discard_type type, bool full_discard) { BDRVQcow2State *s = bs->opaque; - uint64_t *l2_table; + uint64_t *l2_slice; int l2_index; int ret; int i; - ret = get_cluster_table(bs, offset, &l2_table, &l2_index); + ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); if (ret < 0) { return ret; } - /* Limit nb_clusters to one L2 table */ - nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); + /* Limit nb_clusters to one L2 slice */ + nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); assert(nb_clusters <= INT_MAX); for (i = 0; i < nb_clusters; i++) { uint64_t old_l2_entry; - old_l2_entry = be64_to_cpu(l2_table[l2_index + i]); + old_l2_entry = be64_to_cpu(l2_slice[l2_index + i]); /* * If full_discard is false, make sure that a discarded area reads back @@ -1679,18 +1694,18 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, } /* First remove L2 entries */ - qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); if (!full_discard && s->qcow_version >= 3) { - l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); + l2_slice[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); } else { - l2_table[l2_index + i] = cpu_to_be64(0); + l2_slice[l2_index + i] = cpu_to_be64(0); } /* Then decrease the refcount */ qcow2_free_any_clusters(bs, old_l2_entry, 1, type); } - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); return nb_clusters; } @@ -1714,10 +1729,10 @@ int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, s->cache_discards = true; - /* Each L2 table is handled by its own loop iteration */ + /* Each L2 slice is handled by its own loop iteration */ while (nb_clusters > 0) { - cleared = discard_single_l2(bs, offset, nb_clusters, type, - full_discard); + cleared = discard_in_l2_slice(bs, offset, nb_clusters, type, + full_discard); if (cleared < 0) { ret = cleared; goto fail; @@ -1737,33 +1752,33 @@ fail: /* * This zeroes as many clusters of nb_clusters as possible at once (i.e. - * all clusters in the same L2 table) and returns the number of zeroed + * all clusters in the same L2 slice) and returns the number of zeroed * clusters. */ -static int zero_single_l2(BlockDriverState *bs, uint64_t offset, - uint64_t nb_clusters, int flags) +static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset, + uint64_t nb_clusters, int flags) { BDRVQcow2State *s = bs->opaque; - uint64_t *l2_table; + uint64_t *l2_slice; int l2_index; int ret; int i; bool unmap = !!(flags & BDRV_REQ_MAY_UNMAP); - ret = get_cluster_table(bs, offset, &l2_table, &l2_index); + ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); if (ret < 0) { return ret; } - /* Limit nb_clusters to one L2 table */ - nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); + /* Limit nb_clusters to one L2 slice */ + nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); assert(nb_clusters <= INT_MAX); for (i = 0; i < nb_clusters; i++) { uint64_t old_offset; QCow2ClusterType cluster_type; - old_offset = be64_to_cpu(l2_table[l2_index + i]); + old_offset = be64_to_cpu(l2_slice[l2_index + i]); /* * Minimize L2 changes if the cluster already reads back as @@ -1775,16 +1790,16 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, continue; } - qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) { - l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); + l2_slice[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); } else { - l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO); + l2_slice[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO); } } - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); return nb_clusters; } @@ -1808,13 +1823,13 @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, return -ENOTSUP; } - /* Each L2 table is handled by its own loop iteration */ + /* Each L2 slice is handled by its own loop iteration */ nb_clusters = size_to_clusters(s, bytes); s->cache_discards = true; while (nb_clusters > 0) { - cleared = zero_single_l2(bs, offset, nb_clusters, flags); + cleared = zero_in_l2_slice(bs, offset, nb_clusters, flags); if (cleared < 0) { ret = cleared; goto fail; @@ -1848,22 +1863,25 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, { BDRVQcow2State *s = bs->opaque; bool is_active_l1 = (l1_table == s->l1_table); - uint64_t *l2_table = NULL; + uint64_t *l2_slice = NULL; + unsigned slice, slice_size2, n_slices; int ret; int i, j; + slice_size2 = s->l2_slice_size * sizeof(uint64_t); + n_slices = s->cluster_size / slice_size2; + if (!is_active_l1) { /* inactive L2 tables require a buffer to be stored in when loading * them from disk */ - l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size); - if (l2_table == NULL) { + l2_slice = qemu_try_blockalign(bs->file->bs, slice_size2); + if (l2_slice == NULL) { return -ENOMEM; } } for (i = 0; i < l1_size; i++) { uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK; - bool l2_dirty = false; uint64_t l2_refcount; if (!l2_offset) { @@ -1883,124 +1901,131 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, goto fail; } - if (is_active_l1) { - /* get active L2 tables from cache */ - ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, - (void **)&l2_table); - } else { - /* load inactive L2 tables from disk */ - ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE, - (void *)l2_table, s->cluster_sectors); - } - if (ret < 0) { - goto fail; - } - ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits, &l2_refcount); if (ret < 0) { goto fail; } - for (j = 0; j < s->l2_size; j++) { - uint64_t l2_entry = be64_to_cpu(l2_table[j]); - int64_t offset = l2_entry & L2E_OFFSET_MASK; - QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry); - - if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN && - cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) { - continue; + for (slice = 0; slice < n_slices; slice++) { + uint64_t slice_offset = l2_offset + slice * slice_size2; + bool l2_dirty = false; + if (is_active_l1) { + /* get active L2 tables from cache */ + ret = qcow2_cache_get(bs, s->l2_table_cache, slice_offset, + (void **)&l2_slice); + } else { + /* load inactive L2 tables from disk */ + ret = bdrv_pread(bs->file, slice_offset, l2_slice, slice_size2); + } + if (ret < 0) { + goto fail; } - if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { - if (!bs->backing) { - /* not backed; therefore we can simply deallocate the - * cluster */ - l2_table[j] = 0; - l2_dirty = true; + for (j = 0; j < s->l2_slice_size; j++) { + uint64_t l2_entry = be64_to_cpu(l2_slice[j]); + int64_t offset = l2_entry & L2E_OFFSET_MASK; + QCow2ClusterType cluster_type = + qcow2_get_cluster_type(l2_entry); + + if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN && + cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) { continue; } - offset = qcow2_alloc_clusters(bs, s->cluster_size); - if (offset < 0) { - ret = offset; - goto fail; - } + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { + if (!bs->backing) { + /* not backed; therefore we can simply deallocate the + * cluster */ + l2_slice[j] = 0; + l2_dirty = true; + continue; + } - if (l2_refcount > 1) { - /* For shared L2 tables, set the refcount accordingly (it is - * already 1 and needs to be l2_refcount) */ - ret = qcow2_update_cluster_refcount(bs, - offset >> s->cluster_bits, + offset = qcow2_alloc_clusters(bs, s->cluster_size); + if (offset < 0) { + ret = offset; + goto fail; + } + + if (l2_refcount > 1) { + /* For shared L2 tables, set the refcount accordingly + * (it is already 1 and needs to be l2_refcount) */ + ret = qcow2_update_cluster_refcount( + bs, offset >> s->cluster_bits, refcount_diff(1, l2_refcount), false, QCOW2_DISCARD_OTHER); - if (ret < 0) { - qcow2_free_clusters(bs, offset, s->cluster_size, - QCOW2_DISCARD_OTHER); - goto fail; + if (ret < 0) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_OTHER); + goto fail; + } } } - } - if (offset_into_cluster(s, offset)) { - qcow2_signal_corruption(bs, true, -1, -1, - "Cluster allocation offset " - "%#" PRIx64 " unaligned (L2 offset: %#" - PRIx64 ", L2 index: %#x)", offset, - l2_offset, j); - if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { - qcow2_free_clusters(bs, offset, s->cluster_size, - QCOW2_DISCARD_ALWAYS); + if (offset_into_cluster(s, offset)) { + int l2_index = slice * s->l2_slice_size + j; + qcow2_signal_corruption( + bs, true, -1, -1, + "Cluster allocation offset " + "%#" PRIx64 " unaligned (L2 offset: %#" + PRIx64 ", L2 index: %#x)", offset, + l2_offset, l2_index); + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_ALWAYS); + } + ret = -EIO; + goto fail; } - ret = -EIO; - goto fail; - } - ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size); - if (ret < 0) { - if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { - qcow2_free_clusters(bs, offset, s->cluster_size, - QCOW2_DISCARD_ALWAYS); + ret = qcow2_pre_write_overlap_check(bs, 0, offset, + s->cluster_size); + if (ret < 0) { + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_ALWAYS); + } + goto fail; } - goto fail; - } - ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0); - if (ret < 0) { - if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { - qcow2_free_clusters(bs, offset, s->cluster_size, - QCOW2_DISCARD_ALWAYS); + ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0); + if (ret < 0) { + if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_ALWAYS); + } + goto fail; } - goto fail; - } - if (l2_refcount == 1) { - l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED); - } else { - l2_table[j] = cpu_to_be64(offset); + if (l2_refcount == 1) { + l2_slice[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED); + } else { + l2_slice[j] = cpu_to_be64(offset); + } + l2_dirty = true; } - l2_dirty = true; - } - if (is_active_l1) { - if (l2_dirty) { - qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); - qcow2_cache_depends_on_flush(s->l2_table_cache); - } - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); - } else { - if (l2_dirty) { - ret = qcow2_pre_write_overlap_check(bs, - QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, l2_offset, - s->cluster_size); - if (ret < 0) { - goto fail; + if (is_active_l1) { + if (l2_dirty) { + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); + qcow2_cache_depends_on_flush(s->l2_table_cache); } + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + } else { + if (l2_dirty) { + ret = qcow2_pre_write_overlap_check( + bs, QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, + slice_offset, slice_size2); + if (ret < 0) { + goto fail; + } - ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE, - (void *)l2_table, s->cluster_sectors); - if (ret < 0) { - goto fail; + ret = bdrv_pwrite(bs->file, slice_offset, + l2_slice, slice_size2); + if (ret < 0) { + goto fail; + } } } } @@ -2014,11 +2039,11 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, ret = 0; fail: - if (l2_table) { + if (l2_slice) { if (!is_active_l1) { - qemu_vfree(l2_table); + qemu_vfree(l2_slice); } else { - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); } } return ret; diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 92701ab7af..d46b69d7f3 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -277,7 +277,7 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, block_index = cluster_index & (s->refcount_block_size - 1); *refcount = s->get_refcount(refcount_block, block_index); - qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); + qcow2_cache_put(s->refcount_block_cache, &refcount_block); return 0; } @@ -421,7 +421,7 @@ static int alloc_refcount_block(BlockDriverState *bs, /* Now the new refcount block needs to be written to disk */ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE); - qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, *refcount_block); + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block); ret = qcow2_cache_flush(bs, s->refcount_block_cache); if (ret < 0) { goto fail; @@ -449,7 +449,7 @@ static int alloc_refcount_block(BlockDriverState *bs, return -EAGAIN; } - qcow2_cache_put(bs, s->refcount_block_cache, refcount_block); + qcow2_cache_put(s->refcount_block_cache, refcount_block); /* * If we come here, we need to grow the refcount table. Again, a new @@ -501,7 +501,7 @@ static int alloc_refcount_block(BlockDriverState *bs, fail: if (*refcount_block != NULL) { - qcow2_cache_put(bs, s->refcount_block_cache, refcount_block); + qcow2_cache_put(s->refcount_block_cache, refcount_block); } return ret; } @@ -623,7 +623,7 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset, goto fail; } memset(refblock_data, 0, s->cluster_size); - qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock_data); new_table[i] = block_offset; @@ -656,11 +656,11 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset, s->set_refcount(refblock_data, j, 1); } - qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock_data); } - qcow2_cache_put(bs, s->refcount_block_cache, &refblock_data); + qcow2_cache_put(s->refcount_block_cache, &refblock_data); } assert(block_offset == table_offset); @@ -836,7 +836,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, /* Load the refcount block and allocate it if needed */ if (table_index != old_table_index) { if (refcount_block) { - qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); + qcow2_cache_put(s->refcount_block_cache, &refcount_block); } ret = alloc_refcount_block(bs, cluster_index, &refcount_block); if (ret < 0) { @@ -845,8 +845,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, } old_table_index = table_index; - qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, - refcount_block); + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block); /* we can update the count and save it */ block_index = cluster_index & (s->refcount_block_size - 1); @@ -872,16 +871,16 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, if (refcount == 0) { void *table; - table = qcow2_cache_is_table_offset(bs, s->refcount_block_cache, + table = qcow2_cache_is_table_offset(s->refcount_block_cache, offset); if (table != NULL) { - qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); - qcow2_cache_discard(bs, s->refcount_block_cache, table); + qcow2_cache_put(s->refcount_block_cache, &refcount_block); + qcow2_cache_discard(s->refcount_block_cache, table); } - table = qcow2_cache_is_table_offset(bs, s->l2_table_cache, offset); + table = qcow2_cache_is_table_offset(s->l2_table_cache, offset); if (table != NULL) { - qcow2_cache_discard(bs, s->l2_table_cache, table); + qcow2_cache_discard(s->l2_table_cache, table); } if (s->discard_passthrough[type]) { @@ -898,7 +897,7 @@ fail: /* Write last changed block to disk */ if (refcount_block) { - qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); + qcow2_cache_put(s->refcount_block_cache, &refcount_block); } /* @@ -1184,17 +1183,20 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, int64_t l1_table_offset, int l1_size, int addend) { BDRVQcow2State *s = bs->opaque; - uint64_t *l1_table, *l2_table, l2_offset, entry, l1_size2, refcount; + uint64_t *l1_table, *l2_slice, l2_offset, entry, l1_size2, refcount; bool l1_allocated = false; int64_t old_entry, old_l2_offset; + unsigned slice, slice_size2, n_slices; int i, j, l1_modified = 0, nb_csectors; int ret; assert(addend >= -1 && addend <= 1); - l2_table = NULL; + l2_slice = NULL; l1_table = NULL; l1_size2 = l1_size * sizeof(uint64_t); + slice_size2 = s->l2_slice_size * sizeof(uint64_t); + n_slices = s->cluster_size / slice_size2; s->cache_discards = true; @@ -1237,91 +1239,97 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, goto fail; } - ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, - (void**) &l2_table); - if (ret < 0) { - goto fail; - } + for (slice = 0; slice < n_slices; slice++) { + ret = qcow2_cache_get(bs, s->l2_table_cache, + l2_offset + slice * slice_size2, + (void **) &l2_slice); + if (ret < 0) { + goto fail; + } - for (j = 0; j < s->l2_size; j++) { - uint64_t cluster_index; - uint64_t offset; - - entry = be64_to_cpu(l2_table[j]); - old_entry = entry; - entry &= ~QCOW_OFLAG_COPIED; - offset = entry & L2E_OFFSET_MASK; - - switch (qcow2_get_cluster_type(entry)) { - case QCOW2_CLUSTER_COMPRESSED: - nb_csectors = ((entry >> s->csize_shift) & - s->csize_mask) + 1; - if (addend != 0) { - ret = update_refcount(bs, - (entry & s->cluster_offset_mask) & ~511, + for (j = 0; j < s->l2_slice_size; j++) { + uint64_t cluster_index; + uint64_t offset; + + entry = be64_to_cpu(l2_slice[j]); + old_entry = entry; + entry &= ~QCOW_OFLAG_COPIED; + offset = entry & L2E_OFFSET_MASK; + + switch (qcow2_get_cluster_type(entry)) { + case QCOW2_CLUSTER_COMPRESSED: + nb_csectors = ((entry >> s->csize_shift) & + s->csize_mask) + 1; + if (addend != 0) { + ret = update_refcount( + bs, (entry & s->cluster_offset_mask) & ~511, nb_csectors * 512, abs(addend), addend < 0, QCOW2_DISCARD_SNAPSHOT); - if (ret < 0) { + if (ret < 0) { + goto fail; + } + } + /* compressed clusters are never modified */ + refcount = 2; + break; + + case QCOW2_CLUSTER_NORMAL: + case QCOW2_CLUSTER_ZERO_ALLOC: + if (offset_into_cluster(s, offset)) { + /* Here l2_index means table (not slice) index */ + int l2_index = slice * s->l2_slice_size + j; + qcow2_signal_corruption( + bs, true, -1, -1, "Cluster " + "allocation offset %#" PRIx64 + " unaligned (L2 offset: %#" + PRIx64 ", L2 index: %#x)", + offset, l2_offset, l2_index); + ret = -EIO; goto fail; } - } - /* compressed clusters are never modified */ - refcount = 2; - break; - - case QCOW2_CLUSTER_NORMAL: - case QCOW2_CLUSTER_ZERO_ALLOC: - if (offset_into_cluster(s, offset)) { - qcow2_signal_corruption(bs, true, -1, -1, "Cluster " - "allocation offset %#" PRIx64 - " unaligned (L2 offset: %#" - PRIx64 ", L2 index: %#x)", - offset, l2_offset, j); - ret = -EIO; - goto fail; - } - cluster_index = offset >> s->cluster_bits; - assert(cluster_index); - if (addend != 0) { - ret = qcow2_update_cluster_refcount(bs, - cluster_index, abs(addend), addend < 0, - QCOW2_DISCARD_SNAPSHOT); + cluster_index = offset >> s->cluster_bits; + assert(cluster_index); + if (addend != 0) { + ret = qcow2_update_cluster_refcount( + bs, cluster_index, abs(addend), addend < 0, + QCOW2_DISCARD_SNAPSHOT); + if (ret < 0) { + goto fail; + } + } + + ret = qcow2_get_refcount(bs, cluster_index, &refcount); if (ret < 0) { goto fail; } - } + break; - ret = qcow2_get_refcount(bs, cluster_index, &refcount); - if (ret < 0) { - goto fail; - } - break; - - case QCOW2_CLUSTER_ZERO_PLAIN: - case QCOW2_CLUSTER_UNALLOCATED: - refcount = 0; - break; + case QCOW2_CLUSTER_ZERO_PLAIN: + case QCOW2_CLUSTER_UNALLOCATED: + refcount = 0; + break; - default: - abort(); - } + default: + abort(); + } - if (refcount == 1) { - entry |= QCOW_OFLAG_COPIED; - } - if (entry != old_entry) { - if (addend > 0) { - qcow2_cache_set_dependency(bs, s->l2_table_cache, - s->refcount_block_cache); + if (refcount == 1) { + entry |= QCOW_OFLAG_COPIED; + } + if (entry != old_entry) { + if (addend > 0) { + qcow2_cache_set_dependency(bs, s->l2_table_cache, + s->refcount_block_cache); + } + l2_slice[j] = cpu_to_be64(entry); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, + l2_slice); } - l2_table[j] = cpu_to_be64(entry); - qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, - l2_table); } - } - qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table); + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); + } if (addend != 0) { ret = qcow2_update_cluster_refcount(bs, l2_offset >> @@ -1348,8 +1356,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, ret = bdrv_flush(bs); fail: - if (l2_table) { - qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + if (l2_slice) { + qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); } s->cache_discards = false; @@ -2849,7 +2857,7 @@ static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable, new_reftable_size, new_refblock, new_refblock_empty, allocated, errp); if (ret < 0) { - qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + qcow2_cache_put(s->refcount_block_cache, &refblock); return ret; } @@ -2862,7 +2870,7 @@ static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable, if (new_refcount_bits < 64 && refcount >> new_refcount_bits) { uint64_t offset; - qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + qcow2_cache_put(s->refcount_block_cache, &refblock); offset = ((reftable_index << s->refcount_block_bits) + refblock_index) << s->cluster_bits; @@ -2883,7 +2891,7 @@ static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable, new_refblock_empty = new_refblock_empty && refcount == 0; } - qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + qcow2_cache_put(s->refcount_block_cache, &refblock); } else { /* No refblock means every refcount is 0 */ for (refblock_index = 0; refblock_index < s->refcount_block_size; @@ -3175,24 +3183,24 @@ static int qcow2_discard_refcount_block(BlockDriverState *bs, offset_to_reftable_index(s, discard_block_offs), discard_block_offs, s->get_refcount(refblock, block_index)); - qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + qcow2_cache_put(s->refcount_block_cache, &refblock); return -EINVAL; } s->set_refcount(refblock, block_index, 0); - qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, refblock); + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock); - qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + qcow2_cache_put(s->refcount_block_cache, &refblock); if (cluster_index < s->free_cluster_index) { s->free_cluster_index = cluster_index; } - refblock = qcow2_cache_is_table_offset(bs, s->refcount_block_cache, + refblock = qcow2_cache_is_table_offset(s->refcount_block_cache, discard_block_offs); if (refblock) { /* discard refblock from the cache if refblock is cached */ - qcow2_cache_discard(bs, s->refcount_block_cache, refblock); + qcow2_cache_discard(s->refcount_block_cache, refblock); } update_refcount_discard(bs, discard_block_offs, s->cluster_size); @@ -3235,7 +3243,7 @@ int qcow2_shrink_reftable(BlockDriverState *bs) } else { unused_block = buffer_is_zero(refblock, s->cluster_size); } - qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + qcow2_cache_put(s->refcount_block_cache, &refblock); reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]); } diff --git a/block/qcow2.c b/block/qcow2.c index 801e29fc56..57a517e2bd 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -676,6 +676,11 @@ static QemuOptsList qcow2_runtime_opts = { .help = "Maximum L2 table cache size", }, { + .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Size of each entry in the L2 cache", + }, + { .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE, .type = QEMU_OPT_SIZE, .help = "Maximum refcount block cache size", @@ -706,8 +711,8 @@ static void cache_clean_timer_cb(void *opaque) { BlockDriverState *bs = opaque; BDRVQcow2State *s = bs->opaque; - qcow2_cache_clean_unused(bs, s->l2_table_cache); - qcow2_cache_clean_unused(bs, s->refcount_block_cache); + qcow2_cache_clean_unused(s->l2_table_cache); + qcow2_cache_clean_unused(s->refcount_block_cache); timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + (int64_t) s->cache_clean_interval * 1000); } @@ -747,6 +752,7 @@ static void qcow2_attach_aio_context(BlockDriverState *bs, static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, uint64_t *l2_cache_size, + uint64_t *l2_cache_entry_size, uint64_t *refcount_cache_size, Error **errp) { BDRVQcow2State *s = bs->opaque; @@ -762,6 +768,9 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, *refcount_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0); + *l2_cache_entry_size = qemu_opt_get_size( + opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size); + if (combined_cache_size_set) { if (l2_cache_size_set && refcount_cache_size_set) { error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE @@ -802,11 +811,21 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, / DEFAULT_L2_REFCOUNT_SIZE_RATIO; } } + + if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) || + *l2_cache_entry_size > s->cluster_size || + !is_power_of_2(*l2_cache_entry_size)) { + error_setg(errp, "L2 cache entry size must be a power of two " + "between %d and the cluster size (%d)", + 1 << MIN_CLUSTER_BITS, s->cluster_size); + return; + } } typedef struct Qcow2ReopenState { Qcow2Cache *l2_table_cache; Qcow2Cache *refcount_block_cache; + int l2_slice_size; /* Number of entries in a slice of the L2 table */ bool use_lazy_refcounts; int overlap_check; bool discard_passthrough[QCOW2_DISCARD_MAX]; @@ -823,7 +842,7 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, QemuOpts *opts = NULL; const char *opt_overlap_check, *opt_overlap_check_template; int overlap_check_template = 0; - uint64_t l2_cache_size, refcount_cache_size; + uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size; int i; const char *encryptfmt; QDict *encryptopts = NULL; @@ -842,15 +861,15 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, } /* get L2 table/refcount block cache size from command line options */ - read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size, - &local_err); + read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size, + &refcount_cache_size, &local_err); if (local_err) { error_propagate(errp, local_err); ret = -EINVAL; goto fail; } - l2_cache_size /= s->cluster_size; + l2_cache_size /= l2_cache_entry_size; if (l2_cache_size < MIN_L2_CACHE_SIZE) { l2_cache_size = MIN_L2_CACHE_SIZE; } @@ -888,8 +907,11 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, } } - r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size); - r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size); + r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t); + r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size, + l2_cache_entry_size); + r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size, + s->cluster_size); if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) { error_setg(errp, "Could not allocate metadata caches"); ret = -ENOMEM; @@ -1044,13 +1066,14 @@ static void qcow2_update_options_commit(BlockDriverState *bs, int i; if (s->l2_table_cache) { - qcow2_cache_destroy(bs, s->l2_table_cache); + qcow2_cache_destroy(s->l2_table_cache); } if (s->refcount_block_cache) { - qcow2_cache_destroy(bs, s->refcount_block_cache); + qcow2_cache_destroy(s->refcount_block_cache); } s->l2_table_cache = r->l2_table_cache; s->refcount_block_cache = r->refcount_block_cache; + s->l2_slice_size = r->l2_slice_size; s->overlap_check = r->overlap_check; s->use_lazy_refcounts = r->use_lazy_refcounts; @@ -1073,10 +1096,10 @@ static void qcow2_update_options_abort(BlockDriverState *bs, Qcow2ReopenState *r) { if (r->l2_table_cache) { - qcow2_cache_destroy(bs, r->l2_table_cache); + qcow2_cache_destroy(r->l2_table_cache); } if (r->refcount_block_cache) { - qcow2_cache_destroy(bs, r->refcount_block_cache); + qcow2_cache_destroy(r->refcount_block_cache); } qapi_free_QCryptoBlockOpenOptions(r->crypto_opts); } @@ -1460,7 +1483,7 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, s->autoclear_features &= QCOW2_AUTOCLEAR_MASK; } - if (qcow2_load_autoloading_dirty_bitmaps(bs, &local_err)) { + if (qcow2_load_dirty_bitmaps(bs, &local_err)) { update_header = false; } if (local_err != NULL) { @@ -1514,10 +1537,10 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, s->l1_table = NULL; cache_clean_timer_del(bs); if (s->l2_table_cache) { - qcow2_cache_destroy(bs, s->l2_table_cache); + qcow2_cache_destroy(s->l2_table_cache); } if (s->refcount_block_cache) { - qcow2_cache_destroy(bs, s->refcount_block_cache); + qcow2_cache_destroy(s->refcount_block_cache); } qcrypto_block_free(s->crypto); qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); @@ -2065,8 +2088,8 @@ static void qcow2_close(BlockDriverState *bs) } cache_clean_timer_del(bs); - qcow2_cache_destroy(bs, s->l2_table_cache); - qcow2_cache_destroy(bs, s->refcount_block_cache); + qcow2_cache_destroy(s->l2_table_cache); + qcow2_cache_destroy(s->refcount_block_cache); qcrypto_block_free(s->crypto); s->crypto = NULL; @@ -3259,9 +3282,9 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, host_offset = allocation_start; guest_offset = old_length; while (nb_new_data_clusters) { - int64_t guest_cluster = guest_offset >> s->cluster_bits; - int64_t nb_clusters = MIN(nb_new_data_clusters, - s->l2_size - guest_cluster % s->l2_size); + int64_t nb_clusters = MIN( + nb_new_data_clusters, + s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset)); QCowL2Meta allocation = { .offset = guest_offset, .alloc_offset = host_offset, diff --git a/block/qcow2.h b/block/qcow2.h index 46c8cf44ec..883802241f 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -68,7 +68,7 @@ #define MAX_CLUSTER_BITS 21 /* Must be at least 2 to cover COW */ -#define MIN_L2_CACHE_SIZE 2 /* clusters */ +#define MIN_L2_CACHE_SIZE 2 /* cache entries */ /* Must be at least 4 to cover all cases of refcount table growth */ #define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */ @@ -100,6 +100,7 @@ #define QCOW2_OPT_OVERLAP_INACTIVE_L2 "overlap-check.inactive-l2" #define QCOW2_OPT_CACHE_SIZE "cache-size" #define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size" +#define QCOW2_OPT_L2_CACHE_ENTRY_SIZE "l2-cache-entry-size" #define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size" #define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval" @@ -251,6 +252,7 @@ typedef struct BDRVQcow2State { int cluster_bits; int cluster_size; int cluster_sectors; + int l2_slice_size; int l2_bits; int l2_size; int l1_size; @@ -463,11 +465,21 @@ static inline int64_t size_to_l1(BDRVQcow2State *s, int64_t size) return (size + (1ULL << shift) - 1) >> shift; } +static inline int offset_to_l1_index(BDRVQcow2State *s, uint64_t offset) +{ + return offset >> (s->l2_bits + s->cluster_bits); +} + static inline int offset_to_l2_index(BDRVQcow2State *s, int64_t offset) { return (offset >> s->cluster_bits) & (s->l2_size - 1); } +static inline int offset_to_l2_slice_index(BDRVQcow2State *s, int64_t offset) +{ + return (offset >> s->cluster_bits) & (s->l2_slice_size - 1); +} + static inline int64_t align_offset(int64_t offset, int n) { offset = (offset + n - 1) & ~(n - 1); @@ -636,34 +648,33 @@ void qcow2_free_snapshots(BlockDriverState *bs); int qcow2_read_snapshots(BlockDriverState *bs); /* qcow2-cache.c functions */ -Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables); -int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c); +Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables, + unsigned table_size); +int qcow2_cache_destroy(Qcow2Cache *c); -void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c, - void *table); +void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table); int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c); int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c); int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, Qcow2Cache *dependency); void qcow2_cache_depends_on_flush(Qcow2Cache *c); -void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c); +void qcow2_cache_clean_unused(Qcow2Cache *c); int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c); int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, void **table); int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, void **table); -void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table); -void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c, - uint64_t offset); -void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table); +void qcow2_cache_put(Qcow2Cache *c, void **table); +void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset); +void qcow2_cache_discard(Qcow2Cache *c, void *table); /* qcow2-bitmap.c functions */ int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res, void **refcount_table, int64_t *refcount_table_size); -bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp); +bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp); int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp); void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp); int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp); diff --git a/blockdev.c b/blockdev.c index 7423c5317b..3fb1ca803c 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2825,14 +2825,9 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, if (!has_persistent) { persistent = false; } - if (!has_autoload) { - autoload = false; - } - if (has_autoload && !persistent) { - error_setg(errp, "Autoload flag must be used only for persistent " - "bitmaps"); - return; + if (has_autoload) { + warn_report("Autoload option is deprecated and its value is ignored"); } if (persistent && @@ -2847,7 +2842,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, } bdrv_dirty_bitmap_set_persistance(bitmap, persistent); - bdrv_dirty_bitmap_set_autoload(bitmap, autoload); } void qmp_block_dirty_bitmap_remove(const char *node, const char *name, diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 3da8486ab1..e3f4bbf51d 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -66,7 +66,6 @@ void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap, void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap); void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value); -void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload); void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, bool persistent); diff --git a/qapi/block-core.json b/qapi/block-core.json index 8046c2da23..5c5921bfb7 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1593,9 +1593,9 @@ # Qcow2 disks support persistent bitmaps. Default is false for # block-dirty-bitmap-add. (Since: 2.10) # -# @autoload: the bitmap will be automatically loaded when the image it is stored -# in is opened. This flag may only be specified for persistent -# bitmaps. Default is false for block-dirty-bitmap-add. (Since: 2.10) +# @autoload: ignored and deprecated since 2.12. +# Currently, all dirty tracking bitmaps are loaded from Qcow2 on +# open. # # Since: 2.4 ## @@ -2521,6 +2521,11 @@ # @l2-cache-size: the maximum size of the L2 table cache in # bytes (since 2.2) # +# @l2-cache-entry-size: the size of each entry in the L2 cache in +# bytes. It must be a power of two between 512 +# and the cluster size. The default value is +# the cluster size (since 2.12) +# # @refcount-cache-size: the maximum size of the refcount block cache # in bytes (since 2.2) # @@ -2542,6 +2547,7 @@ '*overlap-check': 'Qcow2OverlapChecks', '*cache-size': 'int', '*l2-cache-size': 'int', + '*l2-cache-entry-size': 'int', '*refcount-cache-size': 'int', '*cache-clean-interval': 'int', '*encrypt': 'BlockdevQcow2Encryption' } } diff --git a/qemu-doc.texi b/qemu-doc.texi index 769968aba4..137f5814a8 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -2757,6 +2757,13 @@ used and it will be removed with no replacement. The ``convert -s snapshot_id_or_name'' argument is obsoleted by the ``convert -l snapshot_param'' argument instead. +@section QEMU Machine Protocol (QMP) commands + +@subsection block-dirty-bitmap-add "autoload" parameter (since 2.12.0) + +"autoload" parameter is now ignored. All bitmaps are automatically loaded +from qcow2 images. + @section System emulator human monitor commands @subsection host_net_add (since 2.10.0) diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061 index f5678b10c9..911b6f2894 100755 --- a/tests/qemu-iotests/061 +++ b/tests/qemu-iotests/061 @@ -54,6 +54,22 @@ $QEMU_IO -c "read -P 0 0 128k" "$TEST_IMG" | _filter_qemu_io _check_test_img echo +echo "=== Testing version downgrade with zero expansion and 4K cache entries ===" +echo +IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M +$QEMU_IO -c "write -z 0 128k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -z 32M 128k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c map "$TEST_IMG" | _filter_qemu_io +$PYTHON qcow2.py "$TEST_IMG" dump-header +$QEMU_IMG amend -o "compat=0.10" --image-opts \ + driver=qcow2,file.filename=$TEST_IMG,l2-cache-entry-size=4096 +$PYTHON qcow2.py "$TEST_IMG" dump-header +$QEMU_IO -c "read -P 0 0 128k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -P 0 32M 128k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c map "$TEST_IMG" | _filter_qemu_io +_check_test_img + +echo echo "=== Testing dirty version downgrade ===" echo IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out index 942485de99..e857ef9a7d 100644 --- a/tests/qemu-iotests/061.out +++ b/tests/qemu-iotests/061.out @@ -52,6 +52,67 @@ read 131072/131072 bytes at offset 0 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) No errors were found on the image. +=== Testing version downgrade with zero expansion and 4K cache entries === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 131072/131072 bytes at offset 33554432 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +128 KiB (0x20000) bytes allocated at offset 0 bytes (0x0) +31.875 MiB (0x1fe0000) bytes not allocated at offset 128 KiB (0x20000) +128 KiB (0x20000) bytes allocated at offset 32 MiB (0x2000000) +31.875 MiB (0x1fe0000) bytes not allocated at offset 32.125 MiB (0x2020000) +magic 0x514649fb +version 3 +backing_file_offset 0x0 +backing_file_size 0x0 +cluster_bits 16 +size 67108864 +crypt_method 0 +l1_size 1 +l1_table_offset 0x30000 +refcount_table_offset 0x10000 +refcount_table_clusters 1 +nb_snapshots 0 +snapshot_offset 0x0 +incompatible_features 0x0 +compatible_features 0x1 +autoclear_features 0x0 +refcount_order 4 +header_length 104 + +Header extension: +magic 0x6803f857 +length 144 +data <binary> + +magic 0x514649fb +version 2 +backing_file_offset 0x0 +backing_file_size 0x0 +cluster_bits 16 +size 67108864 +crypt_method 0 +l1_size 1 +l1_table_offset 0x30000 +refcount_table_offset 0x10000 +refcount_table_clusters 1 +nb_snapshots 0 +snapshot_offset 0x0 +incompatible_features 0x0 +compatible_features 0x0 +autoclear_features 0x0 +refcount_order 4 +header_length 72 + +read 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 131072/131072 bytes at offset 33554432 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +64 MiB (0x4000000) bytes not allocated at offset 0 bytes (0x0) +No errors were found on the image. + === Testing dirty version downgrade === Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 diff --git a/tests/qemu-iotests/103 b/tests/qemu-iotests/103 index d0cfab8844..2841318492 100755 --- a/tests/qemu-iotests/103 +++ b/tests/qemu-iotests/103 @@ -66,6 +66,14 @@ $QEMU_IO -c "open -o cache-size=1M,refcount-cache-size=2M $TEST_IMG" 2>&1 \ $QEMU_IO -c "open -o cache-size=0,l2-cache-size=0,refcount-cache-size=0 $TEST_IMG" \ 2>&1 | _filter_testdir | _filter_imgfmt +# Invalid cache entry sizes +$QEMU_IO -c "open -o l2-cache-entry-size=256 $TEST_IMG" \ + 2>&1 | _filter_testdir | _filter_imgfmt +$QEMU_IO -c "open -o l2-cache-entry-size=4242 $TEST_IMG" \ + 2>&1 | _filter_testdir | _filter_imgfmt +$QEMU_IO -c "open -o l2-cache-entry-size=128k $TEST_IMG" \ + 2>&1 | _filter_testdir | _filter_imgfmt + echo echo '=== Testing valid option combinations ===' echo @@ -94,6 +102,15 @@ $QEMU_IO -c "open -o l2-cache-size=1M,refcount-cache-size=0.25M $TEST_IMG" \ -c 'read -P 42 0 64k' \ | _filter_qemu_io +# Valid cache entry sizes +$QEMU_IO -c "open -o l2-cache-entry-size=512 $TEST_IMG" \ + 2>&1 | _filter_testdir | _filter_imgfmt +$QEMU_IO -c "open -o l2-cache-entry-size=16k $TEST_IMG" \ + 2>&1 | _filter_testdir | _filter_imgfmt +$QEMU_IO -c "open -o l2-cache-entry-size=64k $TEST_IMG" \ + 2>&1 | _filter_testdir | _filter_imgfmt + + echo echo '=== Testing minimal L2 cache and COW ===' echo diff --git a/tests/qemu-iotests/103.out b/tests/qemu-iotests/103.out index b7aaadf89a..bd45d3875a 100644 --- a/tests/qemu-iotests/103.out +++ b/tests/qemu-iotests/103.out @@ -9,6 +9,9 @@ can't open device TEST_DIR/t.IMGFMT: cache-size, l2-cache-size and refcount-cach can't open device TEST_DIR/t.IMGFMT: l2-cache-size may not exceed cache-size can't open device TEST_DIR/t.IMGFMT: refcount-cache-size may not exceed cache-size can't open device TEST_DIR/t.IMGFMT: cache-size, l2-cache-size and refcount-cache-size may not be set the same time +can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of two between 512 and the cluster size (65536) +can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of two between 512 and the cluster size (65536) +can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of two between 512 and the cluster size (65536) === Testing valid option combinations === diff --git a/tests/qemu-iotests/137 b/tests/qemu-iotests/137 index 5a01250005..87965625d8 100755 --- a/tests/qemu-iotests/137 +++ b/tests/qemu-iotests/137 @@ -83,6 +83,9 @@ $QEMU_IO \ -c "reopen -o overlap-check.inactive-l2=off" \ -c "reopen -o cache-size=1M" \ -c "reopen -o l2-cache-size=512k" \ + -c "reopen -o l2-cache-entry-size=512" \ + -c "reopen -o l2-cache-entry-size=4k" \ + -c "reopen -o l2-cache-entry-size=64k" \ -c "reopen -o refcount-cache-size=128k" \ -c "reopen -o cache-clean-interval=5" \ -c "reopen -o cache-clean-interval=0" \ @@ -107,6 +110,8 @@ $QEMU_IO \ -c "reopen -o cache-size=1M,l2-cache-size=2M" \ -c "reopen -o cache-size=1M,refcount-cache-size=2M" \ -c "reopen -o l2-cache-size=256T" \ + -c "reopen -o l2-cache-entry-size=33k" \ + -c "reopen -o l2-cache-entry-size=128k" \ -c "reopen -o refcount-cache-size=256T" \ -c "reopen -o overlap-check=constant,overlap-check.template=all" \ -c "reopen -o overlap-check=blubb" \ diff --git a/tests/qemu-iotests/137.out b/tests/qemu-iotests/137.out index 05efd74d17..e28e1eadba 100644 --- a/tests/qemu-iotests/137.out +++ b/tests/qemu-iotests/137.out @@ -20,6 +20,8 @@ cache-size, l2-cache-size and refcount-cache-size may not be set the same time l2-cache-size may not exceed cache-size refcount-cache-size may not exceed cache-size L2 cache size too big +L2 cache entry size must be a power of two between 512 and the cluster size (65536) +L2 cache entry size must be a power of two between 512 and the cluster size (65536) L2 cache size too big Conflicting values for qcow2 options 'overlap-check' ('constant') and 'overlap-check.template' ('all') Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all diff --git a/tests/qemu-iotests/165 b/tests/qemu-iotests/165 index a3932db3de..2936929627 100755 --- a/tests/qemu-iotests/165 +++ b/tests/qemu-iotests/165 @@ -64,7 +64,7 @@ class TestPersistentDirtyBitmap(iotests.QMPTestCase): def qmpAddBitmap(self): self.vm.qmp('block-dirty-bitmap-add', node='drive0', - name='bitmap0', persistent=True, autoload=True) + name='bitmap0', persistent=True) def test_persistent(self): self.vm = self.mkVm() diff --git a/tests/qemu-iotests/176 b/tests/qemu-iotests/176 index d38b3aeb91..32baa116dd 100755 --- a/tests/qemu-iotests/176 +++ b/tests/qemu-iotests/176 @@ -95,7 +95,7 @@ case $reason in "file": { "driver": "file", "filename": "$TEST_IMG" } } } { "execute": "block-dirty-bitmap-add", "arguments": { "node": "drive0", "name": "bitmap0", - "persistent": true, "autoload": true } } + "persistent": true } } { "execute": "quit" } EOF ;; |