-rw-r--r--  block/dirty-bitmap.c                            |  18
-rw-r--r--  block/gluster.c                                 | 116
-rw-r--r--  block/qcow2-bitmap.c                            |  12
-rw-r--r--  block/qcow2-cache.c                             |  80
-rw-r--r--  block/qcow2-cluster.c                           | 519
-rw-r--r--  block/qcow2-refcount.c                          | 206
-rw-r--r--  block/qcow2.c                                   |  63
-rw-r--r--  block/qcow2.h                                   |  33
-rw-r--r--  block/sheepdog.c                                |  56
-rw-r--r--  blockdev.c                                      |  15
-rw-r--r--  docs/qemu-block-drivers.texi                    |  10
-rw-r--r--  include/block/dirty-bitmap.h                    |   1
-rw-r--r--  qapi/block-core.json                            |  12
-rw-r--r--  qemu-doc.texi                                   |   7
-rw-r--r--  qemu-img.texi                                   |  74
-rw-r--r--  qemu-io.c                                       |  27
-rw-r--r--  tests/qemu-iotests/059.out                      |   2
-rwxr-xr-x  tests/qemu-iotests/061                          |  16
-rw-r--r--  tests/qemu-iotests/061.out                      |  61
-rwxr-xr-x  tests/qemu-iotests/103                          |  17
-rw-r--r--  tests/qemu-iotests/103.out                      |   3
-rwxr-xr-x  tests/qemu-iotests/137                          |   5
-rw-r--r--  tests/qemu-iotests/137.out                      |   2
-rwxr-xr-x  tests/qemu-iotests/155                          |  14
-rwxr-xr-x  tests/qemu-iotests/165                          |   2
-rwxr-xr-x  tests/qemu-iotests/176                          |   2
-rw-r--r--  tests/qemu-iotests/sample_images/afl9.vmdk.bz2  | bin 178 -> 618 bytes
27 files changed, 816 insertions(+), 557 deletions(-)
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 7879d13ddb..909f0517f8 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -52,8 +52,6 @@ struct BdrvDirtyBitmap {
Such operations must fail and both the image
and this bitmap must remain unchanged while
this flag is set. */
- bool autoload; /* For persistent bitmaps: bitmap must be
- autoloaded on image opening */
bool persistent; /* bitmap must be saved to owner disk image */
QLIST_ENTRY(BdrvDirtyBitmap) list;
};
@@ -104,7 +102,6 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
g_free(bitmap->name);
bitmap->name = NULL;
bitmap->persistent = false;
- bitmap->autoload = false;
}
/* Called with BQL taken. */
@@ -261,8 +258,6 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
bitmap->successor = NULL;
successor->persistent = bitmap->persistent;
bitmap->persistent = false;
- successor->autoload = bitmap->autoload;
- bitmap->autoload = false;
bdrv_release_dirty_bitmap(bs, bitmap);
return successor;
@@ -667,19 +662,6 @@ bool bdrv_has_readonly_bitmaps(BlockDriverState *bs)
}
/* Called with BQL taken. */
-void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload)
-{
- qemu_mutex_lock(bitmap->mutex);
- bitmap->autoload = autoload;
- qemu_mutex_unlock(bitmap->mutex);
-}
-
-bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap)
-{
- return bitmap->autoload;
-}
-
-/* Called with BQL taken. */
void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, bool persistent)
{
qemu_mutex_lock(bitmap->mutex);
diff --git a/block/gluster.c b/block/gluster.c
index d8decc41ad..3f17b7819d 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -965,12 +965,68 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
}
#endif
+static int qemu_gluster_do_truncate(struct glfs_fd *fd, int64_t offset,
+ PreallocMode prealloc, Error **errp)
+{
+ int64_t current_length;
+
+ current_length = glfs_lseek(fd, 0, SEEK_END);
+ if (current_length < 0) {
+ error_setg_errno(errp, errno, "Failed to determine current size");
+ return -errno;
+ }
+
+ if (current_length > offset && prealloc != PREALLOC_MODE_OFF) {
+ error_setg(errp, "Cannot use preallocation for shrinking files");
+ return -ENOTSUP;
+ }
+
+ if (current_length == offset) {
+ return 0;
+ }
+
+ switch (prealloc) {
+#ifdef CONFIG_GLUSTERFS_FALLOCATE
+ case PREALLOC_MODE_FALLOC:
+ if (glfs_fallocate(fd, 0, current_length, offset - current_length)) {
+ error_setg_errno(errp, errno, "Could not preallocate data");
+ return -errno;
+ }
+ break;
+#endif /* CONFIG_GLUSTERFS_FALLOCATE */
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+ case PREALLOC_MODE_FULL:
+ if (glfs_ftruncate(fd, offset)) {
+ error_setg_errno(errp, errno, "Could not resize file");
+ return -errno;
+ }
+ if (glfs_zerofill(fd, current_length, offset - current_length)) {
+ error_setg_errno(errp, errno, "Could not zerofill the new area");
+ return -errno;
+ }
+ break;
+#endif /* CONFIG_GLUSTERFS_ZEROFILL */
+ case PREALLOC_MODE_OFF:
+ if (glfs_ftruncate(fd, offset)) {
+ error_setg_errno(errp, errno, "Could not resize file");
+ return -errno;
+ }
+ break;
+ default:
+ error_setg(errp, "Unsupported preallocation mode: %s",
+ PreallocMode_str(prealloc));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int qemu_gluster_create(const char *filename,
QemuOpts *opts, Error **errp)
{
BlockdevOptionsGluster *gconf;
struct glfs *glfs;
- struct glfs_fd *fd;
+ struct glfs_fd *fd = NULL;
int ret = 0;
PreallocMode prealloc;
int64_t total_size = 0;
@@ -1019,45 +1075,14 @@ static int qemu_gluster_create(const char *filename,
goto out;
}
- switch (prealloc) {
-#ifdef CONFIG_GLUSTERFS_FALLOCATE
- case PREALLOC_MODE_FALLOC:
- if (glfs_fallocate(fd, 0, 0, total_size)) {
- error_setg(errp, "Could not preallocate data for the new file");
- ret = -errno;
- }
- break;
-#endif /* CONFIG_GLUSTERFS_FALLOCATE */
-#ifdef CONFIG_GLUSTERFS_ZEROFILL
- case PREALLOC_MODE_FULL:
- if (!glfs_ftruncate(fd, total_size)) {
- if (glfs_zerofill(fd, 0, total_size)) {
- error_setg(errp, "Could not zerofill the new file");
- ret = -errno;
- }
- } else {
- error_setg(errp, "Could not resize file");
- ret = -errno;
- }
- break;
-#endif /* CONFIG_GLUSTERFS_ZEROFILL */
- case PREALLOC_MODE_OFF:
- if (glfs_ftruncate(fd, total_size) != 0) {
+ ret = qemu_gluster_do_truncate(fd, total_size, prealloc, errp);
+
+out:
+ if (fd) {
+ if (glfs_close(fd) != 0 && ret == 0) {
ret = -errno;
- error_setg(errp, "Could not resize file");
}
- break;
- default:
- ret = -EINVAL;
- error_setg(errp, "Unsupported preallocation mode: %s",
- PreallocMode_str(prealloc));
- break;
- }
-
- if (glfs_close(fd) != 0) {
- ret = -errno;
}
-out:
qapi_free_BlockdevOptionsGluster(gconf);
glfs_clear_preopened(glfs);
return ret;
@@ -1097,23 +1122,8 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
- int ret;
BDRVGlusterState *s = bs->opaque;
-
- if (prealloc != PREALLOC_MODE_OFF) {
- error_setg(errp, "Unsupported preallocation mode '%s'",
- PreallocMode_str(prealloc));
- return -ENOTSUP;
- }
-
- ret = glfs_ftruncate(s->fd, offset);
- if (ret < 0) {
- ret = -errno;
- error_setg_errno(errp, -ret, "Failed to truncate file");
- return ret;
- }
-
- return 0;
+ return qemu_gluster_do_truncate(s->fd, offset, prealloc, errp);
}
static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
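Note on the gluster.c hunk above: the new out: path in qemu_gluster_create() keeps the first error it saw, so a failing glfs_close() only sets ret when everything before it succeeded. The following standalone sketch isolates that cleanup pattern; do_work() and do_close() are hypothetical stand-ins for the truncate step and glfs_close(), not QEMU or libgfapi functions.

#include <errno.h>

/* hypothetical stand-ins for the truncate step and glfs_close() */
static int do_work(void)  { return -EIO; }              /* pretend the work failed */
static int do_close(void) { errno = EBADF; return -1; } /* close fails too */

static int create_like(void)
{
    int ret = do_work();

    /* only let the close error through if nothing failed earlier */
    if (do_close() != 0 && ret == 0) {
        ret = -errno;
    }
    return ret;                                         /* still -EIO here */
}

int main(void)
{
    return create_like() == -EIO ? 0 : 1;
}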
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index efa10c6663..4f6fd863ea 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -933,14 +933,14 @@ static void set_readonly_helper(gpointer bitmap, gpointer value)
bdrv_dirty_bitmap_set_readonly(bitmap, (bool)value);
}
-/* qcow2_load_autoloading_dirty_bitmaps()
+/* qcow2_load_dirty_bitmaps()
* Return value is a hint for caller: true means that the Qcow2 header was
* updated. (false doesn't mean that the header should be updated by the
* caller, it just means that updating was not needed or the image cannot be
* written to).
* On failure the function returns false.
*/
-bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp)
+bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
Qcow2BitmapList *bm_list;
@@ -960,14 +960,16 @@ bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp)
}
QSIMPLEQ_FOREACH(bm, bm_list, entry) {
- if ((bm->flags & BME_FLAG_AUTO) && !(bm->flags & BME_FLAG_IN_USE)) {
+ if (!(bm->flags & BME_FLAG_IN_USE)) {
BdrvDirtyBitmap *bitmap = load_bitmap(bs, bm, errp);
if (bitmap == NULL) {
goto fail;
}
+ if (!(bm->flags & BME_FLAG_AUTO)) {
+ bdrv_disable_dirty_bitmap(bitmap);
+ }
bdrv_dirty_bitmap_set_persistance(bitmap, true);
- bdrv_dirty_bitmap_set_autoload(bitmap, true);
bm->flags |= BME_FLAG_IN_USE;
created_dirty_bitmaps =
g_slist_append(created_dirty_bitmaps, bitmap);
@@ -1369,7 +1371,7 @@ void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp)
bm->table.size = 0;
QSIMPLEQ_INSERT_TAIL(&drop_tables, tb, entry);
}
- bm->flags = bdrv_dirty_bitmap_get_autoload(bitmap) ? BME_FLAG_AUTO : 0;
+ bm->flags = bdrv_dirty_bitmap_enabled(bitmap) ? BME_FLAG_AUTO : 0;
bm->granularity_bits = ctz32(bdrv_dirty_bitmap_granularity(bitmap));
bm->dirty_bitmap = bitmap;
}
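Note on the qcow2-bitmap.c hunk above: the separate autoload flag is gone. On load, a bitmap whose header entry lacks BME_FLAG_AUTO is simply created disabled; on store, the AUTO bit is derived from whether the bitmap is enabled. A minimal sketch of that mapping, assuming the bit position of BME_FLAG_AUTO (its real definition lives in block/qcow2-bitmap.c and is not shown in this hunk):

#include <stdbool.h>
#include <stdint.h>

#define BME_FLAG_AUTO  (1U << 1)   /* assumed bit position, for illustration only */

/* what qcow2_store_persistent_dirty_bitmaps() now writes into bm->flags */
static uint32_t flags_on_store(bool bitmap_enabled)
{
    return bitmap_enabled ? BME_FLAG_AUTO : 0;
}

/* what qcow2_load_dirty_bitmaps() decides once load_bitmap() has succeeded */
static bool must_disable_on_load(uint32_t flags)
{
    return !(flags & BME_FLAG_AUTO);
}

int main(void)
{
    return (flags_on_store(true) == BME_FLAG_AUTO &&
            must_disable_on_load(0)) ? 0 : 1;
}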
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index c48ffebd8f..d9dafa31e5 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -39,26 +39,23 @@ struct Qcow2Cache {
Qcow2CachedTable *entries;
struct Qcow2Cache *depends;
int size;
+ int table_size;
bool depends_on_flush;
void *table_array;
uint64_t lru_counter;
uint64_t cache_clean_lru_counter;
};
-static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
- Qcow2Cache *c, int table)
+static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, int table)
{
- BDRVQcow2State *s = bs->opaque;
- return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
+ return (uint8_t *) c->table_array + (size_t) table * c->table_size;
}
-static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
- Qcow2Cache *c, void *table)
+static inline int qcow2_cache_get_table_idx(Qcow2Cache *c, void *table)
{
- BDRVQcow2State *s = bs->opaque;
ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
- int idx = table_offset / s->cluster_size;
- assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
+ int idx = table_offset / c->table_size;
+ assert(idx >= 0 && idx < c->size && table_offset % c->table_size == 0);
return idx;
}
@@ -74,15 +71,13 @@ static inline const char *qcow2_cache_get_name(BDRVQcow2State *s, Qcow2Cache *c)
}
}
-static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
- int i, int num_tables)
+static void qcow2_cache_table_release(Qcow2Cache *c, int i, int num_tables)
{
/* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
#ifdef CONFIG_LINUX
- BDRVQcow2State *s = bs->opaque;
- void *t = qcow2_cache_get_table_addr(bs, c, i);
+ void *t = qcow2_cache_get_table_addr(c, i);
int align = getpagesize();
- size_t mem_size = (size_t) s->cluster_size * num_tables;
+ size_t mem_size = (size_t) c->table_size * num_tables;
size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
if (mem_size > offset && length > 0) {
@@ -98,7 +93,7 @@ static inline bool can_clean_entry(Qcow2Cache *c, int i)
t->lru_counter <= c->cache_clean_lru_counter;
}
-void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
+void qcow2_cache_clean_unused(Qcow2Cache *c)
{
int i = 0;
while (i < c->size) {
@@ -118,23 +113,30 @@ void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
}
if (to_clean > 0) {
- qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
+ qcow2_cache_table_release(c, i - to_clean, to_clean);
}
}
c->cache_clean_lru_counter = c->lru_counter;
}
-Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
+Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
+ unsigned table_size)
{
BDRVQcow2State *s = bs->opaque;
Qcow2Cache *c;
+ assert(num_tables > 0);
+ assert(is_power_of_2(table_size));
+ assert(table_size >= (1 << MIN_CLUSTER_BITS));
+ assert(table_size <= s->cluster_size);
+
c = g_new0(Qcow2Cache, 1);
c->size = num_tables;
+ c->table_size = table_size;
c->entries = g_try_new0(Qcow2CachedTable, num_tables);
c->table_array = qemu_try_blockalign(bs->file->bs,
- (size_t) num_tables * s->cluster_size);
+ (size_t) num_tables * c->table_size);
if (!c->entries || !c->table_array) {
qemu_vfree(c->table_array);
@@ -146,7 +148,7 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
return c;
}
-int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
+int qcow2_cache_destroy(Qcow2Cache *c)
{
int i;
@@ -203,13 +205,13 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
if (c == s->refcount_block_cache) {
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
- c->entries[i].offset, s->cluster_size);
+ c->entries[i].offset, c->table_size);
} else if (c == s->l2_table_cache) {
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
- c->entries[i].offset, s->cluster_size);
+ c->entries[i].offset, c->table_size);
} else {
ret = qcow2_pre_write_overlap_check(bs, 0,
- c->entries[i].offset, s->cluster_size);
+ c->entries[i].offset, c->table_size);
}
if (ret < 0) {
@@ -223,7 +225,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
}
ret = bdrv_pwrite(bs->file, c->entries[i].offset,
- qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
+ qcow2_cache_get_table_addr(c, i), c->table_size);
if (ret < 0) {
return ret;
}
@@ -309,7 +311,7 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
c->entries[i].lru_counter = 0;
}
- qcow2_cache_table_release(bs, c, 0, c->size);
+ qcow2_cache_table_release(c, 0, c->size);
c->lru_counter = 0;
@@ -331,7 +333,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
offset, read_from_disk);
- if (offset_into_cluster(s, offset)) {
+ if (!QEMU_IS_ALIGNED(offset, c->table_size)) {
qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s "
"cache: Offset %#" PRIx64 " is unaligned",
qcow2_cache_get_name(s, c), offset);
@@ -339,7 +341,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
}
/* Check if the table is already cached */
- i = lookup_index = (offset / s->cluster_size * 4) % c->size;
+ i = lookup_index = (offset / c->table_size * 4) % c->size;
do {
const Qcow2CachedTable *t = &c->entries[i];
if (t->offset == offset) {
@@ -379,8 +381,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
}
ret = bdrv_pread(bs->file, offset,
- qcow2_cache_get_table_addr(bs, c, i),
- s->cluster_size);
+ qcow2_cache_get_table_addr(c, i),
+ c->table_size);
if (ret < 0) {
return ret;
}
@@ -391,7 +393,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
/* And return the right table */
found:
c->entries[i].ref++;
- *table = qcow2_cache_get_table_addr(bs, c, i);
+ *table = qcow2_cache_get_table_addr(c, i);
trace_qcow2_cache_get_done(qemu_coroutine_self(),
c == s->l2_table_cache, i);
@@ -411,9 +413,9 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
return qcow2_cache_do_get(bs, c, offset, table, false);
}
-void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
+void qcow2_cache_put(Qcow2Cache *c, void **table)
{
- int i = qcow2_cache_get_table_idx(bs, c, *table);
+ int i = qcow2_cache_get_table_idx(c, *table);
c->entries[i].ref--;
*table = NULL;
@@ -425,30 +427,28 @@ void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
assert(c->entries[i].ref >= 0);
}
-void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
- void *table)
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
{
- int i = qcow2_cache_get_table_idx(bs, c, table);
+ int i = qcow2_cache_get_table_idx(c, table);
assert(c->entries[i].offset != 0);
c->entries[i].dirty = true;
}
-void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
- uint64_t offset)
+void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset)
{
int i;
for (i = 0; i < c->size; i++) {
if (c->entries[i].offset == offset) {
- return qcow2_cache_get_table_addr(bs, c, i);
+ return qcow2_cache_get_table_addr(c, i);
}
}
return NULL;
}
-void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table)
+void qcow2_cache_discard(Qcow2Cache *c, void *table)
{
- int i = qcow2_cache_get_table_idx(bs, c, table);
+ int i = qcow2_cache_get_table_idx(c, table);
assert(c->entries[i].ref == 0);
@@ -456,5 +456,5 @@ void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table)
c->entries[i].lru_counter = 0;
c->entries[i].dirty = false;
- qcow2_cache_table_release(bs, c, i, 1);
+ qcow2_cache_table_release(c, i, 1);
}
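Note on the qcow2-cache.c hunk above: the cache now addresses its entries by the per-cache table_size instead of the cluster size, which is what later allows L2 tables to be cached in smaller slices. A standalone sketch of the address/index arithmetic mirrored by qcow2_cache_get_table_addr() and qcow2_cache_get_table_idx(); the struct is a reduced stand-in, not the real Qcow2Cache.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

struct toy_cache {
    void *table_array;
    int size;              /* number of cached tables */
    int table_size;        /* bytes per table, a power of two */
};

static void *table_addr(const struct toy_cache *c, int i)
{
    return (uint8_t *)c->table_array + (size_t)i * c->table_size;
}

static int table_idx(const struct toy_cache *c, void *table)
{
    ptrdiff_t off = (uint8_t *)table - (uint8_t *)c->table_array;
    int idx = off / c->table_size;

    assert(idx >= 0 && idx < c->size && off % c->table_size == 0);
    return idx;
}

int main(void)
{
    struct toy_cache c = { .size = 4, .table_size = 4096 };

    c.table_array = calloc(c.size, c.table_size);
    assert(c.table_array && table_idx(&c, table_addr(&c, 3)) == 3);
    free(c.table_array);
    return 0;
}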
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 3a979bcd82..e406b0f3b9 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -195,20 +195,26 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
/*
* l2_load
*
- * Loads a L2 table into memory. If the table is in the cache, the cache
- * is used; otherwise the L2 table is loaded from the image file.
+ * @bs: The BlockDriverState
+ * @offset: A guest offset, used to calculate what slice of the L2
+ * table to load.
+ * @l2_offset: Offset to the L2 table in the image file.
+ * @l2_slice: Location to store the pointer to the L2 slice.
*
- * Returns a pointer to the L2 table on success, or NULL if the read from
- * the image file failed.
+ * Loads a L2 slice into memory (L2 slices are the parts of L2 tables
+ * that are loaded by the qcow2 cache). If the slice is in the cache,
+ * the cache is used; otherwise the L2 slice is loaded from the image
+ * file.
*/
-
-static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
- uint64_t **l2_table)
+static int l2_load(BlockDriverState *bs, uint64_t offset,
+ uint64_t l2_offset, uint64_t **l2_slice)
{
BDRVQcow2State *s = bs->opaque;
+ int start_of_slice = sizeof(uint64_t) *
+ (offset_to_l2_index(s, offset) - offset_to_l2_slice_index(s, offset));
- return qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
- (void **)l2_table);
+ return qcow2_cache_get(bs, s->l2_table_cache, l2_offset + start_of_slice,
+ (void **)l2_slice);
}
/*
@@ -257,11 +263,12 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
*
*/
-static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
+static int l2_allocate(BlockDriverState *bs, int l1_index)
{
BDRVQcow2State *s = bs->opaque;
uint64_t old_l2_offset;
- uint64_t *l2_table = NULL;
+ uint64_t *l2_slice = NULL;
+ unsigned slice, slice_size2, n_slices;
int64_t l2_offset;
int ret;
@@ -292,39 +299,47 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
/* allocate a new entry in the l2 cache */
+ slice_size2 = s->l2_slice_size * sizeof(uint64_t);
+ n_slices = s->cluster_size / slice_size2;
+
trace_qcow2_l2_allocate_get_empty(bs, l1_index);
- ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
- if (ret < 0) {
- goto fail;
- }
+ for (slice = 0; slice < n_slices; slice++) {
+ ret = qcow2_cache_get_empty(bs, s->l2_table_cache,
+ l2_offset + slice * slice_size2,
+ (void **) &l2_slice);
+ if (ret < 0) {
+ goto fail;
+ }
- l2_table = *table;
+ if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
+ /* if there was no old l2 table, clear the new slice */
+ memset(l2_slice, 0, slice_size2);
+ } else {
+ uint64_t *old_slice;
+ uint64_t old_l2_slice_offset =
+ (old_l2_offset & L1E_OFFSET_MASK) + slice * slice_size2;
+
+ /* if there was an old l2 table, read a slice from the disk */
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
+ ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_slice_offset,
+ (void **) &old_slice);
+ if (ret < 0) {
+ goto fail;
+ }
- if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
- /* if there was no old l2 table, clear the new table */
- memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
- } else {
- uint64_t* old_table;
+ memcpy(l2_slice, old_slice, slice_size2);
- /* if there was an old l2 table, read it from the disk */
- BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
- ret = qcow2_cache_get(bs, s->l2_table_cache,
- old_l2_offset & L1E_OFFSET_MASK,
- (void**) &old_table);
- if (ret < 0) {
- goto fail;
+ qcow2_cache_put(s->l2_table_cache, (void **) &old_slice);
}
- memcpy(l2_table, old_table, s->cluster_size);
+ /* write the l2 slice to the file */
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
- qcow2_cache_put(bs, s->l2_table_cache, (void **) &old_table);
+ trace_qcow2_l2_allocate_write_l2(bs, l1_index);
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
}
- /* write the l2 table to the file */
- BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
-
- trace_qcow2_l2_allocate_write_l2(bs, l1_index);
- qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
ret = qcow2_cache_flush(bs, s->l2_table_cache);
if (ret < 0) {
goto fail;
@@ -338,14 +353,13 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
goto fail;
}
- *table = l2_table;
trace_qcow2_l2_allocate_done(bs, l1_index, 0);
return 0;
fail:
trace_qcow2_l2_allocate_done(bs, l1_index, ret);
- if (l2_table != NULL) {
- qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
+ if (l2_slice != NULL) {
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
}
s->l1_table[l1_index] = old_l2_offset;
if (l2_offset > 0) {
@@ -356,19 +370,19 @@ fail:
}
/*
- * Checks how many clusters in a given L2 table are contiguous in the image
+ * Checks how many clusters in a given L2 slice are contiguous in the image
* file. As soon as one of the flags in the bitmask stop_flags changes compared
* to the first cluster, the search is stopped and the cluster is not counted
* as contiguous. (This allows it, for example, to stop at the first compressed
* cluster which may require a different handling)
*/
static int count_contiguous_clusters(int nb_clusters, int cluster_size,
- uint64_t *l2_table, uint64_t stop_flags)
+ uint64_t *l2_slice, uint64_t stop_flags)
{
int i;
QCow2ClusterType first_cluster_type;
uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
- uint64_t first_entry = be64_to_cpu(l2_table[0]);
+ uint64_t first_entry = be64_to_cpu(l2_slice[0]);
uint64_t offset = first_entry & mask;
if (!offset) {
@@ -381,7 +395,7 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC);
for (i = 0; i < nb_clusters; i++) {
- uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
+ uint64_t l2_entry = be64_to_cpu(l2_slice[i]) & mask;
if (offset + (uint64_t) i * cluster_size != l2_entry) {
break;
}
@@ -392,10 +406,10 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
/*
* Checks how many consecutive unallocated clusters in a given L2
- * table have the same cluster type.
+ * slice have the same cluster type.
*/
static int count_contiguous_clusters_unallocated(int nb_clusters,
- uint64_t *l2_table,
+ uint64_t *l2_slice,
QCow2ClusterType wanted_type)
{
int i;
@@ -403,7 +417,7 @@ static int count_contiguous_clusters_unallocated(int nb_clusters,
assert(wanted_type == QCOW2_CLUSTER_ZERO_PLAIN ||
wanted_type == QCOW2_CLUSTER_UNALLOCATED);
for (i = 0; i < nb_clusters; i++) {
- uint64_t entry = be64_to_cpu(l2_table[i]);
+ uint64_t entry = be64_to_cpu(l2_slice[i]);
QCow2ClusterType type = qcow2_get_cluster_type(entry);
if (type != wanted_type) {
@@ -515,8 +529,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
{
BDRVQcow2State *s = bs->opaque;
unsigned int l2_index;
- uint64_t l1_index, l2_offset, *l2_table;
- int l1_bits, c;
+ uint64_t l1_index, l2_offset, *l2_slice;
+ int c;
unsigned int offset_in_cluster;
uint64_t bytes_available, bytes_needed, nb_clusters;
QCow2ClusterType type;
@@ -525,12 +539,12 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
offset_in_cluster = offset_into_cluster(s, offset);
bytes_needed = (uint64_t) *bytes + offset_in_cluster;
- l1_bits = s->l2_bits + s->cluster_bits;
-
/* compute how many bytes there are between the start of the cluster
- * containing offset and the end of the l1 entry */
- bytes_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1))
- + offset_in_cluster;
+ * containing offset and the end of the l2 slice that contains
+ * the entry pointing to it */
+ bytes_available =
+ ((uint64_t) (s->l2_slice_size - offset_to_l2_slice_index(s, offset)))
+ << s->cluster_bits;
if (bytes_needed > bytes_available) {
bytes_needed = bytes_available;
@@ -540,7 +554,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
/* seek to the l2 offset in the l1 table */
- l1_index = offset >> l1_bits;
+ l1_index = offset_to_l1_index(s, offset);
if (l1_index >= s->l1_size) {
type = QCOW2_CLUSTER_UNALLOCATED;
goto out;
@@ -559,17 +573,17 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
return -EIO;
}
- /* load the l2 table in memory */
+ /* load the l2 slice in memory */
- ret = l2_load(bs, l2_offset, &l2_table);
+ ret = l2_load(bs, offset, l2_offset, &l2_slice);
if (ret < 0) {
return ret;
}
/* find the cluster offset for the given disk offset */
- l2_index = offset_to_l2_index(s, offset);
- *cluster_offset = be64_to_cpu(l2_table[l2_index]);
+ l2_index = offset_to_l2_slice_index(s, offset);
+ *cluster_offset = be64_to_cpu(l2_slice[l2_index]);
nb_clusters = size_to_clusters(s, bytes_needed);
/* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned
@@ -596,14 +610,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
case QCOW2_CLUSTER_UNALLOCATED:
/* how many empty clusters ? */
c = count_contiguous_clusters_unallocated(nb_clusters,
- &l2_table[l2_index], type);
+ &l2_slice[l2_index], type);
*cluster_offset = 0;
break;
case QCOW2_CLUSTER_ZERO_ALLOC:
case QCOW2_CLUSTER_NORMAL:
/* how many allocated clusters ? */
c = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], QCOW_OFLAG_ZERO);
+ &l2_slice[l2_index], QCOW_OFLAG_ZERO);
*cluster_offset &= L2E_OFFSET_MASK;
if (offset_into_cluster(s, *cluster_offset)) {
qcow2_signal_corruption(bs, true, -1, -1,
@@ -619,7 +633,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
abort();
}
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
bytes_available = (int64_t)c * s->cluster_size;
@@ -637,7 +651,7 @@ out:
return type;
fail:
- qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
+ qcow2_cache_put(s->l2_table_cache, (void **)&l2_slice);
return ret;
}
@@ -645,26 +659,25 @@ fail:
* get_cluster_table
*
* for a given disk offset, load (and allocate if needed)
- * the l2 table.
+ * the appropriate slice of its l2 table.
*
- * the l2 table offset in the qcow2 file and the cluster index
- * in the l2 table are given to the caller.
+ * the cluster index in the l2 slice is given to the caller.
*
* Returns 0 on success, -errno in failure case
*/
static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
- uint64_t **new_l2_table,
+ uint64_t **new_l2_slice,
int *new_l2_index)
{
BDRVQcow2State *s = bs->opaque;
unsigned int l2_index;
uint64_t l1_index, l2_offset;
- uint64_t *l2_table = NULL;
+ uint64_t *l2_slice = NULL;
int ret;
/* seek to the l2 offset in the l1 table */
- l1_index = offset >> (s->l2_bits + s->cluster_bits);
+ l1_index = offset_to_l1_index(s, offset);
if (l1_index >= s->l1_size) {
ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
if (ret < 0) {
@@ -681,17 +694,9 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
return -EIO;
}
- /* seek the l2 table of the given l2 offset */
-
- if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
- /* load the l2 table in memory */
- ret = l2_load(bs, l2_offset, &l2_table);
- if (ret < 0) {
- return ret;
- }
- } else {
+ if (!(s->l1_table[l1_index] & QCOW_OFLAG_COPIED)) {
/* First allocate a new L2 table (and do COW if needed) */
- ret = l2_allocate(bs, l1_index, &l2_table);
+ ret = l2_allocate(bs, l1_index);
if (ret < 0) {
return ret;
}
@@ -701,13 +706,23 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
QCOW2_DISCARD_OTHER);
}
+
+ /* Get the offset of the newly-allocated l2 table */
+ l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
+ assert(offset_into_cluster(s, l2_offset) == 0);
+ }
+
+ /* load the l2 slice in memory */
+ ret = l2_load(bs, offset, l2_offset, &l2_slice);
+ if (ret < 0) {
+ return ret;
}
/* find the cluster offset for the given disk offset */
- l2_index = offset_to_l2_index(s, offset);
+ l2_index = offset_to_l2_slice_index(s, offset);
- *new_l2_table = l2_table;
+ *new_l2_slice = l2_slice;
*new_l2_index = l2_index;
return 0;
@@ -732,26 +747,26 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
{
BDRVQcow2State *s = bs->opaque;
int l2_index, ret;
- uint64_t *l2_table;
+ uint64_t *l2_slice;
int64_t cluster_offset;
int nb_csectors;
- ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+ ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
if (ret < 0) {
return 0;
}
/* Compression can't overwrite anything. Fail if the cluster was already
* allocated. */
- cluster_offset = be64_to_cpu(l2_table[l2_index]);
+ cluster_offset = be64_to_cpu(l2_slice[l2_index]);
if (cluster_offset & L2E_OFFSET_MASK) {
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
return 0;
}
cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
if (cluster_offset < 0) {
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
return 0;
}
@@ -766,9 +781,9 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
/* compressed clusters never have the copied flag */
BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
- qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
- l2_table[l2_index] = cpu_to_be64(cluster_offset);
- qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
+ l2_slice[l2_index] = cpu_to_be64(cluster_offset);
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
return cluster_offset;
}
@@ -907,7 +922,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
{
BDRVQcow2State *s = bs->opaque;
int i, j = 0, l2_index, ret;
- uint64_t *old_cluster, *l2_table;
+ uint64_t *old_cluster, *l2_slice;
uint64_t cluster_offset = m->alloc_offset;
trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
@@ -934,13 +949,13 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
s->refcount_block_cache);
}
- ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
+ ret = get_cluster_table(bs, m->offset, &l2_slice, &l2_index);
if (ret < 0) {
goto err;
}
- qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
- assert(l2_index + m->nb_clusters <= s->l2_size);
+ assert(l2_index + m->nb_clusters <= s->l2_slice_size);
for (i = 0; i < m->nb_clusters; i++) {
/* if two concurrent writes happen to the same unallocated cluster
* each write allocates separate cluster and writes data concurrently.
@@ -948,16 +963,16 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
* cluster the second one has to do RMW (which is done above by
* perform_cow()), update l2 table with its cluster pointer and free
* old cluster. This is what this loop does */
- if (l2_table[l2_index + i] != 0) {
- old_cluster[j++] = l2_table[l2_index + i];
+ if (l2_slice[l2_index + i] != 0) {
+ old_cluster[j++] = l2_slice[l2_index + i];
}
- l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
+ l2_slice[l2_index + i] = cpu_to_be64((cluster_offset +
(i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
}
- qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
/*
* If this was a COW, we need to decrease the refcount of the old cluster.
@@ -984,12 +999,12 @@ err:
* which must copy from the backing file)
*/
static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
- uint64_t *l2_table, int l2_index)
+ uint64_t *l2_slice, int l2_index)
{
int i;
for (i = 0; i < nb_clusters; i++) {
- uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
+ uint64_t l2_entry = be64_to_cpu(l2_slice[l2_index + i]);
QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
switch(cluster_type) {
@@ -1104,7 +1119,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
BDRVQcow2State *s = bs->opaque;
int l2_index;
uint64_t cluster_offset;
- uint64_t *l2_table;
+ uint64_t *l2_slice;
uint64_t nb_clusters;
unsigned int keep_clusters;
int ret;
@@ -1116,23 +1131,23 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
== offset_into_cluster(s, *host_offset));
/*
- * Calculate the number of clusters to look for. We stop at L2 table
+ * Calculate the number of clusters to look for. We stop at L2 slice
* boundaries to keep things simple.
*/
nb_clusters =
size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
- l2_index = offset_to_l2_index(s, guest_offset);
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+ l2_index = offset_to_l2_slice_index(s, guest_offset);
+ nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
assert(nb_clusters <= INT_MAX);
/* Find L2 entry for the first involved cluster */
- ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
+ ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index);
if (ret < 0) {
return ret;
}
- cluster_offset = be64_to_cpu(l2_table[l2_index]);
+ cluster_offset = be64_to_cpu(l2_slice[l2_index]);
/* Check how many clusters are already allocated and don't need COW */
if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
@@ -1160,7 +1175,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
/* We keep all QCOW_OFLAG_COPIED clusters */
keep_clusters =
count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index],
+ &l2_slice[l2_index],
QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
assert(keep_clusters <= nb_clusters);
@@ -1175,7 +1190,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
/* Cleanup */
out:
- qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
/* Only return a host offset if we actually made progress. Otherwise we
* would make requirements for handle_alloc() that it can't fulfill */
@@ -1259,7 +1274,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
{
BDRVQcow2State *s = bs->opaque;
int l2_index;
- uint64_t *l2_table;
+ uint64_t *l2_slice;
uint64_t entry;
uint64_t nb_clusters;
int ret;
@@ -1272,29 +1287,29 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
assert(*bytes > 0);
/*
- * Calculate the number of clusters to look for. We stop at L2 table
+ * Calculate the number of clusters to look for. We stop at L2 slice
* boundaries to keep things simple.
*/
nb_clusters =
size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
- l2_index = offset_to_l2_index(s, guest_offset);
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+ l2_index = offset_to_l2_slice_index(s, guest_offset);
+ nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
assert(nb_clusters <= INT_MAX);
/* Find L2 entry for the first involved cluster */
- ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
+ ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index);
if (ret < 0) {
return ret;
}
- entry = be64_to_cpu(l2_table[l2_index]);
+ entry = be64_to_cpu(l2_slice[l2_index]);
/* For the moment, overwrite compressed clusters one by one */
if (entry & QCOW_OFLAG_COMPRESSED) {
nb_clusters = 1;
} else {
- nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
+ nb_clusters = count_cow_clusters(s, nb_clusters, l2_slice, l2_index);
}
/* This function is only called when there were no non-COW clusters, so if
@@ -1323,7 +1338,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
* nb_clusters already to a range of COW clusters */
preallocated_nb_clusters =
count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], QCOW_OFLAG_COPIED);
+ &l2_slice[l2_index], QCOW_OFLAG_COPIED);
assert(preallocated_nb_clusters > 0);
nb_clusters = preallocated_nb_clusters;
@@ -1334,7 +1349,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
keep_old_clusters = true;
}
- qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
if (!alloc_cluster_offset) {
/* Allocate, if necessary at a given offset in the image file */
@@ -1616,32 +1631,32 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
/*
* This discards as many clusters of nb_clusters as possible at once (i.e.
- * all clusters in the same L2 table) and returns the number of discarded
+ * all clusters in the same L2 slice) and returns the number of discarded
* clusters.
*/
-static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
- uint64_t nb_clusters, enum qcow2_discard_type type,
- bool full_discard)
+static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
+ uint64_t nb_clusters,
+ enum qcow2_discard_type type, bool full_discard)
{
BDRVQcow2State *s = bs->opaque;
- uint64_t *l2_table;
+ uint64_t *l2_slice;
int l2_index;
int ret;
int i;
- ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+ ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
if (ret < 0) {
return ret;
}
- /* Limit nb_clusters to one L2 table */
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+ /* Limit nb_clusters to one L2 slice */
+ nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
assert(nb_clusters <= INT_MAX);
for (i = 0; i < nb_clusters; i++) {
uint64_t old_l2_entry;
- old_l2_entry = be64_to_cpu(l2_table[l2_index + i]);
+ old_l2_entry = be64_to_cpu(l2_slice[l2_index + i]);
/*
* If full_discard is false, make sure that a discarded area reads back
@@ -1679,18 +1694,18 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
}
/* First remove L2 entries */
- qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
if (!full_discard && s->qcow_version >= 3) {
- l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
+ l2_slice[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
} else {
- l2_table[l2_index + i] = cpu_to_be64(0);
+ l2_slice[l2_index + i] = cpu_to_be64(0);
}
/* Then decrease the refcount */
qcow2_free_any_clusters(bs, old_l2_entry, 1, type);
}
- qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
return nb_clusters;
}
@@ -1714,10 +1729,10 @@ int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
s->cache_discards = true;
- /* Each L2 table is handled by its own loop iteration */
+ /* Each L2 slice is handled by its own loop iteration */
while (nb_clusters > 0) {
- cleared = discard_single_l2(bs, offset, nb_clusters, type,
- full_discard);
+ cleared = discard_in_l2_slice(bs, offset, nb_clusters, type,
+ full_discard);
if (cleared < 0) {
ret = cleared;
goto fail;
@@ -1737,33 +1752,33 @@ fail:
/*
* This zeroes as many clusters of nb_clusters as possible at once (i.e.
- * all clusters in the same L2 table) and returns the number of zeroed
+ * all clusters in the same L2 slice) and returns the number of zeroed
* clusters.
*/
-static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
- uint64_t nb_clusters, int flags)
+static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
+ uint64_t nb_clusters, int flags)
{
BDRVQcow2State *s = bs->opaque;
- uint64_t *l2_table;
+ uint64_t *l2_slice;
int l2_index;
int ret;
int i;
bool unmap = !!(flags & BDRV_REQ_MAY_UNMAP);
- ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+ ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
if (ret < 0) {
return ret;
}
- /* Limit nb_clusters to one L2 table */
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+ /* Limit nb_clusters to one L2 slice */
+ nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
assert(nb_clusters <= INT_MAX);
for (i = 0; i < nb_clusters; i++) {
uint64_t old_offset;
QCow2ClusterType cluster_type;
- old_offset = be64_to_cpu(l2_table[l2_index + i]);
+ old_offset = be64_to_cpu(l2_slice[l2_index + i]);
/*
* Minimize L2 changes if the cluster already reads back as
@@ -1775,16 +1790,16 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
continue;
}
- qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) {
- l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
+ l2_slice[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
} else {
- l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO);
+ l2_slice[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO);
}
}
- qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
return nb_clusters;
}
@@ -1808,13 +1823,13 @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
return -ENOTSUP;
}
- /* Each L2 table is handled by its own loop iteration */
+ /* Each L2 slice is handled by its own loop iteration */
nb_clusters = size_to_clusters(s, bytes);
s->cache_discards = true;
while (nb_clusters > 0) {
- cleared = zero_single_l2(bs, offset, nb_clusters, flags);
+ cleared = zero_in_l2_slice(bs, offset, nb_clusters, flags);
if (cleared < 0) {
ret = cleared;
goto fail;
@@ -1848,22 +1863,25 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
{
BDRVQcow2State *s = bs->opaque;
bool is_active_l1 = (l1_table == s->l1_table);
- uint64_t *l2_table = NULL;
+ uint64_t *l2_slice = NULL;
+ unsigned slice, slice_size2, n_slices;
int ret;
int i, j;
+ slice_size2 = s->l2_slice_size * sizeof(uint64_t);
+ n_slices = s->cluster_size / slice_size2;
+
if (!is_active_l1) {
/* inactive L2 tables require a buffer to be stored in when loading
* them from disk */
- l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size);
- if (l2_table == NULL) {
+ l2_slice = qemu_try_blockalign(bs->file->bs, slice_size2);
+ if (l2_slice == NULL) {
return -ENOMEM;
}
}
for (i = 0; i < l1_size; i++) {
uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
- bool l2_dirty = false;
uint64_t l2_refcount;
if (!l2_offset) {
@@ -1883,124 +1901,131 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
goto fail;
}
- if (is_active_l1) {
- /* get active L2 tables from cache */
- ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
- (void **)&l2_table);
- } else {
- /* load inactive L2 tables from disk */
- ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
- (void *)l2_table, s->cluster_sectors);
- }
- if (ret < 0) {
- goto fail;
- }
-
ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
&l2_refcount);
if (ret < 0) {
goto fail;
}
- for (j = 0; j < s->l2_size; j++) {
- uint64_t l2_entry = be64_to_cpu(l2_table[j]);
- int64_t offset = l2_entry & L2E_OFFSET_MASK;
- QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
-
- if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN &&
- cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) {
- continue;
+ for (slice = 0; slice < n_slices; slice++) {
+ uint64_t slice_offset = l2_offset + slice * slice_size2;
+ bool l2_dirty = false;
+ if (is_active_l1) {
+ /* get active L2 tables from cache */
+ ret = qcow2_cache_get(bs, s->l2_table_cache, slice_offset,
+ (void **)&l2_slice);
+ } else {
+ /* load inactive L2 tables from disk */
+ ret = bdrv_pread(bs->file, slice_offset, l2_slice, slice_size2);
+ }
+ if (ret < 0) {
+ goto fail;
}
- if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
- if (!bs->backing) {
- /* not backed; therefore we can simply deallocate the
- * cluster */
- l2_table[j] = 0;
- l2_dirty = true;
+ for (j = 0; j < s->l2_slice_size; j++) {
+ uint64_t l2_entry = be64_to_cpu(l2_slice[j]);
+ int64_t offset = l2_entry & L2E_OFFSET_MASK;
+ QCow2ClusterType cluster_type =
+ qcow2_get_cluster_type(l2_entry);
+
+ if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN &&
+ cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) {
continue;
}
- offset = qcow2_alloc_clusters(bs, s->cluster_size);
- if (offset < 0) {
- ret = offset;
- goto fail;
- }
+ if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+ if (!bs->backing) {
+ /* not backed; therefore we can simply deallocate the
+ * cluster */
+ l2_slice[j] = 0;
+ l2_dirty = true;
+ continue;
+ }
+
+ offset = qcow2_alloc_clusters(bs, s->cluster_size);
+ if (offset < 0) {
+ ret = offset;
+ goto fail;
+ }
- if (l2_refcount > 1) {
- /* For shared L2 tables, set the refcount accordingly (it is
- * already 1 and needs to be l2_refcount) */
- ret = qcow2_update_cluster_refcount(bs,
- offset >> s->cluster_bits,
+ if (l2_refcount > 1) {
+ /* For shared L2 tables, set the refcount accordingly
+ * (it is already 1 and needs to be l2_refcount) */
+ ret = qcow2_update_cluster_refcount(
+ bs, offset >> s->cluster_bits,
refcount_diff(1, l2_refcount), false,
QCOW2_DISCARD_OTHER);
- if (ret < 0) {
- qcow2_free_clusters(bs, offset, s->cluster_size,
- QCOW2_DISCARD_OTHER);
- goto fail;
+ if (ret < 0) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_OTHER);
+ goto fail;
+ }
}
}
- }
- if (offset_into_cluster(s, offset)) {
- qcow2_signal_corruption(bs, true, -1, -1,
- "Cluster allocation offset "
- "%#" PRIx64 " unaligned (L2 offset: %#"
- PRIx64 ", L2 index: %#x)", offset,
- l2_offset, j);
- if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
- qcow2_free_clusters(bs, offset, s->cluster_size,
- QCOW2_DISCARD_ALWAYS);
+ if (offset_into_cluster(s, offset)) {
+ int l2_index = slice * s->l2_slice_size + j;
+ qcow2_signal_corruption(
+ bs, true, -1, -1,
+ "Cluster allocation offset "
+ "%#" PRIx64 " unaligned (L2 offset: %#"
+ PRIx64 ", L2 index: %#x)", offset,
+ l2_offset, l2_index);
+ if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_ALWAYS);
+ }
+ ret = -EIO;
+ goto fail;
}
- ret = -EIO;
- goto fail;
- }
- ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
- if (ret < 0) {
- if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
- qcow2_free_clusters(bs, offset, s->cluster_size,
- QCOW2_DISCARD_ALWAYS);
+ ret = qcow2_pre_write_overlap_check(bs, 0, offset,
+ s->cluster_size);
+ if (ret < 0) {
+ if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_ALWAYS);
+ }
+ goto fail;
}
- goto fail;
- }
- ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
- if (ret < 0) {
- if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
- qcow2_free_clusters(bs, offset, s->cluster_size,
- QCOW2_DISCARD_ALWAYS);
+ ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
+ if (ret < 0) {
+ if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_ALWAYS);
+ }
+ goto fail;
}
- goto fail;
- }
- if (l2_refcount == 1) {
- l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
- } else {
- l2_table[j] = cpu_to_be64(offset);
+ if (l2_refcount == 1) {
+ l2_slice[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
+ } else {
+ l2_slice[j] = cpu_to_be64(offset);
+ }
+ l2_dirty = true;
}
- l2_dirty = true;
- }
- if (is_active_l1) {
- if (l2_dirty) {
- qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
- qcow2_cache_depends_on_flush(s->l2_table_cache);
- }
- qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
- } else {
- if (l2_dirty) {
- ret = qcow2_pre_write_overlap_check(bs,
- QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, l2_offset,
- s->cluster_size);
- if (ret < 0) {
- goto fail;
+ if (is_active_l1) {
+ if (l2_dirty) {
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
+ qcow2_cache_depends_on_flush(s->l2_table_cache);
}
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
+ } else {
+ if (l2_dirty) {
+ ret = qcow2_pre_write_overlap_check(
+ bs, QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2,
+ slice_offset, slice_size2);
+ if (ret < 0) {
+ goto fail;
+ }
- ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
- (void *)l2_table, s->cluster_sectors);
- if (ret < 0) {
- goto fail;
+ ret = bdrv_pwrite(bs->file, slice_offset,
+ l2_slice, slice_size2);
+ if (ret < 0) {
+ goto fail;
+ }
}
}
}
@@ -2014,11 +2039,11 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
ret = 0;
fail:
- if (l2_table) {
+ if (l2_slice) {
if (!is_active_l1) {
- qemu_vfree(l2_table);
+ qemu_vfree(l2_slice);
} else {
- qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
}
}
return ret;
@@ -2070,7 +2095,15 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
int l1_sectors = DIV_ROUND_UP(s->snapshots[i].l1_size *
sizeof(uint64_t), BDRV_SECTOR_SIZE);
- l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
+ uint64_t *new_l1_table =
+ g_try_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
+
+ if (!new_l1_table) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ l1_table = new_l1_table;
ret = bdrv_read(bs->file,
s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,
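Note on the qcow2-cluster.c hunks above: they hinge on three index computations. The L1 index selects the L2 table, the full L2 index locates the entry within that table, and the slice index locates it within the cached slice; l2_load() turns the difference of the last two into the byte offset of the slice inside the table. The helpers below assume the usual power-of-two qcow2 layout and only illustrate the math; they are not copied from block/qcow2.h.

#include <inttypes.h>
#include <stdio.h>

struct toy_state {
    int cluster_bits;      /* e.g. 16 for 64 KiB clusters */
    int l2_bits;           /* log2(entries in a full L2 table) */
    int l2_slice_size;     /* entries per cached L2 slice */
};

static uint64_t to_l1_index(const struct toy_state *s, uint64_t off)
{
    return off >> (s->l2_bits + s->cluster_bits);
}

static int to_l2_index(const struct toy_state *s, uint64_t off)
{
    return (off >> s->cluster_bits) & ((1 << s->l2_bits) - 1);
}

static int to_l2_slice_index(const struct toy_state *s, uint64_t off)
{
    return (off >> s->cluster_bits) & (s->l2_slice_size - 1);
}

int main(void)
{
    struct toy_state s = { .cluster_bits = 16, .l2_bits = 13,
                           .l2_slice_size = 2048 };
    uint64_t off = (uint64_t)5000 << s.cluster_bits;   /* the 5000th guest cluster */

    /* byte offset of the slice inside its L2 table, as l2_load() computes */
    uint64_t start_of_slice = sizeof(uint64_t) *
        (uint64_t)(to_l2_index(&s, off) - to_l2_slice_index(&s, off));

    printf("L1 %" PRIu64 ", L2 index %d, slice index %d, slice offset %" PRIu64 "\n",
           to_l1_index(&s, off), to_l2_index(&s, off),
           to_l2_slice_index(&s, off), start_of_slice);
    return 0;
}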
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 92701ab7af..d46b69d7f3 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -277,7 +277,7 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
block_index = cluster_index & (s->refcount_block_size - 1);
*refcount = s->get_refcount(refcount_block, block_index);
- qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
+ qcow2_cache_put(s->refcount_block_cache, &refcount_block);
return 0;
}
@@ -421,7 +421,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
/* Now the new refcount block needs to be written to disk */
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
- qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, *refcount_block);
+ qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
goto fail;
@@ -449,7 +449,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
return -EAGAIN;
}
- qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
+ qcow2_cache_put(s->refcount_block_cache, refcount_block);
/*
* If we come here, we need to grow the refcount table. Again, a new
@@ -501,7 +501,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
fail:
if (*refcount_block != NULL) {
- qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
+ qcow2_cache_put(s->refcount_block_cache, refcount_block);
}
return ret;
}
@@ -623,7 +623,7 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
goto fail;
}
memset(refblock_data, 0, s->cluster_size);
- qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
+ qcow2_cache_entry_mark_dirty(s->refcount_block_cache,
refblock_data);
new_table[i] = block_offset;
@@ -656,11 +656,11 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
s->set_refcount(refblock_data, j, 1);
}
- qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
+ qcow2_cache_entry_mark_dirty(s->refcount_block_cache,
refblock_data);
}
- qcow2_cache_put(bs, s->refcount_block_cache, &refblock_data);
+ qcow2_cache_put(s->refcount_block_cache, &refblock_data);
}
assert(block_offset == table_offset);
@@ -836,7 +836,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
/* Load the refcount block and allocate it if needed */
if (table_index != old_table_index) {
if (refcount_block) {
- qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
+ qcow2_cache_put(s->refcount_block_cache, &refcount_block);
}
ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
if (ret < 0) {
@@ -845,8 +845,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
}
old_table_index = table_index;
- qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
- refcount_block);
+ qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
/* we can update the count and save it */
block_index = cluster_index & (s->refcount_block_size - 1);
@@ -872,16 +871,16 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
if (refcount == 0) {
void *table;
- table = qcow2_cache_is_table_offset(bs, s->refcount_block_cache,
+ table = qcow2_cache_is_table_offset(s->refcount_block_cache,
offset);
if (table != NULL) {
- qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
- qcow2_cache_discard(bs, s->refcount_block_cache, table);
+ qcow2_cache_put(s->refcount_block_cache, &refcount_block);
+ qcow2_cache_discard(s->refcount_block_cache, table);
}
- table = qcow2_cache_is_table_offset(bs, s->l2_table_cache, offset);
+ table = qcow2_cache_is_table_offset(s->l2_table_cache, offset);
if (table != NULL) {
- qcow2_cache_discard(bs, s->l2_table_cache, table);
+ qcow2_cache_discard(s->l2_table_cache, table);
}
if (s->discard_passthrough[type]) {
@@ -898,7 +897,7 @@ fail:
/* Write last changed block to disk */
if (refcount_block) {
- qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
+ qcow2_cache_put(s->refcount_block_cache, &refcount_block);
}
/*
@@ -1184,17 +1183,20 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int64_t l1_table_offset, int l1_size, int addend)
{
BDRVQcow2State *s = bs->opaque;
- uint64_t *l1_table, *l2_table, l2_offset, entry, l1_size2, refcount;
+ uint64_t *l1_table, *l2_slice, l2_offset, entry, l1_size2, refcount;
bool l1_allocated = false;
int64_t old_entry, old_l2_offset;
+ unsigned slice, slice_size2, n_slices;
int i, j, l1_modified = 0, nb_csectors;
int ret;
assert(addend >= -1 && addend <= 1);
- l2_table = NULL;
+ l2_slice = NULL;
l1_table = NULL;
l1_size2 = l1_size * sizeof(uint64_t);
+ slice_size2 = s->l2_slice_size * sizeof(uint64_t);
+ n_slices = s->cluster_size / slice_size2;
s->cache_discards = true;
@@ -1237,91 +1239,97 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
goto fail;
}
- ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
- (void**) &l2_table);
- if (ret < 0) {
- goto fail;
- }
+ for (slice = 0; slice < n_slices; slice++) {
+ ret = qcow2_cache_get(bs, s->l2_table_cache,
+ l2_offset + slice * slice_size2,
+ (void **) &l2_slice);
+ if (ret < 0) {
+ goto fail;
+ }
- for (j = 0; j < s->l2_size; j++) {
- uint64_t cluster_index;
- uint64_t offset;
-
- entry = be64_to_cpu(l2_table[j]);
- old_entry = entry;
- entry &= ~QCOW_OFLAG_COPIED;
- offset = entry & L2E_OFFSET_MASK;
-
- switch (qcow2_get_cluster_type(entry)) {
- case QCOW2_CLUSTER_COMPRESSED:
- nb_csectors = ((entry >> s->csize_shift) &
- s->csize_mask) + 1;
- if (addend != 0) {
- ret = update_refcount(bs,
- (entry & s->cluster_offset_mask) & ~511,
+ for (j = 0; j < s->l2_slice_size; j++) {
+ uint64_t cluster_index;
+ uint64_t offset;
+
+ entry = be64_to_cpu(l2_slice[j]);
+ old_entry = entry;
+ entry &= ~QCOW_OFLAG_COPIED;
+ offset = entry & L2E_OFFSET_MASK;
+
+ switch (qcow2_get_cluster_type(entry)) {
+ case QCOW2_CLUSTER_COMPRESSED:
+ nb_csectors = ((entry >> s->csize_shift) &
+ s->csize_mask) + 1;
+ if (addend != 0) {
+ ret = update_refcount(
+ bs, (entry & s->cluster_offset_mask) & ~511,
nb_csectors * 512, abs(addend), addend < 0,
QCOW2_DISCARD_SNAPSHOT);
- if (ret < 0) {
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ /* compressed clusters are never modified */
+ refcount = 2;
+ break;
+
+ case QCOW2_CLUSTER_NORMAL:
+ case QCOW2_CLUSTER_ZERO_ALLOC:
+ if (offset_into_cluster(s, offset)) {
+ /* Here l2_index means table (not slice) index */
+ int l2_index = slice * s->l2_slice_size + j;
+ qcow2_signal_corruption(
+ bs, true, -1, -1, "Cluster "
+ "allocation offset %#" PRIx64
+ " unaligned (L2 offset: %#"
+ PRIx64 ", L2 index: %#x)",
+ offset, l2_offset, l2_index);
+ ret = -EIO;
goto fail;
}
- }
- /* compressed clusters are never modified */
- refcount = 2;
- break;
-
- case QCOW2_CLUSTER_NORMAL:
- case QCOW2_CLUSTER_ZERO_ALLOC:
- if (offset_into_cluster(s, offset)) {
- qcow2_signal_corruption(bs, true, -1, -1, "Cluster "
- "allocation offset %#" PRIx64
- " unaligned (L2 offset: %#"
- PRIx64 ", L2 index: %#x)",
- offset, l2_offset, j);
- ret = -EIO;
- goto fail;
- }
- cluster_index = offset >> s->cluster_bits;
- assert(cluster_index);
- if (addend != 0) {
- ret = qcow2_update_cluster_refcount(bs,
- cluster_index, abs(addend), addend < 0,
- QCOW2_DISCARD_SNAPSHOT);
+ cluster_index = offset >> s->cluster_bits;
+ assert(cluster_index);
+ if (addend != 0) {
+ ret = qcow2_update_cluster_refcount(
+ bs, cluster_index, abs(addend), addend < 0,
+ QCOW2_DISCARD_SNAPSHOT);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ ret = qcow2_get_refcount(bs, cluster_index, &refcount);
if (ret < 0) {
goto fail;
}
- }
+ break;
- ret = qcow2_get_refcount(bs, cluster_index, &refcount);
- if (ret < 0) {
- goto fail;
- }
- break;
-
- case QCOW2_CLUSTER_ZERO_PLAIN:
- case QCOW2_CLUSTER_UNALLOCATED:
- refcount = 0;
- break;
+ case QCOW2_CLUSTER_ZERO_PLAIN:
+ case QCOW2_CLUSTER_UNALLOCATED:
+ refcount = 0;
+ break;
- default:
- abort();
- }
+ default:
+ abort();
+ }
- if (refcount == 1) {
- entry |= QCOW_OFLAG_COPIED;
- }
- if (entry != old_entry) {
- if (addend > 0) {
- qcow2_cache_set_dependency(bs, s->l2_table_cache,
- s->refcount_block_cache);
+ if (refcount == 1) {
+ entry |= QCOW_OFLAG_COPIED;
+ }
+ if (entry != old_entry) {
+ if (addend > 0) {
+ qcow2_cache_set_dependency(bs, s->l2_table_cache,
+ s->refcount_block_cache);
+ }
+ l2_slice[j] = cpu_to_be64(entry);
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache,
+ l2_slice);
}
- l2_table[j] = cpu_to_be64(entry);
- qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache,
- l2_table);
}
- }
- qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
+ }
if (addend != 0) {
ret = qcow2_update_cluster_refcount(bs, l2_offset >>
@@ -1348,8 +1356,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
ret = bdrv_flush(bs);
fail:
- if (l2_table) {
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (l2_slice) {
+ qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
}
s->cache_discards = false;
@@ -2849,7 +2857,7 @@ static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
new_reftable_size, new_refblock,
new_refblock_empty, allocated, errp);
if (ret < 0) {
- qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+ qcow2_cache_put(s->refcount_block_cache, &refblock);
return ret;
}
@@ -2862,7 +2870,7 @@ static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
if (new_refcount_bits < 64 && refcount >> new_refcount_bits) {
uint64_t offset;
- qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+ qcow2_cache_put(s->refcount_block_cache, &refblock);
offset = ((reftable_index << s->refcount_block_bits)
+ refblock_index) << s->cluster_bits;
@@ -2883,7 +2891,7 @@ static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
new_refblock_empty = new_refblock_empty && refcount == 0;
}
- qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+ qcow2_cache_put(s->refcount_block_cache, &refblock);
} else {
/* No refblock means every refcount is 0 */
for (refblock_index = 0; refblock_index < s->refcount_block_size;
@@ -3175,24 +3183,24 @@ static int qcow2_discard_refcount_block(BlockDriverState *bs,
offset_to_reftable_index(s, discard_block_offs),
discard_block_offs,
s->get_refcount(refblock, block_index));
- qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+ qcow2_cache_put(s->refcount_block_cache, &refblock);
return -EINVAL;
}
s->set_refcount(refblock, block_index, 0);
- qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, refblock);
+ qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock);
- qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+ qcow2_cache_put(s->refcount_block_cache, &refblock);
if (cluster_index < s->free_cluster_index) {
s->free_cluster_index = cluster_index;
}
- refblock = qcow2_cache_is_table_offset(bs, s->refcount_block_cache,
+ refblock = qcow2_cache_is_table_offset(s->refcount_block_cache,
discard_block_offs);
if (refblock) {
/* discard refblock from the cache if refblock is cached */
- qcow2_cache_discard(bs, s->refcount_block_cache, refblock);
+ qcow2_cache_discard(s->refcount_block_cache, refblock);
}
update_refcount_discard(bs, discard_block_offs, s->cluster_size);
@@ -3235,7 +3243,7 @@ int qcow2_shrink_reftable(BlockDriverState *bs)
} else {
unused_block = buffer_is_zero(refblock, s->cluster_size);
}
- qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+ qcow2_cache_put(s->refcount_block_cache, &refblock);
reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]);
}
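
The rewritten loop above walks each L2 table in slices of s->l2_slice_size
entries, so the amount of L2 metadata loaded per cache access is bounded by the
L2 cache entry size chosen at open time rather than by the cluster size.
qemu-iotests 061 (further down in this patch) exercises these slice paths by
amending an image opened with 4 KB cache entries; a minimal sketch of that
invocation, with a placeholder image name:

    # 4 KB L2 cache entries: the qcow2 code loads 4 KB slices instead of
    # whole L2 tables (64 KB with the default cluster size) while rewriting
    # the image.
    qemu-img amend -o compat=0.10 --image-opts \
        driver=qcow2,file.filename=test.qcow2,l2-cache-entry-size=4096
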
diff --git a/block/qcow2.c b/block/qcow2.c
index 801e29fc56..57a517e2bd 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -676,6 +676,11 @@ static QemuOptsList qcow2_runtime_opts = {
.help = "Maximum L2 table cache size",
},
{
+ .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Size of each entry in the L2 cache",
+ },
+ {
.name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
.type = QEMU_OPT_SIZE,
.help = "Maximum refcount block cache size",
@@ -706,8 +711,8 @@ static void cache_clean_timer_cb(void *opaque)
{
BlockDriverState *bs = opaque;
BDRVQcow2State *s = bs->opaque;
- qcow2_cache_clean_unused(bs, s->l2_table_cache);
- qcow2_cache_clean_unused(bs, s->refcount_block_cache);
+ qcow2_cache_clean_unused(s->l2_table_cache);
+ qcow2_cache_clean_unused(s->refcount_block_cache);
timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
(int64_t) s->cache_clean_interval * 1000);
}
@@ -747,6 +752,7 @@ static void qcow2_attach_aio_context(BlockDriverState *bs,
static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
uint64_t *l2_cache_size,
+ uint64_t *l2_cache_entry_size,
uint64_t *refcount_cache_size, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
@@ -762,6 +768,9 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
*refcount_cache_size = qemu_opt_get_size(opts,
QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);
+ *l2_cache_entry_size = qemu_opt_get_size(
+ opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size);
+
if (combined_cache_size_set) {
if (l2_cache_size_set && refcount_cache_size_set) {
error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
@@ -802,11 +811,21 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
/ DEFAULT_L2_REFCOUNT_SIZE_RATIO;
}
}
+
+ if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) ||
+ *l2_cache_entry_size > s->cluster_size ||
+ !is_power_of_2(*l2_cache_entry_size)) {
+ error_setg(errp, "L2 cache entry size must be a power of two "
+ "between %d and the cluster size (%d)",
+ 1 << MIN_CLUSTER_BITS, s->cluster_size);
+ return;
+ }
}
typedef struct Qcow2ReopenState {
Qcow2Cache *l2_table_cache;
Qcow2Cache *refcount_block_cache;
+ int l2_slice_size; /* Number of entries in a slice of the L2 table */
bool use_lazy_refcounts;
int overlap_check;
bool discard_passthrough[QCOW2_DISCARD_MAX];
@@ -823,7 +842,7 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
QemuOpts *opts = NULL;
const char *opt_overlap_check, *opt_overlap_check_template;
int overlap_check_template = 0;
- uint64_t l2_cache_size, refcount_cache_size;
+ uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size;
int i;
const char *encryptfmt;
QDict *encryptopts = NULL;
@@ -842,15 +861,15 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
}
/* get L2 table/refcount block cache size from command line options */
- read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size,
- &local_err);
+ read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
+ &refcount_cache_size, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fail;
}
- l2_cache_size /= s->cluster_size;
+ l2_cache_size /= l2_cache_entry_size;
if (l2_cache_size < MIN_L2_CACHE_SIZE) {
l2_cache_size = MIN_L2_CACHE_SIZE;
}
@@ -888,8 +907,11 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
}
}
- r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size);
- r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size);
+ r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t);
+ r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size,
+ l2_cache_entry_size);
+ r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size,
+ s->cluster_size);
if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
error_setg(errp, "Could not allocate metadata caches");
ret = -ENOMEM;
@@ -1044,13 +1066,14 @@ static void qcow2_update_options_commit(BlockDriverState *bs,
int i;
if (s->l2_table_cache) {
- qcow2_cache_destroy(bs, s->l2_table_cache);
+ qcow2_cache_destroy(s->l2_table_cache);
}
if (s->refcount_block_cache) {
- qcow2_cache_destroy(bs, s->refcount_block_cache);
+ qcow2_cache_destroy(s->refcount_block_cache);
}
s->l2_table_cache = r->l2_table_cache;
s->refcount_block_cache = r->refcount_block_cache;
+ s->l2_slice_size = r->l2_slice_size;
s->overlap_check = r->overlap_check;
s->use_lazy_refcounts = r->use_lazy_refcounts;
@@ -1073,10 +1096,10 @@ static void qcow2_update_options_abort(BlockDriverState *bs,
Qcow2ReopenState *r)
{
if (r->l2_table_cache) {
- qcow2_cache_destroy(bs, r->l2_table_cache);
+ qcow2_cache_destroy(r->l2_table_cache);
}
if (r->refcount_block_cache) {
- qcow2_cache_destroy(bs, r->refcount_block_cache);
+ qcow2_cache_destroy(r->refcount_block_cache);
}
qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
}
@@ -1460,7 +1483,7 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
}
- if (qcow2_load_autoloading_dirty_bitmaps(bs, &local_err)) {
+ if (qcow2_load_dirty_bitmaps(bs, &local_err)) {
update_header = false;
}
if (local_err != NULL) {
@@ -1514,10 +1537,10 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
s->l1_table = NULL;
cache_clean_timer_del(bs);
if (s->l2_table_cache) {
- qcow2_cache_destroy(bs, s->l2_table_cache);
+ qcow2_cache_destroy(s->l2_table_cache);
}
if (s->refcount_block_cache) {
- qcow2_cache_destroy(bs, s->refcount_block_cache);
+ qcow2_cache_destroy(s->refcount_block_cache);
}
qcrypto_block_free(s->crypto);
qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
@@ -2065,8 +2088,8 @@ static void qcow2_close(BlockDriverState *bs)
}
cache_clean_timer_del(bs);
- qcow2_cache_destroy(bs, s->l2_table_cache);
- qcow2_cache_destroy(bs, s->refcount_block_cache);
+ qcow2_cache_destroy(s->l2_table_cache);
+ qcow2_cache_destroy(s->refcount_block_cache);
qcrypto_block_free(s->crypto);
s->crypto = NULL;
@@ -3259,9 +3282,9 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
host_offset = allocation_start;
guest_offset = old_length;
while (nb_new_data_clusters) {
- int64_t guest_cluster = guest_offset >> s->cluster_bits;
- int64_t nb_clusters = MIN(nb_new_data_clusters,
- s->l2_size - guest_cluster % s->l2_size);
+ int64_t nb_clusters = MIN(
+ nb_new_data_clusters,
+ s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset));
QCowL2Meta allocation = {
.offset = guest_offset,
.alloc_offset = host_offset,
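
The block/qcow2.c hunks above add the l2-cache-entry-size runtime option:
read_cache_sizes() defaults it to the cluster size and rejects anything that is
not a power of two between 512 bytes and the cluster size, and l2-cache-size is
then divided by the entry size instead of the cluster size, so a smaller entry
size buys more, finer-grained cache entries for the same memory budget. A
hedged qemu-io sketch of an accepted and a rejected case (the image name is a
placeholder):

    # With 64 KB clusters, a 1 MB L2 cache now holds 256 entries of 4 KB
    # rather than 16 full tables:
    qemu-io -c "open -o l2-cache-size=1M,l2-cache-entry-size=4k test.qcow2"

    # Rejected: 4242 is not a power of two in [512, cluster size]
    qemu-io -c "open -o l2-cache-entry-size=4242 test.qcow2"
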
diff --git a/block/qcow2.h b/block/qcow2.h
index 46c8cf44ec..883802241f 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -68,7 +68,7 @@
#define MAX_CLUSTER_BITS 21
/* Must be at least 2 to cover COW */
-#define MIN_L2_CACHE_SIZE 2 /* clusters */
+#define MIN_L2_CACHE_SIZE 2 /* cache entries */
/* Must be at least 4 to cover all cases of refcount table growth */
#define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */
@@ -100,6 +100,7 @@
#define QCOW2_OPT_OVERLAP_INACTIVE_L2 "overlap-check.inactive-l2"
#define QCOW2_OPT_CACHE_SIZE "cache-size"
#define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size"
+#define QCOW2_OPT_L2_CACHE_ENTRY_SIZE "l2-cache-entry-size"
#define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size"
#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval"
@@ -251,6 +252,7 @@ typedef struct BDRVQcow2State {
int cluster_bits;
int cluster_size;
int cluster_sectors;
+ int l2_slice_size;
int l2_bits;
int l2_size;
int l1_size;
@@ -463,11 +465,21 @@ static inline int64_t size_to_l1(BDRVQcow2State *s, int64_t size)
return (size + (1ULL << shift) - 1) >> shift;
}
+static inline int offset_to_l1_index(BDRVQcow2State *s, uint64_t offset)
+{
+ return offset >> (s->l2_bits + s->cluster_bits);
+}
+
static inline int offset_to_l2_index(BDRVQcow2State *s, int64_t offset)
{
return (offset >> s->cluster_bits) & (s->l2_size - 1);
}
+static inline int offset_to_l2_slice_index(BDRVQcow2State *s, int64_t offset)
+{
+ return (offset >> s->cluster_bits) & (s->l2_slice_size - 1);
+}
+
static inline int64_t align_offset(int64_t offset, int n)
{
offset = (offset + n - 1) & ~(n - 1);
@@ -636,34 +648,33 @@ void qcow2_free_snapshots(BlockDriverState *bs);
int qcow2_read_snapshots(BlockDriverState *bs);
/* qcow2-cache.c functions */
-Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
-int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
+Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
+ unsigned table_size);
+int qcow2_cache_destroy(Qcow2Cache *c);
-void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
- void *table);
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
Qcow2Cache *dependency);
void qcow2_cache_depends_on_flush(Qcow2Cache *c);
-void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c);
+void qcow2_cache_clean_unused(Qcow2Cache *c);
int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
void **table);
int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
void **table);
-void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
-void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
- uint64_t offset);
-void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table);
+void qcow2_cache_put(Qcow2Cache *c, void **table);
+void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset);
+void qcow2_cache_discard(Qcow2Cache *c, void *table);
/* qcow2-bitmap.c functions */
int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
void **refcount_table,
int64_t *refcount_table_size);
-bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp);
+bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp);
int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp);
int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
diff --git a/block/sheepdog.c b/block/sheepdog.c
index af125a2c8d..ac02b10fe0 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1826,40 +1826,34 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
return 0;
}
-static int sd_prealloc(const char *filename, Error **errp)
+static int sd_prealloc(BlockDriverState *bs, int64_t old_size, int64_t new_size,
+ Error **errp)
{
BlockBackend *blk = NULL;
- BDRVSheepdogState *base = NULL;
+ BDRVSheepdogState *base = bs->opaque;
unsigned long buf_size;
uint32_t idx, max_idx;
uint32_t object_size;
- int64_t vdi_size;
void *buf = NULL;
int ret;
- blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
- if (blk == NULL) {
- ret = -EIO;
+ blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
+ BLK_PERM_ALL);
+
+ ret = blk_insert_bs(blk, bs, errp);
+ if (ret < 0) {
goto out_with_err_set;
}
blk_set_allow_write_beyond_eof(blk, true);
- vdi_size = blk_getlength(blk);
- if (vdi_size < 0) {
- ret = vdi_size;
- goto out;
- }
-
- base = blk_bs(blk)->opaque;
object_size = (UINT32_C(1) << base->inode.block_size_shift);
buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
buf = g_malloc0(buf_size);
- max_idx = DIV_ROUND_UP(vdi_size, buf_size);
+ max_idx = DIV_ROUND_UP(new_size, buf_size);
- for (idx = 0; idx < max_idx; idx++) {
+ for (idx = old_size / buf_size; idx < max_idx; idx++) {
/*
* The created image can be a cloned image, so we need to read
* a data from the source image.
@@ -2108,7 +2102,20 @@ static int sd_create(const char *filename, QemuOpts *opts,
}
if (prealloc) {
- ret = sd_prealloc(filename, errp);
+ BlockDriverState *bs;
+ QDict *opts;
+
+ opts = qdict_new();
+ qdict_put_str(opts, "driver", "sheepdog");
+ bs = bdrv_open(filename, NULL, opts, BDRV_O_PROTOCOL | BDRV_O_RDWR,
+ errp);
+ if (!bs) {
+ goto out;
+ }
+
+ ret = sd_prealloc(bs, 0, s->inode.vdi_size, errp);
+
+ bdrv_unref(bs);
}
out:
g_free(backing_file);
@@ -2173,15 +2180,16 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset,
int ret, fd;
unsigned int datalen;
uint64_t max_vdi_size;
+ int64_t old_size = s->inode.vdi_size;
- if (prealloc != PREALLOC_MODE_OFF) {
+ if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_FULL) {
error_setg(errp, "Unsupported preallocation mode '%s'",
PreallocMode_str(prealloc));
return -ENOTSUP;
}
max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
- if (offset < s->inode.vdi_size) {
+ if (offset < old_size) {
error_setg(errp, "shrinking is not supported");
return -EINVAL;
} else if (offset > max_vdi_size) {
@@ -2204,9 +2212,17 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset,
if (ret < 0) {
error_setg_errno(errp, -ret, "failed to update an inode");
+ return ret;
}
- return ret;
+ if (prealloc == PREALLOC_MODE_FULL) {
+ ret = sd_prealloc(bs, old_size, offset, errp);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ return 0;
}
/*
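
sd_truncate() above now accepts PREALLOC_MODE_FULL and, through the reworked
sd_prealloc(), writes only the range between the old and the new size instead
of the whole VDI. A rough sketch of triggering this from the command line,
assuming a qemu-img whose resize subcommand supports --preallocation and a
reachable Sheepdog cluster (the VDI name is a placeholder):

    # Grow an existing VDI by 1 GB and fully preallocate only the new area:
    qemu-img resize --preallocation=full sheepdog:///testvdi +1G
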
diff --git a/blockdev.c b/blockdev.c
index bdbdeae7e4..3fb1ca803c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2825,14 +2825,9 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
if (!has_persistent) {
persistent = false;
}
- if (!has_autoload) {
- autoload = false;
- }
- if (has_autoload && !persistent) {
- error_setg(errp, "Autoload flag must be used only for persistent "
- "bitmaps");
- return;
+ if (has_autoload) {
+ warn_report("Autoload option is deprecated and its value is ignored");
}
if (persistent &&
@@ -2847,7 +2842,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
}
bdrv_dirty_bitmap_set_persistance(bitmap, persistent);
- bdrv_dirty_bitmap_set_autoload(bitmap, autoload);
}
void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
@@ -3569,6 +3563,11 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
return;
}
+ /* Early check to avoid creating target */
+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR_SOURCE, errp)) {
+ return;
+ }
+
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi
index cd74767ed3..f1793692bb 100644
--- a/docs/qemu-block-drivers.texi
+++ b/docs/qemu-block-drivers.texi
@@ -845,6 +845,16 @@ QEMU transparently handles lock handover during shared storage migration. For
shared virtual disk images between multiple VMs, the "share-rw" device option
should be used.
+By default, the guest has exclusive write access to its disk image. If the
+guest can safely share the disk image with other writers, the @code{-device
+...,share-rw=on} parameter can be used. This is only safe if the guest is
+running software, such as a cluster file system, that coordinates disk accesses
+to avoid corruption.
+
+Note that share-rw=on only declares the guest's ability to share the disk.
+Some QEMU features, such as image file formats, require exclusive write access
+to the disk image, and this is unaffected by the share-rw=on option.
+
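
A minimal sketch of the sharing setup described above, assuming a raw image and
a virtio-blk frontend (the device model and file name are illustrative; an
image format that needs exclusive write access, such as qcow2, would still take
an exclusive lock regardless of this option):

    # Each guest that attaches the disk this way must run software, e.g. a
    # cluster file system, that coordinates the concurrent writes.
    qemu-system-x86_64 \
        -drive if=none,id=shared0,file=shared.img,format=raw \
        -device virtio-blk-pci,drive=shared0,share-rw=on
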
Alternatively, locking can be fully disabled by "locking=off" block device
option. In the command line, the option is usually in the form of
"file.locking=off" as the protocol driver is normally placed as a "file" child
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 3da8486ab1..e3f4bbf51d 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -66,7 +66,6 @@ void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value);
-void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload);
void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap,
bool persistent);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 8046c2da23..5c5921bfb7 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1593,9 +1593,9 @@
# Qcow2 disks support persistent bitmaps. Default is false for
# block-dirty-bitmap-add. (Since: 2.10)
#
-# @autoload: the bitmap will be automatically loaded when the image it is stored
-# in is opened. This flag may only be specified for persistent
-# bitmaps. Default is false for block-dirty-bitmap-add. (Since: 2.10)
+# @autoload: ignored and deprecated since 2.12.
+# Currently, all dirty tracking bitmaps are loaded from Qcow2 on
+# open.
#
# Since: 2.4
##
@@ -2521,6 +2521,11 @@
# @l2-cache-size: the maximum size of the L2 table cache in
# bytes (since 2.2)
#
+# @l2-cache-entry-size: the size of each entry in the L2 cache in
+# bytes. It must be a power of two between 512
+# and the cluster size. The default value is
+# the cluster size (since 2.12)
+#
# @refcount-cache-size: the maximum size of the refcount block cache
# in bytes (since 2.2)
#
@@ -2542,6 +2547,7 @@
'*overlap-check': 'Qcow2OverlapChecks',
'*cache-size': 'int',
'*l2-cache-size': 'int',
+ '*l2-cache-entry-size': 'int',
'*refcount-cache-size': 'int',
'*cache-clean-interval': 'int',
'*encrypt': 'BlockdevQcow2Encryption' } }
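
The new l2-cache-entry-size member is accepted wherever qcow2 blockdev options
are given, for example with the flat -blockdev syntax; a hedged sketch (node
name and file name are placeholders):

    # 16 KB L2 cache entries for a qcow2 node; the value must be a power of
    # two between 512 and the cluster size.
    qemu-system-x86_64 -blockdev \
        driver=qcow2,node-name=disk0,l2-cache-entry-size=16384,file.driver=file,file.filename=test.qcow2
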
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 769968aba4..137f5814a8 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -2757,6 +2757,13 @@ used and it will be removed with no replacement.
The ``convert -s snapshot_id_or_name'' argument is obsoleted
by the ``convert -l snapshot_param'' argument instead.
+@section QEMU Machine Protocol (QMP) commands
+
+@subsection block-dirty-bitmap-add "autoload" parameter (since 2.12.0)
+
+The "autoload" parameter is now ignored. All bitmaps are automatically loaded
+from qcow2 images.
+
@section System emulator human monitor commands
@subsection host_net_add (since 2.10.0)
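
With the change above, a persistent bitmap is requested with "persistent" alone
and is loaded again automatically the next time the qcow2 image is opened. A
rough sketch over a QMP monitor on stdio, following the pattern of
qemu-iotests 176 (binary name and file name are illustrative):

    printf '%s\n' \
        '{ "execute": "qmp_capabilities" }' \
        '{ "execute": "block-dirty-bitmap-add", "arguments": { "node": "drive0", "name": "bitmap0", "persistent": true } }' \
        '{ "execute": "quit" }' |
    qemu-system-x86_64 -nodefaults -display none -qmp stdio \
        -blockdev driver=qcow2,node-name=drive0,file.driver=file,file.filename=test.qcow2
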
diff --git a/qemu-img.texi b/qemu-img.texi
index fdcf120f36..8a26400adb 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -33,38 +33,14 @@ The following commands are supported:
Command parameters:
@table @var
-@item filename
- is a disk image filename
-
-@item --object @var{objectdef}
-
-is a QEMU user creatable object definition. See the @code{qemu(1)} manual
-page for a description of the object properties. The most common object
-type is a @code{secret}, which is used to supply passwords and/or encryption
-keys.
-
-@item --image-opts
-
-Indicates that the source @var{filename} parameter is to be interpreted as a
-full option string, not a plain filename. This parameter is mutually
-exclusive with the @var{-f} parameter.
-
-@item --target-image-opts
-Indicates that the @var{output_filename} parameter(s) are to be interpreted as
-a full option string, not a plain filename. This parameter is mutually
-exclusive with the @var{-O} parameters. It is currently required to also use
-the @var{-n} parameter to skip image creation. This restriction may be relaxed
-in a future release.
+@item filename
+is a disk image filename
@item fmt
is the disk image format. It is guessed automatically in most cases. See below
for a description of the supported disk formats.
-@item --backing-chain
-will enumerate information about backing files in a disk image chain. Refer
-below for further description.
-
@item size
is the disk image size in bytes. Optional suffixes @code{k} or @code{K}
(kilobyte, 1024) @code{M} (megabyte, 1024k) and @code{G} (gigabyte, 1024M)
@@ -74,42 +50,86 @@ and T (terabyte, 1024G) are supported. @code{b} is ignored.
is the destination disk image filename
@item output_fmt
- is the destination format
+is the destination format
+
@item options
is a comma separated list of format specific options in a
name=value format. Use @code{-o ?} for an overview of the options supported
by the used format or see the format descriptions below for details.
+
@item snapshot_param
is param used for internal snapshot, format is
'snapshot.id=[ID],snapshot.name=[NAME]' or '[ID_OR_NAME]'
+
@item snapshot_id_or_name
is deprecated, use snapshot_param instead
+@end table
+
+@table @option
+
+@item --object @var{objectdef}
+is a QEMU user creatable object definition. See the @code{qemu(1)} manual
+page for a description of the object properties. The most common object
+type is a @code{secret}, which is used to supply passwords and/or encryption
+keys.
+
+@item --image-opts
+Indicates that the source @var{filename} parameter is to be interpreted as a
+full option string, not a plain filename. This parameter is mutually
+exclusive with the @var{-f} parameter.
+
+@item --target-image-opts
+Indicates that the @var{output_filename} parameter(s) are to be interpreted as
+a full option string, not a plain filename. This parameter is mutually
+exclusive with the @var{-O} parameters. It is currently required to also use
+the @var{-n} parameter to skip image creation. This restriction may be relaxed
+in a future release.
+
+@item --force-share (-U)
+If specified, @code{qemu-img} will open the image in shared mode, allowing
+other QEMU processes to open it in write mode. For example, this can be used to
+get image information (with the 'info' subcommand) when the image is used by a
+running guest. Note that this could produce inconsistent results because of
+concurrent metadata changes, etc. This option is only allowed when opening
+images in read-only mode.
+
+@item --backing-chain
+will enumerate information about backing files in a disk image chain. Refer
+below for further description.
+
@item -c
indicates that target image must be compressed (qcow format only)
+
@item -h
with or without a command shows help and lists the supported formats
+
@item -p
display progress bar (compare, convert and rebase commands only).
If the @var{-p} option is not used for a command that supports it, the
progress is reported when the process receives a @code{SIGUSR1} or
@code{SIGINFO} signal.
+
@item -q
Quiet mode - do not print any output (except errors). There's no progress bar
in case both @var{-q} and @var{-p} options are used.
+
@item -S @var{size}
indicates the consecutive number of bytes that must contain only zeros
for qemu-img to create a sparse image during conversion. This value is rounded
down to the nearest 512 bytes. You may use the common size suffixes like
@code{k} for kilobytes.
+
@item -t @var{cache}
specifies the cache mode that should be used with the (destination) file. See
the documentation of the emulator's @code{-drive cache=...} option for allowed
values.
+
@item -T @var{src_cache}
specifies the cache mode that should be used with the source file(s). See
the documentation of the emulator's @code{-drive cache=...} option for allowed
values.
+
@end table
Parameters to snapshot subcommand:
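
The new --force-share (-U) flag is mainly useful for read-only inspection of an
image that a running guest already holds open with a write lock; a small
sketch (the image name is a placeholder):

    # 'info' opens the image read-only, so -U is allowed; the output may be
    # momentarily inconsistent while the guest keeps writing.
    qemu-img info -U test.qcow2
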
diff --git a/qemu-io.c b/qemu-io.c
index f554ab614b..2c00ea068e 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -11,6 +11,9 @@
#include "qemu/osdep.h"
#include <getopt.h>
#include <libgen.h>
+#ifndef _WIN32
+#include <termios.h>
+#endif
#include "qapi/error.h"
#include "qemu-io.h"
@@ -42,6 +45,26 @@ static bool imageOpts;
static ReadLineState *readline_state;
+static int ttyEOF;
+
+static int get_eof_char(void)
+{
+#ifdef _WIN32
+ return 0x4; /* Ctrl-D */
+#else
+ struct termios tty;
+ if (tcgetattr(STDIN_FILENO, &tty) != 0) {
+ if (errno == ENOTTY) {
+ return 0x0; /* just expect read() == 0 */
+ } else {
+ return 0x4; /* Ctrl-D */
+ }
+ }
+
+ return tty.c_cc[VEOF];
+#endif
+}
+
static int close_f(BlockBackend *blk, int argc, char **argv)
{
blk_unref(qemuio_blk);
@@ -323,7 +346,8 @@ static char *fetchline_readline(void)
readline_start(readline_state, get_prompt(), 0, readline_func, &line);
while (!line) {
int ch = getchar();
- if (ch == EOF) {
+ if (ttyEOF != 0x0 && ch == ttyEOF) {
+ printf("\n");
break;
}
readline_handle_byte(readline_state, ch);
@@ -593,6 +617,7 @@ int main(int argc, char **argv)
qemuio_add_command(&close_cmd);
if (isatty(STDIN_FILENO)) {
+ ttyEOF = get_eof_char();
readline_state = readline_init(readline_printf_func,
readline_flush_func,
NULL,
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
index 1ac5d56233..f6dce7947c 100644
--- a/tests/qemu-iotests/059.out
+++ b/tests/qemu-iotests/059.out
@@ -2358,5 +2358,5 @@ Offset Length Mapped to File
0x140000000 0x10000 0x50000 TEST_DIR/t-s003.vmdk
=== Testing afl image with a very large capacity ===
-qemu-img: Could not open 'TEST_DIR/afl9.IMGFMT': Could not open 'TEST_DIR/afl9.IMGFMT': Invalid argument
+qemu-img: Can't get image size 'TEST_DIR/afl9.IMGFMT': File too large
*** done
diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061
index f5678b10c9..911b6f2894 100755
--- a/tests/qemu-iotests/061
+++ b/tests/qemu-iotests/061
@@ -54,6 +54,22 @@ $QEMU_IO -c "read -P 0 0 128k" "$TEST_IMG" | _filter_qemu_io
_check_test_img
echo
+echo "=== Testing version downgrade with zero expansion and 4K cache entries ==="
+echo
+IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
+$QEMU_IO -c "write -z 0 128k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -z 32M 128k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c map "$TEST_IMG" | _filter_qemu_io
+$PYTHON qcow2.py "$TEST_IMG" dump-header
+$QEMU_IMG amend -o "compat=0.10" --image-opts \
+ driver=qcow2,file.filename=$TEST_IMG,l2-cache-entry-size=4096
+$PYTHON qcow2.py "$TEST_IMG" dump-header
+$QEMU_IO -c "read -P 0 0 128k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 32M 128k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c map "$TEST_IMG" | _filter_qemu_io
+_check_test_img
+
+echo
echo "=== Testing dirty version downgrade ==="
echo
IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out
index 942485de99..e857ef9a7d 100644
--- a/tests/qemu-iotests/061.out
+++ b/tests/qemu-iotests/061.out
@@ -52,6 +52,67 @@ read 131072/131072 bytes at offset 0
128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
No errors were found on the image.
+=== Testing version downgrade with zero expansion and 4K cache entries ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 33554432
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+128 KiB (0x20000) bytes allocated at offset 0 bytes (0x0)
+31.875 MiB (0x1fe0000) bytes not allocated at offset 128 KiB (0x20000)
+128 KiB (0x20000) bytes allocated at offset 32 MiB (0x2000000)
+31.875 MiB (0x1fe0000) bytes not allocated at offset 32.125 MiB (0x2020000)
+magic 0x514649fb
+version 3
+backing_file_offset 0x0
+backing_file_size 0x0
+cluster_bits 16
+size 67108864
+crypt_method 0
+l1_size 1
+l1_table_offset 0x30000
+refcount_table_offset 0x10000
+refcount_table_clusters 1
+nb_snapshots 0
+snapshot_offset 0x0
+incompatible_features 0x0
+compatible_features 0x1
+autoclear_features 0x0
+refcount_order 4
+header_length 104
+
+Header extension:
+magic 0x6803f857
+length 144
+data <binary>
+
+magic 0x514649fb
+version 2
+backing_file_offset 0x0
+backing_file_size 0x0
+cluster_bits 16
+size 67108864
+crypt_method 0
+l1_size 1
+l1_table_offset 0x30000
+refcount_table_offset 0x10000
+refcount_table_clusters 1
+nb_snapshots 0
+snapshot_offset 0x0
+incompatible_features 0x0
+compatible_features 0x0
+autoclear_features 0x0
+refcount_order 4
+header_length 72
+
+read 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 131072/131072 bytes at offset 33554432
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+64 MiB (0x4000000) bytes not allocated at offset 0 bytes (0x0)
+No errors were found on the image.
+
=== Testing dirty version downgrade ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
diff --git a/tests/qemu-iotests/103 b/tests/qemu-iotests/103
index d0cfab8844..2841318492 100755
--- a/tests/qemu-iotests/103
+++ b/tests/qemu-iotests/103
@@ -66,6 +66,14 @@ $QEMU_IO -c "open -o cache-size=1M,refcount-cache-size=2M $TEST_IMG" 2>&1 \
$QEMU_IO -c "open -o cache-size=0,l2-cache-size=0,refcount-cache-size=0 $TEST_IMG" \
2>&1 | _filter_testdir | _filter_imgfmt
+# Invalid cache entry sizes
+$QEMU_IO -c "open -o l2-cache-entry-size=256 $TEST_IMG" \
+ 2>&1 | _filter_testdir | _filter_imgfmt
+$QEMU_IO -c "open -o l2-cache-entry-size=4242 $TEST_IMG" \
+ 2>&1 | _filter_testdir | _filter_imgfmt
+$QEMU_IO -c "open -o l2-cache-entry-size=128k $TEST_IMG" \
+ 2>&1 | _filter_testdir | _filter_imgfmt
+
echo
echo '=== Testing valid option combinations ==='
echo
@@ -94,6 +102,15 @@ $QEMU_IO -c "open -o l2-cache-size=1M,refcount-cache-size=0.25M $TEST_IMG" \
-c 'read -P 42 0 64k' \
| _filter_qemu_io
+# Valid cache entry sizes
+$QEMU_IO -c "open -o l2-cache-entry-size=512 $TEST_IMG" \
+ 2>&1 | _filter_testdir | _filter_imgfmt
+$QEMU_IO -c "open -o l2-cache-entry-size=16k $TEST_IMG" \
+ 2>&1 | _filter_testdir | _filter_imgfmt
+$QEMU_IO -c "open -o l2-cache-entry-size=64k $TEST_IMG" \
+ 2>&1 | _filter_testdir | _filter_imgfmt
+
+
echo
echo '=== Testing minimal L2 cache and COW ==='
echo
diff --git a/tests/qemu-iotests/103.out b/tests/qemu-iotests/103.out
index b7aaadf89a..bd45d3875a 100644
--- a/tests/qemu-iotests/103.out
+++ b/tests/qemu-iotests/103.out
@@ -9,6 +9,9 @@ can't open device TEST_DIR/t.IMGFMT: cache-size, l2-cache-size and refcount-cach
can't open device TEST_DIR/t.IMGFMT: l2-cache-size may not exceed cache-size
can't open device TEST_DIR/t.IMGFMT: refcount-cache-size may not exceed cache-size
can't open device TEST_DIR/t.IMGFMT: cache-size, l2-cache-size and refcount-cache-size may not be set the same time
+can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of two between 512 and the cluster size (65536)
+can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of two between 512 and the cluster size (65536)
+can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of two between 512 and the cluster size (65536)
=== Testing valid option combinations ===
diff --git a/tests/qemu-iotests/137 b/tests/qemu-iotests/137
index 5a01250005..87965625d8 100755
--- a/tests/qemu-iotests/137
+++ b/tests/qemu-iotests/137
@@ -83,6 +83,9 @@ $QEMU_IO \
-c "reopen -o overlap-check.inactive-l2=off" \
-c "reopen -o cache-size=1M" \
-c "reopen -o l2-cache-size=512k" \
+ -c "reopen -o l2-cache-entry-size=512" \
+ -c "reopen -o l2-cache-entry-size=4k" \
+ -c "reopen -o l2-cache-entry-size=64k" \
-c "reopen -o refcount-cache-size=128k" \
-c "reopen -o cache-clean-interval=5" \
-c "reopen -o cache-clean-interval=0" \
@@ -107,6 +110,8 @@ $QEMU_IO \
-c "reopen -o cache-size=1M,l2-cache-size=2M" \
-c "reopen -o cache-size=1M,refcount-cache-size=2M" \
-c "reopen -o l2-cache-size=256T" \
+ -c "reopen -o l2-cache-entry-size=33k" \
+ -c "reopen -o l2-cache-entry-size=128k" \
-c "reopen -o refcount-cache-size=256T" \
-c "reopen -o overlap-check=constant,overlap-check.template=all" \
-c "reopen -o overlap-check=blubb" \
diff --git a/tests/qemu-iotests/137.out b/tests/qemu-iotests/137.out
index 05efd74d17..e28e1eadba 100644
--- a/tests/qemu-iotests/137.out
+++ b/tests/qemu-iotests/137.out
@@ -20,6 +20,8 @@ cache-size, l2-cache-size and refcount-cache-size may not be set the same time
l2-cache-size may not exceed cache-size
refcount-cache-size may not exceed cache-size
L2 cache size too big
+L2 cache entry size must be a power of two between 512 and the cluster size (65536)
+L2 cache entry size must be a power of two between 512 and the cluster size (65536)
L2 cache size too big
Conflicting values for qcow2 options 'overlap-check' ('constant') and 'overlap-check.template' ('all')
Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all
diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155
index fc9fa975be..42dae04c83 100755
--- a/tests/qemu-iotests/155
+++ b/tests/qemu-iotests/155
@@ -64,7 +64,7 @@ class BaseClass(iotests.QMPTestCase):
'file': {'driver': 'file',
'filename': source_img}}
self.vm.add_blockdev(self.qmp_to_opts(blockdev))
- self.vm.add_device('floppy,id=qdev0,drive=source')
+ self.vm.add_device('virtio-blk,id=qdev0,drive=source')
self.vm.launch()
self.assertIntactSourceBackingChain()
@@ -173,21 +173,24 @@ class MirrorBaseClass(BaseClass):
def testFull(self):
self.runMirror('full')
- node = self.findBlockNode('target', 'qdev0')
+ node = self.findBlockNode('target',
+ '/machine/peripheral/qdev0/virtio-backend')
self.assertCorrectBackingImage(node, None)
self.assertIntactSourceBackingChain()
def testTop(self):
self.runMirror('top')
- node = self.findBlockNode('target', 'qdev0')
+ node = self.findBlockNode('target',
+ '/machine/peripheral/qdev0/virtio-backend')
self.assertCorrectBackingImage(node, back2_img)
self.assertIntactSourceBackingChain()
def testNone(self):
self.runMirror('none')
- node = self.findBlockNode('target', 'qdev0')
+ node = self.findBlockNode('target',
+ '/machine/peripheral/qdev0/virtio-backend')
self.assertCorrectBackingImage(node, source_img)
self.assertIntactSourceBackingChain()
@@ -239,7 +242,8 @@ class TestCommit(BaseClass):
self.vm.event_wait('BLOCK_JOB_COMPLETED')
- node = self.findBlockNode(None, 'qdev0')
+ node = self.findBlockNode(None,
+ '/machine/peripheral/qdev0/virtio-backend')
self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename',
back1_img)
self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename',
diff --git a/tests/qemu-iotests/165 b/tests/qemu-iotests/165
index a3932db3de..2936929627 100755
--- a/tests/qemu-iotests/165
+++ b/tests/qemu-iotests/165
@@ -64,7 +64,7 @@ class TestPersistentDirtyBitmap(iotests.QMPTestCase):
def qmpAddBitmap(self):
self.vm.qmp('block-dirty-bitmap-add', node='drive0',
- name='bitmap0', persistent=True, autoload=True)
+ name='bitmap0', persistent=True)
def test_persistent(self):
self.vm = self.mkVm()
diff --git a/tests/qemu-iotests/176 b/tests/qemu-iotests/176
index d38b3aeb91..32baa116dd 100755
--- a/tests/qemu-iotests/176
+++ b/tests/qemu-iotests/176
@@ -95,7 +95,7 @@ case $reason in
"file": { "driver": "file", "filename": "$TEST_IMG" } } }
{ "execute": "block-dirty-bitmap-add",
"arguments": { "node": "drive0", "name": "bitmap0",
- "persistent": true, "autoload": true } }
+ "persistent": true } }
{ "execute": "quit" }
EOF
;;
diff --git a/tests/qemu-iotests/sample_images/afl9.vmdk.bz2 b/tests/qemu-iotests/sample_images/afl9.vmdk.bz2
index 03615d36a1..9fcd0af45a 100644
--- a/tests/qemu-iotests/sample_images/afl9.vmdk.bz2
+++ b/tests/qemu-iotests/sample_images/afl9.vmdk.bz2
Binary files differ