diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2013-03-28 12:57:37 -0500 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2013-03-28 12:57:37 -0500 |
commit | fde245ca7ea790495db370cd260259595dbdf874 (patch) | |
tree | 64e544e1075625a90214a0a2afebc76686b44ee0 | |
parent | 837df37e881e14ffdb05c2b912b2ad30b851c9d3 (diff) | |
parent | 5d186eb03eb37b257e29a4731ca484362d5fc4e4 (diff) |
Merge remote-tracking branch 'stefanha/block' into staging
# By Kevin Wolf (22) and Peter Lieven (1)
# Via Stefan Hajnoczi
* stefanha/block: (23 commits)
block: Fix direct use of protocols as driver for bdrv_open()
qcow2: Gather clusters in a looping loop
qcow2: Move cluster gathering to a non-looping loop
qcow2: Allow requests with multiple l2metas
qcow2: Use byte granularity in qcow2_alloc_cluster_offset()
qcow2: Prepare handle_alloc/copied() for byte granularity
qcow2: handle_copied(): Implement non-zero host_offset
qcow2: handle_copied(): Get rid of keep_clusters parameter
qcow2: handle_copied(): Get rid of nb_clusters parameter
qcow2: Factor out handle_copied()
qcow2: Clean up handle_alloc()
qcow2: Finalise interface of handle_alloc()
qcow2: handle_alloc(): Get rid of keep_clusters parameter
qcow2: handle_alloc(): Get rid of nb_clusters parameter
qcow2: Factor out handle_alloc()
qcow2: Decouple cluster allocation from cluster reuse code
qcow2: Change handle_dependency to byte granularity
qcow2: Improve check for overlapping allocations
qcow2: Handle dependencies earlier
qcow2: Remove bogus unlock of s->lock
...
-rw-r--r-- | block.c | 26 | ||||
-rw-r--r-- | block/qcow2-cluster.c | 509 | ||||
-rw-r--r-- | block/qcow2-refcount.c | 4 | ||||
-rw-r--r-- | block/qcow2.c | 16 | ||||
-rw-r--r-- | block/qcow2.h | 29 | ||||
-rw-r--r-- | tests/qemu-iotests/038.out | 10 | ||||
-rw-r--r-- | tests/qemu-iotests/044.out | 4 | ||||
-rwxr-xr-x | tests/qemu-iotests/046 | 49 | ||||
-rw-r--r-- | tests/qemu-iotests/046.out | 76 | ||||
-rw-r--r-- | trace-events | 2 | ||||
-rw-r--r-- | vl.c | 4 |
11 files changed, 553 insertions, 176 deletions
@@ -680,6 +680,18 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, trace_bdrv_open_common(bs, filename, flags, drv->format_name); + if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) { + return -ENOTSUP; + } + + /* bdrv_open() with directly using a protocol as drv. This layer is already + * opened, so assign it to bs (while file becomes a closed BlockDriverState) + * and return immediately. */ + if (file != NULL && drv->bdrv_file_open) { + bdrv_swap(file, bs); + return 0; + } + bs->open_flags = flags; bs->buffer_alignment = 512; @@ -694,10 +706,6 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, bs->filename[0] = '\0'; } - if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) { - return -ENOTSUP; - } - bs->drv = drv; bs->opaque = g_malloc0(drv->instance_size); @@ -708,13 +716,9 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, /* Open the image, either directly or using a protocol */ if (drv->bdrv_file_open) { - if (file != NULL) { - bdrv_swap(file, bs); - ret = 0; - } else { - assert(drv->bdrv_parse_filename || filename != NULL); - ret = drv->bdrv_file_open(bs, filename, options, open_flags); - } + assert(file == NULL); + assert(drv->bdrv_parse_filename || filename != NULL); + ret = drv->bdrv_file_open(bs, filename, options, open_flags); } else { assert(file != NULL); bs->file = file; diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index d72d063e6d..c71470a3db 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -759,31 +759,50 @@ out: * Check if there already is an AIO write request in flight which allocates * the same cluster. In this case we need to wait until the previous * request has completed and updated the L2 table accordingly. + * + * Returns: + * 0 if there was no dependency. *cur_bytes indicates the number of + * bytes from guest_offset that can be read before the next + * dependency must be processed (or the request is complete) + * + * -EAGAIN if we had to wait for another request, previously gathered + * information on cluster allocation may be invalid now. The caller + * must start over anyway, so consider *cur_bytes undefined. */ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, - unsigned int *nb_clusters) + uint64_t *cur_bytes, QCowL2Meta **m) { BDRVQcowState *s = bs->opaque; QCowL2Meta *old_alloc; + uint64_t bytes = *cur_bytes; QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) { - uint64_t start = guest_offset >> s->cluster_bits; - uint64_t end = start + *nb_clusters; - uint64_t old_start = old_alloc->offset >> s->cluster_bits; - uint64_t old_end = old_start + old_alloc->nb_clusters; + uint64_t start = guest_offset; + uint64_t end = start + bytes; + uint64_t old_start = l2meta_cow_start(old_alloc); + uint64_t old_end = l2meta_cow_end(old_alloc); - if (end < old_start || start > old_end) { + if (end <= old_start || start >= old_end) { /* No intersection */ } else { if (start < old_start) { /* Stop at the start of a running allocation */ - *nb_clusters = old_start - start; + bytes = old_start - start; } else { - *nb_clusters = 0; + bytes = 0; + } + + /* Stop if already an l2meta exists. After yielding, it wouldn't + * be valid any more, so we'd have to clean up the old L2Metas + * and deal with requests depending on them before starting to + * gather new ones. Not worth the trouble. */ + if (bytes == 0 && *m) { + *cur_bytes = 0; + return 0; } - if (*nb_clusters == 0) { + if (bytes == 0) { /* Wait for the dependency to complete. We need to recheck * the free/allocated clusters when we continue. */ qemu_co_mutex_unlock(&s->lock); @@ -794,14 +813,118 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, } } - if (!*nb_clusters) { - abort(); - } + /* Make sure that existing clusters and new allocations are only used up to + * the next dependency if we shortened the request above */ + *cur_bytes = bytes; return 0; } /* + * Checks how many already allocated clusters that don't require a copy on + * write there are at the given guest_offset (up to *bytes). If + * *host_offset is not zero, only physically contiguous clusters beginning at + * this host offset are counted. + * + * Note that guest_offset may not be cluster aligned. In this case, the + * returned *host_offset points to exact byte referenced by guest_offset and + * therefore isn't cluster aligned as well. + * + * Returns: + * 0: if no allocated clusters are available at the given offset. + * *bytes is normally unchanged. It is set to 0 if the cluster + * is allocated and doesn't need COW, but doesn't have the right + * physical offset. + * + * 1: if allocated clusters that don't require a COW are available at + * the requested offset. *bytes may have decreased and describes + * the length of the area that can be written to. + * + * -errno: in error cases + */ +static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) +{ + BDRVQcowState *s = bs->opaque; + int l2_index; + uint64_t cluster_offset; + uint64_t *l2_table; + unsigned int nb_clusters; + unsigned int keep_clusters; + int ret, pret; + + trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset, + *bytes); + + assert(*host_offset == 0 || offset_into_cluster(s, guest_offset) + == offset_into_cluster(s, *host_offset)); + + /* + * Calculate the number of clusters to look for. We stop at L2 table + * boundaries to keep things simple. + */ + nb_clusters = + size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); + + l2_index = offset_to_l2_index(s, guest_offset); + nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); + + /* Find L2 entry for the first involved cluster */ + ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index); + if (ret < 0) { + return ret; + } + + cluster_offset = be64_to_cpu(l2_table[l2_index]); + + /* Check how many clusters are already allocated and don't need COW */ + if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL + && (cluster_offset & QCOW_OFLAG_COPIED)) + { + /* If a specific host_offset is required, check it */ + bool offset_matches = + (cluster_offset & L2E_OFFSET_MASK) == *host_offset; + + if (*host_offset != 0 && !offset_matches) { + *bytes = 0; + ret = 0; + goto out; + } + + /* We keep all QCOW_OFLAG_COPIED clusters */ + keep_clusters = + count_contiguous_clusters(nb_clusters, s->cluster_size, + &l2_table[l2_index], 0, + QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO); + assert(keep_clusters <= nb_clusters); + + *bytes = MIN(*bytes, + keep_clusters * s->cluster_size + - offset_into_cluster(s, guest_offset)); + + ret = 1; + } else { + ret = 0; + } + + /* Cleanup */ +out: + pret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + if (pret < 0) { + return pret; + } + + /* Only return a host offset if we actually made progress. Otherwise we + * would make requirements for handle_alloc() that it can't fulfill */ + if (ret) { + *host_offset = (cluster_offset & L2E_OFFSET_MASK) + + offset_into_cluster(s, guest_offset); + } + + return ret; +} + +/* * Allocates new clusters for the given guest_offset. * * At most *nb_clusters are allocated, and on return *nb_clusters is updated to @@ -824,16 +947,10 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, uint64_t *host_offset, unsigned int *nb_clusters) { BDRVQcowState *s = bs->opaque; - int ret; trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, *host_offset, *nb_clusters); - ret = handle_dependencies(bs, guest_offset, nb_clusters); - if (ret < 0) { - return ret; - } - /* Allocate new clusters */ trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); if (*host_offset == 0) { @@ -845,7 +962,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, *host_offset = cluster_offset; return 0; } else { - ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); + int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); if (ret < 0) { return ret; } @@ -855,6 +972,151 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, } /* + * Allocates new clusters for an area that either is yet unallocated or needs a + * copy on write. If *host_offset is non-zero, clusters are only allocated if + * the new allocation can match the specified host offset. + * + * Note that guest_offset may not be cluster aligned. In this case, the + * returned *host_offset points to exact byte referenced by guest_offset and + * therefore isn't cluster aligned as well. + * + * Returns: + * 0: if no clusters could be allocated. *bytes is set to 0, + * *host_offset is left unchanged. + * + * 1: if new clusters were allocated. *bytes may be decreased if the + * new allocation doesn't cover all of the requested area. + * *host_offset is updated to contain the host offset of the first + * newly allocated cluster. + * + * -errno: in error cases + */ +static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) +{ + BDRVQcowState *s = bs->opaque; + int l2_index; + uint64_t *l2_table; + uint64_t entry; + unsigned int nb_clusters; + int ret; + + uint64_t alloc_cluster_offset; + + trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset, + *bytes); + assert(*bytes > 0); + + /* + * Calculate the number of clusters to look for. We stop at L2 table + * boundaries to keep things simple. + */ + nb_clusters = + size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); + + l2_index = offset_to_l2_index(s, guest_offset); + nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); + + /* Find L2 entry for the first involved cluster */ + ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index); + if (ret < 0) { + return ret; + } + + entry = be64_to_cpu(l2_table[l2_index]); + + /* For the moment, overwrite compressed clusters one by one */ + if (entry & QCOW_OFLAG_COMPRESSED) { + nb_clusters = 1; + } else { + nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index); + } + + /* This function is only called when there were no non-COW clusters, so if + * we can't find any unallocated or COW clusters either, something is + * wrong with our code. */ + assert(nb_clusters > 0); + + ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + if (ret < 0) { + return ret; + } + + /* Allocate, if necessary at a given offset in the image file */ + alloc_cluster_offset = start_of_cluster(s, *host_offset); + ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset, + &nb_clusters); + if (ret < 0) { + goto fail; + } + + /* Can't extend contiguous allocation */ + if (nb_clusters == 0) { + *bytes = 0; + return 0; + } + + /* + * Save info needed for meta data update. + * + * requested_sectors: Number of sectors from the start of the first + * newly allocated cluster to the end of the (possibly shortened + * before) write request. + * + * avail_sectors: Number of sectors from the start of the first + * newly allocated to the end of the last newly allocated cluster. + * + * nb_sectors: The number of sectors from the start of the first + * newly allocated cluster to the end of the area that the write + * request actually writes to (excluding COW at the end) + */ + int requested_sectors = + (*bytes + offset_into_cluster(s, guest_offset)) + >> BDRV_SECTOR_BITS; + int avail_sectors = nb_clusters + << (s->cluster_bits - BDRV_SECTOR_BITS); + int alloc_n_start = offset_into_cluster(s, guest_offset) + >> BDRV_SECTOR_BITS; + int nb_sectors = MIN(requested_sectors, avail_sectors); + QCowL2Meta *old_m = *m; + + *m = g_malloc0(sizeof(**m)); + + **m = (QCowL2Meta) { + .next = old_m, + + .alloc_offset = alloc_cluster_offset, + .offset = start_of_cluster(s, guest_offset), + .nb_clusters = nb_clusters, + .nb_available = nb_sectors, + + .cow_start = { + .offset = 0, + .nb_sectors = alloc_n_start, + }, + .cow_end = { + .offset = nb_sectors * BDRV_SECTOR_SIZE, + .nb_sectors = avail_sectors - nb_sectors, + }, + }; + qemu_co_queue_init(&(*m)->dependent_requests); + QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); + + *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset); + *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE) + - offset_into_cluster(s, guest_offset)); + assert(*bytes != 0); + + return 1; + +fail: + if (*m && (*m)->nb_clusters > 0) { + QLIST_REMOVE(*m, next_in_flight); + } + return ret; +} + +/* * alloc_cluster_offset * * For a given offset on the virtual disk, find the cluster offset in qcow2 @@ -877,161 +1139,110 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m) { BDRVQcowState *s = bs->opaque; - int l2_index, ret, sectors; - uint64_t *l2_table; - unsigned int nb_clusters, keep_clusters; + uint64_t start, remaining; uint64_t cluster_offset; + uint64_t cur_bytes; + int ret; trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, n_start, n_end); - /* Find L2 entry for the first involved cluster */ -again: - ret = get_cluster_table(bs, offset, &l2_table, &l2_index); - if (ret < 0) { - return ret; - } - - /* - * Calculate the number of clusters to look for. We stop at L2 table - * boundaries to keep things simple. - */ - nb_clusters = MIN(size_to_clusters(s, n_end << BDRV_SECTOR_BITS), - s->l2_size - l2_index); + assert(n_start * BDRV_SECTOR_SIZE == offset_into_cluster(s, offset)); + offset = start_of_cluster(s, offset); - cluster_offset = be64_to_cpu(l2_table[l2_index]); +again: + start = offset + (n_start << BDRV_SECTOR_BITS); + remaining = (n_end - n_start) << BDRV_SECTOR_BITS; + cluster_offset = 0; + *host_offset = 0; + cur_bytes = 0; + *m = NULL; - /* - * Check how many clusters are already allocated and don't need COW, and how - * many need a new allocation. - */ - if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL - && (cluster_offset & QCOW_OFLAG_COPIED)) - { - /* We keep all QCOW_OFLAG_COPIED clusters */ - keep_clusters = - count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], 0, - QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO); - assert(keep_clusters <= nb_clusters); - nb_clusters -= keep_clusters; - } else { - keep_clusters = 0; - cluster_offset = 0; - } + while (true) { - if (nb_clusters > 0) { - /* For the moment, overwrite compressed clusters one by one */ - uint64_t entry = be64_to_cpu(l2_table[l2_index + keep_clusters]); - if (entry & QCOW_OFLAG_COMPRESSED) { - nb_clusters = 1; - } else { - nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, - l2_index + keep_clusters); + if (!*host_offset) { + *host_offset = start_of_cluster(s, cluster_offset); } - } - cluster_offset &= L2E_OFFSET_MASK; + assert(remaining >= cur_bytes); - /* - * The L2 table isn't used any more after this. As long as the cache works - * synchronously, it's important to release it before calling - * do_alloc_cluster_offset, which may yield if we need to wait for another - * request to complete. If we still had the reference, we could use up the - * whole cache with sleeping requests. - */ - ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); - if (ret < 0) { - return ret; - } + start += cur_bytes; + remaining -= cur_bytes; + cluster_offset += cur_bytes; - /* If there is something left to allocate, do that now */ - if (nb_clusters > 0) { - uint64_t alloc_offset; - uint64_t alloc_cluster_offset; - uint64_t keep_bytes = keep_clusters * s->cluster_size; - - /* Calculate start and size of allocation */ - alloc_offset = offset + keep_bytes; - - if (keep_clusters == 0) { - alloc_cluster_offset = 0; - } else { - alloc_cluster_offset = cluster_offset + keep_bytes; + if (remaining == 0) { + break; } - /* Allocate, if necessary at a given offset in the image file */ - ret = do_alloc_cluster_offset(bs, alloc_offset, &alloc_cluster_offset, - &nb_clusters); + cur_bytes = remaining; + + /* + * Now start gathering as many contiguous clusters as possible: + * + * 1. Check for overlaps with in-flight allocations + * + * a) Overlap not in the first cluster -> shorten this request and + * let the caller handle the rest in its next loop iteration. + * + * b) Real overlaps of two requests. Yield and restart the search + * for contiguous clusters (the situation could have changed + * while we were sleeping) + * + * c) TODO: Request starts in the same cluster as the in-flight + * allocation ends. Shorten the COW of the in-fight allocation, + * set cluster_offset to write to the same cluster and set up + * the right synchronisation between the in-flight request and + * the new one. + */ + ret = handle_dependencies(bs, start, &cur_bytes, m); if (ret == -EAGAIN) { + /* Currently handle_dependencies() doesn't yield if we already had + * an allocation. If it did, we would have to clean up the L2Meta + * structs before starting over. */ + assert(*m == NULL); goto again; } else if (ret < 0) { - goto fail; + return ret; + } else if (cur_bytes == 0) { + break; + } else { + /* handle_dependencies() may have decreased cur_bytes (shortened + * the allocations below) so that the next dependency is processed + * correctly during the next loop iteration. */ } - /* save info needed for meta data update */ - if (nb_clusters > 0) { - /* - * requested_sectors: Number of sectors from the start of the first - * newly allocated cluster to the end of the (possibly shortened - * before) write request. - * - * avail_sectors: Number of sectors from the start of the first - * newly allocated to the end of the last newly allocated cluster. - * - * nb_sectors: The number of sectors from the start of the first - * newly allocated cluster to the end of the aread that the write - * request actually writes to (excluding COW at the end) - */ - int requested_sectors = n_end - keep_clusters * s->cluster_sectors; - int avail_sectors = nb_clusters - << (s->cluster_bits - BDRV_SECTOR_BITS); - int alloc_n_start = keep_clusters == 0 ? n_start : 0; - int nb_sectors = MIN(requested_sectors, avail_sectors); - - if (keep_clusters == 0) { - cluster_offset = alloc_cluster_offset; - } - - *m = g_malloc0(sizeof(**m)); - - **m = (QCowL2Meta) { - .alloc_offset = alloc_cluster_offset, - .offset = alloc_offset & ~(s->cluster_size - 1), - .nb_clusters = nb_clusters, - .nb_available = nb_sectors, - - .cow_start = { - .offset = 0, - .nb_sectors = alloc_n_start, - }, - .cow_end = { - .offset = nb_sectors * BDRV_SECTOR_SIZE, - .nb_sectors = avail_sectors - nb_sectors, - }, - }; - qemu_co_queue_init(&(*m)->dependent_requests); - QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); + /* + * 2. Count contiguous COPIED clusters. + */ + ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m); + if (ret < 0) { + return ret; + } else if (ret) { + continue; + } else if (cur_bytes == 0) { + break; } - } - /* Some cleanup work */ - sectors = (keep_clusters + nb_clusters) << (s->cluster_bits - 9); - if (sectors > n_end) { - sectors = n_end; + /* + * 3. If the request still hasn't completed, allocate new clusters, + * considering any cluster_offset of steps 1c or 2. + */ + ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m); + if (ret < 0) { + return ret; + } else if (ret) { + continue; + } else { + assert(cur_bytes == 0); + break; + } } - assert(sectors > n_start); - *num = sectors - n_start; - *host_offset = cluster_offset; + *num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS); + assert(*num > 0); + assert(*host_offset != 0); return 0; - -fail: - if (*m && (*m)->nb_clusters > 0) { - QLIST_REMOVE(*m, next_in_flight); - } - return ret; } static int decompress_buffer(uint8_t *out_buf, int out_buf_size, diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 9bfb390519..c38e970bf2 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1152,9 +1152,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, size = bdrv_getlength(bs->file); nb_clusters = size_to_clusters(s, size); - res->bfi.total_clusters = nb_clusters; refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t)); + res->bfi.total_clusters = + size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE); + /* header */ inc_refcounts(bs, res, refcount_table, nb_clusters, 0, s->cluster_size); diff --git a/block/qcow2.c b/block/qcow2.c index 8ea696a1aa..7e7d775b37 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -858,7 +858,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, goto fail; } - if (l2meta != NULL) { + while (l2meta != NULL) { + QCowL2Meta *next; + ret = qcow2_alloc_cluster_link_l2(bs, l2meta); if (ret < 0) { goto fail; @@ -869,12 +871,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, QLIST_REMOVE(l2meta, next_in_flight); } - qemu_co_mutex_unlock(&s->lock); qemu_co_queue_restart_all(&l2meta->dependent_requests); - qemu_co_mutex_lock(&s->lock); + next = l2meta->next; g_free(l2meta); - l2meta = NULL; + l2meta = next; } remaining_sectors -= cur_nr_sectors; @@ -887,12 +888,17 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, fail: qemu_co_mutex_unlock(&s->lock); - if (l2meta != NULL) { + while (l2meta != NULL) { + QCowL2Meta *next; + if (l2meta->nb_clusters != 0) { QLIST_REMOVE(l2meta, next_in_flight); } qemu_co_queue_restart_all(&l2meta->dependent_requests); + + next = l2meta->next; g_free(l2meta); + l2meta = next; } qemu_iovec_destroy(&hd_qiov); diff --git a/block/qcow2.h b/block/qcow2.h index e4b5e11a91..bf8db2abd3 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -250,6 +250,9 @@ typedef struct QCowL2Meta */ Qcow2COWRegion cow_end; + /** Pointer to next L2Meta of the same write request */ + struct QCowL2Meta *next; + QLIST_ENTRY(QCowL2Meta) next_in_flight; } QCowL2Meta; @@ -266,6 +269,16 @@ enum { #define REFT_OFFSET_MASK 0xffffffffffffff00ULL +static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset) +{ + return offset & ~(s->cluster_size - 1); +} + +static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset) +{ + return offset & (s->cluster_size - 1); +} + static inline int size_to_clusters(BDRVQcowState *s, int64_t size) { return (size + (s->cluster_size - 1)) >> s->cluster_bits; @@ -277,6 +290,11 @@ static inline int size_to_l1(BDRVQcowState *s, int64_t size) return (size + (1ULL << shift) - 1) >> shift; } +static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset) +{ + return (offset >> s->cluster_bits) & (s->l2_size - 1); +} + static inline int64_t align_offset(int64_t offset, int n) { offset = (offset + n - 1) & ~(n - 1); @@ -302,6 +320,17 @@ static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s) return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY); } +static inline uint64_t l2meta_cow_start(QCowL2Meta *m) +{ + return m->offset + m->cow_start.offset; +} + +static inline uint64_t l2meta_cow_end(QCowL2Meta *m) +{ + return m->offset + m->cow_end.offset + + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS); +} + // FIXME Need qcow2_ prefix to global functions /* qcow2.c functions */ diff --git a/tests/qemu-iotests/038.out b/tests/qemu-iotests/038.out index acc7629267..9cd0cd8771 100644 --- a/tests/qemu-iotests/038.out +++ b/tests/qemu-iotests/038.out @@ -517,9 +517,7 @@ qemu-io> wrote 65536/65536 bytes at offset 16711680 qemu-io> Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=6442450944 backing_file='TEST_DIR/t.IMGFMT.base' == Some concurrent requests touching the same cluster == -qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> wrote 81920/81920 bytes at offset XXX -80 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -wrote 65536/65536 bytes at offset XXX +qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> wrote 65536/65536 bytes at offset XXX 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 65536/65536 bytes at offset XXX 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -579,6 +577,8 @@ wrote 65536/65536 bytes at offset XXX 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 65536/65536 bytes at offset XXX 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 81920/81920 bytes at offset XXX +80 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 65536/65536 bytes at offset XXX 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 65536/65536 bytes at offset XXX @@ -645,6 +645,8 @@ wrote 65536/65536 bytes at offset XXX 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 65536/65536 bytes at offset XXX 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 81920/81920 bytes at offset XXX +80 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 65536/65536 bytes at offset XXX 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 65536/65536 bytes at offset XXX @@ -703,8 +705,6 @@ wrote 65536/65536 bytes at offset XXX 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 65536/65536 bytes at offset XXX 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -wrote 81920/81920 bytes at offset XXX -80 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) == Verify image content == qemu-io> read 4096/4096 bytes at offset 2064384 diff --git a/tests/qemu-iotests/044.out b/tests/qemu-iotests/044.out index 5eed3f87a3..5c5aa929fb 100644 --- a/tests/qemu-iotests/044.out +++ b/tests/qemu-iotests/044.out @@ -1,6 +1,6 @@ No errors were found on the image. -7292415/8391499= 86.90% allocated, 0.00% fragmented, 0.00% compressed clusters -Image end offset: 4296447488 +7292415/33554432 = 21.73% allocated, 0.00% fragmented, 0.00% compressed clusters +Image end offset: 4296448000 . ---------------------------------------------------------------------- Ran 1 tests diff --git a/tests/qemu-iotests/046 b/tests/qemu-iotests/046 index e0176f42df..987bfff8fa 100755 --- a/tests/qemu-iotests/046 +++ b/tests/qemu-iotests/046 @@ -66,7 +66,7 @@ function backing_io() done } -backing_io 0 16 write | $QEMU_IO $TEST_IMG | _filter_qemu_io +backing_io 0 32 write | $QEMU_IO $TEST_IMG | _filter_qemu_io mv $TEST_IMG $TEST_IMG.base @@ -153,6 +153,36 @@ aio_write -P 101 0xaa000 0xe000 resume A aio_flush EOF + +# Reverse sequential write +cat <<EOF +break write_aio A +aio_write -P 121 0xdc000 0x2000 +wait_break A +aio_write -P 120 0xc4000 0x18000 +resume A +aio_flush +EOF + +# Reverse sequential write with a gap +cat <<EOF +break write_aio A +aio_write -P 141 0xfc000 0x2000 +wait_break A +aio_write -P 140 0xe4000 0x14000 +resume A +aio_flush +EOF + +# Allocate an area in the middle and then overwrite with a larger request +cat <<EOF +break write_aio A +aio_write -P 161 0x10c000 0x8000 +wait_break A +aio_write -P 160 0x104000 0x18000 +resume A +aio_flush +EOF } overlay_io | $QEMU_IO blkdebug::$TEST_IMG | _filter_qemu_io |\ @@ -203,6 +233,23 @@ function verify_io() echo read -P 10 0xa8000 0x2000 echo read -P 101 0xaa000 0xe000 echo read -P 110 0xb8000 0x8000 + + echo read -P 12 0xc0000 0x4000 + echo read -P 120 0xc4000 0x18000 + echo read -P 121 0xdc000 0x2000 + echo read -P 13 0xde000 0x2000 + + echo read -P 14 0xe0000 0x4000 + echo read -P 140 0xe4000 0x14000 + echo read -P 15 0xf8000 0x4000 + echo read -P 141 0xfc000 0x2000 + echo read -P 15 0xfe000 0x2000 + + echo read -P 16 0x100000 0x4000 + echo read -P 160 0x104000 0x8000 + # Undefined content for 0x10c000 0x8000 + echo read -P 160 0x114000 0x8000 + echo read -P 17 0x11c000 0x4000 } verify_io | $QEMU_IO $TEST_IMG | _filter_qemu_io diff --git a/tests/qemu-iotests/046.out b/tests/qemu-iotests/046.out index 565360fe60..4b50a17eeb 100644 --- a/tests/qemu-iotests/046.out +++ b/tests/qemu-iotests/046.out @@ -34,6 +34,38 @@ qemu-io> wrote 65536/65536 bytes at offset 917504 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) qemu-io> wrote 65536/65536 bytes at offset 983040 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1048576 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1114112 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1179648 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1245184 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1310720 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1376256 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1441792 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1507328 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1572864 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1638400 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1703936 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1769472 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1835008 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1900544 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 1966080 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 2031616 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) qemu-io> Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=6442450944 backing_file='TEST_DIR/t.IMGFMT.base' == Some concurrent requests touching the same cluster == @@ -89,6 +121,24 @@ qemu-io> wrote 8192/8192 bytes at offset XXX 8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 57344/57344 bytes at offset XXX 56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 98304/98304 bytes at offset XXX +96 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 81920/81920 bytes at offset XXX +80 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 32768/32768 bytes at offset XXX +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 98304/98304 bytes at offset XXX +96 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) qemu-io> == Verify image content == qemu-io> read 65536/65536 bytes at offset 0 @@ -159,5 +209,31 @@ qemu-io> read 57344/57344 bytes at offset 696320 56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) qemu-io> read 32768/32768 bytes at offset 753664 32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 16384/16384 bytes at offset 786432 +16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 98304/98304 bytes at offset 802816 +96 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 901120 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 909312 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 16384/16384 bytes at offset 917504 +16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 81920/81920 bytes at offset 933888 +80 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 16384/16384 bytes at offset 1015808 +16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 1032192 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 1040384 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 16384/16384 bytes at offset 1048576 +16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 32768/32768 bytes at offset 1064960 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 32768/32768 bytes at offset 1130496 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 16384/16384 bytes at offset 1163264 +16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) qemu-io> No errors were found on the image. *** done diff --git a/trace-events b/trace-events index 85dd49c180..7f34112424 100644 --- a/trace-events +++ b/trace-events @@ -484,6 +484,8 @@ qcow2_writev_done_part(void *co, int cur_nr_sectors) "co %p cur_nr_sectors %d" qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64 qcow2_alloc_clusters_offset(void *co, uint64_t offset, int n_start, int n_end) "co %p offet %" PRIx64 " n_start %d n_end %d" +qcow2_handle_copied(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offet %" PRIx64 " host_offset %" PRIx64 " bytes %" PRIx64 +qcow2_handle_alloc(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offet %" PRIx64 " host_offset %" PRIx64 " bytes %" PRIx64 qcow2_do_alloc_clusters_offset(void *co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters) "co %p guest_offet %" PRIx64 " host_offset %" PRIx64 " nb_clusters %d" qcow2_cluster_alloc_phys(void *co) "co %p" qcow2_cluster_link_l2(void *co, int nb_clusters) "co %p nb_clusters %d" @@ -2941,6 +2941,8 @@ int main(int argc, char **argv, char **envp) nb_numa_nodes = 0; nb_nics = 0; + bdrv_init_with_whitelist(); + autostart= 1; /* first pass of option parsing */ @@ -4199,8 +4201,6 @@ int main(int argc, char **argv, char **envp) cpu_exec_init_all(); - bdrv_init_with_whitelist(); - blk_mig_init(); /* open the virtual block devices */ |