diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2011-12-05 09:39:25 -0600 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2011-12-05 09:39:25 -0600 |
commit | eb5d5beaebd102599a915f6c4813d445ddc9dc84 (patch) | |
tree | 12ce2331571a30c67bde0b8f4ddb55996dd0ba65 /block | |
parent | f6480ca3f3423be5bee8b673ee6f5cc387659def (diff) | |
parent | 922453bca6a927bb527068ae8679d587cfa45dbc (diff) |
Merge remote-tracking branch 'kwolf/for-anthony' into staging
Diffstat (limited to 'block')
-rw-r--r-- | block/cow.c | 46 | ||||
-rw-r--r-- | block/qcow.c | 12 | ||||
-rw-r--r-- | block/qcow2-cluster.c | 115 | ||||
-rw-r--r-- | block/qcow2-refcount.c | 7 | ||||
-rw-r--r-- | block/qcow2-snapshot.c | 330 | ||||
-rw-r--r-- | block/qcow2.c | 28 | ||||
-rw-r--r-- | block/qed-table.c | 6 | ||||
-rw-r--r-- | block/qed.c | 15 | ||||
-rw-r--r-- | block/sheepdog.c | 4 | ||||
-rw-r--r-- | block/vdi.c | 6 | ||||
-rw-r--r-- | block/vmdk.c | 8 | ||||
-rw-r--r-- | block/vvfat.c | 4 |
12 files changed, 362 insertions, 219 deletions
diff --git a/block/cow.c b/block/cow.c index 3448296190..3c527358c6 100644 --- a/block/cow.c +++ b/block/cow.c @@ -132,8 +132,8 @@ static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum) /* Return true if first block has been changed (ie. current version is * in COW file). Set the number of continuous blocks for which that * is true. */ -static int cow_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *num_same) +static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, int *num_same) { int changed; @@ -171,14 +171,14 @@ static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num, return error; } -static int cow_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) +static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) { BDRVCowState *s = bs->opaque; int ret, n; while (nb_sectors > 0) { - if (cow_is_allocated(bs, sector_num, nb_sectors, &n)) { + if (bdrv_co_is_allocated(bs, sector_num, nb_sectors, &n)) { ret = bdrv_pread(bs->file, s->cow_sectors_offset + sector_num * 512, buf, n * 512); @@ -243,12 +243,12 @@ static void cow_close(BlockDriverState *bs) static int cow_create(const char *filename, QEMUOptionParameter *options) { - int fd, cow_fd; struct cow_header_v2 cow_header; struct stat st; int64_t image_sectors = 0; const char *image_filename = NULL; int ret; + BlockDriverState *cow_bs; /* Read out options */ while (options && options->name) { @@ -260,10 +260,16 @@ static int cow_create(const char *filename, QEMUOptionParameter *options) options++; } - cow_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, - 0644); - if (cow_fd < 0) - return -errno; + ret = bdrv_create_file(filename, options); + if (ret < 0) { + return ret; + } + + ret = bdrv_file_open(&cow_bs, filename, BDRV_O_RDWR); + if (ret < 0) { + return ret; + } + memset(&cow_header, 0, sizeof(cow_header)); cow_header.magic = cpu_to_be32(COW_MAGIC); cow_header.version = cpu_to_be32(COW_VERSION); @@ -271,16 +277,9 @@ static int cow_create(const char *filename, QEMUOptionParameter *options) /* Note: if no file, we put a dummy mtime */ cow_header.mtime = cpu_to_be32(0); - fd = open(image_filename, O_RDONLY | O_BINARY); - if (fd < 0) { - close(cow_fd); - goto mtime_fail; - } - if (fstat(fd, &st) != 0) { - close(fd); + if (stat(image_filename, &st) != 0) { goto mtime_fail; } - close(fd); cow_header.mtime = cpu_to_be32(st.st_mtime); mtime_fail: pstrcpy(cow_header.backing_file, sizeof(cow_header.backing_file), @@ -288,21 +287,20 @@ static int cow_create(const char *filename, QEMUOptionParameter *options) } cow_header.sectorsize = cpu_to_be32(512); cow_header.size = cpu_to_be64(image_sectors * 512); - ret = qemu_write_full(cow_fd, &cow_header, sizeof(cow_header)); + ret = bdrv_pwrite(cow_bs, 0, &cow_header, sizeof(cow_header)); if (ret != sizeof(cow_header)) { - ret = -errno; goto exit; } /* resize to include at least all the bitmap */ - ret = ftruncate(cow_fd, sizeof(cow_header) + ((image_sectors + 7) >> 3)); + ret = bdrv_truncate(cow_bs, + sizeof(cow_header) + ((image_sectors + 7) >> 3)); if (ret) { - ret = -errno; goto exit; } exit: - close(cow_fd); + bdrv_delete(cow_bs); return ret; } @@ -337,7 +335,7 @@ static BlockDriver bdrv_cow = { .bdrv_read = cow_co_read, .bdrv_write = cow_co_write, .bdrv_co_flush_to_disk = cow_co_flush, - .bdrv_is_allocated = cow_is_allocated, + .bdrv_co_is_allocated = cow_co_is_allocated, .create_options = cow_create_options, }; diff --git a/block/qcow.c b/block/qcow.c index 4814ed0ced..b16955d764 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -368,14 +368,16 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, return cluster_offset; } -static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) +static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, int *pnum) { BDRVQcowState *s = bs->opaque; int index_in_cluster, n; uint64_t cluster_offset; + qemu_co_mutex_lock(&s->lock); cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); + qemu_co_mutex_unlock(&s->lock); index_in_cluster = sector_num & (s->cluster_sectors - 1); n = s->cluster_sectors - index_in_cluster; if (n > nb_sectors) @@ -433,7 +435,7 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) return 0; } -static int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, +static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { BDRVQcowState *s = bs->opaque; @@ -531,7 +533,7 @@ fail: goto done; } -static int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, +static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { BDRVQcowState *s = bs->opaque; @@ -844,7 +846,7 @@ static BlockDriver bdrv_qcow = { .bdrv_co_readv = qcow_co_readv, .bdrv_co_writev = qcow_co_writev, .bdrv_co_flush_to_disk = qcow_co_flush, - .bdrv_is_allocated = qcow_is_allocated, + .bdrv_co_is_allocated = qcow_co_is_allocated, .bdrv_set_key = qcow_set_key, .bdrv_make_empty = qcow_make_empty, diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index f4e049fa90..07a2e936fd 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -289,89 +289,62 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, } } - -static int qcow2_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) +static int coroutine_fn copy_sectors(BlockDriverState *bs, + uint64_t start_sect, + uint64_t cluster_offset, + int n_start, int n_end) { BDRVQcowState *s = bs->opaque; - int ret, index_in_cluster, n, n1; - uint64_t cluster_offset; - struct iovec iov; QEMUIOVector qiov; + struct iovec iov; + int n, ret; - while (nb_sectors > 0) { - n = nb_sectors; - - ret = qcow2_get_cluster_offset(bs, sector_num << 9, &n, - &cluster_offset); - if (ret < 0) { - return ret; - } - - index_in_cluster = sector_num & (s->cluster_sectors - 1); - if (!cluster_offset) { - if (bs->backing_hd) { - /* read from the base image */ - iov.iov_base = buf; - iov.iov_len = n * 512; - qemu_iovec_init_external(&qiov, &iov, 1); - - n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n); - if (n1 > 0) { - BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING); - ret = bdrv_read(bs->backing_hd, sector_num, buf, n1); - if (ret < 0) - return -1; - } - } else { - memset(buf, 0, 512 * n); - } - } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { - if (qcow2_decompress_cluster(bs, cluster_offset) < 0) - return -1; - memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n); - } else { - BLKDBG_EVENT(bs->file, BLKDBG_READ); - ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512); - if (ret != n * 512) - return -1; - if (s->crypt_method) { - qcow2_encrypt_sectors(s, sector_num, buf, buf, n, 0, - &s->aes_decrypt_key); - } - } - nb_sectors -= n; - sector_num += n; - buf += n * 512; + /* + * If this is the last cluster and it is only partially used, we must only + * copy until the end of the image, or bdrv_check_request will fail for the + * bdrv_read/write calls below. + */ + if (start_sect + n_end > bs->total_sectors) { + n_end = bs->total_sectors - start_sect; } - return 0; -} - -static int copy_sectors(BlockDriverState *bs, uint64_t start_sect, - uint64_t cluster_offset, int n_start, int n_end) -{ - BDRVQcowState *s = bs->opaque; - int n, ret; n = n_end - n_start; - if (n <= 0) + if (n <= 0) { return 0; + } + + iov.iov_len = n * BDRV_SECTOR_SIZE; + iov.iov_base = qemu_blockalign(bs, iov.iov_len); + + qemu_iovec_init_external(&qiov, &iov, 1); + BLKDBG_EVENT(bs->file, BLKDBG_COW_READ); - ret = qcow2_read(bs, start_sect + n_start, s->cluster_data, n); - if (ret < 0) - return ret; + + /* Call .bdrv_co_readv() directly instead of using the public block-layer + * interface. This avoids double I/O throttling and request tracking, + * which can lead to deadlock when block layer copy-on-read is enabled. + */ + ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov); + if (ret < 0) { + goto out; + } + if (s->crypt_method) { qcow2_encrypt_sectors(s, start_sect + n_start, - s->cluster_data, - s->cluster_data, n, 1, + iov.iov_base, iov.iov_base, n, 1, &s->aes_encrypt_key); } + BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); - ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start, - s->cluster_data, n); - if (ret < 0) - return ret; - return 0; + ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov); + if (ret < 0) { + goto out; + } + + ret = 0; +out: + qemu_vfree(iov.iov_base); + return ret; } @@ -620,7 +593,9 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9; if (m->n_start) { cow = true; + qemu_co_mutex_unlock(&s->lock); ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start); + qemu_co_mutex_lock(&s->lock); if (ret < 0) goto err; } @@ -628,8 +603,10 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) if (m->nb_available & (s->cluster_sectors - 1)) { uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1); cow = true; + qemu_co_mutex_unlock(&s->lock); ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9), m->nb_available - end, s->cluster_sectors); + qemu_co_mutex_lock(&s->lock); if (ret < 0) goto err; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 9605367777..2db2ede3d1 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -700,6 +700,10 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, l2_table = NULL; l1_table = NULL; l1_size2 = l1_size * sizeof(uint64_t); + + /* WARNING: qcow2_snapshot_goto relies on this function not using the + * l1_table_offset when it is the current s->l1_table_offset! Be careful + * when changing this! */ if (l1_table_offset != s->l1_table_offset) { if (l1_size2 != 0) { l1_table = g_malloc0(align_offset(l1_size2, 512)); @@ -819,7 +823,8 @@ fail: qcow2_cache_set_writethrough(bs, s->refcount_block_cache, old_refcount_writethrough); - if (l1_modified) { + /* Update L1 only if it isn't deleted anyway (addend = -1) */ + if (addend >= 0 && l1_modified) { for(i = 0; i < l1_size; i++) cpu_to_be64s(&l1_table[i]); if (bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index bdc33ba94c..c3112bf71a 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -68,6 +68,7 @@ int qcow2_read_snapshots(BlockDriverState *bs) int i, id_str_size, name_size; int64_t offset; uint32_t extra_data_size; + int ret; if (!s->nb_snapshots) { s->snapshots = NULL; @@ -77,10 +78,15 @@ int qcow2_read_snapshots(BlockDriverState *bs) offset = s->snapshots_offset; s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot)); + for(i = 0; i < s->nb_snapshots; i++) { + /* Read statically sized part of the snapshot header */ offset = align_offset(offset, 8); - if (bdrv_pread(bs->file, offset, &h, sizeof(h)) != sizeof(h)) + ret = bdrv_pread(bs->file, offset, &h, sizeof(h)); + if (ret < 0) { goto fail; + } + offset += sizeof(h); sn = s->snapshots + i; sn->l1_table_offset = be64_to_cpu(h.l1_table_offset); @@ -94,25 +100,34 @@ int qcow2_read_snapshots(BlockDriverState *bs) id_str_size = be16_to_cpu(h.id_str_size); name_size = be16_to_cpu(h.name_size); + /* Skip extra data */ offset += extra_data_size; + /* Read snapshot ID */ sn->id_str = g_malloc(id_str_size + 1); - if (bdrv_pread(bs->file, offset, sn->id_str, id_str_size) != id_str_size) + ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size); + if (ret < 0) { goto fail; + } offset += id_str_size; sn->id_str[id_str_size] = '\0'; + /* Read snapshot name */ sn->name = g_malloc(name_size + 1); - if (bdrv_pread(bs->file, offset, sn->name, name_size) != name_size) + ret = bdrv_pread(bs->file, offset, sn->name, name_size); + if (ret < 0) { goto fail; + } offset += name_size; sn->name[name_size] = '\0'; } + s->snapshots_size = offset - s->snapshots_offset; return 0; - fail: + +fail: qcow2_free_snapshots(bs); - return -1; + return ret; } /* add at the end of the file a new list of snapshots */ @@ -122,9 +137,12 @@ static int qcow2_write_snapshots(BlockDriverState *bs) QCowSnapshot *sn; QCowSnapshotHeader h; int i, name_size, id_str_size, snapshots_size; - uint64_t data64; - uint32_t data32; + struct { + uint32_t nb_snapshots; + uint64_t snapshots_offset; + } QEMU_PACKED header_data; int64_t offset, snapshots_offset; + int ret; /* compute the size of the snapshots */ offset = 0; @@ -137,6 +155,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs) } snapshots_size = offset; + /* Allocate space for the new snapshot list */ snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size); bdrv_flush(bs->file); offset = snapshots_offset; @@ -144,6 +163,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs) return offset; } + /* Write all snapshots to the new list */ for(i = 0; i < s->nb_snapshots; i++) { sn = s->snapshots + i; memset(&h, 0, sizeof(h)); @@ -159,34 +179,55 @@ static int qcow2_write_snapshots(BlockDriverState *bs) h.id_str_size = cpu_to_be16(id_str_size); h.name_size = cpu_to_be16(name_size); offset = align_offset(offset, 8); - if (bdrv_pwrite_sync(bs->file, offset, &h, sizeof(h)) < 0) + + ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h)); + if (ret < 0) { goto fail; + } offset += sizeof(h); - if (bdrv_pwrite_sync(bs->file, offset, sn->id_str, id_str_size) < 0) + + ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size); + if (ret < 0) { goto fail; + } offset += id_str_size; - if (bdrv_pwrite_sync(bs->file, offset, sn->name, name_size) < 0) + + ret = bdrv_pwrite(bs->file, offset, sn->name, name_size); + if (ret < 0) { goto fail; + } offset += name_size; } - /* update the various header fields */ - data64 = cpu_to_be64(snapshots_offset); - if (bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, snapshots_offset), - &data64, sizeof(data64)) < 0) + /* + * Update the header to point to the new snapshot table. This requires the + * new table and its refcounts to be stable on disk. + */ + ret = bdrv_flush(bs); + if (ret < 0) { goto fail; - data32 = cpu_to_be32(s->nb_snapshots); - if (bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots), - &data32, sizeof(data32)) < 0) + } + + QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) != + offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots)); + + header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots); + header_data.snapshots_offset = cpu_to_be64(snapshots_offset); + + ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots), + &header_data, sizeof(header_data)); + if (ret < 0) { goto fail; + } /* free the old snapshot table */ qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size); s->snapshots_offset = snapshots_offset; s->snapshots_size = snapshots_size; return 0; - fail: - return -1; + +fail: + return ret; } static void find_new_snapshot_id(BlockDriverState *bs, @@ -236,72 +277,92 @@ static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name) int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) { BDRVQcowState *s = bs->opaque; - QCowSnapshot *snapshots1, sn1, *sn = &sn1; + QCowSnapshot *new_snapshot_list = NULL; + QCowSnapshot *old_snapshot_list = NULL; + QCowSnapshot sn1, *sn = &sn1; int i, ret; uint64_t *l1_table = NULL; int64_t l1_table_offset; memset(sn, 0, sizeof(*sn)); + /* Generate an ID if it wasn't passed */ if (sn_info->id_str[0] == '\0') { - /* compute a new id */ find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str)); } - /* check that the ID is unique */ - if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) + /* Check that the ID is unique */ + if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) { return -ENOENT; + } + /* Populate sn with passed data */ sn->id_str = g_strdup(sn_info->id_str); - if (!sn->id_str) - goto fail; sn->name = g_strdup(sn_info->name); - if (!sn->name) - goto fail; + sn->vm_state_size = sn_info->vm_state_size; sn->date_sec = sn_info->date_sec; sn->date_nsec = sn_info->date_nsec; sn->vm_clock_nsec = sn_info->vm_clock_nsec; - ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1); - if (ret < 0) - goto fail; - - /* create the L1 table of the snapshot */ + /* Allocate the L1 table of the snapshot and copy the current one there. */ l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t)); if (l1_table_offset < 0) { + ret = l1_table_offset; goto fail; } - bdrv_flush(bs->file); sn->l1_table_offset = l1_table_offset; sn->l1_size = s->l1_size; - if (s->l1_size != 0) { - l1_table = g_malloc(s->l1_size * sizeof(uint64_t)); - } else { - l1_table = NULL; - } - + l1_table = g_malloc(s->l1_size * sizeof(uint64_t)); for(i = 0; i < s->l1_size; i++) { l1_table[i] = cpu_to_be64(s->l1_table[i]); } - if (bdrv_pwrite_sync(bs->file, sn->l1_table_offset, - l1_table, s->l1_size * sizeof(uint64_t)) < 0) + + ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table, + s->l1_size * sizeof(uint64_t)); + if (ret < 0) { goto fail; + } + g_free(l1_table); l1_table = NULL; - snapshots1 = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot)); + /* + * Increase the refcounts of all clusters and make sure everything is + * stable on disk before updating the snapshot table to contain a pointer + * to the new L1 table. + */ + ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1); + if (ret < 0) { + goto fail; + } + + ret = bdrv_flush(bs); + if (ret < 0) { + goto fail; + } + + /* Append the new snapshot to the snapshot list */ + new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot)); if (s->snapshots) { - memcpy(snapshots1, s->snapshots, s->nb_snapshots * sizeof(QCowSnapshot)); - g_free(s->snapshots); + memcpy(new_snapshot_list, s->snapshots, + s->nb_snapshots * sizeof(QCowSnapshot)); + old_snapshot_list = s->snapshots; } - s->snapshots = snapshots1; + s->snapshots = new_snapshot_list; s->snapshots[s->nb_snapshots++] = *sn; - if (qcow2_write_snapshots(bs) < 0) + ret = qcow2_write_snapshots(bs); + if (ret < 0) { + g_free(s->snapshots); + s->snapshots = old_snapshot_list; goto fail; + } + + g_free(old_snapshot_list); + #ifdef DEBUG_ALLOC { BdrvCheckResult result = {0}; @@ -309,10 +370,13 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) } #endif return 0; - fail: + +fail: + g_free(sn->id_str); g_free(sn->name); g_free(l1_table); - return -1; + + return ret; } /* copy the snapshot 'snapshot_name' into the current disk image */ @@ -322,38 +386,92 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) QCowSnapshot *sn; int i, snapshot_index; int cur_l1_bytes, sn_l1_bytes; + int ret; + uint64_t *sn_l1_table = NULL; + /* Search the snapshot */ snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); - if (snapshot_index < 0) + if (snapshot_index < 0) { return -ENOENT; + } sn = &s->snapshots[snapshot_index]; - if (qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, -1) < 0) - goto fail; - - if (qcow2_grow_l1_table(bs, sn->l1_size, true) < 0) + /* + * Make sure that the current L1 table is big enough to contain the whole + * L1 table of the snapshot. If the snapshot L1 table is smaller, the + * current one must be padded with zeros. + */ + ret = qcow2_grow_l1_table(bs, sn->l1_size, true); + if (ret < 0) { goto fail; + } cur_l1_bytes = s->l1_size * sizeof(uint64_t); sn_l1_bytes = sn->l1_size * sizeof(uint64_t); - if (cur_l1_bytes > sn_l1_bytes) { - memset(s->l1_table + sn->l1_size, 0, cur_l1_bytes - sn_l1_bytes); + /* + * Copy the snapshot L1 table to the current L1 table. + * + * Before overwriting the old current L1 table on disk, make sure to + * increase all refcounts for the clusters referenced by the new one. + * Decrease the refcount referenced by the old one only when the L1 + * table is overwritten. + */ + sn_l1_table = g_malloc0(cur_l1_bytes); + + ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes); + if (ret < 0) { + goto fail; } - /* copy the snapshot l1 table to the current l1 table */ - if (bdrv_pread(bs->file, sn->l1_table_offset, - s->l1_table, sn_l1_bytes) < 0) + ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset, + sn->l1_size, 1); + if (ret < 0) { goto fail; - if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, - s->l1_table, cur_l1_bytes) < 0) + } + + ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table, + cur_l1_bytes); + if (ret < 0) { goto fail; + } + + /* + * Decrease refcount of clusters of current L1 table. + * + * At this point, the in-memory s->l1_table points to the old L1 table, + * whereas on disk we already have the new one. + * + * qcow2_update_snapshot_refcount special cases the current L1 table to use + * the in-memory data instead of really using the offset to load a new one, + * which is why this works. + */ + ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, + s->l1_size, -1); + + /* + * Now update the in-memory L1 table to be in sync with the on-disk one. We + * need to do this even if updating refcounts failed. + */ for(i = 0;i < s->l1_size; i++) { - be64_to_cpus(&s->l1_table[i]); + s->l1_table[i] = be64_to_cpu(sn_l1_table[i]); } - if (qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1) < 0) + if (ret < 0) { goto fail; + } + + g_free(sn_l1_table); + sn_l1_table = NULL; + + /* + * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed + * when we decreased the refcount of the old snapshot. + */ + ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0); + if (ret < 0) { + goto fail; + } #ifdef DEBUG_ALLOC { @@ -362,39 +480,59 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) } #endif return 0; - fail: - return -EIO; + +fail: + g_free(sn_l1_table); + return ret; } int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) { BDRVQcowState *s = bs->opaque; - QCowSnapshot *sn; + QCowSnapshot sn; int snapshot_index, ret; + /* Search the snapshot */ snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); - if (snapshot_index < 0) + if (snapshot_index < 0) { return -ENOENT; - sn = &s->snapshots[snapshot_index]; + } + sn = s->snapshots[snapshot_index]; + + /* Remove it from the snapshot list */ + memmove(s->snapshots + snapshot_index, + s->snapshots + snapshot_index + 1, + (s->nb_snapshots - snapshot_index - 1) * sizeof(sn)); + s->nb_snapshots--; + ret = qcow2_write_snapshots(bs); + if (ret < 0) { + return ret; + } - ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset, sn->l1_size, -1); - if (ret < 0) + /* + * The snapshot is now unused, clean up. If we fail after this point, we + * won't recover but just leak clusters. + */ + g_free(sn.id_str); + g_free(sn.name); + + /* + * Now decrease the refcounts of clusters referenced by the snapshot and + * free the L1 table. + */ + ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset, + sn.l1_size, -1); + if (ret < 0) { return ret; + } + qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t)); + /* must update the copied flag on the current cluster offsets */ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0); - if (ret < 0) - return ret; - qcow2_free_clusters(bs, sn->l1_table_offset, sn->l1_size * sizeof(uint64_t)); - - g_free(sn->id_str); - g_free(sn->name); - memmove(sn, sn + 1, (s->nb_snapshots - snapshot_index - 1) * sizeof(*sn)); - s->nb_snapshots--; - ret = qcow2_write_snapshots(bs); if (ret < 0) { - /* XXX: restore snapshot if error ? */ return ret; } + #ifdef DEBUG_ALLOC { BdrvCheckResult result = {0}; @@ -435,32 +573,42 @@ int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name) { - int i, snapshot_index, l1_size2; + int i, snapshot_index; BDRVQcowState *s = bs->opaque; QCowSnapshot *sn; + uint64_t *new_l1_table; + int new_l1_bytes; + int ret; + + assert(bs->read_only); + /* Search the snapshot */ snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name); if (snapshot_index < 0) { return -ENOENT; } - sn = &s->snapshots[snapshot_index]; - s->l1_size = sn->l1_size; - l1_size2 = s->l1_size * sizeof(uint64_t); - if (s->l1_table != NULL) { - g_free(s->l1_table); - } - s->l1_table_offset = sn->l1_table_offset; - s->l1_table = g_malloc0(align_offset(l1_size2, 512)); + /* Allocate and read in the snapshot's L1 table */ + new_l1_bytes = s->l1_size * sizeof(uint64_t); + new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512)); - if (bdrv_pread(bs->file, sn->l1_table_offset, - s->l1_table, l1_size2) != l1_size2) { - return -1; + ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes); + if (ret < 0) { + g_free(new_l1_table); + return ret; } + /* Switch the L1 table */ + g_free(s->l1_table); + + s->l1_size = sn->l1_size; + s->l1_table_offset = sn->l1_table_offset; + s->l1_table = new_l1_table; + for(i = 0;i < s->l1_size; i++) { be64_to_cpus(&s->l1_table[i]); } + return 0; } diff --git a/block/qcow2.c b/block/qcow2.c index 9e1b1eb2ed..37cd4424d4 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -273,8 +273,9 @@ static int qcow2_open(BlockDriverState *bs, int flags) } bs->backing_file[len] = '\0'; } - if (qcow2_read_snapshots(bs) < 0) { - ret = -EINVAL; + + ret = qcow2_read_snapshots(bs); + if (ret < 0) { goto fail; } @@ -343,16 +344,19 @@ static int qcow2_set_key(BlockDriverState *bs, const char *key) return 0; } -static int qcow2_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) +static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, int *pnum) { + BDRVQcowState *s = bs->opaque; uint64_t cluster_offset; int ret; *pnum = nb_sectors; - /* FIXME We can get errors here, but the bdrv_is_allocated interface can't - * pass them on today */ + /* FIXME We can get errors here, but the bdrv_co_is_allocated interface + * can't pass them on today */ + qemu_co_mutex_lock(&s->lock); ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset); + qemu_co_mutex_unlock(&s->lock); if (ret < 0) { *pnum = 0; } @@ -377,7 +381,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, return n1; } -static int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, +static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, int remaining_sectors, QEMUIOVector *qiov) { BDRVQcowState *s = bs->opaque; @@ -512,12 +516,12 @@ static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m) /* Restart all dependent requests */ if (!qemu_co_queue_empty(&m->dependent_requests)) { qemu_co_mutex_unlock(&s->lock); - while(qemu_co_queue_next(&m->dependent_requests)); + qemu_co_queue_restart_all(&m->dependent_requests); qemu_co_mutex_lock(&s->lock); } } -static int qcow2_co_writev(BlockDriverState *bs, +static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, int64_t sector_num, int remaining_sectors, QEMUIOVector *qiov) @@ -1137,7 +1141,7 @@ fail: return ret; } -static int qcow2_co_flush_to_os(BlockDriverState *bs) +static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; int ret; @@ -1159,7 +1163,7 @@ static int qcow2_co_flush_to_os(BlockDriverState *bs) return 0; } -static int qcow2_co_flush_to_disk(BlockDriverState *bs) +static coroutine_fn int qcow2_co_flush_to_disk(BlockDriverState *bs) { return bdrv_co_flush(bs->file); } @@ -1276,7 +1280,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_open = qcow2_open, .bdrv_close = qcow2_close, .bdrv_create = qcow2_create, - .bdrv_is_allocated = qcow2_is_allocated, + .bdrv_co_is_allocated = qcow2_co_is_allocated, .bdrv_set_key = qcow2_set_key, .bdrv_make_empty = qcow2_make_empty, diff --git a/block/qed-table.c b/block/qed-table.c index f31f9ff069..8ee844346c 100644 --- a/block/qed-table.c +++ b/block/qed-table.c @@ -29,7 +29,7 @@ static void qed_read_table_cb(void *opaque, int ret) { QEDReadTableCB *read_table_cb = opaque; QEDTable *table = read_table_cb->table; - int noffsets = read_table_cb->iov.iov_len / sizeof(uint64_t); + int noffsets = read_table_cb->qiov.size / sizeof(uint64_t); int i; /* Handle I/O error */ @@ -65,7 +65,7 @@ static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, qemu_iovec_init_external(qiov, &read_table_cb->iov, 1); aiocb = bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov, - read_table_cb->iov.iov_len / BDRV_SECTOR_SIZE, + qiov->size / BDRV_SECTOR_SIZE, qed_read_table_cb, read_table_cb); if (!aiocb) { qed_read_table_cb(read_table_cb, -EIO); @@ -160,7 +160,7 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, aiocb = bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE, &write_table_cb->qiov, - write_table_cb->iov.iov_len / BDRV_SECTOR_SIZE, + write_table_cb->qiov.size / BDRV_SECTOR_SIZE, qed_write_table_cb, write_table_cb); if (!aiocb) { qed_write_table_cb(write_table_cb, -EIO); diff --git a/block/qed.c b/block/qed.c index 7e22e77b9d..22e467202f 100644 --- a/block/qed.c +++ b/block/qed.c @@ -661,6 +661,7 @@ static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options) } typedef struct { + Coroutine *co; int is_allocated; int *pnum; } QEDIsAllocatedCB; @@ -670,10 +671,14 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l QEDIsAllocatedCB *cb = opaque; *cb->pnum = len / BDRV_SECTOR_SIZE; cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO); + if (cb->co) { + qemu_coroutine_enter(cb->co, NULL); + } } -static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) +static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, int *pnum) { BDRVQEDState *s = bs->opaque; uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; @@ -686,8 +691,10 @@ static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num, qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb); + /* Now sleep if the callback wasn't invoked immediately */ while (cb.is_allocated == -1) { - qemu_aio_wait(); + cb.co = qemu_coroutine_self(); + qemu_coroutine_yield(); } qed_unref_l2_cache_entry(request.l2_table); @@ -1485,7 +1492,7 @@ static BlockDriver bdrv_qed = { .bdrv_open = bdrv_qed_open, .bdrv_close = bdrv_qed_close, .bdrv_create = bdrv_qed_create, - .bdrv_is_allocated = bdrv_qed_is_allocated, + .bdrv_co_is_allocated = bdrv_qed_co_is_allocated, .bdrv_make_empty = bdrv_qed_make_empty, .bdrv_aio_readv = bdrv_qed_aio_readv, .bdrv_aio_writev = bdrv_qed_aio_writev, diff --git a/block/sheepdog.c b/block/sheepdog.c index 62f1f3a0cf..aa9707f2ae 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1715,7 +1715,7 @@ out: return 1; } -static int sd_co_writev(BlockDriverState *bs, int64_t sector_num, +static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { SheepdogAIOCB *acb; @@ -1744,7 +1744,7 @@ static int sd_co_writev(BlockDriverState *bs, int64_t sector_num, return acb->ret; } -static int sd_co_readv(BlockDriverState *bs, int64_t sector_num, +static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { SheepdogAIOCB *acb; diff --git a/block/vdi.c b/block/vdi.c index 02da6b44d0..e1d8cffc2d 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -472,8 +472,8 @@ static int vdi_open(BlockDriverState *bs, int flags) return -1; } -static int vdi_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) +static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, int *pnum) { /* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */ BDRVVdiState *s = (BDRVVdiState *)bs->opaque; @@ -996,7 +996,7 @@ static BlockDriver bdrv_vdi = { .bdrv_close = vdi_close, .bdrv_create = vdi_create, .bdrv_co_flush_to_disk = vdi_co_flush, - .bdrv_is_allocated = vdi_is_allocated, + .bdrv_co_is_allocated = vdi_co_is_allocated, .bdrv_make_empty = vdi_make_empty, .bdrv_aio_readv = vdi_aio_readv, diff --git a/block/vmdk.c b/block/vmdk.c index f5441591d7..5623ac10cd 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -861,8 +861,8 @@ static VmdkExtent *find_extent(BDRVVmdkState *s, return NULL; } -static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) +static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, int *pnum) { BDRVVmdkState *s = bs->opaque; int64_t index_in_cluster, n, ret; @@ -873,8 +873,10 @@ static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num, if (!extent) { return 0; } + qemu_co_mutex_lock(&s->lock); ret = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0, &offset); + qemu_co_mutex_unlock(&s->lock); /* get_cluster_offset returning 0 means success */ ret = !ret; @@ -1596,7 +1598,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_close = vmdk_close, .bdrv_create = vmdk_create, .bdrv_co_flush_to_disk = vmdk_co_flush, - .bdrv_is_allocated = vmdk_is_allocated, + .bdrv_co_is_allocated = vmdk_co_is_allocated, .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, .create_options = vmdk_create_options, diff --git a/block/vvfat.c b/block/vvfat.c index a310ce8c3e..eeffc4a4a8 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -2758,7 +2758,7 @@ static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num, return ret; } -static int vvfat_is_allocated(BlockDriverState *bs, +static int coroutine_fn vvfat_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int* n) { BDRVVVFATState* s = bs->opaque; @@ -2855,7 +2855,7 @@ static BlockDriver bdrv_vvfat = { .bdrv_read = vvfat_co_read, .bdrv_write = vvfat_co_write, .bdrv_close = vvfat_close, - .bdrv_is_allocated = vvfat_is_allocated, + .bdrv_co_is_allocated = vvfat_co_is_allocated, .protocol_name = "fat", }; |