diff options
Diffstat (limited to 'block/vmdk.c')
-rw-r--r-- | block/vmdk.c | 208 |
1 files changed, 145 insertions, 63 deletions
diff --git a/block/vmdk.c b/block/vmdk.c index 7bad757a33..608daaf930 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -32,11 +32,25 @@ #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V') #define VMDK4_COMPRESSION_DEFLATE 1 +#define VMDK4_FLAG_NL_DETECT (1 << 0) #define VMDK4_FLAG_RGD (1 << 1) +/* Zeroed-grain enable bit */ +#define VMDK4_FLAG_ZERO_GRAIN (1 << 2) #define VMDK4_FLAG_COMPRESS (1 << 16) #define VMDK4_FLAG_MARKER (1 << 17) #define VMDK4_GD_AT_END 0xffffffffffffffffULL +#define VMDK_GTE_ZEROED 0x1 + +/* VMDK internal error codes */ +#define VMDK_OK 0 +#define VMDK_ERROR (-1) +/* Cluster not allocated */ +#define VMDK_UNALLOC (-2) +#define VMDK_ZEROED (-3) + +#define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain" + typedef struct { uint32_t version; uint32_t flags; @@ -73,6 +87,8 @@ typedef struct VmdkExtent { bool flat; bool compressed; bool has_marker; + bool has_zero_grain; + int version; int64_t sectors; int64_t end_sector; int64_t flat_start_offset; @@ -108,6 +124,7 @@ typedef struct VmdkMetaData { unsigned int l2_index; unsigned int l2_offset; int valid; + uint32_t *l2_cache_entry; } VmdkMetaData; typedef struct VmdkGrainMarker { @@ -561,6 +578,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, extent->compressed = le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE; extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER; + extent->version = le32_to_cpu(header.version); + extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN; ret = vmdk_init_tables(bs, extent); if (ret) { /* free extent allocated by vmdk_add_extent */ @@ -578,22 +597,22 @@ static int vmdk_parse_description(const char *desc, const char *opt_name, opt_pos = strstr(desc, opt_name); if (!opt_pos) { - return -1; + return VMDK_ERROR; } /* Skip "=\"" following opt_name */ opt_pos += strlen(opt_name) + 2; if (opt_pos >= end) { - return -1; + return VMDK_ERROR; } opt_end = opt_pos; while (opt_end < end && *opt_end != '"') { opt_end++; } if (opt_end == end || buf_size < opt_end - opt_pos + 1) { - return -1; + return VMDK_ERROR; } pstrcpy(buf, opt_end - opt_pos + 1, opt_pos); - return 0; + return VMDK_OK; } /* Open an extent file and append to bs array */ @@ -772,7 +791,7 @@ static int get_whole_cluster(BlockDriverState *bs, int ret; if (!vmdk_is_cid_valid(bs)) { - return -1; + return VMDK_ERROR; } /* floor offset to cluster */ @@ -780,30 +799,31 @@ static int get_whole_cluster(BlockDriverState *bs, ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain, extent->cluster_sectors); if (ret < 0) { - return -1; + return VMDK_ERROR; } /* Write grain only into the active image */ ret = bdrv_write(extent->file, cluster_offset, whole_grain, extent->cluster_sectors); if (ret < 0) { - return -1; + return VMDK_ERROR; } } - return 0; + return VMDK_OK; } static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) { + uint32_t offset; + QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset)); + offset = cpu_to_le32(m_data->offset); /* update L2 table */ if (bdrv_pwrite_sync( extent->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), - &(m_data->offset), - sizeof(m_data->offset) - ) < 0) { - return -1; + &offset, sizeof(offset)) < 0) { + return VMDK_ERROR; } /* update backup L2 table */ if (extent->l1_backup_table_offset != 0) { @@ -812,13 +832,15 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) extent->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), - &(m_data->offset), sizeof(m_data->offset) - ) < 0) { - return -1; + &offset, sizeof(offset)) < 0) { + return VMDK_ERROR; } } + if (m_data->l2_cache_entry) { + *m_data->l2_cache_entry = offset; + } - return 0; + return VMDK_OK; } static int get_cluster_offset(BlockDriverState *bs, @@ -830,24 +852,25 @@ static int get_cluster_offset(BlockDriverState *bs, { unsigned int l1_index, l2_offset, l2_index; int min_index, i, j; - uint32_t min_count, *l2_table, tmp = 0; + uint32_t min_count, *l2_table; + bool zeroed = false; if (m_data) { m_data->valid = 0; } if (extent->flat) { *cluster_offset = extent->flat_start_offset; - return 0; + return VMDK_OK; } offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE; l1_index = (offset >> 9) / extent->l1_entry_sectors; if (l1_index >= extent->l1_size) { - return -1; + return VMDK_ERROR; } l2_offset = extent->l1_table[l1_index]; if (!l2_offset) { - return -1; + return VMDK_UNALLOC; } for (i = 0; i < L2_CACHE_SIZE; i++) { if (l2_offset == extent->l2_cache_offsets[i]) { @@ -877,7 +900,7 @@ static int get_cluster_offset(BlockDriverState *bs, l2_table, extent->l2_size * sizeof(uint32_t) ) != extent->l2_size * sizeof(uint32_t)) { - return -1; + return VMDK_ERROR; } extent->l2_cache_offsets[min_index] = l2_offset; @@ -886,9 +909,21 @@ static int get_cluster_offset(BlockDriverState *bs, l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size; *cluster_offset = le32_to_cpu(l2_table[l2_index]); - if (!*cluster_offset) { + if (m_data) { + m_data->valid = 1; + m_data->l1_index = l1_index; + m_data->l2_index = l2_index; + m_data->offset = *cluster_offset; + m_data->l2_offset = l2_offset; + m_data->l2_cache_entry = &l2_table[l2_index]; + } + if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) { + zeroed = true; + } + + if (!*cluster_offset || zeroed) { if (!allocate) { - return -1; + return zeroed ? VMDK_ZEROED : VMDK_UNALLOC; } /* Avoid the L2 tables update for the images that have snapshots. */ @@ -901,8 +936,7 @@ static int get_cluster_offset(BlockDriverState *bs, } *cluster_offset >>= 9; - tmp = cpu_to_le32(*cluster_offset); - l2_table[l2_index] = tmp; + l2_table[l2_index] = cpu_to_le32(*cluster_offset); /* First of all we write grain itself, to avoid race condition * that may to corrupt the image. @@ -911,19 +945,15 @@ static int get_cluster_offset(BlockDriverState *bs, */ if (get_whole_cluster( bs, extent, *cluster_offset, offset, allocate) == -1) { - return -1; + return VMDK_ERROR; } if (m_data) { - m_data->offset = tmp; - m_data->l1_index = l1_index; - m_data->l2_index = l2_index; - m_data->l2_offset = l2_offset; - m_data->valid = 1; + m_data->offset = *cluster_offset; } } *cluster_offset <<= 9; - return 0; + return VMDK_OK; } static VmdkExtent *find_extent(BDRVVmdkState *s, @@ -959,8 +989,8 @@ static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs, ret = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0, &offset); qemu_co_mutex_unlock(&s->lock); - /* get_cluster_offset returning 0 means success */ - ret = !ret; + + ret = (ret == VMDK_OK || ret == VMDK_ZEROED); index_in_cluster = sector_num % extent->cluster_sectors; n = extent->cluster_sectors - index_in_cluster; @@ -1103,9 +1133,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, if (n > nb_sectors) { n = nb_sectors; } - if (ret) { + if (ret != VMDK_OK) { /* if not allocated, try to read from parent image, if exist */ - if (bs->backing_hd) { + if (bs->backing_hd && ret != VMDK_ZEROED) { if (!vmdk_is_cid_valid(bs)) { return -EINVAL; } @@ -1142,8 +1172,17 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num, return ret; } +/** + * vmdk_write: + * @zeroed: buf is ignored (data is zero), use zeroed_grain GTE feature + * if possible, otherwise return -ENOTSUP. + * @zero_dry_run: used for zeroed == true only, don't update L2 table, just + * + * Returns: error code with 0 for success. + */ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) + const uint8_t *buf, int nb_sectors, + bool zeroed, bool zero_dry_run) { BDRVVmdkState *s = bs->opaque; VmdkExtent *extent = NULL; @@ -1173,7 +1212,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, sector_num << 9, !extent->compressed, &cluster_offset); if (extent->compressed) { - if (ret == 0) { + if (ret == VMDK_OK) { /* Refuse write to allocated cluster for streamOptimized */ fprintf(stderr, "VMDK: can't write to allocated cluster" @@ -1189,7 +1228,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, &cluster_offset); } } - if (ret) { + if (ret == VMDK_ERROR) { return -EINVAL; } extent_begin_sector = extent->end_sector - extent->sectors; @@ -1199,17 +1238,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, if (n > nb_sectors) { n = nb_sectors; } - - ret = vmdk_write_extent(extent, - cluster_offset, index_in_cluster * 512, - buf, n, sector_num); - if (ret) { - return ret; - } - if (m_data.valid) { - /* update L2 tables */ - if (vmdk_L2update(extent, &m_data) == -1) { - return -EIO; + if (zeroed) { + /* Do zeroed write, buf is ignored */ + if (extent->has_zero_grain && + index_in_cluster == 0 && + n >= extent->cluster_sectors) { + n = extent->cluster_sectors; + if (!zero_dry_run) { + m_data.offset = VMDK_GTE_ZEROED; + /* update L2 tables */ + if (vmdk_L2update(extent, &m_data) != VMDK_OK) { + return -EIO; + } + } + } else { + return -ENOTSUP; + } + } else { + ret = vmdk_write_extent(extent, + cluster_offset, index_in_cluster * 512, + buf, n, sector_num); + if (ret) { + return ret; + } + if (m_data.valid) { + /* update L2 tables */ + if (vmdk_L2update(extent, &m_data) != VMDK_OK) { + return -EIO; + } } } nb_sectors -= n; @@ -1235,14 +1291,29 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num, int ret; BDRVVmdkState *s = bs->opaque; qemu_co_mutex_lock(&s->lock); - ret = vmdk_write(bs, sector_num, buf, nb_sectors); + ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + +static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors) +{ + int ret; + BDRVVmdkState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true); + if (!ret) { + ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false); + } qemu_co_mutex_unlock(&s->lock); return ret; } static int vmdk_create_extent(const char *filename, int64_t filesize, - bool flat, bool compress) + bool flat, bool compress, bool zeroed_grain) { int ret, i; int fd = 0; @@ -1264,9 +1335,10 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, } magic = cpu_to_be32(VMDK4_MAGIC); memset(&header, 0, sizeof(header)); - header.version = 1; - header.flags = - 3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0); + header.version = zeroed_grain ? 2 : 1; + header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT + | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0) + | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0); header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0; header.capacity = filesize / 512; header.granularity = 128; @@ -1357,7 +1429,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix, if (filename == NULL || !strlen(filename)) { fprintf(stderr, "Vmdk: no filename provided.\n"); - return -1; + return VMDK_ERROR; } p = strrchr(filename, '/'); if (p == NULL) { @@ -1369,7 +1441,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix, if (p != NULL) { p++; if (p - filename >= buf_len) { - return -1; + return VMDK_ERROR; } pstrcpy(path, p - filename + 1, filename); } else { @@ -1382,12 +1454,12 @@ static int filename_decompose(const char *filename, char *path, char *prefix, postfix[0] = '\0'; } else { if (q - p >= buf_len) { - return -1; + return VMDK_ERROR; } pstrcpy(prefix, q - p + 1, p); pstrcpy(postfix, buf_len, q); } - return 0; + return VMDK_OK; } static int relative_path(char *dest, int dest_size, @@ -1403,11 +1475,11 @@ static int relative_path(char *dest, int dest_size, #endif if (!(dest && base && target)) { - return -1; + return VMDK_ERROR; } if (path_is_absolute(target)) { pstrcpy(dest, dest_size, target); - return 0; + return VMDK_OK; } while (base[i] == target[i]) { i++; @@ -1426,7 +1498,7 @@ static int relative_path(char *dest, int dest_size, pstrcat(dest, dest_size, sep); } pstrcat(dest, dest_size, q); - return 0; + return VMDK_OK; } static int vmdk_create(const char *filename, QEMUOptionParameter *options) @@ -1447,6 +1519,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) char parent_desc_line[BUF_SIZE] = ""; uint32_t parent_cid = 0xffffffff; uint32_t number_heads = 16; + bool zeroed_grain = false; const char desc_template[] = "# Disk DescriptorFile\n" "version=1\n" @@ -1482,6 +1555,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0; } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) { fmt = options->value.s; + } else if (!strcmp(options->name, BLOCK_OPT_ZEROED_GRAIN)) { + zeroed_grain |= options->value.n; } options++; } @@ -1568,7 +1643,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) snprintf(ext_filename, sizeof(ext_filename), "%s%s", path, desc_filename); - if (vmdk_create_extent(ext_filename, size, flat, compress)) { + if (vmdk_create_extent(ext_filename, size, + flat, compress, zeroed_grain)) { return -EINVAL; } filesize -= size; @@ -1694,6 +1770,11 @@ static QEMUOptionParameter vmdk_create_options[] = { "VMDK flat extent format, can be one of " "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} " }, + { + .name = BLOCK_OPT_ZEROED_GRAIN, + .type = OPT_FLAG, + .help = "Enable efficient zero writes using the zeroed-grain GTE feature" + }, { NULL } }; @@ -1705,6 +1786,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_reopen_prepare = vmdk_reopen_prepare, .bdrv_read = vmdk_co_read, .bdrv_write = vmdk_co_write, + .bdrv_co_write_zeroes = vmdk_co_write_zeroes, .bdrv_close = vmdk_close, .bdrv_create = vmdk_create, .bdrv_co_flush_to_disk = vmdk_co_flush, |