diff options
author | Anthony Liguori <aliguori@amazon.com> | 2013-11-13 11:47:44 -0800 |
---|---|---|
committer | Anthony Liguori <aliguori@amazon.com> | 2013-11-13 11:47:44 -0800 |
commit | deb0f500651317863922964c87b4fa64eecdbd73 (patch) | |
tree | f67b4c5eabb590c5a51b64796cdb19e1dce07847 | |
parent | 70c4c5b5621f6d954843547bcab9db857d882e99 (diff) | |
parent | 7e382003f1bd9d8a441ecc5ac8a74bad3564d943 (diff) |
Merge remote-tracking branch 'stefanha/block' into staging
# By Jeff Cody (26) and others
# Via Stefan Hajnoczi
* stefanha/block: (37 commits)
block: Round up total_sectors
block: vhdx qemu-iotest - log replay of data sector
block: qemu-iotests for vhdx, add write test support
block: vhdx - update _make_test_img() to filter out vhdx options
block: vhdx - add .bdrv_create() support
block: vhdx - fix comment typos in header, fix incorrect struct fields
block: vhdx - break out code operations to functions
block: vhdx - move more endian translations to vhdx-endian.c
block: vhdx - remove BAT file offset bit shifting
block: vhdx write support
block: vhdx - add log write support
block: vhdx - add region overlap detection for image files
block: vhdx - log parsing, replay, and flush support
block: vhdx code movement - move vhdx_close() above vhdx_open()
block: vhdx - update log guid in header, and first write tracker
block: vhdx - break endian translation functions out
block: vhdx - log support struct and defines
block: vhdx code movement - VHDXMetadataEntries and BDRVVHDXState to header.
block: vhdx - add header update capability.
block: vhdx - minor comments and typo correction.
...
Message-id: 1383905551-16411-1-git-send-email-stefanha@redhat.com
Signed-off-by: Anthony Liguori <aliguori@amazon.com>
35 files changed, 2897 insertions, 262 deletions
@@ -640,7 +640,7 @@ static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) if (length < 0) { return length; } - hint = length >> BDRV_SECTOR_BITS; + hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); } bs->total_sectors = hint; @@ -1084,8 +1084,8 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, snprintf(backing_filename, sizeof(backing_filename), "%s", filename); } else if (!realpath(filename, backing_filename)) { - error_setg_errno(errp, errno, "Could not resolve path '%s'", filename); ret = -errno; + error_setg_errno(errp, errno, "Could not resolve path '%s'", filename); goto fail; } diff --git a/block/Makefile.objs b/block/Makefile.objs index 3bb85b535c..f43ecbc044 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -2,7 +2,7 @@ block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o v block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o -block-obj-y += vhdx.o +block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o block-obj-y += parallels.o blkdebug.o blkverify.o block-obj-y += snapshot.o qapi.o block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o diff --git a/block/raw-posix.c b/block/raw-posix.c index f6d48bbdb2..ace5d962e8 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1842,7 +1842,8 @@ static BlockDriver bdrv_host_cdrom = { #endif /* __linux__ */ #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__) -static int cdrom_open(BlockDriverState *bs, QDict *options, int flags) +static int cdrom_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) { BDRVRawState *s = bs->opaque; Error *local_err = NULL; diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c new file mode 100644 index 0000000000..fe879ed995 --- /dev/null +++ b/block/vhdx-endian.c @@ -0,0 +1,216 @@ +/* + * Block driver for Hyper-V VHDX Images + * + * Copyright 
(c) 2013 Red Hat, Inc., + * + * Authors: + * Jeff Cody <jcody@redhat.com> + * + * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 + * by Microsoft: + * https://www.microsoft.com/en-us/download/details.aspx?id=34750 + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "block/block_int.h" +#include "block/vhdx.h" + +#include <uuid/uuid.h> + + +/* + * All the VHDX formats on disk are little endian - the following + * are helper import/export functions to correctly convert + * endianness from disk read to native cpu format, and back again. + */ + + +/* VHDX File Header */ + + +void vhdx_header_le_import(VHDXHeader *h) +{ + assert(h != NULL); + + le32_to_cpus(&h->signature); + le32_to_cpus(&h->checksum); + le64_to_cpus(&h->sequence_number); + + leguid_to_cpus(&h->file_write_guid); + leguid_to_cpus(&h->data_write_guid); + leguid_to_cpus(&h->log_guid); + + le16_to_cpus(&h->log_version); + le16_to_cpus(&h->version); + le32_to_cpus(&h->log_length); + le64_to_cpus(&h->log_offset); +} + +void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h) +{ + assert(orig_h != NULL); + assert(new_h != NULL); + + new_h->signature = cpu_to_le32(orig_h->signature); + new_h->checksum = cpu_to_le32(orig_h->checksum); + new_h->sequence_number = cpu_to_le64(orig_h->sequence_number); + + new_h->file_write_guid = orig_h->file_write_guid; + new_h->data_write_guid = orig_h->data_write_guid; + new_h->log_guid = orig_h->log_guid; + + cpu_to_leguids(&new_h->file_write_guid); + cpu_to_leguids(&new_h->data_write_guid); + cpu_to_leguids(&new_h->log_guid); + + new_h->log_version = cpu_to_le16(orig_h->log_version); + new_h->version = cpu_to_le16(orig_h->version); + new_h->log_length = cpu_to_le32(orig_h->log_length); + new_h->log_offset = cpu_to_le64(orig_h->log_offset); +} + + +/* VHDX Log Headers */ + + +void 
vhdx_log_desc_le_import(VHDXLogDescriptor *d) +{ + assert(d != NULL); + + le32_to_cpus(&d->signature); + le32_to_cpus(&d->trailing_bytes); + le64_to_cpus(&d->leading_bytes); + le64_to_cpus(&d->file_offset); + le64_to_cpus(&d->sequence_number); +} + +void vhdx_log_desc_le_export(VHDXLogDescriptor *d) +{ + assert(d != NULL); + + cpu_to_le32s(&d->signature); + cpu_to_le32s(&d->trailing_bytes); + cpu_to_le64s(&d->leading_bytes); + cpu_to_le64s(&d->file_offset); + cpu_to_le64s(&d->sequence_number); +} + +void vhdx_log_data_le_export(VHDXLogDataSector *d) +{ + assert(d != NULL); + + cpu_to_le32s(&d->data_signature); + cpu_to_le32s(&d->sequence_high); + cpu_to_le32s(&d->sequence_low); +} + +void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr) +{ + assert(hdr != NULL); + + le32_to_cpus(&hdr->signature); + le32_to_cpus(&hdr->checksum); + le32_to_cpus(&hdr->entry_length); + le32_to_cpus(&hdr->tail); + le64_to_cpus(&hdr->sequence_number); + le32_to_cpus(&hdr->descriptor_count); + leguid_to_cpus(&hdr->log_guid); + le64_to_cpus(&hdr->flushed_file_offset); + le64_to_cpus(&hdr->last_file_offset); +} + +void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr) +{ + assert(hdr != NULL); + + cpu_to_le32s(&hdr->signature); + cpu_to_le32s(&hdr->checksum); + cpu_to_le32s(&hdr->entry_length); + cpu_to_le32s(&hdr->tail); + cpu_to_le64s(&hdr->sequence_number); + cpu_to_le32s(&hdr->descriptor_count); + cpu_to_leguids(&hdr->log_guid); + cpu_to_le64s(&hdr->flushed_file_offset); + cpu_to_le64s(&hdr->last_file_offset); +} + + +/* Region table entries */ +void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr) +{ + assert(hdr != NULL); + + le32_to_cpus(&hdr->signature); + le32_to_cpus(&hdr->checksum); + le32_to_cpus(&hdr->entry_count); +} + +void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr) +{ + assert(hdr != NULL); + + cpu_to_le32s(&hdr->signature); + cpu_to_le32s(&hdr->checksum); + cpu_to_le32s(&hdr->entry_count); +} + +void 
vhdx_region_entry_le_import(VHDXRegionTableEntry *e) +{ + assert(e != NULL); + + leguid_to_cpus(&e->guid); + le64_to_cpus(&e->file_offset); + le32_to_cpus(&e->length); + le32_to_cpus(&e->data_bits); +} + +void vhdx_region_entry_le_export(VHDXRegionTableEntry *e) +{ + assert(e != NULL); + + cpu_to_leguids(&e->guid); + cpu_to_le64s(&e->file_offset); + cpu_to_le32s(&e->length); + cpu_to_le32s(&e->data_bits); +} + + +/* Metadata headers & table */ +void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr) +{ + assert(hdr != NULL); + + le64_to_cpus(&hdr->signature); + le16_to_cpus(&hdr->entry_count); +} + +void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr) +{ + assert(hdr != NULL); + + cpu_to_le64s(&hdr->signature); + cpu_to_le16s(&hdr->entry_count); +} + +void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e) +{ + assert(e != NULL); + + leguid_to_cpus(&e->item_id); + le32_to_cpus(&e->offset); + le32_to_cpus(&e->length); + le32_to_cpus(&e->data_bits); +} +void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e) +{ + assert(e != NULL); + + cpu_to_leguids(&e->item_id); + cpu_to_le32s(&e->offset); + cpu_to_le32s(&e->length); + cpu_to_le32s(&e->data_bits); +} diff --git a/block/vhdx-log.c b/block/vhdx-log.c new file mode 100644 index 0000000000..ee5583c309 --- /dev/null +++ b/block/vhdx-log.c @@ -0,0 +1,1010 @@ +/* + * Block driver for Hyper-V VHDX Images + * + * Copyright (c) 2013 Red Hat, Inc., + * + * Authors: + * Jeff Cody <jcody@redhat.com> + * + * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 + * by Microsoft: + * https://www.microsoft.com/en-us/download/details.aspx?id=34750 + * + * This file covers the functionality of the metadata log writing, parsing, and + * replay. + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ * + */ +#include "qemu-common.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "block/vhdx.h" + + +typedef struct VHDXLogSequence { + bool valid; + uint32_t count; + VHDXLogEntries log; + VHDXLogEntryHeader hdr; +} VHDXLogSequence; + +typedef struct VHDXLogDescEntries { + VHDXLogEntryHeader hdr; + VHDXLogDescriptor desc[]; +} VHDXLogDescEntries; + +static const MSGUID zero_guid = { 0 }; + +/* The log located on the disk is circular buffer containing + * sectors of 4096 bytes each. + * + * It is assumed for the read/write functions below that the + * circular buffer scheme uses a 'one sector open' to indicate + * the buffer is full. Given the validation methods used for each + * sector, this method should be compatible with other methods that + * do not waste a sector. + */ + + +/* Allow peeking at the hdr entry at the beginning of the current + * read index, without advancing the read index */ +static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log, + VHDXLogEntryHeader *hdr) +{ + int ret = 0; + uint64_t offset; + uint32_t read; + + assert(hdr != NULL); + + /* peek is only supported on sector boundaries */ + if (log->read % VHDX_LOG_SECTOR_SIZE) { + ret = -EFAULT; + goto exit; + } + + read = log->read; + /* we are guaranteed that a) log sectors are 4096 bytes, + * and b) the log length is a multiple of 1MB. 
So, there + * is always a round number of sectors in the buffer */ + if ((read + sizeof(VHDXLogEntryHeader)) > log->length) { + read = 0; + } + + if (read == log->write) { + ret = -EINVAL; + goto exit; + } + + offset = log->offset + read; + + ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader)); + if (ret < 0) { + goto exit; + } + +exit: + return ret; +} + +/* Index increment for log, based on sector boundaries */ +static int vhdx_log_inc_idx(uint32_t idx, uint64_t length) +{ + idx += VHDX_LOG_SECTOR_SIZE; + /* we are guaranteed that a) log sectors are 4096 bytes, + * and b) the log length is a multiple of 1MB. So, there + * is always a round number of sectors in the buffer */ + return idx >= length ? 0 : idx; +} + + +/* Reset the log to empty */ +static void vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s) +{ + MSGUID guid = { 0 }; + s->log.read = s->log.write = 0; + /* a log guid of 0 indicates an empty log to any parser of v0 + * VHDX logs */ + vhdx_update_headers(bs, s, false, &guid); +} + +/* Reads num_sectors from the log (all log sectors are 4096 bytes), + * into buffer 'buffer'. Upon return, *sectors_read will contain + * the number of sectors successfully read. + * + * It is assumed that 'buffer' is already allocated, and of sufficient + * size (i.e. >= 4096*num_sectors). + * + * If 'peek' is true, then the tail (read) pointer for the circular buffer is + * not modified. + * + * 0 is returned on success, -errno otherwise. 
*/ +static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log, + uint32_t *sectors_read, void *buffer, + uint32_t num_sectors, bool peek) +{ + int ret = 0; + uint64_t offset; + uint32_t read; + + read = log->read; + + *sectors_read = 0; + while (num_sectors) { + if (read == log->write) { + /* empty */ + break; + } + offset = log->offset + read; + + ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + read = vhdx_log_inc_idx(read, log->length); + + *sectors_read = *sectors_read + 1; + num_sectors--; + } + +exit: + if (!peek) { + log->read = read; + } + return ret; +} + +/* Writes num_sectors to the log (all log sectors are 4096 bytes), + * from buffer 'buffer'. Upon return, *sectors_written will contain + * the number of sectors successfully written. + * + * It is assumed that 'buffer' is at least 4096*num_sectors large. + * + * 0 is returned on success, -errno otherwise */ +static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log, + uint32_t *sectors_written, void *buffer, + uint32_t num_sectors) +{ + int ret = 0; + uint64_t offset; + uint32_t write; + void *buffer_tmp; + BDRVVHDXState *s = bs->opaque; + + ret = vhdx_user_visible_write(bs, s); + if (ret < 0) { + goto exit; + } + + write = log->write; + + buffer_tmp = buffer; + while (num_sectors) { + + offset = log->offset + write; + write = vhdx_log_inc_idx(write, log->length); + if (write == log->read) { + /* full */ + break; + } + ret = bdrv_pwrite(bs->file, offset, buffer_tmp, VHDX_LOG_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + buffer_tmp += VHDX_LOG_SECTOR_SIZE; + + log->write = write; + *sectors_written = *sectors_written + 1; + num_sectors--; + } + +exit: + return ret; +} + + +/* Validates a log entry header */ +static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr, + BDRVVHDXState *s) +{ + int valid = false; + + if (memcmp(&hdr->signature, "loge", 4)) { + goto exit; + } + + /* if the 
individual entry length is larger than the whole log + * buffer, that is obviously invalid */ + if (log->length < hdr->entry_length) { + goto exit; + } + + /* length of entire entry must be in units of 4KB (log sector size) */ + if (hdr->entry_length % (VHDX_LOG_SECTOR_SIZE)) { + goto exit; + } + + /* per spec, sequence # must be > 0 */ + if (hdr->sequence_number == 0) { + goto exit; + } + + /* log entries are only valid if they match the file-wide log guid + * found in the active header */ + if (!guid_eq(hdr->log_guid, s->headers[s->curr_header]->log_guid)) { + goto exit; + } + + if (hdr->descriptor_count * sizeof(VHDXLogDescriptor) > hdr->entry_length) { + goto exit; + } + + valid = true; + +exit: + return valid; +} + +/* + * Given a log header, this will validate that the descriptors and the + * corresponding data sectors (if applicable) + * + * Validation consists of: + * 1. Making sure the sequence numbers matches the entry header + * 2. Verifying a valid signature ('zero' or 'desc' for descriptors) + * 3. File offset field is a multiple of 4KB + * 4. If a data descriptor, the corresponding data sector + * has its signature ('data') and matching sequence number + * + * @desc: the data buffer containing the descriptor + * @hdr: the log entry header + * + * Returns true if valid + */ +static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc, + VHDXLogEntryHeader *hdr) +{ + bool ret = false; + + if (desc->sequence_number != hdr->sequence_number) { + goto exit; + } + if (desc->file_offset % VHDX_LOG_SECTOR_SIZE) { + goto exit; + } + + if (!memcmp(&desc->signature, "zero", 4)) { + if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) { + /* valid */ + ret = true; + } + } else if (!memcmp(&desc->signature, "desc", 4)) { + /* valid */ + ret = true; + } + +exit: + return ret; +} + + +/* Prior to sector data for a log entry, there is the header + * and the descriptors referenced in the header: + * + * [] = 4KB sector + * + * [ hdr, desc ][ desc ][ ... ][ data ][ ... 
] + * + * The first sector in a log entry has a 64 byte header, and + * up to 126 32-byte descriptors. If more descriptors than + * 126 are required, then subsequent sectors can have up to 128 + * descriptors. Each sector is 4KB. Data follows the descriptor + * sectors. + * + * This will return the number of sectors needed to encompass + * the passed number of descriptors in desc_cnt. + * + * This will never return 0, even if desc_cnt is 0. + */ +static int vhdx_compute_desc_sectors(uint32_t desc_cnt) +{ + uint32_t desc_sectors; + + desc_cnt += 2; /* account for header in first sector */ + desc_sectors = desc_cnt / 128; + if (desc_cnt % 128) { + desc_sectors++; + } + + return desc_sectors; +} + + +/* Reads the log header, and subsequent descriptors (if any). This + * will allocate all the space for buffer, which must be NULL when + * passed into this function. Each descriptor will also be validated, + * and error returned if any are invalid. */ +static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s, + VHDXLogEntries *log, VHDXLogDescEntries **buffer) +{ + int ret = 0; + uint32_t desc_sectors; + uint32_t sectors_read; + VHDXLogEntryHeader hdr; + VHDXLogDescEntries *desc_entries = NULL; + int i; + + assert(*buffer == NULL); + + ret = vhdx_log_peek_hdr(bs, log, &hdr); + if (ret < 0) { + goto exit; + } + vhdx_log_entry_hdr_le_import(&hdr); + if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) { + ret = -EINVAL; + goto exit; + } + + desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); + desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE); + + ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries, + desc_sectors, false); + if (ret < 0) { + goto free_and_exit; + } + if (sectors_read != desc_sectors) { + ret = -EINVAL; + goto free_and_exit; + } + + /* put in proper endianness, and validate each desc */ + for (i = 0; i < hdr.descriptor_count; i++) { + vhdx_log_desc_le_import(&desc_entries->desc[i]); + if 
(vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) { + ret = -EINVAL; + goto free_and_exit; + } + } + + *buffer = desc_entries; + goto exit; + +free_and_exit: + qemu_vfree(desc_entries); +exit: + return ret; +} + + +/* Flushes the descriptor described by desc to the VHDX image file. + * If the descriptor is a data descriptor, than 'data' must be non-NULL, + * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be + * written. + * + * Verification is performed to make sure the sequence numbers of a data + * descriptor match the sequence number in the desc. + * + * For a zero descriptor, it may describe multiple sectors to fill with zeroes. + * In this case, it should be noted that zeroes are written to disk, and the + * image file is not extended as a sparse file. */ +static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc, + VHDXLogDataSector *data) +{ + int ret = 0; + uint64_t seq, file_offset; + uint32_t offset = 0; + void *buffer = NULL; + uint64_t count = 1; + int i; + + buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); + + if (!memcmp(&desc->signature, "desc", 4)) { + /* data sector */ + if (data == NULL) { + ret = -EFAULT; + goto exit; + } + + /* The sequence number of the data sector must match that + * in the descriptor */ + seq = data->sequence_high; + seq <<= 32; + seq |= data->sequence_low & 0xffffffff; + + if (seq != desc->sequence_number) { + ret = -EINVAL; + goto exit; + } + + /* Each data sector is in total 4096 bytes, however the first + * 8 bytes, and last 4 bytes, are located in the descriptor */ + memcpy(buffer, &desc->leading_bytes, 8); + offset += 8; + + memcpy(buffer+offset, data->data, 4084); + offset += 4084; + + memcpy(buffer+offset, &desc->trailing_bytes, 4); + + } else if (!memcmp(&desc->signature, "zero", 4)) { + /* write 'count' sectors of sector */ + memset(buffer, 0, VHDX_LOG_SECTOR_SIZE); + count = desc->zero_length / VHDX_LOG_SECTOR_SIZE; + } + + file_offset = desc->file_offset; + + 
/* count is only > 1 if we are writing zeroes */ + for (i = 0; i < count; i++) { + ret = bdrv_pwrite_sync(bs->file, file_offset, buffer, + VHDX_LOG_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + file_offset += VHDX_LOG_SECTOR_SIZE; + } + +exit: + qemu_vfree(buffer); + return ret; +} + +/* Flush the entire log (as described by 'logs') to the VHDX image + * file, and then set the log to 'empty' status once complete. + * + * The log entries should be validate prior to flushing */ +static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, + VHDXLogSequence *logs) +{ + int ret = 0; + int i; + uint32_t cnt, sectors_read; + uint64_t new_file_size; + void *data = NULL; + VHDXLogDescEntries *desc_entries = NULL; + VHDXLogEntryHeader hdr_tmp = { 0 }; + + cnt = logs->count; + + data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); + + ret = vhdx_user_visible_write(bs, s); + if (ret < 0) { + goto exit; + } + + /* each iteration represents one log sequence, which may span multiple + * sectors */ + while (cnt--) { + ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp); + if (ret < 0) { + goto exit; + } + /* if the log shows a FlushedFileOffset larger than our current file + * size, then that means the file has been truncated / corrupted, and + * we must refused to open it / use it */ + if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file)) { + ret = -EINVAL; + goto exit; + } + + ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries); + if (ret < 0) { + goto exit; + } + + for (i = 0; i < desc_entries->hdr.descriptor_count; i++) { + if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) { + /* data sector, so read a sector to flush */ + ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read, + data, 1, false); + if (ret < 0) { + goto exit; + } + if (sectors_read != 1) { + ret = -EINVAL; + goto exit; + } + } + + ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data); + if (ret < 0) { + goto exit; + } + } + if (bdrv_getlength(bs->file) < 
desc_entries->hdr.last_file_offset) { + new_file_size = desc_entries->hdr.last_file_offset; + if (new_file_size % (1024*1024)) { + /* round up to nearest 1MB boundary */ + new_file_size = ((new_file_size >> 20) + 1) << 20; + bdrv_truncate(bs->file, new_file_size); + } + } + qemu_vfree(desc_entries); + desc_entries = NULL; + } + + bdrv_flush(bs); + /* once the log is fully flushed, indicate that we have an empty log + * now. This also sets the log guid to 0, to indicate an empty log */ + vhdx_log_reset(bs, s); + +exit: + qemu_vfree(data); + qemu_vfree(desc_entries); + return ret; +} + +static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s, + VHDXLogEntries *log, uint64_t seq, + bool *valid, VHDXLogEntryHeader *entry) +{ + int ret = 0; + VHDXLogEntryHeader hdr; + void *buffer = NULL; + uint32_t i, desc_sectors, total_sectors, crc; + uint32_t sectors_read = 0; + VHDXLogDescEntries *desc_buffer = NULL; + + *valid = false; + + ret = vhdx_log_peek_hdr(bs, log, &hdr); + if (ret < 0) { + goto inc_and_exit; + } + + vhdx_log_entry_hdr_le_import(&hdr); + + + if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) { + goto inc_and_exit; + } + + if (seq > 0) { + if (hdr.sequence_number != seq + 1) { + goto inc_and_exit; + } + } + + desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); + + /* Read desc sectors, and calculate log checksum */ + + total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE; + + + /* read_desc() will incrememnt the read idx */ + ret = vhdx_log_read_desc(bs, s, log, &desc_buffer); + if (ret < 0) { + goto free_and_exit; + } + + crc = vhdx_checksum_calc(0xffffffff, (void *)desc_buffer, + desc_sectors * VHDX_LOG_SECTOR_SIZE, 4); + crc ^= 0xffffffff; + + buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); + if (total_sectors > desc_sectors) { + for (i = 0; i < total_sectors - desc_sectors; i++) { + sectors_read = 0; + ret = vhdx_log_read_sectors(bs, log, &sectors_read, buffer, + 1, false); + if (ret < 0 || sectors_read != 1) { + goto 
free_and_exit; + } + crc = vhdx_checksum_calc(crc, buffer, VHDX_LOG_SECTOR_SIZE, -1); + crc ^= 0xffffffff; + } + } + crc ^= 0xffffffff; + if (crc != desc_buffer->hdr.checksum) { + goto free_and_exit; + } + + *valid = true; + *entry = hdr; + goto free_and_exit; + +inc_and_exit: + log->read = vhdx_log_inc_idx(log->read, log->length); + +free_and_exit: + qemu_vfree(buffer); + qemu_vfree(desc_buffer); + return ret; +} + +/* Search through the log circular buffer, and find the valid, active + * log sequence, if any exists + * */ +static int vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s, + VHDXLogSequence *logs) +{ + int ret = 0; + uint32_t tail; + bool seq_valid = false; + VHDXLogSequence candidate = { 0 }; + VHDXLogEntryHeader hdr = { 0 }; + VHDXLogEntries curr_log; + + memcpy(&curr_log, &s->log, sizeof(VHDXLogEntries)); + curr_log.write = curr_log.length; /* assume log is full */ + curr_log.read = 0; + + + /* now we will go through the whole log sector by sector, until + * we find a valid, active log sequence, or reach the end of the + * log buffer */ + for (;;) { + uint64_t curr_seq = 0; + VHDXLogSequence current = { 0 }; + + tail = curr_log.read; + + ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq, + &seq_valid, &hdr); + if (ret < 0) { + goto exit; + } + + if (seq_valid) { + current.valid = true; + current.log = curr_log; + current.log.read = tail; + current.log.write = curr_log.read; + current.count = 1; + current.hdr = hdr; + + + for (;;) { + ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq, + &seq_valid, &hdr); + if (ret < 0) { + goto exit; + } + if (seq_valid == false) { + break; + } + current.log.write = curr_log.read; + current.count++; + + curr_seq = hdr.sequence_number; + } + } + + if (current.valid) { + if (candidate.valid == false || + current.hdr.sequence_number > candidate.hdr.sequence_number) { + candidate = current; + } + } + + if (curr_log.read < tail) { + break; + } + } + + *logs = candidate; + + if (candidate.valid) { + /* 
this is the next sequence number, for writes */ + s->log.sequence = candidate.hdr.sequence_number + 1; + } + + +exit: + return ret; +} + +/* Parse the replay log. Per the VHDX spec, if the log is present + * it must be replayed prior to opening the file, even read-only. + * + * If read-only, we must replay the log in RAM (or refuse to open + * a dirty VHDX file read-only) */ +int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed) +{ + int ret = 0; + VHDXHeader *hdr; + VHDXLogSequence logs = { 0 }; + + hdr = s->headers[s->curr_header]; + + *flushed = false; + + /* s->log.hdr is freed in vhdx_close() */ + if (s->log.hdr == NULL) { + s->log.hdr = qemu_blockalign(bs, sizeof(VHDXLogEntryHeader)); + } + + s->log.offset = hdr->log_offset; + s->log.length = hdr->log_length; + + if (s->log.offset < VHDX_LOG_MIN_SIZE || + s->log.offset % VHDX_LOG_MIN_SIZE) { + ret = -EINVAL; + goto exit; + } + + /* per spec, only log version of 0 is supported */ + if (hdr->log_version != 0) { + ret = -EINVAL; + goto exit; + } + + /* If either the log guid, or log length is zero, + * then a replay log is not present */ + if (guid_eq(hdr->log_guid, zero_guid)) { + goto exit; + } + + if (hdr->log_length == 0) { + goto exit; + } + + if (hdr->log_length % VHDX_LOG_MIN_SIZE) { + ret = -EINVAL; + goto exit; + } + + + /* The log is present, we need to find if and where there is an active + * sequence of valid entries present in the log. 
*/ + + ret = vhdx_log_search(bs, s, &logs); + if (ret < 0) { + goto exit; + } + + if (logs.valid) { + /* now flush the log */ + ret = vhdx_log_flush(bs, s, &logs); + if (ret < 0) { + goto exit; + } + *flushed = true; + } + + +exit: + return ret; +} + + + +static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc, + VHDXLogDataSector *sector, void *data, + uint64_t seq) +{ + /* 8 + 4084 + 4 = 4096, 1 log sector */ + memcpy(&desc->leading_bytes, data, 8); + data += 8; + cpu_to_le64s(&desc->leading_bytes); + memcpy(sector->data, data, 4084); + data += 4084; + memcpy(&desc->trailing_bytes, data, 4); + cpu_to_le32s(&desc->trailing_bytes); + data += 4; + + sector->sequence_high = (uint32_t) (seq >> 32); + sector->sequence_low = (uint32_t) (seq & 0xffffffff); + sector->data_signature = VHDX_LOG_DATA_SIGNATURE; + + vhdx_log_desc_le_export(desc); + vhdx_log_data_le_export(sector); +} + + +static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, + void *data, uint32_t length, uint64_t offset) +{ + int ret = 0; + void *buffer = NULL; + void *merged_sector = NULL; + void *data_tmp, *sector_write; + unsigned int i; + int sector_offset; + uint32_t desc_sectors, sectors, total_length; + uint32_t sectors_written = 0; + uint32_t aligned_length; + uint32_t leading_length = 0; + uint32_t trailing_length = 0; + uint32_t partial_sectors = 0; + uint32_t bytes_written = 0; + uint64_t file_offset; + VHDXHeader *header; + VHDXLogEntryHeader new_hdr; + VHDXLogDescriptor *new_desc = NULL; + VHDXLogDataSector *data_sector = NULL; + MSGUID new_guid = { 0 }; + + header = s->headers[s->curr_header]; + + /* need to have offset read data, and be on 4096 byte boundary */ + + if (length > header->log_length) { + /* no log present. 
we could create a log here instead of failing */ + ret = -EINVAL; + goto exit; + } + + if (guid_eq(header->log_guid, zero_guid)) { + vhdx_guid_generate(&new_guid); + vhdx_update_headers(bs, s, false, &new_guid); + } else { + /* currently, we require that the log be flushed after + * every write. */ + ret = -ENOTSUP; + goto exit; + } + + /* 0 is an invalid sequence number, but may also represent the first + * log write (or a wrapped seq) */ + if (s->log.sequence == 0) { + s->log.sequence = 1; + } + + sector_offset = offset % VHDX_LOG_SECTOR_SIZE; + file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE; + + aligned_length = length; + + /* add in the unaligned head and tail bytes */ + if (sector_offset) { + leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset); + leading_length = leading_length > length ? length : leading_length; + aligned_length -= leading_length; + partial_sectors++; + } + + sectors = aligned_length / VHDX_LOG_SECTOR_SIZE; + trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE); + if (trailing_length) { + partial_sectors++; + } + + sectors += partial_sectors; + + /* sectors is now how many sectors the data itself takes, not + * including the header and descriptor metadata */ + + new_hdr = (VHDXLogEntryHeader) { + .signature = VHDX_LOG_SIGNATURE, + .tail = s->log.tail, + .sequence_number = s->log.sequence, + .descriptor_count = sectors, + .reserved = 0, + .flushed_file_offset = bdrv_getlength(bs->file), + .last_file_offset = bdrv_getlength(bs->file), + }; + + new_hdr.log_guid = header->log_guid; + + desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count); + + total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE; + new_hdr.entry_length = total_length; + + vhdx_log_entry_hdr_le_export(&new_hdr); + + buffer = qemu_blockalign(bs, total_length); + memcpy(buffer, &new_hdr, sizeof(new_hdr)); + + new_desc = (VHDXLogDescriptor *) (buffer + sizeof(new_hdr)); + data_sector = buffer + (desc_sectors * 
VHDX_LOG_SECTOR_SIZE); + data_tmp = data; + + /* All log sectors are 4KB, so for any partial sectors we must + * merge the data with preexisting data from the final file + * destination */ + merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); + + for (i = 0; i < sectors; i++) { + new_desc->signature = VHDX_LOG_DESC_SIGNATURE; + new_desc->sequence_number = s->log.sequence; + new_desc->file_offset = file_offset; + + if (i == 0 && leading_length) { + /* partial sector at the front of the buffer */ + ret = bdrv_pread(bs->file, file_offset, merged_sector, + VHDX_LOG_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + memcpy(merged_sector + sector_offset, data_tmp, leading_length); + bytes_written = leading_length; + sector_write = merged_sector; + } else if (i == sectors - 1 && trailing_length) { + /* partial sector at the end of the buffer */ + ret = bdrv_pread(bs->file, + file_offset, + merged_sector + trailing_length, + VHDX_LOG_SECTOR_SIZE - trailing_length); + if (ret < 0) { + goto exit; + } + memcpy(merged_sector, data_tmp, trailing_length); + bytes_written = trailing_length; + sector_write = merged_sector; + } else { + bytes_written = VHDX_LOG_SECTOR_SIZE; + sector_write = data_tmp; + } + + /* populate the raw sector data into the proper structures, + * as well as update the descriptor, and convert to proper + * endianness */ + vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write, + s->log.sequence); + + data_tmp += bytes_written; + data_sector++; + new_desc++; + file_offset += VHDX_LOG_SECTOR_SIZE; + } + + /* checksum covers entire entry, from the log header through the + * last data sector */ + vhdx_update_checksum(buffer, total_length, + offsetof(VHDXLogEntryHeader, checksum)); + cpu_to_le32s((uint32_t *)(buffer + 4)); + + /* now write to the log */ + vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer, + desc_sectors + sectors); + if (ret < 0) { + goto exit; + } + + if (sectors_written != desc_sectors + sectors) { + /* instead of failing, 
we could flush the log here */ + ret = -EINVAL; + goto exit; + } + + s->log.sequence++; + /* write new tail */ + s->log.tail = s->log.write; + +exit: + qemu_vfree(buffer); + qemu_vfree(merged_sector); + return ret; +} + +/* Perform a log write, and then immediately flush the entire log */ +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, + void *data, uint32_t length, uint64_t offset) +{ + int ret = 0; + VHDXLogSequence logs = { .valid = true, + .count = 1, + .hdr = { 0 } }; + + + /* Make sure data written (new and/or changed blocks) is stable + * on disk, before creating log entry */ + bdrv_flush(bs); + ret = vhdx_log_write(bs, s, data, length, offset); + if (ret < 0) { + goto exit; + } + logs.log = s->log; + + /* Make sure log is stable on disk */ + bdrv_flush(bs); + ret = vhdx_log_flush(bs, s, &logs); + if (ret < 0) { + goto exit; + } + + s->log = logs.log; + +exit: + return ret; +} + diff --git a/block/vhdx.c b/block/vhdx.c index 6cb04122bb..7d1af9663b 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -6,9 +6,9 @@ * Authors: * Jeff Cody <jcody@redhat.com> * - * This is based on the "VHDX Format Specification v0.95", published 4/12/2012 + * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 * by Microsoft: - * https://www.microsoft.com/en-us/download/details.aspx?id=29681 + * https://www.microsoft.com/en-us/download/details.aspx?id=34750 * * This work is licensed under the terms of the GNU LGPL, version 2 or later. * See the COPYING.LIB file in the top-level directory. 
@@ -22,6 +22,20 @@ #include "block/vhdx.h" #include "migration/migration.h" +#include <uuid/uuid.h> +#include <glib.h> + +/* Options for VHDX creation */ + +#define VHDX_BLOCK_OPT_LOG_SIZE "log_size" +#define VHDX_BLOCK_OPT_BLOCK_SIZE "block_size" +#define VHDX_BLOCK_OPT_ZERO "block_state_zero" + +typedef enum VHDXImageType { + VHDX_TYPE_DYNAMIC = 0, + VHDX_TYPE_FIXED, + VHDX_TYPE_DIFFERENCING, /* Currently unsupported */ +} VHDXImageType; /* Several metadata and region table data entries are identified by * guids in a MS-specific GUID format. */ @@ -104,16 +118,6 @@ static const MSGUID parent_vhdx_guid = { .data1 = 0xb04aefb7, META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \ META_PHYS_SECTOR_SIZE_PRESENT) -typedef struct VHDXMetadataEntries { - VHDXMetadataTableEntry file_parameters_entry; - VHDXMetadataTableEntry virtual_disk_size_entry; - VHDXMetadataTableEntry page83_data_entry; - VHDXMetadataTableEntry logical_sector_size_entry; - VHDXMetadataTableEntry phys_sector_size_entry; - VHDXMetadataTableEntry parent_locator_entry; - uint16_t present; -} VHDXMetadataEntries; - typedef struct VHDXSectorInfo { uint32_t bat_idx; /* BAT entry index */ @@ -124,44 +128,31 @@ typedef struct VHDXSectorInfo { uint64_t block_offset; /* block offset, in bytes */ } VHDXSectorInfo; +/* Calculates new checksum. + * + * Zero is substituted during crc calculation for the original crc field + * crc_offset: byte offset in buf of the buffer crc + * buf: buffer pointer + * size: size of buffer (must be > crc_offset+4) + * + * Note: The resulting checksum is in the CPU endianness, not necessarily + * in the file format endianness (LE). 
Any header export to disk should + * make sure that vhdx_header_le_export() is used to convert to the + * correct endianness + */ +uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset) +{ + uint32_t crc; + assert(buf != NULL); + assert(size > (crc_offset + sizeof(crc))); -typedef struct BDRVVHDXState { - CoMutex lock; - - int curr_header; - VHDXHeader *headers[2]; - - VHDXRegionTableHeader rt; - VHDXRegionTableEntry bat_rt; /* region table for the BAT */ - VHDXRegionTableEntry metadata_rt; /* region table for the metadata */ - - VHDXMetadataTableHeader metadata_hdr; - VHDXMetadataEntries metadata_entries; - - VHDXFileParameters params; - uint32_t block_size; - uint32_t block_size_bits; - uint32_t sectors_per_block; - uint32_t sectors_per_block_bits; - - uint64_t virtual_disk_size; - uint32_t logical_sector_size; - uint32_t physical_sector_size; - - uint64_t chunk_ratio; - uint32_t chunk_ratio_bits; - uint32_t logical_sector_size_bits; - - uint32_t bat_entries; - VHDXBatEntry *bat; - uint64_t bat_offset; - - VHDXParentLocatorHeader parent_header; - VHDXParentLocatorEntry *parent_entries; + memset(buf + crc_offset, 0, sizeof(crc)); + crc = crc32c(0xffffffff, buf, size); + memcpy(buf + crc_offset, &crc, sizeof(crc)); - Error *migration_blocker; -} BDRVVHDXState; + return crc; +} uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, int crc_offset) @@ -214,6 +205,71 @@ bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset) /* + * This generates a UUID that is compliant with the MS GUIDs used + * in the VHDX spec (and elsewhere). 
+ */ +void vhdx_guid_generate(MSGUID *guid) +{ + uuid_t uuid; + assert(guid != NULL); + + uuid_generate(uuid); + memcpy(guid, uuid, sizeof(MSGUID)); +} + +/* Check for region overlaps inside the VHDX image */ +static int vhdx_region_check(BDRVVHDXState *s, uint64_t start, uint64_t length) +{ + int ret = 0; + uint64_t end; + VHDXRegionEntry *r; + + end = start + length; + QLIST_FOREACH(r, &s->regions, entries) { + if (!((start >= r->end) || (end <= r->start))) { + ret = -EINVAL; + goto exit; + } + } + +exit: + return ret; +} + +/* Register a region for future checks */ +static void vhdx_region_register(BDRVVHDXState *s, + uint64_t start, uint64_t length) +{ + VHDXRegionEntry *r; + + r = g_malloc0(sizeof(*r)); + + r->start = start; + r->end = start + length; + + QLIST_INSERT_HEAD(&s->regions, r, entries); +} + +/* Free all registered regions */ +static void vhdx_region_unregister_all(BDRVVHDXState *s) +{ + VHDXRegionEntry *r, *r_next; + + QLIST_FOREACH_SAFE(r, &s->regions, entries, r_next) { + QLIST_REMOVE(r, entries); + g_free(r); + } +} + +static void vhdx_set_shift_bits(BDRVVHDXState *s) +{ + s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size); + s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block); + s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio); + s->block_size_bits = 31 - clz32(s->block_size); +} + +/* * Per the MS VHDX Specification, for every VHDX file: * - The header section is fixed size - 1 MB * - The header section is always the first "object" @@ -232,25 +288,118 @@ static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename) return 0; } -/* All VHDX structures on disk are little endian */ -static void vhdx_header_le_import(VHDXHeader *h) +/* + * Writes the header to the specified offset. + * + * This will optionally read in buffer data from disk (otherwise zero-fill), + * and then update the header checksum. 
Header is converted to proper + * endianness before being written to the specified file offset + */ +static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr, + uint64_t offset, bool read) +{ + uint8_t *buffer = NULL; + int ret; + VHDXHeader header_le; + + assert(bs_file != NULL); + assert(hdr != NULL); + + /* the header checksum is not over just the packed size of VHDXHeader, + * but rather over the entire 'reserved' range for the header, which is + * 4KB (VHDX_HEADER_SIZE). */ + + buffer = qemu_blockalign(bs_file, VHDX_HEADER_SIZE); + if (read) { + /* if true, we can't assume the extra reserved bytes are 0 */ + ret = bdrv_pread(bs_file, offset, buffer, VHDX_HEADER_SIZE); + if (ret < 0) { + goto exit; + } + } else { + memset(buffer, 0, VHDX_HEADER_SIZE); + } + + /* overwrite the actual VHDXHeader portion */ + memcpy(buffer, hdr, sizeof(VHDXHeader)); + hdr->checksum = vhdx_update_checksum(buffer, VHDX_HEADER_SIZE, + offsetof(VHDXHeader, checksum)); + vhdx_header_le_export(hdr, &header_le); + ret = bdrv_pwrite_sync(bs_file, offset, &header_le, sizeof(VHDXHeader)); + +exit: + qemu_vfree(buffer); + return ret; +} + +/* Update the VHDX headers + * + * This follows the VHDX spec procedures for header updates. 
+ * + * - non-current header is updated with largest sequence number + */ +static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s, + bool generate_data_write_guid, MSGUID *log_guid) { - assert(h != NULL); + int ret = 0; + int hdr_idx = 0; + uint64_t header_offset = VHDX_HEADER1_OFFSET; + + VHDXHeader *active_header; + VHDXHeader *inactive_header; + + /* operate on the non-current header */ + if (s->curr_header == 0) { + hdr_idx = 1; + header_offset = VHDX_HEADER2_OFFSET; + } + + active_header = s->headers[s->curr_header]; + inactive_header = s->headers[hdr_idx]; + + inactive_header->sequence_number = active_header->sequence_number + 1; + + /* a new file guid must be generated before any file write, including + * headers */ + inactive_header->file_write_guid = s->session_guid; + + /* a new data guid only needs to be generated before any guest-visible + * writes (i.e. something observable via virtual disk read) */ + if (generate_data_write_guid) { + vhdx_guid_generate(&inactive_header->data_write_guid); + } - le32_to_cpus(&h->signature); - le32_to_cpus(&h->checksum); - le64_to_cpus(&h->sequence_number); + /* update the log guid if present */ + if (log_guid) { + inactive_header->log_guid = *log_guid; + } - leguid_to_cpus(&h->file_write_guid); - leguid_to_cpus(&h->data_write_guid); - leguid_to_cpus(&h->log_guid); + vhdx_write_header(bs->file, inactive_header, header_offset, true); + if (ret < 0) { + goto exit; + } + s->curr_header = hdr_idx; - le16_to_cpus(&h->log_version); - le16_to_cpus(&h->version); - le32_to_cpus(&h->log_length); - le64_to_cpus(&h->log_offset); +exit: + return ret; } +/* + * The VHDX spec calls for header updates to be performed twice, so that both + * the current and non-current header have valid info + */ +int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, + bool generate_data_write_guid, MSGUID *log_guid) +{ + int ret; + + ret = vhdx_update_header(bs, s, generate_data_write_guid, log_guid); + if (ret < 0) { + return 
ret; + } + ret = vhdx_update_header(bs, s, generate_data_write_guid, log_guid); + return ret; +} /* opens the specified header block from the VHDX file header section */ static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s) @@ -264,6 +413,7 @@ static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s) uint64_t h2_seq = 0; uint8_t *buffer; + /* header1 & header2 are freed in vhdx_close() */ header1 = qemu_blockalign(bs, sizeof(VHDXHeader)); header2 = qemu_blockalign(bs, sizeof(VHDXHeader)); @@ -328,6 +478,9 @@ static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s) } } + vhdx_region_register(s, s->headers[s->curr_header]->log_offset, + s->headers[s->curr_header]->log_length); + ret = 0; goto exit; @@ -364,10 +517,7 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s) goto fail; } memcpy(&s->rt, buffer, sizeof(s->rt)); - le32_to_cpus(&s->rt.signature); - le32_to_cpus(&s->rt.checksum); - le32_to_cpus(&s->rt.entry_count); - le32_to_cpus(&s->rt.reserved); + vhdx_region_header_le_import(&s->rt); offset += sizeof(s->rt); if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) || @@ -386,10 +536,16 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s) memcpy(&rt_entry, buffer + offset, sizeof(rt_entry)); offset += sizeof(rt_entry); - leguid_to_cpus(&rt_entry.guid); - le64_to_cpus(&rt_entry.file_offset); - le32_to_cpus(&rt_entry.length); - le32_to_cpus(&rt_entry.data_bits); + vhdx_region_entry_le_import(&rt_entry); + + /* check for region overlap between these entries, and any + * other memory regions in the file */ + ret = vhdx_region_check(s, rt_entry.file_offset, rt_entry.length); + if (ret < 0) { + goto fail; + } + + vhdx_region_register(s, rt_entry.file_offset, rt_entry.length); /* see if we recognize the entry */ if (guid_eq(rt_entry.guid, bat_guid)) { @@ -421,6 +577,12 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s) goto fail; } } + + if 
(!bat_rt_found || !metadata_rt_found) { + ret = -EINVAL; + goto fail; + } + ret = 0; fail: @@ -464,9 +626,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr)); offset += sizeof(s->metadata_hdr); - le64_to_cpus(&s->metadata_hdr.signature); - le16_to_cpus(&s->metadata_hdr.reserved); - le16_to_cpus(&s->metadata_hdr.entry_count); + vhdx_metadata_header_le_import(&s->metadata_hdr); if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) { ret = -EINVAL; @@ -485,11 +645,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) memcpy(&md_entry, buffer + offset, sizeof(md_entry)); offset += sizeof(md_entry); - leguid_to_cpus(&md_entry.item_id); - le32_to_cpus(&md_entry.offset); - le32_to_cpus(&md_entry.length); - le32_to_cpus(&md_entry.data_bits); - le32_to_cpus(&md_entry.reserved2); + vhdx_metadata_entry_le_import(&md_entry); if (guid_eq(md_entry.item_id, file_param_guid)) { if (s->metadata_entries.present & META_FILE_PARAMETER_PRESENT) { @@ -662,10 +818,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) goto exit; } - s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size); - s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block); - s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio); - s->block_size_bits = 31 - clz32(s->block_size); + vhdx_set_shift_bits(s); ret = 0; @@ -674,48 +827,49 @@ exit: return ret; } -/* Parse the replay log. Per the VHDX spec, if the log is present - * it must be replayed prior to opening the file, even read-only. - * - * If read-only, we must replay the log in RAM (or refuse to open - * a dirty VHDX file read-only */ -static int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s) +/* + * Calculate the number of BAT entries, including sector + * bitmap entries. 
+ */ +static void vhdx_calc_bat_entries(BDRVVHDXState *s) { - int ret = 0; - int i; - VHDXHeader *hdr; - - hdr = s->headers[s->curr_header]; + uint32_t data_blocks_cnt, bitmap_blocks_cnt; - /* either the log guid, or log length is zero, - * then a replay log is present */ - for (i = 0; i < sizeof(hdr->log_guid.data4); i++) { - ret |= hdr->log_guid.data4[i]; - } - if (hdr->log_guid.data1 == 0 && - hdr->log_guid.data2 == 0 && - hdr->log_guid.data3 == 0 && - ret == 0) { - goto exit; + data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits; + if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) { + data_blocks_cnt++; } - - /* per spec, only log version of 0 is supported */ - if (hdr->log_version != 0) { - ret = -EINVAL; - goto exit; + bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits; + if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) { + bitmap_blocks_cnt++; } - if (hdr->log_length == 0) { - goto exit; + if (s->parent_entries) { + s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1); + } else { + s->bat_entries = data_blocks_cnt + + ((data_blocks_cnt - 1) >> s->chunk_ratio_bits); } - /* We currently do not support images with logs to replay */ - ret = -ENOTSUP; - -exit: - return ret; } +static void vhdx_close(BlockDriverState *bs) +{ + BDRVVHDXState *s = bs->opaque; + qemu_vfree(s->headers[0]); + s->headers[0] = NULL; + qemu_vfree(s->headers[1]); + s->headers[1] = NULL; + qemu_vfree(s->bat); + s->bat = NULL; + qemu_vfree(s->parent_entries); + s->parent_entries = NULL; + migrate_del_blocker(s->migration_blocker); + error_free(s->migration_blocker); + qemu_vfree(s->log.hdr); + s->log.hdr = NULL; + vhdx_region_unregister_all(s); +} static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) @@ -724,12 +878,14 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, int ret = 0; uint32_t i; uint64_t signature; - uint32_t data_blocks_cnt, bitmap_blocks_cnt; + bool log_flushed = 
false; s->bat = NULL; + s->first_visible_write = true; qemu_co_mutex_init(&s->lock); + QLIST_INIT(&s->regions); /* validate the file signature */ ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t)); @@ -741,46 +897,38 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } + /* This is used for any header updates, for the file_write_guid. + * The spec dictates that a new value should be used for the first + * header update */ + vhdx_guid_generate(&s->session_guid); + ret = vhdx_parse_header(bs, s); - if (ret) { + if (ret < 0) { goto fail; } - ret = vhdx_parse_log(bs, s); - if (ret) { + ret = vhdx_parse_log(bs, s, &log_flushed); + if (ret < 0) { goto fail; } ret = vhdx_open_region_tables(bs, s); - if (ret) { + if (ret < 0) { goto fail; } ret = vhdx_parse_metadata(bs, s); - if (ret) { + if (ret < 0) { goto fail; } + s->block_size = s->params.block_size; /* the VHDX spec dictates that virtual_disk_size is always a multiple of * logical_sector_size */ bs->total_sectors = s->virtual_disk_size >> s->logical_sector_size_bits; - data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits; - if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) { - data_blocks_cnt++; - } - bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits; - if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) { - bitmap_blocks_cnt++; - } - - if (s->parent_entries) { - s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1); - } else { - s->bat_entries = data_blocks_cnt + - ((data_blocks_cnt - 1) >> s->chunk_ratio_bits); - } + vhdx_calc_bat_entries(s); s->bat_offset = s->bat_rt.file_offset; @@ -790,6 +938,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } + /* s->bat is freed in vhdx_close() */ s->bat = qemu_blockalign(bs, s->bat_rt.length); ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length); @@ -797,16 +946,36 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int 
flags, goto fail; } + uint64_t payblocks = s->chunk_ratio; + /* endian convert, and verify populated BAT field file offsets against + * region table and log entries */ for (i = 0; i < s->bat_entries; i++) { le64_to_cpus(&s->bat[i]); + if (payblocks--) { + /* payload bat entries */ + if ((s->bat[i] & VHDX_BAT_STATE_BIT_MASK) == + PAYLOAD_BLOCK_FULLY_PRESENT) { + ret = vhdx_region_check(s, s->bat[i] & VHDX_BAT_FILE_OFF_MASK, + s->block_size); + if (ret < 0) { + goto fail; + } + } + } else { + payblocks = s->chunk_ratio; + /* Once differencing files are supported, verify sector bitmap + * blocks here */ + } } if (flags & BDRV_O_RDWR) { - ret = -ENOTSUP; - goto fail; + ret = vhdx_update_headers(bs, s, false, NULL); + if (ret < 0) { + goto fail; + } } - /* TODO: differencing files, write */ + /* TODO: differencing files */ /* Disable migration when VHDX images are used */ error_set(&s->migration_blocker, @@ -816,10 +985,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, return 0; fail: - qemu_vfree(s->headers[0]); - qemu_vfree(s->headers[1]); - qemu_vfree(s->bat); - qemu_vfree(s->parent_entries); + vhdx_close(bs); return ret; } @@ -859,7 +1025,7 @@ static void vhdx_block_translate(BDRVVHDXState *s, int64_t sector_num, sinfo->bytes_avail = sinfo->sectors_avail << s->logical_sector_size_bits; - sinfo->file_offset = s->bat[sinfo->bat_idx] >> VHDX_BAT_FILE_OFF_BITS; + sinfo->file_offset = s->bat[sinfo->bat_idx] & VHDX_BAT_FILE_OFF_MASK; sinfo->block_offset = block_offset << s->logical_sector_size_bits; @@ -873,7 +1039,6 @@ static void vhdx_block_translate(BDRVVHDXState *s, int64_t sector_num, * in the block, and add in the payload data block offset * in the file, in bytes, to get the final read address */ - sinfo->file_offset <<= 20; /* now in bytes, rather than 1MB units */ sinfo->file_offset += sinfo->block_offset; } @@ -914,7 +1079,7 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num, /* return zero */ 
qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail); break; - case PAYLOAD_BLOCK_FULL_PRESENT: + case PAYLOAD_BLOCK_FULLY_PRESENT: qemu_co_mutex_unlock(&s->lock); ret = bdrv_co_readv(bs->file, sinfo.file_offset >> BDRV_SECTOR_BITS, @@ -944,26 +1109,772 @@ exit: return ret; } +/* + * Allocate a new payload block at the end of the file. + * + * Allocation will happen at 1MB alignment inside the file + * + * Returns the file offset start of the new payload block + */ +static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, + uint64_t *new_offset) +{ + *new_offset = bdrv_getlength(bs->file); + + /* per the spec, the address for a block is in units of 1MB */ + *new_offset = ROUND_UP(*new_offset, 1024 * 1024); + + return bdrv_truncate(bs->file, *new_offset + s->block_size); +} + +/* + * Update the BAT table entry with the new file offset, and the new entry + * state */ +static void vhdx_update_bat_table_entry(BlockDriverState *bs, BDRVVHDXState *s, + VHDXSectorInfo *sinfo, + uint64_t *bat_entry_le, + uint64_t *bat_offset, int state) +{ + /* The BAT entry is a uint64, with 44 bits for the file offset in units of + * 1MB, and 3 bits for the block state. 
*/ + s->bat[sinfo->bat_idx] = sinfo->file_offset; + + s->bat[sinfo->bat_idx] |= state & VHDX_BAT_STATE_BIT_MASK; + + *bat_entry_le = cpu_to_le64(s->bat[sinfo->bat_idx]); + *bat_offset = s->bat_offset + sinfo->bat_idx * sizeof(VHDXBatEntry); + +} +/* Per the spec, on the first write of guest-visible data to the file the + * data write guid must be updated in the header */ +int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s) +{ + int ret = 0; + if (s->first_visible_write) { + s->first_visible_write = false; + ret = vhdx_update_headers(bs, s, true, NULL); + } + return ret; +} static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { - return -ENOTSUP; + int ret = -ENOTSUP; + BDRVVHDXState *s = bs->opaque; + VHDXSectorInfo sinfo; + uint64_t bytes_done = 0; + uint64_t bat_entry = 0; + uint64_t bat_entry_offset = 0; + QEMUIOVector hd_qiov; + struct iovec iov1 = { 0 }; + struct iovec iov2 = { 0 }; + int sectors_to_write; + int bat_state; + uint64_t bat_prior_offset = 0; + bool bat_update = false; + + qemu_iovec_init(&hd_qiov, qiov->niov); + + qemu_co_mutex_lock(&s->lock); + + ret = vhdx_user_visible_write(bs, s); + if (ret < 0) { + goto exit; + } + + while (nb_sectors > 0) { + bool use_zero_buffers = false; + bat_update = false; + if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) { + /* not supported yet */ + ret = -ENOTSUP; + goto exit; + } else { + vhdx_block_translate(s, sector_num, nb_sectors, &sinfo); + sectors_to_write = sinfo.sectors_avail; + + qemu_iovec_reset(&hd_qiov); + /* check the payload block state */ + bat_state = s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK; + switch (bat_state) { + case PAYLOAD_BLOCK_ZERO: + /* in this case, we need to preserve zero writes for + * data that is not part of this write, so we must pad + * the rest of the buffer to zeroes */ + + /* if we are on a posix system with ftruncate() that extends + * a file, then it is zero-filled for us. 
On Win32, the raw + * layer uses SetFilePointer and SetFileEnd, which does not + * zero fill AFAIK */ + + /* Queue another write of zero buffers if the underlying file + * does not zero-fill on file extension */ + + if (bdrv_has_zero_init(bs->file) == 0) { + use_zero_buffers = true; + + /* zero fill the front, if any */ + if (sinfo.block_offset) { + iov1.iov_len = sinfo.block_offset; + iov1.iov_base = qemu_blockalign(bs, iov1.iov_len); + memset(iov1.iov_base, 0, iov1.iov_len); + qemu_iovec_concat_iov(&hd_qiov, &iov1, 1, 0, + sinfo.block_offset); + sectors_to_write += iov1.iov_len >> BDRV_SECTOR_BITS; + } + + /* our actual data */ + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, + sinfo.bytes_avail); + + /* zero fill the back, if any */ + if ((sinfo.bytes_avail - sinfo.block_offset) < + s->block_size) { + iov2.iov_len = s->block_size - + (sinfo.bytes_avail + sinfo.block_offset); + iov2.iov_base = qemu_blockalign(bs, iov2.iov_len); + memset(iov2.iov_base, 0, iov2.iov_len); + qemu_iovec_concat_iov(&hd_qiov, &iov2, 1, 0, + sinfo.block_offset); + sectors_to_write += iov2.iov_len >> BDRV_SECTOR_BITS; + } + } + + /* fall through */ + case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */ + case PAYLOAD_BLOCK_UNMAPPED: /* fall through */ + case PAYLOAD_BLOCK_UNDEFINED: /* fall through */ + bat_prior_offset = sinfo.file_offset; + ret = vhdx_allocate_block(bs, s, &sinfo.file_offset); + if (ret < 0) { + goto exit; + } + /* once we support differencing files, this may also be + * partially present */ + /* update block state to the newly specified state */ + vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry, + &bat_entry_offset, + PAYLOAD_BLOCK_FULLY_PRESENT); + bat_update = true; + /* since we just allocated a block, file_offset is the + * beginning of the payload block. 
It needs to be the + * write address, which includes the offset into the block */ + if (!use_zero_buffers) { + sinfo.file_offset += sinfo.block_offset; + } + /* fall through */ + case PAYLOAD_BLOCK_FULLY_PRESENT: + /* if the file offset address is in the header zone, + * there is a problem */ + if (sinfo.file_offset < (1024 * 1024)) { + ret = -EFAULT; + goto error_bat_restore; + } + + if (!use_zero_buffers) { + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, + sinfo.bytes_avail); + } + /* block exists, so we can just overwrite it */ + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_writev(bs->file, + sinfo.file_offset >> BDRV_SECTOR_BITS, + sectors_to_write, &hd_qiov); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + goto error_bat_restore; + } + break; + case PAYLOAD_BLOCK_PARTIALLY_PRESENT: + /* we don't yet support difference files, fall through + * to error */ + default: + ret = -EIO; + goto exit; + break; + } + + if (bat_update) { + /* this will update the BAT entry into the log journal, and + * then flush the log journal out to disk */ + ret = vhdx_log_write_and_flush(bs, s, &bat_entry, + sizeof(VHDXBatEntry), + bat_entry_offset); + if (ret < 0) { + goto exit; + } + } + + nb_sectors -= sinfo.sectors_avail; + sector_num += sinfo.sectors_avail; + bytes_done += sinfo.bytes_avail; + + } + } + + goto exit; + +error_bat_restore: + if (bat_update) { + /* keep metadata in sync, and restore the bat entry state + * if error. 
*/ + sinfo.file_offset = bat_prior_offset; + vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry, + &bat_entry_offset, bat_state); + } +exit: + qemu_vfree(iov1.iov_base); + qemu_vfree(iov2.iov_base); + qemu_co_mutex_unlock(&s->lock); + qemu_iovec_destroy(&hd_qiov); + return ret; } -static void vhdx_close(BlockDriverState *bs) + +/* + * Create VHDX Headers + * + * There are 2 headers, and the highest sequence number will represent + * the active header + */ +static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size, + uint32_t log_size) { - BDRVVHDXState *s = bs->opaque; - qemu_vfree(s->headers[0]); - qemu_vfree(s->headers[1]); - qemu_vfree(s->bat); - qemu_vfree(s->parent_entries); - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + int ret = 0; + VHDXHeader *hdr = NULL; + + hdr = g_malloc0(sizeof(VHDXHeader)); + + hdr->signature = VHDX_HEADER_SIGNATURE; + hdr->sequence_number = g_random_int(); + hdr->log_version = 0; + hdr->version = 1; + hdr->log_length = log_size; + hdr->log_offset = VHDX_HEADER_SECTION_END; + vhdx_guid_generate(&hdr->file_write_guid); + vhdx_guid_generate(&hdr->data_write_guid); + + ret = vhdx_write_header(bs, hdr, VHDX_HEADER1_OFFSET, false); + if (ret < 0) { + goto exit; + } + hdr->sequence_number++; + ret = vhdx_write_header(bs, hdr, VHDX_HEADER2_OFFSET, false); + if (ret < 0) { + goto exit; + } + +exit: + g_free(hdr); + return ret; +} + + +/* + * Create the Metadata entries. + * + * For more details on the entries, see section 3.5 (pg 29) in the + * VHDX 1.00 specification. + * + * We support 5 metadata entries (all required by spec): + * File Parameters, + * Virtual Disk Size, + * Page 83 Data, + * Logical Sector Size, + * Physical Sector Size + * + * The first 64KB of the Metadata section is reserved for the metadata + * header and entries; beyond that, the metadata items themselves reside. 
+ */ +static int vhdx_create_new_metadata(BlockDriverState *bs, + uint64_t image_size, + uint32_t block_size, + uint32_t sector_size, + uint64_t metadata_offset, + VHDXImageType type) +{ + int ret = 0; + uint32_t offset = 0; + void *buffer = NULL; + void *entry_buffer; + VHDXMetadataTableHeader *md_table;; + VHDXMetadataTableEntry *md_table_entry; + + /* Metadata entries */ + VHDXFileParameters *mt_file_params; + VHDXVirtualDiskSize *mt_virtual_size; + VHDXPage83Data *mt_page83; + VHDXVirtualDiskLogicalSectorSize *mt_log_sector_size; + VHDXVirtualDiskPhysicalSectorSize *mt_phys_sector_size; + + entry_buffer = g_malloc0(sizeof(VHDXFileParameters) + + sizeof(VHDXVirtualDiskSize) + + sizeof(VHDXPage83Data) + + sizeof(VHDXVirtualDiskLogicalSectorSize) + + sizeof(VHDXVirtualDiskPhysicalSectorSize)); + + mt_file_params = entry_buffer; + offset += sizeof(VHDXFileParameters); + mt_virtual_size = entry_buffer + offset; + offset += sizeof(VHDXVirtualDiskSize); + mt_page83 = entry_buffer + offset; + offset += sizeof(VHDXPage83Data); + mt_log_sector_size = entry_buffer + offset; + offset += sizeof(VHDXVirtualDiskLogicalSectorSize); + mt_phys_sector_size = entry_buffer + offset; + + mt_file_params->block_size = cpu_to_le32(block_size); + if (type == VHDX_TYPE_FIXED) { + mt_file_params->data_bits |= VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED; + cpu_to_le32s(&mt_file_params->data_bits); + } + + vhdx_guid_generate(&mt_page83->page_83_data); + cpu_to_leguids(&mt_page83->page_83_data); + mt_virtual_size->virtual_disk_size = cpu_to_le64(image_size); + mt_log_sector_size->logical_sector_size = cpu_to_le32(sector_size); + mt_phys_sector_size->physical_sector_size = cpu_to_le32(sector_size); + + buffer = g_malloc0(VHDX_HEADER_BLOCK_SIZE); + md_table = buffer; + + md_table->signature = VHDX_METADATA_SIGNATURE; + md_table->entry_count = 5; + vhdx_metadata_header_le_export(md_table); + + + /* This will reference beyond the reserved table portion */ + offset = 64 * KiB; + + md_table_entry = buffer + 
sizeof(VHDXMetadataTableHeader); + + md_table_entry[0].item_id = file_param_guid; + md_table_entry[0].offset = offset; + md_table_entry[0].length = sizeof(VHDXFileParameters); + md_table_entry[0].data_bits |= VHDX_META_FLAGS_IS_REQUIRED; + offset += md_table_entry[0].length; + vhdx_metadata_entry_le_export(&md_table_entry[0]); + + md_table_entry[1].item_id = virtual_size_guid; + md_table_entry[1].offset = offset; + md_table_entry[1].length = sizeof(VHDXVirtualDiskSize); + md_table_entry[1].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | + VHDX_META_FLAGS_IS_VIRTUAL_DISK; + offset += md_table_entry[1].length; + vhdx_metadata_entry_le_export(&md_table_entry[1]); + + md_table_entry[2].item_id = page83_guid; + md_table_entry[2].offset = offset; + md_table_entry[2].length = sizeof(VHDXPage83Data); + md_table_entry[2].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | + VHDX_META_FLAGS_IS_VIRTUAL_DISK; + offset += md_table_entry[2].length; + vhdx_metadata_entry_le_export(&md_table_entry[2]); + + md_table_entry[3].item_id = logical_sector_guid; + md_table_entry[3].offset = offset; + md_table_entry[3].length = sizeof(VHDXVirtualDiskLogicalSectorSize); + md_table_entry[3].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | + VHDX_META_FLAGS_IS_VIRTUAL_DISK; + offset += md_table_entry[3].length; + vhdx_metadata_entry_le_export(&md_table_entry[3]); + + md_table_entry[4].item_id = phys_sector_guid; + md_table_entry[4].offset = offset; + md_table_entry[4].length = sizeof(VHDXVirtualDiskPhysicalSectorSize); + md_table_entry[4].data_bits |= VHDX_META_FLAGS_IS_REQUIRED | + VHDX_META_FLAGS_IS_VIRTUAL_DISK; + vhdx_metadata_entry_le_export(&md_table_entry[4]); + + ret = bdrv_pwrite(bs, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE); + if (ret < 0) { + goto exit; + } + + ret = bdrv_pwrite(bs, metadata_offset + (64 * KiB), entry_buffer, + VHDX_HEADER_BLOCK_SIZE); + if (ret < 0) { + goto exit; + } + + +exit: + g_free(buffer); + g_free(entry_buffer); + return ret; } +/* This create the actual BAT itself. 
We currently only support + * 'Dynamic' and 'Fixed' image types. + * + * Dynamic images: default state of the BAT is all zeroes. + * + * Fixed images: default state of the BAT is fully populated, with + * file offsets and state PAYLOAD_BLOCK_FULLY_PRESENT. + */ +static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s, + uint64_t image_size, VHDXImageType type, + bool use_zero_blocks, VHDXRegionTableEntry *rt_bat) +{ + int ret = 0; + uint64_t data_file_offset; + uint64_t total_sectors = 0; + uint64_t sector_num = 0; + uint64_t unused; + int block_state; + VHDXSectorInfo sinfo; + + assert(s->bat == NULL); + + /* this gives a data start after BAT/bitmap entries, and well + * past any metadata entries (with a 4 MB buffer for future + * expansion */ + data_file_offset = rt_bat->file_offset + rt_bat->length + 5 * MiB; + total_sectors = image_size >> s->logical_sector_size_bits; + + if (type == VHDX_TYPE_DYNAMIC) { + /* All zeroes, so we can just extend the file - the end of the BAT + * is the furthest thing we have written yet */ + ret = bdrv_truncate(bs, data_file_offset); + if (ret < 0) { + goto exit; + } + } else if (type == VHDX_TYPE_FIXED) { + ret = bdrv_truncate(bs, data_file_offset + image_size); + if (ret < 0) { + goto exit; + } + } else { + ret = -ENOTSUP; + goto exit; + } + + if (type == VHDX_TYPE_FIXED || + use_zero_blocks || + bdrv_has_zero_init(bs) == 0) { + /* for a fixed file, the default BAT entry is not zero */ + s->bat = g_malloc0(rt_bat->length); + block_state = type == VHDX_TYPE_FIXED ? PAYLOAD_BLOCK_FULLY_PRESENT : + PAYLOAD_BLOCK_NOT_PRESENT; + block_state = use_zero_blocks ? 
PAYLOAD_BLOCK_ZERO : block_state; + /* fill the BAT by emulating sector writes of sectors_per_block size */ + while (sector_num < total_sectors) { + vhdx_block_translate(s, sector_num, s->sectors_per_block, &sinfo); + sinfo.file_offset = data_file_offset + + (sector_num << s->logical_sector_size_bits); + sinfo.file_offset = ROUND_UP(sinfo.file_offset, MiB); + vhdx_update_bat_table_entry(bs, s, &sinfo, &unused, &unused, + block_state); + cpu_to_le64s(&s->bat[sinfo.bat_idx]); + sector_num += s->sectors_per_block; + } + ret = bdrv_pwrite(bs, rt_bat->file_offset, s->bat, rt_bat->length); + if (ret < 0) { + goto exit; + } + } + + + +exit: + g_free(s->bat); + return ret; +} + +/* Creates the region table header, and region table entries. + * There are 2 supported region table entries: BAT, and Metadata/ + * + * As the calculations for the BAT region table are also needed + * to create the BAT itself, we will also cause the BAT to be + * created. + */ +static int vhdx_create_new_region_table(BlockDriverState *bs, + uint64_t image_size, + uint32_t block_size, + uint32_t sector_size, + uint32_t log_size, + bool use_zero_blocks, + VHDXImageType type, + uint64_t *metadata_offset) +{ + int ret = 0; + uint32_t offset = 0; + void *buffer = NULL; + BDRVVHDXState *s = NULL; + VHDXRegionTableHeader *region_table; + VHDXRegionTableEntry *rt_bat; + VHDXRegionTableEntry *rt_metadata; + + assert(metadata_offset != NULL); + + /* Populate enough of the BDRVVHDXState to be able to use the + * pre-existing BAT calculation, translation, and update functions */ + s = g_malloc0(sizeof(BDRVVHDXState)); + + s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) * + (uint64_t) sector_size / (uint64_t) block_size; + + s->sectors_per_block = block_size / sector_size; + s->virtual_disk_size = image_size; + s->block_size = block_size; + s->logical_sector_size = sector_size; + + vhdx_set_shift_bits(s); + + vhdx_calc_bat_entries(s); + + /* At this point the VHDX state is populated enough for creation */ + + /* 
a single buffer is used so we can calculate the checksum over the + * entire 64KB block */ + buffer = g_malloc0(VHDX_HEADER_BLOCK_SIZE); + region_table = buffer; + offset += sizeof(VHDXRegionTableHeader); + rt_bat = buffer + offset; + offset += sizeof(VHDXRegionTableEntry); + rt_metadata = buffer + offset; + + region_table->signature = VHDX_REGION_SIGNATURE; + region_table->entry_count = 2; /* BAT and Metadata */ + + rt_bat->guid = bat_guid; + rt_bat->length = ROUND_UP(s->bat_entries * sizeof(VHDXBatEntry), MiB); + rt_bat->file_offset = ROUND_UP(VHDX_HEADER_SECTION_END + log_size, MiB); + s->bat_offset = rt_bat->file_offset; + + rt_metadata->guid = metadata_guid; + rt_metadata->file_offset = ROUND_UP(rt_bat->file_offset + rt_bat->length, + MiB); + rt_metadata->length = 1 * MiB; /* min size, and more than enough */ + *metadata_offset = rt_metadata->file_offset; + + vhdx_update_checksum(buffer, VHDX_HEADER_BLOCK_SIZE, + offsetof(VHDXRegionTableHeader, checksum)); + + + /* The region table gives us the data we need to create the BAT, + * so do that now */ + ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks, rt_bat); + + /* Now write out the region headers to disk */ + vhdx_region_header_le_export(region_table); + vhdx_region_entry_le_export(rt_bat); + vhdx_region_entry_le_export(rt_metadata); + + ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer, + VHDX_HEADER_BLOCK_SIZE); + if (ret < 0) { + goto exit; + } + + ret = bdrv_pwrite(bs, VHDX_REGION_TABLE2_OFFSET, buffer, + VHDX_HEADER_BLOCK_SIZE); + if (ret < 0) { + goto exit; + } + + +exit: + g_free(s); + g_free(buffer); + return ret; +} + +/* We need to create the following elements: + * + * .-----------------------------------------------------------------. + * | (A) | (B) | (C) | (D) | (E) | + * | File ID | Header1 | Header 2 | Region Tbl 1 | Region Tbl 2 | + * | | | | | | + * .-----------------------------------------------------------------. 
+ * 0 64KB 128KB 192KB 256KB 320KB + * + * + * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. + * | (F) | (G) | (H) | | + * | Journal Log | BAT / Bitmap | Metadata | .... data ...... | + * | | | | | + * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------. + * 1MB + */ +static int vhdx_create(const char *filename, QEMUOptionParameter *options, + Error **errp) +{ + int ret = 0; + uint64_t image_size = (uint64_t) 2 * GiB; + uint32_t log_size = 1 * MiB; + uint32_t block_size = 0; + uint64_t signature; + uint64_t metadata_offset; + bool use_zero_blocks = false; + + gunichar2 *creator = NULL; + glong creator_items; + BlockDriverState *bs; + const char *type = NULL; + VHDXImageType image_type; + Error *local_err = NULL; + + while (options && options->name) { + if (!strcmp(options->name, BLOCK_OPT_SIZE)) { + image_size = options->value.n; + } else if (!strcmp(options->name, VHDX_BLOCK_OPT_LOG_SIZE)) { + log_size = options->value.n; + } else if (!strcmp(options->name, VHDX_BLOCK_OPT_BLOCK_SIZE)) { + block_size = options->value.n; + } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) { + type = options->value.s; + } else if (!strcmp(options->name, VHDX_BLOCK_OPT_ZERO)) { + use_zero_blocks = options->value.n != 0; + } + options++; + } + + if (image_size > VHDX_MAX_IMAGE_SIZE) { + error_setg_errno(errp, EINVAL, "Image size too large; max of 64TB"); + ret = -EINVAL; + goto exit; + } + + if (type == NULL) { + type = "dynamic"; + } + + if (!strcmp(type, "dynamic")) { + image_type = VHDX_TYPE_DYNAMIC; + } else if (!strcmp(type, "fixed")) { + image_type = VHDX_TYPE_FIXED; + } else if (!strcmp(type, "differencing")) { + error_setg_errno(errp, ENOTSUP, + "Differencing files not yet supported"); + ret = -ENOTSUP; + goto exit; + } else { + ret = -EINVAL; + goto exit; + } + + /* These are pretty arbitrary, and mainly designed to keep the BAT + * size reasonable to load into RAM */ + if (block_size == 0) { + if (image_size > 32 * TiB) { + block_size 
= 64 * MiB; + } else if (image_size > (uint64_t) 100 * GiB) { + block_size = 32 * MiB; + } else if (image_size > 1 * GiB) { + block_size = 16 * MiB; + } else { + block_size = 8 * MiB; + } + } + + + /* make the log size close to what was specified, but must be + * min 1MB, and multiple of 1MB */ + log_size = ROUND_UP(log_size, MiB); + + block_size = ROUND_UP(block_size, MiB); + block_size = block_size > VHDX_BLOCK_SIZE_MAX ? VHDX_BLOCK_SIZE_MAX : + block_size; + + ret = bdrv_create_file(filename, options, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + goto exit; + } + + ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + goto exit; + } + + /* Create (A) */ + + /* The creator field is optional, but may be useful for + * debugging / diagnostics */ + creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL, + &creator_items, NULL); + signature = cpu_to_le64(VHDX_FILE_SIGNATURE); + bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature)); + if (ret < 0) { + goto delete_and_exit; + } + if (creator) { + bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature), creator, + creator_items * sizeof(gunichar2)); + if (ret < 0) { + goto delete_and_exit; + } + } + + + /* Creates (B),(C) */ + ret = vhdx_create_new_headers(bs, image_size, log_size); + if (ret < 0) { + goto delete_and_exit; + } + + /* Creates (D),(E),(G) explicitly. 
(F) created as by-product */ + ret = vhdx_create_new_region_table(bs, image_size, block_size, 512, + log_size, use_zero_blocks, image_type, + &metadata_offset); + if (ret < 0) { + goto delete_and_exit; + } + + /* Creates (H) */ + ret = vhdx_create_new_metadata(bs, image_size, block_size, 512, + metadata_offset, image_type); + if (ret < 0) { + goto delete_and_exit; + } + + + +delete_and_exit: + bdrv_unref(bs); +exit: + g_free(creator); + return ret; +} + +static QEMUOptionParameter vhdx_create_options[] = { + { + .name = BLOCK_OPT_SIZE, + .type = OPT_SIZE, + .help = "Virtual disk size; max of 64TB." + }, + { + .name = VHDX_BLOCK_OPT_LOG_SIZE, + .type = OPT_SIZE, + .value.n = 1 * MiB, + .help = "Log size; min 1MB." + }, + { + .name = VHDX_BLOCK_OPT_BLOCK_SIZE, + .type = OPT_SIZE, + .value.n = 0, + .help = "Block Size; min 1MB, max 256MB. " \ + "0 means auto-calculate based on image size." + }, + { + .name = BLOCK_OPT_SUBFMT, + .type = OPT_STRING, + .help = "VHDX format type, can be either 'dynamic' or 'fixed'. "\ + "Default is 'dynamic'." + }, + { + .name = VHDX_BLOCK_OPT_ZERO, + .type = OPT_FLAG, + .help = "Force use of payload blocks of type 'ZERO'. Non-standard." 
+ }, + { NULL } +}; + static BlockDriver bdrv_vhdx = { .format_name = "vhdx", .instance_size = sizeof(BDRVVHDXState), @@ -973,6 +1884,9 @@ static BlockDriver bdrv_vhdx = { .bdrv_reopen_prepare = vhdx_reopen_prepare, .bdrv_co_readv = vhdx_co_readv, .bdrv_co_writev = vhdx_co_writev, + .bdrv_create = vhdx_create, + + .create_options = vhdx_create_options, }; static void bdrv_vhdx_init(void) diff --git a/block/vhdx.h b/block/vhdx.h index fb687ed2d6..51183b243c 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -6,9 +6,9 @@ * Authors: * Jeff Cody <jcody@redhat.com> * - * This is based on the "VHDX Format Specification v0.95", published 4/12/2012 + * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 * by Microsoft: - * https://www.microsoft.com/en-us/download/details.aspx?id=29681 + * https://www.microsoft.com/en-us/download/details.aspx?id=34750 * * This work is licensed under the terms of the GNU LGPL, version 2 or later. * See the COPYING.LIB file in the top-level directory. 
@@ -18,6 +18,11 @@ #ifndef BLOCK_VHDX_H #define BLOCK_VHDX_H +#define KiB (1 * 1024) +#define MiB (KiB * 1024) +#define GiB (MiB * 1024) +#define TiB ((uint64_t) GiB * 1024) + /* Structures and fields present in the VHDX file */ /* The header section has the following blocks, @@ -30,14 +35,15 @@ * 0.........64KB...........128KB........192KB..........256KB................1MB */ -#define VHDX_HEADER_BLOCK_SIZE (64*1024) +#define VHDX_HEADER_BLOCK_SIZE (64 * 1024) #define VHDX_FILE_ID_OFFSET 0 -#define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE*1) -#define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE*2) -#define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE*3) - +#define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE * 1) +#define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 2) +#define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE * 3) +#define VHDX_REGION_TABLE2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 4) +#define VHDX_HEADER_SECTION_END (1 * MiB) /* * A note on the use of MS-GUID fields. For more details on the GUID, * please see: https://en.wikipedia.org/wiki/Globally_unique_identifier. @@ -55,10 +61,11 @@ /* These structures are ones that are defined in the VHDX specification * document */ +#define VHDX_FILE_SIGNATURE 0x656C696678646876 /* "vhdxfile" in ASCII */ typedef struct VHDXFileIdentifier { uint64_t signature; /* "vhdxfile" in ASCII */ uint16_t creator[256]; /* optional; utf-16 string to identify - the vhdx file creator. Diagnotistic + the vhdx file creator. 
Diagnostic only */ } VHDXFileIdentifier; @@ -67,7 +74,7 @@ typedef struct VHDXFileIdentifier { * Microsoft is not just 16 bytes though - it is a structure that is defined, * so we need to follow it here so that endianness does not trip us up */ -typedef struct MSGUID { +typedef struct QEMU_PACKED MSGUID { uint32_t data1; uint16_t data2; uint16_t data3; @@ -77,14 +84,15 @@ typedef struct MSGUID { #define guid_eq(a, b) \ (memcmp(&(a), &(b), sizeof(MSGUID)) == 0) -#define VHDX_HEADER_SIZE (4*1024) /* although the vhdx_header struct in disk - is only 582 bytes, for purposes of crc - the header is the first 4KB of the 64KB - block */ +#define VHDX_HEADER_SIZE (4 * 1024) /* although the vhdx_header struct in disk + is only 582 bytes, for purposes of crc + the header is the first 4KB of the 64KB + block */ /* The full header is 4KB, although the actual header data is much smaller. * But for the checksum calculation, it is over the entire 4KB structure, * not just the defined portion of it */ +#define VHDX_HEADER_SIGNATURE 0x64616568 typedef struct QEMU_PACKED VHDXHeader { uint32_t signature; /* "head" in ASCII */ uint32_t checksum; /* CRC-32C hash of the whole header */ @@ -92,7 +100,7 @@ typedef struct QEMU_PACKED VHDXHeader { VHDX file has 2 of these headers, and only the header with the highest sequence number is valid */ - MSGUID file_write_guid; /* 128 bit unique identifier. Must be + MSGUID file_write_guid; /* 128 bit unique identifier. Must be updated to new, unique value before the first modification is made to file */ @@ -114,9 +122,9 @@ typedef struct QEMU_PACKED VHDXHeader { there is no valid log. If non-zero, log entries with this guid are valid. */ - uint16_t log_version; /* version of the log format. Mustn't be - zero, unless log_guid is also zero */ - uint16_t version; /* version of th evhdx file. Currently, + uint16_t log_version; /* version of the log format. Must be + set to zero */ + uint16_t version; /* version of the vhdx file. 
Currently, only supported version is "1" */ uint32_t log_length; /* length of the log. Must be multiple of 1MB */ @@ -125,6 +133,7 @@ typedef struct QEMU_PACKED VHDXHeader { } VHDXHeader; /* Header for the region table block */ +#define VHDX_REGION_SIGNATURE 0x69676572 /* "regi" in ASCII */ typedef struct QEMU_PACKED VHDXRegionTableHeader { uint32_t signature; /* "regi" in ASCII */ uint32_t checksum; /* CRC-32C hash of the 64KB table */ @@ -151,7 +160,10 @@ typedef struct QEMU_PACKED VHDXRegionTableEntry { /* ---- LOG ENTRY STRUCTURES ---- */ +#define VHDX_LOG_MIN_SIZE (1024 * 1024) +#define VHDX_LOG_SECTOR_SIZE 4096 #define VHDX_LOG_HDR_SIZE 64 +#define VHDX_LOG_SIGNATURE 0x65676f6c typedef struct QEMU_PACKED VHDXLogEntryHeader { uint32_t signature; /* "loge" in ASCII */ uint32_t checksum; /* CRC-32C hash of the 64KB table */ @@ -174,7 +186,8 @@ typedef struct QEMU_PACKED VHDXLogEntryHeader { } VHDXLogEntryHeader; #define VHDX_LOG_DESC_SIZE 32 - +#define VHDX_LOG_DESC_SIGNATURE 0x63736564 +#define VHDX_LOG_ZERO_SIGNATURE 0x6f72657a typedef struct QEMU_PACKED VHDXLogDescriptor { uint32_t signature; /* "zero" or "desc" in ASCII */ union { @@ -194,6 +207,7 @@ typedef struct QEMU_PACKED VHDXLogDescriptor { vhdx_log_entry_header */ } VHDXLogDescriptor; +#define VHDX_LOG_DATA_SIGNATURE 0x61746164 typedef struct QEMU_PACKED VHDXLogDataSector { uint32_t data_signature; /* "data" in ASCII */ uint32_t sequence_high; /* 4 MSB of 8 byte sequence_number */ @@ -212,19 +226,19 @@ typedef struct QEMU_PACKED VHDXLogDataSector { #define PAYLOAD_BLOCK_UNDEFINED 1 #define PAYLOAD_BLOCK_ZERO 2 #define PAYLOAD_BLOCK_UNMAPPED 5 -#define PAYLOAD_BLOCK_FULL_PRESENT 6 +#define PAYLOAD_BLOCK_FULLY_PRESENT 6 #define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7 #define SB_BLOCK_NOT_PRESENT 0 #define SB_BLOCK_PRESENT 6 /* per the spec */ -#define VHDX_MAX_SECTORS_PER_BLOCK (1<<23) +#define VHDX_MAX_SECTORS_PER_BLOCK (1 << 23) /* upper 44 bits are the file offset in 1MB units lower 3 bits are the state 
other bits are reserved */ #define VHDX_BAT_STATE_BIT_MASK 0x07 -#define VHDX_BAT_FILE_OFF_BITS (64-44) +#define VHDX_BAT_FILE_OFF_MASK 0xFFFFFFFFFFF00000 /* upper 44 bits */ typedef uint64_t VHDXBatEntry; /* ---- METADATA REGION STRUCTURES ---- */ @@ -233,6 +247,7 @@ typedef uint64_t VHDXBatEntry; #define VHDX_METADATA_MAX_ENTRIES 2047 /* not including the header */ #define VHDX_METADATA_TABLE_MAX_SIZE \ (VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1)) +#define VHDX_METADATA_SIGNATURE 0x617461646174656D /* "metadata" in ASCII */ typedef struct QEMU_PACKED VHDXMetadataTableHeader { uint64_t signature; /* "metadata" in ASCII */ uint16_t reserved; @@ -252,8 +267,8 @@ typedef struct QEMU_PACKED VHDXMetadataTableEntry { metadata region */ /* note: if length = 0, so is offset */ uint32_t length; /* length of metadata. <= 1MB. */ - uint32_t data_bits; /* least-significant 3 bits are flags, the - rest are reserved (see above) */ + uint32_t data_bits; /* least-significant 3 bits are flags, + the rest are reserved (see above) */ uint32_t reserved2; } VHDXMetadataTableEntry; @@ -262,13 +277,16 @@ typedef struct QEMU_PACKED VHDXMetadataTableEntry { If set indicates a fixed size VHDX file */ #define VHDX_PARAMS_HAS_PARENT 0x02 /* has parent / backing file */ +#define VHDX_BLOCK_SIZE_MIN (1 * MiB) +#define VHDX_BLOCK_SIZE_MAX (256 * MiB) typedef struct QEMU_PACKED VHDXFileParameters { uint32_t block_size; /* size of each payload block, always power of 2, <= 256MB and >= 1MB. */ - uint32_t data_bits; /* least-significant 2 bits are flags, the rest - are reserved (see above) */ + uint32_t data_bits; /* least-significant 2 bits are flags, + the rest are reserved (see above) */ } VHDXFileParameters; +#define VHDX_MAX_IMAGE_SIZE ((uint64_t) 64 * TiB) typedef struct QEMU_PACKED VHDXVirtualDiskSize { uint64_t virtual_disk_size; /* Size of the virtual disk, in bytes. 
Must be multiple of the sector size, @@ -276,7 +294,7 @@ typedef struct QEMU_PACKED VHDXVirtualDiskSize { } VHDXVirtualDiskSize; typedef struct QEMU_PACKED VHDXPage83Data { - MSGUID page_83_data[16]; /* unique id for scsi devices that + MSGUID page_83_data; /* unique id for scsi devices that support page 0x83 */ } VHDXPage83Data; @@ -291,7 +309,7 @@ typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize { } VHDXVirtualDiskPhysicalSectorSize; typedef struct QEMU_PACKED VHDXParentLocatorHeader { - MSGUID locator_type[16]; /* type of the parent virtual disk. */ + MSGUID locator_type; /* type of the parent virtual disk. */ uint16_t reserved; uint16_t key_value_count; /* number of key/value pairs for this locator */ @@ -308,18 +326,122 @@ typedef struct QEMU_PACKED VHDXParentLocatorEntry { /* ----- END VHDX SPECIFICATION STRUCTURES ---- */ +typedef struct VHDXMetadataEntries { + VHDXMetadataTableEntry file_parameters_entry; + VHDXMetadataTableEntry virtual_disk_size_entry; + VHDXMetadataTableEntry page83_data_entry; + VHDXMetadataTableEntry logical_sector_size_entry; + VHDXMetadataTableEntry phys_sector_size_entry; + VHDXMetadataTableEntry parent_locator_entry; + uint16_t present; +} VHDXMetadataEntries; + +typedef struct VHDXLogEntries { + uint64_t offset; + uint64_t length; + uint32_t write; + uint32_t read; + VHDXLogEntryHeader *hdr; + void *desc_buffer; + uint64_t sequence; + uint32_t tail; +} VHDXLogEntries; + +typedef struct VHDXRegionEntry { + uint64_t start; + uint64_t end; + QLIST_ENTRY(VHDXRegionEntry) entries; +} VHDXRegionEntry; + +typedef struct BDRVVHDXState { + CoMutex lock; + + int curr_header; + VHDXHeader *headers[2]; + + VHDXRegionTableHeader rt; + VHDXRegionTableEntry bat_rt; /* region table for the BAT */ + VHDXRegionTableEntry metadata_rt; /* region table for the metadata */ + + VHDXMetadataTableHeader metadata_hdr; + VHDXMetadataEntries metadata_entries; + + VHDXFileParameters params; + uint32_t block_size; + uint32_t block_size_bits; + 
uint32_t sectors_per_block; + uint32_t sectors_per_block_bits; + + uint64_t virtual_disk_size; + uint32_t logical_sector_size; + uint32_t physical_sector_size; + + uint64_t chunk_ratio; + uint32_t chunk_ratio_bits; + uint32_t logical_sector_size_bits; + + uint32_t bat_entries; + VHDXBatEntry *bat; + uint64_t bat_offset; + bool first_visible_write; + MSGUID session_guid; + + VHDXLogEntries log; + + VHDXParentLocatorHeader parent_header; + VHDXParentLocatorEntry *parent_entries; + + Error *migration_blocker; + + QLIST_HEAD(VHDXRegionHead, VHDXRegionEntry) regions; +} BDRVVHDXState; + +void vhdx_guid_generate(MSGUID *guid); + +int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw, + MSGUID *log_guid); + +uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset); uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, int crc_offset); bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset); +int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed); + +int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, + void *data, uint32_t length, uint64_t offset); -static void leguid_to_cpus(MSGUID *guid) +static inline void leguid_to_cpus(MSGUID *guid) { le32_to_cpus(&guid->data1); le16_to_cpus(&guid->data2); le16_to_cpus(&guid->data3); } +static inline void cpu_to_leguids(MSGUID *guid) +{ + cpu_to_le32s(&guid->data1); + cpu_to_le16s(&guid->data2); + cpu_to_le16s(&guid->data3); +} + +void vhdx_header_le_import(VHDXHeader *h); +void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h); +void vhdx_log_desc_le_import(VHDXLogDescriptor *d); +void vhdx_log_desc_le_export(VHDXLogDescriptor *d); +void vhdx_log_data_le_export(VHDXLogDataSector *d); +void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr); +void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr); +void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr); +void vhdx_region_header_le_export(VHDXRegionTableHeader 
*hdr); +void vhdx_region_entry_le_import(VHDXRegionTableEntry *e); +void vhdx_region_entry_le_export(VHDXRegionTableEntry *e); +void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr); +void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr); +void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e); +void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e); +int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s); + #endif diff --git a/block/vpc.c b/block/vpc.c index 627d11cb9b..577cc45992 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -211,6 +211,15 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, bs->total_sectors = (int64_t) be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; + /* images created with disk2vhd report a far higher virtual size + * than expected with the cyls * heads * sectors_per_cyl formula. + * use the footer->size instead if the image was created with + * disk2vhd. + */ + if (!strncmp(footer->creator_app, "d2v", 4)) { + bs->total_sectors = be64_to_cpu(footer->size) / BDRV_SECTOR_SIZE; + } + /* Allow a maximum disk size of approximately 2 TB */ if (bs->total_sectors >= 65535LL * 255 * 255) { ret = -EFBIG; diff --git a/blockdev.c b/blockdev.c index b260477f1b..86e6bffdc4 100644 --- a/blockdev.c +++ b/blockdev.c @@ -341,7 +341,7 @@ static DriveInfo *blockdev_init(QDict *bs_opts, qemu_opts_absorb_qdict(opts, bs_opts, &error); if (error_is_set(&error)) { error_propagate(errp, error); - return NULL; + goto early_err; } if (id) { @@ -361,7 +361,7 @@ static DriveInfo *blockdev_init(QDict *bs_opts, if ((buf = qemu_opt_get(opts, "discard")) != NULL) { if (bdrv_parse_discard_flags(buf, &bdrv_flags) != 0) { error_setg(errp, "invalid discard option"); - return NULL; + goto early_err; } } @@ -383,7 +383,7 @@ static DriveInfo *blockdev_init(QDict *bs_opts, /* this is the default */ } else { error_setg(errp, "invalid aio option"); - return NULL; + goto early_err; } } #endif @@ 
-393,13 +393,13 @@ static DriveInfo *blockdev_init(QDict *bs_opts, error_printf("Supported formats:"); bdrv_iterate_format(bdrv_format_print, NULL); error_printf("\n"); - return NULL; + goto early_err; } drv = bdrv_find_format(buf); if (!drv) { error_setg(errp, "'%s' invalid format", buf); - return NULL; + goto early_err; } } @@ -435,20 +435,20 @@ static DriveInfo *blockdev_init(QDict *bs_opts, if (!check_throttle_config(&cfg, &error)) { error_propagate(errp, error); - return NULL; + goto early_err; } on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; if ((buf = qemu_opt_get(opts, "werror")) != NULL) { if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) { error_setg(errp, "werror is not supported by this bus type"); - return NULL; + goto early_err; } on_write_error = parse_block_error_action(buf, 0, &error); if (error_is_set(&error)) { error_propagate(errp, error); - return NULL; + goto early_err; } } @@ -456,13 +456,13 @@ static DriveInfo *blockdev_init(QDict *bs_opts, if ((buf = qemu_opt_get(opts, "rerror")) != NULL) { if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) { error_report("rerror is not supported by this bus type"); - return NULL; + goto early_err; } on_read_error = parse_block_error_action(buf, 1, &error); if (error_is_set(&error)) { error_propagate(errp, error); - return NULL; + goto early_err; } } @@ -491,6 +491,8 @@ static DriveInfo *blockdev_init(QDict *bs_opts, if (has_driver_specific_opts) { file = NULL; } else { + QDECREF(bs_opts); + qemu_opts_del(opts); return dinfo; } } @@ -529,12 +531,13 @@ static DriveInfo *blockdev_init(QDict *bs_opts, return dinfo; err: - qemu_opts_del(opts); - QDECREF(bs_opts); bdrv_unref(dinfo->bdrv); g_free(dinfo->id); QTAILQ_REMOVE(&drives, dinfo, next); g_free(dinfo); +early_err: + QDECREF(bs_opts); + qemu_opts_del(opts); return NULL; } @@ -260,6 +260,7 @@ gtk="" gtkabi="2.0" tpm="no" libssh2="" +vhdx="" # parse CC options first for opt do @@ -985,6 +986,10 @@ for opt 
do ;; --enable-libssh2) libssh2="yes" ;; + --enable-vhdx) vhdx="yes" + ;; + --disable-vhdx) vhdx="no" + ;; *) echo "ERROR: unknown option $opt"; show_help="yes" ;; esac @@ -1217,6 +1222,8 @@ echo " --gcov=GCOV use specified gcov [$gcov_tool]" echo " --enable-tpm enable TPM support" echo " --disable-libssh2 disable ssh block device support" echo " --enable-libssh2 enable ssh block device support" +echo " --disable-vhdx disables support for the Microsoft VHDX image format" +echo " --enable-vhdx enable support for the Microsoft VHDX image format" echo "" echo "NOTE: The object files are built at the place where configure is launched" exit 1 @@ -2017,6 +2024,18 @@ EOF fi fi +if test "$vhdx" = "yes" ; then + if test "$uuid" = "no" ; then + error_exit "uuid required for VHDX support" + fi +elif test "$vhdx" != "no" ; then + if test "$uuid" = "yes" ; then + vhdx=yes + else + vhdx=no + fi +fi + ########################################## # xfsctl() probe, used for raw-posix if test "$xfs" != "no" ; then @@ -3760,6 +3779,7 @@ echo "TPM support $tpm" echo "libssh2 support $libssh2" echo "TPM passthrough $tpm_passthrough" echo "QOM debugging $qom_cast_debug" +echo "vhdx $vhdx" if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -4152,6 +4172,10 @@ if test "$virtio_blk_data_plane" = "yes" ; then echo 'CONFIG_VIRTIO_BLK_DATA_PLANE=$(CONFIG_VIRTIO)' >> $config_host_mak fi +if test "$vhdx" = "yes" ; then + echo "CONFIG_VHDX=y" >> $config_host_mak +fi + # USB host support if test "$libusb" = "yes"; then echo "HOST_USB=libusb legacy" >> $config_host_mak diff --git a/qapi-schema.json b/qapi-schema.json index 81a375ba06..76c98a7265 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -227,7 +227,7 @@ ## # @ImageInfoSpecificVmdk: # -# @create_type: The create type of VMDK image +# @create-type: The create type of VMDK image # # @cid: Content id of image # diff --git a/tests/Makefile b/tests/Makefile index 
f414f2c80a..379cdd9ad1 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -68,6 +68,8 @@ check-qtest-i386-y += tests/rtc-test$(EXESUF) check-qtest-i386-y += tests/i440fx-test$(EXESUF) check-qtest-i386-y += tests/fw_cfg-test$(EXESUF) check-qtest-i386-y += tests/qom-test$(EXESUF) +check-qtest-i386-y += tests/blockdev-test$(EXESUF) +check-qtest-i386-y += tests/qdev-monitor-test$(EXESUF) check-qtest-x86_64-y = $(check-qtest-i386-y) gcov-files-i386-y += i386-softmmu/hw/mc146818rtc.c gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) @@ -200,6 +202,8 @@ tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y) tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y) tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y) tests/qom-test$(EXESUF): tests/qom-test.o +tests/blockdev-test$(EXESUF): tests/blockdev-test.o $(libqos-pc-obj-y) +tests/qdev-monitor-test$(EXESUF): tests/qdev-monitor-test.o $(libqos-pc-obj-y) tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o # QTest rules diff --git a/tests/blockdev-test.c b/tests/blockdev-test.c new file mode 100644 index 0000000000..c940e00690 --- /dev/null +++ b/tests/blockdev-test.c @@ -0,0 +1,59 @@ +/* + * blockdev.c test cases + * + * Copyright (C) 2013 Red Hat Inc. + * + * Authors: + * Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. 
+ */ + +#include <glib.h> +#include <string.h> +#include "libqtest.h" + +static void test_drive_add_empty(void) +{ + QDict *response; + const char *response_return; + + /* Start with an empty drive */ + qtest_start("-drive if=none,id=drive0"); + + /* Delete the drive */ + response = qmp("{\"execute\": \"human-monitor-command\"," + " \"arguments\": {" + " \"command-line\": \"drive_del drive0\"" + "}}"); + g_assert(response); + response_return = qdict_get_try_str(response, "return"); + g_assert(response_return); + g_assert(strcmp(response_return, "") == 0); + QDECREF(response); + + /* Ensure re-adding the drive works - there should be no duplicate ID error + * because the old drive must be gone. + */ + response = qmp("{\"execute\": \"human-monitor-command\"," + " \"arguments\": {" + " \"command-line\": \"drive_add 0 if=none,id=drive0\"" + "}}"); + g_assert(response); + response_return = qdict_get_try_str(response, "return"); + g_assert(response_return); + g_assert(strcmp(response_return, "OK\r\n") == 0); + QDECREF(response); + + qtest_end(); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + qtest_add_func("/qmp/drive_add_empty", test_drive_add_empty); + + return g_test_run(); +} diff --git a/tests/boot-order-test.c b/tests/boot-order-test.c index 4b233d0b24..da158c32bd 100644 --- a/tests/boot-order-test.c +++ b/tests/boot-order-test.c @@ -41,12 +41,12 @@ static void test_a_boot_order(const char *machine, qtest_start(args); actual = read_boot_order(); g_assert_cmphex(actual, ==, expected_boot); - qmp("{ 'execute': 'system_reset' }"); + qmp_discard_response("{ 'execute': 'system_reset' }"); /* * system_reset only requests reset. We get a RESET event after * the actual reset completes. Need to wait for that. 
*/ - qmp(""); /* HACK: wait for event */ + qmp_discard_response(""); /* HACK: wait for event */ actual = read_boot_order(); g_assert_cmphex(actual, ==, expected_reboot); qtest_quit(global_qtest); diff --git a/tests/fdc-test.c b/tests/fdc-test.c index fd198dcf8b..38b5b178d0 100644 --- a/tests/fdc-test.c +++ b/tests/fdc-test.c @@ -290,10 +290,12 @@ static void test_media_insert(void) /* Insert media in drive. DSKCHK should not be reset until a step pulse * is sent. */ - qmp("{'execute':'change', 'arguments':{ 'device':'floppy0', " - "'target': '%s' }}", test_image); - qmp(""); /* ignore event (FIXME open -> open transition?!) */ - qmp(""); /* ignore event */ + qmp_discard_response("{'execute':'change', 'arguments':{" + " 'device':'floppy0', 'target': '%s' }}", + test_image); + qmp_discard_response(""); /* ignore event + (FIXME open -> open transition?!) */ + qmp_discard_response(""); /* ignore event */ dir = inb(FLOPPY_BASE + reg_dir); assert_bit_set(dir, DSKCHG); @@ -322,8 +324,9 @@ static void test_media_change(void) /* Eject the floppy and check that DSKCHG is set. Reading it out doesn't * reset the bit. 
*/ - qmp("{'execute':'eject', 'arguments':{ 'device':'floppy0' }}"); - qmp(""); /* ignore event */ + qmp_discard_response("{'execute':'eject', 'arguments':{" + " 'device':'floppy0' }}"); + qmp_discard_response(""); /* ignore event */ dir = inb(FLOPPY_BASE + reg_dir); assert_bit_set(dir, DSKCHG); diff --git a/tests/ide-test.c b/tests/ide-test.c index bc824a8144..d5cec5a1fc 100644 --- a/tests/ide-test.c +++ b/tests/ide-test.c @@ -460,8 +460,9 @@ static void test_flush(void) tmp_path); /* Delay the completion of the flush request until we explicitly do it */ - qmp("{'execute':'human-monitor-command', 'arguments': { " - "'command-line': 'qemu-io ide0-hd0 \"break flush_to_os A\"'} }"); + qmp_discard_response("{'execute':'human-monitor-command', 'arguments': {" + " 'command-line':" + " 'qemu-io ide0-hd0 \"break flush_to_os A\"'} }"); /* FLUSH CACHE command on device 0*/ outb(IDE_BASE + reg_device, 0); @@ -473,8 +474,9 @@ static void test_flush(void) assert_bit_clear(data, DF | ERR | DRQ); /* Complete the command */ - qmp("{'execute':'human-monitor-command', 'arguments': { " - "'command-line': 'qemu-io ide0-hd0 \"resume A\"'} }"); + qmp_discard_response("{'execute':'human-monitor-command', 'arguments': {" + " 'command-line':" + " 'qemu-io ide0-hd0 \"resume A\"'} }"); /* Check registers */ data = inb(IDE_BASE + reg_device); diff --git a/tests/libqtest.c b/tests/libqtest.c index bb82069f5c..83424c3c6b 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -30,6 +30,8 @@ #include "qemu/compiler.h" #include "qemu/osdep.h" +#include "qapi/qmp/json-streamer.h" +#include "qapi/qmp/json-parser.h" #define MAX_IRQ 256 @@ -151,8 +153,8 @@ QTestState *qtest_init(const char *extra_args) } /* Read the QMP greeting and then do the handshake */ - qtest_qmp(s, ""); - qtest_qmp(s, "{ 'execute': 'qmp_capabilities' }"); + qtest_qmp_discard_response(s, ""); + qtest_qmp_discard_response(s, "{ 'execute': 'qmp_capabilities' }"); if (getenv("QTEST_STOP")) { kill(qtest_qemu_pid(s), SIGSTOP); @@ 
-291,16 +293,38 @@ redo: return words; } -void qtest_qmpv(QTestState *s, const char *fmt, va_list ap) +typedef struct { + JSONMessageParser parser; + QDict *response; +} QMPResponseParser; + +static void qmp_response(JSONMessageParser *parser, QList *tokens) +{ + QMPResponseParser *qmp = container_of(parser, QMPResponseParser, parser); + QObject *obj; + + obj = json_parser_parse(tokens, NULL); + if (!obj) { + fprintf(stderr, "QMP JSON response parsing failed\n"); + exit(1); + } + + g_assert(qobject_type(obj) == QTYPE_QDICT); + g_assert(!qmp->response); + qmp->response = (QDict *)obj; +} + +QDict *qtest_qmpv(QTestState *s, const char *fmt, va_list ap) { - bool has_reply = false; - int nesting = 0; + QMPResponseParser qmp; /* Send QMP request */ socket_sendf(s->qmp_fd, fmt, ap); /* Receive reply */ - while (!has_reply || nesting > 0) { + qmp.response = NULL; + json_message_parser_init(&qmp.parser, qmp_response); + while (!qmp.response) { ssize_t len; char c; @@ -314,25 +338,39 @@ void qtest_qmpv(QTestState *s, const char *fmt, va_list ap) exit(1); } - switch (c) { - case '{': - nesting++; - has_reply = true; - break; - case '}': - nesting--; - break; - } + json_message_parser_feed(&qmp.parser, &c, 1); } + json_message_parser_destroy(&qmp.parser); + + return qmp.response; +} + +QDict *qtest_qmp(QTestState *s, const char *fmt, ...) +{ + va_list ap; + QDict *response; + + va_start(ap, fmt); + response = qtest_qmpv(s, fmt, ap); + va_end(ap); + return response; +} + +void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap) +{ + QDict *response = qtest_qmpv(s, fmt, ap); + QDECREF(response); } -void qtest_qmp(QTestState *s, const char *fmt, ...) +void qtest_qmp_discard_response(QTestState *s, const char *fmt, ...) 
{ va_list ap; + QDict *response; va_start(ap, fmt); - qtest_qmpv(s, fmt, ap); + response = qtest_qmpv(s, fmt, ap); va_end(ap); + QDECREF(response); } const char *qtest_get_arch(void) diff --git a/tests/libqtest.h b/tests/libqtest.h index a6e99bd023..9deebdcdfa 100644 --- a/tests/libqtest.h +++ b/tests/libqtest.h @@ -22,6 +22,7 @@ #include <stdbool.h> #include <stdarg.h> #include <sys/types.h> +#include "qapi/qmp/qdict.h" typedef struct QTestState QTestState; @@ -44,13 +45,32 @@ QTestState *qtest_init(const char *extra_args); void qtest_quit(QTestState *s); /** + * qtest_qmp_discard_response: + * @s: #QTestState instance to operate on. + * @fmt...: QMP message to send to qemu + * + * Sends a QMP message to QEMU and consumes the response. + */ +void qtest_qmp_discard_response(QTestState *s, const char *fmt, ...); + +/** * qtest_qmp: * @s: #QTestState instance to operate on. * @fmt...: QMP message to send to qemu * - * Sends a QMP message to QEMU + * Sends a QMP message to QEMU and returns the response. + */ +QDict *qtest_qmp(QTestState *s, const char *fmt, ...); + +/** + * qtest_qmpv_discard_response: + * @s: #QTestState instance to operate on. + * @fmt: QMP message to send to QEMU + * @ap: QMP message arguments + * + * Sends a QMP message to QEMU and consumes the response. */ -void qtest_qmp(QTestState *s, const char *fmt, ...); +void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap); /** * qtest_qmpv: @@ -58,9 +78,9 @@ void qtest_qmp(QTestState *s, const char *fmt, ...); * @fmt: QMP message to send to QEMU * @ap: QMP message arguments * - * Sends a QMP message to QEMU. + * Sends a QMP message to QEMU and returns the response. 
*/ -void qtest_qmpv(QTestState *s, const char *fmt, va_list ap); +QDict *qtest_qmpv(QTestState *s, const char *fmt, va_list ap); /** * qtest_get_irq: @@ -334,14 +354,31 @@ static inline void qtest_end(void) * qmp: * @fmt...: QMP message to send to qemu * - * Sends a QMP message to QEMU + * Sends a QMP message to QEMU and returns the response. + */ +static inline QDict *qmp(const char *fmt, ...) +{ + va_list ap; + QDict *response; + + va_start(ap, fmt); + response = qtest_qmpv(global_qtest, fmt, ap); + va_end(ap); + return response; +} + +/** + * qmp_discard_response: + * @fmt...: QMP message to send to qemu + * + * Sends a QMP message to QEMU and consumes the response. */ -static inline void qmp(const char *fmt, ...) +static inline void qmp_discard_response(const char *fmt, ...) { va_list ap; va_start(ap, fmt); - qtest_qmpv(global_qtest, fmt, ap); + qtest_qmpv_discard_response(global_qtest, fmt, ap); va_end(ap); } diff --git a/tests/qdev-monitor-test.c b/tests/qdev-monitor-test.c new file mode 100644 index 0000000000..33a8ea4b9c --- /dev/null +++ b/tests/qdev-monitor-test.c @@ -0,0 +1,81 @@ +/* + * qdev-monitor.c test cases + * + * Copyright (C) 2013 Red Hat Inc. + * + * Authors: + * Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#include <string.h> +#include <glib.h> +#include "libqtest.h" +#include "qapi/qmp/qjson.h" + +static void test_device_add(void) +{ + QDict *response; + QDict *error; + + qtest_start("-drive if=none,id=drive0"); + + /* Make device_add fail. If this leaks the virtio-blk-pci device then a + * reference to drive0 will also be held (via qdev properties). 
+ */ + response = qmp("{\"execute\": \"device_add\"," + " \"arguments\": {" + " \"driver\": \"virtio-blk-pci\"," + " \"drive\": \"drive0\"" + "}}"); + g_assert(response); + error = qdict_get_qdict(response, "error"); + g_assert(!strcmp(qdict_get_try_str(error, "class") ?: "", + "GenericError")); + g_assert(!strcmp(qdict_get_try_str(error, "desc") ?: "", + "Device initialization failed.")); + QDECREF(response); + + /* Delete the drive */ + response = qmp("{\"execute\": \"human-monitor-command\"," + " \"arguments\": {" + " \"command-line\": \"drive_del drive0\"" + "}}"); + g_assert(response); + g_assert(!strcmp(qdict_get_try_str(response, "return") ?: "(null)", "")); + QDECREF(response); + + /* Try to re-add the drive. This fails with duplicate IDs if a leaked + * virtio-blk-pci exists that holds a reference to the old drive0. + */ + response = qmp("{\"execute\": \"human-monitor-command\"," + " \"arguments\": {" + " \"command-line\": \"drive_add pci-addr=auto if=none,id=drive0\"" + "}}"); + g_assert(response); + g_assert(!strcmp(qdict_get_try_str(response, "return") ?: "", + "OK\r\n")); + QDECREF(response); + + qtest_end(); +} + +int main(int argc, char **argv) +{ + const char *arch = qtest_get_arch(); + + /* Check architecture */ + if (strcmp(arch, "i386") && strcmp(arch, "x86_64")) { + g_test_message("Skipping test for non-x86\n"); + return 0; + } + + /* Run the tests */ + g_test_init(&argc, &argv, NULL); + + qtest_add_func("/qmp/device_add", test_device_add); + + return g_test_run(); +} diff --git a/tests/qemu-iotests/017 b/tests/qemu-iotests/017 index 45f2c0b055..aba3faf712 100755 --- a/tests/qemu-iotests/017 +++ b/tests/qemu-iotests/017 @@ -66,7 +66,7 @@ echo "Creating test image with backing file" echo TEST_IMG=$TEST_IMG_SAVE -_make_test_img -b $TEST_IMG.base 6G +_make_test_img -b "$TEST_IMG.base" 6G echo "Filling test image" echo diff --git a/tests/qemu-iotests/019 b/tests/qemu-iotests/019 index cd3582cf6f..5bb18d0c0a 100755 --- a/tests/qemu-iotests/019 +++ 
b/tests/qemu-iotests/019 @@ -90,12 +90,12 @@ mv "$TEST_IMG" "$TEST_IMG.orig" # Test the conversion twice: One test with the old-style -B option and another # one with -o backing_file -for backing_option in "-B $TEST_IMG.base" "-o backing_file=$TEST_IMG.base"; do +for backing_option in "-B " "-o backing_file="; do echo - echo Testing conversion with $backing_option | _filter_testdir | _filter_imgfmt + echo Testing conversion with $backing_option$TEST_IMG.base | _filter_testdir | _filter_imgfmt echo - $QEMU_IMG convert -O $IMGFMT $backing_option "$TEST_IMG.orig" "$TEST_IMG" + $QEMU_IMG convert -O $IMGFMT $backing_option"$TEST_IMG.base" "$TEST_IMG.orig" "$TEST_IMG" echo "Checking if backing clusters are allocated when they shouldn't" echo diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039 index f85b4ce63f..8bade92a80 100755 --- a/tests/qemu-iotests/039 +++ b/tests/qemu-iotests/039 @@ -54,7 +54,7 @@ echo "== Checking that image is clean on shutdown ==" IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img $size -$QEMU_IO -c "write -P 0x5a 0 512" ""$TEST_IMG"" | _filter_qemu_io +$QEMU_IO -c "write -P 0x5a 0 512" "$TEST_IMG" | _filter_qemu_io # The dirty bit must not be set ./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index 356c3756f4..0a4971d437 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -64,9 +64,9 @@ function run_qemu() size=128M _make_test_img $size -cp $TEST_IMG $TEST_IMG.orig -mv $TEST_IMG $TEST_IMG.base -_make_test_img -b $TEST_IMG.base $size +cp "$TEST_IMG" "$TEST_IMG.orig" +mv "$TEST_IMG" "$TEST_IMG.base" +_make_test_img -b "$TEST_IMG.base" $size echo echo === Unknown option === @@ -81,7 +81,7 @@ echo echo === Overriding backing file === echo -echo "info block" | run_qemu -drive file=$TEST_IMG,driver=qcow2,backing.file.filename=$TEST_IMG.orig -nodefaults +echo "info block" | run_qemu -drive 
file="$TEST_IMG",driver=qcow2,backing.file.filename="$TEST_IMG.orig" -nodefaults echo echo === Enable and disable lazy refcounting on the command line, plus some invalid values === diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061 index fa9319da26..e42f9bd5e8 100755 --- a/tests/qemu-iotests/061 +++ b/tests/qemu-iotests/061 @@ -163,7 +163,7 @@ echo "=== Testing zero expansion on backed image ===" echo IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M $QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io -IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M +IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M $QEMU_IO -c "read -P 0x2a 0 128k" -c "write -z 0 64k" "$TEST_IMG" | _filter_qemu_io $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG" _check_test_img @@ -174,7 +174,7 @@ echo "=== Testing zero expansion on backed inactive clusters ===" echo IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M $QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io -IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M +IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M $QEMU_IO -c "write -z 0 64k" "$TEST_IMG" | _filter_qemu_io $QEMU_IMG snapshot -c foo "$TEST_IMG" $QEMU_IO -c "write -P 0x42 0 128k" "$TEST_IMG" | _filter_qemu_io @@ -190,7 +190,7 @@ echo "=== Testing zero expansion on backed image with shared L2 table ===" echo IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M $QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io -IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M +IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M $QEMU_IO -c "write -z 0 128k" "$TEST_IMG" | _filter_qemu_io $QEMU_IMG snapshot -c foo "$TEST_IMG" $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG" diff --git a/tests/qemu-iotests/064 b/tests/qemu-iotests/064 index 6789aa6ee4..1c74c31a1a 100755 --- a/tests/qemu-iotests/064 +++ 
b/tests/qemu-iotests/064 @@ -56,6 +56,17 @@ echo echo "=== Verify pattern 0x00, 66M - 1024M ===" $QEMU_IO -r -c "read -pP 0x00 66M 958M" "$TEST_IMG" | _filter_qemu_io +echo +echo "=== Verify pattern write, 0xc3 99M-157M ===" +$QEMU_IO -c "write -pP 0xc3 99M 58M" "$TEST_IMG" | _filter_qemu_io +# first verify we didn't write where we should not have +$QEMU_IO -c "read -pP 0xa5 0 33M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -pP 0x96 33M 33M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -pP 0x00 66M 33M" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -pP 0x00 157MM 867MM" "$TEST_IMG" | _filter_qemu_io +# now verify what we should have actually written +$QEMU_IO -c "read -pP 0xc3 99M 58M" "$TEST_IMG" | _filter_qemu_io + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/064.out b/tests/qemu-iotests/064.out index b9e8e4a873..5346a4e630 100644 --- a/tests/qemu-iotests/064.out +++ b/tests/qemu-iotests/064.out @@ -11,4 +11,18 @@ read 34603008/34603008 bytes at offset 34603008 === Verify pattern 0x00, 66M - 1024M === read 1004535808/1004535808 bytes at offset 69206016 958 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Verify pattern write, 0xc3 99M-157M === +wrote 60817408/60817408 bytes at offset 103809024 +58 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 34603008/34603008 bytes at offset 0 +33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 34603008/34603008 bytes at offset 34603008 +33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 34603008/34603008 bytes at offset 69206016 +33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 909115392/909115392 bytes at offset 164626432 +867 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 60817408/60817408 bytes at offset 103809024 +58 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067 index 79dc38bc04..d025192c83 100755 --- a/tests/qemu-iotests/067 
+++ b/tests/qemu-iotests/067 @@ -45,7 +45,7 @@ function do_run_qemu() function run_qemu() { - do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' } size=128M diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out index 4bb9ff9652..8d271cc41a 100644 --- a/tests/qemu-iotests/067.out +++ b/tests/qemu-iotests/067.out @@ -6,7 +6,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device virtio-blk-pci,drive=disk,id=virtio0 QMP_VERSION {"return": {}} -{"return": [{"io-status": "ok", "device": "disk", "locked": false, "removable": false, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]} +{"return": [{"io-status": "ok", "device": "disk", "locked": false, "removable": false, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, 
"encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]} {"return": {}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/virtio0/virtio-backend"}} @@ -24,7 +24,7 @@ QMP_VERSION Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk QMP_VERSION {"return": {}} -{"return": [{"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]} +{"return": [{"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": 
false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]} {"return": {}} {"return": {}} {"return": {}} @@ -44,7 +44,7 @@ Testing: QMP_VERSION {"return": {}} {"return": "OK\r\n"} -{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]} +{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": 
"TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]} {"return": {}} {"return": {}} {"return": {}} @@ -64,14 +64,14 @@ Testing: QMP_VERSION {"return": {}} {"return": {}} -{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]} +{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": 
{"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]} {"return": {}} {"return": {}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/virtio0/virtio-backend"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_DELETED", "data": {"device": "virtio0", "path": "/machine/peripheral/virtio0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "RESET"} -{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]} +{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": 
"unknown"}, {"io-status": "ok", "device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}} diff --git a/tests/qemu-iotests/070 b/tests/qemu-iotests/070 new file mode 100755 index 0000000000..41bf100701 --- /dev/null +++ b/tests/qemu-iotests/070 @@ -0,0 +1,67 @@ +#!/bin/bash +# +# Test VHDX log replay from an image with a journal that needs to be +# replayed +# +# Copyright (C) 2013 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=jcody@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! 
+ +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt vhdx +_supported_proto generic +_supported_os Linux + +# With the log replayed, the pattern 0xa5 extends to 0xc025000 +# If the log was not replayed, it would only extend to 0xc000000 +# +# This image is a 10G dynamic image, with 4M block size, and 1 unplayed +# data sector in the log +# +# This image was created with qemu-img, however it was verified using +# Hyper-V to properly replay the logs and give the same post-replay +# image as qemu. +_use_sample_img iotest-dirtylog-10G-4M.vhdx.bz2 + +echo +echo "=== Verify open image read-only fails, due to dirty log ===" +$QEMU_IO -r -c "read -pP 0xa5 0 18M" "$TEST_IMG" 2>&1 | grep -o "Permission denied" + +echo "=== Verify open image replays log ===" +$QEMU_IO -c "read -pP 0xa5 0 18M" "$TEST_IMG" | _filter_qemu_io + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/070.out b/tests/qemu-iotests/070.out new file mode 100644 index 0000000000..9db8ff2650 --- /dev/null +++ b/tests/qemu-iotests/070.out @@ -0,0 +1,8 @@ +QA output created by 070 + +=== Verify open image read-only fails, due to dirty log === +Permission denied +=== Verify open image replays log === +read 18874368/18874368 bytes at offset 0 +18 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +*** done diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index 2932e14e73..8cde7f11fa 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -200,7 +200,6 @@ testlist options -vhdx) IMGFMT=vhdx xpand=false - IMGFMT_GENERIC=false ;; -rbd) diff --git a/tests/qemu-iotests/common.pattern b/tests/qemu-iotests/common.pattern index 00e0f605fd..ddfbca1b76 100644 --- a/tests/qemu-iotests/common.pattern +++ b/tests/qemu-iotests/common.pattern @@ -28,7 +28,7 @@ function do_is_allocated() { } function is_allocated() 
{ - do_is_allocated "$@" | $QEMU_IO $TEST_IMG | _filter_qemu_io + do_is_allocated "$@" | $QEMU_IO "$TEST_IMG" | _filter_qemu_io } function do_io() { @@ -46,18 +46,18 @@ function do_io() { } function io_pattern() { - do_io "$@" | $QEMU_IO $TEST_IMG | _filter_qemu_io + do_io "$@" | $QEMU_IO "$TEST_IMG" | _filter_qemu_io } function io() { local start=$2 local pattern=$(( (start >> 9) % 256 )) - do_io "$@" $pattern | $QEMU_IO $TEST_IMG | _filter_qemu_io + do_io "$@" $pattern | $QEMU_IO "$TEST_IMG" | _filter_qemu_io } function io_zero() { - do_io "$@" 0 | $QEMU_IO $TEST_IMG | _filter_qemu_io + do_io "$@" 0 | $QEMU_IO "$TEST_IMG" | _filter_qemu_io } function io_test() { @@ -117,8 +117,8 @@ function io_test2() { echo === Clusters to be compressed [3] io_pattern writev $((offset + 8 * $cluster_size)) $cluster_size $((9 * $cluster_size)) $num 165 - mv $TEST_IMG $TEST_IMG.orig - $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -c $TEST_IMG.orig $TEST_IMG + mv "$TEST_IMG" "$TEST_IMG.orig" + $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -c "$TEST_IMG.orig" "$TEST_IMG" # Write the used clusters echo === Used clusters [1] diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 4e826040d4..7f6245770a 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -111,6 +111,8 @@ _make_test_img() local image_size=$* local optstr="" local img_name="" + local use_backing=0 + local backing_file="" if [ -n "$TEST_IMG_FILE" ]; then img_name=$TEST_IMG_FILE @@ -123,7 +125,8 @@ _make_test_img() fi if [ "$1" = "-b" ]; then - extra_img_options="$1 $2" + use_backing=1 + backing_file=$2 image_size=$3 fi if [ \( "$IMGFMT" = "qcow2" -o "$IMGFMT" = "qed" \) -a -n "$CLUSTER_SIZE" ]; then @@ -135,7 +138,13 @@ _make_test_img() fi # XXX(hch): have global image options? 
- $QEMU_IMG create -f $IMGFMT $extra_img_options $img_name $image_size 2>&1 | \ + ( + if [ $use_backing = 1 ]; then + $QEMU_IMG create -f $IMGFMT $extra_img_options -b "$backing_file" "$img_name" $image_size 2>&1 + else + $QEMU_IMG create -f $IMGFMT $extra_img_options "$img_name" $image_size 2>&1 + fi + ) | \ sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ -e "s#$TEST_DIR#TEST_DIR#g" \ -e "s#$IMGFMT#IMGFMT#g" \ @@ -148,7 +157,10 @@ _make_test_img() -e "s# zeroed_grain=\\(on\\|off\\)##g" \ -e "s# subformat='[^']*'##g" \ -e "s# adapter_type='[^']*'##g" \ - -e "s# lazy_refcounts=\\(on\\|off\\)##g" + -e "s# lazy_refcounts=\\(on\\|off\\)##g" \ + -e "s# block_size=[0-9]\\+##g" \ + -e "s# block_state_zero=\\(on\\|off\\)##g" \ + -e "s# log_size=[0-9]\\+##g" # Start an NBD server on the image file, which is what we'll be talking to if [ $IMGPROTO = "nbd" ]; then diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index c57ff35843..b18b241f8d 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -75,3 +75,4 @@ 067 rw auto 068 rw auto 069 rw auto +070 rw auto diff --git a/tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2 b/tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2 Binary files differ new file mode 100644 index 0000000000..4b91cfc654 --- /dev/null +++ b/tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2 |