aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthony Liguori <aliguori@amazon.com>2013-11-13 11:47:44 -0800
committerAnthony Liguori <aliguori@amazon.com>2013-11-13 11:47:44 -0800
commitdeb0f500651317863922964c87b4fa64eecdbd73 (patch)
treef67b4c5eabb590c5a51b64796cdb19e1dce07847
parent70c4c5b5621f6d954843547bcab9db857d882e99 (diff)
parent7e382003f1bd9d8a441ecc5ac8a74bad3564d943 (diff)
Merge remote-tracking branch 'stefanha/block' into staging
# By Jeff Cody (26) and others # Via Stefan Hajnoczi * stefanha/block: (37 commits) block: Round up total_sectors block: vhdx qemu-iotest - log replay of data sector block: qemu-iotests for vhdx, add write test support block: vhdx - update _make_test_img() to filter out vhdx options block: vhdx - add .bdrv_create() support block: vhdx - fix comment typos in header, fix incorrect struct fields block: vhdx - break out code operations to functions block: vhdx - move more endian translations to vhdx-endian.c block: vhdx - remove BAT file offset bit shifting block: vhdx write support block: vhdx - add log write support block: vhdx - add region overlap detection for image files block: vhdx - log parsing, replay, and flush support block: vhdx code movement - move vhdx_close() above vhdx_open() block: vhdx - update log guid in header, and first write tracker block: vhdx - break endian translation functions out block: vhdx - log support struct and defines block: vhdx code movement - VHDXMetadataEntries and BDRVVHDXState to header. block: vhdx - add header update capability. block: vhdx - minor comments and typo correction. ... Message-id: 1383905551-16411-1-git-send-email-stefanha@redhat.com Signed-off-by: Anthony Liguori <aliguori@amazon.com>
-rw-r--r--block.c4
-rw-r--r--block/Makefile.objs2
-rw-r--r--block/raw-posix.c3
-rw-r--r--block/vhdx-endian.c216
-rw-r--r--block/vhdx-log.c1010
-rw-r--r--block/vhdx.c1220
-rw-r--r--block/vhdx.h178
-rw-r--r--block/vpc.c9
-rw-r--r--blockdev.c27
-rwxr-xr-xconfigure24
-rw-r--r--qapi-schema.json2
-rw-r--r--tests/Makefile4
-rw-r--r--tests/blockdev-test.c59
-rw-r--r--tests/boot-order-test.c4
-rw-r--r--tests/fdc-test.c15
-rw-r--r--tests/ide-test.c10
-rw-r--r--tests/libqtest.c72
-rw-r--r--tests/libqtest.h51
-rw-r--r--tests/qdev-monitor-test.c81
-rwxr-xr-xtests/qemu-iotests/0172
-rwxr-xr-xtests/qemu-iotests/0196
-rwxr-xr-xtests/qemu-iotests/0392
-rwxr-xr-xtests/qemu-iotests/0518
-rwxr-xr-xtests/qemu-iotests/0616
-rwxr-xr-xtests/qemu-iotests/06411
-rw-r--r--tests/qemu-iotests/064.out14
-rwxr-xr-xtests/qemu-iotests/0672
-rw-r--r--tests/qemu-iotests/067.out10
-rwxr-xr-xtests/qemu-iotests/07067
-rw-r--r--tests/qemu-iotests/070.out8
-rw-r--r--tests/qemu-iotests/common1
-rw-r--r--tests/qemu-iotests/common.pattern12
-rw-r--r--tests/qemu-iotests/common.rc18
-rw-r--r--tests/qemu-iotests/group1
-rw-r--r--tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2bin0 -> 4490 bytes
35 files changed, 2897 insertions, 262 deletions
diff --git a/block.c b/block.c
index 58efb5b4e4..6d5c80475c 100644
--- a/block.c
+++ b/block.c
@@ -640,7 +640,7 @@ static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
if (length < 0) {
return length;
}
- hint = length >> BDRV_SECTOR_BITS;
+ hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
}
bs->total_sectors = hint;
@@ -1084,8 +1084,8 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
snprintf(backing_filename, sizeof(backing_filename),
"%s", filename);
} else if (!realpath(filename, backing_filename)) {
- error_setg_errno(errp, errno, "Could not resolve path '%s'", filename);
ret = -errno;
+ error_setg_errno(errp, errno, "Could not resolve path '%s'", filename);
goto fail;
}
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 3bb85b535c..f43ecbc044 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -2,7 +2,7 @@ block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o v
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
-block-obj-y += vhdx.o
+block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
block-obj-y += parallels.o blkdebug.o blkverify.o
block-obj-y += snapshot.o qapi.o
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
diff --git a/block/raw-posix.c b/block/raw-posix.c
index f6d48bbdb2..ace5d962e8 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1842,7 +1842,8 @@ static BlockDriver bdrv_host_cdrom = {
#endif /* __linux__ */
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
-static int cdrom_open(BlockDriverState *bs, QDict *options, int flags)
+static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
{
BDRVRawState *s = bs->opaque;
Error *local_err = NULL;
diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c
new file mode 100644
index 0000000000..fe879ed995
--- /dev/null
+++ b/block/vhdx-endian.c
@@ -0,0 +1,216 @@
+/*
+ * Block driver for Hyper-V VHDX Images
+ *
+ * Copyright (c) 2013 Red Hat, Inc.,
+ *
+ * Authors:
+ * Jeff Cody <jcody@redhat.com>
+ *
+ * This is based on the "VHDX Format Specification v1.00", published 8/25/2012
+ * by Microsoft:
+ * https://www.microsoft.com/en-us/download/details.aspx?id=34750
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/vhdx.h"
+
+#include <uuid/uuid.h>
+
+
+/*
+ * All the VHDX formats on disk are little endian - the following
+ * are helper import/export functions to correctly convert
+ * endianness from disk read to native cpu format, and back again.
+ */
+
+
+/* VHDX File Header */
+
+
+void vhdx_header_le_import(VHDXHeader *h)
+{
+ assert(h != NULL);
+
+ le32_to_cpus(&h->signature);
+ le32_to_cpus(&h->checksum);
+ le64_to_cpus(&h->sequence_number);
+
+ leguid_to_cpus(&h->file_write_guid);
+ leguid_to_cpus(&h->data_write_guid);
+ leguid_to_cpus(&h->log_guid);
+
+ le16_to_cpus(&h->log_version);
+ le16_to_cpus(&h->version);
+ le32_to_cpus(&h->log_length);
+ le64_to_cpus(&h->log_offset);
+}
+
+void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h)
+{
+ assert(orig_h != NULL);
+ assert(new_h != NULL);
+
+ new_h->signature = cpu_to_le32(orig_h->signature);
+ new_h->checksum = cpu_to_le32(orig_h->checksum);
+ new_h->sequence_number = cpu_to_le64(orig_h->sequence_number);
+
+ new_h->file_write_guid = orig_h->file_write_guid;
+ new_h->data_write_guid = orig_h->data_write_guid;
+ new_h->log_guid = orig_h->log_guid;
+
+ cpu_to_leguids(&new_h->file_write_guid);
+ cpu_to_leguids(&new_h->data_write_guid);
+ cpu_to_leguids(&new_h->log_guid);
+
+ new_h->log_version = cpu_to_le16(orig_h->log_version);
+ new_h->version = cpu_to_le16(orig_h->version);
+ new_h->log_length = cpu_to_le32(orig_h->log_length);
+ new_h->log_offset = cpu_to_le64(orig_h->log_offset);
+}
+
+
+/* VHDX Log Headers */
+
+
+void vhdx_log_desc_le_import(VHDXLogDescriptor *d)
+{
+ assert(d != NULL);
+
+ le32_to_cpus(&d->signature);
+ le32_to_cpus(&d->trailing_bytes);
+ le64_to_cpus(&d->leading_bytes);
+ le64_to_cpus(&d->file_offset);
+ le64_to_cpus(&d->sequence_number);
+}
+
+void vhdx_log_desc_le_export(VHDXLogDescriptor *d)
+{
+ assert(d != NULL);
+
+ cpu_to_le32s(&d->signature);
+ cpu_to_le32s(&d->trailing_bytes);
+ cpu_to_le64s(&d->leading_bytes);
+ cpu_to_le64s(&d->file_offset);
+ cpu_to_le64s(&d->sequence_number);
+}
+
+void vhdx_log_data_le_export(VHDXLogDataSector *d)
+{
+ assert(d != NULL);
+
+ cpu_to_le32s(&d->data_signature);
+ cpu_to_le32s(&d->sequence_high);
+ cpu_to_le32s(&d->sequence_low);
+}
+
+void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr)
+{
+ assert(hdr != NULL);
+
+ le32_to_cpus(&hdr->signature);
+ le32_to_cpus(&hdr->checksum);
+ le32_to_cpus(&hdr->entry_length);
+ le32_to_cpus(&hdr->tail);
+ le64_to_cpus(&hdr->sequence_number);
+ le32_to_cpus(&hdr->descriptor_count);
+ leguid_to_cpus(&hdr->log_guid);
+ le64_to_cpus(&hdr->flushed_file_offset);
+ le64_to_cpus(&hdr->last_file_offset);
+}
+
+void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr)
+{
+ assert(hdr != NULL);
+
+ cpu_to_le32s(&hdr->signature);
+ cpu_to_le32s(&hdr->checksum);
+ cpu_to_le32s(&hdr->entry_length);
+ cpu_to_le32s(&hdr->tail);
+ cpu_to_le64s(&hdr->sequence_number);
+ cpu_to_le32s(&hdr->descriptor_count);
+ cpu_to_leguids(&hdr->log_guid);
+ cpu_to_le64s(&hdr->flushed_file_offset);
+ cpu_to_le64s(&hdr->last_file_offset);
+}
+
+
+/* Region table entries */
+void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr)
+{
+ assert(hdr != NULL);
+
+ le32_to_cpus(&hdr->signature);
+ le32_to_cpus(&hdr->checksum);
+ le32_to_cpus(&hdr->entry_count);
+}
+
+void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr)
+{
+ assert(hdr != NULL);
+
+ cpu_to_le32s(&hdr->signature);
+ cpu_to_le32s(&hdr->checksum);
+ cpu_to_le32s(&hdr->entry_count);
+}
+
+void vhdx_region_entry_le_import(VHDXRegionTableEntry *e)
+{
+ assert(e != NULL);
+
+ leguid_to_cpus(&e->guid);
+ le64_to_cpus(&e->file_offset);
+ le32_to_cpus(&e->length);
+ le32_to_cpus(&e->data_bits);
+}
+
+void vhdx_region_entry_le_export(VHDXRegionTableEntry *e)
+{
+ assert(e != NULL);
+
+ cpu_to_leguids(&e->guid);
+ cpu_to_le64s(&e->file_offset);
+ cpu_to_le32s(&e->length);
+ cpu_to_le32s(&e->data_bits);
+}
+
+
+/* Metadata headers & table */
+void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr)
+{
+ assert(hdr != NULL);
+
+ le64_to_cpus(&hdr->signature);
+ le16_to_cpus(&hdr->entry_count);
+}
+
+void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr)
+{
+ assert(hdr != NULL);
+
+ cpu_to_le64s(&hdr->signature);
+ cpu_to_le16s(&hdr->entry_count);
+}
+
+void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e)
+{
+ assert(e != NULL);
+
+ leguid_to_cpus(&e->item_id);
+ le32_to_cpus(&e->offset);
+ le32_to_cpus(&e->length);
+ le32_to_cpus(&e->data_bits);
+}
+void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e)
+{
+ assert(e != NULL);
+
+ cpu_to_leguids(&e->item_id);
+ cpu_to_le32s(&e->offset);
+ cpu_to_le32s(&e->length);
+ cpu_to_le32s(&e->data_bits);
+}
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
new file mode 100644
index 0000000000..ee5583c309
--- /dev/null
+++ b/block/vhdx-log.c
@@ -0,0 +1,1010 @@
+/*
+ * Block driver for Hyper-V VHDX Images
+ *
+ * Copyright (c) 2013 Red Hat, Inc.,
+ *
+ * Authors:
+ * Jeff Cody <jcody@redhat.com>
+ *
+ * This is based on the "VHDX Format Specification v1.00", published 8/25/2012
+ * by Microsoft:
+ * https://www.microsoft.com/en-us/download/details.aspx?id=34750
+ *
+ * This file covers the functionality of the metadata log writing, parsing, and
+ * replay.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "qemu/module.h"
+#include "block/vhdx.h"
+
+
+typedef struct VHDXLogSequence {
+ bool valid;
+ uint32_t count;
+ VHDXLogEntries log;
+ VHDXLogEntryHeader hdr;
+} VHDXLogSequence;
+
+typedef struct VHDXLogDescEntries {
+ VHDXLogEntryHeader hdr;
+ VHDXLogDescriptor desc[];
+} VHDXLogDescEntries;
+
+static const MSGUID zero_guid = { 0 };
+
+/* The log located on the disk is circular buffer containing
+ * sectors of 4096 bytes each.
+ *
+ * It is assumed for the read/write functions below that the
+ * circular buffer scheme uses a 'one sector open' to indicate
+ * the buffer is full. Given the validation methods used for each
+ * sector, this method should be compatible with other methods that
+ * do not waste a sector.
+ */
+
+
+/* Allow peeking at the hdr entry at the beginning of the current
+ * read index, without advancing the read index */
+static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
+ VHDXLogEntryHeader *hdr)
+{
+ int ret = 0;
+ uint64_t offset;
+ uint32_t read;
+
+ assert(hdr != NULL);
+
+ /* peek is only supported on sector boundaries */
+ if (log->read % VHDX_LOG_SECTOR_SIZE) {
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ read = log->read;
+ /* we are guaranteed that a) log sectors are 4096 bytes,
+ * and b) the log length is a multiple of 1MB. So, there
+ * is always a round number of sectors in the buffer */
+ if ((read + sizeof(VHDXLogEntryHeader)) > log->length) {
+ read = 0;
+ }
+
+ if (read == log->write) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ offset = log->offset + read;
+
+ ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader));
+ if (ret < 0) {
+ goto exit;
+ }
+
+exit:
+ return ret;
+}
+
+/* Index increment for log, based on sector boundaries */
+static int vhdx_log_inc_idx(uint32_t idx, uint64_t length)
+{
+ idx += VHDX_LOG_SECTOR_SIZE;
+ /* we are guaranteed that a) log sectors are 4096 bytes,
+ * and b) the log length is a multiple of 1MB. So, there
+ * is always a round number of sectors in the buffer */
+ return idx >= length ? 0 : idx;
+}
+
+
+/* Reset the log to empty */
+static void vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s)
+{
+ MSGUID guid = { 0 };
+ s->log.read = s->log.write = 0;
+ /* a log guid of 0 indicates an empty log to any parser of v0
+ * VHDX logs */
+ vhdx_update_headers(bs, s, false, &guid);
+}
+
+/* Reads num_sectors from the log (all log sectors are 4096 bytes),
+ * into buffer 'buffer'. Upon return, *sectors_read will contain
+ * the number of sectors successfully read.
+ *
+ * It is assumed that 'buffer' is already allocated, and of sufficient
+ * size (i.e. >= 4096*num_sectors).
+ *
+ * If 'peek' is true, then the tail (read) pointer for the circular buffer is
+ * not modified.
+ *
+ * 0 is returned on success, -errno otherwise. */
+static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
+ uint32_t *sectors_read, void *buffer,
+ uint32_t num_sectors, bool peek)
+{
+ int ret = 0;
+ uint64_t offset;
+ uint32_t read;
+
+ read = log->read;
+
+ *sectors_read = 0;
+ while (num_sectors) {
+ if (read == log->write) {
+ /* empty */
+ break;
+ }
+ offset = log->offset + read;
+
+ ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE);
+ if (ret < 0) {
+ goto exit;
+ }
+ read = vhdx_log_inc_idx(read, log->length);
+
+ *sectors_read = *sectors_read + 1;
+ num_sectors--;
+ }
+
+exit:
+ if (!peek) {
+ log->read = read;
+ }
+ return ret;
+}
+
+/* Writes num_sectors to the log (all log sectors are 4096 bytes),
+ * from buffer 'buffer'. Upon return, *sectors_written will contain
+ * the number of sectors successfully written.
+ *
+ * It is assumed that 'buffer' is at least 4096*num_sectors large.
+ *
+ * 0 is returned on success, -errno otherwise */
+static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
+ uint32_t *sectors_written, void *buffer,
+ uint32_t num_sectors)
+{
+ int ret = 0;
+ uint64_t offset;
+ uint32_t write;
+ void *buffer_tmp;
+ BDRVVHDXState *s = bs->opaque;
+
+ ret = vhdx_user_visible_write(bs, s);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ write = log->write;
+
+ buffer_tmp = buffer;
+ while (num_sectors) {
+
+ offset = log->offset + write;
+ write = vhdx_log_inc_idx(write, log->length);
+ if (write == log->read) {
+ /* full */
+ break;
+ }
+ ret = bdrv_pwrite(bs->file, offset, buffer_tmp, VHDX_LOG_SECTOR_SIZE);
+ if (ret < 0) {
+ goto exit;
+ }
+ buffer_tmp += VHDX_LOG_SECTOR_SIZE;
+
+ log->write = write;
+ *sectors_written = *sectors_written + 1;
+ num_sectors--;
+ }
+
+exit:
+ return ret;
+}
+
+
+/* Validates a log entry header */
+static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
+ BDRVVHDXState *s)
+{
+ int valid = false;
+
+ if (memcmp(&hdr->signature, "loge", 4)) {
+ goto exit;
+ }
+
+ /* if the individual entry length is larger than the whole log
+ * buffer, that is obviously invalid */
+ if (log->length < hdr->entry_length) {
+ goto exit;
+ }
+
+ /* length of entire entry must be in units of 4KB (log sector size) */
+ if (hdr->entry_length % (VHDX_LOG_SECTOR_SIZE)) {
+ goto exit;
+ }
+
+ /* per spec, sequence # must be > 0 */
+ if (hdr->sequence_number == 0) {
+ goto exit;
+ }
+
+ /* log entries are only valid if they match the file-wide log guid
+ * found in the active header */
+ if (!guid_eq(hdr->log_guid, s->headers[s->curr_header]->log_guid)) {
+ goto exit;
+ }
+
+ if (hdr->descriptor_count * sizeof(VHDXLogDescriptor) > hdr->entry_length) {
+ goto exit;
+ }
+
+ valid = true;
+
+exit:
+ return valid;
+}
+
+/*
+ * Given a log header, this will validate that the descriptors and the
+ * corresponding data sectors (if applicable)
+ *
+ * Validation consists of:
+ * 1. Making sure the sequence numbers matches the entry header
+ * 2. Verifying a valid signature ('zero' or 'desc' for descriptors)
+ * 3. File offset field is a multiple of 4KB
+ * 4. If a data descriptor, the corresponding data sector
+ * has its signature ('data') and matching sequence number
+ *
+ * @desc: the data buffer containing the descriptor
+ * @hdr: the log entry header
+ *
+ * Returns true if valid
+ */
+static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc,
+ VHDXLogEntryHeader *hdr)
+{
+ bool ret = false;
+
+ if (desc->sequence_number != hdr->sequence_number) {
+ goto exit;
+ }
+ if (desc->file_offset % VHDX_LOG_SECTOR_SIZE) {
+ goto exit;
+ }
+
+ if (!memcmp(&desc->signature, "zero", 4)) {
+ if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) {
+ /* valid */
+ ret = true;
+ }
+ } else if (!memcmp(&desc->signature, "desc", 4)) {
+ /* valid */
+ ret = true;
+ }
+
+exit:
+ return ret;
+}
+
+
+/* Prior to sector data for a log entry, there is the header
+ * and the descriptors referenced in the header:
+ *
+ * [] = 4KB sector
+ *
+ * [ hdr, desc ][ desc ][ ... ][ data ][ ... ]
+ *
+ * The first sector in a log entry has a 64 byte header, and
+ * up to 126 32-byte descriptors. If more descriptors than
+ * 126 are required, then subsequent sectors can have up to 128
+ * descriptors. Each sector is 4KB. Data follows the descriptor
+ * sectors.
+ *
+ * This will return the number of sectors needed to encompass
+ * the passed number of descriptors in desc_cnt.
+ *
+ * This will never return 0, even if desc_cnt is 0.
+ */
+static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
+{
+ uint32_t desc_sectors;
+
+ desc_cnt += 2; /* account for header in first sector */
+ desc_sectors = desc_cnt / 128;
+ if (desc_cnt % 128) {
+ desc_sectors++;
+ }
+
+ return desc_sectors;
+}
+
+
+/* Reads the log header, and subsequent descriptors (if any). This
+ * will allocate all the space for buffer, which must be NULL when
+ * passed into this function. Each descriptor will also be validated,
+ * and error returned if any are invalid. */
+static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
+ VHDXLogEntries *log, VHDXLogDescEntries **buffer)
+{
+ int ret = 0;
+ uint32_t desc_sectors;
+ uint32_t sectors_read;
+ VHDXLogEntryHeader hdr;
+ VHDXLogDescEntries *desc_entries = NULL;
+ int i;
+
+ assert(*buffer == NULL);
+
+ ret = vhdx_log_peek_hdr(bs, log, &hdr);
+ if (ret < 0) {
+ goto exit;
+ }
+ vhdx_log_entry_hdr_le_import(&hdr);
+ if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
+ desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE);
+
+ ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries,
+ desc_sectors, false);
+ if (ret < 0) {
+ goto free_and_exit;
+ }
+ if (sectors_read != desc_sectors) {
+ ret = -EINVAL;
+ goto free_and_exit;
+ }
+
+ /* put in proper endianness, and validate each desc */
+ for (i = 0; i < hdr.descriptor_count; i++) {
+ vhdx_log_desc_le_import(&desc_entries->desc[i]);
+ if (vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) {
+ ret = -EINVAL;
+ goto free_and_exit;
+ }
+ }
+
+ *buffer = desc_entries;
+ goto exit;
+
+free_and_exit:
+ qemu_vfree(desc_entries);
+exit:
+ return ret;
+}
+
+
+/* Flushes the descriptor described by desc to the VHDX image file.
+ * If the descriptor is a data descriptor, than 'data' must be non-NULL,
+ * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be
+ * written.
+ *
+ * Verification is performed to make sure the sequence numbers of a data
+ * descriptor match the sequence number in the desc.
+ *
+ * For a zero descriptor, it may describe multiple sectors to fill with zeroes.
+ * In this case, it should be noted that zeroes are written to disk, and the
+ * image file is not extended as a sparse file. */
+static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
+ VHDXLogDataSector *data)
+{
+ int ret = 0;
+ uint64_t seq, file_offset;
+ uint32_t offset = 0;
+ void *buffer = NULL;
+ uint64_t count = 1;
+ int i;
+
+ buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
+
+ if (!memcmp(&desc->signature, "desc", 4)) {
+ /* data sector */
+ if (data == NULL) {
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ /* The sequence number of the data sector must match that
+ * in the descriptor */
+ seq = data->sequence_high;
+ seq <<= 32;
+ seq |= data->sequence_low & 0xffffffff;
+
+ if (seq != desc->sequence_number) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ /* Each data sector is in total 4096 bytes, however the first
+ * 8 bytes, and last 4 bytes, are located in the descriptor */
+ memcpy(buffer, &desc->leading_bytes, 8);
+ offset += 8;
+
+ memcpy(buffer+offset, data->data, 4084);
+ offset += 4084;
+
+ memcpy(buffer+offset, &desc->trailing_bytes, 4);
+
+ } else if (!memcmp(&desc->signature, "zero", 4)) {
+ /* write 'count' sectors of sector */
+ memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
+ count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
+ }
+
+ file_offset = desc->file_offset;
+
+ /* count is only > 1 if we are writing zeroes */
+ for (i = 0; i < count; i++) {
+ ret = bdrv_pwrite_sync(bs->file, file_offset, buffer,
+ VHDX_LOG_SECTOR_SIZE);
+ if (ret < 0) {
+ goto exit;
+ }
+ file_offset += VHDX_LOG_SECTOR_SIZE;
+ }
+
+exit:
+ qemu_vfree(buffer);
+ return ret;
+}
+
+/* Flush the entire log (as described by 'logs') to the VHDX image
+ * file, and then set the log to 'empty' status once complete.
+ *
+ * The log entries should be validate prior to flushing */
+static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
+ VHDXLogSequence *logs)
+{
+ int ret = 0;
+ int i;
+ uint32_t cnt, sectors_read;
+ uint64_t new_file_size;
+ void *data = NULL;
+ VHDXLogDescEntries *desc_entries = NULL;
+ VHDXLogEntryHeader hdr_tmp = { 0 };
+
+ cnt = logs->count;
+
+ data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
+
+ ret = vhdx_user_visible_write(bs, s);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ /* each iteration represents one log sequence, which may span multiple
+ * sectors */
+ while (cnt--) {
+ ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp);
+ if (ret < 0) {
+ goto exit;
+ }
+ /* if the log shows a FlushedFileOffset larger than our current file
+ * size, then that means the file has been truncated / corrupted, and
+ * we must refused to open it / use it */
+ if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file)) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
+ if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) {
+ /* data sector, so read a sector to flush */
+ ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
+ data, 1, false);
+ if (ret < 0) {
+ goto exit;
+ }
+ if (sectors_read != 1) {
+ ret = -EINVAL;
+ goto exit;
+ }
+ }
+
+ ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
+ if (ret < 0) {
+ goto exit;
+ }
+ }
+ if (bdrv_getlength(bs->file) < desc_entries->hdr.last_file_offset) {
+ new_file_size = desc_entries->hdr.last_file_offset;
+ if (new_file_size % (1024*1024)) {
+ /* round up to nearest 1MB boundary */
+ new_file_size = ((new_file_size >> 20) + 1) << 20;
+ bdrv_truncate(bs->file, new_file_size);
+ }
+ }
+ qemu_vfree(desc_entries);
+ desc_entries = NULL;
+ }
+
+ bdrv_flush(bs);
+ /* once the log is fully flushed, indicate that we have an empty log
+ * now. This also sets the log guid to 0, to indicate an empty log */
+ vhdx_log_reset(bs, s);
+
+exit:
+ qemu_vfree(data);
+ qemu_vfree(desc_entries);
+ return ret;
+}
+
+static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
+ VHDXLogEntries *log, uint64_t seq,
+ bool *valid, VHDXLogEntryHeader *entry)
+{
+ int ret = 0;
+ VHDXLogEntryHeader hdr;
+ void *buffer = NULL;
+ uint32_t i, desc_sectors, total_sectors, crc;
+ uint32_t sectors_read = 0;
+ VHDXLogDescEntries *desc_buffer = NULL;
+
+ *valid = false;
+
+ ret = vhdx_log_peek_hdr(bs, log, &hdr);
+ if (ret < 0) {
+ goto inc_and_exit;
+ }
+
+ vhdx_log_entry_hdr_le_import(&hdr);
+
+
+ if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
+ goto inc_and_exit;
+ }
+
+ if (seq > 0) {
+ if (hdr.sequence_number != seq + 1) {
+ goto inc_and_exit;
+ }
+ }
+
+ desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
+
+ /* Read desc sectors, and calculate log checksum */
+
+ total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;
+
+
+ /* read_desc() will incrememnt the read idx */
+ ret = vhdx_log_read_desc(bs, s, log, &desc_buffer);
+ if (ret < 0) {
+ goto free_and_exit;
+ }
+
+ crc = vhdx_checksum_calc(0xffffffff, (void *)desc_buffer,
+ desc_sectors * VHDX_LOG_SECTOR_SIZE, 4);
+ crc ^= 0xffffffff;
+
+ buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
+ if (total_sectors > desc_sectors) {
+ for (i = 0; i < total_sectors - desc_sectors; i++) {
+ sectors_read = 0;
+ ret = vhdx_log_read_sectors(bs, log, &sectors_read, buffer,
+ 1, false);
+ if (ret < 0 || sectors_read != 1) {
+ goto free_and_exit;
+ }
+ crc = vhdx_checksum_calc(crc, buffer, VHDX_LOG_SECTOR_SIZE, -1);
+ crc ^= 0xffffffff;
+ }
+ }
+ crc ^= 0xffffffff;
+ if (crc != desc_buffer->hdr.checksum) {
+ goto free_and_exit;
+ }
+
+ *valid = true;
+ *entry = hdr;
+ goto free_and_exit;
+
+inc_and_exit:
+ log->read = vhdx_log_inc_idx(log->read, log->length);
+
+free_and_exit:
+ qemu_vfree(buffer);
+ qemu_vfree(desc_buffer);
+ return ret;
+}
+
+/* Search through the log circular buffer, and find the valid, active
+ * log sequence, if any exists
+ * */
+static int vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s,
+ VHDXLogSequence *logs)
+{
+ int ret = 0;
+ uint32_t tail;
+ bool seq_valid = false;
+ VHDXLogSequence candidate = { 0 };
+ VHDXLogEntryHeader hdr = { 0 };
+ VHDXLogEntries curr_log;
+
+ memcpy(&curr_log, &s->log, sizeof(VHDXLogEntries));
+ curr_log.write = curr_log.length; /* assume log is full */
+ curr_log.read = 0;
+
+
+ /* now we will go through the whole log sector by sector, until
+ * we find a valid, active log sequence, or reach the end of the
+ * log buffer */
+ for (;;) {
+ uint64_t curr_seq = 0;
+ VHDXLogSequence current = { 0 };
+
+ tail = curr_log.read;
+
+ ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
+ &seq_valid, &hdr);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ if (seq_valid) {
+ current.valid = true;
+ current.log = curr_log;
+ current.log.read = tail;
+ current.log.write = curr_log.read;
+ current.count = 1;
+ current.hdr = hdr;
+
+
+ for (;;) {
+ ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
+ &seq_valid, &hdr);
+ if (ret < 0) {
+ goto exit;
+ }
+ if (seq_valid == false) {
+ break;
+ }
+ current.log.write = curr_log.read;
+ current.count++;
+
+ curr_seq = hdr.sequence_number;
+ }
+ }
+
+ if (current.valid) {
+ if (candidate.valid == false ||
+ current.hdr.sequence_number > candidate.hdr.sequence_number) {
+ candidate = current;
+ }
+ }
+
+ if (curr_log.read < tail) {
+ break;
+ }
+ }
+
+ *logs = candidate;
+
+ if (candidate.valid) {
+ /* this is the next sequence number, for writes */
+ s->log.sequence = candidate.hdr.sequence_number + 1;
+ }
+
+
+exit:
+ return ret;
+}
+
+/* Parse the replay log. Per the VHDX spec, if the log is present
+ * it must be replayed prior to opening the file, even read-only.
+ *
+ * If read-only, we must replay the log in RAM (or refuse to open
+ * a dirty VHDX file read-only) */
+int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed)
+{
+ int ret = 0;
+ VHDXHeader *hdr;
+ VHDXLogSequence logs = { 0 };
+
+ hdr = s->headers[s->curr_header];
+
+ *flushed = false;
+
+ /* s->log.hdr is freed in vhdx_close() */
+ if (s->log.hdr == NULL) {
+ s->log.hdr = qemu_blockalign(bs, sizeof(VHDXLogEntryHeader));
+ }
+
+ s->log.offset = hdr->log_offset;
+ s->log.length = hdr->log_length;
+
+ if (s->log.offset < VHDX_LOG_MIN_SIZE ||
+ s->log.offset % VHDX_LOG_MIN_SIZE) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ /* per spec, only log version of 0 is supported */
+ if (hdr->log_version != 0) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ /* If either the log guid, or log length is zero,
+ * then a replay log is not present */
+ if (guid_eq(hdr->log_guid, zero_guid)) {
+ goto exit;
+ }
+
+ if (hdr->log_length == 0) {
+ goto exit;
+ }
+
+ if (hdr->log_length % VHDX_LOG_MIN_SIZE) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+
+ /* The log is present, we need to find if and where there is an active
+ * sequence of valid entries present in the log. */
+
+ ret = vhdx_log_search(bs, s, &logs);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ if (logs.valid) {
+ /* now flush the log */
+ ret = vhdx_log_flush(bs, s, &logs);
+ if (ret < 0) {
+ goto exit;
+ }
+ *flushed = true;
+ }
+
+
+exit:
+ return ret;
+}
+
+
+
+static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
+ VHDXLogDataSector *sector, void *data,
+ uint64_t seq)
+{
+ /* 8 + 4084 + 4 = 4096, 1 log sector */
+ memcpy(&desc->leading_bytes, data, 8);
+ data += 8;
+ cpu_to_le64s(&desc->leading_bytes);
+ memcpy(sector->data, data, 4084);
+ data += 4084;
+ memcpy(&desc->trailing_bytes, data, 4);
+ cpu_to_le32s(&desc->trailing_bytes);
+ data += 4;
+
+ sector->sequence_high = (uint32_t) (seq >> 32);
+ sector->sequence_low = (uint32_t) (seq & 0xffffffff);
+ sector->data_signature = VHDX_LOG_DATA_SIGNATURE;
+
+ vhdx_log_desc_le_export(desc);
+ vhdx_log_data_le_export(sector);
+}
+
+
+static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
+ void *data, uint32_t length, uint64_t offset)
+{
+ int ret = 0;
+ void *buffer = NULL;
+ void *merged_sector = NULL;
+ void *data_tmp, *sector_write;
+ unsigned int i;
+ int sector_offset;
+ uint32_t desc_sectors, sectors, total_length;
+ uint32_t sectors_written = 0;
+ uint32_t aligned_length;
+ uint32_t leading_length = 0;
+ uint32_t trailing_length = 0;
+ uint32_t partial_sectors = 0;
+ uint32_t bytes_written = 0;
+ uint64_t file_offset;
+ VHDXHeader *header;
+ VHDXLogEntryHeader new_hdr;
+ VHDXLogDescriptor *new_desc = NULL;
+ VHDXLogDataSector *data_sector = NULL;
+ MSGUID new_guid = { 0 };
+
+ header = s->headers[s->curr_header];
+
+ /* need to have offset read data, and be on 4096 byte boundary */
+
+ if (length > header->log_length) {
+ /* no log present. we could create a log here instead of failing */
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (guid_eq(header->log_guid, zero_guid)) {
+ vhdx_guid_generate(&new_guid);
+ vhdx_update_headers(bs, s, false, &new_guid);
+ } else {
+ /* currently, we require that the log be flushed after
+ * every write. */
+ ret = -ENOTSUP;
+ goto exit;
+ }
+
+ /* 0 is an invalid sequence number, but may also represent the first
+ * log write (or a wrapped seq) */
+ if (s->log.sequence == 0) {
+ s->log.sequence = 1;
+ }
+
+ sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
+ file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE;
+
+ aligned_length = length;
+
+ /* add in the unaligned head and tail bytes */
+ if (sector_offset) {
+ leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset);
+ leading_length = leading_length > length ? length : leading_length;
+ aligned_length -= leading_length;
+ partial_sectors++;
+ }
+
+ sectors = aligned_length / VHDX_LOG_SECTOR_SIZE;
+ trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE);
+ if (trailing_length) {
+ partial_sectors++;
+ }
+
+ sectors += partial_sectors;
+
+ /* sectors is now how many sectors the data itself takes, not
+ * including the header and descriptor metadata */
+
+ new_hdr = (VHDXLogEntryHeader) {
+ .signature = VHDX_LOG_SIGNATURE,
+ .tail = s->log.tail,
+ .sequence_number = s->log.sequence,
+ .descriptor_count = sectors,
+ .reserved = 0,
+ .flushed_file_offset = bdrv_getlength(bs->file),
+ .last_file_offset = bdrv_getlength(bs->file),
+ };
+
+ new_hdr.log_guid = header->log_guid;
+
+ desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count);
+
+ total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE;
+ new_hdr.entry_length = total_length;
+
+ vhdx_log_entry_hdr_le_export(&new_hdr);
+
+ buffer = qemu_blockalign(bs, total_length);
+ memcpy(buffer, &new_hdr, sizeof(new_hdr));
+
+ new_desc = (VHDXLogDescriptor *) (buffer + sizeof(new_hdr));
+ data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
+ data_tmp = data;
+
+ /* All log sectors are 4KB, so for any partial sectors we must
+ * merge the data with preexisting data from the final file
+ * destination */
+ merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
+
+ for (i = 0; i < sectors; i++) {
+ new_desc->signature = VHDX_LOG_DESC_SIGNATURE;
+ new_desc->sequence_number = s->log.sequence;
+ new_desc->file_offset = file_offset;
+
+ if (i == 0 && leading_length) {
+ /* partial sector at the front of the buffer */
+ ret = bdrv_pread(bs->file, file_offset, merged_sector,
+ VHDX_LOG_SECTOR_SIZE);
+ if (ret < 0) {
+ goto exit;
+ }
+ memcpy(merged_sector + sector_offset, data_tmp, leading_length);
+ bytes_written = leading_length;
+ sector_write = merged_sector;
+ } else if (i == sectors - 1 && trailing_length) {
+ /* partial sector at the end of the buffer */
+ ret = bdrv_pread(bs->file,
+ file_offset,
+ merged_sector + trailing_length,
+ VHDX_LOG_SECTOR_SIZE - trailing_length);
+ if (ret < 0) {
+ goto exit;
+ }
+ memcpy(merged_sector, data_tmp, trailing_length);
+ bytes_written = trailing_length;
+ sector_write = merged_sector;
+ } else {
+ bytes_written = VHDX_LOG_SECTOR_SIZE;
+ sector_write = data_tmp;
+ }
+
+ /* populate the raw sector data into the proper structures,
+ * as well as update the descriptor, and convert to proper
+ * endianness */
+ vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write,
+ s->log.sequence);
+
+ data_tmp += bytes_written;
+ data_sector++;
+ new_desc++;
+ file_offset += VHDX_LOG_SECTOR_SIZE;
+ }
+
+ /* checksum covers entire entry, from the log header through the
+ * last data sector */
+ vhdx_update_checksum(buffer, total_length,
+ offsetof(VHDXLogEntryHeader, checksum));
+ cpu_to_le32s((uint32_t *)(buffer + 4));
+
+ /* now write to the log */
+ vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
+ desc_sectors + sectors);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ if (sectors_written != desc_sectors + sectors) {
+ /* instead of failing, we could flush the log here */
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ s->log.sequence++;
+ /* write new tail */
+ s->log.tail = s->log.write;
+
+exit:
+ qemu_vfree(buffer);
+ qemu_vfree(merged_sector);
+ return ret;
+}
+
+/* Perform a log write, and then immediately flush the entire log */
+int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
+ void *data, uint32_t length, uint64_t offset)
+{
+ int ret = 0;
+ VHDXLogSequence logs = { .valid = true,
+ .count = 1,
+ .hdr = { 0 } };
+
+
+ /* Make sure data written (new and/or changed blocks) is stable
+ * on disk, before creating log entry */
+ bdrv_flush(bs);
+ ret = vhdx_log_write(bs, s, data, length, offset);
+ if (ret < 0) {
+ goto exit;
+ }
+ logs.log = s->log;
+
+ /* Make sure log is stable on disk */
+ bdrv_flush(bs);
+ ret = vhdx_log_flush(bs, s, &logs);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ s->log = logs.log;
+
+exit:
+ return ret;
+}
+
diff --git a/block/vhdx.c b/block/vhdx.c
index 6cb04122bb..7d1af9663b 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -6,9 +6,9 @@
* Authors:
* Jeff Cody <jcody@redhat.com>
*
- * This is based on the "VHDX Format Specification v0.95", published 4/12/2012
+ * This is based on the "VHDX Format Specification v1.00", published 8/25/2012
* by Microsoft:
- * https://www.microsoft.com/en-us/download/details.aspx?id=29681
+ * https://www.microsoft.com/en-us/download/details.aspx?id=34750
*
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
* See the COPYING.LIB file in the top-level directory.
@@ -22,6 +22,20 @@
#include "block/vhdx.h"
#include "migration/migration.h"
+#include <uuid/uuid.h>
+#include <glib.h>
+
+/* Options for VHDX creation */
+
+#define VHDX_BLOCK_OPT_LOG_SIZE "log_size"
+#define VHDX_BLOCK_OPT_BLOCK_SIZE "block_size"
+#define VHDX_BLOCK_OPT_ZERO "block_state_zero"
+
+typedef enum VHDXImageType {
+ VHDX_TYPE_DYNAMIC = 0,
+ VHDX_TYPE_FIXED,
+ VHDX_TYPE_DIFFERENCING, /* Currently unsupported */
+} VHDXImageType;
/* Several metadata and region table data entries are identified by
* guids in a MS-specific GUID format. */
@@ -104,16 +118,6 @@ static const MSGUID parent_vhdx_guid = { .data1 = 0xb04aefb7,
META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \
META_PHYS_SECTOR_SIZE_PRESENT)
-typedef struct VHDXMetadataEntries {
- VHDXMetadataTableEntry file_parameters_entry;
- VHDXMetadataTableEntry virtual_disk_size_entry;
- VHDXMetadataTableEntry page83_data_entry;
- VHDXMetadataTableEntry logical_sector_size_entry;
- VHDXMetadataTableEntry phys_sector_size_entry;
- VHDXMetadataTableEntry parent_locator_entry;
- uint16_t present;
-} VHDXMetadataEntries;
-
typedef struct VHDXSectorInfo {
uint32_t bat_idx; /* BAT entry index */
@@ -124,44 +128,31 @@ typedef struct VHDXSectorInfo {
uint64_t block_offset; /* block offset, in bytes */
} VHDXSectorInfo;
+/* Calculates new checksum.
+ *
+ * Zero is substituted during crc calculation for the original crc field
+ * crc_offset: byte offset in buf of the buffer crc
+ * buf: buffer pointer
+ * size: size of buffer (must be > crc_offset+4)
+ *
+ * Note: The resulting checksum is in the CPU endianness, not necessarily
+ * in the file format endianness (LE). Any header export to disk should
+ * make sure that vhdx_header_le_export() is used to convert to the
+ * correct endianness
+ */
+uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset)
+{
+ uint32_t crc;
+ assert(buf != NULL);
+ assert(size > (crc_offset + sizeof(crc)));
-typedef struct BDRVVHDXState {
- CoMutex lock;
-
- int curr_header;
- VHDXHeader *headers[2];
-
- VHDXRegionTableHeader rt;
- VHDXRegionTableEntry bat_rt; /* region table for the BAT */
- VHDXRegionTableEntry metadata_rt; /* region table for the metadata */
-
- VHDXMetadataTableHeader metadata_hdr;
- VHDXMetadataEntries metadata_entries;
-
- VHDXFileParameters params;
- uint32_t block_size;
- uint32_t block_size_bits;
- uint32_t sectors_per_block;
- uint32_t sectors_per_block_bits;
-
- uint64_t virtual_disk_size;
- uint32_t logical_sector_size;
- uint32_t physical_sector_size;
-
- uint64_t chunk_ratio;
- uint32_t chunk_ratio_bits;
- uint32_t logical_sector_size_bits;
-
- uint32_t bat_entries;
- VHDXBatEntry *bat;
- uint64_t bat_offset;
-
- VHDXParentLocatorHeader parent_header;
- VHDXParentLocatorEntry *parent_entries;
+ memset(buf + crc_offset, 0, sizeof(crc));
+ crc = crc32c(0xffffffff, buf, size);
+ memcpy(buf + crc_offset, &crc, sizeof(crc));
- Error *migration_blocker;
-} BDRVVHDXState;
+ return crc;
+}
uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
int crc_offset)
@@ -214,6 +205,71 @@ bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset)
/*
+ * This generates a UUID that is compliant with the MS GUIDs used
+ * in the VHDX spec (and elsewhere).
+ */
+void vhdx_guid_generate(MSGUID *guid)
+{
+ uuid_t uuid;
+ assert(guid != NULL);
+
+ uuid_generate(uuid);
+ memcpy(guid, uuid, sizeof(MSGUID));
+}
+
+/* Check for region overlaps inside the VHDX image */
+static int vhdx_region_check(BDRVVHDXState *s, uint64_t start, uint64_t length)
+{
+ int ret = 0;
+ uint64_t end;
+ VHDXRegionEntry *r;
+
+ end = start + length;
+ QLIST_FOREACH(r, &s->regions, entries) {
+ if (!((start >= r->end) || (end <= r->start))) {
+ ret = -EINVAL;
+ goto exit;
+ }
+ }
+
+exit:
+ return ret;
+}
+
+/* Register a region for future checks */
+static void vhdx_region_register(BDRVVHDXState *s,
+ uint64_t start, uint64_t length)
+{
+ VHDXRegionEntry *r;
+
+ r = g_malloc0(sizeof(*r));
+
+ r->start = start;
+ r->end = start + length;
+
+ QLIST_INSERT_HEAD(&s->regions, r, entries);
+}
+
+/* Free all registered regions */
+static void vhdx_region_unregister_all(BDRVVHDXState *s)
+{
+ VHDXRegionEntry *r, *r_next;
+
+ QLIST_FOREACH_SAFE(r, &s->regions, entries, r_next) {
+ QLIST_REMOVE(r, entries);
+ g_free(r);
+ }
+}
+
+static void vhdx_set_shift_bits(BDRVVHDXState *s)
+{
+ s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
+ s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block);
+ s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio);
+ s->block_size_bits = 31 - clz32(s->block_size);
+}
+
+/*
* Per the MS VHDX Specification, for every VHDX file:
* - The header section is fixed size - 1 MB
* - The header section is always the first "object"
@@ -232,25 +288,118 @@ static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
-/* All VHDX structures on disk are little endian */
-static void vhdx_header_le_import(VHDXHeader *h)
+/*
+ * Writes the header to the specified offset.
+ *
+ * This will optionally read in buffer data from disk (otherwise zero-fill),
+ * and then update the header checksum. Header is converted to proper
+ * endianness before being written to the specified file offset
+ */
+static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
+ uint64_t offset, bool read)
+{
+ uint8_t *buffer = NULL;
+ int ret;
+ VHDXHeader header_le;
+
+ assert(bs_file != NULL);
+ assert(hdr != NULL);
+
+ /* the header checksum is not over just the packed size of VHDXHeader,
+ * but rather over the entire 'reserved' range for the header, which is
+ * 4KB (VHDX_HEADER_SIZE). */
+
+ buffer = qemu_blockalign(bs_file, VHDX_HEADER_SIZE);
+ if (read) {
+ /* if true, we can't assume the extra reserved bytes are 0 */
+ ret = bdrv_pread(bs_file, offset, buffer, VHDX_HEADER_SIZE);
+ if (ret < 0) {
+ goto exit;
+ }
+ } else {
+ memset(buffer, 0, VHDX_HEADER_SIZE);
+ }
+
+ /* overwrite the actual VHDXHeader portion */
+ memcpy(buffer, hdr, sizeof(VHDXHeader));
+ hdr->checksum = vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
+ offsetof(VHDXHeader, checksum));
+ vhdx_header_le_export(hdr, &header_le);
+ ret = bdrv_pwrite_sync(bs_file, offset, &header_le, sizeof(VHDXHeader));
+
+exit:
+ qemu_vfree(buffer);
+ return ret;
+}
+
+/* Update the VHDX headers
+ *
+ * This follows the VHDX spec procedures for header updates.
+ *
+ * - non-current header is updated with largest sequence number
+ */
+static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
+ bool generate_data_write_guid, MSGUID *log_guid)
{
- assert(h != NULL);
+ int ret = 0;
+ int hdr_idx = 0;
+ uint64_t header_offset = VHDX_HEADER1_OFFSET;
+
+ VHDXHeader *active_header;
+ VHDXHeader *inactive_header;
+
+ /* operate on the non-current header */
+ if (s->curr_header == 0) {
+ hdr_idx = 1;
+ header_offset = VHDX_HEADER2_OFFSET;
+ }
+
+ active_header = s->headers[s->curr_header];
+ inactive_header = s->headers[hdr_idx];
+
+ inactive_header->sequence_number = active_header->sequence_number + 1;
+
+ /* a new file guid must be generated before any file write, including
+ * headers */
+ inactive_header->file_write_guid = s->session_guid;
+
+ /* a new data guid only needs to be generated before any guest-visible
+ * writes (i.e. something observable via virtual disk read) */
+ if (generate_data_write_guid) {
+ vhdx_guid_generate(&inactive_header->data_write_guid);
+ }
- le32_to_cpus(&h->signature);
- le32_to_cpus(&h->checksum);
- le64_to_cpus(&h->sequence_number);
+ /* update the log guid if present */
+ if (log_guid) {
+ inactive_header->log_guid = *log_guid;
+ }
- leguid_to_cpus(&h->file_write_guid);
- leguid_to_cpus(&h->data_write_guid);
- leguid_to_cpus(&h->log_guid);
+ vhdx_write_header(bs->file, inactive_header, header_offset, true);
+ if (ret < 0) {
+ goto exit;
+ }
+ s->curr_header = hdr_idx;
- le16_to_cpus(&h->log_version);
- le16_to_cpus(&h->version);
- le32_to_cpus(&h->log_length);
- le64_to_cpus(&h->log_offset);
+exit:
+ return ret;
}
+/*
+ * The VHDX spec calls for header updates to be performed twice, so that both
+ * the current and non-current header have valid info
+ */
+int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s,
+ bool generate_data_write_guid, MSGUID *log_guid)
+{
+ int ret;
+
+ ret = vhdx_update_header(bs, s, generate_data_write_guid, log_guid);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = vhdx_update_header(bs, s, generate_data_write_guid, log_guid);
+ return ret;
+}
/* opens the specified header block from the VHDX file header section */
static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
@@ -264,6 +413,7 @@ static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
uint64_t h2_seq = 0;
uint8_t *buffer;
+ /* header1 & header2 are freed in vhdx_close() */
header1 = qemu_blockalign(bs, sizeof(VHDXHeader));
header2 = qemu_blockalign(bs, sizeof(VHDXHeader));
@@ -328,6 +478,9 @@ static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
}
}
+ vhdx_region_register(s, s->headers[s->curr_header]->log_offset,
+ s->headers[s->curr_header]->log_length);
+
ret = 0;
goto exit;
@@ -364,10 +517,7 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
goto fail;
}
memcpy(&s->rt, buffer, sizeof(s->rt));
- le32_to_cpus(&s->rt.signature);
- le32_to_cpus(&s->rt.checksum);
- le32_to_cpus(&s->rt.entry_count);
- le32_to_cpus(&s->rt.reserved);
+ vhdx_region_header_le_import(&s->rt);
offset += sizeof(s->rt);
if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
@@ -386,10 +536,16 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
memcpy(&rt_entry, buffer + offset, sizeof(rt_entry));
offset += sizeof(rt_entry);
- leguid_to_cpus(&rt_entry.guid);
- le64_to_cpus(&rt_entry.file_offset);
- le32_to_cpus(&rt_entry.length);
- le32_to_cpus(&rt_entry.data_bits);
+ vhdx_region_entry_le_import(&rt_entry);
+
+ /* check for region overlap between these entries, and any
+ * other memory regions in the file */
+ ret = vhdx_region_check(s, rt_entry.file_offset, rt_entry.length);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ vhdx_region_register(s, rt_entry.file_offset, rt_entry.length);
/* see if we recognize the entry */
if (guid_eq(rt_entry.guid, bat_guid)) {
@@ -421,6 +577,12 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
goto fail;
}
}
+
+ if (!bat_rt_found || !metadata_rt_found) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
ret = 0;
fail:
@@ -464,9 +626,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr));
offset += sizeof(s->metadata_hdr);
- le64_to_cpus(&s->metadata_hdr.signature);
- le16_to_cpus(&s->metadata_hdr.reserved);
- le16_to_cpus(&s->metadata_hdr.entry_count);
+ vhdx_metadata_header_le_import(&s->metadata_hdr);
if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) {
ret = -EINVAL;
@@ -485,11 +645,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
memcpy(&md_entry, buffer + offset, sizeof(md_entry));
offset += sizeof(md_entry);
- leguid_to_cpus(&md_entry.item_id);
- le32_to_cpus(&md_entry.offset);
- le32_to_cpus(&md_entry.length);
- le32_to_cpus(&md_entry.data_bits);
- le32_to_cpus(&md_entry.reserved2);
+ vhdx_metadata_entry_le_import(&md_entry);
if (guid_eq(md_entry.item_id, file_param_guid)) {
if (s->metadata_entries.present & META_FILE_PARAMETER_PRESENT) {
@@ -662,10 +818,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
goto exit;
}
- s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
- s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block);
- s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio);
- s->block_size_bits = 31 - clz32(s->block_size);
+ vhdx_set_shift_bits(s);
ret = 0;
@@ -674,48 +827,49 @@ exit:
return ret;
}
-/* Parse the replay log. Per the VHDX spec, if the log is present
- * it must be replayed prior to opening the file, even read-only.
- *
- * If read-only, we must replay the log in RAM (or refuse to open
- * a dirty VHDX file read-only */
-static int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s)
+/*
+ * Calculate the number of BAT entries, including sector
+ * bitmap entries.
+ */
+static void vhdx_calc_bat_entries(BDRVVHDXState *s)
{
- int ret = 0;
- int i;
- VHDXHeader *hdr;
-
- hdr = s->headers[s->curr_header];
+ uint32_t data_blocks_cnt, bitmap_blocks_cnt;
- /* either the log guid, or log length is zero,
- * then a replay log is present */
- for (i = 0; i < sizeof(hdr->log_guid.data4); i++) {
- ret |= hdr->log_guid.data4[i];
- }
- if (hdr->log_guid.data1 == 0 &&
- hdr->log_guid.data2 == 0 &&
- hdr->log_guid.data3 == 0 &&
- ret == 0) {
- goto exit;
+ data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits;
+ if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) {
+ data_blocks_cnt++;
}
-
- /* per spec, only log version of 0 is supported */
- if (hdr->log_version != 0) {
- ret = -EINVAL;
- goto exit;
+ bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits;
+ if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) {
+ bitmap_blocks_cnt++;
}
- if (hdr->log_length == 0) {
- goto exit;
+ if (s->parent_entries) {
+ s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1);
+ } else {
+ s->bat_entries = data_blocks_cnt +
+ ((data_blocks_cnt - 1) >> s->chunk_ratio_bits);
}
- /* We currently do not support images with logs to replay */
- ret = -ENOTSUP;
-
-exit:
- return ret;
}
+static void vhdx_close(BlockDriverState *bs)
+{
+ BDRVVHDXState *s = bs->opaque;
+ qemu_vfree(s->headers[0]);
+ s->headers[0] = NULL;
+ qemu_vfree(s->headers[1]);
+ s->headers[1] = NULL;
+ qemu_vfree(s->bat);
+ s->bat = NULL;
+ qemu_vfree(s->parent_entries);
+ s->parent_entries = NULL;
+ migrate_del_blocker(s->migration_blocker);
+ error_free(s->migration_blocker);
+ qemu_vfree(s->log.hdr);
+ s->log.hdr = NULL;
+ vhdx_region_unregister_all(s);
+}
static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
@@ -724,12 +878,14 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
int ret = 0;
uint32_t i;
uint64_t signature;
- uint32_t data_blocks_cnt, bitmap_blocks_cnt;
+ bool log_flushed = false;
s->bat = NULL;
+ s->first_visible_write = true;
qemu_co_mutex_init(&s->lock);
+ QLIST_INIT(&s->regions);
/* validate the file signature */
ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
@@ -741,46 +897,38 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
+ /* This is used for any header updates, for the file_write_guid.
+ * The spec dictates that a new value should be used for the first
+ * header update */
+ vhdx_guid_generate(&s->session_guid);
+
ret = vhdx_parse_header(bs, s);
- if (ret) {
+ if (ret < 0) {
goto fail;
}
- ret = vhdx_parse_log(bs, s);
- if (ret) {
+ ret = vhdx_parse_log(bs, s, &log_flushed);
+ if (ret < 0) {
goto fail;
}
ret = vhdx_open_region_tables(bs, s);
- if (ret) {
+ if (ret < 0) {
goto fail;
}
ret = vhdx_parse_metadata(bs, s);
- if (ret) {
+ if (ret < 0) {
goto fail;
}
+
s->block_size = s->params.block_size;
/* the VHDX spec dictates that virtual_disk_size is always a multiple of
* logical_sector_size */
bs->total_sectors = s->virtual_disk_size >> s->logical_sector_size_bits;
- data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits;
- if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) {
- data_blocks_cnt++;
- }
- bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits;
- if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) {
- bitmap_blocks_cnt++;
- }
-
- if (s->parent_entries) {
- s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1);
- } else {
- s->bat_entries = data_blocks_cnt +
- ((data_blocks_cnt - 1) >> s->chunk_ratio_bits);
- }
+ vhdx_calc_bat_entries(s);
s->bat_offset = s->bat_rt.file_offset;
@@ -790,6 +938,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
+ /* s->bat is freed in vhdx_close() */
s->bat = qemu_blockalign(bs, s->bat_rt.length);
ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
@@ -797,16 +946,36 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
+ uint64_t payblocks = s->chunk_ratio;
+ /* endian convert, and verify populated BAT field file offsets against
+ * region table and log entries */
for (i = 0; i < s->bat_entries; i++) {
le64_to_cpus(&s->bat[i]);
+ if (payblocks--) {
+ /* payload bat entries */
+ if ((s->bat[i] & VHDX_BAT_STATE_BIT_MASK) ==
+ PAYLOAD_BLOCK_FULLY_PRESENT) {
+ ret = vhdx_region_check(s, s->bat[i] & VHDX_BAT_FILE_OFF_MASK,
+ s->block_size);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ } else {
+ payblocks = s->chunk_ratio;
+ /* Once differencing files are supported, verify sector bitmap
+ * blocks here */
+ }
}
if (flags & BDRV_O_RDWR) {
- ret = -ENOTSUP;
- goto fail;
+ ret = vhdx_update_headers(bs, s, false, NULL);
+ if (ret < 0) {
+ goto fail;
+ }
}
- /* TODO: differencing files, write */
+ /* TODO: differencing files */
/* Disable migration when VHDX images are used */
error_set(&s->migration_blocker,
@@ -816,10 +985,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
return 0;
fail:
- qemu_vfree(s->headers[0]);
- qemu_vfree(s->headers[1]);
- qemu_vfree(s->bat);
- qemu_vfree(s->parent_entries);
+ vhdx_close(bs);
return ret;
}
@@ -859,7 +1025,7 @@ static void vhdx_block_translate(BDRVVHDXState *s, int64_t sector_num,
sinfo->bytes_avail = sinfo->sectors_avail << s->logical_sector_size_bits;
- sinfo->file_offset = s->bat[sinfo->bat_idx] >> VHDX_BAT_FILE_OFF_BITS;
+ sinfo->file_offset = s->bat[sinfo->bat_idx] & VHDX_BAT_FILE_OFF_MASK;
sinfo->block_offset = block_offset << s->logical_sector_size_bits;
@@ -873,7 +1039,6 @@ static void vhdx_block_translate(BDRVVHDXState *s, int64_t sector_num,
* in the block, and add in the payload data block offset
* in the file, in bytes, to get the final read address */
- sinfo->file_offset <<= 20; /* now in bytes, rather than 1MB units */
sinfo->file_offset += sinfo->block_offset;
}
@@ -914,7 +1079,7 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
/* return zero */
qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail);
break;
- case PAYLOAD_BLOCK_FULL_PRESENT:
+ case PAYLOAD_BLOCK_FULLY_PRESENT:
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_readv(bs->file,
sinfo.file_offset >> BDRV_SECTOR_BITS,
@@ -944,26 +1109,772 @@ exit:
return ret;
}
+/*
+ * Allocate a new payload block at the end of the file.
+ *
+ * Allocation will happen at 1MB alignment inside the file
+ *
+ * Returns the file offset start of the new payload block
+ */
+static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
+ uint64_t *new_offset)
+{
+ *new_offset = bdrv_getlength(bs->file);
+
+ /* per the spec, the address for a block is in units of 1MB */
+ *new_offset = ROUND_UP(*new_offset, 1024 * 1024);
+
+ return bdrv_truncate(bs->file, *new_offset + s->block_size);
+}
+
+/*
+ * Update the BAT table entry with the new file offset, and the new entry
+ * state */
+static void vhdx_update_bat_table_entry(BlockDriverState *bs, BDRVVHDXState *s,
+ VHDXSectorInfo *sinfo,
+ uint64_t *bat_entry_le,
+ uint64_t *bat_offset, int state)
+{
+ /* The BAT entry is a uint64, with 44 bits for the file offset in units of
+ * 1MB, and 3 bits for the block state. */
+ s->bat[sinfo->bat_idx] = sinfo->file_offset;
+
+ s->bat[sinfo->bat_idx] |= state & VHDX_BAT_STATE_BIT_MASK;
+
+ *bat_entry_le = cpu_to_le64(s->bat[sinfo->bat_idx]);
+ *bat_offset = s->bat_offset + sinfo->bat_idx * sizeof(VHDXBatEntry);
+
+}
+/* Per the spec, on the first write of guest-visible data to the file the
+ * data write guid must be updated in the header */
+int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s)
+{
+ int ret = 0;
+ if (s->first_visible_write) {
+ s->first_visible_write = false;
+ ret = vhdx_update_headers(bs, s, true, NULL);
+ }
+ return ret;
+}
static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
- return -ENOTSUP;
+ int ret = -ENOTSUP;
+ BDRVVHDXState *s = bs->opaque;
+ VHDXSectorInfo sinfo;
+ uint64_t bytes_done = 0;
+ uint64_t bat_entry = 0;
+ uint64_t bat_entry_offset = 0;
+ QEMUIOVector hd_qiov;
+ struct iovec iov1 = { 0 };
+ struct iovec iov2 = { 0 };
+ int sectors_to_write;
+ int bat_state;
+ uint64_t bat_prior_offset = 0;
+ bool bat_update = false;
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+ qemu_co_mutex_lock(&s->lock);
+
+ ret = vhdx_user_visible_write(bs, s);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ while (nb_sectors > 0) {
+ bool use_zero_buffers = false;
+ bat_update = false;
+ if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
+ /* not supported yet */
+ ret = -ENOTSUP;
+ goto exit;
+ } else {
+ vhdx_block_translate(s, sector_num, nb_sectors, &sinfo);
+ sectors_to_write = sinfo.sectors_avail;
+
+ qemu_iovec_reset(&hd_qiov);
+ /* check the payload block state */
+ bat_state = s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK;
+ switch (bat_state) {
+ case PAYLOAD_BLOCK_ZERO:
+ /* in this case, we need to preserve zero writes for
+ * data that is not part of this write, so we must pad
+ * the rest of the buffer to zeroes */
+
+ /* if we are on a posix system with ftruncate() that extends
+ * a file, then it is zero-filled for us. On Win32, the raw
+ * layer uses SetFilePointer and SetFileEnd, which does not
+ * zero fill AFAIK */
+
+ /* Queue another write of zero buffers if the underlying file
+ * does not zero-fill on file extension */
+
+ if (bdrv_has_zero_init(bs->file) == 0) {
+ use_zero_buffers = true;
+
+ /* zero fill the front, if any */
+ if (sinfo.block_offset) {
+ iov1.iov_len = sinfo.block_offset;
+ iov1.iov_base = qemu_blockalign(bs, iov1.iov_len);
+ memset(iov1.iov_base, 0, iov1.iov_len);
+ qemu_iovec_concat_iov(&hd_qiov, &iov1, 1, 0,
+ sinfo.block_offset);
+ sectors_to_write += iov1.iov_len >> BDRV_SECTOR_BITS;
+ }
+
+ /* our actual data */
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
+ sinfo.bytes_avail);
+
+ /* zero fill the back, if any */
+ if ((sinfo.bytes_avail - sinfo.block_offset) <
+ s->block_size) {
+ iov2.iov_len = s->block_size -
+ (sinfo.bytes_avail + sinfo.block_offset);
+ iov2.iov_base = qemu_blockalign(bs, iov2.iov_len);
+ memset(iov2.iov_base, 0, iov2.iov_len);
+ qemu_iovec_concat_iov(&hd_qiov, &iov2, 1, 0,
+ sinfo.block_offset);
+ sectors_to_write += iov2.iov_len >> BDRV_SECTOR_BITS;
+ }
+ }
+
+ /* fall through */
+ case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
+ case PAYLOAD_BLOCK_UNMAPPED: /* fall through */
+ case PAYLOAD_BLOCK_UNDEFINED: /* fall through */
+ bat_prior_offset = sinfo.file_offset;
+ ret = vhdx_allocate_block(bs, s, &sinfo.file_offset);
+ if (ret < 0) {
+ goto exit;
+ }
+ /* once we support differencing files, this may also be
+ * partially present */
+ /* update block state to the newly specified state */
+ vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry,
+ &bat_entry_offset,
+ PAYLOAD_BLOCK_FULLY_PRESENT);
+ bat_update = true;
+ /* since we just allocated a block, file_offset is the
+ * beginning of the payload block. It needs to be the
+ * write address, which includes the offset into the block */
+ if (!use_zero_buffers) {
+ sinfo.file_offset += sinfo.block_offset;
+ }
+ /* fall through */
+ case PAYLOAD_BLOCK_FULLY_PRESENT:
+ /* if the file offset address is in the header zone,
+ * there is a problem */
+ if (sinfo.file_offset < (1024 * 1024)) {
+ ret = -EFAULT;
+ goto error_bat_restore;
+ }
+
+ if (!use_zero_buffers) {
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
+ sinfo.bytes_avail);
+ }
+ /* block exists, so we can just overwrite it */
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_writev(bs->file,
+ sinfo.file_offset >> BDRV_SECTOR_BITS,
+ sectors_to_write, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto error_bat_restore;
+ }
+ break;
+ case PAYLOAD_BLOCK_PARTIALLY_PRESENT:
+ /* we don't yet support difference files, fall through
+ * to error */
+ default:
+ ret = -EIO;
+ goto exit;
+ break;
+ }
+
+ if (bat_update) {
+ /* this will update the BAT entry into the log journal, and
+ * then flush the log journal out to disk */
+ ret = vhdx_log_write_and_flush(bs, s, &bat_entry,
+ sizeof(VHDXBatEntry),
+ bat_entry_offset);
+ if (ret < 0) {
+ goto exit;
+ }
+ }
+
+ nb_sectors -= sinfo.sectors_avail;
+ sector_num += sinfo.sectors_avail;
+ bytes_done += sinfo.bytes_avail;
+
+ }
+ }
+
+ goto exit;
+
+error_bat_restore:
+ if (bat_update) {
+ /* keep metadata in sync, and restore the bat entry state
+ * if error. */
+ sinfo.file_offset = bat_prior_offset;
+ vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry,
+ &bat_entry_offset, bat_state);
+ }
+exit:
+ qemu_vfree(iov1.iov_base);
+ qemu_vfree(iov2.iov_base);
+ qemu_co_mutex_unlock(&s->lock);
+ qemu_iovec_destroy(&hd_qiov);
+ return ret;
}
-static void vhdx_close(BlockDriverState *bs)
+
+/*
+ * Create VHDX Headers
+ *
+ * There are 2 headers, and the highest sequence number will represent
+ * the active header
+ */
+static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size,
+ uint32_t log_size)
{
- BDRVVHDXState *s = bs->opaque;
- qemu_vfree(s->headers[0]);
- qemu_vfree(s->headers[1]);
- qemu_vfree(s->bat);
- qemu_vfree(s->parent_entries);
- migrate_del_blocker(s->migration_blocker);
- error_free(s->migration_blocker);
+ int ret = 0;
+ VHDXHeader *hdr = NULL;
+
+ hdr = g_malloc0(sizeof(VHDXHeader));
+
+ hdr->signature = VHDX_HEADER_SIGNATURE;
+ hdr->sequence_number = g_random_int();
+ hdr->log_version = 0;
+ hdr->version = 1;
+ hdr->log_length = log_size;
+ hdr->log_offset = VHDX_HEADER_SECTION_END;
+ vhdx_guid_generate(&hdr->file_write_guid);
+ vhdx_guid_generate(&hdr->data_write_guid);
+
+ ret = vhdx_write_header(bs, hdr, VHDX_HEADER1_OFFSET, false);
+ if (ret < 0) {
+ goto exit;
+ }
+ hdr->sequence_number++;
+ ret = vhdx_write_header(bs, hdr, VHDX_HEADER2_OFFSET, false);
+ if (ret < 0) {
+ goto exit;
+ }
+
+exit:
+ g_free(hdr);
+ return ret;
+}
+
+
+/*
+ * Create the Metadata entries.
+ *
+ * For more details on the entries, see section 3.5 (pg 29) in the
+ * VHDX 1.00 specification.
+ *
+ * We support 5 metadata entries (all required by spec):
+ * File Parameters,
+ * Virtual Disk Size,
+ * Page 83 Data,
+ * Logical Sector Size,
+ * Physical Sector Size
+ *
+ * The first 64KB of the Metadata section is reserved for the metadata
+ * header and entries; beyond that, the metadata items themselves reside.
+ */
+static int vhdx_create_new_metadata(BlockDriverState *bs,
+ uint64_t image_size,
+ uint32_t block_size,
+ uint32_t sector_size,
+ uint64_t metadata_offset,
+ VHDXImageType type)
+{
+ int ret = 0;
+ uint32_t offset = 0;
+ void *buffer = NULL;
+ void *entry_buffer;
+ VHDXMetadataTableHeader *md_table;;
+ VHDXMetadataTableEntry *md_table_entry;
+
+ /* Metadata entries */
+ VHDXFileParameters *mt_file_params;
+ VHDXVirtualDiskSize *mt_virtual_size;
+ VHDXPage83Data *mt_page83;
+ VHDXVirtualDiskLogicalSectorSize *mt_log_sector_size;
+ VHDXVirtualDiskPhysicalSectorSize *mt_phys_sector_size;
+
+ entry_buffer = g_malloc0(sizeof(VHDXFileParameters) +
+ sizeof(VHDXVirtualDiskSize) +
+ sizeof(VHDXPage83Data) +
+ sizeof(VHDXVirtualDiskLogicalSectorSize) +
+ sizeof(VHDXVirtualDiskPhysicalSectorSize));
+
+ mt_file_params = entry_buffer;
+ offset += sizeof(VHDXFileParameters);
+ mt_virtual_size = entry_buffer + offset;
+ offset += sizeof(VHDXVirtualDiskSize);
+ mt_page83 = entry_buffer + offset;
+ offset += sizeof(VHDXPage83Data);
+ mt_log_sector_size = entry_buffer + offset;
+ offset += sizeof(VHDXVirtualDiskLogicalSectorSize);
+ mt_phys_sector_size = entry_buffer + offset;
+
+ mt_file_params->block_size = cpu_to_le32(block_size);
+ if (type == VHDX_TYPE_FIXED) {
+ mt_file_params->data_bits |= VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED;
+ cpu_to_le32s(&mt_file_params->data_bits);
+ }
+
+ vhdx_guid_generate(&mt_page83->page_83_data);
+ cpu_to_leguids(&mt_page83->page_83_data);
+ mt_virtual_size->virtual_disk_size = cpu_to_le64(image_size);
+ mt_log_sector_size->logical_sector_size = cpu_to_le32(sector_size);
+ mt_phys_sector_size->physical_sector_size = cpu_to_le32(sector_size);
+
+ buffer = g_malloc0(VHDX_HEADER_BLOCK_SIZE);
+ md_table = buffer;
+
+ md_table->signature = VHDX_METADATA_SIGNATURE;
+ md_table->entry_count = 5;
+ vhdx_metadata_header_le_export(md_table);
+
+
+ /* This will reference beyond the reserved table portion */
+ offset = 64 * KiB;
+
+ md_table_entry = buffer + sizeof(VHDXMetadataTableHeader);
+
+ md_table_entry[0].item_id = file_param_guid;
+ md_table_entry[0].offset = offset;
+ md_table_entry[0].length = sizeof(VHDXFileParameters);
+ md_table_entry[0].data_bits |= VHDX_META_FLAGS_IS_REQUIRED;
+ offset += md_table_entry[0].length;
+ vhdx_metadata_entry_le_export(&md_table_entry[0]);
+
+ md_table_entry[1].item_id = virtual_size_guid;
+ md_table_entry[1].offset = offset;
+ md_table_entry[1].length = sizeof(VHDXVirtualDiskSize);
+ md_table_entry[1].data_bits |= VHDX_META_FLAGS_IS_REQUIRED |
+ VHDX_META_FLAGS_IS_VIRTUAL_DISK;
+ offset += md_table_entry[1].length;
+ vhdx_metadata_entry_le_export(&md_table_entry[1]);
+
+ md_table_entry[2].item_id = page83_guid;
+ md_table_entry[2].offset = offset;
+ md_table_entry[2].length = sizeof(VHDXPage83Data);
+ md_table_entry[2].data_bits |= VHDX_META_FLAGS_IS_REQUIRED |
+ VHDX_META_FLAGS_IS_VIRTUAL_DISK;
+ offset += md_table_entry[2].length;
+ vhdx_metadata_entry_le_export(&md_table_entry[2]);
+
+ md_table_entry[3].item_id = logical_sector_guid;
+ md_table_entry[3].offset = offset;
+ md_table_entry[3].length = sizeof(VHDXVirtualDiskLogicalSectorSize);
+ md_table_entry[3].data_bits |= VHDX_META_FLAGS_IS_REQUIRED |
+ VHDX_META_FLAGS_IS_VIRTUAL_DISK;
+ offset += md_table_entry[3].length;
+ vhdx_metadata_entry_le_export(&md_table_entry[3]);
+
+ md_table_entry[4].item_id = phys_sector_guid;
+ md_table_entry[4].offset = offset;
+ md_table_entry[4].length = sizeof(VHDXVirtualDiskPhysicalSectorSize);
+ md_table_entry[4].data_bits |= VHDX_META_FLAGS_IS_REQUIRED |
+ VHDX_META_FLAGS_IS_VIRTUAL_DISK;
+ vhdx_metadata_entry_le_export(&md_table_entry[4]);
+
+ ret = bdrv_pwrite(bs, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ ret = bdrv_pwrite(bs, metadata_offset + (64 * KiB), entry_buffer,
+ VHDX_HEADER_BLOCK_SIZE);
+ if (ret < 0) {
+ goto exit;
+ }
+
+
+exit:
+ g_free(buffer);
+ g_free(entry_buffer);
+ return ret;
}
+/* This create the actual BAT itself. We currently only support
+ * 'Dynamic' and 'Fixed' image types.
+ *
+ * Dynamic images: default state of the BAT is all zeroes.
+ *
+ * Fixed images: default state of the BAT is fully populated, with
+ * file offsets and state PAYLOAD_BLOCK_FULLY_PRESENT.
+ */
+static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
+ uint64_t image_size, VHDXImageType type,
+ bool use_zero_blocks, VHDXRegionTableEntry *rt_bat)
+{
+ int ret = 0;
+ uint64_t data_file_offset;
+ uint64_t total_sectors = 0;
+ uint64_t sector_num = 0;
+ uint64_t unused;
+ int block_state;
+ VHDXSectorInfo sinfo;
+
+ assert(s->bat == NULL);
+
+ /* this gives a data start after BAT/bitmap entries, and well
+ * past any metadata entries (with a 4 MB buffer for future
+ * expansion */
+ data_file_offset = rt_bat->file_offset + rt_bat->length + 5 * MiB;
+ total_sectors = image_size >> s->logical_sector_size_bits;
+
+ if (type == VHDX_TYPE_DYNAMIC) {
+ /* All zeroes, so we can just extend the file - the end of the BAT
+ * is the furthest thing we have written yet */
+ ret = bdrv_truncate(bs, data_file_offset);
+ if (ret < 0) {
+ goto exit;
+ }
+ } else if (type == VHDX_TYPE_FIXED) {
+ ret = bdrv_truncate(bs, data_file_offset + image_size);
+ if (ret < 0) {
+ goto exit;
+ }
+ } else {
+ ret = -ENOTSUP;
+ goto exit;
+ }
+
+ if (type == VHDX_TYPE_FIXED ||
+ use_zero_blocks ||
+ bdrv_has_zero_init(bs) == 0) {
+ /* for a fixed file, the default BAT entry is not zero */
+ s->bat = g_malloc0(rt_bat->length);
+ block_state = type == VHDX_TYPE_FIXED ? PAYLOAD_BLOCK_FULLY_PRESENT :
+ PAYLOAD_BLOCK_NOT_PRESENT;
+ block_state = use_zero_blocks ? PAYLOAD_BLOCK_ZERO : block_state;
+ /* fill the BAT by emulating sector writes of sectors_per_block size */
+ while (sector_num < total_sectors) {
+ vhdx_block_translate(s, sector_num, s->sectors_per_block, &sinfo);
+ sinfo.file_offset = data_file_offset +
+ (sector_num << s->logical_sector_size_bits);
+ sinfo.file_offset = ROUND_UP(sinfo.file_offset, MiB);
+ vhdx_update_bat_table_entry(bs, s, &sinfo, &unused, &unused,
+ block_state);
+ cpu_to_le64s(&s->bat[sinfo.bat_idx]);
+ sector_num += s->sectors_per_block;
+ }
+ ret = bdrv_pwrite(bs, rt_bat->file_offset, s->bat, rt_bat->length);
+ if (ret < 0) {
+ goto exit;
+ }
+ }
+
+
+
+exit:
+ g_free(s->bat);
+ return ret;
+}
+
+/* Creates the region table header, and region table entries.
+ * There are 2 supported region table entries: BAT, and Metadata/
+ *
+ * As the calculations for the BAT region table are also needed
+ * to create the BAT itself, we will also cause the BAT to be
+ * created.
+ */
+static int vhdx_create_new_region_table(BlockDriverState *bs,
+ uint64_t image_size,
+ uint32_t block_size,
+ uint32_t sector_size,
+ uint32_t log_size,
+ bool use_zero_blocks,
+ VHDXImageType type,
+ uint64_t *metadata_offset)
+{
+ int ret = 0;
+ uint32_t offset = 0;
+ void *buffer = NULL;
+ BDRVVHDXState *s = NULL;
+ VHDXRegionTableHeader *region_table;
+ VHDXRegionTableEntry *rt_bat;
+ VHDXRegionTableEntry *rt_metadata;
+
+ assert(metadata_offset != NULL);
+
+ /* Populate enough of the BDRVVHDXState to be able to use the
+ * pre-existing BAT calculation, translation, and update functions */
+ s = g_malloc0(sizeof(BDRVVHDXState));
+
+ s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
+ (uint64_t) sector_size / (uint64_t) block_size;
+
+ s->sectors_per_block = block_size / sector_size;
+ s->virtual_disk_size = image_size;
+ s->block_size = block_size;
+ s->logical_sector_size = sector_size;
+
+ vhdx_set_shift_bits(s);
+
+ vhdx_calc_bat_entries(s);
+
+ /* At this point the VHDX state is populated enough for creation */
+
+ /* a single buffer is used so we can calculate the checksum over the
+ * entire 64KB block */
+ buffer = g_malloc0(VHDX_HEADER_BLOCK_SIZE);
+ region_table = buffer;
+ offset += sizeof(VHDXRegionTableHeader);
+ rt_bat = buffer + offset;
+ offset += sizeof(VHDXRegionTableEntry);
+ rt_metadata = buffer + offset;
+
+ region_table->signature = VHDX_REGION_SIGNATURE;
+ region_table->entry_count = 2; /* BAT and Metadata */
+
+ rt_bat->guid = bat_guid;
+ rt_bat->length = ROUND_UP(s->bat_entries * sizeof(VHDXBatEntry), MiB);
+ rt_bat->file_offset = ROUND_UP(VHDX_HEADER_SECTION_END + log_size, MiB);
+ s->bat_offset = rt_bat->file_offset;
+
+ rt_metadata->guid = metadata_guid;
+ rt_metadata->file_offset = ROUND_UP(rt_bat->file_offset + rt_bat->length,
+ MiB);
+ rt_metadata->length = 1 * MiB; /* min size, and more than enough */
+ *metadata_offset = rt_metadata->file_offset;
+
+ vhdx_update_checksum(buffer, VHDX_HEADER_BLOCK_SIZE,
+ offsetof(VHDXRegionTableHeader, checksum));
+
+
+ /* The region table gives us the data we need to create the BAT,
+ * so do that now */
+ ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks, rt_bat);
+
+ /* Now write out the region headers to disk */
+ vhdx_region_header_le_export(region_table);
+ vhdx_region_entry_le_export(rt_bat);
+ vhdx_region_entry_le_export(rt_metadata);
+
+ ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer,
+ VHDX_HEADER_BLOCK_SIZE);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ ret = bdrv_pwrite(bs, VHDX_REGION_TABLE2_OFFSET, buffer,
+ VHDX_HEADER_BLOCK_SIZE);
+ if (ret < 0) {
+ goto exit;
+ }
+
+
+exit:
+ g_free(s);
+ g_free(buffer);
+ return ret;
+}
+
+/* We need to create the following elements:
+ *
+ * .-----------------------------------------------------------------.
+ * | (A) | (B) | (C) | (D) | (E) |
+ * | File ID | Header1 | Header 2 | Region Tbl 1 | Region Tbl 2 |
+ * | | | | | |
+ * .-----------------------------------------------------------------.
+ * 0 64KB 128KB 192KB 256KB 320KB
+ *
+ *
+ * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------.
+ * | (F) | (G) | (H) | |
+ * | Journal Log | BAT / Bitmap | Metadata | .... data ...... |
+ * | | | | |
+ * .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------.
+ * 1MB
+ */
+static int vhdx_create(const char *filename, QEMUOptionParameter *options,
+ Error **errp)
+{
+ int ret = 0;
+ uint64_t image_size = (uint64_t) 2 * GiB;
+ uint32_t log_size = 1 * MiB;
+ uint32_t block_size = 0;
+ uint64_t signature;
+ uint64_t metadata_offset;
+ bool use_zero_blocks = false;
+
+ gunichar2 *creator = NULL;
+ glong creator_items;
+ BlockDriverState *bs;
+ const char *type = NULL;
+ VHDXImageType image_type;
+ Error *local_err = NULL;
+
+ while (options && options->name) {
+ if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+ image_size = options->value.n;
+ } else if (!strcmp(options->name, VHDX_BLOCK_OPT_LOG_SIZE)) {
+ log_size = options->value.n;
+ } else if (!strcmp(options->name, VHDX_BLOCK_OPT_BLOCK_SIZE)) {
+ block_size = options->value.n;
+ } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
+ type = options->value.s;
+ } else if (!strcmp(options->name, VHDX_BLOCK_OPT_ZERO)) {
+ use_zero_blocks = options->value.n != 0;
+ }
+ options++;
+ }
+
+ if (image_size > VHDX_MAX_IMAGE_SIZE) {
+ error_setg_errno(errp, EINVAL, "Image size too large; max of 64TB");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (type == NULL) {
+ type = "dynamic";
+ }
+
+ if (!strcmp(type, "dynamic")) {
+ image_type = VHDX_TYPE_DYNAMIC;
+ } else if (!strcmp(type, "fixed")) {
+ image_type = VHDX_TYPE_FIXED;
+ } else if (!strcmp(type, "differencing")) {
+ error_setg_errno(errp, ENOTSUP,
+ "Differencing files not yet supported");
+ ret = -ENOTSUP;
+ goto exit;
+ } else {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ /* These are pretty arbitrary, and mainly designed to keep the BAT
+ * size reasonable to load into RAM */
+ if (block_size == 0) {
+ if (image_size > 32 * TiB) {
+ block_size = 64 * MiB;
+ } else if (image_size > (uint64_t) 100 * GiB) {
+ block_size = 32 * MiB;
+ } else if (image_size > 1 * GiB) {
+ block_size = 16 * MiB;
+ } else {
+ block_size = 8 * MiB;
+ }
+ }
+
+
+ /* make the log size close to what was specified, but must be
+ * min 1MB, and multiple of 1MB */
+ log_size = ROUND_UP(log_size, MiB);
+
+ block_size = ROUND_UP(block_size, MiB);
+ block_size = block_size > VHDX_BLOCK_SIZE_MAX ? VHDX_BLOCK_SIZE_MAX :
+ block_size;
+
+ ret = bdrv_create_file(filename, options, &local_err);
+ if (ret < 0) {
+ error_propagate(errp, local_err);
+ goto exit;
+ }
+
+ ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+ if (ret < 0) {
+ error_propagate(errp, local_err);
+ goto exit;
+ }
+
+ /* Create (A) */
+
+ /* The creator field is optional, but may be useful for
+ * debugging / diagnostics */
+ creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
+ &creator_items, NULL);
+ signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
+ bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
+ if (ret < 0) {
+ goto delete_and_exit;
+ }
+ if (creator) {
+ bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature), creator,
+ creator_items * sizeof(gunichar2));
+ if (ret < 0) {
+ goto delete_and_exit;
+ }
+ }
+
+
+ /* Creates (B),(C) */
+ ret = vhdx_create_new_headers(bs, image_size, log_size);
+ if (ret < 0) {
+ goto delete_and_exit;
+ }
+
+ /* Creates (D),(E),(G) explicitly. (F) created as by-product */
+ ret = vhdx_create_new_region_table(bs, image_size, block_size, 512,
+ log_size, use_zero_blocks, image_type,
+ &metadata_offset);
+ if (ret < 0) {
+ goto delete_and_exit;
+ }
+
+ /* Creates (H) */
+ ret = vhdx_create_new_metadata(bs, image_size, block_size, 512,
+ metadata_offset, image_type);
+ if (ret < 0) {
+ goto delete_and_exit;
+ }
+
+
+
+delete_and_exit:
+ bdrv_unref(bs);
+exit:
+ g_free(creator);
+ return ret;
+}
+
+static QEMUOptionParameter vhdx_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size; max of 64TB."
+ },
+ {
+ .name = VHDX_BLOCK_OPT_LOG_SIZE,
+ .type = OPT_SIZE,
+ .value.n = 1 * MiB,
+ .help = "Log size; min 1MB."
+ },
+ {
+ .name = VHDX_BLOCK_OPT_BLOCK_SIZE,
+ .type = OPT_SIZE,
+ .value.n = 0,
+ .help = "Block Size; min 1MB, max 256MB. " \
+ "0 means auto-calculate based on image size."
+ },
+ {
+ .name = BLOCK_OPT_SUBFMT,
+ .type = OPT_STRING,
+ .help = "VHDX format type, can be either 'dynamic' or 'fixed'. "\
+ "Default is 'dynamic'."
+ },
+ {
+ .name = VHDX_BLOCK_OPT_ZERO,
+ .type = OPT_FLAG,
+ .help = "Force use of payload blocks of type 'ZERO'. Non-standard."
+ },
+ { NULL }
+};
+
static BlockDriver bdrv_vhdx = {
.format_name = "vhdx",
.instance_size = sizeof(BDRVVHDXState),
@@ -973,6 +1884,9 @@ static BlockDriver bdrv_vhdx = {
.bdrv_reopen_prepare = vhdx_reopen_prepare,
.bdrv_co_readv = vhdx_co_readv,
.bdrv_co_writev = vhdx_co_writev,
+ .bdrv_create = vhdx_create,
+
+ .create_options = vhdx_create_options,
};
static void bdrv_vhdx_init(void)
diff --git a/block/vhdx.h b/block/vhdx.h
index fb687ed2d6..51183b243c 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -6,9 +6,9 @@
* Authors:
* Jeff Cody <jcody@redhat.com>
*
- * This is based on the "VHDX Format Specification v0.95", published 4/12/2012
+ * This is based on the "VHDX Format Specification v1.00", published 8/25/2012
* by Microsoft:
- * https://www.microsoft.com/en-us/download/details.aspx?id=29681
+ * https://www.microsoft.com/en-us/download/details.aspx?id=34750
*
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
* See the COPYING.LIB file in the top-level directory.
@@ -18,6 +18,11 @@
#ifndef BLOCK_VHDX_H
#define BLOCK_VHDX_H
+#define KiB (1 * 1024)
+#define MiB (KiB * 1024)
+#define GiB (MiB * 1024)
+#define TiB ((uint64_t) GiB * 1024)
+
/* Structures and fields present in the VHDX file */
/* The header section has the following blocks,
@@ -30,14 +35,15 @@
* 0.........64KB...........128KB........192KB..........256KB................1MB
*/
-#define VHDX_HEADER_BLOCK_SIZE (64*1024)
+#define VHDX_HEADER_BLOCK_SIZE (64 * 1024)
#define VHDX_FILE_ID_OFFSET 0
-#define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE*1)
-#define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE*2)
-#define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE*3)
-
+#define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE * 1)
+#define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 2)
+#define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE * 3)
+#define VHDX_REGION_TABLE2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 4)
+#define VHDX_HEADER_SECTION_END (1 * MiB)
/*
* A note on the use of MS-GUID fields. For more details on the GUID,
* please see: https://en.wikipedia.org/wiki/Globally_unique_identifier.
@@ -55,10 +61,11 @@
/* These structures are ones that are defined in the VHDX specification
* document */
+#define VHDX_FILE_SIGNATURE 0x656C696678646876 /* "vhdxfile" in ASCII */
typedef struct VHDXFileIdentifier {
uint64_t signature; /* "vhdxfile" in ASCII */
uint16_t creator[256]; /* optional; utf-16 string to identify
- the vhdx file creator. Diagnotistic
+ the vhdx file creator. Diagnostic
only */
} VHDXFileIdentifier;
@@ -67,7 +74,7 @@ typedef struct VHDXFileIdentifier {
* Microsoft is not just 16 bytes though - it is a structure that is defined,
* so we need to follow it here so that endianness does not trip us up */
-typedef struct MSGUID {
+typedef struct QEMU_PACKED MSGUID {
uint32_t data1;
uint16_t data2;
uint16_t data3;
@@ -77,14 +84,15 @@ typedef struct MSGUID {
#define guid_eq(a, b) \
(memcmp(&(a), &(b), sizeof(MSGUID)) == 0)
-#define VHDX_HEADER_SIZE (4*1024) /* although the vhdx_header struct in disk
- is only 582 bytes, for purposes of crc
- the header is the first 4KB of the 64KB
- block */
+#define VHDX_HEADER_SIZE (4 * 1024) /* although the vhdx_header struct in disk
+ is only 582 bytes, for purposes of crc
+ the header is the first 4KB of the 64KB
+ block */
/* The full header is 4KB, although the actual header data is much smaller.
* But for the checksum calculation, it is over the entire 4KB structure,
* not just the defined portion of it */
+#define VHDX_HEADER_SIGNATURE 0x64616568
typedef struct QEMU_PACKED VHDXHeader {
uint32_t signature; /* "head" in ASCII */
uint32_t checksum; /* CRC-32C hash of the whole header */
@@ -92,7 +100,7 @@ typedef struct QEMU_PACKED VHDXHeader {
VHDX file has 2 of these headers,
and only the header with the highest
sequence number is valid */
- MSGUID file_write_guid; /* 128 bit unique identifier. Must be
+ MSGUID file_write_guid; /* 128 bit unique identifier. Must be
updated to new, unique value before
the first modification is made to
file */
@@ -114,9 +122,9 @@ typedef struct QEMU_PACKED VHDXHeader {
there is no valid log. If non-zero,
log entries with this guid are
valid. */
- uint16_t log_version; /* version of the log format. Mustn't be
- zero, unless log_guid is also zero */
- uint16_t version; /* version of th evhdx file. Currently,
+ uint16_t log_version; /* version of the log format. Must be
+ set to zero */
+ uint16_t version; /* version of the vhdx file. Currently,
only supported version is "1" */
uint32_t log_length; /* length of the log. Must be multiple
of 1MB */
@@ -125,6 +133,7 @@ typedef struct QEMU_PACKED VHDXHeader {
} VHDXHeader;
/* Header for the region table block */
+#define VHDX_REGION_SIGNATURE 0x69676572 /* "regi" in ASCII */
typedef struct QEMU_PACKED VHDXRegionTableHeader {
uint32_t signature; /* "regi" in ASCII */
uint32_t checksum; /* CRC-32C hash of the 64KB table */
@@ -151,7 +160,10 @@ typedef struct QEMU_PACKED VHDXRegionTableEntry {
/* ---- LOG ENTRY STRUCTURES ---- */
+#define VHDX_LOG_MIN_SIZE (1024 * 1024)
+#define VHDX_LOG_SECTOR_SIZE 4096
#define VHDX_LOG_HDR_SIZE 64
+#define VHDX_LOG_SIGNATURE 0x65676f6c
typedef struct QEMU_PACKED VHDXLogEntryHeader {
uint32_t signature; /* "loge" in ASCII */
uint32_t checksum; /* CRC-32C hash of the 64KB table */
@@ -174,7 +186,8 @@ typedef struct QEMU_PACKED VHDXLogEntryHeader {
} VHDXLogEntryHeader;
#define VHDX_LOG_DESC_SIZE 32
-
+#define VHDX_LOG_DESC_SIGNATURE 0x63736564
+#define VHDX_LOG_ZERO_SIGNATURE 0x6f72657a
typedef struct QEMU_PACKED VHDXLogDescriptor {
uint32_t signature; /* "zero" or "desc" in ASCII */
union {
@@ -194,6 +207,7 @@ typedef struct QEMU_PACKED VHDXLogDescriptor {
vhdx_log_entry_header */
} VHDXLogDescriptor;
+#define VHDX_LOG_DATA_SIGNATURE 0x61746164
typedef struct QEMU_PACKED VHDXLogDataSector {
uint32_t data_signature; /* "data" in ASCII */
uint32_t sequence_high; /* 4 MSB of 8 byte sequence_number */
@@ -212,19 +226,19 @@ typedef struct QEMU_PACKED VHDXLogDataSector {
#define PAYLOAD_BLOCK_UNDEFINED 1
#define PAYLOAD_BLOCK_ZERO 2
#define PAYLOAD_BLOCK_UNMAPPED 5
-#define PAYLOAD_BLOCK_FULL_PRESENT 6
+#define PAYLOAD_BLOCK_FULLY_PRESENT 6
#define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7
#define SB_BLOCK_NOT_PRESENT 0
#define SB_BLOCK_PRESENT 6
/* per the spec */
-#define VHDX_MAX_SECTORS_PER_BLOCK (1<<23)
+#define VHDX_MAX_SECTORS_PER_BLOCK (1 << 23)
/* upper 44 bits are the file offset in 1MB units lower 3 bits are the state
other bits are reserved */
#define VHDX_BAT_STATE_BIT_MASK 0x07
-#define VHDX_BAT_FILE_OFF_BITS (64-44)
+#define VHDX_BAT_FILE_OFF_MASK 0xFFFFFFFFFFF00000 /* upper 44 bits */
typedef uint64_t VHDXBatEntry;
/* ---- METADATA REGION STRUCTURES ---- */
@@ -233,6 +247,7 @@ typedef uint64_t VHDXBatEntry;
#define VHDX_METADATA_MAX_ENTRIES 2047 /* not including the header */
#define VHDX_METADATA_TABLE_MAX_SIZE \
(VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1))
+#define VHDX_METADATA_SIGNATURE 0x617461646174656D /* "metadata" in ASCII */
typedef struct QEMU_PACKED VHDXMetadataTableHeader {
uint64_t signature; /* "metadata" in ASCII */
uint16_t reserved;
@@ -252,8 +267,8 @@ typedef struct QEMU_PACKED VHDXMetadataTableEntry {
metadata region */
/* note: if length = 0, so is offset */
uint32_t length; /* length of metadata. <= 1MB. */
- uint32_t data_bits; /* least-significant 3 bits are flags, the
- rest are reserved (see above) */
+ uint32_t data_bits; /* least-significant 3 bits are flags,
+ the rest are reserved (see above) */
uint32_t reserved2;
} VHDXMetadataTableEntry;
@@ -262,13 +277,16 @@ typedef struct QEMU_PACKED VHDXMetadataTableEntry {
If set indicates a fixed
size VHDX file */
#define VHDX_PARAMS_HAS_PARENT 0x02 /* has parent / backing file */
+#define VHDX_BLOCK_SIZE_MIN (1 * MiB)
+#define VHDX_BLOCK_SIZE_MAX (256 * MiB)
typedef struct QEMU_PACKED VHDXFileParameters {
uint32_t block_size; /* size of each payload block, always
power of 2, <= 256MB and >= 1MB. */
- uint32_t data_bits; /* least-significant 2 bits are flags, the rest
- are reserved (see above) */
+ uint32_t data_bits; /* least-significant 2 bits are flags,
+ the rest are reserved (see above) */
} VHDXFileParameters;
+#define VHDX_MAX_IMAGE_SIZE ((uint64_t) 64 * TiB)
typedef struct QEMU_PACKED VHDXVirtualDiskSize {
uint64_t virtual_disk_size; /* Size of the virtual disk, in bytes.
Must be multiple of the sector size,
@@ -276,7 +294,7 @@ typedef struct QEMU_PACKED VHDXVirtualDiskSize {
} VHDXVirtualDiskSize;
typedef struct QEMU_PACKED VHDXPage83Data {
- MSGUID page_83_data[16]; /* unique id for scsi devices that
+ MSGUID page_83_data; /* unique id for scsi devices that
support page 0x83 */
} VHDXPage83Data;
@@ -291,7 +309,7 @@ typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize {
} VHDXVirtualDiskPhysicalSectorSize;
typedef struct QEMU_PACKED VHDXParentLocatorHeader {
- MSGUID locator_type[16]; /* type of the parent virtual disk. */
+ MSGUID locator_type; /* type of the parent virtual disk. */
uint16_t reserved;
uint16_t key_value_count; /* number of key/value pairs for this
locator */
@@ -308,18 +326,122 @@ typedef struct QEMU_PACKED VHDXParentLocatorEntry {
/* ----- END VHDX SPECIFICATION STRUCTURES ---- */
+typedef struct VHDXMetadataEntries {
+ VHDXMetadataTableEntry file_parameters_entry;
+ VHDXMetadataTableEntry virtual_disk_size_entry;
+ VHDXMetadataTableEntry page83_data_entry;
+ VHDXMetadataTableEntry logical_sector_size_entry;
+ VHDXMetadataTableEntry phys_sector_size_entry;
+ VHDXMetadataTableEntry parent_locator_entry;
+ uint16_t present;
+} VHDXMetadataEntries;
+
+typedef struct VHDXLogEntries {
+ uint64_t offset;
+ uint64_t length;
+ uint32_t write;
+ uint32_t read;
+ VHDXLogEntryHeader *hdr;
+ void *desc_buffer;
+ uint64_t sequence;
+ uint32_t tail;
+} VHDXLogEntries;
+
+typedef struct VHDXRegionEntry {
+ uint64_t start;
+ uint64_t end;
+ QLIST_ENTRY(VHDXRegionEntry) entries;
+} VHDXRegionEntry;
+
+typedef struct BDRVVHDXState {
+ CoMutex lock;
+
+ int curr_header;
+ VHDXHeader *headers[2];
+
+ VHDXRegionTableHeader rt;
+ VHDXRegionTableEntry bat_rt; /* region table for the BAT */
+ VHDXRegionTableEntry metadata_rt; /* region table for the metadata */
+
+ VHDXMetadataTableHeader metadata_hdr;
+ VHDXMetadataEntries metadata_entries;
+
+ VHDXFileParameters params;
+ uint32_t block_size;
+ uint32_t block_size_bits;
+ uint32_t sectors_per_block;
+ uint32_t sectors_per_block_bits;
+
+ uint64_t virtual_disk_size;
+ uint32_t logical_sector_size;
+ uint32_t physical_sector_size;
+
+ uint64_t chunk_ratio;
+ uint32_t chunk_ratio_bits;
+ uint32_t logical_sector_size_bits;
+
+ uint32_t bat_entries;
+ VHDXBatEntry *bat;
+ uint64_t bat_offset;
+ bool first_visible_write;
+ MSGUID session_guid;
+
+ VHDXLogEntries log;
+
+ VHDXParentLocatorHeader parent_header;
+ VHDXParentLocatorEntry *parent_entries;
+
+ Error *migration_blocker;
+
+ QLIST_HEAD(VHDXRegionHead, VHDXRegionEntry) regions;
+} BDRVVHDXState;
+
+void vhdx_guid_generate(MSGUID *guid);
+
+int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw,
+ MSGUID *log_guid);
+
+uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset);
uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
int crc_offset);
bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
+int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed);
+
+int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
+ void *data, uint32_t length, uint64_t offset);
-static void leguid_to_cpus(MSGUID *guid)
+static inline void leguid_to_cpus(MSGUID *guid)
{
le32_to_cpus(&guid->data1);
le16_to_cpus(&guid->data2);
le16_to_cpus(&guid->data3);
}
+static inline void cpu_to_leguids(MSGUID *guid)
+{
+ cpu_to_le32s(&guid->data1);
+ cpu_to_le16s(&guid->data2);
+ cpu_to_le16s(&guid->data3);
+}
+
+void vhdx_header_le_import(VHDXHeader *h);
+void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h);
+void vhdx_log_desc_le_import(VHDXLogDescriptor *d);
+void vhdx_log_desc_le_export(VHDXLogDescriptor *d);
+void vhdx_log_data_le_export(VHDXLogDataSector *d);
+void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
+void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);
+void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr);
+void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr);
+void vhdx_region_entry_le_import(VHDXRegionTableEntry *e);
+void vhdx_region_entry_le_export(VHDXRegionTableEntry *e);
+void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr);
+void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr);
+void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e);
+void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e);
+int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s);
+
#endif
diff --git a/block/vpc.c b/block/vpc.c
index 627d11cb9b..577cc45992 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -211,6 +211,15 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
bs->total_sectors = (int64_t)
be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
+ /* images created with disk2vhd report a far higher virtual size
+ * than expected with the cyls * heads * sectors_per_cyl formula.
+ * use the footer->size instead if the image was created with
+ * disk2vhd.
+ */
+ if (!strncmp(footer->creator_app, "d2v", 4)) {
+ bs->total_sectors = be64_to_cpu(footer->size) / BDRV_SECTOR_SIZE;
+ }
+
/* Allow a maximum disk size of approximately 2 TB */
if (bs->total_sectors >= 65535LL * 255 * 255) {
ret = -EFBIG;
diff --git a/blockdev.c b/blockdev.c
index b260477f1b..86e6bffdc4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -341,7 +341,7 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
qemu_opts_absorb_qdict(opts, bs_opts, &error);
if (error_is_set(&error)) {
error_propagate(errp, error);
- return NULL;
+ goto early_err;
}
if (id) {
@@ -361,7 +361,7 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
if ((buf = qemu_opt_get(opts, "discard")) != NULL) {
if (bdrv_parse_discard_flags(buf, &bdrv_flags) != 0) {
error_setg(errp, "invalid discard option");
- return NULL;
+ goto early_err;
}
}
@@ -383,7 +383,7 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
/* this is the default */
} else {
error_setg(errp, "invalid aio option");
- return NULL;
+ goto early_err;
}
}
#endif
@@ -393,13 +393,13 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
error_printf("Supported formats:");
bdrv_iterate_format(bdrv_format_print, NULL);
error_printf("\n");
- return NULL;
+ goto early_err;
}
drv = bdrv_find_format(buf);
if (!drv) {
error_setg(errp, "'%s' invalid format", buf);
- return NULL;
+ goto early_err;
}
}
@@ -435,20 +435,20 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
if (!check_throttle_config(&cfg, &error)) {
error_propagate(errp, error);
- return NULL;
+ goto early_err;
}
on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) {
error_setg(errp, "werror is not supported by this bus type");
- return NULL;
+ goto early_err;
}
on_write_error = parse_block_error_action(buf, 0, &error);
if (error_is_set(&error)) {
error_propagate(errp, error);
- return NULL;
+ goto early_err;
}
}
@@ -456,13 +456,13 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
if ((buf = qemu_opt_get(opts, "rerror")) != NULL) {
if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) {
error_report("rerror is not supported by this bus type");
- return NULL;
+ goto early_err;
}
on_read_error = parse_block_error_action(buf, 1, &error);
if (error_is_set(&error)) {
error_propagate(errp, error);
- return NULL;
+ goto early_err;
}
}
@@ -491,6 +491,8 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
if (has_driver_specific_opts) {
file = NULL;
} else {
+ QDECREF(bs_opts);
+ qemu_opts_del(opts);
return dinfo;
}
}
@@ -529,12 +531,13 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
return dinfo;
err:
- qemu_opts_del(opts);
- QDECREF(bs_opts);
bdrv_unref(dinfo->bdrv);
g_free(dinfo->id);
QTAILQ_REMOVE(&drives, dinfo, next);
g_free(dinfo);
+early_err:
+ QDECREF(bs_opts);
+ qemu_opts_del(opts);
return NULL;
}
diff --git a/configure b/configure
index a1cc5be69e..9a02610ac8 100755
--- a/configure
+++ b/configure
@@ -260,6 +260,7 @@ gtk=""
gtkabi="2.0"
tpm="no"
libssh2=""
+vhdx=""
# parse CC options first
for opt do
@@ -985,6 +986,10 @@ for opt do
;;
--enable-libssh2) libssh2="yes"
;;
+ --enable-vhdx) vhdx="yes"
+ ;;
+ --disable-vhdx) vhdx="no"
+ ;;
*) echo "ERROR: unknown option $opt"; show_help="yes"
;;
esac
@@ -1217,6 +1222,8 @@ echo " --gcov=GCOV use specified gcov [$gcov_tool]"
echo " --enable-tpm enable TPM support"
echo " --disable-libssh2 disable ssh block device support"
echo " --enable-libssh2 enable ssh block device support"
+echo " --disable-vhdx disables support for the Microsoft VHDX image format"
+echo " --enable-vhdx enable support for the Microsoft VHDX image format"
echo ""
echo "NOTE: The object files are built at the place where configure is launched"
exit 1
@@ -2017,6 +2024,18 @@ EOF
fi
fi
+if test "$vhdx" = "yes" ; then
+ if test "$uuid" = "no" ; then
+ error_exit "uuid required for VHDX support"
+ fi
+elif test "$vhdx" != "no" ; then
+ if test "$uuid" = "yes" ; then
+ vhdx=yes
+ else
+ vhdx=no
+ fi
+fi
+
##########################################
# xfsctl() probe, used for raw-posix
if test "$xfs" != "no" ; then
@@ -3760,6 +3779,7 @@ echo "TPM support $tpm"
echo "libssh2 support $libssh2"
echo "TPM passthrough $tpm_passthrough"
echo "QOM debugging $qom_cast_debug"
+echo "vhdx $vhdx"
if test "$sdl_too_old" = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -4152,6 +4172,10 @@ if test "$virtio_blk_data_plane" = "yes" ; then
echo 'CONFIG_VIRTIO_BLK_DATA_PLANE=$(CONFIG_VIRTIO)' >> $config_host_mak
fi
+if test "$vhdx" = "yes" ; then
+ echo "CONFIG_VHDX=y" >> $config_host_mak
+fi
+
# USB host support
if test "$libusb" = "yes"; then
echo "HOST_USB=libusb legacy" >> $config_host_mak
diff --git a/qapi-schema.json b/qapi-schema.json
index 81a375ba06..76c98a7265 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -227,7 +227,7 @@
##
# @ImageInfoSpecificVmdk:
#
-# @create_type: The create type of VMDK image
+# @create-type: The create type of VMDK image
#
# @cid: Content id of image
#
diff --git a/tests/Makefile b/tests/Makefile
index f414f2c80a..379cdd9ad1 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -68,6 +68,8 @@ check-qtest-i386-y += tests/rtc-test$(EXESUF)
check-qtest-i386-y += tests/i440fx-test$(EXESUF)
check-qtest-i386-y += tests/fw_cfg-test$(EXESUF)
check-qtest-i386-y += tests/qom-test$(EXESUF)
+check-qtest-i386-y += tests/blockdev-test$(EXESUF)
+check-qtest-i386-y += tests/qdev-monitor-test$(EXESUF)
check-qtest-x86_64-y = $(check-qtest-i386-y)
gcov-files-i386-y += i386-softmmu/hw/mc146818rtc.c
gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
@@ -200,6 +202,8 @@ tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y)
tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y)
tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y)
tests/qom-test$(EXESUF): tests/qom-test.o
+tests/blockdev-test$(EXESUF): tests/blockdev-test.o $(libqos-pc-obj-y)
+tests/qdev-monitor-test$(EXESUF): tests/qdev-monitor-test.o $(libqos-pc-obj-y)
tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
# QTest rules
diff --git a/tests/blockdev-test.c b/tests/blockdev-test.c
new file mode 100644
index 0000000000..c940e00690
--- /dev/null
+++ b/tests/blockdev-test.c
@@ -0,0 +1,59 @@
+/*
+ * blockdev.c test cases
+ *
+ * Copyright (C) 2013 Red Hat Inc.
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include <glib.h>
+#include <string.h>
+#include "libqtest.h"
+
+static void test_drive_add_empty(void)
+{
+ QDict *response;
+ const char *response_return;
+
+ /* Start with an empty drive */
+ qtest_start("-drive if=none,id=drive0");
+
+ /* Delete the drive */
+ response = qmp("{\"execute\": \"human-monitor-command\","
+ " \"arguments\": {"
+ " \"command-line\": \"drive_del drive0\""
+ "}}");
+ g_assert(response);
+ response_return = qdict_get_try_str(response, "return");
+ g_assert(response_return);
+ g_assert(strcmp(response_return, "") == 0);
+ QDECREF(response);
+
+ /* Ensure re-adding the drive works - there should be no duplicate ID error
+ * because the old drive must be gone.
+ */
+ response = qmp("{\"execute\": \"human-monitor-command\","
+ " \"arguments\": {"
+ " \"command-line\": \"drive_add 0 if=none,id=drive0\""
+ "}}");
+ g_assert(response);
+ response_return = qdict_get_try_str(response, "return");
+ g_assert(response_return);
+ g_assert(strcmp(response_return, "OK\r\n") == 0);
+ QDECREF(response);
+
+ qtest_end();
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+
+ qtest_add_func("/qmp/drive_add_empty", test_drive_add_empty);
+
+ return g_test_run();
+}
diff --git a/tests/boot-order-test.c b/tests/boot-order-test.c
index 4b233d0b24..da158c32bd 100644
--- a/tests/boot-order-test.c
+++ b/tests/boot-order-test.c
@@ -41,12 +41,12 @@ static void test_a_boot_order(const char *machine,
qtest_start(args);
actual = read_boot_order();
g_assert_cmphex(actual, ==, expected_boot);
- qmp("{ 'execute': 'system_reset' }");
+ qmp_discard_response("{ 'execute': 'system_reset' }");
/*
* system_reset only requests reset. We get a RESET event after
* the actual reset completes. Need to wait for that.
*/
- qmp(""); /* HACK: wait for event */
+ qmp_discard_response(""); /* HACK: wait for event */
actual = read_boot_order();
g_assert_cmphex(actual, ==, expected_reboot);
qtest_quit(global_qtest);
diff --git a/tests/fdc-test.c b/tests/fdc-test.c
index fd198dcf8b..38b5b178d0 100644
--- a/tests/fdc-test.c
+++ b/tests/fdc-test.c
@@ -290,10 +290,12 @@ static void test_media_insert(void)
/* Insert media in drive. DSKCHK should not be reset until a step pulse
* is sent. */
- qmp("{'execute':'change', 'arguments':{ 'device':'floppy0', "
- "'target': '%s' }}", test_image);
- qmp(""); /* ignore event (FIXME open -> open transition?!) */
- qmp(""); /* ignore event */
+ qmp_discard_response("{'execute':'change', 'arguments':{"
+ " 'device':'floppy0', 'target': '%s' }}",
+ test_image);
+ qmp_discard_response(""); /* ignore event
+ (FIXME open -> open transition?!) */
+ qmp_discard_response(""); /* ignore event */
dir = inb(FLOPPY_BASE + reg_dir);
assert_bit_set(dir, DSKCHG);
@@ -322,8 +324,9 @@ static void test_media_change(void)
/* Eject the floppy and check that DSKCHG is set. Reading it out doesn't
* reset the bit. */
- qmp("{'execute':'eject', 'arguments':{ 'device':'floppy0' }}");
- qmp(""); /* ignore event */
+ qmp_discard_response("{'execute':'eject', 'arguments':{"
+ " 'device':'floppy0' }}");
+ qmp_discard_response(""); /* ignore event */
dir = inb(FLOPPY_BASE + reg_dir);
assert_bit_set(dir, DSKCHG);
diff --git a/tests/ide-test.c b/tests/ide-test.c
index bc824a8144..d5cec5a1fc 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -460,8 +460,9 @@ static void test_flush(void)
tmp_path);
/* Delay the completion of the flush request until we explicitly do it */
- qmp("{'execute':'human-monitor-command', 'arguments': { "
- "'command-line': 'qemu-io ide0-hd0 \"break flush_to_os A\"'} }");
+ qmp_discard_response("{'execute':'human-monitor-command', 'arguments': {"
+ " 'command-line':"
+ " 'qemu-io ide0-hd0 \"break flush_to_os A\"'} }");
/* FLUSH CACHE command on device 0*/
outb(IDE_BASE + reg_device, 0);
@@ -473,8 +474,9 @@ static void test_flush(void)
assert_bit_clear(data, DF | ERR | DRQ);
/* Complete the command */
- qmp("{'execute':'human-monitor-command', 'arguments': { "
- "'command-line': 'qemu-io ide0-hd0 \"resume A\"'} }");
+ qmp_discard_response("{'execute':'human-monitor-command', 'arguments': {"
+ " 'command-line':"
+ " 'qemu-io ide0-hd0 \"resume A\"'} }");
/* Check registers */
data = inb(IDE_BASE + reg_device);
diff --git a/tests/libqtest.c b/tests/libqtest.c
index bb82069f5c..83424c3c6b 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -30,6 +30,8 @@
#include "qemu/compiler.h"
#include "qemu/osdep.h"
+#include "qapi/qmp/json-streamer.h"
+#include "qapi/qmp/json-parser.h"
#define MAX_IRQ 256
@@ -151,8 +153,8 @@ QTestState *qtest_init(const char *extra_args)
}
/* Read the QMP greeting and then do the handshake */
- qtest_qmp(s, "");
- qtest_qmp(s, "{ 'execute': 'qmp_capabilities' }");
+ qtest_qmp_discard_response(s, "");
+ qtest_qmp_discard_response(s, "{ 'execute': 'qmp_capabilities' }");
if (getenv("QTEST_STOP")) {
kill(qtest_qemu_pid(s), SIGSTOP);
@@ -291,16 +293,38 @@ redo:
return words;
}
-void qtest_qmpv(QTestState *s, const char *fmt, va_list ap)
+typedef struct {
+ JSONMessageParser parser;
+ QDict *response;
+} QMPResponseParser;
+
+static void qmp_response(JSONMessageParser *parser, QList *tokens)
+{
+ QMPResponseParser *qmp = container_of(parser, QMPResponseParser, parser);
+ QObject *obj;
+
+ obj = json_parser_parse(tokens, NULL);
+ if (!obj) {
+ fprintf(stderr, "QMP JSON response parsing failed\n");
+ exit(1);
+ }
+
+ g_assert(qobject_type(obj) == QTYPE_QDICT);
+ g_assert(!qmp->response);
+ qmp->response = (QDict *)obj;
+}
+
+QDict *qtest_qmpv(QTestState *s, const char *fmt, va_list ap)
{
- bool has_reply = false;
- int nesting = 0;
+ QMPResponseParser qmp;
/* Send QMP request */
socket_sendf(s->qmp_fd, fmt, ap);
/* Receive reply */
- while (!has_reply || nesting > 0) {
+ qmp.response = NULL;
+ json_message_parser_init(&qmp.parser, qmp_response);
+ while (!qmp.response) {
ssize_t len;
char c;
@@ -314,25 +338,39 @@ void qtest_qmpv(QTestState *s, const char *fmt, va_list ap)
exit(1);
}
- switch (c) {
- case '{':
- nesting++;
- has_reply = true;
- break;
- case '}':
- nesting--;
- break;
- }
+ json_message_parser_feed(&qmp.parser, &c, 1);
}
+ json_message_parser_destroy(&qmp.parser);
+
+ return qmp.response;
+}
+
+QDict *qtest_qmp(QTestState *s, const char *fmt, ...)
+{
+ va_list ap;
+ QDict *response;
+
+ va_start(ap, fmt);
+ response = qtest_qmpv(s, fmt, ap);
+ va_end(ap);
+ return response;
+}
+
+void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap)
+{
+ QDict *response = qtest_qmpv(s, fmt, ap);
+ QDECREF(response);
}
-void qtest_qmp(QTestState *s, const char *fmt, ...)
+void qtest_qmp_discard_response(QTestState *s, const char *fmt, ...)
{
va_list ap;
+ QDict *response;
va_start(ap, fmt);
- qtest_qmpv(s, fmt, ap);
+ response = qtest_qmpv(s, fmt, ap);
va_end(ap);
+ QDECREF(response);
}
const char *qtest_get_arch(void)
diff --git a/tests/libqtest.h b/tests/libqtest.h
index a6e99bd023..9deebdcdfa 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h
@@ -22,6 +22,7 @@
#include <stdbool.h>
#include <stdarg.h>
#include <sys/types.h>
+#include "qapi/qmp/qdict.h"
typedef struct QTestState QTestState;
@@ -44,13 +45,32 @@ QTestState *qtest_init(const char *extra_args);
void qtest_quit(QTestState *s);
/**
+ * qtest_qmp_discard_response:
+ * @s: #QTestState instance to operate on.
+ * @fmt...: QMP message to send to qemu
+ *
+ * Sends a QMP message to QEMU and consumes the response.
+ */
+void qtest_qmp_discard_response(QTestState *s, const char *fmt, ...);
+
+/**
* qtest_qmp:
* @s: #QTestState instance to operate on.
* @fmt...: QMP message to send to qemu
*
- * Sends a QMP message to QEMU
+ * Sends a QMP message to QEMU and returns the response.
+ */
+QDict *qtest_qmp(QTestState *s, const char *fmt, ...);
+
+/**
+ * qtest_qmpv_discard_response:
+ * @s: #QTestState instance to operate on.
+ * @fmt: QMP message to send to QEMU
+ * @ap: QMP message arguments
+ *
+ * Sends a QMP message to QEMU and consumes the response.
*/
-void qtest_qmp(QTestState *s, const char *fmt, ...);
+void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap);
/**
* qtest_qmpv:
@@ -58,9 +78,9 @@ void qtest_qmp(QTestState *s, const char *fmt, ...);
* @fmt: QMP message to send to QEMU
* @ap: QMP message arguments
*
- * Sends a QMP message to QEMU.
+ * Sends a QMP message to QEMU and returns the response.
*/
-void qtest_qmpv(QTestState *s, const char *fmt, va_list ap);
+QDict *qtest_qmpv(QTestState *s, const char *fmt, va_list ap);
/**
* qtest_get_irq:
@@ -334,14 +354,31 @@ static inline void qtest_end(void)
* qmp:
* @fmt...: QMP message to send to qemu
*
- * Sends a QMP message to QEMU
+ * Sends a QMP message to QEMU and returns the response.
+ */
+static inline QDict *qmp(const char *fmt, ...)
+{
+ va_list ap;
+ QDict *response;
+
+ va_start(ap, fmt);
+ response = qtest_qmpv(global_qtest, fmt, ap);
+ va_end(ap);
+ return response;
+}
+
+/**
+ * qmp_discard_response:
+ * @fmt...: QMP message to send to qemu
+ *
+ * Sends a QMP message to QEMU and consumes the response.
*/
-static inline void qmp(const char *fmt, ...)
+static inline void qmp_discard_response(const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
- qtest_qmpv(global_qtest, fmt, ap);
+ qtest_qmpv_discard_response(global_qtest, fmt, ap);
va_end(ap);
}
diff --git a/tests/qdev-monitor-test.c b/tests/qdev-monitor-test.c
new file mode 100644
index 0000000000..33a8ea4b9c
--- /dev/null
+++ b/tests/qdev-monitor-test.c
@@ -0,0 +1,81 @@
+/*
+ * qdev-monitor.c test cases
+ *
+ * Copyright (C) 2013 Red Hat Inc.
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include <string.h>
+#include <glib.h>
+#include "libqtest.h"
+#include "qapi/qmp/qjson.h"
+
+static void test_device_add(void)
+{
+ QDict *response;
+ QDict *error;
+
+ qtest_start("-drive if=none,id=drive0");
+
+ /* Make device_add fail. If this leaks the virtio-blk-pci device then a
+ * reference to drive0 will also be held (via qdev properties).
+ */
+ response = qmp("{\"execute\": \"device_add\","
+ " \"arguments\": {"
+ " \"driver\": \"virtio-blk-pci\","
+ " \"drive\": \"drive0\""
+ "}}");
+ g_assert(response);
+ error = qdict_get_qdict(response, "error");
+ g_assert(!strcmp(qdict_get_try_str(error, "class") ?: "",
+ "GenericError"));
+ g_assert(!strcmp(qdict_get_try_str(error, "desc") ?: "",
+ "Device initialization failed."));
+ QDECREF(response);
+
+ /* Delete the drive */
+ response = qmp("{\"execute\": \"human-monitor-command\","
+ " \"arguments\": {"
+ " \"command-line\": \"drive_del drive0\""
+ "}}");
+ g_assert(response);
+ g_assert(!strcmp(qdict_get_try_str(response, "return") ?: "(null)", ""));
+ QDECREF(response);
+
+ /* Try to re-add the drive. This fails with duplicate IDs if a leaked
+ * virtio-blk-pci exists that holds a reference to the old drive0.
+ */
+ response = qmp("{\"execute\": \"human-monitor-command\","
+ " \"arguments\": {"
+ " \"command-line\": \"drive_add pci-addr=auto if=none,id=drive0\""
+ "}}");
+ g_assert(response);
+ g_assert(!strcmp(qdict_get_try_str(response, "return") ?: "",
+ "OK\r\n"));
+ QDECREF(response);
+
+ qtest_end();
+}
+
+int main(int argc, char **argv)
+{
+ const char *arch = qtest_get_arch();
+
+ /* Check architecture */
+ if (strcmp(arch, "i386") && strcmp(arch, "x86_64")) {
+ g_test_message("Skipping test for non-x86\n");
+ return 0;
+ }
+
+ /* Run the tests */
+ g_test_init(&argc, &argv, NULL);
+
+ qtest_add_func("/qmp/device_add", test_device_add);
+
+ return g_test_run();
+}
diff --git a/tests/qemu-iotests/017 b/tests/qemu-iotests/017
index 45f2c0b055..aba3faf712 100755
--- a/tests/qemu-iotests/017
+++ b/tests/qemu-iotests/017
@@ -66,7 +66,7 @@ echo "Creating test image with backing file"
echo
TEST_IMG=$TEST_IMG_SAVE
-_make_test_img -b $TEST_IMG.base 6G
+_make_test_img -b "$TEST_IMG.base" 6G
echo "Filling test image"
echo
diff --git a/tests/qemu-iotests/019 b/tests/qemu-iotests/019
index cd3582cf6f..5bb18d0c0a 100755
--- a/tests/qemu-iotests/019
+++ b/tests/qemu-iotests/019
@@ -90,12 +90,12 @@ mv "$TEST_IMG" "$TEST_IMG.orig"
# Test the conversion twice: One test with the old-style -B option and another
# one with -o backing_file
-for backing_option in "-B $TEST_IMG.base" "-o backing_file=$TEST_IMG.base"; do
+for backing_option in "-B " "-o backing_file="; do
echo
- echo Testing conversion with $backing_option | _filter_testdir | _filter_imgfmt
+ echo Testing conversion with $backing_option$TEST_IMG.base | _filter_testdir | _filter_imgfmt
echo
- $QEMU_IMG convert -O $IMGFMT $backing_option "$TEST_IMG.orig" "$TEST_IMG"
+ $QEMU_IMG convert -O $IMGFMT $backing_option"$TEST_IMG.base" "$TEST_IMG.orig" "$TEST_IMG"
echo "Checking if backing clusters are allocated when they shouldn't"
echo
diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039
index f85b4ce63f..8bade92a80 100755
--- a/tests/qemu-iotests/039
+++ b/tests/qemu-iotests/039
@@ -54,7 +54,7 @@ echo "== Checking that image is clean on shutdown =="
IMGOPTS="compat=1.1,lazy_refcounts=on"
_make_test_img $size
-$QEMU_IO -c "write -P 0x5a 0 512" ""$TEST_IMG"" | _filter_qemu_io
+$QEMU_IO -c "write -P 0x5a 0 512" "$TEST_IMG" | _filter_qemu_io
# The dirty bit must not be set
./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051
index 356c3756f4..0a4971d437 100755
--- a/tests/qemu-iotests/051
+++ b/tests/qemu-iotests/051
@@ -64,9 +64,9 @@ function run_qemu()
size=128M
_make_test_img $size
-cp $TEST_IMG $TEST_IMG.orig
-mv $TEST_IMG $TEST_IMG.base
-_make_test_img -b $TEST_IMG.base $size
+cp "$TEST_IMG" "$TEST_IMG.orig"
+mv "$TEST_IMG" "$TEST_IMG.base"
+_make_test_img -b "$TEST_IMG.base" $size
echo
echo === Unknown option ===
@@ -81,7 +81,7 @@ echo
echo === Overriding backing file ===
echo
-echo "info block" | run_qemu -drive file=$TEST_IMG,driver=qcow2,backing.file.filename=$TEST_IMG.orig -nodefaults
+echo "info block" | run_qemu -drive file="$TEST_IMG",driver=qcow2,backing.file.filename="$TEST_IMG.orig" -nodefaults
echo
echo === Enable and disable lazy refcounting on the command line, plus some invalid values ===
diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061
index fa9319da26..e42f9bd5e8 100755
--- a/tests/qemu-iotests/061
+++ b/tests/qemu-iotests/061
@@ -163,7 +163,7 @@ echo "=== Testing zero expansion on backed image ==="
echo
IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M
$QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io
-IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M
+IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M
$QEMU_IO -c "read -P 0x2a 0 128k" -c "write -z 0 64k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
_check_test_img
@@ -174,7 +174,7 @@ echo "=== Testing zero expansion on backed inactive clusters ==="
echo
IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M
$QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io
-IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M
+IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M
$QEMU_IO -c "write -z 0 64k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "write -P 0x42 0 128k" "$TEST_IMG" | _filter_qemu_io
@@ -190,7 +190,7 @@ echo "=== Testing zero expansion on backed image with shared L2 table ==="
echo
IMGOPTS="compat=1.1" TEST_IMG="$TEST_IMG.base" _make_test_img 64M
$QEMU_IO -c "write -P 0x2a 0 128k" "$TEST_IMG.base" | _filter_qemu_io
-IMGOPTS="compat=1.1,backing_file=$TEST_IMG.base" _make_test_img 64M
+IMGOPTS="compat=1.1" _make_test_img -b "$TEST_IMG.base" 64M
$QEMU_IO -c "write -z 0 128k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
diff --git a/tests/qemu-iotests/064 b/tests/qemu-iotests/064
index 6789aa6ee4..1c74c31a1a 100755
--- a/tests/qemu-iotests/064
+++ b/tests/qemu-iotests/064
@@ -56,6 +56,17 @@ echo
echo "=== Verify pattern 0x00, 66M - 1024M ==="
$QEMU_IO -r -c "read -pP 0x00 66M 958M" "$TEST_IMG" | _filter_qemu_io
+echo
+echo "=== Verify pattern write, 0xc3 99M-157M ==="
+$QEMU_IO -c "write -pP 0xc3 99M 58M" "$TEST_IMG" | _filter_qemu_io
+# first verify we didn't write where we should not have
+$QEMU_IO -c "read -pP 0xa5 0 33M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -pP 0x96 33M 33M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -pP 0x00 66M 33M" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -pP 0x00 157MM 867MM" "$TEST_IMG" | _filter_qemu_io
+# now verify what we should have actually written
+$QEMU_IO -c "read -pP 0xc3 99M 58M" "$TEST_IMG" | _filter_qemu_io
+
# success, all done
echo "*** done"
rm -f $seq.full
diff --git a/tests/qemu-iotests/064.out b/tests/qemu-iotests/064.out
index b9e8e4a873..5346a4e630 100644
--- a/tests/qemu-iotests/064.out
+++ b/tests/qemu-iotests/064.out
@@ -11,4 +11,18 @@ read 34603008/34603008 bytes at offset 34603008
=== Verify pattern 0x00, 66M - 1024M ===
read 1004535808/1004535808 bytes at offset 69206016
958 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Verify pattern write, 0xc3 99M-157M ===
+wrote 60817408/60817408 bytes at offset 103809024
+58 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 34603008/34603008 bytes at offset 0
+33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 34603008/34603008 bytes at offset 34603008
+33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 34603008/34603008 bytes at offset 69206016
+33 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 909115392/909115392 bytes at offset 164626432
+867 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 60817408/60817408 bytes at offset 103809024
+58 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
*** done
diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067
index 79dc38bc04..d025192c83 100755
--- a/tests/qemu-iotests/067
+++ b/tests/qemu-iotests/067
@@ -45,7 +45,7 @@ function do_run_qemu()
function run_qemu()
{
- do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp
+ do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
}
size=128M
diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out
index 4bb9ff9652..8d271cc41a 100644
--- a/tests/qemu-iotests/067.out
+++ b/tests/qemu-iotests/067.out
@@ -6,7 +6,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device virtio-blk-pci,drive=disk,id=virtio0
QMP_VERSION
{"return": {}}
-{"return": [{"io-status": "ok", "device": "disk", "locked": false, "removable": false, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]}
+{"return": [{"io-status": "ok", "device": "disk", "locked": false, "removable": false, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]}
{"return": {}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/virtio0/virtio-backend"}}
@@ -24,7 +24,7 @@ QMP_VERSION
Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk
QMP_VERSION
{"return": {}}
-{"return": [{"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]}
+{"return": [{"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}]}
{"return": {}}
{"return": {}}
{"return": {}}
@@ -44,7 +44,7 @@ Testing:
QMP_VERSION
{"return": {}}
{"return": "OK\r\n"}
-{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]}
+{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]}
{"return": {}}
{"return": {}}
{"return": {}}
@@ -64,14 +64,14 @@ Testing:
QMP_VERSION
{"return": {}}
{"return": {}}
-{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]}
+{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]}
{"return": {}}
{"return": {}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/virtio0/virtio-backend"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_DELETED", "data": {"device": "virtio0", "path": "/machine/peripheral/virtio0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "RESET"}
-{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": 139264, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]}
+{"return": [{"io-status": "ok", "device": "ide1-cd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "floppy0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"device": "sd0", "locked": false, "removable": true, "tray_open": false, "type": "unknown"}, {"io-status": "ok", "device": "disk", "locked": false, "removable": true, "inserted": {"iops_rd": 0, "image": {"virtual-size": 134217728, "filename": "TEST_DIR/t.qcow2", "cluster-size": 65536, "format": "qcow2", "actual-size": SIZE, "format-specific": {"type": "qcow2", "data": {"compat": "1.1", "lazy-refcounts": false}}, "dirty-flag": false}, "iops_wr": 0, "ro": false, "backing_file_depth": 0, "drv": "qcow2", "iops": 0, "bps_wr": 0, "encrypted": false, "bps": 0, "bps_rd": 0, "file": "TEST_DIR/t.qcow2", "encryption_key_missing": false}, "tray_open": false, "type": "unknown"}]}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}}
diff --git a/tests/qemu-iotests/070 b/tests/qemu-iotests/070
new file mode 100755
index 0000000000..41bf100701
--- /dev/null
+++ b/tests/qemu-iotests/070
@@ -0,0 +1,67 @@
+#!/bin/bash
+#
+# Test VHDX log replay from an image with a journal that needs to be
+# replayed
+#
+# Copyright (C) 2013 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=jcody@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt vhdx
+_supported_proto generic
+_supported_os Linux
+
+# With the log replayed, the pattern 0xa5 extends to 0xc025000
+# If the log was not replayed, it would only extend to 0xc000000
+#
+# This image is a 10G dynamic image, with 4M block size, and 1 unplayed
+# data sector in the log
+#
+# This image was created with qemu-img, however it was verified using
+# Hyper-V to properly replay the logs and give the same post-replay
+# image as qemu.
+_use_sample_img iotest-dirtylog-10G-4M.vhdx.bz2
+
+echo
+echo "=== Verify open image read-only fails, due to dirty log ==="
+$QEMU_IO -r -c "read -pP 0xa5 0 18M" "$TEST_IMG" 2>&1 | grep -o "Permission denied"
+
+echo "=== Verify open image replays log ==="
+$QEMU_IO -c "read -pP 0xa5 0 18M" "$TEST_IMG" | _filter_qemu_io
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/070.out b/tests/qemu-iotests/070.out
new file mode 100644
index 0000000000..9db8ff2650
--- /dev/null
+++ b/tests/qemu-iotests/070.out
@@ -0,0 +1,8 @@
+QA output created by 070
+
+=== Verify open image read-only fails, due to dirty log ===
+Permission denied
+=== Verify open image replays log ===
+read 18874368/18874368 bytes at offset 0
+18 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index 2932e14e73..8cde7f11fa 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -200,7 +200,6 @@ testlist options
-vhdx)
IMGFMT=vhdx
xpand=false
- IMGFMT_GENERIC=false
;;
-rbd)
diff --git a/tests/qemu-iotests/common.pattern b/tests/qemu-iotests/common.pattern
index 00e0f605fd..ddfbca1b76 100644
--- a/tests/qemu-iotests/common.pattern
+++ b/tests/qemu-iotests/common.pattern
@@ -28,7 +28,7 @@ function do_is_allocated() {
}
function is_allocated() {
- do_is_allocated "$@" | $QEMU_IO $TEST_IMG | _filter_qemu_io
+ do_is_allocated "$@" | $QEMU_IO "$TEST_IMG" | _filter_qemu_io
}
function do_io() {
@@ -46,18 +46,18 @@ function do_io() {
}
function io_pattern() {
- do_io "$@" | $QEMU_IO $TEST_IMG | _filter_qemu_io
+ do_io "$@" | $QEMU_IO "$TEST_IMG" | _filter_qemu_io
}
function io() {
local start=$2
local pattern=$(( (start >> 9) % 256 ))
- do_io "$@" $pattern | $QEMU_IO $TEST_IMG | _filter_qemu_io
+ do_io "$@" $pattern | $QEMU_IO "$TEST_IMG" | _filter_qemu_io
}
function io_zero() {
- do_io "$@" 0 | $QEMU_IO $TEST_IMG | _filter_qemu_io
+ do_io "$@" 0 | $QEMU_IO "$TEST_IMG" | _filter_qemu_io
}
function io_test() {
@@ -117,8 +117,8 @@ function io_test2() {
echo === Clusters to be compressed [3]
io_pattern writev $((offset + 8 * $cluster_size)) $cluster_size $((9 * $cluster_size)) $num 165
- mv $TEST_IMG $TEST_IMG.orig
- $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -c $TEST_IMG.orig $TEST_IMG
+ mv "$TEST_IMG" "$TEST_IMG.orig"
+ $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -c "$TEST_IMG.orig" "$TEST_IMG"
# Write the used clusters
echo === Used clusters [1]
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 4e826040d4..7f6245770a 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -111,6 +111,8 @@ _make_test_img()
local image_size=$*
local optstr=""
local img_name=""
+ local use_backing=0
+ local backing_file=""
if [ -n "$TEST_IMG_FILE" ]; then
img_name=$TEST_IMG_FILE
@@ -123,7 +125,8 @@ _make_test_img()
fi
if [ "$1" = "-b" ]; then
- extra_img_options="$1 $2"
+ use_backing=1
+ backing_file=$2
image_size=$3
fi
if [ \( "$IMGFMT" = "qcow2" -o "$IMGFMT" = "qed" \) -a -n "$CLUSTER_SIZE" ]; then
@@ -135,7 +138,13 @@ _make_test_img()
fi
# XXX(hch): have global image options?
- $QEMU_IMG create -f $IMGFMT $extra_img_options $img_name $image_size 2>&1 | \
+ (
+ if [ $use_backing = 1 ]; then
+ $QEMU_IMG create -f $IMGFMT $extra_img_options -b "$backing_file" "$img_name" $image_size 2>&1
+ else
+ $QEMU_IMG create -f $IMGFMT $extra_img_options "$img_name" $image_size 2>&1
+ fi
+ ) | \
sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
-e "s#$TEST_DIR#TEST_DIR#g" \
-e "s#$IMGFMT#IMGFMT#g" \
@@ -148,7 +157,10 @@ _make_test_img()
-e "s# zeroed_grain=\\(on\\|off\\)##g" \
-e "s# subformat='[^']*'##g" \
-e "s# adapter_type='[^']*'##g" \
- -e "s# lazy_refcounts=\\(on\\|off\\)##g"
+ -e "s# lazy_refcounts=\\(on\\|off\\)##g" \
+ -e "s# block_size=[0-9]\\+##g" \
+ -e "s# block_state_zero=\\(on\\|off\\)##g" \
+ -e "s# log_size=[0-9]\\+##g"
# Start an NBD server on the image file, which is what we'll be talking to
if [ $IMGPROTO = "nbd" ]; then
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index c57ff35843..b18b241f8d 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -75,3 +75,4 @@
067 rw auto
068 rw auto
069 rw auto
+070 rw auto
diff --git a/tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2 b/tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2
new file mode 100644
index 0000000000..4b91cfc654
--- /dev/null
+++ b/tests/qemu-iotests/sample_images/iotest-dirtylog-10G-4M.vhdx.bz2
Binary files differ