author     Anthony Liguori <aliguori@us.ibm.com>  2010-09-21 17:50:58 -0500
committer  Anthony Liguori <aliguori@us.ibm.com>  2010-09-21 17:50:58 -0500
commit     e1bb0a1a6c7c6d51443856ca90bd538b1ef5feeb (patch)
tree       ec271a0d224608382eb146d68298fd9e9fd0d3c9 /block
parent     f36d53ef6cb848d5cf204b0854b12e0359f0fd7e (diff)
parent     d9d334176c5dba23b47be07192f431b0c030928d (diff)
Merge remote branch 'kwolf/for-anthony' into staging
Diffstat (limited to 'block')
-rw-r--r--  block/blkverify.c       382
-rw-r--r--  block/nbd.c               2
-rw-r--r--  block/qcow2-cluster.c    19
-rw-r--r--  block/qcow2-refcount.c   13
-rw-r--r--  block/qcow2-snapshot.c    2
-rw-r--r--  block/qcow2.c           115
-rw-r--r--  block/qcow2.h             4
-rw-r--r--  block/raw-posix.c        79
-rw-r--r--  block/vvfat.c            26
9 files changed, 551 insertions(+), 91 deletions(-)
diff --git a/block/blkverify.c b/block/blkverify.c
new file mode 100644
index 0000000000..4202685a63
--- /dev/null
+++ b/block/blkverify.c
@@ -0,0 +1,382 @@
+/*
+ * Block protocol for block driver correctness testing
+ *
+ * Copyright (C) 2010 IBM, Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdarg.h>
+#include "qemu_socket.h" /* for EINPROGRESS on Windows */
+#include "block_int.h"
+
+typedef struct {
+ BlockDriverState *test_file;
+} BDRVBlkverifyState;
+
+typedef struct BlkverifyAIOCB BlkverifyAIOCB;
+struct BlkverifyAIOCB {
+ BlockDriverAIOCB common;
+ QEMUBH *bh;
+
+ /* Request metadata */
+ bool is_write;
+ int64_t sector_num;
+ int nb_sectors;
+
+ int ret; /* first completed request's result */
+ unsigned int done; /* completion counter */
+ bool *finished; /* completion signal for cancel */
+
+ QEMUIOVector *qiov; /* user I/O vector */
+ QEMUIOVector raw_qiov; /* cloned I/O vector for raw file */
+ void *buf; /* buffer for raw file I/O */
+
+ void (*verify)(BlkverifyAIOCB *acb);
+};
+
+static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+ BlkverifyAIOCB *acb = (BlkverifyAIOCB *)blockacb;
+ bool finished = false;
+
+ /* Wait until request completes, invokes its callback, and frees itself */
+ acb->finished = &finished;
+ while (!finished) {
+ qemu_aio_wait();
+ }
+}
+
+static AIOPool blkverify_aio_pool = {
+ .aiocb_size = sizeof(BlkverifyAIOCB),
+ .cancel = blkverify_aio_cancel,
+};
+
+static void blkverify_err(BlkverifyAIOCB *acb, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ fprintf(stderr, "blkverify: %s sector_num=%" PRId64 " nb_sectors=%d ",
+ acb->is_write ? "write" : "read", acb->sector_num,
+ acb->nb_sectors);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+ exit(1);
+}
+
+/* Valid blkverify filenames look like blkverify:path/to/raw_image:path/to/image */
+static int blkverify_open(BlockDriverState *bs, const char *filename, int flags)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+ int ret;
+ char *raw, *c;
+
+ /* Parse the blkverify: prefix */
+ if (strncmp(filename, "blkverify:", strlen("blkverify:"))) {
+ return -EINVAL;
+ }
+ filename += strlen("blkverify:");
+
+ /* Parse the raw image filename */
+ c = strchr(filename, ':');
+ if (c == NULL) {
+ return -EINVAL;
+ }
+
+ raw = strdup(filename);
+ raw[c - filename] = '\0';
+ ret = bdrv_file_open(&bs->file, raw, flags);
+ free(raw);
+ if (ret < 0) {
+ return ret;
+ }
+ filename = c + 1;
+
+ /* Open the test file */
+ s->test_file = bdrv_new("");
+ ret = bdrv_open(s->test_file, filename, flags, NULL);
+ if (ret < 0) {
+ bdrv_delete(s->test_file);
+ s->test_file = NULL;
+ return ret;
+ }
+
+ return 0;
+}
+
+static void blkverify_close(BlockDriverState *bs)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ bdrv_delete(s->test_file);
+ s->test_file = NULL;
+}
+
+static void blkverify_flush(BlockDriverState *bs)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ /* Only flush test file, the raw file is not important */
+ bdrv_flush(s->test_file);
+}
+
+static int64_t blkverify_getlength(BlockDriverState *bs)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ return bdrv_getlength(s->test_file);
+}
+
+/**
+ * Check that I/O vector contents are identical
+ *
+ * @a: I/O vector
+ * @b: I/O vector
+ * @ret: Offset to first mismatching byte or -1 if match
+ */
+static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+ int i;
+ ssize_t offset = 0;
+
+ assert(a->niov == b->niov);
+ for (i = 0; i < a->niov; i++) {
+ size_t len = 0;
+ uint8_t *p = (uint8_t *)a->iov[i].iov_base;
+ uint8_t *q = (uint8_t *)b->iov[i].iov_base;
+
+ assert(a->iov[i].iov_len == b->iov[i].iov_len);
+ while (len < a->iov[i].iov_len && *p++ == *q++) {
+ len++;
+ }
+
+ offset += len;
+
+ if (len != a->iov[i].iov_len) {
+ return offset;
+ }
+ }
+ return -1;
+}
+
+typedef struct {
+ int src_index;
+ struct iovec *src_iov;
+ void *dest_base;
+} IOVectorSortElem;
+
+static int sortelem_cmp_src_base(const void *a, const void *b)
+{
+ const IOVectorSortElem *elem_a = a;
+ const IOVectorSortElem *elem_b = b;
+
+ /* Don't overflow */
+ if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
+ return -1;
+ } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static int sortelem_cmp_src_index(const void *a, const void *b)
+{
+ const IOVectorSortElem *elem_a = a;
+ const IOVectorSortElem *elem_b = b;
+
+ return elem_a->src_index - elem_b->src_index;
+}
+
+/**
+ * Copy contents of I/O vector
+ *
+ * The relative relationships of overlapping iovecs are preserved. This is
+ * necessary to ensure identical semantics in the cloned I/O vector.
+ */
+static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src,
+ void *buf)
+{
+ IOVectorSortElem sortelems[src->niov];
+ void *last_end;
+ int i;
+
+ /* Sort the source iovecs by base address */
+ for (i = 0; i < src->niov; i++) {
+ sortelems[i].src_index = i;
+ sortelems[i].src_iov = &src->iov[i];
+ }
+ qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
+
+ /* Allocate buffer space taking into account overlapping iovecs */
+ last_end = NULL;
+ for (i = 0; i < src->niov; i++) {
+ struct iovec *cur = sortelems[i].src_iov;
+ ptrdiff_t rewind = 0;
+
+ /* Detect overlap */
+ if (last_end && last_end > cur->iov_base) {
+ rewind = last_end - cur->iov_base;
+ }
+
+ sortelems[i].dest_base = buf - rewind;
+ buf += cur->iov_len - MIN(rewind, cur->iov_len);
+ last_end = MAX(cur->iov_base + cur->iov_len, last_end);
+ }
+
+ /* Sort by source iovec index and build destination iovec */
+ qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
+ for (i = 0; i < src->niov; i++) {
+ qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
+ }
+}
+
+static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
+ int64_t sector_num, QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aio_pool, bs, cb, opaque);
+
+ acb->bh = NULL;
+ acb->is_write = is_write;
+ acb->sector_num = sector_num;
+ acb->nb_sectors = nb_sectors;
+ acb->ret = -EINPROGRESS;
+ acb->done = 0;
+ acb->qiov = qiov;
+ acb->buf = NULL;
+ acb->verify = NULL;
+ acb->finished = NULL;
+ return acb;
+}
+
+static void blkverify_aio_bh(void *opaque)
+{
+ BlkverifyAIOCB *acb = opaque;
+
+ qemu_bh_delete(acb->bh);
+ if (acb->buf) {
+ qemu_iovec_destroy(&acb->raw_qiov);
+ qemu_vfree(acb->buf);
+ }
+ acb->common.cb(acb->common.opaque, acb->ret);
+ if (acb->finished) {
+ *acb->finished = true;
+ }
+ qemu_aio_release(acb);
+}
+
+static void blkverify_aio_cb(void *opaque, int ret)
+{
+ BlkverifyAIOCB *acb = opaque;
+
+ switch (++acb->done) {
+ case 1:
+ acb->ret = ret;
+ break;
+
+ case 2:
+ if (acb->ret != ret) {
+ blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
+ }
+
+ if (acb->verify) {
+ acb->verify(acb);
+ }
+
+ acb->bh = qemu_bh_new(blkverify_aio_bh, acb);
+ qemu_bh_schedule(acb->bh);
+ break;
+ }
+}
+
+static void blkverify_verify_readv(BlkverifyAIOCB *acb)
+{
+ ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov);
+ if (offset != -1) {
+ blkverify_err(acb, "contents mismatch in sector %" PRId64,
+ acb->sector_num + (offset / BDRV_SECTOR_SIZE));
+ }
+}
+
+static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+ BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
+ nb_sectors, cb, opaque);
+
+ acb->verify = blkverify_verify_readv;
+ acb->buf = qemu_blockalign(bs->file, qiov->size);
+ qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
+ blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
+
+ if (!bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ if (!bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *blkverify_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+ BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
+ nb_sectors, cb, opaque);
+
+ if (!bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ if (!bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ /* Only flush test file, the raw file is not important */
+ return bdrv_aio_flush(s->test_file, cb, opaque);
+}
+
+static BlockDriver bdrv_blkverify = {
+ .format_name = "blkverify",
+ .protocol_name = "blkverify",
+
+ .instance_size = sizeof(BDRVBlkverifyState),
+
+ .bdrv_getlength = blkverify_getlength,
+
+ .bdrv_file_open = blkverify_open,
+ .bdrv_close = blkverify_close,
+ .bdrv_flush = blkverify_flush,
+
+ .bdrv_aio_readv = blkverify_aio_readv,
+ .bdrv_aio_writev = blkverify_aio_writev,
+ .bdrv_aio_flush = blkverify_aio_flush,
+};
+
+static void bdrv_blkverify_init(void)
+{
+ bdrv_register(&bdrv_blkverify);
+}
+
+block_init(bdrv_blkverify_init);
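The new driver is selected purely through the filename parsed in blkverify_open() above: a string of the form blkverify:path/to/raw_image:path/to/image opens the raw image through bs->file and the image under test through s->test_file, mirrors every write to both, and checks every read byte for byte. The snippet below is only an illustrative sketch of that comparison contract on flat buffers; it is not part of the patch and the helper name is made up.

#include <stdio.h>
#include <stddef.h>

/* Hypothetical stand-in for blkverify_iovec_compare(): report the offset of
 * the first mismatching byte, or -1 when both buffers are identical. */
static long first_mismatch(const unsigned char *a, const unsigned char *b,
                           size_t len)
{
    for (size_t i = 0; i < len; i++) {
        if (a[i] != b[i]) {
            return (long)i;
        }
    }
    return -1;
}

int main(void)
{
    unsigned char test[4] = { 1, 2, 3, 4 }; /* data read from the test image */
    unsigned char raw[4]  = { 1, 2, 9, 4 }; /* data read from the raw image  */

    /* Prints 2; blkverify would abort here with a "contents mismatch" error. */
    printf("%ld\n", first_mismatch(test, raw, sizeof(test)));
    return 0;
}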
diff --git a/block/nbd.c b/block/nbd.c
index 5e9d6cb8e6..c8dc763c6b 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -95,8 +95,6 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
if (r == p) {
goto out;
}
- } else if (name == NULL) {
- goto out;
}
sock = tcp_socket_outgoing(hostname, port);
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index f562b1602e..fb4224a669 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -60,6 +60,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size)
qemu_free(new_l1_table);
return new_l1_table_offset;
}
+ bdrv_flush(bs->file);
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++)
@@ -243,6 +244,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
if (l2_offset < 0) {
return l2_offset;
}
+ bdrv_flush(bs->file);
/* allocate a new entry in the l2 cache */
@@ -348,6 +350,8 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
BDRVQcowState *s = bs->opaque;
int ret, index_in_cluster, n, n1;
uint64_t cluster_offset;
+ struct iovec iov;
+ QEMUIOVector qiov;
while (nb_sectors > 0) {
n = nb_sectors;
@@ -362,7 +366,11 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
if (!cluster_offset) {
if (bs->backing_hd) {
/* read from the base image */
- n1 = qcow2_backing_read1(bs->backing_hd, sector_num, buf, n);
+ iov.iov_base = buf;
+ iov.iov_len = n * 512;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n);
if (n1 > 0) {
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING);
ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);
@@ -413,7 +421,7 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
&s->aes_encrypt_key);
}
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
- ret = bdrv_write_sync(bs->file, (cluster_offset >> 9) + n_start,
+ ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start,
s->cluster_data, n);
if (ret < 0)
return ret;
@@ -712,6 +720,13 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
(i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
}
+ /*
+ * Before we update the L2 table to actually point to the new cluster, we
+ * need to be sure that the refcounts have been increased and COW was
+ * handled.
+ */
+ bdrv_flush(bs->file);
+
ret = write_l2_entries(bs, l2_table, l2_offset, l2_index, m->nb_clusters);
if (ret < 0) {
qcow2_l2_cache_reset(bs);
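The bdrv_flush(bs->file) calls added in this file (and the matching ones in qcow2-refcount.c and qcow2-snapshot.c below) let the per-write bdrv_write_sync()/bdrv_pwrite_sync() calls become ordinary writes while keeping one ordering rule: a new cluster's refcount update and COW must be stable on disk before any L2 entry points at it, otherwise a crash could leave the table referencing unallocated or stale data. A minimal POSIX analogy of that pattern, not QEMU code and with made-up offsets, is:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    int fd = open("ordering-demo.bin", O_RDWR | O_CREAT | O_TRUNC, 0600);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    char data[512] = "new cluster contents";
    uint64_t l2_entry = 4096;                    /* where the new data lives */

    pwrite(fd, data, sizeof(data), 4096);        /* COW/refcount work first  */
    fsync(fd);                                   /* ~ bdrv_flush(bs->file)   */
    pwrite(fd, &l2_entry, sizeof(l2_entry), 0);  /* only now publish the ref */

    close(fd);
    return 0;
}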
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 4c19e7ebd8..7082601f59 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -261,6 +261,8 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
goto fail_block;
}
+ bdrv_flush(bs->file);
+
/* Initialize the new refcount block only after updating its refcount,
* update_refcount uses the refcount cache itself */
memset(s->refcount_block_cache, 0, s->cluster_size);
@@ -444,7 +446,7 @@ static int write_refcount_block_entries(BlockDriverState *bs,
size = (last_index - first_index) << REFCOUNT_SHIFT;
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
- ret = bdrv_pwrite_sync(bs->file,
+ ret = bdrv_pwrite(bs->file,
refcount_block_offset + (first_index << REFCOUNT_SHIFT),
&s->refcount_block_cache[first_index], size);
if (ret < 0) {
@@ -573,6 +575,8 @@ static int update_cluster_refcount(BlockDriverState *bs,
return ret;
}
+ bdrv_flush(bs->file);
+
return get_refcount(bs, cluster_index);
}
@@ -624,6 +628,7 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
if (ret < 0) {
return ret;
}
+
return offset;
}
@@ -671,6 +676,8 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
goto redo;
}
}
+
+ bdrv_flush(bs->file);
return offset;
}
@@ -801,6 +808,10 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
if (ret < 0) {
goto fail;
}
+
+ /* TODO Flushing once for the whole function should
+ * be enough */
+ bdrv_flush(bs->file);
}
/* compressed clusters are never modified */
refcount = 2;
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 6228612f84..bbfcaaae22 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -138,6 +138,7 @@ static int qcow_write_snapshots(BlockDriverState *bs)
snapshots_size = offset;
snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
+ bdrv_flush(bs->file);
offset = snapshots_offset;
if (offset < 0) {
return offset;
@@ -271,6 +272,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
if (l1_table_offset < 0) {
goto fail;
}
+ bdrv_flush(bs->file);
sn->l1_table_offset = l1_table_offset;
sn->l1_size = s->l1_size;
diff --git a/block/qcow2.c b/block/qcow2.c
index a53014dbda..ee3481b786 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -311,8 +311,8 @@ static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
}
/* handle reading after the end of the backing file */
-int qcow2_backing_read1(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors)
+int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t sector_num, int nb_sectors)
{
int n1;
if ((sector_num + nb_sectors) <= bs->total_sectors)
@@ -321,7 +321,9 @@ int qcow2_backing_read1(BlockDriverState *bs,
n1 = 0;
else
n1 = bs->total_sectors - sector_num;
- memset(buf + n1 * 512, 0, 512 * (nb_sectors - n1));
+
+ qemu_iovec_memset(qiov, 0, 512 * (nb_sectors - n1));
+
return n1;
}
@@ -329,14 +331,12 @@ typedef struct QCowAIOCB {
BlockDriverAIOCB common;
int64_t sector_num;
QEMUIOVector *qiov;
- uint8_t *buf;
- void *orig_buf;
int remaining_sectors;
int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t bytes_done;
uint64_t cluster_offset;
uint8_t *cluster_data;
BlockDriverAIOCB *hd_aiocb;
- struct iovec hd_iov;
QEMUIOVector hd_qiov;
QEMUBH *bh;
QCowL2Meta l2meta;
@@ -397,15 +397,19 @@ static void qcow_aio_read_cb(void *opaque, int ret)
/* nothing to do */
} else {
if (s->crypt_method) {
- qcow2_encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
- acb->cur_nr_sectors, 0,
- &s->aes_decrypt_key);
+ qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data,
+ acb->cluster_data, acb->cur_nr_sectors, 0, &s->aes_decrypt_key);
+ qemu_iovec_reset(&acb->hd_qiov);
+ qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
+ acb->cur_nr_sectors * 512);
+ qemu_iovec_from_buffer(&acb->hd_qiov, acb->cluster_data,
+ 512 * acb->cur_nr_sectors);
}
}
acb->remaining_sectors -= acb->cur_nr_sectors;
acb->sector_num += acb->cur_nr_sectors;
- acb->buf += acb->cur_nr_sectors * 512;
+ acb->bytes_done += acb->cur_nr_sectors * 512;
if (acb->remaining_sectors == 0) {
/* request completed */
@@ -415,6 +419,11 @@ static void qcow_aio_read_cb(void *opaque, int ret)
/* prepare next AIO request */
acb->cur_nr_sectors = acb->remaining_sectors;
+ if (s->crypt_method) {
+ acb->cur_nr_sectors = MIN(acb->cur_nr_sectors,
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ }
+
ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9,
&acb->cur_nr_sectors, &acb->cluster_offset);
if (ret < 0) {
@@ -423,15 +432,17 @@ static void qcow_aio_read_cb(void *opaque, int ret)
index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
+ qemu_iovec_reset(&acb->hd_qiov);
+ qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
+ acb->cur_nr_sectors * 512);
+
if (!acb->cluster_offset) {
+
if (bs->backing_hd) {
/* read from the base image */
- n1 = qcow2_backing_read1(bs->backing_hd, acb->sector_num,
- acb->buf, acb->cur_nr_sectors);
+ n1 = qcow2_backing_read1(bs->backing_hd, &acb->hd_qiov,
+ acb->sector_num, acb->cur_nr_sectors);
if (n1 > 0) {
- acb->hd_iov.iov_base = (void *)acb->buf;
- acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
- qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
&acb->hd_qiov, acb->cur_nr_sectors,
@@ -445,7 +456,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
}
} else {
/* Note: in this case, no need to wait */
- memset(acb->buf, 0, 512 * acb->cur_nr_sectors);
+ qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors);
ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
if (ret < 0)
goto done;
@@ -454,8 +465,11 @@ static void qcow_aio_read_cb(void *opaque, int ret)
/* add AIO support for compressed blocks ? */
if (qcow2_decompress_cluster(bs, acb->cluster_offset) < 0)
goto done;
- memcpy(acb->buf, s->cluster_cache + index_in_cluster * 512,
- 512 * acb->cur_nr_sectors);
+
+ qemu_iovec_from_buffer(&acb->hd_qiov,
+ s->cluster_cache + index_in_cluster * 512,
+ 512 * acb->cur_nr_sectors);
+
ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
if (ret < 0)
goto done;
@@ -465,9 +479,23 @@ static void qcow_aio_read_cb(void *opaque, int ret)
goto done;
}
- acb->hd_iov.iov_base = (void *)acb->buf;
- acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
- qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+ if (s->crypt_method) {
+ /*
+ * For encrypted images, read everything into a temporary
+ * contiguous buffer on which the AES functions can work.
+ */
+ if (!acb->cluster_data) {
+ acb->cluster_data =
+ qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ }
+
+ assert(acb->cur_nr_sectors <=
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ qemu_iovec_reset(&acb->hd_qiov);
+ qemu_iovec_add(&acb->hd_qiov, acb->cluster_data,
+ 512 * acb->cur_nr_sectors);
+ }
+
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
acb->hd_aiocb = bdrv_aio_readv(bs->file,
(acb->cluster_offset >> 9) + index_in_cluster,
@@ -481,11 +509,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
return;
done:
- if (acb->qiov->niov > 1) {
- qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
- qemu_vfree(acb->orig_buf);
- }
acb->common.cb(acb->common.opaque, ret);
+ qemu_iovec_destroy(&acb->hd_qiov);
qemu_aio_release(acb);
}
@@ -501,13 +526,10 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
acb->hd_aiocb = NULL;
acb->sector_num = sector_num;
acb->qiov = qiov;
- if (qiov->niov > 1) {
- acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
- if (is_write)
- qemu_iovec_to_buffer(qiov, acb->buf);
- } else {
- acb->buf = (uint8_t *)qiov->iov->iov_base;
- }
+
+ qemu_iovec_init(&acb->hd_qiov, qiov->niov);
+
+ acb->bytes_done = 0;
acb->remaining_sectors = nb_sectors;
acb->cur_nr_sectors = 0;
acb->cluster_offset = 0;
@@ -557,7 +579,6 @@ static void qcow_aio_write_cb(void *opaque, int ret)
BlockDriverState *bs = acb->common.bs;
BDRVQcowState *s = bs->opaque;
int index_in_cluster;
- const uint8_t *src_buf;
int n_end;
acb->hd_aiocb = NULL;
@@ -573,7 +594,7 @@ static void qcow_aio_write_cb(void *opaque, int ret)
acb->remaining_sectors -= acb->cur_nr_sectors;
acb->sector_num += acb->cur_nr_sectors;
- acb->buf += acb->cur_nr_sectors * 512;
+ acb->bytes_done += acb->cur_nr_sectors * 512;
if (acb->remaining_sectors == 0) {
/* request completed */
@@ -604,20 +625,27 @@ static void qcow_aio_write_cb(void *opaque, int ret)
assert((acb->cluster_offset & 511) == 0);
+ qemu_iovec_reset(&acb->hd_qiov);
+ qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
+ acb->cur_nr_sectors * 512);
+
if (s->crypt_method) {
if (!acb->cluster_data) {
acb->cluster_data = qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS *
s->cluster_size);
}
- qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
- acb->cur_nr_sectors, 1, &s->aes_encrypt_key);
- src_buf = acb->cluster_data;
- } else {
- src_buf = acb->buf;
+
+ assert(acb->hd_qiov.size <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ qemu_iovec_to_buffer(&acb->hd_qiov, acb->cluster_data);
+
+ qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data,
+ acb->cluster_data, acb->cur_nr_sectors, 1, &s->aes_encrypt_key);
+
+ qemu_iovec_reset(&acb->hd_qiov);
+ qemu_iovec_add(&acb->hd_qiov, acb->cluster_data,
+ acb->cur_nr_sectors * 512);
}
- acb->hd_iov.iov_base = (void *)src_buf;
- acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
- qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
acb->hd_aiocb = bdrv_aio_writev(bs->file,
(acb->cluster_offset >> 9) + index_in_cluster,
@@ -635,9 +663,8 @@ fail:
QLIST_REMOVE(&acb->l2meta, next_in_flight);
}
done:
- if (acb->qiov->niov > 1)
- qemu_vfree(acb->orig_buf);
acb->common.cb(acb->common.opaque, ret);
+ qemu_iovec_destroy(&acb->hd_qiov);
qemu_aio_release(acb);
}
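With acb->buf and acb->orig_buf gone, each iteration of the read and write state machines now rebuilds acb->hd_qiov as the slice of the guest's vector starting acb->bytes_done bytes into the request; only encrypted images still bounce through acb->cluster_data, which is why cur_nr_sectors is capped to QCOW_MAX_CRYPT_CLUSTERS worth of sectors on that path. As a standalone sketch of the slicing step (roughly what the qemu_iovec_copy() calls do here; the helper below is illustrative, not QEMU API):

#include <stdio.h>
#include <sys/uio.h>

/* Illustrative: build a sub-vector covering [skip, skip + size) of src. */
static int iov_slice(struct iovec *dst, const struct iovec *src, int src_cnt,
                     size_t skip, size_t size)
{
    int n = 0;

    for (int i = 0; i < src_cnt && size > 0; i++) {
        size_t len = src[i].iov_len;

        if (skip >= len) {              /* element lies before the slice */
            skip -= len;
            continue;
        }
        dst[n].iov_base = (char *)src[i].iov_base + skip;
        dst[n].iov_len  = (len - skip < size) ? len - skip : size;
        size -= dst[n].iov_len;
        skip = 0;
        n++;
    }
    return n;                           /* number of elements written to dst */
}

int main(void)
{
    char a[1024], b[1024];
    struct iovec guest[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
    struct iovec slice[2];

    /* Take 512 bytes starting 768 bytes into the guest vector:
     * 256 bytes from the tail of a[] plus 256 bytes from the head of b[]. */
    int n = iov_slice(slice, guest, 2, 768, 512);
    printf("%d elements: %zu and %zu bytes\n",
           n, slice[0].iov_len, n > 1 ? slice[1].iov_len : 0);
    return 0;
}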
diff --git a/block/qcow2.h b/block/qcow2.h
index 3ff162efcd..356a34af43 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -166,8 +166,8 @@ static inline int64_t align_offset(int64_t offset, int n)
// FIXME Need qcow2_ prefix to global functions
/* qcow2.c functions */
-int qcow2_backing_read1(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors);
+int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t sector_num, int nb_sectors);
/* qcow2-refcount.c functions */
int qcow2_refcount_init(BlockDriverState *bs);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 813372a6c7..a5cbb7e929 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -97,12 +97,12 @@
#define FTYPE_CD 1
#define FTYPE_FD 2
-#define ALIGNED_BUFFER_SIZE (32 * 512)
-
/* if the FD is not accessed during that time (in ms), we try to
reopen it to see if the disk has been changed */
#define FD_OPEN_TIMEOUT 1000
+#define MAX_BLOCKSIZE 4096
+
typedef struct BDRVRawState {
int fd;
int type;
@@ -118,7 +118,8 @@ typedef struct BDRVRawState {
int use_aio;
void *aio_ctx;
#endif
- uint8_t* aligned_buf;
+ uint8_t *aligned_buf;
+ unsigned aligned_buf_size;
} BDRVRawState;
static int fd_open(BlockDriverState *bs);
@@ -161,7 +162,12 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
s->aligned_buf = NULL;
if ((bdrv_flags & BDRV_O_NOCACHE)) {
- s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE);
+ /*
+ * Allocate a buffer for read/modify/write cycles. Choose the size
+ * pessimistically as we don't know the block size yet.
+ */
+ s->aligned_buf_size = 32 * MAX_BLOCKSIZE;
+ s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE, s->aligned_buf_size);
if (s->aligned_buf == NULL) {
goto out_close;
}
@@ -278,8 +284,9 @@ static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
}
/*
- * offset and count are in bytes, but must be multiples of 512 for files
- * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ * offset and count are in bytes, but must be multiples of the sector size
+ * for files opened with O_DIRECT. buf must be aligned to sector size bytes
+ * then.
*
* This function may be called without alignment if the caller ensures
* that O_DIRECT is not in effect.
@@ -316,24 +323,25 @@ static int raw_pread(BlockDriverState *bs, int64_t offset,
uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
+ unsigned sector_mask = bs->buffer_alignment - 1;
int size, ret, shift, sum;
sum = 0;
if (s->aligned_buf != NULL) {
- if (offset & 0x1ff) {
- /* align offset on a 512 bytes boundary */
+ if (offset & sector_mask) {
+ /* align offset on a sector-size boundary */
- shift = offset & 0x1ff;
- size = (shift + count + 0x1ff) & ~0x1ff;
- if (size > ALIGNED_BUFFER_SIZE)
- size = ALIGNED_BUFFER_SIZE;
+ shift = offset & sector_mask;
+ size = (shift + count + sector_mask) & ~sector_mask;
+ if (size > s->aligned_buf_size)
+ size = s->aligned_buf_size;
ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
if (ret < 0)
return ret;
- size = 512 - shift;
+ size = bs->buffer_alignment - shift;
if (size > count)
size = count;
memcpy(buf, s->aligned_buf + shift, size);
@@ -346,15 +354,15 @@ static int raw_pread(BlockDriverState *bs, int64_t offset,
if (count == 0)
return sum;
}
- if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
+ if (count & sector_mask || (uintptr_t) buf & sector_mask) {
/* read on aligned buffer */
while (count) {
- size = (count + 0x1ff) & ~0x1ff;
- if (size > ALIGNED_BUFFER_SIZE)
- size = ALIGNED_BUFFER_SIZE;
+ size = (count + sector_mask) & ~sector_mask;
+ if (size > s->aligned_buf_size)
+ size = s->aligned_buf_size;
ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
if (ret < 0) {
@@ -404,25 +412,28 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset,
const uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
+ unsigned sector_mask = bs->buffer_alignment - 1;
int size, ret, shift, sum;
sum = 0;
if (s->aligned_buf != NULL) {
- if (offset & 0x1ff) {
- /* align offset on a 512 bytes boundary */
- shift = offset & 0x1ff;
- ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
+ if (offset & sector_mask) {
+ /* align offset on a sector-size boundary */
+ shift = offset & sector_mask;
+ ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf,
+ bs->buffer_alignment);
if (ret < 0)
return ret;
- size = 512 - shift;
+ size = bs->buffer_alignment - shift;
if (size > count)
size = count;
memcpy(s->aligned_buf + shift, buf, size);
- ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
+ ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf,
+ bs->buffer_alignment);
if (ret < 0)
return ret;
@@ -434,12 +445,12 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset,
if (count == 0)
return sum;
}
- if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
+ if (count & sector_mask || (uintptr_t) buf & sector_mask) {
- while ((size = (count & ~0x1ff)) != 0) {
+ while ((size = (count & ~sector_mask)) != 0) {
- if (size > ALIGNED_BUFFER_SIZE)
- size = ALIGNED_BUFFER_SIZE;
+ if (size > s->aligned_buf_size)
+ size = s->aligned_buf_size;
memcpy(s->aligned_buf, buf, size);
@@ -452,14 +463,16 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset,
count -= ret;
sum += ret;
}
- /* here, count < 512 because (count & ~0x1ff) == 0 */
+ /* here, count < sector size because (count & ~sector_mask) == 0 */
if (count) {
- ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
+ ret = raw_pread_aligned(bs, offset, s->aligned_buf,
+ bs->buffer_alignment);
if (ret < 0)
return ret;
memcpy(s->aligned_buf, buf, count);
- ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
+ ret = raw_pwrite_aligned(bs, offset, s->aligned_buf,
+ bs->buffer_alignment);
if (ret < 0)
return ret;
if (count < ret)
@@ -487,12 +500,12 @@ static int raw_write(BlockDriverState *bs, int64_t sector_num,
/*
* Check if all memory in this vector is sector aligned.
*/
-static int qiov_is_aligned(QEMUIOVector *qiov)
+static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
int i;
for (i = 0; i < qiov->niov; i++) {
- if ((uintptr_t) qiov->iov[i].iov_base % BDRV_SECTOR_SIZE) {
+ if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
return 0;
}
}
@@ -515,7 +528,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
* driver that it needs to copy the buffer.
*/
if (s->aligned_buf) {
- if (!qiov_is_aligned(qiov)) {
+ if (!qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
} else if (s->use_aio) {
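These raw-posix changes replace the hard-coded 512-byte constants with bs->buffer_alignment and the derived sector_mask, so the O_DIRECT read-modify-write path can handle devices whose logical sectors are larger than 512 bytes (up to the new MAX_BLOCKSIZE of 4096, which also sizes aligned_buf). A small standalone example of the masking arithmetic from raw_pread(), with an illustrative 4096-byte alignment:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Assume bs->buffer_alignment == 4096, as on a 4K-sector disk. */
    unsigned sector_mask = 4096 - 1;
    int64_t offset = 5000;              /* unaligned request offset */
    int count = 1000;                   /* unaligned request length */

    int shift = offset & sector_mask;                           /* 904  */
    int size  = (shift + count + sector_mask) & ~sector_mask;   /* 4096 */

    /* raw_pread() reads `size` bytes at the aligned offset into the bounce
     * buffer, then copies the `count` requested bytes starting at `shift`. */
    printf("aligned read: offset=%lld size=%d, copy out from byte %d\n",
           (long long)(offset - shift), size, shift);
    return 0;
}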
diff --git a/block/vvfat.c b/block/vvfat.c
index 365332aa21..53e57bf228 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -2665,6 +2665,11 @@ static int vvfat_write(BlockDriverState *bs, int64_t sector_num,
DLOG(checkpoint());
+ /* Check if we're operating in read-only mode */
+ if (s->qcow == NULL) {
+ return -EACCES;
+ }
+
vvfat_close_current_file(s);
/*
@@ -2763,12 +2768,12 @@ static int vvfat_is_allocated(BlockDriverState *bs,
static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
const uint8_t* buffer, int nb_sectors) {
- BDRVVVFATState* s = bs->opaque;
+ BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
return try_commit(s);
}
static void write_target_close(BlockDriverState *bs) {
- BDRVVVFATState* s = bs->opaque;
+ BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
bdrv_delete(s->qcow);
free(s->qcow_filename);
}
@@ -2783,6 +2788,7 @@ static int enable_write_target(BDRVVVFATState *s)
{
BlockDriver *bdrv_qcow;
QEMUOptionParameter *options;
+ int ret;
int size = sector2cluster(s, s->sector_count);
s->used_clusters = calloc(size, 1);
@@ -2798,11 +2804,16 @@ static int enable_write_target(BDRVVVFATState *s)
if (bdrv_create(bdrv_qcow, s->qcow_filename, options) < 0)
return -1;
+
s->qcow = bdrv_new("");
- if (s->qcow == NULL ||
- bdrv_open(s->qcow, s->qcow_filename, BDRV_O_RDWR, bdrv_qcow) < 0)
- {
- return -1;
+ if (s->qcow == NULL) {
+ return -1;
+ }
+
+ ret = bdrv_open(s->qcow, s->qcow_filename,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow);
+ if (ret < 0) {
+ return ret;
}
#ifndef _WIN32
@@ -2811,7 +2822,8 @@ static int enable_write_target(BDRVVVFATState *s)
s->bs->backing_hd = calloc(sizeof(BlockDriverState), 1);
s->bs->backing_hd->drv = &vvfat_write_target;
- s->bs->backing_hd->opaque = s;
+ s->bs->backing_hd->opaque = qemu_malloc(sizeof(void*));
+ *(void**)s->bs->backing_hd->opaque = s;
return 0;
}