diff options
-rw-r--r-- | Makefile.objs | 2 | ||||
-rw-r--r-- | block/blkverify.c | 382 | ||||
-rw-r--r-- | block/nbd.c | 2 | ||||
-rw-r--r-- | block/qcow2-cluster.c | 19 | ||||
-rw-r--r-- | block/qcow2-refcount.c | 13 | ||||
-rw-r--r-- | block/qcow2-snapshot.c | 2 | ||||
-rw-r--r-- | block/qcow2.c | 115 | ||||
-rw-r--r-- | block/qcow2.h | 4 | ||||
-rw-r--r-- | block/raw-posix.c | 79 | ||||
-rw-r--r-- | block/vvfat.c | 26 | ||||
-rw-r--r-- | cutils.c | 50 | ||||
-rw-r--r-- | docs/blkverify.txt | 69 | ||||
-rw-r--r-- | hw/ide/core.c | 4 | ||||
-rw-r--r-- | hw/scsi-bus.c | 3 | ||||
-rw-r--r-- | hw/scsi-disk.c | 10 | ||||
-rw-r--r-- | hw/scsi-generic.c | 21 | ||||
-rw-r--r-- | hw/sd.c | 2 | ||||
-rw-r--r-- | hw/virtio-blk.c | 1 | ||||
-rw-r--r-- | nbd.c | 25 | ||||
-rw-r--r-- | posix-aio-compat.c | 2 | ||||
-rw-r--r-- | qemu-common.h | 3 | ||||
-rw-r--r-- | qemu-io.c | 2 | ||||
-rw-r--r-- | qemu-nbd.c | 8 |
23 files changed, 721 insertions, 123 deletions
diff --git a/Makefile.objs b/Makefile.objs index 3ef6d80dfd..dad459343d 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -14,7 +14,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o -block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o +block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o block-nested-$(CONFIG_WIN32) += raw-win32.o block-nested-$(CONFIG_POSIX) += raw-posix.o block-nested-$(CONFIG_CURL) += curl.o diff --git a/block/blkverify.c b/block/blkverify.c new file mode 100644 index 0000000000..4202685a63 --- /dev/null +++ b/block/blkverify.c @@ -0,0 +1,382 @@ +/* + * Block protocol for block driver correctness testing + * + * Copyright (C) 2010 IBM, Corp. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include <stdarg.h> +#include "qemu_socket.h" /* for EINPROGRESS on Windows */ +#include "block_int.h" + +typedef struct { + BlockDriverState *test_file; +} BDRVBlkverifyState; + +typedef struct BlkverifyAIOCB BlkverifyAIOCB; +struct BlkverifyAIOCB { + BlockDriverAIOCB common; + QEMUBH *bh; + + /* Request metadata */ + bool is_write; + int64_t sector_num; + int nb_sectors; + + int ret; /* first completed request's result */ + unsigned int done; /* completion counter */ + bool *finished; /* completion signal for cancel */ + + QEMUIOVector *qiov; /* user I/O vector */ + QEMUIOVector raw_qiov; /* cloned I/O vector for raw file */ + void *buf; /* buffer for raw file I/O */ + + void (*verify)(BlkverifyAIOCB *acb); +}; + +static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb) +{ + BlkverifyAIOCB *acb = (BlkverifyAIOCB *)blockacb; + bool finished = false; + + /* Wait until request completes, invokes its callback, and frees itself */ + acb->finished = &finished; + while (!finished) { + qemu_aio_wait(); + } +} + +static AIOPool blkverify_aio_pool = { + .aiocb_size = sizeof(BlkverifyAIOCB), + .cancel = blkverify_aio_cancel, +}; + +static void blkverify_err(BlkverifyAIOCB *acb, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "blkverify: %s sector_num=%ld nb_sectors=%d ", + acb->is_write ? "write" : "read", acb->sector_num, + acb->nb_sectors); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); + exit(1); +} + +/* Valid blkverify filenames look like blkverify:path/to/raw_image:path/to/image */ +static int blkverify_open(BlockDriverState *bs, const char *filename, int flags) +{ + BDRVBlkverifyState *s = bs->opaque; + int ret; + char *raw, *c; + + /* Parse the blkverify: prefix */ + if (strncmp(filename, "blkverify:", strlen("blkverify:"))) { + return -EINVAL; + } + filename += strlen("blkverify:"); + + /* Parse the raw image filename */ + c = strchr(filename, ':'); + if (c == NULL) { + return -EINVAL; + } + + raw = strdup(filename); + raw[c - filename] = '\0'; + ret = bdrv_file_open(&bs->file, raw, flags); + free(raw); + if (ret < 0) { + return ret; + } + filename = c + 1; + + /* Open the test file */ + s->test_file = bdrv_new(""); + ret = bdrv_open(s->test_file, filename, flags, NULL); + if (ret < 0) { + bdrv_delete(s->test_file); + s->test_file = NULL; + return ret; + } + + return 0; +} + +static void blkverify_close(BlockDriverState *bs) +{ + BDRVBlkverifyState *s = bs->opaque; + + bdrv_delete(s->test_file); + s->test_file = NULL; +} + +static void blkverify_flush(BlockDriverState *bs) +{ + BDRVBlkverifyState *s = bs->opaque; + + /* Only flush test file, the raw file is not important */ + bdrv_flush(s->test_file); +} + +static int64_t blkverify_getlength(BlockDriverState *bs) +{ + BDRVBlkverifyState *s = bs->opaque; + + return bdrv_getlength(s->test_file); +} + +/** + * Check that I/O vector contents are identical + * + * @a: I/O vector + * @b: I/O vector + * @ret: Offset to first mismatching byte or -1 if match + */ +static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b) +{ + int i; + ssize_t offset = 0; + + assert(a->niov == b->niov); + for (i = 0; i < a->niov; i++) { + size_t len = 0; + uint8_t *p = (uint8_t *)a->iov[i].iov_base; + uint8_t *q = (uint8_t *)b->iov[i].iov_base; + + assert(a->iov[i].iov_len == b->iov[i].iov_len); + while (len < a->iov[i].iov_len && *p++ == *q++) { + len++; + } + + offset += len; + + if (len != a->iov[i].iov_len) { + return offset; + } + } + return -1; +} + +typedef struct { + int src_index; + struct iovec *src_iov; + void *dest_base; +} IOVectorSortElem; + +static int sortelem_cmp_src_base(const void *a, const void *b) +{ + const IOVectorSortElem *elem_a = a; + const IOVectorSortElem *elem_b = b; + + /* Don't overflow */ + if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) { + return -1; + } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) { + return 1; + } else { + return 0; + } +} + +static int sortelem_cmp_src_index(const void *a, const void *b) +{ + const IOVectorSortElem *elem_a = a; + const IOVectorSortElem *elem_b = b; + + return elem_a->src_index - elem_b->src_index; +} + +/** + * Copy contents of I/O vector + * + * The relative relationships of overlapping iovecs are preserved. This is + * necessary to ensure identical semantics in the cloned I/O vector. + */ +static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, + void *buf) +{ + IOVectorSortElem sortelems[src->niov]; + void *last_end; + int i; + + /* Sort by source iovecs by base address */ + for (i = 0; i < src->niov; i++) { + sortelems[i].src_index = i; + sortelems[i].src_iov = &src->iov[i]; + } + qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base); + + /* Allocate buffer space taking into account overlapping iovecs */ + last_end = NULL; + for (i = 0; i < src->niov; i++) { + struct iovec *cur = sortelems[i].src_iov; + ptrdiff_t rewind = 0; + + /* Detect overlap */ + if (last_end && last_end > cur->iov_base) { + rewind = last_end - cur->iov_base; + } + + sortelems[i].dest_base = buf - rewind; + buf += cur->iov_len - MIN(rewind, cur->iov_len); + last_end = MAX(cur->iov_base + cur->iov_len, last_end); + } + + /* Sort by source iovec index and build destination iovec */ + qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index); + for (i = 0; i < src->niov; i++) { + qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len); + } +} + +static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write, + int64_t sector_num, QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque) +{ + BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aio_pool, bs, cb, opaque); + + acb->bh = NULL; + acb->is_write = is_write; + acb->sector_num = sector_num; + acb->nb_sectors = nb_sectors; + acb->ret = -EINPROGRESS; + acb->done = 0; + acb->qiov = qiov; + acb->buf = NULL; + acb->verify = NULL; + acb->finished = NULL; + return acb; +} + +static void blkverify_aio_bh(void *opaque) +{ + BlkverifyAIOCB *acb = opaque; + + qemu_bh_delete(acb->bh); + if (acb->buf) { + qemu_iovec_destroy(&acb->raw_qiov); + qemu_vfree(acb->buf); + } + acb->common.cb(acb->common.opaque, acb->ret); + if (acb->finished) { + *acb->finished = true; + } + qemu_aio_release(acb); +} + +static void blkverify_aio_cb(void *opaque, int ret) +{ + BlkverifyAIOCB *acb = opaque; + + switch (++acb->done) { + case 1: + acb->ret = ret; + break; + + case 2: + if (acb->ret != ret) { + blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret); + } + + if (acb->verify) { + acb->verify(acb); + } + + acb->bh = qemu_bh_new(blkverify_aio_bh, acb); + qemu_bh_schedule(acb->bh); + break; + } +} + +static void blkverify_verify_readv(BlkverifyAIOCB *acb) +{ + ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov); + if (offset != -1) { + blkverify_err(acb, "contents mismatch in sector %ld", + acb->sector_num + (offset / BDRV_SECTOR_SIZE)); + } +} + +static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BDRVBlkverifyState *s = bs->opaque; + BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov, + nb_sectors, cb, opaque); + + acb->verify = blkverify_verify_readv; + acb->buf = qemu_blockalign(bs->file, qiov->size); + qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov); + blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf); + + if (!bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors, + blkverify_aio_cb, acb)) { + blkverify_aio_cb(acb, -EIO); + } + if (!bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors, + blkverify_aio_cb, acb)) { + blkverify_aio_cb(acb, -EIO); + } + return &acb->common; +} + +static BlockDriverAIOCB *blkverify_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BDRVBlkverifyState *s = bs->opaque; + BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov, + nb_sectors, cb, opaque); + + if (!bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors, + blkverify_aio_cb, acb)) { + blkverify_aio_cb(acb, -EIO); + } + if (!bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, + blkverify_aio_cb, acb)) { + blkverify_aio_cb(acb, -EIO); + } + return &acb->common; +} + +static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, + void *opaque) +{ + BDRVBlkverifyState *s = bs->opaque; + + /* Only flush test file, the raw file is not important */ + return bdrv_aio_flush(s->test_file, cb, opaque); +} + +static BlockDriver bdrv_blkverify = { + .format_name = "blkverify", + .protocol_name = "blkverify", + + .instance_size = sizeof(BDRVBlkverifyState), + + .bdrv_getlength = blkverify_getlength, + + .bdrv_file_open = blkverify_open, + .bdrv_close = blkverify_close, + .bdrv_flush = blkverify_flush, + + .bdrv_aio_readv = blkverify_aio_readv, + .bdrv_aio_writev = blkverify_aio_writev, + .bdrv_aio_flush = blkverify_aio_flush, +}; + +static void bdrv_blkverify_init(void) +{ + bdrv_register(&bdrv_blkverify); +} + +block_init(bdrv_blkverify_init); diff --git a/block/nbd.c b/block/nbd.c index 5e9d6cb8e6..c8dc763c6b 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -95,8 +95,6 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags) if (r == p) { goto out; } - } else if (name == NULL) { - goto out; } sock = tcp_socket_outgoing(hostname, port); diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index f562b1602e..fb4224a669 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -60,6 +60,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size) qemu_free(new_l1_table); return new_l1_table_offset; } + bdrv_flush(bs->file); BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE); for(i = 0; i < s->l1_size; i++) @@ -243,6 +244,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) if (l2_offset < 0) { return l2_offset; } + bdrv_flush(bs->file); /* allocate a new entry in the l2 cache */ @@ -348,6 +350,8 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num, BDRVQcowState *s = bs->opaque; int ret, index_in_cluster, n, n1; uint64_t cluster_offset; + struct iovec iov; + QEMUIOVector qiov; while (nb_sectors > 0) { n = nb_sectors; @@ -362,7 +366,11 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num, if (!cluster_offset) { if (bs->backing_hd) { /* read from the base image */ - n1 = qcow2_backing_read1(bs->backing_hd, sector_num, buf, n); + iov.iov_base = buf; + iov.iov_len = n * 512; + qemu_iovec_init_external(&qiov, &iov, 1); + + n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n); if (n1 > 0) { BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING); ret = bdrv_read(bs->backing_hd, sector_num, buf, n1); @@ -413,7 +421,7 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect, &s->aes_encrypt_key); } BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); - ret = bdrv_write_sync(bs->file, (cluster_offset >> 9) + n_start, + ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start, s->cluster_data, n); if (ret < 0) return ret; @@ -712,6 +720,13 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) (i << s->cluster_bits)) | QCOW_OFLAG_COPIED); } + /* + * Before we update the L2 table to actually point to the new cluster, we + * need to be sure that the refcounts have been increased and COW was + * handled. + */ + bdrv_flush(bs->file); + ret = write_l2_entries(bs, l2_table, l2_offset, l2_index, m->nb_clusters); if (ret < 0) { qcow2_l2_cache_reset(bs); diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 4c19e7ebd8..7082601f59 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -261,6 +261,8 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) goto fail_block; } + bdrv_flush(bs->file); + /* Initialize the new refcount block only after updating its refcount, * update_refcount uses the refcount cache itself */ memset(s->refcount_block_cache, 0, s->cluster_size); @@ -444,7 +446,7 @@ static int write_refcount_block_entries(BlockDriverState *bs, size = (last_index - first_index) << REFCOUNT_SHIFT; BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); - ret = bdrv_pwrite_sync(bs->file, + ret = bdrv_pwrite(bs->file, refcount_block_offset + (first_index << REFCOUNT_SHIFT), &s->refcount_block_cache[first_index], size); if (ret < 0) { @@ -573,6 +575,8 @@ static int update_cluster_refcount(BlockDriverState *bs, return ret; } + bdrv_flush(bs->file); + return get_refcount(bs, cluster_index); } @@ -624,6 +628,7 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size) if (ret < 0) { return ret; } + return offset; } @@ -671,6 +676,8 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) goto redo; } } + + bdrv_flush(bs->file); return offset; } @@ -801,6 +808,10 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, if (ret < 0) { goto fail; } + + /* TODO Flushing once for the whole function should + * be enough */ + bdrv_flush(bs->file); } /* compressed clusters are never modified */ refcount = 2; diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 6228612f84..bbfcaaae22 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -138,6 +138,7 @@ static int qcow_write_snapshots(BlockDriverState *bs) snapshots_size = offset; snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size); + bdrv_flush(bs->file); offset = snapshots_offset; if (offset < 0) { return offset; @@ -271,6 +272,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) if (l1_table_offset < 0) { goto fail; } + bdrv_flush(bs->file); sn->l1_table_offset = l1_table_offset; sn->l1_size = s->l1_size; diff --git a/block/qcow2.c b/block/qcow2.c index a53014dbda..ee3481b786 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -311,8 +311,8 @@ static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num, } /* handle reading after the end of the backing file */ -int qcow2_backing_read1(BlockDriverState *bs, - int64_t sector_num, uint8_t *buf, int nb_sectors) +int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, + int64_t sector_num, int nb_sectors) { int n1; if ((sector_num + nb_sectors) <= bs->total_sectors) @@ -321,7 +321,9 @@ int qcow2_backing_read1(BlockDriverState *bs, n1 = 0; else n1 = bs->total_sectors - sector_num; - memset(buf + n1 * 512, 0, 512 * (nb_sectors - n1)); + + qemu_iovec_memset(qiov, 0, 512 * (nb_sectors - n1)); + return n1; } @@ -329,14 +331,12 @@ typedef struct QCowAIOCB { BlockDriverAIOCB common; int64_t sector_num; QEMUIOVector *qiov; - uint8_t *buf; - void *orig_buf; int remaining_sectors; int cur_nr_sectors; /* number of sectors in current iteration */ + uint64_t bytes_done; uint64_t cluster_offset; uint8_t *cluster_data; BlockDriverAIOCB *hd_aiocb; - struct iovec hd_iov; QEMUIOVector hd_qiov; QEMUBH *bh; QCowL2Meta l2meta; @@ -397,15 +397,19 @@ static void qcow_aio_read_cb(void *opaque, int ret) /* nothing to do */ } else { if (s->crypt_method) { - qcow2_encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf, - acb->cur_nr_sectors, 0, - &s->aes_decrypt_key); + qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data, + acb->cluster_data, acb->cur_nr_sectors, 0, &s->aes_decrypt_key); + qemu_iovec_reset(&acb->hd_qiov); + qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done, + acb->cur_nr_sectors * 512); + qemu_iovec_from_buffer(&acb->hd_qiov, acb->cluster_data, + 512 * acb->cur_nr_sectors); } } acb->remaining_sectors -= acb->cur_nr_sectors; acb->sector_num += acb->cur_nr_sectors; - acb->buf += acb->cur_nr_sectors * 512; + acb->bytes_done += acb->cur_nr_sectors * 512; if (acb->remaining_sectors == 0) { /* request completed */ @@ -415,6 +419,11 @@ static void qcow_aio_read_cb(void *opaque, int ret) /* prepare next AIO request */ acb->cur_nr_sectors = acb->remaining_sectors; + if (s->crypt_method) { + acb->cur_nr_sectors = MIN(acb->cur_nr_sectors, + QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors); + } + ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9, &acb->cur_nr_sectors, &acb->cluster_offset); if (ret < 0) { @@ -423,15 +432,17 @@ static void qcow_aio_read_cb(void *opaque, int ret) index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); + qemu_iovec_reset(&acb->hd_qiov); + qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done, + acb->cur_nr_sectors * 512); + if (!acb->cluster_offset) { + if (bs->backing_hd) { /* read from the base image */ - n1 = qcow2_backing_read1(bs->backing_hd, acb->sector_num, - acb->buf, acb->cur_nr_sectors); + n1 = qcow2_backing_read1(bs->backing_hd, &acb->hd_qiov, + acb->sector_num, acb->cur_nr_sectors); if (n1 > 0) { - acb->hd_iov.iov_base = (void *)acb->buf; - acb->hd_iov.iov_len = acb->cur_nr_sectors * 512; - qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num, &acb->hd_qiov, acb->cur_nr_sectors, @@ -445,7 +456,7 @@ static void qcow_aio_read_cb(void *opaque, int ret) } } else { /* Note: in this case, no need to wait */ - memset(acb->buf, 0, 512 * acb->cur_nr_sectors); + qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors); ret = qcow_schedule_bh(qcow_aio_read_bh, acb); if (ret < 0) goto done; @@ -454,8 +465,11 @@ static void qcow_aio_read_cb(void *opaque, int ret) /* add AIO support for compressed blocks ? */ if (qcow2_decompress_cluster(bs, acb->cluster_offset) < 0) goto done; - memcpy(acb->buf, s->cluster_cache + index_in_cluster * 512, - 512 * acb->cur_nr_sectors); + + qemu_iovec_from_buffer(&acb->hd_qiov, + s->cluster_cache + index_in_cluster * 512, + 512 * acb->cur_nr_sectors); + ret = qcow_schedule_bh(qcow_aio_read_bh, acb); if (ret < 0) goto done; @@ -465,9 +479,23 @@ static void qcow_aio_read_cb(void *opaque, int ret) goto done; } - acb->hd_iov.iov_base = (void *)acb->buf; - acb->hd_iov.iov_len = acb->cur_nr_sectors * 512; - qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); + if (s->crypt_method) { + /* + * For encrypted images, read everything into a temporary + * contiguous buffer on which the AES functions can work. + */ + if (!acb->cluster_data) { + acb->cluster_data = + qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); + } + + assert(acb->cur_nr_sectors <= + QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors); + qemu_iovec_reset(&acb->hd_qiov); + qemu_iovec_add(&acb->hd_qiov, acb->cluster_data, + 512 * acb->cur_nr_sectors); + } + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); acb->hd_aiocb = bdrv_aio_readv(bs->file, (acb->cluster_offset >> 9) + index_in_cluster, @@ -481,11 +509,8 @@ static void qcow_aio_read_cb(void *opaque, int ret) return; done: - if (acb->qiov->niov > 1) { - qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); - qemu_vfree(acb->orig_buf); - } acb->common.cb(acb->common.opaque, ret); + qemu_iovec_destroy(&acb->hd_qiov); qemu_aio_release(acb); } @@ -501,13 +526,10 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, acb->hd_aiocb = NULL; acb->sector_num = sector_num; acb->qiov = qiov; - if (qiov->niov > 1) { - acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size); - if (is_write) - qemu_iovec_to_buffer(qiov, acb->buf); - } else { - acb->buf = (uint8_t *)qiov->iov->iov_base; - } + + qemu_iovec_init(&acb->hd_qiov, qiov->niov); + + acb->bytes_done = 0; acb->remaining_sectors = nb_sectors; acb->cur_nr_sectors = 0; acb->cluster_offset = 0; @@ -557,7 +579,6 @@ static void qcow_aio_write_cb(void *opaque, int ret) BlockDriverState *bs = acb->common.bs; BDRVQcowState *s = bs->opaque; int index_in_cluster; - const uint8_t *src_buf; int n_end; acb->hd_aiocb = NULL; @@ -573,7 +594,7 @@ static void qcow_aio_write_cb(void *opaque, int ret) acb->remaining_sectors -= acb->cur_nr_sectors; acb->sector_num += acb->cur_nr_sectors; - acb->buf += acb->cur_nr_sectors * 512; + acb->bytes_done += acb->cur_nr_sectors * 512; if (acb->remaining_sectors == 0) { /* request completed */ @@ -604,20 +625,27 @@ static void qcow_aio_write_cb(void *opaque, int ret) assert((acb->cluster_offset & 511) == 0); + qemu_iovec_reset(&acb->hd_qiov); + qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done, + acb->cur_nr_sectors * 512); + if (s->crypt_method) { if (!acb->cluster_data) { acb->cluster_data = qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); } - qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf, - acb->cur_nr_sectors, 1, &s->aes_encrypt_key); - src_buf = acb->cluster_data; - } else { - src_buf = acb->buf; + + assert(acb->hd_qiov.size <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); + qemu_iovec_to_buffer(&acb->hd_qiov, acb->cluster_data); + + qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data, + acb->cluster_data, acb->cur_nr_sectors, 1, &s->aes_encrypt_key); + + qemu_iovec_reset(&acb->hd_qiov); + qemu_iovec_add(&acb->hd_qiov, acb->cluster_data, + acb->cur_nr_sectors * 512); } - acb->hd_iov.iov_base = (void *)src_buf; - acb->hd_iov.iov_len = acb->cur_nr_sectors * 512; - qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); acb->hd_aiocb = bdrv_aio_writev(bs->file, (acb->cluster_offset >> 9) + index_in_cluster, @@ -635,9 +663,8 @@ fail: QLIST_REMOVE(&acb->l2meta, next_in_flight); } done: - if (acb->qiov->niov > 1) - qemu_vfree(acb->orig_buf); acb->common.cb(acb->common.opaque, ret); + qemu_iovec_destroy(&acb->hd_qiov); qemu_aio_release(acb); } diff --git a/block/qcow2.h b/block/qcow2.h index 3ff162efcd..356a34af43 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -166,8 +166,8 @@ static inline int64_t align_offset(int64_t offset, int n) // FIXME Need qcow2_ prefix to global functions /* qcow2.c functions */ -int qcow2_backing_read1(BlockDriverState *bs, - int64_t sector_num, uint8_t *buf, int nb_sectors); +int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, + int64_t sector_num, int nb_sectors); /* qcow2-refcount.c functions */ int qcow2_refcount_init(BlockDriverState *bs); diff --git a/block/raw-posix.c b/block/raw-posix.c index 813372a6c7..a5cbb7e929 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -97,12 +97,12 @@ #define FTYPE_CD 1 #define FTYPE_FD 2 -#define ALIGNED_BUFFER_SIZE (32 * 512) - /* if the FD is not accessed during that time (in ms), we try to reopen it to see if the disk has been changed */ #define FD_OPEN_TIMEOUT 1000 +#define MAX_BLOCKSIZE 4096 + typedef struct BDRVRawState { int fd; int type; @@ -118,7 +118,8 @@ typedef struct BDRVRawState { int use_aio; void *aio_ctx; #endif - uint8_t* aligned_buf; + uint8_t *aligned_buf; + unsigned aligned_buf_size; } BDRVRawState; static int fd_open(BlockDriverState *bs); @@ -161,7 +162,12 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, s->aligned_buf = NULL; if ((bdrv_flags & BDRV_O_NOCACHE)) { - s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE); + /* + * Allocate a buffer for read/modify/write cycles. Chose the size + * pessimistically as we don't know the block size yet. + */ + s->aligned_buf_size = 32 * MAX_BLOCKSIZE; + s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE, s->aligned_buf_size); if (s->aligned_buf == NULL) { goto out_close; } @@ -278,8 +284,9 @@ static int raw_pread_aligned(BlockDriverState *bs, int64_t offset, } /* - * offset and count are in bytes, but must be multiples of 512 for files - * opened with O_DIRECT. buf must be aligned to 512 bytes then. + * offset and count are in bytes, but must be multiples of the sector size + * for files opened with O_DIRECT. buf must be aligned to sector size bytes + * then. * * This function may be called without alignment if the caller ensures * that O_DIRECT is not in effect. @@ -316,24 +323,25 @@ static int raw_pread(BlockDriverState *bs, int64_t offset, uint8_t *buf, int count) { BDRVRawState *s = bs->opaque; + unsigned sector_mask = bs->buffer_alignment - 1; int size, ret, shift, sum; sum = 0; if (s->aligned_buf != NULL) { - if (offset & 0x1ff) { - /* align offset on a 512 bytes boundary */ + if (offset & sector_mask) { + /* align offset on a sector size bytes boundary */ - shift = offset & 0x1ff; - size = (shift + count + 0x1ff) & ~0x1ff; - if (size > ALIGNED_BUFFER_SIZE) - size = ALIGNED_BUFFER_SIZE; + shift = offset & sector_mask; + size = (shift + count + sector_mask) & ~sector_mask; + if (size > s->aligned_buf_size) + size = s->aligned_buf_size; ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size); if (ret < 0) return ret; - size = 512 - shift; + size = bs->buffer_alignment - shift; if (size > count) size = count; memcpy(buf, s->aligned_buf + shift, size); @@ -346,15 +354,15 @@ static int raw_pread(BlockDriverState *bs, int64_t offset, if (count == 0) return sum; } - if (count & 0x1ff || (uintptr_t) buf & 0x1ff) { + if (count & sector_mask || (uintptr_t) buf & sector_mask) { /* read on aligned buffer */ while (count) { - size = (count + 0x1ff) & ~0x1ff; - if (size > ALIGNED_BUFFER_SIZE) - size = ALIGNED_BUFFER_SIZE; + size = (count + sector_mask) & ~sector_mask; + if (size > s->aligned_buf_size) + size = s->aligned_buf_size; ret = raw_pread_aligned(bs, offset, s->aligned_buf, size); if (ret < 0) { @@ -404,25 +412,28 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset, const uint8_t *buf, int count) { BDRVRawState *s = bs->opaque; + unsigned sector_mask = bs->buffer_alignment - 1; int size, ret, shift, sum; sum = 0; if (s->aligned_buf != NULL) { - if (offset & 0x1ff) { - /* align offset on a 512 bytes boundary */ - shift = offset & 0x1ff; - ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512); + if (offset & sector_mask) { + /* align offset on a sector size bytes boundary */ + shift = offset & sector_mask; + ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, + bs->buffer_alignment); if (ret < 0) return ret; - size = 512 - shift; + size = bs->buffer_alignment - shift; if (size > count) size = count; memcpy(s->aligned_buf + shift, buf, size); - ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512); + ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, + bs->buffer_alignment); if (ret < 0) return ret; @@ -434,12 +445,12 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset, if (count == 0) return sum; } - if (count & 0x1ff || (uintptr_t) buf & 0x1ff) { + if (count & sector_mask || (uintptr_t) buf & sector_mask) { - while ((size = (count & ~0x1ff)) != 0) { + while ((size = (count & ~sector_mask)) != 0) { - if (size > ALIGNED_BUFFER_SIZE) - size = ALIGNED_BUFFER_SIZE; + if (size > s->aligned_buf_size) + size = s->aligned_buf_size; memcpy(s->aligned_buf, buf, size); @@ -452,14 +463,16 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset, count -= ret; sum += ret; } - /* here, count < 512 because (count & ~0x1ff) == 0 */ + /* here, count < 512 because (count & ~sector_mask) == 0 */ if (count) { - ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512); + ret = raw_pread_aligned(bs, offset, s->aligned_buf, + bs->buffer_alignment); if (ret < 0) return ret; memcpy(s->aligned_buf, buf, count); - ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512); + ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, + bs->buffer_alignment); if (ret < 0) return ret; if (count < ret) @@ -487,12 +500,12 @@ static int raw_write(BlockDriverState *bs, int64_t sector_num, /* * Check if all memory in this vector is sector aligned. */ -static int qiov_is_aligned(QEMUIOVector *qiov) +static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) { int i; for (i = 0; i < qiov->niov; i++) { - if ((uintptr_t) qiov->iov[i].iov_base % BDRV_SECTOR_SIZE) { + if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) { return 0; } } @@ -515,7 +528,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, * driver that it needs to copy the buffer. */ if (s->aligned_buf) { - if (!qiov_is_aligned(qiov)) { + if (!qiov_is_aligned(bs, qiov)) { type |= QEMU_AIO_MISALIGNED; #ifdef CONFIG_LINUX_AIO } else if (s->use_aio) { diff --git a/block/vvfat.c b/block/vvfat.c index 365332aa21..53e57bf228 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -2665,6 +2665,11 @@ static int vvfat_write(BlockDriverState *bs, int64_t sector_num, DLOG(checkpoint()); + /* Check if we're operating in read-only mode */ + if (s->qcow == NULL) { + return -EACCES; + } + vvfat_close_current_file(s); /* @@ -2763,12 +2768,12 @@ static int vvfat_is_allocated(BlockDriverState *bs, static int write_target_commit(BlockDriverState *bs, int64_t sector_num, const uint8_t* buffer, int nb_sectors) { - BDRVVVFATState* s = bs->opaque; + BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque); return try_commit(s); } static void write_target_close(BlockDriverState *bs) { - BDRVVVFATState* s = bs->opaque; + BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque); bdrv_delete(s->qcow); free(s->qcow_filename); } @@ -2783,6 +2788,7 @@ static int enable_write_target(BDRVVVFATState *s) { BlockDriver *bdrv_qcow; QEMUOptionParameter *options; + int ret; int size = sector2cluster(s, s->sector_count); s->used_clusters = calloc(size, 1); @@ -2798,11 +2804,16 @@ static int enable_write_target(BDRVVVFATState *s) if (bdrv_create(bdrv_qcow, s->qcow_filename, options) < 0) return -1; + s->qcow = bdrv_new(""); - if (s->qcow == NULL || - bdrv_open(s->qcow, s->qcow_filename, BDRV_O_RDWR, bdrv_qcow) < 0) - { - return -1; + if (s->qcow == NULL) { + return -1; + } + + ret = bdrv_open(s->qcow, s->qcow_filename, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow); + if (ret < 0) { + return ret; } #ifndef _WIN32 @@ -2811,7 +2822,8 @@ static int enable_write_target(BDRVVVFATState *s) s->bs->backing_hd = calloc(sizeof(BlockDriverState), 1); s->bs->backing_hd->drv = &vvfat_write_target; - s->bs->backing_hd->opaque = s; + s->bs->backing_hd->opaque = qemu_malloc(sizeof(void*)); + *(void**)s->bs->backing_hd->opaque = s; return 0; } @@ -168,30 +168,50 @@ void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len) } /* - * Copies iovecs from src to the end dst until src is completely copied or the - * total size of the copied iovec reaches size. The size of the last copied - * iovec is changed in order to fit the specified total size if it isn't a - * perfect fit already. + * Copies iovecs from src to the end of dst. It starts copying after skipping + * the given number of bytes in src and copies until src is completely copied + * or the total size of the copied iovec reaches size.The size of the last + * copied iovec is changed in order to fit the specified total size if it isn't + * a perfect fit already. */ -void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size) +void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip, + size_t size) { int i; size_t done; + void *iov_base; + uint64_t iov_len; assert(dst->nalloc != -1); done = 0; for (i = 0; (i < src->niov) && (done != size); i++) { - if (done + src->iov[i].iov_len > size) { - qemu_iovec_add(dst, src->iov[i].iov_base, size - done); + if (skip >= src->iov[i].iov_len) { + /* Skip the whole iov */ + skip -= src->iov[i].iov_len; + continue; + } else { + /* Skip only part (or nothing) of the iov */ + iov_base = (uint8_t*) src->iov[i].iov_base + skip; + iov_len = src->iov[i].iov_len - skip; + skip = 0; + } + + if (done + iov_len > size) { + qemu_iovec_add(dst, iov_base, size - done); break; } else { - qemu_iovec_add(dst, src->iov[i].iov_base, src->iov[i].iov_len); + qemu_iovec_add(dst, iov_base, iov_len); } - done += src->iov[i].iov_len; + done += iov_len; } } +void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size) +{ + qemu_iovec_copy(dst, src, 0, size); +} + void qemu_iovec_destroy(QEMUIOVector *qiov) { assert(qiov->nalloc != -1); @@ -234,6 +254,18 @@ void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count) } } +void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count) +{ + size_t n; + int i; + + for (i = 0; i < qiov->niov && count; ++i) { + n = MIN(count, qiov->iov[i].iov_len); + memset(qiov->iov[i].iov_base, c, n); + count -= n; + } +} + #ifndef _WIN32 /* Sets a specific flag */ int fcntl_setfl(int fd, int flag) diff --git a/docs/blkverify.txt b/docs/blkverify.txt new file mode 100644 index 0000000000..d556dc4e6d --- /dev/null +++ b/docs/blkverify.txt @@ -0,0 +1,69 @@ += Block driver correctness testing with blkverify = + +== Introduction == + +This document describes how to use the blkverify protocol to test that a block +driver is operating correctly. + +It is difficult to test and debug block drivers against real guests. Often +processes inside the guest will crash because corrupt sectors were read as part +of the executable. Other times obscure errors are raised by a program inside +the guest. These issues are extremely hard to trace back to bugs in the block +driver. + +Blkverify solves this problem by catching data corruption inside QEMU the first +time bad data is read and reporting the disk sector that is corrupted. + +== How it works == + +The blkverify protocol has two child block devices, the "test" device and the +"raw" device. Read/write operations are mirrored to both devices so their +state should always be in sync. + +The "raw" device is a raw image, a flat file, that has identical starting +contents to the "test" image. The idea is that the "raw" device will handle +read/write operations correctly and not corrupt data. It can be used as a +reference for comparison against the "test" device. + +After a mirrored read operation completes, blkverify will compare the data and +raise an error if it is not identical. This makes it possible to catch the +first instance where corrupt data is read. + +== Example == + +Imagine raw.img has 0xcd repeated throughout its first sector: + + $ ./qemu-io -c 'read -v 0 512' raw.img + 00000000: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................ + 00000010: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................ + [...] + 000001e0: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................ + 000001f0: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................ + read 512/512 bytes at offset 0 + 512.000000 bytes, 1 ops; 0.0000 sec (97.656 MiB/sec and 200000.0000 ops/sec) + +And test.img is corrupt, its first sector is zeroed when it shouldn't be: + + $ ./qemu-io -c 'read -v 0 512' test.img + 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + [...] + 000001e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 000001f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + read 512/512 bytes at offset 0 + 512.000000 bytes, 1 ops; 0.0000 sec (81.380 MiB/sec and 166666.6667 ops/sec) + +This error is caught by blkverify: + + $ ./qemu-io -c 'read 0 512' blkverify:a.img:b.img + blkverify: read sector_num=0 nb_sectors=4 contents mismatch in sector 0 + +A more realistic scenario is verifying the installation of a guest OS: + + $ ./qemu-img create raw.img 16G + $ ./qemu-img create -f qcow2 test.qcow2 16G + $ x86_64-softmmu/qemu-system-x86_64 -cdrom debian.iso \ + -drive file=blkverify:raw.img:test.qcow2 + +If the installation is aborted when blkverify detects corruption, use qemu-io +to explore the contents of the disk image at the sector in question. diff --git a/hw/ide/core.c b/hw/ide/core.c index 1e466d1ff5..06b6e14e56 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -2645,6 +2645,7 @@ int ide_init_drive(IDEState *s, BlockDriverState *bs, if (bdrv_get_type_hint(bs) == BDRV_TYPE_CDROM) { s->drive_kind = IDE_CD; bdrv_set_change_cb(bs, cdrom_change_cb, s); + bs->buffer_alignment = 2048; } else { if (!bdrv_is_inserted(s->bs)) { error_report("Device needs media, but drive is empty"); @@ -2679,7 +2680,8 @@ static void ide_init1(IDEBus *bus, int unit) s->bus = bus; s->unit = unit; s->drive_serial = drive_serial++; - s->io_buffer = qemu_blockalign(s->bs, IDE_DMA_BUF_SECTORS*512 + 4); + /* we need at least 2k alignment for accessing CDROMs using O_DIRECT */ + s->io_buffer = qemu_memalign(2048, IDE_DMA_BUF_SECTORS*512 + 4); s->io_buffer_total_len = IDE_DMA_BUF_SECTORS*512 + 4; s->smart_selftest_data = qemu_blockalign(s->bs, 512); s->sector_write_timer = qemu_new_timer(vm_clock, diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c index 7aa0bcd1ee..5a3fd4b7ac 100644 --- a/hw/scsi-bus.c +++ b/hw/scsi-bus.c @@ -208,6 +208,8 @@ static int scsi_req_length(SCSIRequest *req, uint8_t *cmd) case SEEK_6: case WRITE_FILEMARKS: case SPACE: + case RESERVE: + case RELEASE: case ERASE: case ALLOW_MEDIUM_REMOVAL: case VERIFY: @@ -319,7 +321,6 @@ static void scsi_req_xfer_mode(SCSIRequest *req) case WRITE_BUFFER: case FORMAT_UNIT: case REASSIGN_BLOCKS: - case RESERVE: case SEARCH_EQUAL: case SEARCH_HIGH: case SEARCH_LOW: diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 1446ca634c..9628b39a21 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -70,14 +70,15 @@ struct SCSIDiskState char *serial; }; -static SCSIDiskReq *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun) +static SCSIDiskReq *scsi_new_request(SCSIDiskState *s, uint32_t tag, + uint32_t lun) { SCSIRequest *req; SCSIDiskReq *r; - req = scsi_req_alloc(sizeof(SCSIDiskReq), d, tag, lun); + req = scsi_req_alloc(sizeof(SCSIDiskReq), &s->qdev, tag, lun); r = DO_UPCAST(SCSIDiskReq, req, req); - r->iov.iov_base = qemu_memalign(512, SCSI_DMA_BUF_SIZE); + r->iov.iov_base = qemu_blockalign(s->bs, SCSI_DMA_BUF_SIZE); return r; } @@ -939,7 +940,7 @@ static int32_t scsi_send_command(SCSIDevice *d, uint32_t tag, } /* ??? Tags are not unique for different luns. We only implement a single lun, so this should not matter. */ - r = scsi_new_request(d, tag, lun); + r = scsi_new_request(s, tag, lun); outbuf = (uint8_t *)r->iov.iov_base; is_write = 0; DPRINTF("Command: lun=%d tag=0x%x data=0x%02x", lun, tag, buf[0]); @@ -1177,6 +1178,7 @@ static int scsi_disk_initfn(SCSIDevice *dev) s->qdev.blocksize = s->qdev.conf.logical_block_size; } s->cluster_size = s->qdev.blocksize / 512; + s->bs->buffer_alignment = s->qdev.blocksize; s->qdev.type = TYPE_DISK; qemu_add_vm_change_state_handler(scsi_dma_restart_cb, s); diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c index 953802777d..7212091695 100644 --- a/hw/scsi-generic.c +++ b/hw/scsi-generic.c @@ -455,15 +455,31 @@ static int get_stream_blocksize(BlockDriverState *bdrv) return (buf[9] << 16) | (buf[10] << 8) | buf[11]; } -static void scsi_destroy(SCSIDevice *d) +static void scsi_generic_purge_requests(SCSIGenericState *s) { - SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, d); SCSIGenericReq *r; while (!QTAILQ_EMPTY(&s->qdev.requests)) { r = DO_UPCAST(SCSIGenericReq, req, QTAILQ_FIRST(&s->qdev.requests)); + if (r->req.aiocb) { + bdrv_aio_cancel(r->req.aiocb); + } scsi_remove_request(r); } +} + +static void scsi_generic_reset(DeviceState *dev) +{ + SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev.qdev, dev); + + scsi_generic_purge_requests(s); +} + +static void scsi_destroy(SCSIDevice *d) +{ + SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, d); + + scsi_generic_purge_requests(s); blockdev_mark_auto_del(s->qdev.conf.bs); } @@ -537,6 +553,7 @@ static SCSIDeviceInfo scsi_generic_info = { .qdev.name = "scsi-generic", .qdev.desc = "pass through generic scsi device (/dev/sg*)", .qdev.size = sizeof(SCSIGenericState), + .qdev.reset = scsi_generic_reset, .init = scsi_generic_initfn, .destroy = scsi_destroy, .send_command = scsi_send_command, @@ -440,7 +440,7 @@ SDState *sd_init(BlockDriverState *bs, int is_spi) SDState *sd; sd = (SDState *) qemu_mallocz(sizeof(SDState)); - sd->buf = qemu_memalign(512, 512); + sd->buf = qemu_blockalign(bs, 512); sd->spi = is_spi; sd->enable = 1; sd_reset(sd, bs); diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index bd6bbe6b14..a1df26dbcf 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -540,6 +540,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf) register_savevm(dev, "virtio-blk", virtio_blk_id++, 2, virtio_blk_save, virtio_blk_load, s); bdrv_set_removable(s->bs, 0); + s->bs->buffer_alignment = conf->logical_block_size; return &s->vdev; } @@ -49,6 +49,7 @@ /* This is all part of the "official" NBD API */ +#define NBD_REPLY_SIZE (4 + 4 + 8) #define NBD_REQUEST_MAGIC 0x25609513 #define NBD_REPLY_MAGIC 0x67446698 @@ -588,7 +589,7 @@ static int nbd_receive_request(int csock, struct nbd_request *request) int nbd_receive_reply(int csock, struct nbd_reply *reply) { - uint8_t buf[4 + 4 + 8]; + uint8_t buf[NBD_REPLY_SIZE]; uint32_t magic; memset(buf, 0xAA, sizeof(buf)); @@ -655,9 +656,9 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset, if (nbd_receive_request(csock, &request) == -1) return -1; - if (request.len > data_size) { + if (request.len + NBD_REPLY_SIZE > data_size) { LOG("len (%u) is larger than max len (%u)", - request.len, data_size); + request.len + NBD_REPLY_SIZE, data_size); errno = EINVAL; return -1; } @@ -687,7 +688,8 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset, case NBD_CMD_READ: TRACE("Request type is READ"); - if (bdrv_read(bs, (request.from + dev_offset) / 512, data, + if (bdrv_read(bs, (request.from + dev_offset) / 512, + data + NBD_REPLY_SIZE, request.len / 512) == -1) { LOG("reading from file failed"); errno = EINVAL; @@ -697,12 +699,21 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset, TRACE("Read %u byte(s)", request.len); - if (nbd_send_reply(csock, &reply) == -1) - return -1; + /* Reply + [ 0 .. 3] magic (NBD_REPLY_MAGIC) + [ 4 .. 7] error (0 == no error) + [ 7 .. 15] handle + */ + + cpu_to_be32w((uint32_t*)data, NBD_REPLY_MAGIC); + cpu_to_be32w((uint32_t*)(data + 4), reply.error); + cpu_to_be64w((uint64_t*)(data + 8), reply.handle); TRACE("Sending data to client"); - if (write_sync(csock, data, request.len) != request.len) { + if (write_sync(csock, data, + request.len + NBD_REPLY_SIZE) != + request.len + NBD_REPLY_SIZE) { LOG("writing to socket failed"); errno = EINVAL; return -1; diff --git a/posix-aio-compat.c b/posix-aio-compat.c index 10f1f037fb..842f1a24aa 100644 --- a/posix-aio-compat.c +++ b/posix-aio-compat.c @@ -270,7 +270,7 @@ static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb) * Ok, we have to do it the hard way, copy all segments into * a single aligned buffer. */ - buf = qemu_memalign(512, aiocb->aio_nbytes); + buf = qemu_blockalign(aiocb->common.bs, aiocb->aio_nbytes); if (aiocb->aio_type & QEMU_AIO_WRITE) { char *p = buf; int i; diff --git a/qemu-common.h b/qemu-common.h index dfd3dc08a2..5544ffd8cf 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -278,11 +278,14 @@ typedef struct QEMUIOVector { void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); +void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip, + size_t size); void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size); void qemu_iovec_destroy(QEMUIOVector *qiov); void qemu_iovec_reset(QEMUIOVector *qiov); void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf); void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count); +void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count); struct Monitor; typedef struct Monitor Monitor; @@ -61,7 +61,7 @@ static void *qemu_io_alloc(size_t len, int pattern) if (misalign) len += MISALIGN_OFFSET; - buf = qemu_memalign(512, len); + buf = qemu_blockalign(bs, len); memset(buf, pattern, len); if (misalign) buf += MISALIGN_OFFSET; diff --git a/qemu-nbd.c b/qemu-nbd.c index 91b569f8e6..99f1d22884 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -44,7 +44,7 @@ static void usage(const char *name) "Usage: %s [OPTIONS] FILE\n" "QEMU Disk Network Block Device Server\n" "\n" -" -p, --port=PORT port to listen on (default `1024')\n" +" -p, --port=PORT port to listen on (default `%d')\n" " -o, --offset=OFFSET offset into the image\n" " -b, --bind=IFACE interface to bind to (default `0.0.0.0')\n" " -k, --socket=PATH path to the unix socket\n" @@ -62,7 +62,7 @@ static void usage(const char *name) " -V, --version output version information and exit\n" "\n" "Report bugs to <anthony@codemonkey.ws>\n" - , name, "DEVICE"); + , name, NBD_DEFAULT_PORT, "DEVICE"); } static void version(const char *name) @@ -188,7 +188,7 @@ int main(int argc, char **argv) bool readonly = false; bool disconnect = false; const char *bindto = "0.0.0.0"; - int port = 1024; + int port = NBD_DEFAULT_PORT; struct sockaddr_in addr; socklen_t addr_len = sizeof(addr); off_t fd_size; @@ -446,7 +446,7 @@ int main(int argc, char **argv) max_fd = sharing_fds[0]; nb_fds++; - data = qemu_memalign(512, NBD_BUFFER_SIZE); + data = qemu_blockalign(bs, NBD_BUFFER_SIZE); if (data == NULL) errx(EXIT_FAILURE, "Cannot allocate data buffer"); |