aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--aio-win32.c8
-rw-r--r--async.c16
-rw-r--r--block.c72
-rw-r--r--block/Makefile.objs3
-rw-r--r--block/archipelago.c21
-rw-r--r--block/blkdebug.c17
-rw-r--r--block/blkverify.c21
-rw-r--r--block/cow.c433
-rw-r--r--block/curl.c16
-rw-r--r--block/iscsi.c23
-rw-r--r--block/linux-aio.c34
-rw-r--r--block/null.c168
-rw-r--r--block/qcow2-cluster.c43
-rw-r--r--block/qcow2-refcount.c67
-rw-r--r--block/qcow2.c77
-rw-r--r--block/qcow2.h6
-rw-r--r--block/qed.c23
-rw-r--r--block/quorum.c11
-rw-r--r--block/rbd.c25
-rw-r--r--block/sheepdog.c54
-rw-r--r--block/vhdx.c19
-rw-r--r--block/win32-aio.c18
-rwxr-xr-xconfigure21
-rw-r--r--dma-helpers.c20
-rw-r--r--docs/image-fuzzer.txt3
-rw-r--r--hmp.c2
-rw-r--r--hw/block/Makefile.objs2
-rw-r--r--hw/block/virtio-blk.c20
-rw-r--r--hw/ide/ahci.c45
-rw-r--r--hw/ide/atapi.c7
-rw-r--r--hw/ide/core.c26
-rw-r--r--hw/ide/ich.c7
-rw-r--r--hw/net/vhost_net.c2
-rw-r--r--hw/virtio/Makefile.objs2
-rw-r--r--hw/virtio/dataplane/vring.c3
-rw-r--r--include/block/aio.h9
-rw-r--r--include/block/block.h1
-rw-r--r--include/hw/virtio/dataplane/vring.h2
-rw-r--r--include/hw/virtio/virtio-blk.h2
-rw-r--r--include/hw/virtio/virtio_ring.h167
-rw-r--r--include/qemu/main-loop.h2
-rw-r--r--iothread.c11
-rw-r--r--linux-headers/linux/vhost.h2
-rw-r--r--main-loop.c9
-rw-r--r--qapi/block-core.json146
-rw-r--r--qdev-monitor.c2
-rw-r--r--qemu-doc.texi9
-rw-r--r--qemu-img.c8
-rw-r--r--qemu-img.texi4
-rw-r--r--qemu-io.c7
-rw-r--r--qemu-nbd.c6
-rw-r--r--qmp-commands.hx2
-rw-r--r--scripts/qapi-types.py2
-rw-r--r--scripts/qapi-visit.py3
-rw-r--r--tests/Makefile2
-rw-r--r--tests/ahci-test.c1561
-rw-r--r--tests/image-fuzzer/qcow2/fuzz.py26
-rw-r--r--tests/image-fuzzer/qcow2/layout.py138
-rwxr-xr-xtests/image-fuzzer/runner.py54
-rw-r--r--tests/libqos/pci.c6
-rwxr-xr-xtests/qemu-iotests/06056
-rw-r--r--tests/qemu-iotests/060.out61
-rwxr-xr-xtests/qemu-iotests/0692
-rwxr-xr-xtests/qemu-iotests/0722
-rwxr-xr-xtests/qemu-iotests/0992
-rw-r--r--tests/qemu-iotests/common6
-rw-r--r--tests/test-aio.c10
-rw-r--r--tests/test-thread-pool.c44
-rw-r--r--tests/test-throttle.c10
-rw-r--r--thread-pool.c36
-rw-r--r--ui/cocoa.m2
-rw-r--r--vl.c5
72 files changed, 2806 insertions, 946 deletions
diff --git a/aio-win32.c b/aio-win32.c
index 61e3d2ddfe..d81313b00b 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -283,9 +283,9 @@ bool aio_poll(AioContext *ctx, bool blocking)
int count;
int timeout;
- if (aio_prepare(ctx)) {
+ have_select_revents = aio_prepare(ctx);
+ if (have_select_revents) {
blocking = false;
- have_select_revents = true;
}
was_dispatching = ctx->dispatching;
@@ -335,6 +335,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
event = NULL;
if ((DWORD) (ret - WAIT_OBJECT_0) < count) {
event = events[ret - WAIT_OBJECT_0];
+ events[ret - WAIT_OBJECT_0] = events[--count];
} else if (!have_select_revents) {
break;
}
@@ -343,9 +344,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
blocking = false;
progress |= aio_dispatch_handlers(ctx, event);
-
- /* Try again, but only call each handler once. */
- events[ret - WAIT_OBJECT_0] = events[--count];
}
progress |= timerlistgroup_run_timers(&ctx->tlg);
diff --git a/async.c b/async.c
index a99e7f639a..6e1b282aa7 100644
--- a/async.c
+++ b/async.c
@@ -289,18 +289,24 @@ static void aio_rfifolock_cb(void *opaque)
aio_notify(opaque);
}
-AioContext *aio_context_new(void)
+AioContext *aio_context_new(Error **errp)
{
+ int ret;
AioContext *ctx;
ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
+ ret = event_notifier_init(&ctx->notifier, false);
+ if (ret < 0) {
+ g_source_destroy(&ctx->source);
+ error_setg_errno(errp, -ret, "Failed to initialize event notifier");
+ return NULL;
+ }
+ aio_set_event_notifier(ctx, &ctx->notifier,
+ (EventNotifierHandler *)
+ event_notifier_test_and_clear);
ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
ctx->thread_pool = NULL;
qemu_mutex_init(&ctx->bh_lock);
rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
- event_notifier_init(&ctx->notifier, false);
- aio_set_event_notifier(ctx, &ctx->notifier,
- (EventNotifierHandler *)
- event_notifier_test_and_clear);
timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
return ctx;
diff --git a/block.c b/block.c
index bcd952a4cb..a857913fc2 100644
--- a/block.c
+++ b/block.c
@@ -4640,7 +4640,28 @@ int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
- acb->aiocb_info->cancel(acb);
+ qemu_aio_ref(acb);
+ bdrv_aio_cancel_async(acb);
+ while (acb->refcnt > 1) {
+ if (acb->aiocb_info->get_aio_context) {
+ aio_poll(acb->aiocb_info->get_aio_context(acb), true);
+ } else if (acb->bs) {
+ aio_poll(bdrv_get_aio_context(acb->bs), true);
+ } else {
+ abort();
+ }
+ }
+ qemu_aio_unref(acb);
+}
+
+/* Async version of aio cancel. The caller is not blocked if the acb implements
+ * cancel_async, otherwise we do nothing and let the request normally complete.
+ * In either case the completion callback must be called. */
+void bdrv_aio_cancel_async(BlockDriverAIOCB *acb)
+{
+ if (acb->aiocb_info->cancel_async) {
+ acb->aiocb_info->cancel_async(acb);
+ }
}
/**************************************************************/
@@ -4656,18 +4677,8 @@ typedef struct BlockDriverAIOCBSync {
int is_write;
} BlockDriverAIOCBSync;
-static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
-{
- BlockDriverAIOCBSync *acb =
- container_of(blockacb, BlockDriverAIOCBSync, common);
- qemu_bh_delete(acb->bh);
- acb->bh = NULL;
- qemu_aio_release(acb);
-}
-
static const AIOCBInfo bdrv_em_aiocb_info = {
.aiocb_size = sizeof(BlockDriverAIOCBSync),
- .cancel = bdrv_aio_cancel_em,
};
static void bdrv_aio_bh_cb(void *opaque)
@@ -4681,7 +4692,7 @@ static void bdrv_aio_bh_cb(void *opaque)
acb->common.cb(acb->common.opaque, acb->ret);
qemu_bh_delete(acb->bh);
acb->bh = NULL;
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
}
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
@@ -4738,22 +4749,8 @@ typedef struct BlockDriverAIOCBCoroutine {
QEMUBH* bh;
} BlockDriverAIOCBCoroutine;
-static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
-{
- AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
- BlockDriverAIOCBCoroutine *acb =
- container_of(blockacb, BlockDriverAIOCBCoroutine, common);
- bool done = false;
-
- acb->done = &done;
- while (!done) {
- aio_poll(aio_context, true);
- }
-}
-
static const AIOCBInfo bdrv_em_co_aiocb_info = {
.aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
- .cancel = bdrv_aio_co_cancel_em,
};
static void bdrv_co_em_bh(void *opaque)
@@ -4762,12 +4759,8 @@ static void bdrv_co_em_bh(void *opaque)
acb->common.cb(acb->common.opaque, acb->req.error);
- if (acb->done) {
- *acb->done = true;
- }
-
qemu_bh_delete(acb->bh);
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
}
/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
@@ -4806,7 +4799,6 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
acb->req.qiov = qiov;
acb->req.flags = flags;
acb->is_write = is_write;
- acb->done = NULL;
co = qemu_coroutine_create(bdrv_co_do_rw);
qemu_coroutine_enter(co, acb);
@@ -4833,7 +4825,6 @@ BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
BlockDriverAIOCBCoroutine *acb;
acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
- acb->done = NULL;
co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
qemu_coroutine_enter(co, acb);
@@ -4863,7 +4854,6 @@ BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
acb->req.sector = sector_num;
acb->req.nb_sectors = nb_sectors;
- acb->done = NULL;
co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
qemu_coroutine_enter(co, acb);
@@ -4891,13 +4881,23 @@ void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
acb->bs = bs;
acb->cb = cb;
acb->opaque = opaque;
+ acb->refcnt = 1;
return acb;
}
-void qemu_aio_release(void *p)
+void qemu_aio_ref(void *p)
+{
+ BlockDriverAIOCB *acb = p;
+ acb->refcnt++;
+}
+
+void qemu_aio_unref(void *p)
{
BlockDriverAIOCB *acb = p;
- g_slice_free1(acb->aiocb_info->aiocb_size, acb);
+ assert(acb->refcnt > 0);
+ if (--acb->refcnt == 0) {
+ g_slice_free1(acb->aiocb_info->aiocb_size, acb);
+ }
}
/**************************************************************/
diff --git a/block/Makefile.objs b/block/Makefile.objs
index c9c8bbbcde..a833ed5745 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,4 +1,4 @@
-block-obj-y += raw_bsd.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
+block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
@@ -9,6 +9,7 @@ block-obj-y += snapshot.o qapi.o
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
+block-obj-y += null.o
block-obj-y += nbd.o nbd-client.o sheepdog.o
block-obj-$(CONFIG_LIBISCSI) += iscsi.o
diff --git a/block/archipelago.c b/block/archipelago.c
index 93fb7c0634..73d91a422f 100644
--- a/block/archipelago.c
+++ b/block/archipelago.c
@@ -91,7 +91,6 @@ typedef struct ArchipelagoAIOCB {
struct BDRVArchipelagoState *s;
QEMUIOVector *qiov;
ARCHIPCmd cmd;
- bool cancelled;
int status;
int64_t size;
int64_t ret;
@@ -318,9 +317,7 @@ static void qemu_archipelago_complete_aio(void *opaque)
aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
aio_cb->status = 0;
- if (!aio_cb->cancelled) {
- qemu_aio_release(aio_cb);
- }
+ qemu_aio_unref(aio_cb);
g_free(reqdata);
}
@@ -725,19 +722,8 @@ static int qemu_archipelago_create(const char *filename,
return ret;
}
-static void qemu_archipelago_aio_cancel(BlockDriverAIOCB *blockacb)
-{
- ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) blockacb;
- aio_cb->cancelled = true;
- while (aio_cb->status == -EINPROGRESS) {
- aio_poll(bdrv_get_aio_context(aio_cb->common.bs), true);
- }
- qemu_aio_release(aio_cb);
-}
-
static const AIOCBInfo archipelago_aiocb_info = {
.aiocb_size = sizeof(ArchipelagoAIOCB),
- .cancel = qemu_archipelago_aio_cancel,
};
static int archipelago_submit_request(BDRVArchipelagoState *s,
@@ -889,7 +875,6 @@ static BlockDriverAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs,
aio_cb->ret = 0;
aio_cb->s = s;
- aio_cb->cancelled = false;
aio_cb->status = -EINPROGRESS;
off = sector_num * BDRV_SECTOR_SIZE;
@@ -905,7 +890,7 @@ static BlockDriverAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs,
err_exit:
error_report("qemu_archipelago_aio_rw(): I/O Error\n");
- qemu_aio_release(aio_cb);
+ qemu_aio_unref(aio_cb);
return NULL;
}
@@ -1008,7 +993,7 @@ static int qemu_archipelago_truncate(BlockDriverState *bs, int64_t offset)
req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC);
if (!req) {
archipelagolog("Cannot get XSEG request\n");
- return err_exit2;
+ goto err_exit2;
}
ret = xseg_prep_request(s->xseg, req, targetlen, 0);
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 69b330eced..ced0b600f9 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -52,11 +52,8 @@ typedef struct BlkdebugSuspendedReq {
QLIST_ENTRY(BlkdebugSuspendedReq) next;
} BlkdebugSuspendedReq;
-static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb);
-
static const AIOCBInfo blkdebug_aiocb_info = {
- .aiocb_size = sizeof(BlkdebugAIOCB),
- .cancel = blkdebug_aio_cancel,
+ .aiocb_size = sizeof(BlkdebugAIOCB),
};
enum {
@@ -447,17 +444,7 @@ static void error_callback_bh(void *opaque)
struct BlkdebugAIOCB *acb = opaque;
qemu_bh_delete(acb->bh);
acb->common.cb(acb->common.opaque, acb->ret);
- qemu_aio_release(acb);
-}
-
-static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb)
-{
- BlkdebugAIOCB *acb = container_of(blockacb, BlkdebugAIOCB, common);
- if (acb->bh) {
- qemu_bh_delete(acb->bh);
- acb->bh = NULL;
- }
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
}
static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
diff --git a/block/blkverify.c b/block/blkverify.c
index 163064cf6b..7d64a23a09 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -29,7 +29,6 @@ struct BlkverifyAIOCB {
int ret; /* first completed request's result */
unsigned int done; /* completion counter */
- bool *finished; /* completion signal for cancel */
QEMUIOVector *qiov; /* user I/O vector */
QEMUIOVector raw_qiov; /* cloned I/O vector for raw file */
@@ -38,22 +37,8 @@ struct BlkverifyAIOCB {
void (*verify)(BlkverifyAIOCB *acb);
};
-static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb)
-{
- BlkverifyAIOCB *acb = (BlkverifyAIOCB *)blockacb;
- AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
- bool finished = false;
-
- /* Wait until request completes, invokes its callback, and frees itself */
- acb->finished = &finished;
- while (!finished) {
- aio_poll(aio_context, true);
- }
-}
-
static const AIOCBInfo blkverify_aiocb_info = {
.aiocb_size = sizeof(BlkverifyAIOCB),
- .cancel = blkverify_aio_cancel,
};
static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
@@ -194,7 +179,6 @@ static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
acb->qiov = qiov;
acb->buf = NULL;
acb->verify = NULL;
- acb->finished = NULL;
return acb;
}
@@ -208,10 +192,7 @@ static void blkverify_aio_bh(void *opaque)
qemu_vfree(acb->buf);
}
acb->common.cb(acb->common.opaque, acb->ret);
- if (acb->finished) {
- *acb->finished = true;
- }
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
}
static void blkverify_aio_cb(void *opaque, int ret)
diff --git a/block/cow.c b/block/cow.c
deleted file mode 100644
index c3769fe03b..0000000000
--- a/block/cow.c
+++ /dev/null
@@ -1,433 +0,0 @@
-/*
- * Block driver for the COW format
- *
- * Copyright (c) 2004 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-
-/**************************************************************/
-/* COW block driver using file system holes */
-
-/* user mode linux compatible COW file */
-#define COW_MAGIC 0x4f4f4f4d /* MOOO */
-#define COW_VERSION 2
-
-struct cow_header_v2 {
- uint32_t magic;
- uint32_t version;
- char backing_file[1024];
- int32_t mtime;
- uint64_t size;
- uint32_t sectorsize;
-};
-
-typedef struct BDRVCowState {
- CoMutex lock;
- int64_t cow_sectors_offset;
-} BDRVCowState;
-
-static int cow_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
- const struct cow_header_v2 *cow_header = (const void *)buf;
-
- if (buf_size >= sizeof(struct cow_header_v2) &&
- be32_to_cpu(cow_header->magic) == COW_MAGIC &&
- be32_to_cpu(cow_header->version) == COW_VERSION)
- return 100;
- else
- return 0;
-}
-
-static int cow_open(BlockDriverState *bs, QDict *options, int flags,
- Error **errp)
-{
- BDRVCowState *s = bs->opaque;
- struct cow_header_v2 cow_header;
- int bitmap_size;
- int64_t size;
- int ret;
-
- /* see if it is a cow image */
- ret = bdrv_pread(bs->file, 0, &cow_header, sizeof(cow_header));
- if (ret < 0) {
- goto fail;
- }
-
- if (be32_to_cpu(cow_header.magic) != COW_MAGIC) {
- error_setg(errp, "Image not in COW format");
- ret = -EINVAL;
- goto fail;
- }
-
- if (be32_to_cpu(cow_header.version) != COW_VERSION) {
- char version[64];
- snprintf(version, sizeof(version),
- "COW version %" PRIu32, cow_header.version);
- error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bs->device_name, "cow", version);
- ret = -ENOTSUP;
- goto fail;
- }
-
- /* cow image found */
- size = be64_to_cpu(cow_header.size);
- bs->total_sectors = size / 512;
-
- pstrcpy(bs->backing_file, sizeof(bs->backing_file),
- cow_header.backing_file);
-
- bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header);
- s->cow_sectors_offset = (bitmap_size + 511) & ~511;
- qemu_co_mutex_init(&s->lock);
- return 0;
- fail:
- return ret;
-}
-
-static inline void cow_set_bits(uint8_t *bitmap, int start, int64_t nb_sectors)
-{
- int64_t bitnum = start, last = start + nb_sectors;
- while (bitnum < last) {
- if ((bitnum & 7) == 0 && bitnum + 8 <= last) {
- bitmap[bitnum / 8] = 0xFF;
- bitnum += 8;
- continue;
- }
- bitmap[bitnum/8] |= (1 << (bitnum % 8));
- bitnum++;
- }
-}
-
-#define BITS_PER_BITMAP_SECTOR (512 * 8)
-
-/* Cannot use bitmap.c on big-endian machines. */
-static int cow_test_bit(int64_t bitnum, const uint8_t *bitmap)
-{
- return (bitmap[bitnum / 8] & (1 << (bitnum & 7))) != 0;
-}
-
-static int cow_find_streak(const uint8_t *bitmap, int value, int start, int nb_sectors)
-{
- int streak_value = value ? 0xFF : 0;
- int last = MIN(start + nb_sectors, BITS_PER_BITMAP_SECTOR);
- int bitnum = start;
- while (bitnum < last) {
- if ((bitnum & 7) == 0 && bitmap[bitnum / 8] == streak_value) {
- bitnum += 8;
- continue;
- }
- if (cow_test_bit(bitnum, bitmap) == value) {
- bitnum++;
- continue;
- }
- break;
- }
- return MIN(bitnum, last) - start;
-}
-
-/* Return true if first block has been changed (ie. current version is
- * in COW file). Set the number of continuous blocks for which that
- * is true. */
-static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *num_same)
-{
- int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
- uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
- bool first = true;
- int changed = 0, same = 0;
-
- do {
- int ret;
- uint8_t bitmap[BDRV_SECTOR_SIZE];
-
- bitnum &= BITS_PER_BITMAP_SECTOR - 1;
- int sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum);
-
- ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
- if (ret < 0) {
- return ret;
- }
-
- if (first) {
- changed = cow_test_bit(bitnum, bitmap);
- first = false;
- }
-
- same += cow_find_streak(bitmap, changed, bitnum, nb_sectors);
-
- bitnum += sector_bits;
- nb_sectors -= sector_bits;
- offset += BDRV_SECTOR_SIZE;
- } while (nb_sectors);
-
- *num_same = same;
- return changed;
-}
-
-static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *num_same)
-{
- BDRVCowState *s = bs->opaque;
- int ret = cow_co_is_allocated(bs, sector_num, nb_sectors, num_same);
- int64_t offset = s->cow_sectors_offset + (sector_num << BDRV_SECTOR_BITS);
- if (ret < 0) {
- return ret;
- }
- return (ret ? BDRV_BLOCK_DATA : 0) | offset | BDRV_BLOCK_OFFSET_VALID;
-}
-
-static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
-{
- int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
- uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
- bool first = true;
- int sector_bits;
-
- for ( ; nb_sectors;
- bitnum += sector_bits,
- nb_sectors -= sector_bits,
- offset += BDRV_SECTOR_SIZE) {
- int ret, set;
- uint8_t bitmap[BDRV_SECTOR_SIZE];
-
- bitnum &= BITS_PER_BITMAP_SECTOR - 1;
- sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum);
-
- ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
- if (ret < 0) {
- return ret;
- }
-
- /* Skip over any already set bits */
- set = cow_find_streak(bitmap, 1, bitnum, sector_bits);
- bitnum += set;
- sector_bits -= set;
- nb_sectors -= set;
- if (!sector_bits) {
- continue;
- }
-
- if (first) {
- ret = bdrv_flush(bs->file);
- if (ret < 0) {
- return ret;
- }
- first = false;
- }
-
- cow_set_bits(bitmap, bitnum, sector_bits);
-
- ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap));
- if (ret < 0) {
- return ret;
- }
- }
-
- return 0;
-}
-
-static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- BDRVCowState *s = bs->opaque;
- int ret, n;
-
- while (nb_sectors > 0) {
- ret = cow_co_is_allocated(bs, sector_num, nb_sectors, &n);
- if (ret < 0) {
- return ret;
- }
- if (ret) {
- ret = bdrv_pread(bs->file,
- s->cow_sectors_offset + sector_num * 512,
- buf, n * 512);
- if (ret < 0) {
- return ret;
- }
- } else {
- if (bs->backing_hd) {
- /* read from the base image */
- ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
- if (ret < 0) {
- return ret;
- }
- } else {
- memset(buf, 0, n * 512);
- }
- }
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
- }
- return 0;
-}
-
-static coroutine_fn int cow_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVCowState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = cow_read(bs, sector_num, buf, nb_sectors);
- qemu_co_mutex_unlock(&s->lock);
- return ret;
-}
-
-static int cow_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- BDRVCowState *s = bs->opaque;
- int ret;
-
- ret = bdrv_pwrite(bs->file, s->cow_sectors_offset + sector_num * 512,
- buf, nb_sectors * 512);
- if (ret < 0) {
- return ret;
- }
-
- return cow_update_bitmap(bs, sector_num, nb_sectors);
-}
-
-static coroutine_fn int cow_co_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVCowState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = cow_write(bs, sector_num, buf, nb_sectors);
- qemu_co_mutex_unlock(&s->lock);
- return ret;
-}
-
-static void cow_close(BlockDriverState *bs)
-{
-}
-
-static int cow_create(const char *filename, QemuOpts *opts, Error **errp)
-{
- struct cow_header_v2 cow_header;
- struct stat st;
- int64_t image_sectors = 0;
- char *image_filename = NULL;
- Error *local_err = NULL;
- int ret;
- BlockDriverState *cow_bs = NULL;
-
- /* Read out options */
- image_sectors = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
- BDRV_SECTOR_SIZE);
- image_filename = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
-
- ret = bdrv_create_file(filename, opts, &local_err);
- if (ret < 0) {
- error_propagate(errp, local_err);
- goto exit;
- }
-
- ret = bdrv_open(&cow_bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
- if (ret < 0) {
- error_propagate(errp, local_err);
- goto exit;
- }
-
- memset(&cow_header, 0, sizeof(cow_header));
- cow_header.magic = cpu_to_be32(COW_MAGIC);
- cow_header.version = cpu_to_be32(COW_VERSION);
- if (image_filename) {
- /* Note: if no file, we put a dummy mtime */
- cow_header.mtime = cpu_to_be32(0);
-
- if (stat(image_filename, &st) != 0) {
- goto mtime_fail;
- }
- cow_header.mtime = cpu_to_be32(st.st_mtime);
- mtime_fail:
- pstrcpy(cow_header.backing_file, sizeof(cow_header.backing_file),
- image_filename);
- }
- cow_header.sectorsize = cpu_to_be32(512);
- cow_header.size = cpu_to_be64(image_sectors * 512);
- ret = bdrv_pwrite(cow_bs, 0, &cow_header, sizeof(cow_header));
- if (ret < 0) {
- goto exit;
- }
-
- /* resize to include at least all the bitmap */
- ret = bdrv_truncate(cow_bs,
- sizeof(cow_header) + ((image_sectors + 7) >> 3));
- if (ret < 0) {
- goto exit;
- }
-
-exit:
- g_free(image_filename);
- if (cow_bs) {
- bdrv_unref(cow_bs);
- }
- return ret;
-}
-
-static QemuOptsList cow_create_opts = {
- .name = "cow-create-opts",
- .head = QTAILQ_HEAD_INITIALIZER(cow_create_opts.head),
- .desc = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = QEMU_OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = QEMU_OPT_STRING,
- .help = "File name of a base image"
- },
- { /* end of list */ }
- }
-};
-
-static BlockDriver bdrv_cow = {
- .format_name = "cow",
- .instance_size = sizeof(BDRVCowState),
-
- .bdrv_probe = cow_probe,
- .bdrv_open = cow_open,
- .bdrv_close = cow_close,
- .bdrv_create = cow_create,
- .bdrv_has_zero_init = bdrv_has_zero_init_1,
- .supports_backing = true,
-
- .bdrv_read = cow_co_read,
- .bdrv_write = cow_co_write,
- .bdrv_co_get_block_status = cow_co_get_block_status,
-
- .create_opts = &cow_create_opts,
-};
-
-static void bdrv_cow_init(void)
-{
- bdrv_register(&bdrv_cow);
-}
-
-block_init(bdrv_cow_init);
diff --git a/block/curl.c b/block/curl.c
index 938f9d94e8..225407c643 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -212,7 +212,7 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start,
acb->end - acb->start);
acb->common.cb(acb->common.opaque, 0);
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
s->acb[i] = NULL;
}
}
@@ -304,7 +304,7 @@ static void curl_multi_check_completion(BDRVCURLState *s)
}
acb->common.cb(acb->common.opaque, -EIO);
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
state->acb[i] = NULL;
}
}
@@ -613,14 +613,8 @@ out_noclean:
return -EINVAL;
}
-static void curl_aio_cancel(BlockDriverAIOCB *blockacb)
-{
- // Do we have to implement canceling? Seems to work without...
-}
-
static const AIOCBInfo curl_aiocb_info = {
.aiocb_size = sizeof(CURLAIOCB),
- .cancel = curl_aio_cancel,
};
@@ -642,7 +636,7 @@ static void curl_readv_bh_cb(void *p)
// we can just call the callback and be done.
switch (curl_find_buf(s, start, acb->nb_sectors * SECTOR_SIZE, acb)) {
case FIND_RET_OK:
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
// fall through
case FIND_RET_WAIT:
return;
@@ -654,7 +648,7 @@ static void curl_readv_bh_cb(void *p)
state = curl_init_state(acb->common.bs, s);
if (!state) {
acb->common.cb(acb->common.opaque, -EIO);
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
return;
}
@@ -670,7 +664,7 @@ static void curl_readv_bh_cb(void *p)
if (state->buf_len && state->orig_buf == NULL) {
curl_clean_state(state);
acb->common.cb(acb->common.opaque, -ENOMEM);
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
return;
}
state->acb[0] = acb;
diff --git a/block/iscsi.c b/block/iscsi.c
index 84bcae89fa..3c1b416704 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -88,7 +88,6 @@ typedef struct IscsiAIOCB {
struct scsi_task *task;
uint8_t *buf;
int status;
- int canceled;
int64_t sector_num;
int nb_sectors;
#ifdef __linux__
@@ -120,16 +119,14 @@ iscsi_bh_cb(void *p)
g_free(acb->buf);
acb->buf = NULL;
- if (acb->canceled == 0) {
- acb->common.cb(acb->common.opaque, acb->status);
- }
+ acb->common.cb(acb->common.opaque, acb->status);
if (acb->task != NULL) {
scsi_free_scsi_task(acb->task);
acb->task = NULL;
}
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
}
static void
@@ -240,20 +237,15 @@ iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
return;
}
- acb->canceled = 1;
-
/* send a task mgmt call to the target to cancel the task on the target */
iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
iscsi_abort_task_cb, acb);
- while (acb->status == -EINPROGRESS) {
- aio_poll(iscsilun->aio_context, true);
- }
}
static const AIOCBInfo iscsi_aiocb_info = {
.aiocb_size = sizeof(IscsiAIOCB),
- .cancel = iscsi_aio_cancel,
+ .cancel_async = iscsi_aio_cancel,
};
@@ -638,10 +630,6 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
g_free(acb->buf);
acb->buf = NULL;
- if (acb->canceled != 0) {
- return;
- }
-
acb->status = 0;
if (status < 0) {
error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
@@ -683,7 +671,6 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
acb->iscsilun = iscsilun;
- acb->canceled = 0;
acb->bh = NULL;
acb->status = -EINPROGRESS;
acb->buf = NULL;
@@ -693,7 +680,7 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
if (acb->task == NULL) {
error_report("iSCSI: Failed to allocate task for scsi command. %s",
iscsi_get_error(iscsi));
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
return NULL;
}
memset(acb->task, 0, sizeof(struct scsi_task));
@@ -731,7 +718,7 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
(data.size > 0) ? &data : NULL,
acb) != 0) {
scsi_free_scsi_task(acb->task);
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
return NULL;
}
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 9aca758b10..f4baba2be0 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -85,11 +85,10 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
ret = -EINVAL;
}
}
-
- laiocb->common.cb(laiocb->common.opaque, ret);
}
+ laiocb->common.cb(laiocb->common.opaque, ret);
- qemu_aio_release(laiocb);
+ qemu_aio_unref(laiocb);
}
/* The completion BH fetches completed I/O requests and invokes their
@@ -153,35 +152,22 @@ static void laio_cancel(BlockDriverAIOCB *blockacb)
struct io_event event;
int ret;
- if (laiocb->ret != -EINPROGRESS)
+ if (laiocb->ret != -EINPROGRESS) {
return;
-
- /*
- * Note that as of Linux 2.6.31 neither the block device code nor any
- * filesystem implements cancellation of AIO request.
- * Thus the polling loop below is the normal code path.
- */
+ }
ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
- if (ret == 0) {
- laiocb->ret = -ECANCELED;
+ laiocb->ret = -ECANCELED;
+ if (ret != 0) {
+ /* iocb is not cancelled, cb will be called by the event loop later */
return;
}
- /*
- * We have to wait for the iocb to finish.
- *
- * The only way to get the iocb status update is by polling the io context.
- * We might be able to do this slightly more optimal by removing the
- * O_NONBLOCK flag.
- */
- while (laiocb->ret == -EINPROGRESS) {
- qemu_laio_completion_cb(&laiocb->ctx->e);
- }
+ laiocb->common.cb(laiocb->common.opaque, laiocb->ret);
}
static const AIOCBInfo laio_aiocb_info = {
.aiocb_size = sizeof(struct qemu_laiocb),
- .cancel = laio_cancel,
+ .cancel_async = laio_cancel,
};
static void ioq_init(LaioQueue *io_q)
@@ -300,7 +286,7 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
return &laiocb->common;
out_free_aiocb:
- qemu_aio_release(laiocb);
+ qemu_aio_unref(laiocb);
return NULL;
}
diff --git a/block/null.c b/block/null.c
new file mode 100644
index 0000000000..828441999c
--- /dev/null
+++ b/block/null.c
@@ -0,0 +1,168 @@
+/*
+ * Null block driver
+ *
+ * Authors:
+ * Fam Zheng <famz@redhat.com>
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "block/block_int.h"
+
+typedef struct {
+ int64_t length;
+} BDRVNullState;
+
+static QemuOptsList runtime_opts = {
+ .name = "null",
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+ .desc = {
+ {
+ .name = "filename",
+ .type = QEMU_OPT_STRING,
+ .help = "",
+ },
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "size of the null block",
+ },
+ { /* end of list */ }
+ },
+};
+
+static int null_file_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ QemuOpts *opts;
+ BDRVNullState *s = bs->opaque;
+
+ opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &error_abort);
+ s->length =
+ qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 1 << 30);
+ qemu_opts_del(opts);
+ return 0;
+}
+
+static void null_close(BlockDriverState *bs)
+{
+}
+
+static int64_t null_getlength(BlockDriverState *bs)
+{
+ BDRVNullState *s = bs->opaque;
+ return s->length;
+}
+
+static coroutine_fn int null_co_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov)
+{
+ return 0;
+}
+
+static coroutine_fn int null_co_writev(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov)
+{
+ return 0;
+}
+
+static coroutine_fn int null_co_flush(BlockDriverState *bs)
+{
+ return 0;
+}
+
+typedef struct {
+ BlockDriverAIOCB common;
+ QEMUBH *bh;
+} NullAIOCB;
+
+static const AIOCBInfo null_aiocb_info = {
+ .aiocb_size = sizeof(NullAIOCB),
+};
+
+static void null_bh_cb(void *opaque)
+{
+ NullAIOCB *acb = opaque;
+ acb->common.cb(acb->common.opaque, 0);
+ qemu_bh_delete(acb->bh);
+ qemu_aio_unref(acb);
+}
+
+static inline BlockDriverAIOCB *null_aio_common(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ NullAIOCB *acb;
+
+ acb = qemu_aio_get(&null_aiocb_info, bs, cb, opaque);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(bs), null_bh_cb, acb);
+ qemu_bh_schedule(acb->bh);
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *null_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ return null_aio_common(bs, cb, opaque);
+}
+
+static BlockDriverAIOCB *null_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ return null_aio_common(bs, cb, opaque);
+}
+
+static BlockDriverAIOCB *null_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ return null_aio_common(bs, cb, opaque);
+}
+
+static BlockDriver bdrv_null_co = {
+ .format_name = "null-co",
+ .protocol_name = "null-co",
+ .instance_size = sizeof(BDRVNullState),
+
+ .bdrv_file_open = null_file_open,
+ .bdrv_close = null_close,
+ .bdrv_getlength = null_getlength,
+
+ .bdrv_co_readv = null_co_readv,
+ .bdrv_co_writev = null_co_writev,
+ .bdrv_co_flush_to_disk = null_co_flush,
+};
+
+static BlockDriver bdrv_null_aio = {
+ .format_name = "null-aio",
+ .protocol_name = "null-aio",
+ .instance_size = sizeof(BDRVNullState),
+
+ .bdrv_file_open = null_file_open,
+ .bdrv_close = null_close,
+ .bdrv_getlength = null_getlength,
+
+ .bdrv_aio_readv = null_aio_readv,
+ .bdrv_aio_writev = null_aio_writev,
+ .bdrv_aio_flush = null_aio_flush,
+};
+
+static void bdrv_null_init(void)
+{
+ bdrv_register(&bdrv_null_co);
+ bdrv_register(&bdrv_null_aio);
+}
+
+block_init(bdrv_null_init);
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 735f687b06..f7dd8c0985 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -486,6 +486,13 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
goto out;
}
+ if (offset_into_cluster(s, l2_offset)) {
+ qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
+ " unaligned (L1 index: %#" PRIx64 ")",
+ l2_offset, l1_index);
+ return -EIO;
+ }
+
/* load the l2 table in memory */
ret = l2_load(bs, l2_offset, &l2_table);
@@ -508,8 +515,11 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
break;
case QCOW2_CLUSTER_ZERO:
if (s->qcow_version < 3) {
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- return -EIO;
+ qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
+ " in pre-v3 image (L2 offset: %#" PRIx64
+ ", L2 index: %#x)", l2_offset, l2_index);
+ ret = -EIO;
+ goto fail;
}
c = count_contiguous_clusters(nb_clusters, s->cluster_size,
&l2_table[l2_index], QCOW_OFLAG_ZERO);
@@ -525,6 +535,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
c = count_contiguous_clusters(nb_clusters, s->cluster_size,
&l2_table[l2_index], QCOW_OFLAG_ZERO);
*cluster_offset &= L2E_OFFSET_MASK;
+ if (offset_into_cluster(s, *cluster_offset)) {
+ qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#"
+ PRIx64 " unaligned (L2 offset: %#" PRIx64
+ ", L2 index: %#x)", *cluster_offset,
+ l2_offset, l2_index);
+ ret = -EIO;
+ goto fail;
+ }
break;
default:
abort();
@@ -541,6 +559,10 @@ out:
*num = nb_available - index_in_cluster;
return ret;
+
+fail:
+ qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
+ return ret;
}
/*
@@ -576,6 +598,12 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
assert(l1_index < s->l1_size);
l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
+ if (offset_into_cluster(s, l2_offset)) {
+ qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
+ " unaligned (L1 index: %#" PRIx64 ")",
+ l2_offset, l1_index);
+ return -EIO;
+ }
/* seek the l2 table of the given l2 offset */
@@ -948,6 +976,15 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
bool offset_matches =
(cluster_offset & L2E_OFFSET_MASK) == *host_offset;
+ if (offset_into_cluster(s, cluster_offset & L2E_OFFSET_MASK)) {
+ qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset "
+ "%#llx unaligned (guest offset: %#" PRIx64
+ ")", cluster_offset & L2E_OFFSET_MASK,
+ guest_offset);
+ ret = -EIO;
+ goto out;
+ }
+
if (*host_offset != 0 && !offset_matches) {
*bytes = 0;
ret = 0;
@@ -979,7 +1016,7 @@ out:
/* Only return a host offset if we actually made progress. Otherwise we
* would make requirements for handle_alloc() that it can't fulfill */
- if (ret) {
+ if (ret > 0) {
*host_offset = (cluster_offset & L2E_OFFSET_MASK)
+ offset_into_cluster(s, guest_offset);
}
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 43665b86e7..2bcaaf9b98 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -26,8 +26,6 @@
#include "block/block_int.h"
#include "block/qcow2.h"
#include "qemu/range.h"
-#include "qapi/qmp/types.h"
-#include "qapi-event.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
@@ -110,6 +108,13 @@ static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
if (!refcount_block_offset)
return 0;
+ if (offset_into_cluster(s, refcount_block_offset)) {
+ qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" PRIx64
+ " unaligned (reftable index: %#" PRIx64 ")",
+ refcount_block_offset, refcount_table_index);
+ return -EIO;
+ }
+
ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
(void**) &refcount_block);
if (ret < 0) {
@@ -183,6 +188,14 @@ static int alloc_refcount_block(BlockDriverState *bs,
/* If it's already there, we're done */
if (refcount_block_offset) {
+ if (offset_into_cluster(s, refcount_block_offset)) {
+ qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
+ PRIx64 " unaligned (reftable index: "
+ "%#x)", refcount_block_offset,
+ refcount_table_index);
+ return -EIO;
+ }
+
return load_refcount_block(bs, refcount_block_offset,
(void**) refcount_block);
}
@@ -838,8 +851,14 @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
case QCOW2_CLUSTER_NORMAL:
case QCOW2_CLUSTER_ZERO:
if (l2_entry & L2E_OFFSET_MASK) {
- qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
- nb_clusters << s->cluster_bits, type);
+ if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
+ qcow2_signal_corruption(bs, false, -1, -1,
+ "Cannot free unaligned cluster %#llx",
+ l2_entry & L2E_OFFSET_MASK);
+ } else {
+ qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
+ nb_clusters << s->cluster_bits, type);
+ }
}
break;
case QCOW2_CLUSTER_UNALLOCATED:
@@ -903,6 +922,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
old_l2_offset = l2_offset;
l2_offset &= L1E_OFFSET_MASK;
+ if (offset_into_cluster(s, l2_offset)) {
+ qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
+ PRIx64 " unaligned (L1 index: %#x)",
+ l2_offset, i);
+ ret = -EIO;
+ goto fail;
+ }
+
ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
(void**) &l2_table);
if (ret < 0) {
@@ -935,6 +962,17 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
case QCOW2_CLUSTER_NORMAL:
case QCOW2_CLUSTER_ZERO:
+ if (offset_into_cluster(s, offset & L2E_OFFSET_MASK)) {
+ qcow2_signal_corruption(bs, true, -1, -1, "Data "
+ "cluster offset %#llx "
+ "unaligned (L2 offset: %#"
+ PRIx64 ", L2 index: %#x)",
+ offset & L2E_OFFSET_MASK,
+ l2_offset, j);
+ ret = -EIO;
+ goto fail;
+ }
+
cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
if (!cluster_index) {
/* unallocated */
@@ -1838,26 +1876,11 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
return ret;
} else if (ret > 0) {
int metadata_ol_bitnr = ffs(ret) - 1;
- char *message;
-
assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);
- fprintf(stderr, "qcow2: Preventing invalid write on metadata (overlaps "
- "with %s); image marked as corrupt.\n",
- metadata_ol_names[metadata_ol_bitnr]);
- message = g_strdup_printf("Prevented %s overwrite",
- metadata_ol_names[metadata_ol_bitnr]);
- qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
- message,
- true,
- offset,
- true,
- size,
- &error_abort);
- g_free(message);
-
- qcow2_mark_corrupt(bs);
- bs->drv = NULL; /* make BDS unusable */
+ qcow2_signal_corruption(bs, true, offset, size, "Preventing invalid "
+ "write on metadata (overlaps with %s)",
+ metadata_ol_names[metadata_ol_bitnr]);
return -EIO;
}
diff --git a/block/qcow2.c b/block/qcow2.c
index 0daf25cb58..778fc1ec13 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -31,6 +31,8 @@
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qbool.h"
#include "qapi/util.h"
+#include "qapi/qmp/types.h"
+#include "qapi-event.h"
#include "trace.h"
#include "qemu/option_int.h"
@@ -404,6 +406,12 @@ static QemuOptsList qcow2_runtime_opts = {
"templates (none, constant, cached, all)",
},
{
+ .name = QCOW2_OPT_OVERLAP_TEMPLATE,
+ .type = QEMU_OPT_STRING,
+ .help = "Selects which overlap checks to perform from a range of "
+ "templates (none, constant, cached, all)",
+ },
+ {
.name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
.type = QEMU_OPT_BOOL,
.help = "Check for unintended writes into the main qcow2 header",
@@ -536,11 +544,11 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
unsigned int len, i;
int ret = 0;
QCowHeader header;
- QemuOpts *opts;
+ QemuOpts *opts = NULL;
Error *local_err = NULL;
uint64_t ext_end;
uint64_t l1_vm_state_index;
- const char *opt_overlap_check;
+ const char *opt_overlap_check, *opt_overlap_check_template;
int overlap_check_template = 0;
uint64_t l2_cache_size, refcount_cache_size;
@@ -920,7 +928,21 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
s->discard_passthrough[QCOW2_DISCARD_OTHER] =
qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
- opt_overlap_check = qemu_opt_get(opts, "overlap-check") ?: "cached";
+ opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
+ opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
+ if (opt_overlap_check_template && opt_overlap_check &&
+ strcmp(opt_overlap_check_template, opt_overlap_check))
+ {
+ error_setg(errp, "Conflicting values for qcow2 options '"
+ QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
+ "' ('%s')", opt_overlap_check, opt_overlap_check_template);
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (!opt_overlap_check) {
+ opt_overlap_check = opt_overlap_check_template ?: "cached";
+ }
+
if (!strcmp(opt_overlap_check, "none")) {
overlap_check_template = 0;
} else if (!strcmp(opt_overlap_check, "constant")) {
@@ -933,7 +955,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
error_setg(errp, "Unsupported value '%s' for qcow2 option "
"'overlap-check'. Allowed are either of the following: "
"none, constant, cached, all", opt_overlap_check);
- qemu_opts_del(opts);
ret = -EINVAL;
goto fail;
}
@@ -948,6 +969,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
}
qemu_opts_del(opts);
+ opts = NULL;
if (s->use_lazy_refcounts && s->qcow_version < 3) {
error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
@@ -965,6 +987,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
fail:
+ qemu_opts_del(opts);
g_free(s->unknown_header_fields);
cleanup_unknown_header_ext(bs);
qcow2_free_snapshots(bs);
@@ -2529,6 +2552,52 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts)
return 0;
}
+/*
+ * If offset or size are negative, respectively, they will not be included in
+ * the BLOCK_IMAGE_CORRUPTED event emitted.
+ * fatal will be ignored for read-only BDS; corruptions found there will always
+ * be considered non-fatal.
+ */
+void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
+ int64_t size, const char *message_format, ...)
+{
+ BDRVQcowState *s = bs->opaque;
+ char *message;
+ va_list ap;
+
+ fatal = fatal && !bs->read_only;
+
+ if (s->signaled_corruption &&
+ (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT)))
+ {
+ return;
+ }
+
+ va_start(ap, message_format);
+ message = g_strdup_vprintf(message_format, ap);
+ va_end(ap);
+
+ if (fatal) {
+ fprintf(stderr, "qcow2: Marking image as corrupt: %s; further "
+ "corruption events will be suppressed\n", message);
+ } else {
+ fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal "
+ "corruption events will be suppressed\n", message);
+ }
+
+ qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), message,
+ offset >= 0, offset, size >= 0, size,
+ fatal, &error_abort);
+ g_free(message);
+
+ if (fatal) {
+ qcow2_mark_corrupt(bs);
+ bs->drv = NULL; /* make BDS unusable */
+ }
+
+ s->signaled_corruption = true;
+}
+
static QemuOptsList qcow2_create_opts = {
.name = "qcow2-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
diff --git a/block/qcow2.h b/block/qcow2.h
index 6aeb7ea90f..7d61e615ff 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -83,6 +83,7 @@
#define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
#define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
#define QCOW2_OPT_OVERLAP "overlap-check"
+#define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template"
#define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header"
#define QCOW2_OPT_OVERLAP_ACTIVE_L1 "overlap-check.active-l1"
#define QCOW2_OPT_OVERLAP_ACTIVE_L2 "overlap-check.active-l2"
@@ -261,6 +262,7 @@ typedef struct BDRVQcowState {
bool discard_passthrough[QCOW2_DISCARD_MAX];
int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
+ bool signaled_corruption;
uint64_t incompatible_features;
uint64_t compatible_features;
@@ -477,6 +479,10 @@ int qcow2_mark_corrupt(BlockDriverState *bs);
int qcow2_mark_consistent(BlockDriverState *bs);
int qcow2_update_header(BlockDriverState *bs);
+void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
+ int64_t size, const char *message_format, ...)
+ GCC_FMT_ATTR(5, 6);
+
/* qcow2-refcount.c functions */
int qcow2_refcount_init(BlockDriverState *bs);
void qcow2_refcount_close(BlockDriverState *bs);
diff --git a/block/qed.c b/block/qed.c
index f8d9e12263..e2316d7ff0 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -18,22 +18,8 @@
#include "qapi/qmp/qerror.h"
#include "migration/migration.h"
-static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
-{
- QEDAIOCB *acb = (QEDAIOCB *)blockacb;
- AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
- bool finished = false;
-
- /* Wait for the request to finish */
- acb->finished = &finished;
- while (!finished) {
- aio_poll(aio_context, true);
- }
-}
-
static const AIOCBInfo qed_aiocb_info = {
.aiocb_size = sizeof(QEDAIOCB),
- .cancel = qed_aio_cancel,
};
static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
@@ -919,18 +905,12 @@ static void qed_aio_complete_bh(void *opaque)
BlockDriverCompletionFunc *cb = acb->common.cb;
void *user_opaque = acb->common.opaque;
int ret = acb->bh_ret;
- bool *finished = acb->finished;
qemu_bh_delete(acb->bh);
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
/* Invoke callback */
cb(user_opaque, ret);
-
- /* Signal cancel completion */
- if (finished) {
- *finished = true;
- }
}
static void qed_aio_complete(QEDAIOCB *acb, int ret)
@@ -1397,7 +1377,6 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
opaque, flags);
acb->flags = flags;
- acb->finished = NULL;
acb->qiov = qiov;
acb->qiov_offset = 0;
acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
diff --git a/block/quorum.c b/block/quorum.c
index 093382e8f5..7687466733 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -138,16 +138,15 @@ static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
/* cancel all callbacks */
for (i = 0; i < s->num_children; i++) {
- bdrv_aio_cancel(acb->qcrs[i].aiocb);
+ if (acb->qcrs[i].aiocb) {
+ bdrv_aio_cancel_async(acb->qcrs[i].aiocb);
+ }
}
-
- g_free(acb->qcrs);
- qemu_aio_release(acb);
}
static AIOCBInfo quorum_aiocb_info = {
.aiocb_size = sizeof(QuorumAIOCB),
- .cancel = quorum_aio_cancel,
+ .cancel_async = quorum_aio_cancel,
};
static void quorum_aio_finalize(QuorumAIOCB *acb)
@@ -169,7 +168,7 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
}
g_free(acb->qcrs);
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
}
static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
diff --git a/block/rbd.c b/block/rbd.c
index b7f7d5ff30..96947e328a 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -77,7 +77,6 @@ typedef struct RBDAIOCB {
int64_t sector_num;
int error;
struct BDRVRBDState *s;
- int cancelled;
int status;
} RBDAIOCB;
@@ -408,9 +407,7 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
acb->status = 0;
- if (!acb->cancelled) {
- qemu_aio_release(acb);
- }
+ qemu_aio_unref(acb);
}
/* TODO Convert to fine grained options */
@@ -539,25 +536,8 @@ static void qemu_rbd_close(BlockDriverState *bs)
rados_shutdown(s->cluster);
}
-/*
- * Cancel aio. Since we don't reference acb in a non qemu threads,
- * it is safe to access it here.
- */
-static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
-{
- RBDAIOCB *acb = (RBDAIOCB *) blockacb;
- acb->cancelled = 1;
-
- while (acb->status == -EINPROGRESS) {
- aio_poll(bdrv_get_aio_context(acb->common.bs), true);
- }
-
- qemu_aio_release(acb);
-}
-
static const AIOCBInfo rbd_aiocb_info = {
.aiocb_size = sizeof(RBDAIOCB),
- .cancel = qemu_rbd_aio_cancel,
};
static void rbd_finish_bh(void *opaque)
@@ -640,7 +620,6 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
acb->ret = 0;
acb->error = 0;
acb->s = s;
- acb->cancelled = 0;
acb->bh = NULL;
acb->status = -EINPROGRESS;
@@ -692,7 +671,7 @@ failed_completion:
failed:
g_free(rcb);
qemu_vfree(acb->bounce);
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
return NULL;
}
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 7da36e1f9a..2d78ef91f7 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -315,7 +315,6 @@ struct SheepdogAIOCB {
void (*aio_done_func)(SheepdogAIOCB *);
bool cancelable;
- bool *finished;
int nr_pending;
};
@@ -446,10 +445,7 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
{
qemu_coroutine_enter(acb->coroutine, NULL);
- if (acb->finished) {
- *acb->finished = true;
- }
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
}
/*
@@ -482,36 +478,33 @@ static void sd_aio_cancel(BlockDriverAIOCB *blockacb)
SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb;
BDRVSheepdogState *s = acb->common.bs->opaque;
AIOReq *aioreq, *next;
- bool finished = false;
-
- acb->finished = &finished;
- while (!finished) {
- if (sd_acb_cancelable(acb)) {
- /* Remove outstanding requests from pending and failed queues. */
- QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings,
- next) {
- if (aioreq->aiocb == acb) {
- free_aio_req(s, aioreq);
- }
+
+ if (sd_acb_cancelable(acb)) {
+ /* Remove outstanding requests from pending and failed queues. */
+ QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings,
+ next) {
+ if (aioreq->aiocb == acb) {
+ free_aio_req(s, aioreq);
}
- QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
- next) {
- if (aioreq->aiocb == acb) {
- free_aio_req(s, aioreq);
- }
+ }
+ QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
+ next) {
+ if (aioreq->aiocb == acb) {
+ free_aio_req(s, aioreq);
}
+ }
- assert(acb->nr_pending == 0);
- sd_finish_aiocb(acb);
- return;
+ assert(acb->nr_pending == 0);
+ if (acb->common.cb) {
+ acb->common.cb(acb->common.opaque, -ECANCELED);
}
- aio_poll(s->aio_context, true);
+ sd_finish_aiocb(acb);
}
}
static const AIOCBInfo sd_aiocb_info = {
- .aiocb_size = sizeof(SheepdogAIOCB),
- .cancel = sd_aio_cancel,
+ .aiocb_size = sizeof(SheepdogAIOCB),
+ .cancel_async = sd_aio_cancel,
};
static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
@@ -528,7 +521,6 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
acb->aio_done_func = NULL;
acb->cancelable = true;
- acb->finished = NULL;
acb->coroutine = qemu_coroutine_self();
acb->ret = 0;
acb->nr_pending = 0;
@@ -2138,7 +2130,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
return ret;
}
@@ -2159,7 +2151,7 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
return ret;
}
@@ -2518,7 +2510,7 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
- qemu_aio_release(acb);
+ qemu_aio_unref(acb);
return ret;
}
diff --git a/block/vhdx.c b/block/vhdx.c
index 5bf292e33d..b0464487a5 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -99,7 +99,8 @@ static const MSGUID logical_sector_guid = { .data1 = 0x8141bf1d,
/* Each parent type must have a valid GUID; this is for parent images
* of type 'VHDX'. If we were to allow e.g. a QCOW2 parent, we would
* need to make up our own QCOW2 GUID type */
-static const MSGUID parent_vhdx_guid = { .data1 = 0xb04aefb7,
+static const MSGUID parent_vhdx_guid __attribute__((unused))
+ = { .data1 = 0xb04aefb7,
.data2 = 0xd19e,
.data3 = 0x4a81,
.data4 = { 0xb7, 0x89, 0x25, 0xb8,
@@ -1407,6 +1408,12 @@ exit:
return ret;
}
+#define VHDX_METADATA_ENTRY_BUFFER_SIZE \
+ (sizeof(VHDXFileParameters) +\
+ sizeof(VHDXVirtualDiskSize) +\
+ sizeof(VHDXPage83Data) +\
+ sizeof(VHDXVirtualDiskLogicalSectorSize) +\
+ sizeof(VHDXVirtualDiskPhysicalSectorSize))
/*
* Create the Metadata entries.
@@ -1445,11 +1452,7 @@ static int vhdx_create_new_metadata(BlockDriverState *bs,
VHDXVirtualDiskLogicalSectorSize *mt_log_sector_size;
VHDXVirtualDiskPhysicalSectorSize *mt_phys_sector_size;
- entry_buffer = g_malloc0(sizeof(VHDXFileParameters) +
- sizeof(VHDXVirtualDiskSize) +
- sizeof(VHDXPage83Data) +
- sizeof(VHDXVirtualDiskLogicalSectorSize) +
- sizeof(VHDXVirtualDiskPhysicalSectorSize));
+ entry_buffer = g_malloc0(VHDX_METADATA_ENTRY_BUFFER_SIZE);
mt_file_params = entry_buffer;
offset += sizeof(VHDXFileParameters);
@@ -1530,7 +1533,7 @@ static int vhdx_create_new_metadata(BlockDriverState *bs,
}
ret = bdrv_pwrite(bs, metadata_offset + (64 * KiB), entry_buffer,
- VHDX_HEADER_BLOCK_SIZE);
+ VHDX_METADATA_ENTRY_BUFFER_SIZE);
if (ret < 0) {
goto exit;
}
@@ -1725,7 +1728,6 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
goto exit;
}
-
exit:
g_free(s);
g_free(buffer);
@@ -1876,7 +1878,6 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
}
-
delete_and_exit:
bdrv_unref(bs);
exit:
diff --git a/block/win32-aio.c b/block/win32-aio.c
index 5030e3274d..9323c1aed1 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -88,7 +88,7 @@ static void win32_aio_process_completion(QEMUWin32AIOState *s,
waiocb->common.cb(waiocb->common.opaque, ret);
- qemu_aio_release(waiocb);
+ qemu_aio_unref(waiocb);
}
static void win32_aio_completion_cb(EventNotifier *e)
@@ -106,22 +106,8 @@ static void win32_aio_completion_cb(EventNotifier *e)
}
}
-static void win32_aio_cancel(BlockDriverAIOCB *blockacb)
-{
- QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb;
-
- /*
- * CancelIoEx is only supported in Vista and newer. For now, just
- * wait for completion.
- */
- while (!HasOverlappedIoCompleted(&waiocb->ov)) {
- aio_poll(bdrv_get_aio_context(blockacb->bs), true);
- }
-}
-
static const AIOCBInfo win32_aiocb_info = {
.aiocb_size = sizeof(QEMUWin32AIOCB),
- .cancel = win32_aio_cancel,
};
BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
@@ -172,7 +158,7 @@ BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
out_dec_count:
aio->count--;
out:
- qemu_aio_release(waiocb);
+ qemu_aio_unref(waiocb);
return NULL;
}
diff --git a/configure b/configure
index eb9cbcdc3a..862f6d27e3 100755
--- a/configure
+++ b/configure
@@ -327,7 +327,6 @@ glusterfs=""
glusterfs_discard="no"
glusterfs_zerofill="no"
archipelago=""
-virtio_blk_data_plane=""
gtk=""
gtkabi=""
vte=""
@@ -1093,9 +1092,8 @@ for opt do
;;
--enable-archipelago) archipelago="yes"
;;
- --disable-virtio-blk-data-plane) virtio_blk_data_plane="no"
- ;;
- --enable-virtio-blk-data-plane) virtio_blk_data_plane="yes"
+ --disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane)
+ echo "$0: $opt is obsolete, virtio-blk data-plane is always on" >&2
;;
--disable-gtk) gtk="no"
;;
@@ -2944,16 +2942,6 @@ else
fi
##########################################
-# adjust virtio-blk-data-plane based on linux-aio
-
-if test "$virtio_blk_data_plane" = "yes" -a \
- "$linux_aio" != "yes" ; then
- error_exit "virtio-blk-data-plane requires Linux AIO, please try --enable-linux-aio"
-elif test -z "$virtio_blk_data_plane" ; then
- virtio_blk_data_plane=$linux_aio
-fi
-
-##########################################
# attr probe
if test "$attr" != "no" ; then
@@ -4327,7 +4315,6 @@ echo "coroutine backend $coroutine"
echo "coroutine pool $coroutine_pool"
echo "GlusterFS support $glusterfs"
echo "Archipelago support $archipelago"
-echo "virtio-blk-data-plane $virtio_blk_data_plane"
echo "gcov $gcov_tool"
echo "gcov enabled $gcov"
echo "TPM support $tpm"
@@ -4789,10 +4776,6 @@ if test "$quorum" = "yes" ; then
echo "CONFIG_QUORUM=y" >> $config_host_mak
fi
-if test "$virtio_blk_data_plane" = "yes" ; then
- echo 'CONFIG_VIRTIO_BLK_DATA_PLANE=$(CONFIG_VIRTIO)' >> $config_host_mak
-fi
-
if test "$vhdx" = "yes" ; then
echo "CONFIG_VHDX=y" >> $config_host_mak
fi
diff --git a/dma-helpers.c b/dma-helpers.c
index f6811fa86b..7f86e18e72 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -73,7 +73,6 @@ typedef struct {
QEMUSGList *sg;
uint64_t sector_num;
DMADirection dir;
- bool in_cancel;
int sg_cur_index;
dma_addr_t sg_cur_byte;
QEMUIOVector iov;
@@ -125,12 +124,7 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
qemu_bh_delete(dbs->bh);
dbs->bh = NULL;
}
- if (!dbs->in_cancel) {
- /* Requests may complete while dma_aio_cancel is in progress. In
- * this case, the AIOCB should not be released because it is still
- * referenced by dma_aio_cancel. */
- qemu_aio_release(dbs);
- }
+ qemu_aio_unref(dbs);
}
static void dma_bdrv_cb(void *opaque, int ret)
@@ -186,19 +180,14 @@ static void dma_aio_cancel(BlockDriverAIOCB *acb)
trace_dma_aio_cancel(dbs);
if (dbs->acb) {
- BlockDriverAIOCB *acb = dbs->acb;
- dbs->acb = NULL;
- dbs->in_cancel = true;
- bdrv_aio_cancel(acb);
- dbs->in_cancel = false;
+ bdrv_aio_cancel_async(dbs->acb);
}
- dbs->common.cb = NULL;
- dma_complete(dbs, 0);
}
+
static const AIOCBInfo dma_aiocb_info = {
.aiocb_size = sizeof(DMAAIOCB),
- .cancel = dma_aio_cancel,
+ .cancel_async = dma_aio_cancel,
};
BlockDriverAIOCB *dma_bdrv_io(
@@ -217,7 +206,6 @@ BlockDriverAIOCB *dma_bdrv_io(
dbs->sg_cur_index = 0;
dbs->sg_cur_byte = 0;
dbs->dir = dir;
- dbs->in_cancel = false;
dbs->io_func = io_func;
dbs->bh = NULL;
qemu_iovec_init(&dbs->iov, sg->nsg);
diff --git a/docs/image-fuzzer.txt b/docs/image-fuzzer.txt
index 0d0005d34a..3e23ebec33 100644
--- a/docs/image-fuzzer.txt
+++ b/docs/image-fuzzer.txt
@@ -125,7 +125,8 @@ If a fuzzer configuration is specified, then it has the next interpretation:
will be always fuzzed for every test. This case is useful for regression
testing.
-For now only header fields, header extensions and L1/L2 tables are generated.
+The generator can create header fields, header extensions, L1/L2 tables and
+refcount table and blocks.
Module interfaces
-----------------
diff --git a/hmp.c b/hmp.c
index 40a90dae70..31fb6a15ca 100644
--- a/hmp.c
+++ b/hmp.c
@@ -679,6 +679,8 @@ void hmp_info_block_jobs(Monitor *mon, const QDict *qdict)
}
list = list->next;
}
+
+ qapi_free_BlockJobInfoList(list);
}
void hmp_info_tpm(Monitor *mon, const QDict *qdict)
diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
index bf46f03b7e..d4c3ab758d 100644
--- a/hw/block/Makefile.objs
+++ b/hw/block/Makefile.objs
@@ -12,4 +12,4 @@ common-obj-$(CONFIG_NVME_PCI) += nvme.o
obj-$(CONFIG_SH4) += tc58128.o
obj-$(CONFIG_VIRTIO) += virtio-blk.o
-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/
+obj-$(CONFIG_VIRTIO) += dataplane/
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 38ad38f49f..45e0c8f6e9 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -18,10 +18,8 @@
#include "hw/block/block.h"
#include "sysemu/blockdev.h"
#include "hw/virtio/virtio-blk.h"
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
-# include "dataplane/virtio-blk.h"
-# include "migration/migration.h"
-#endif
+#include "dataplane/virtio-blk.h"
+#include "migration/migration.h"
#include "block/scsi.h"
#ifdef __linux__
# include <scsi/sg.h>
@@ -435,7 +433,6 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
.num_writes = 0,
};
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
/* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
* dataplane here instead of waiting for .set_status().
*/
@@ -443,7 +440,6 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
virtio_blk_data_plane_start(s->dataplane);
return;
}
-#endif
while ((req = virtio_blk_get_request(s))) {
virtio_blk_handle_request(req, &mrb);
@@ -500,11 +496,9 @@ static void virtio_blk_reset(VirtIODevice *vdev)
{
VirtIOBlock *s = VIRTIO_BLK(vdev);
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
if (s->dataplane) {
virtio_blk_data_plane_stop(s->dataplane);
}
-#endif
/*
* This should cancel pending requests, but can't do nicely until there
@@ -594,12 +588,10 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
VirtIOBlock *s = VIRTIO_BLK(vdev);
uint32_t features;
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
if (s->dataplane && !(status & (VIRTIO_CONFIG_S_DRIVER |
VIRTIO_CONFIG_S_DRIVER_OK))) {
virtio_blk_data_plane_stop(s->dataplane);
}
-#endif
if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
return;
@@ -694,7 +686,6 @@ static const BlockDevOps virtio_block_ops = {
.resize_cb = virtio_blk_resize,
};
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
/* Disable dataplane thread during live migration since it does not
* update the dirty memory bitmap yet.
*/
@@ -725,7 +716,6 @@ static void virtio_blk_migration_state_changed(Notifier *notifier, void *data)
}
}
}
-#endif /* CONFIG_VIRTIO_BLK_DATA_PLANE */
static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
{
@@ -762,7 +752,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output);
s->complete_request = virtio_blk_complete_request;
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
virtio_blk_data_plane_create(vdev, blk, &s->dataplane, &err);
if (err != NULL) {
error_propagate(errp, err);
@@ -771,7 +760,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
}
s->migration_state_notifier.notify = virtio_blk_migration_state_changed;
add_migration_state_change_notifier(&s->migration_state_notifier);
-#endif
s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
@@ -789,11 +777,9 @@ static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp)
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOBlock *s = VIRTIO_BLK(dev);
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
remove_migration_state_change_notifier(&s->migration_state_notifier);
virtio_blk_data_plane_destroy(s->dataplane);
s->dataplane = NULL;
-#endif
qemu_del_vm_change_state_handler(s->change);
unregister_savevm(dev, "virtio-blk", s);
blockdev_mark_auto_del(s->bs);
@@ -818,9 +804,7 @@ static Property virtio_blk_properties[] = {
#ifdef __linux__
DEFINE_PROP_BIT("scsi", VirtIOBlock, blk.scsi, 0, true),
#endif
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, blk.data_plane, 0, false),
-#endif
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index ba69de30e0..8978643fff 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -78,8 +78,7 @@ static uint32_t ahci_port_read(AHCIState *s, int port, int offset)
val = pr->cmd;
break;
case PORT_TFDATA:
- val = ((uint16_t)s->dev[port].port.ifs[0].error << 8) |
- s->dev[port].port.ifs[0].status;
+ val = pr->tfdata;
break;
case PORT_SIG:
val = pr->sig;
@@ -251,14 +250,13 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
check_cmd(s, port);
break;
case PORT_TFDATA:
- s->dev[port].port.ifs[0].error = (val >> 8) & 0xff;
- s->dev[port].port.ifs[0].status = val & 0xff;
+ /* Read Only. */
break;
case PORT_SIG:
- pr->sig = val;
+ /* Read Only */
break;
case PORT_SCR_STAT:
- pr->scr_stat = val;
+ /* Read Only */
break;
case PORT_SCR_CTL:
if (((pr->scr_ctl & AHCI_SCR_SCTL_DET) == 1) &&
@@ -497,6 +495,8 @@ static void ahci_reset_port(AHCIState *s, int port)
pr->scr_stat = 0;
pr->scr_err = 0;
pr->scr_act = 0;
+ pr->tfdata = 0x7F;
+ pr->sig = 0xFFFFFFFF;
d->busy_slot = -1;
d->init_d2h_sent = false;
@@ -528,16 +528,16 @@ static void ahci_reset_port(AHCIState *s, int port)
s->dev[port].port_state = STATE_RUN;
if (!ide_state->bs) {
- s->dev[port].port_regs.sig = 0;
+ pr->sig = 0;
ide_state->status = SEEK_STAT | WRERR_STAT;
} else if (ide_state->drive_kind == IDE_CD) {
- s->dev[port].port_regs.sig = SATA_SIGNATURE_CDROM;
+ pr->sig = SATA_SIGNATURE_CDROM;
ide_state->lcyl = 0x14;
ide_state->hcyl = 0xeb;
DPRINTF(port, "set lcyl = %d\n", ide_state->lcyl);
ide_state->status = SEEK_STAT | WRERR_STAT | READY_STAT;
} else {
- s->dev[port].port_regs.sig = SATA_SIGNATURE_DISK;
+ pr->sig = SATA_SIGNATURE_DISK;
ide_state->status = SEEK_STAT | WRERR_STAT;
}
@@ -563,7 +563,8 @@ static void debug_print_fis(uint8_t *fis, int cmd_len)
static void ahci_write_fis_sdb(AHCIState *s, int port, uint32_t finished)
{
- AHCIPortRegs *pr = &s->dev[port].port_regs;
+ AHCIDevice *ad = &s->dev[port];
+ AHCIPortRegs *pr = &ad->port_regs;
IDEState *ide_state;
uint8_t *sdb_fis;
@@ -572,8 +573,8 @@ static void ahci_write_fis_sdb(AHCIState *s, int port, uint32_t finished)
return;
}
- sdb_fis = &s->dev[port].res_fis[RES_FIS_SDBFIS];
- ide_state = &s->dev[port].port.ifs[0];
+ sdb_fis = &ad->res_fis[RES_FIS_SDBFIS];
+ ide_state = &ad->port.ifs[0];
/* clear memory */
*(uint32_t*)sdb_fis = 0;
@@ -582,9 +583,14 @@ static void ahci_write_fis_sdb(AHCIState *s, int port, uint32_t finished)
sdb_fis[0] = ide_state->error;
sdb_fis[2] = ide_state->status & 0x77;
s->dev[port].finished |= finished;
- *(uint32_t*)(sdb_fis + 4) = cpu_to_le32(s->dev[port].finished);
+ *(uint32_t*)(sdb_fis + 4) = cpu_to_le32(ad->finished);
+
+ /* Update shadow registers (except BSY 0x80 and DRQ 0x08) */
+ pr->tfdata = (ad->port.ifs[0].error << 8) |
+ (ad->port.ifs[0].status & 0x77) |
+ (pr->tfdata & 0x88);
- ahci_trigger_irq(s, &s->dev[port], PORT_IRQ_SDB_FIS);
+ ahci_trigger_irq(s, ad, PORT_IRQ_SDB_FIS);
}
static void ahci_write_fis_pio(AHCIDevice *ad, uint16_t len)
@@ -642,6 +648,10 @@ static void ahci_write_fis_pio(AHCIDevice *ad, uint16_t len)
pio_fis[18] = 0;
pio_fis[19] = 0;
+ /* Update shadow registers: */
+ pr->tfdata = (ad->port.ifs[0].error << 8) |
+ ad->port.ifs[0].status;
+
if (pio_fis[2] & ERR_STAT) {
ahci_trigger_irq(ad->hba, ad, PORT_IRQ_TF_ERR);
}
@@ -693,6 +703,10 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis)
d2h_fis[i] = 0;
}
+ /* Update shadow registers: */
+ pr->tfdata = (ad->port.ifs[0].error << 8) |
+ ad->port.ifs[0].status;
+
if (d2h_fis[2] & ERR_STAT) {
ahci_trigger_irq(ad->hba, ad, PORT_IRQ_TF_ERR);
}
@@ -791,6 +805,9 @@ static void ncq_cb(void *opaque, int ret)
NCQTransferState *ncq_tfs = (NCQTransferState *)opaque;
IDEState *ide_state = &ncq_tfs->drive->port.ifs[0];
+ if (ret == -ECANCELED) {
+ return;
+ }
/* Clear bit for this tag in SActive */
ncq_tfs->drive->port_regs.scr_act &= ~(1 << ncq_tfs->tag);
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 6d52cda4cc..10218dfa3a 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -136,6 +136,7 @@ void ide_atapi_cmd_ok(IDEState *s)
s->error = 0;
s->status = READY_STAT | SEEK_STAT;
s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD;
+ ide_transfer_stop(s);
ide_set_irq(s->bus);
}
@@ -149,6 +150,7 @@ void ide_atapi_cmd_error(IDEState *s, int sense_key, int asc)
s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD;
s->sense_key = sense_key;
s->asc = asc;
+ ide_transfer_stop(s);
ide_set_irq(s->bus);
}
@@ -176,9 +178,7 @@ void ide_atapi_cmd_reply_end(IDEState *s)
#endif
if (s->packet_transfer_size <= 0) {
/* end of transfer */
- s->status = READY_STAT | SEEK_STAT;
- s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD;
- ide_transfer_stop(s);
+ ide_atapi_cmd_ok(s);
ide_set_irq(s->bus);
#ifdef DEBUG_IDE_ATAPI
printf("status=0x%x\n", s->status);
@@ -188,7 +188,6 @@ void ide_atapi_cmd_reply_end(IDEState *s)
if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) {
ret = cd_read_sector(s, s->lba, s->io_buffer, s->cd_sector_size);
if (ret < 0) {
- ide_transfer_stop(s);
ide_atapi_io_error(s, ret);
return;
}
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 6fba056783..190700ac74 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -372,23 +372,21 @@ static void trim_aio_cancel(BlockDriverAIOCB *acb)
{
TrimAIOCB *iocb = container_of(acb, TrimAIOCB, common);
- /* Exit the loop in case bdrv_aio_cancel calls ide_issue_trim_cb again. */
+ /* Exit the loop so ide_issue_trim_cb will not continue */
iocb->j = iocb->qiov->niov - 1;
iocb->i = (iocb->qiov->iov[iocb->j].iov_len / 8) - 1;
- /* Tell ide_issue_trim_cb not to trigger the completion, too. */
- qemu_bh_delete(iocb->bh);
- iocb->bh = NULL;
+ iocb->ret = -ECANCELED;
if (iocb->aiocb) {
- bdrv_aio_cancel(iocb->aiocb);
+ bdrv_aio_cancel_async(iocb->aiocb);
+ iocb->aiocb = NULL;
}
- qemu_aio_release(iocb);
}
static const AIOCBInfo trim_aiocb_info = {
.aiocb_size = sizeof(TrimAIOCB),
- .cancel = trim_aio_cancel,
+ .cancel_async = trim_aio_cancel,
};
static void ide_trim_bh_cb(void *opaque)
@@ -399,7 +397,7 @@ static void ide_trim_bh_cb(void *opaque)
qemu_bh_delete(iocb->bh);
iocb->bh = NULL;
- qemu_aio_release(iocb);
+ qemu_aio_unref(iocb);
}
static void ide_issue_trim_cb(void *opaque, int ret)
@@ -568,6 +566,9 @@ static void ide_sector_read_cb(void *opaque, int ret)
s->pio_aiocb = NULL;
s->status &= ~BUSY_STAT;
+ if (ret == -ECANCELED) {
+ return;
+ }
block_acct_done(bdrv_get_stats(s->bs), &s->acct);
if (ret != 0) {
if (ide_handle_rw_error(s, -ret, IDE_RETRY_PIO |
@@ -678,6 +679,9 @@ void ide_dma_cb(void *opaque, int ret)
int64_t sector_num;
bool stay_active = false;
+ if (ret == -ECANCELED) {
+ return;
+ }
if (ret < 0) {
int op = IDE_RETRY_DMA;
@@ -803,6 +807,9 @@ static void ide_sector_write_cb(void *opaque, int ret)
IDEState *s = opaque;
int n;
+ if (ret == -ECANCELED) {
+ return;
+ }
block_acct_done(bdrv_get_stats(s->bs), &s->acct);
s->pio_aiocb = NULL;
@@ -882,6 +889,9 @@ static void ide_flush_cb(void *opaque, int ret)
s->pio_aiocb = NULL;
+ if (ret == -ECANCELED) {
+ return;
+ }
if (ret < 0) {
/* XXX: What sector number to set here? */
if (ide_handle_rw_error(s, -ret, IDE_RETRY_FLUSH)) {
diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index a2f1639310..8eb77a1472 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -71,6 +71,7 @@
#include <hw/ide/pci.h>
#include <hw/ide/ahci.h>
+#define ICH9_MSI_CAP_OFFSET 0x80
#define ICH9_SATA_CAP_OFFSET 0xA8
#define ICH9_IDP_BAR 4
@@ -115,7 +116,6 @@ static int pci_ich9_ahci_init(PCIDevice *dev)
/* XXX Software should program this register */
dev->config[0x90] = 1 << 6; /* Address Map Register - AHCI mode */
- msi_init(dev, 0x50, 1, true, false);
d->ahci.irq = pci_allocate_irq(dev);
pci_register_bar(dev, ICH9_IDP_BAR, PCI_BASE_ADDRESS_SPACE_IO,
@@ -135,6 +135,11 @@ static int pci_ich9_ahci_init(PCIDevice *dev)
(ICH9_IDP_BAR + 0x4) | (ICH9_IDP_INDEX_LOG2 << 4));
d->ahci.idp_offset = ICH9_IDP_INDEX;
+ /* Although the AHCI 1.3 specification states that the first capability
+ * should be PMCAP, the Intel ICH9 data sheet specifies that the ICH9
+ * AHCI device puts the MSI capability first, pointing to 0x80. */
+ msi_init(dev, ICH9_MSI_CAP_OFFSET, 1, true, false);
+
return 0;
}
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 77bb93e4d7..4e3a061622 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -28,7 +28,6 @@
#include <sys/socket.h>
#include <linux/kvm.h>
#include <fcntl.h>
-#include <linux/virtio_ring.h>
#include <netpacket/packet.h>
#include <net/ethernet.h>
#include <net/if.h>
@@ -36,6 +35,7 @@
#include <stdio.h>
+#include "hw/virtio/virtio_ring.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/virtio-bus.h"
diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
index ec9e855bc1..d21c397756 100644
--- a/hw/virtio/Makefile.objs
+++ b/hw/virtio/Makefile.objs
@@ -2,7 +2,7 @@ common-obj-y += virtio-rng.o
common-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
common-obj-y += virtio-bus.o
common-obj-y += virtio-mmio.o
-common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/
+common-obj-$(CONFIG_VIRTIO) += dataplane/
obj-y += virtio.o virtio-balloon.o
obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o
diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c
index 67cb2b823e..372706a234 100644
--- a/hw/virtio/dataplane/vring.c
+++ b/hw/virtio/dataplane/vring.c
@@ -181,7 +181,8 @@ static int get_desc(Vring *vring, VirtQueueElement *elem,
/* Stop for now if there are not enough iovecs available. */
if (*num >= VIRTQUEUE_MAX_SIZE) {
- return -ENOBUFS;
+ error_report("Invalid SG num: %u", *num);
+ return -EFAULT;
}
/* TODO handle non-contiguous memory across region boundaries */
diff --git a/include/block/aio.h b/include/block/aio.h
index 4603c0f066..156272177f 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -26,7 +26,8 @@ typedef struct BlockDriverAIOCB BlockDriverAIOCB;
typedef void BlockDriverCompletionFunc(void *opaque, int ret);
typedef struct AIOCBInfo {
- void (*cancel)(BlockDriverAIOCB *acb);
+ void (*cancel_async)(BlockDriverAIOCB *acb);
+ AioContext *(*get_aio_context)(BlockDriverAIOCB *acb);
size_t aiocb_size;
} AIOCBInfo;
@@ -35,11 +36,13 @@ struct BlockDriverAIOCB {
BlockDriverState *bs;
BlockDriverCompletionFunc *cb;
void *opaque;
+ int refcnt;
};
void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque);
-void qemu_aio_release(void *p);
+void qemu_aio_unref(void *p);
+void qemu_aio_ref(void *p);
typedef struct AioHandler AioHandler;
typedef void QEMUBHFunc(void *opaque);
@@ -99,7 +102,7 @@ void aio_set_dispatching(AioContext *ctx, bool dispatching);
* They also provide bottom halves, a service to execute a piece of code
* as soon as possible.
*/
-AioContext *aio_context_new(void);
+AioContext *aio_context_new(Error **errp);
/**
* aio_context_ref:
diff --git a/include/block/block.h b/include/block/block.h
index 07d6d8e67e..3318f0dfbe 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -337,6 +337,7 @@ BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
void bdrv_aio_cancel(BlockDriverAIOCB *acb);
+void bdrv_aio_cancel_async(BlockDriverAIOCB *acb);
typedef struct BlockRequest {
/* Fields to be filled by multiwrite caller */
diff --git a/include/hw/virtio/dataplane/vring.h b/include/hw/virtio/dataplane/vring.h
index af73ee2ae3..d3e086aefc 100644
--- a/include/hw/virtio/dataplane/vring.h
+++ b/include/hw/virtio/dataplane/vring.h
@@ -17,8 +17,8 @@
#ifndef VRING_H
#define VRING_H
-#include <linux/virtio_ring.h>
#include "qemu-common.h"
+#include "hw/virtio/virtio_ring.h"
#include "hw/virtio/virtio.h"
typedef struct {
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index cf61154658..f3853f2b75 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -132,10 +132,8 @@ typedef struct VirtIOBlock {
VMChangeStateEntry *change;
/* Function to push to vq and notify guest */
void (*complete_request)(struct VirtIOBlockReq *req, unsigned char status);
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
Notifier migration_state_notifier;
struct VirtIOBlockDataPlane *dataplane;
-#endif
} VirtIOBlock;
typedef struct MultiReqBuffer {
diff --git a/include/hw/virtio/virtio_ring.h b/include/hw/virtio/virtio_ring.h
new file mode 100644
index 0000000000..8f58bc975e
--- /dev/null
+++ b/include/hw/virtio/virtio_ring.h
@@ -0,0 +1,167 @@
+#ifndef _LINUX_VIRTIO_RING_H
+#define _LINUX_VIRTIO_RING_H
+/*
+ * This file is copied from /usr/include/linux while converting __uNN types
+ * to uXX_t, __inline__ to inline, and tab to spaces.
+ * */
+
+/* An interface for efficient virtio implementation, currently for use by KVM
+ * and lguest, but hopefully others soon. Do NOT change this since it will
+ * break existing servers and clients.
+ *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Copyright Rusty Russell IBM Corporation 2007. */
+
+/* This marks a buffer as continuing via the next field. */
+#define VRING_DESC_F_NEXT 1
+/* This marks a buffer as write-only (otherwise read-only). */
+#define VRING_DESC_F_WRITE 2
+/* This means the buffer contains a list of buffer descriptors. */
+#define VRING_DESC_F_INDIRECT 4
+
+/* The Host uses this in used->flags to advise the Guest: don't kick me when
+ * you add a buffer. It's unreliable, so it's simply an optimization. Guest
+ * will still kick if it's out of buffers. */
+#define VRING_USED_F_NO_NOTIFY 1
+/* The Guest uses this in avail->flags to advise the Host: don't interrupt me
+ * when you consume a buffer. It's unreliable, so it's simply an
+ * optimization. */
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+
+/* We support indirect buffer descriptors */
+#define VIRTIO_RING_F_INDIRECT_DESC 28
+
+/* The Guest publishes the used index for which it expects an interrupt
+ * at the end of the avail ring. Host should ignore the avail->flags field. */
+/* The Host publishes the avail index for which it expects a kick
+ * at the end of the used ring. Guest should ignore the used->flags field. */
+#define VIRTIO_RING_F_EVENT_IDX 29
+
+/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */
+struct vring_desc {
+ /* Address (guest-physical). */
+ uint64_t addr;
+ /* Length. */
+ uint32_t len;
+ /* The flags as indicated above. */
+ uint16_t flags;
+ /* We chain unused descriptors via this, too */
+ uint16_t next;
+};
+
+struct vring_avail {
+ uint16_t flags;
+ uint16_t idx;
+ uint16_t ring[];
+};
+
+/* u32 is used here for ids for padding reasons. */
+struct vring_used_elem {
+ /* Index of start of used descriptor chain. */
+ uint32_t id;
+ /* Total length of the descriptor chain which was used (written to) */
+ uint32_t len;
+};
+
+struct vring_used {
+ uint16_t flags;
+ uint16_t idx;
+ struct vring_used_elem ring[];
+};
+
+struct vring {
+ unsigned int num;
+
+ struct vring_desc *desc;
+
+ struct vring_avail *avail;
+
+ struct vring_used *used;
+};
+
+/* The standard layout for the ring is a continuous chunk of memory which looks
+ * like this. We assume num is a power of 2.
+ *
+ * struct vring
+ * {
+ * // The actual descriptors (16 bytes each)
+ * struct vring_desc desc[num];
+ *
+ * // A ring of available descriptor heads with free-running index.
+ * uint16_t avail_flags;
+ * uint16_t avail_idx;
+ * uint16_t available[num];
+ * uint16_t used_event_idx;
+ *
+ * // Padding to the next align boundary.
+ * char pad[];
+ *
+ * // A ring of used descriptor heads with free-running index.
+ * uint16_t used_flags;
+ * uint16_t used_idx;
+ * struct vring_used_elem used[num];
+ * uint16_t avail_event_idx;
+ * };
+ */
+/* We publish the used event index at the end of the available ring, and vice
+ * versa. They are at the end for backwards compatibility. */
+#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
+#define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num])
+
+static inline void vring_init(struct vring *vr, unsigned int num, void *p,
+ unsigned long align)
+{
+ vr->num = num;
+ vr->desc = p;
+ vr->avail = p + num*sizeof(struct vring_desc);
+ vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(uint16_t)
+ + align-1) & ~(align - 1));
+}
+
+static inline unsigned vring_size(unsigned int num, unsigned long align)
+{
+ return ((sizeof(struct vring_desc) * num + sizeof(uint16_t) * (3 + num)
+ + align - 1) & ~(align - 1))
+ + sizeof(uint16_t) * 3 + sizeof(struct vring_used_elem) * num;
+}
+
+/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
+/* Assuming a given event_idx value from the other size, if
+ * we have just incremented index from old to new_idx,
+ * should we trigger an event? */
+static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
+{
+ /* Note: Xen has similar logic for notification hold-off
+ * in include/xen/interface/io/ring.h with req_event and req_prod
+ * corresponding to event_idx + 1 and new_idx respectively.
+ * Note also that req_event and req_prod in Xen start at 1,
+ * event indexes in virtio start at 0. */
+ return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
+}
+
+#endif /* _LINUX_VIRTIO_RING_H */
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 6f0200a7ac..62c68c0f32 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -42,7 +42,7 @@
*
* In the case of QEMU tools, this will also start/initialize timers.
*/
-int qemu_init_main_loop(void);
+int qemu_init_main_loop(Error **errp);
/**
* main_loop_wait: Run one iteration of the main loop.
diff --git a/iothread.c b/iothread.c
index d9403cf69e..342a23fcb0 100644
--- a/iothread.c
+++ b/iothread.c
@@ -17,6 +17,7 @@
#include "block/aio.h"
#include "sysemu/iothread.h"
#include "qmp-commands.h"
+#include "qemu/error-report.h"
#define IOTHREADS_PATH "/objects"
@@ -53,6 +54,9 @@ static void iothread_instance_finalize(Object *obj)
{
IOThread *iothread = IOTHREAD(obj);
+ if (!iothread->ctx) {
+ return;
+ }
iothread->stopping = true;
aio_notify(iothread->ctx);
qemu_thread_join(&iothread->thread);
@@ -63,11 +67,16 @@ static void iothread_instance_finalize(Object *obj)
static void iothread_complete(UserCreatable *obj, Error **errp)
{
+ Error *local_error = NULL;
IOThread *iothread = IOTHREAD(obj);
iothread->stopping = false;
- iothread->ctx = aio_context_new();
iothread->thread_id = -1;
+ iothread->ctx = aio_context_new(&local_error);
+ if (!iothread->ctx) {
+ error_propagate(errp, local_error);
+ return;
+ }
qemu_mutex_init(&iothread->init_done_lock);
qemu_cond_init(&iothread->init_done_cond);
diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h
index c656f61cfc..bb5df43874 100644
--- a/linux-headers/linux/vhost.h
+++ b/linux-headers/linux/vhost.h
@@ -14,7 +14,7 @@
#include <linux/ioctl.h>
#include <linux/virtio_config.h>
-#include <linux/virtio_ring.h>
+#include "hw/virtio/virtio_ring.h"
struct vhost_vring_state {
unsigned int index;
diff --git a/main-loop.c b/main-loop.c
index 3cc79f82f9..53393a4b18 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -126,10 +126,11 @@ void qemu_notify_event(void)
static GArray *gpollfds;
-int qemu_init_main_loop(void)
+int qemu_init_main_loop(Error **errp)
{
int ret;
GSource *src;
+ Error *local_error = NULL;
init_clocks();
@@ -138,8 +139,12 @@ int qemu_init_main_loop(void)
return ret;
}
+ qemu_aio_context = aio_context_new(&local_error);
+ if (!qemu_aio_context) {
+ error_propagate(errp, local_error);
+ return -EMFILE;
+ }
gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
- qemu_aio_context = aio_context_new();
src = aio_get_g_source(qemu_aio_context);
g_source_attach(src, NULL);
g_source_unref(src);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 95dcd81ed4..fa2d1b7528 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -197,7 +197,7 @@
# 'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
# 'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow',
# 'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
-# 2.2: 'archipelago'
+# 2.2: 'archipelago' added, 'cow' dropped
#
# @backing_file: #optional the name of the backing file (for copy-on-write)
#
@@ -1148,10 +1148,11 @@
# Since: 2.0
##
{ 'enum': 'BlockdevDriver',
- 'data': [ 'archipelago', 'file', 'host_device', 'host_cdrom', 'host_floppy',
- 'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug',
- 'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow',
- 'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum' ] }
+ 'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
+ 'dmg', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
+ 'host_floppy', 'http', 'https', 'null-aio', 'null-co', 'parallels',
+ 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp', 'vdi', 'vhdx',
+ 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsBase
@@ -1204,6 +1205,18 @@
'data': { 'filename': 'str' } }
##
+# @BlockdevOptionsNull
+#
+# Driver specific block device options for the null backend.
+#
+# @size: #optional size of the device in bytes.
+#
+# Since: 2.2
+##
+{ 'type': 'BlockdevOptionsNull',
+ 'data': { '*size': 'int' } }
+
+##
# @BlockdevOptionsVVFAT
#
# Driver specific block device options for the vvfat protocol.
@@ -1251,6 +1264,67 @@
'data': { '*backing': 'BlockdevRef' } }
##
+# @Qcow2OverlapCheckMode
+#
+# General overlap check modes.
+#
+# @none: Do not perform any checks
+#
+# @constant: Perform only checks which can be done in constant time and
+# without reading anything from disk
+#
+# @cached: Perform only checks which can be done without reading anything
+# from disk
+#
+# @all: Perform all available overlap checks
+#
+# Since: 2.2
+##
+{ 'enum': 'Qcow2OverlapCheckMode',
+ 'data': [ 'none', 'constant', 'cached', 'all' ] }
+
+##
+# @Qcow2OverlapCheckFlags
+#
+# Structure of flags for each metadata structure. Setting a field to 'true'
+# makes qemu guard that structure against unintended overwriting. The default
+# value is chosen according to the template given.
+#
+# @template: Specifies a template mode which can be adjusted using the other
+# flags, defaults to 'cached'
+#
+# Since: 2.2
+##
+{ 'type': 'Qcow2OverlapCheckFlags',
+ 'data': { '*template': 'Qcow2OverlapCheckMode',
+ '*main-header': 'bool',
+ '*active-l1': 'bool',
+ '*active-l2': 'bool',
+ '*refcount-table': 'bool',
+ '*refcount-block': 'bool',
+ '*snapshot-table': 'bool',
+ '*inactive-l1': 'bool',
+ '*inactive-l2': 'bool' } }
+
+##
+# @Qcow2OverlapChecks
+#
+# Specifies which metadata structures should be guarded against unintended
+# overwriting.
+#
+# @flags: set of flags for separate specification of each metadata structure
+# type
+#
+# @mode: named mode which chooses a specific set of flags
+#
+# Since: 2.2
+##
+{ 'union': 'Qcow2OverlapChecks',
+ 'discriminator': {},
+ 'data': { 'flags': 'Qcow2OverlapCheckFlags',
+ 'mode': 'Qcow2OverlapCheckMode' } }
+
+##
# @BlockdevOptionsQcow2
#
# Driver specific block device options for qcow2.
@@ -1269,6 +1343,18 @@
# should be issued on other occasions where a cluster
# gets freed
#
+# @overlap-check: #optional which overlap checks to perform for writes
+# to the image, defaults to 'cached' (since 2.2)
+#
+# @cache-size: #optional the maximum total size of the L2 table and
+# refcount block caches in bytes (since 2.2)
+#
+# @l2-cache-size: #optional the maximum size of the L2 table cache in
+# bytes (since 2.2)
+#
+# @refcount-cache-size: #optional the maximum size of the refcount block cache
+# in bytes (since 2.2)
+#
# Since: 1.7
##
{ 'type': 'BlockdevOptionsQcow2',
@@ -1276,7 +1362,11 @@
'data': { '*lazy-refcounts': 'bool',
'*pass-discard-request': 'bool',
'*pass-discard-snapshot': 'bool',
- '*pass-discard-other': 'bool' } }
+ '*pass-discard-other': 'bool',
+ '*overlap-check': 'Qcow2OverlapChecks',
+ '*cache-size': 'int',
+ '*l2-cache-size': 'int',
+ '*refcount-cache-size': 'int' } }
##
@@ -1471,39 +1561,40 @@
'discriminator': 'driver',
'data': {
'archipelago':'BlockdevOptionsArchipelago',
+ 'blkdebug': 'BlockdevOptionsBlkdebug',
+ 'blkverify': 'BlockdevOptionsBlkverify',
+ 'bochs': 'BlockdevOptionsGenericFormat',
+ 'cloop': 'BlockdevOptionsGenericFormat',
+ 'dmg': 'BlockdevOptionsGenericFormat',
'file': 'BlockdevOptionsFile',
- 'host_device':'BlockdevOptionsFile',
+ 'ftp': 'BlockdevOptionsFile',
+ 'ftps': 'BlockdevOptionsFile',
+# TODO gluster: Wait for structured options
'host_cdrom': 'BlockdevOptionsFile',
+ 'host_device':'BlockdevOptionsFile',
'host_floppy':'BlockdevOptionsFile',
'http': 'BlockdevOptionsFile',
'https': 'BlockdevOptionsFile',
- 'ftp': 'BlockdevOptionsFile',
- 'ftps': 'BlockdevOptionsFile',
- 'tftp': 'BlockdevOptionsFile',
-# TODO gluster: Wait for structured options
# TODO iscsi: Wait for structured options
# TODO nbd: Should take InetSocketAddress for 'host'?
# TODO nfs: Wait for structured options
-# TODO rbd: Wait for structured options
-# TODO sheepdog: Wait for structured options
-# TODO ssh: Should take InetSocketAddress for 'host'?
- 'vvfat': 'BlockdevOptionsVVFAT',
- 'blkdebug': 'BlockdevOptionsBlkdebug',
- 'blkverify': 'BlockdevOptionsBlkverify',
- 'bochs': 'BlockdevOptionsGenericFormat',
- 'cloop': 'BlockdevOptionsGenericFormat',
- 'cow': 'BlockdevOptionsGenericCOWFormat',
- 'dmg': 'BlockdevOptionsGenericFormat',
+ 'null-aio': 'BlockdevOptionsNull',
+ 'null-co': 'BlockdevOptionsNull',
'parallels': 'BlockdevOptionsGenericFormat',
- 'qcow': 'BlockdevOptionsGenericCOWFormat',
'qcow2': 'BlockdevOptionsQcow2',
+ 'qcow': 'BlockdevOptionsGenericCOWFormat',
'qed': 'BlockdevOptionsGenericCOWFormat',
+ 'quorum': 'BlockdevOptionsQuorum',
'raw': 'BlockdevOptionsGenericFormat',
+# TODO rbd: Wait for structured options
+# TODO sheepdog: Wait for structured options
+# TODO ssh: Should take InetSocketAddress for 'host'?
+ 'tftp': 'BlockdevOptionsFile',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
- 'quorum': 'BlockdevOptionsQuorum'
+ 'vvfat': 'BlockdevOptionsVVFAT'
} }
##
@@ -1555,7 +1646,7 @@
##
# @BLOCK_IMAGE_CORRUPTED
#
-# Emitted when a disk image is being marked corrupt
+# Emitted when a corruption has been detected in a disk image
#
# @device: device name
#
@@ -1569,13 +1660,18 @@
# @size: #optional, if the corruption resulted from an image access, this is
# the access size
#
+# fatal: if set, the image is marked corrupt and therefore unusable after this
+# event and must be repaired (Since 2.2; before, every
+# BLOCK_IMAGE_CORRUPTED event was fatal)
+#
# Since: 1.7
##
{ 'event': 'BLOCK_IMAGE_CORRUPTED',
'data': { 'device' : 'str',
'msg' : 'str',
'*offset': 'int',
- '*size' : 'int' } }
+ '*size' : 'int',
+ 'fatal' : 'bool' } }
##
# @BLOCK_IO_ERROR
diff --git a/qdev-monitor.c b/qdev-monitor.c
index fb9ee24b3a..5ec66067f5 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -206,7 +206,7 @@ int qdev_device_help(QemuOpts *opts)
}
prop_list = qmp_device_list_properties(driver, &local_err);
- if (!prop_list) {
+ if (local_err) {
error_printf("%s\n", error_get_pretty(local_err));
error_free(local_err);
return 1;
diff --git a/qemu-doc.texi b/qemu-doc.texi
index ef3be72ae2..9973090c6c 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -654,15 +654,6 @@ File name of a base image (see @option{create} subcommand)
If this option is set to @code{on}, the image is encrypted.
@end table
-@item cow
-User Mode Linux Copy On Write image format. It is supported only for
-compatibility with previous versions.
-Supported options:
-@table @code
-@item backing_file
-File name of a base image (see @option{create} subcommand)
-@end table
-
@item vdi
VirtualBox 1.1 compatible image format.
Supported options:
diff --git a/qemu-img.c b/qemu-img.c
index 91d1ac3d7d..dbf0904dc0 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2879,6 +2879,7 @@ int main(int argc, char **argv)
{
const img_cmd_t *cmd;
const char *cmdname;
+ Error *local_error = NULL;
int c;
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
@@ -2893,7 +2894,12 @@ int main(int argc, char **argv)
error_set_progname(argv[0]);
qemu_init_exec_dir(argv[0]);
- qemu_init_main_loop();
+ if (qemu_init_main_loop(&local_error)) {
+ error_report("%s", error_get_pretty(local_error));
+ error_free(local_error);
+ exit(EXIT_FAILURE);
+ }
+
bdrv_init();
if (argc < 2) {
error_exit("Not enough arguments");
diff --git a/qemu-img.texi b/qemu-img.texi
index 50d2cca7a0..e3ef4a00e9 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -228,8 +228,8 @@ compression is read-only. It means that if a compressed sector is
rewritten, then it is rewritten as uncompressed data.
Image conversion is also useful to get smaller image when using a
-growable format such as @code{qcow} or @code{cow}: the empty sectors
-are detected and suppressed from the destination image.
+growable format such as @code{qcow}: the empty sectors are detected and
+suppressed from the destination image.
@var{sparse_size} indicates the consecutive number of bytes (defaults to 4k)
that must contain only zeros for qemu-img to create a sparse image during
diff --git a/qemu-io.c b/qemu-io.c
index d2ab6946e8..66cf3ef4be 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -379,6 +379,7 @@ int main(int argc, char **argv)
int c;
int opt_index = 0;
int flags = BDRV_O_UNMAP;
+ Error *local_error = NULL;
#ifdef CONFIG_POSIX
signal(SIGPIPE, SIG_IGN);
@@ -444,7 +445,11 @@ int main(int argc, char **argv)
exit(1);
}
- qemu_init_main_loop();
+ if (qemu_init_main_loop(&local_error)) {
+ error_report("%s", error_get_pretty(local_error));
+ error_free(local_error);
+ exit(1);
+ }
bdrv_init();
/* initialize commands */
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 9bc152e6c3..de9963f8fb 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -674,7 +674,11 @@ int main(int argc, char **argv)
snprintf(sockpath, 128, SOCKET_PATH, basename(device));
}
- qemu_init_main_loop();
+ if (qemu_init_main_loop(&local_err)) {
+ error_report("%s", error_get_pretty(local_err));
+ error_free(local_err);
+ exit(EXIT_FAILURE);
+ }
bdrv_init();
atexit(bdrv_close_all);
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 7658d4bd24..76656cc074 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2081,7 +2081,7 @@ Each json-object contain the following:
- "file": device file name (json-string)
- "ro": true if read-only, false otherwise (json-bool)
- "drv": driver format name (json-string)
- - Possible values: "blkdebug", "bochs", "cloop", "cow", "dmg",
+ - Possible values: "blkdebug", "bochs", "cloop", "dmg",
"file", "file", "ftp", "ftps", "host_cdrom",
"host_device", "host_floppy", "http", "https",
"nbd", "parallels", "qcow", "qcow2", "raw",
diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py
index b4632324a7..d2f815bca2 100644
--- a/scripts/qapi-types.py
+++ b/scripts/qapi-types.py
@@ -177,6 +177,8 @@ const int %(name)s_qtypes[QTYPE_MAX] = {
qtype = "QTYPE_QDICT"
elif find_union(qapi_type):
qtype = "QTYPE_QDICT"
+ elif find_enum(qapi_type):
+ qtype = "QTYPE_QSTRING"
else:
assert False, "Invalid anonymous union member"
diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py
index c12969721a..df9f7fb657 100644
--- a/scripts/qapi-visit.py
+++ b/scripts/qapi-visit.py
@@ -263,7 +263,8 @@ void visit_type_%(name)s(Visitor *m, %(name)s **obj, const char *name, Error **e
for key in members:
assert (members[key] in builtin_types
or find_struct(members[key])
- or find_union(members[key])), "Invalid anonymous union member"
+ or find_union(members[key])
+ or find_enum(members[key])), "Invalid anonymous union member"
enum_full_value = generate_enum_full_value(disc_type, key)
ret += mcgen('''
diff --git a/tests/Makefile b/tests/Makefile
index a5e3d0c369..f5de29c0b9 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -132,6 +132,7 @@ check-qtest-i386-y = tests/endianness-test$(EXESUF)
check-qtest-i386-y += tests/fdc-test$(EXESUF)
gcov-files-i386-y = hw/block/fdc.c
check-qtest-i386-y += tests/ide-test$(EXESUF)
+check-qtest-i386-y += tests/ahci-test$(EXESUF)
check-qtest-i386-y += tests/hd-geo-test$(EXESUF)
gcov-files-i386-y += hw/block/hd-geometry.c
check-qtest-i386-y += tests/boot-order-test$(EXESUF)
@@ -307,6 +308,7 @@ tests/endianness-test$(EXESUF): tests/endianness-test.o
tests/spapr-phb-test$(EXESUF): tests/spapr-phb-test.o $(libqos-obj-y)
tests/fdc-test$(EXESUF): tests/fdc-test.o
tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y)
+tests/ahci-test$(EXESUF): tests/ahci-test.o $(libqos-pc-obj-y)
tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o
tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y)
tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o $(libqos-obj-y)
diff --git a/tests/ahci-test.c b/tests/ahci-test.c
new file mode 100644
index 0000000000..4c77ebec7a
--- /dev/null
+++ b/tests/ahci-test.c
@@ -0,0 +1,1561 @@
+/*
+ * AHCI test cases
+ *
+ * Copyright (c) 2014 John Snow <jsnow@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <glib.h>
+
+#include "libqtest.h"
+#include "libqos/pci-pc.h"
+#include "libqos/malloc-pc.h"
+
+#include "qemu-common.h"
+#include "qemu/host-utils.h"
+
+#include "hw/pci/pci_ids.h"
+#include "hw/pci/pci_regs.h"
+
+/* Test-specific defines. */
+#define TEST_IMAGE_SIZE (64 * 1024 * 1024)
+
+/*** Supplementary PCI Config Space IDs & Masks ***/
+#define PCI_DEVICE_ID_INTEL_Q35_AHCI (0x2922)
+#define PCI_MSI_FLAGS_RESERVED (0xFF00)
+#define PCI_PM_CTRL_RESERVED (0xFC)
+#define PCI_BCC(REG32) ((REG32) >> 24)
+#define PCI_PI(REG32) (((REG32) >> 8) & 0xFF)
+#define PCI_SCC(REG32) (((REG32) >> 16) & 0xFF)
+
+/*** Recognized AHCI Device Types ***/
+#define AHCI_INTEL_ICH9 (PCI_DEVICE_ID_INTEL_Q35_AHCI << 16 | \
+ PCI_VENDOR_ID_INTEL)
+
+/*** AHCI/HBA Register Offsets and Bitmasks ***/
+#define AHCI_CAP (0)
+#define AHCI_CAP_NP (0x1F)
+#define AHCI_CAP_SXS (0x20)
+#define AHCI_CAP_EMS (0x40)
+#define AHCI_CAP_CCCS (0x80)
+#define AHCI_CAP_NCS (0x1F00)
+#define AHCI_CAP_PSC (0x2000)
+#define AHCI_CAP_SSC (0x4000)
+#define AHCI_CAP_PMD (0x8000)
+#define AHCI_CAP_FBSS (0x10000)
+#define AHCI_CAP_SPM (0x20000)
+#define AHCI_CAP_SAM (0x40000)
+#define AHCI_CAP_RESERVED (0x80000)
+#define AHCI_CAP_ISS (0xF00000)
+#define AHCI_CAP_SCLO (0x1000000)
+#define AHCI_CAP_SAL (0x2000000)
+#define AHCI_CAP_SALP (0x4000000)
+#define AHCI_CAP_SSS (0x8000000)
+#define AHCI_CAP_SMPS (0x10000000)
+#define AHCI_CAP_SSNTF (0x20000000)
+#define AHCI_CAP_SNCQ (0x40000000)
+#define AHCI_CAP_S64A (0x80000000)
+
+#define AHCI_GHC (1)
+#define AHCI_GHC_HR (0x01)
+#define AHCI_GHC_IE (0x02)
+#define AHCI_GHC_MRSM (0x04)
+#define AHCI_GHC_RESERVED (0x7FFFFFF8)
+#define AHCI_GHC_AE (0x80000000)
+
+#define AHCI_IS (2)
+#define AHCI_PI (3)
+#define AHCI_VS (4)
+
+#define AHCI_CCCCTL (5)
+#define AHCI_CCCCTL_EN (0x01)
+#define AHCI_CCCCTL_RESERVED (0x06)
+#define AHCI_CCCCTL_CC (0xFF00)
+#define AHCI_CCCCTL_TV (0xFFFF0000)
+
+#define AHCI_CCCPORTS (6)
+#define AHCI_EMLOC (7)
+
+#define AHCI_EMCTL (8)
+#define AHCI_EMCTL_STSMR (0x01)
+#define AHCI_EMCTL_CTLTM (0x100)
+#define AHCI_EMCTL_CTLRST (0x200)
+#define AHCI_EMCTL_RESERVED (0xF0F0FCFE)
+
+#define AHCI_CAP2 (9)
+#define AHCI_CAP2_BOH (0x01)
+#define AHCI_CAP2_NVMP (0x02)
+#define AHCI_CAP2_APST (0x04)
+#define AHCI_CAP2_RESERVED (0xFFFFFFF8)
+
+#define AHCI_BOHC (10)
+#define AHCI_RESERVED (11)
+#define AHCI_NVMHCI (24)
+#define AHCI_VENDOR (40)
+#define AHCI_PORTS (64)
+
+/*** Port Memory Offsets & Bitmasks ***/
+#define AHCI_PX_CLB (0)
+#define AHCI_PX_CLB_RESERVED (0x1FF)
+
+#define AHCI_PX_CLBU (1)
+
+#define AHCI_PX_FB (2)
+#define AHCI_PX_FB_RESERVED (0xFF)
+
+#define AHCI_PX_FBU (3)
+
+#define AHCI_PX_IS (4)
+#define AHCI_PX_IS_DHRS (0x1)
+#define AHCI_PX_IS_PSS (0x2)
+#define AHCI_PX_IS_DSS (0x4)
+#define AHCI_PX_IS_SDBS (0x8)
+#define AHCI_PX_IS_UFS (0x10)
+#define AHCI_PX_IS_DPS (0x20)
+#define AHCI_PX_IS_PCS (0x40)
+#define AHCI_PX_IS_DMPS (0x80)
+#define AHCI_PX_IS_RESERVED (0x23FFF00)
+#define AHCI_PX_IS_PRCS (0x400000)
+#define AHCI_PX_IS_IPMS (0x800000)
+#define AHCI_PX_IS_OFS (0x1000000)
+#define AHCI_PX_IS_INFS (0x4000000)
+#define AHCI_PX_IS_IFS (0x8000000)
+#define AHCI_PX_IS_HBDS (0x10000000)
+#define AHCI_PX_IS_HBFS (0x20000000)
+#define AHCI_PX_IS_TFES (0x40000000)
+#define AHCI_PX_IS_CPDS (0x80000000)
+
+#define AHCI_PX_IE (5)
+#define AHCI_PX_IE_DHRE (0x1)
+#define AHCI_PX_IE_PSE (0x2)
+#define AHCI_PX_IE_DSE (0x4)
+#define AHCI_PX_IE_SDBE (0x8)
+#define AHCI_PX_IE_UFE (0x10)
+#define AHCI_PX_IE_DPE (0x20)
+#define AHCI_PX_IE_PCE (0x40)
+#define AHCI_PX_IE_DMPE (0x80)
+#define AHCI_PX_IE_RESERVED (0x23FFF00)
+#define AHCI_PX_IE_PRCE (0x400000)
+#define AHCI_PX_IE_IPME (0x800000)
+#define AHCI_PX_IE_OFE (0x1000000)
+#define AHCI_PX_IE_INFE (0x4000000)
+#define AHCI_PX_IE_IFE (0x8000000)
+#define AHCI_PX_IE_HBDE (0x10000000)
+#define AHCI_PX_IE_HBFE (0x20000000)
+#define AHCI_PX_IE_TFEE (0x40000000)
+#define AHCI_PX_IE_CPDE (0x80000000)
+
+#define AHCI_PX_CMD (6)
+#define AHCI_PX_CMD_ST (0x1)
+#define AHCI_PX_CMD_SUD (0x2)
+#define AHCI_PX_CMD_POD (0x4)
+#define AHCI_PX_CMD_CLO (0x8)
+#define AHCI_PX_CMD_FRE (0x10)
+#define AHCI_PX_CMD_RESERVED (0xE0)
+#define AHCI_PX_CMD_CCS (0x1F00)
+#define AHCI_PX_CMD_MPSS (0x2000)
+#define AHCI_PX_CMD_FR (0x4000)
+#define AHCI_PX_CMD_CR (0x8000)
+#define AHCI_PX_CMD_CPS (0x10000)
+#define AHCI_PX_CMD_PMA (0x20000)
+#define AHCI_PX_CMD_HPCP (0x40000)
+#define AHCI_PX_CMD_MPSP (0x80000)
+#define AHCI_PX_CMD_CPD (0x100000)
+#define AHCI_PX_CMD_ESP (0x200000)
+#define AHCI_PX_CMD_FBSCP (0x400000)
+#define AHCI_PX_CMD_APSTE (0x800000)
+#define AHCI_PX_CMD_ATAPI (0x1000000)
+#define AHCI_PX_CMD_DLAE (0x2000000)
+#define AHCI_PX_CMD_ALPE (0x4000000)
+#define AHCI_PX_CMD_ASP (0x8000000)
+#define AHCI_PX_CMD_ICC (0xF0000000)
+
+#define AHCI_PX_RES1 (7)
+
+#define AHCI_PX_TFD (8)
+#define AHCI_PX_TFD_STS (0xFF)
+#define AHCI_PX_TFD_STS_ERR (0x01)
+#define AHCI_PX_TFD_STS_CS1 (0x06)
+#define AHCI_PX_TFD_STS_DRQ (0x08)
+#define AHCI_PX_TFD_STS_CS2 (0x70)
+#define AHCI_PX_TFD_STS_BSY (0x80)
+#define AHCI_PX_TFD_ERR (0xFF00)
+#define AHCI_PX_TFD_RESERVED (0xFFFF0000)
+
+#define AHCI_PX_SIG (9)
+#define AHCI_PX_SIG_SECTOR_COUNT (0xFF)
+#define AHCI_PX_SIG_LBA_LOW (0xFF00)
+#define AHCI_PX_SIG_LBA_MID (0xFF0000)
+#define AHCI_PX_SIG_LBA_HIGH (0xFF000000)
+
+#define AHCI_PX_SSTS (10)
+#define AHCI_PX_SSTS_DET (0x0F)
+#define AHCI_PX_SSTS_SPD (0xF0)
+#define AHCI_PX_SSTS_IPM (0xF00)
+#define AHCI_PX_SSTS_RESERVED (0xFFFFF000)
+#define SSTS_DET_NO_DEVICE (0x00)
+#define SSTS_DET_PRESENT (0x01)
+#define SSTS_DET_ESTABLISHED (0x03)
+#define SSTS_DET_OFFLINE (0x04)
+
+#define AHCI_PX_SCTL (11)
+
+#define AHCI_PX_SERR (12)
+#define AHCI_PX_SERR_ERR (0xFFFF)
+#define AHCI_PX_SERR_DIAG (0xFFFF0000)
+#define AHCI_PX_SERR_DIAG_X (0x04000000)
+
+#define AHCI_PX_SACT (13)
+#define AHCI_PX_CI (14)
+#define AHCI_PX_SNTF (15)
+
+#define AHCI_PX_FBS (16)
+#define AHCI_PX_FBS_EN (0x1)
+#define AHCI_PX_FBS_DEC (0x2)
+#define AHCI_PX_FBS_SDE (0x4)
+#define AHCI_PX_FBS_DEV (0xF00)
+#define AHCI_PX_FBS_ADO (0xF000)
+#define AHCI_PX_FBS_DWE (0xF0000)
+#define AHCI_PX_FBS_RESERVED (0xFFF000F8)
+
+#define AHCI_PX_RES2 (17)
+#define AHCI_PX_VS (28)
+
+#define HBA_DATA_REGION_SIZE (256)
+#define HBA_PORT_DATA_SIZE (128)
+#define HBA_PORT_NUM_REG (HBA_PORT_DATA_SIZE/4)
+
+#define AHCI_VERSION_0_95 (0x00000905)
+#define AHCI_VERSION_1_0 (0x00010000)
+#define AHCI_VERSION_1_1 (0x00010100)
+#define AHCI_VERSION_1_2 (0x00010200)
+#define AHCI_VERSION_1_3 (0x00010300)
+
+/*** Structures ***/
+
+/**
+ * Generic FIS structure.
+ */
+typedef struct FIS {
+ uint8_t fis_type;
+ uint8_t flags;
+ char data[0];
+} __attribute__((__packed__)) FIS;
+
+/**
+ * Register device-to-host FIS structure.
+ */
+typedef struct RegD2HFIS {
+ /* DW0 */
+ uint8_t fis_type;
+ uint8_t flags;
+ uint8_t status;
+ uint8_t error;
+ /* DW1 */
+ uint8_t lba_low;
+ uint8_t lba_mid;
+ uint8_t lba_high;
+ uint8_t device;
+ /* DW2 */
+ uint8_t lba3;
+ uint8_t lba4;
+ uint8_t lba5;
+ uint8_t res1;
+ /* DW3 */
+ uint16_t count;
+ uint8_t res2;
+ uint8_t res3;
+ /* DW4 */
+ uint16_t res4;
+ uint16_t res5;
+} __attribute__((__packed__)) RegD2HFIS;
+
+/**
+ * Register host-to-device FIS structure.
+ */
+typedef struct RegH2DFIS {
+ /* DW0 */
+ uint8_t fis_type;
+ uint8_t flags;
+ uint8_t command;
+ uint8_t feature_low;
+ /* DW1 */
+ uint8_t lba_low;
+ uint8_t lba_mid;
+ uint8_t lba_high;
+ uint8_t device;
+ /* DW2 */
+ uint8_t lba3;
+ uint8_t lba4;
+ uint8_t lba5;
+ uint8_t feature_high;
+ /* DW3 */
+ uint16_t count;
+ uint8_t icc;
+ uint8_t control;
+ /* DW4 */
+ uint32_t aux;
+} __attribute__((__packed__)) RegH2DFIS;
+
+/**
+ * Command List entry structure.
+ * The command list contains between 1-32 of these structures.
+ */
+typedef struct AHCICommand {
+ uint8_t b1;
+ uint8_t b2;
+ uint16_t prdtl; /* Phys Region Desc. Table Length */
+ uint32_t prdbc; /* Phys Region Desc. Byte Count */
+ uint32_t ctba; /* Command Table Descriptor Base Address */
+ uint32_t ctbau; /* '' Upper */
+ uint32_t res[4];
+} __attribute__((__packed__)) AHCICommand;
+
+/**
+ * Physical Region Descriptor; pointed to by the Command List Header,
+ * struct ahci_command.
+ */
+typedef struct PRD {
+ uint32_t dba; /* Data Base Address */
+ uint32_t dbau; /* Data Base Address Upper */
+ uint32_t res; /* Reserved */
+ uint32_t dbc; /* Data Byte Count (0-indexed) & Interrupt Flag (bit 2^31) */
+} PRD;
+
+typedef struct HBACap {
+ uint32_t cap;
+ uint32_t cap2;
+} HBACap;
+
+/*** Globals ***/
+static QGuestAllocator *guest_malloc;
+static QPCIBus *pcibus;
+static uint64_t barsize;
+static char tmp_path[] = "/tmp/qtest.XXXXXX";
+static bool ahci_pedantic;
+static uint32_t ahci_fingerprint;
+
+/*** Macro Utilities ***/
+#define BITANY(data, mask) (((data) & (mask)) != 0)
+#define BITSET(data, mask) (((data) & (mask)) == (mask))
+#define BITCLR(data, mask) (((data) & (mask)) == 0)
+#define ASSERT_BIT_SET(data, mask) g_assert_cmphex((data) & (mask), ==, (mask))
+#define ASSERT_BIT_CLEAR(data, mask) g_assert_cmphex((data) & (mask), ==, 0)
+
+/*** IO macros for the AHCI memory registers. ***/
+#define AHCI_READ(OFST) qpci_io_readl(ahci, hba_base + (OFST))
+#define AHCI_WRITE(OFST, VAL) qpci_io_writel(ahci, hba_base + (OFST), (VAL))
+#define AHCI_RREG(regno) AHCI_READ(4 * (regno))
+#define AHCI_WREG(regno, val) AHCI_WRITE(4 * (regno), (val))
+#define AHCI_SET(regno, mask) AHCI_WREG((regno), AHCI_RREG(regno) | (mask))
+#define AHCI_CLR(regno, mask) AHCI_WREG((regno), AHCI_RREG(regno) & ~(mask))
+
+/*** IO macros for port-specific offsets inside of AHCI memory. ***/
+#define PX_OFST(port, regno) (HBA_PORT_NUM_REG * (port) + AHCI_PORTS + (regno))
+#define PX_RREG(port, regno) AHCI_RREG(PX_OFST((port), (regno)))
+#define PX_WREG(port, regno, val) AHCI_WREG(PX_OFST((port), (regno)), (val))
+#define PX_SET(port, reg, mask) PX_WREG((port), (reg), \
+ PX_RREG((port), (reg)) | (mask));
+#define PX_CLR(port, reg, mask) PX_WREG((port), (reg), \
+ PX_RREG((port), (reg)) & ~(mask));
+
+/* For calculating how big the PRD table needs to be: */
+#define CMD_TBL_SIZ(n) ((0x80 + ((n) * sizeof(PRD)) + 0x7F) & ~0x7F)
+
+
+/*** Function Declarations ***/
+static QPCIDevice *get_ahci_device(void);
+static QPCIDevice *start_ahci_device(QPCIDevice *dev, void **hba_base);
+static void free_ahci_device(QPCIDevice *dev);
+static void ahci_test_port_spec(QPCIDevice *ahci, void *hba_base,
+ HBACap *hcap, uint8_t port);
+static void ahci_test_pci_spec(QPCIDevice *ahci);
+static void ahci_test_pci_caps(QPCIDevice *ahci, uint16_t header,
+ uint8_t offset);
+static void ahci_test_satacap(QPCIDevice *ahci, uint8_t offset);
+static void ahci_test_msicap(QPCIDevice *ahci, uint8_t offset);
+static void ahci_test_pmcap(QPCIDevice *ahci, uint8_t offset);
+
+/*** Utilities ***/
+
+static void string_bswap16(uint16_t *s, size_t bytes)
+{
+ g_assert_cmphex((bytes & 1), ==, 0);
+ bytes /= 2;
+
+ while (bytes--) {
+ *s = bswap16(*s);
+ s++;
+ }
+}
+
+/**
+ * Locate, verify, and return a handle to the AHCI device.
+ */
+static QPCIDevice *get_ahci_device(void)
+{
+ QPCIDevice *ahci;
+
+ pcibus = qpci_init_pc();
+
+ /* Find the AHCI PCI device and verify it's the right one. */
+ ahci = qpci_device_find(pcibus, QPCI_DEVFN(0x1F, 0x02));
+ g_assert(ahci != NULL);
+
+ ahci_fingerprint = qpci_config_readl(ahci, PCI_VENDOR_ID);
+
+ switch (ahci_fingerprint) {
+ case AHCI_INTEL_ICH9:
+ break;
+ default:
+ /* Unknown device. */
+ g_assert_not_reached();
+ }
+
+ return ahci;
+}
+
+static void free_ahci_device(QPCIDevice *ahci)
+{
+ /* libqos doesn't have a function for this, so free it manually */
+ g_free(ahci);
+
+ if (pcibus) {
+ qpci_free_pc(pcibus);
+ pcibus = NULL;
+ }
+
+ /* Clear our cached barsize information. */
+ barsize = 0;
+}
+
+/*** Test Setup & Teardown ***/
+
+/**
+ * Launch QEMU with the given command line,
+ * and then set up interrupts and our guest malloc interface.
+ */
+static void qtest_boot(const char *cmdline_fmt, ...)
+{
+ va_list ap;
+ char *cmdline;
+
+ va_start(ap, cmdline_fmt);
+ cmdline = g_strdup_vprintf(cmdline_fmt, ap);
+ va_end(ap);
+
+ qtest_start(cmdline);
+ qtest_irq_intercept_in(global_qtest, "ioapic");
+ guest_malloc = pc_alloc_init();
+
+ g_free(cmdline);
+}
+
+/**
+ * Tear down the QEMU instance.
+ */
+static void qtest_shutdown(void)
+{
+ g_free(guest_malloc);
+ guest_malloc = NULL;
+ qtest_end();
+}
+
+/**
+ * Start a Q35 machine and bookmark a handle to the AHCI device.
+ */
+static QPCIDevice *ahci_boot(void)
+{
+ qtest_boot("-drive if=none,id=drive0,file=%s,cache=writeback,serial=%s"
+ " -M q35 "
+ "-device ide-hd,drive=drive0 "
+ "-global ide-hd.ver=%s",
+ tmp_path, "testdisk", "version");
+
+ /* Verify that we have an AHCI device present. */
+ return get_ahci_device();
+}
+
+/**
+ * Clean up the PCI device, then terminate the QEMU instance.
+ */
+static void ahci_shutdown(QPCIDevice *ahci)
+{
+ free_ahci_device(ahci);
+ qtest_shutdown();
+}
+
+/*** Logical Device Initialization ***/
+
+/**
+ * Start the PCI device and sanity-check default operation.
+ */
+static void ahci_pci_enable(QPCIDevice *ahci, void **hba_base)
+{
+ uint8_t reg;
+
+ start_ahci_device(ahci, hba_base);
+
+ switch (ahci_fingerprint) {
+ case AHCI_INTEL_ICH9:
+ /* ICH9 has a register at PCI 0x92 that
+ * acts as a master port enabler mask. */
+ reg = qpci_config_readb(ahci, 0x92);
+ reg |= 0x3F;
+ qpci_config_writeb(ahci, 0x92, reg);
+ /* 0...0111111b -- bit significant, ports 0-5 enabled. */
+ ASSERT_BIT_SET(qpci_config_readb(ahci, 0x92), 0x3F);
+ break;
+ }
+
+}
+
+/**
+ * Map BAR5/ABAR, and engage the PCI device.
+ */
+static QPCIDevice *start_ahci_device(QPCIDevice *ahci, void **hba_base)
+{
+ /* Map AHCI's ABAR (BAR5) */
+ *hba_base = qpci_iomap(ahci, 5, &barsize);
+
+ /* turns on pci.cmd.iose, pci.cmd.mse and pci.cmd.bme */
+ qpci_device_enable(ahci);
+
+ return ahci;
+}
+
+/**
+ * Test and initialize the AHCI's HBA memory areas.
+ * Initialize and start any ports with devices attached.
+ * Bring the HBA into the idle state.
+ */
+static void ahci_hba_enable(QPCIDevice *ahci, void *hba_base)
+{
+ /* Bits of interest in this section:
+ * GHC.AE Global Host Control / AHCI Enable
+ * PxCMD.ST Port Command: Start
+ * PxCMD.SUD "Spin Up Device"
+ * PxCMD.POD "Power On Device"
+ * PxCMD.FRE "FIS Receive Enable"
+ * PxCMD.FR "FIS Receive Running"
+ * PxCMD.CR "Command List Running"
+ */
+
+ g_assert(ahci != NULL);
+ g_assert(hba_base != NULL);
+
+ uint32_t reg, ports_impl, clb, fb;
+ uint16_t i;
+ uint8_t num_cmd_slots;
+
+ g_assert(hba_base != 0);
+
+ /* Set GHC.AE to 1 */
+ AHCI_SET(AHCI_GHC, AHCI_GHC_AE);
+ reg = AHCI_RREG(AHCI_GHC);
+ ASSERT_BIT_SET(reg, AHCI_GHC_AE);
+
+ /* Read CAP.NCS, how many command slots do we have? */
+ reg = AHCI_RREG(AHCI_CAP);
+ num_cmd_slots = ((reg & AHCI_CAP_NCS) >> ctzl(AHCI_CAP_NCS)) + 1;
+ g_test_message("Number of Command Slots: %u", num_cmd_slots);
+
+ /* Determine which ports are implemented. */
+ ports_impl = AHCI_RREG(AHCI_PI);
+
+ for (i = 0; ports_impl; ports_impl >>= 1, ++i) {
+ if (!(ports_impl & 0x01)) {
+ continue;
+ }
+
+ g_test_message("Initializing port %u", i);
+
+ reg = PX_RREG(i, AHCI_PX_CMD);
+ if (BITCLR(reg, AHCI_PX_CMD_ST | AHCI_PX_CMD_CR |
+ AHCI_PX_CMD_FRE | AHCI_PX_CMD_FR)) {
+ g_test_message("port is idle");
+ } else {
+ g_test_message("port needs to be idled");
+ PX_CLR(i, AHCI_PX_CMD, (AHCI_PX_CMD_ST | AHCI_PX_CMD_FRE));
+ /* The port has 500ms to disengage. */
+ usleep(500000);
+ reg = PX_RREG(i, AHCI_PX_CMD);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CR);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FR);
+ g_test_message("port is now idle");
+ /* The spec does allow for possibly needing a PORT RESET
+ * or HBA reset if we fail to idle the port. */
+ }
+
+ /* Allocate Memory for the Command List Buffer & FIS Buffer */
+ /* PxCLB space ... 0x20 per command, as in 4.2.2 p 36 */
+ clb = guest_alloc(guest_malloc, num_cmd_slots * 0x20);
+ g_test_message("CLB: 0x%08x", clb);
+ PX_WREG(i, AHCI_PX_CLB, clb);
+ g_assert_cmphex(clb, ==, PX_RREG(i, AHCI_PX_CLB));
+
+ /* PxFB space ... 0x100, as in 4.2.1 p 35 */
+ fb = guest_alloc(guest_malloc, 0x100);
+ g_test_message("FB: 0x%08x", fb);
+ PX_WREG(i, AHCI_PX_FB, fb);
+ g_assert_cmphex(fb, ==, PX_RREG(i, AHCI_PX_FB));
+
+ /* Clear PxSERR, PxIS, then IS.IPS[x] by writing '1's. */
+ PX_WREG(i, AHCI_PX_SERR, 0xFFFFFFFF);
+ PX_WREG(i, AHCI_PX_IS, 0xFFFFFFFF);
+ AHCI_WREG(AHCI_IS, (1 << i));
+
+ /* Verify Interrupts Cleared */
+ reg = PX_RREG(i, AHCI_PX_SERR);
+ g_assert_cmphex(reg, ==, 0);
+
+ reg = PX_RREG(i, AHCI_PX_IS);
+ g_assert_cmphex(reg, ==, 0);
+
+ reg = AHCI_RREG(AHCI_IS);
+ ASSERT_BIT_CLEAR(reg, (1 << i));
+
+ /* Enable All Interrupts: */
+ PX_WREG(i, AHCI_PX_IE, 0xFFFFFFFF);
+ reg = PX_RREG(i, AHCI_PX_IE);
+ g_assert_cmphex(reg, ==, ~((uint32_t)AHCI_PX_IE_RESERVED));
+
+ /* Enable the FIS Receive Engine. */
+ PX_SET(i, AHCI_PX_CMD, AHCI_PX_CMD_FRE);
+ reg = PX_RREG(i, AHCI_PX_CMD);
+ ASSERT_BIT_SET(reg, AHCI_PX_CMD_FR);
+
+ /* AHCI 1.3 spec: if !STS.BSY, !STS.DRQ and PxSSTS.DET indicates
+ * physical presence, a device is present and may be started. However,
+ * PxSERR.DIAG.X /may/ need to be cleared a priori. */
+ reg = PX_RREG(i, AHCI_PX_SERR);
+ if (BITSET(reg, AHCI_PX_SERR_DIAG_X)) {
+ PX_SET(i, AHCI_PX_SERR, AHCI_PX_SERR_DIAG_X);
+ }
+
+ reg = PX_RREG(i, AHCI_PX_TFD);
+ if (BITCLR(reg, AHCI_PX_TFD_STS_BSY | AHCI_PX_TFD_STS_DRQ)) {
+ reg = PX_RREG(i, AHCI_PX_SSTS);
+ if ((reg & AHCI_PX_SSTS_DET) == SSTS_DET_ESTABLISHED) {
+ /* Device Found: set PxCMD.ST := 1 */
+ PX_SET(i, AHCI_PX_CMD, AHCI_PX_CMD_ST);
+ ASSERT_BIT_SET(PX_RREG(i, AHCI_PX_CMD), AHCI_PX_CMD_CR);
+ g_test_message("Started Device %u", i);
+ } else if ((reg & AHCI_PX_SSTS_DET)) {
+ /* Device present, but in some unknown state. */
+ g_assert_not_reached();
+ }
+ }
+ }
+
+ /* Enable GHC.IE */
+ AHCI_SET(AHCI_GHC, AHCI_GHC_IE);
+ reg = AHCI_RREG(AHCI_GHC);
+ ASSERT_BIT_SET(reg, AHCI_GHC_IE);
+
+ /* TODO: The device should now be idling and waiting for commands.
+ * In the future, a small test-case to inspect the Register D2H FIS
+ * and clear the initial interrupts might be good. */
+}
+
+/*** Specification Adherence Tests ***/
+
+/**
+ * Implementation for test_pci_spec. Ensures PCI configuration space is sane.
+ */
+static void ahci_test_pci_spec(QPCIDevice *ahci)
+{
+ uint8_t datab;
+ uint16_t data;
+ uint32_t datal;
+
+ /* Most of these bits should start cleared until we turn them on. */
+ data = qpci_config_readw(ahci, PCI_COMMAND);
+ ASSERT_BIT_CLEAR(data, PCI_COMMAND_MEMORY);
+ ASSERT_BIT_CLEAR(data, PCI_COMMAND_MASTER);
+ ASSERT_BIT_CLEAR(data, PCI_COMMAND_SPECIAL); /* Reserved */
+ ASSERT_BIT_CLEAR(data, PCI_COMMAND_VGA_PALETTE); /* Reserved */
+ ASSERT_BIT_CLEAR(data, PCI_COMMAND_PARITY);
+ ASSERT_BIT_CLEAR(data, PCI_COMMAND_WAIT); /* Reserved */
+ ASSERT_BIT_CLEAR(data, PCI_COMMAND_SERR);
+ ASSERT_BIT_CLEAR(data, PCI_COMMAND_FAST_BACK);
+ ASSERT_BIT_CLEAR(data, PCI_COMMAND_INTX_DISABLE);
+ ASSERT_BIT_CLEAR(data, 0xF800); /* Reserved */
+
+ data = qpci_config_readw(ahci, PCI_STATUS);
+ ASSERT_BIT_CLEAR(data, 0x01 | 0x02 | 0x04); /* Reserved */
+ ASSERT_BIT_CLEAR(data, PCI_STATUS_INTERRUPT);
+ ASSERT_BIT_SET(data, PCI_STATUS_CAP_LIST); /* must be set */
+ ASSERT_BIT_CLEAR(data, PCI_STATUS_UDF); /* Reserved */
+ ASSERT_BIT_CLEAR(data, PCI_STATUS_PARITY);
+ ASSERT_BIT_CLEAR(data, PCI_STATUS_SIG_TARGET_ABORT);
+ ASSERT_BIT_CLEAR(data, PCI_STATUS_REC_TARGET_ABORT);
+ ASSERT_BIT_CLEAR(data, PCI_STATUS_REC_MASTER_ABORT);
+ ASSERT_BIT_CLEAR(data, PCI_STATUS_SIG_SYSTEM_ERROR);
+ ASSERT_BIT_CLEAR(data, PCI_STATUS_DETECTED_PARITY);
+
+ /* RID occupies the low byte, CCs occupy the high three. */
+ datal = qpci_config_readl(ahci, PCI_CLASS_REVISION);
+ if (ahci_pedantic) {
+ /* AHCI 1.3 specifies that at-boot, the RID should reset to 0x00,
+ * Though in practice this is likely seldom true. */
+ ASSERT_BIT_CLEAR(datal, 0xFF);
+ }
+
+ /* BCC *must* equal 0x01. */
+ g_assert_cmphex(PCI_BCC(datal), ==, 0x01);
+ if (PCI_SCC(datal) == 0x01) {
+ /* IDE */
+ ASSERT_BIT_SET(0x80000000, datal);
+ ASSERT_BIT_CLEAR(0x60000000, datal);
+ } else if (PCI_SCC(datal) == 0x04) {
+ /* RAID */
+ g_assert_cmphex(PCI_PI(datal), ==, 0);
+ } else if (PCI_SCC(datal) == 0x06) {
+ /* AHCI */
+ g_assert_cmphex(PCI_PI(datal), ==, 0x01);
+ } else {
+ g_assert_not_reached();
+ }
+
+ datab = qpci_config_readb(ahci, PCI_CACHE_LINE_SIZE);
+ g_assert_cmphex(datab, ==, 0);
+
+ datab = qpci_config_readb(ahci, PCI_LATENCY_TIMER);
+ g_assert_cmphex(datab, ==, 0);
+
+ /* Only the bottom 7 bits must be off. */
+ datab = qpci_config_readb(ahci, PCI_HEADER_TYPE);
+ ASSERT_BIT_CLEAR(datab, 0x7F);
+
+ /* BIST is optional, but the low 7 bits must always start off regardless. */
+ datab = qpci_config_readb(ahci, PCI_BIST);
+ ASSERT_BIT_CLEAR(datab, 0x7F);
+
+ /* BARS 0-4 do not have a boot spec, but ABAR/BAR5 must be clean. */
+ datal = qpci_config_readl(ahci, PCI_BASE_ADDRESS_5);
+ g_assert_cmphex(datal, ==, 0);
+
+ qpci_config_writel(ahci, PCI_BASE_ADDRESS_5, 0xFFFFFFFF);
+ datal = qpci_config_readl(ahci, PCI_BASE_ADDRESS_5);
+ /* ABAR must be 32-bit, memory mapped, non-prefetchable and
+ * must be >= 512 bytes. To that end, bits 0-8 must be off. */
+ ASSERT_BIT_CLEAR(datal, 0xFF);
+
+ /* Capability list MUST be present, */
+ datal = qpci_config_readl(ahci, PCI_CAPABILITY_LIST);
+ /* But these bits are reserved. */
+ ASSERT_BIT_CLEAR(datal, ~0xFF);
+ g_assert_cmphex(datal, !=, 0);
+
+ /* Check specification adherence for capability extenstions. */
+ data = qpci_config_readw(ahci, datal);
+
+ switch (ahci_fingerprint) {
+ case AHCI_INTEL_ICH9:
+ /* Intel ICH9 Family Datasheet 14.1.19 p.550 */
+ g_assert_cmphex((data & 0xFF), ==, PCI_CAP_ID_MSI);
+ break;
+ default:
+ /* AHCI 1.3, Section 2.1.14 -- CAP must point to PMCAP. */
+ g_assert_cmphex((data & 0xFF), ==, PCI_CAP_ID_PM);
+ }
+
+ ahci_test_pci_caps(ahci, data, (uint8_t)datal);
+
+ /* Reserved. */
+ datal = qpci_config_readl(ahci, PCI_CAPABILITY_LIST + 4);
+ g_assert_cmphex(datal, ==, 0);
+
+ /* IPIN might vary, but ILINE must be off. */
+ datab = qpci_config_readb(ahci, PCI_INTERRUPT_LINE);
+ g_assert_cmphex(datab, ==, 0);
+}
+
+/**
+ * Test PCI capabilities for AHCI specification adherence.
+ */
+static void ahci_test_pci_caps(QPCIDevice *ahci, uint16_t header,
+ uint8_t offset)
+{
+ uint8_t cid = header & 0xFF;
+ uint8_t next = header >> 8;
+
+ g_test_message("CID: %02x; next: %02x", cid, next);
+
+ switch (cid) {
+ case PCI_CAP_ID_PM:
+ ahci_test_pmcap(ahci, offset);
+ break;
+ case PCI_CAP_ID_MSI:
+ ahci_test_msicap(ahci, offset);
+ break;
+ case PCI_CAP_ID_SATA:
+ ahci_test_satacap(ahci, offset);
+ break;
+
+ default:
+ g_test_message("Unknown CAP 0x%02x", cid);
+ }
+
+ if (next) {
+ ahci_test_pci_caps(ahci, qpci_config_readw(ahci, next), next);
+ }
+}
+
+/**
+ * Test SATA PCI capabilitity for AHCI specification adherence.
+ */
+static void ahci_test_satacap(QPCIDevice *ahci, uint8_t offset)
+{
+ uint16_t dataw;
+ uint32_t datal;
+
+ g_test_message("Verifying SATACAP");
+
+ /* Assert that the SATACAP version is 1.0, And reserved bits are empty. */
+ dataw = qpci_config_readw(ahci, offset + 2);
+ g_assert_cmphex(dataw, ==, 0x10);
+
+ /* Grab the SATACR1 register. */
+ datal = qpci_config_readw(ahci, offset + 4);
+
+ switch (datal & 0x0F) {
+ case 0x04: /* BAR0 */
+ case 0x05: /* BAR1 */
+ case 0x06:
+ case 0x07:
+ case 0x08:
+ case 0x09: /* BAR5 */
+ case 0x0F: /* Immediately following SATACR1 in PCI config space. */
+ break;
+ default:
+ /* Invalid BARLOC for the Index Data Pair. */
+ g_assert_not_reached();
+ }
+
+ /* Reserved. */
+ g_assert_cmphex((datal >> 24), ==, 0x00);
+}
+
+/**
+ * Test MSI PCI capability for AHCI specification adherence.
+ */
+static void ahci_test_msicap(QPCIDevice *ahci, uint8_t offset)
+{
+ uint16_t dataw;
+ uint32_t datal;
+
+ g_test_message("Verifying MSICAP");
+
+ dataw = qpci_config_readw(ahci, offset + PCI_MSI_FLAGS);
+ ASSERT_BIT_CLEAR(dataw, PCI_MSI_FLAGS_ENABLE);
+ ASSERT_BIT_CLEAR(dataw, PCI_MSI_FLAGS_QSIZE);
+ ASSERT_BIT_CLEAR(dataw, PCI_MSI_FLAGS_RESERVED);
+
+ datal = qpci_config_readl(ahci, offset + PCI_MSI_ADDRESS_LO);
+ g_assert_cmphex(datal, ==, 0);
+
+ if (dataw & PCI_MSI_FLAGS_64BIT) {
+ g_test_message("MSICAP is 64bit");
+ datal = qpci_config_readl(ahci, offset + PCI_MSI_ADDRESS_HI);
+ g_assert_cmphex(datal, ==, 0);
+ dataw = qpci_config_readw(ahci, offset + PCI_MSI_DATA_64);
+ g_assert_cmphex(dataw, ==, 0);
+ } else {
+ g_test_message("MSICAP is 32bit");
+ dataw = qpci_config_readw(ahci, offset + PCI_MSI_DATA_32);
+ g_assert_cmphex(dataw, ==, 0);
+ }
+}
+
+/**
+ * Test Power Management PCI capability for AHCI specification adherence.
+ */
+static void ahci_test_pmcap(QPCIDevice *ahci, uint8_t offset)
+{
+ uint16_t dataw;
+
+ g_test_message("Verifying PMCAP");
+
+ dataw = qpci_config_readw(ahci, offset + PCI_PM_PMC);
+ ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_PME_CLOCK);
+ ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_RESERVED);
+ ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_D1);
+ ASSERT_BIT_CLEAR(dataw, PCI_PM_CAP_D2);
+
+ dataw = qpci_config_readw(ahci, offset + PCI_PM_CTRL);
+ ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_STATE_MASK);
+ ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_RESERVED);
+ ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_DATA_SEL_MASK);
+ ASSERT_BIT_CLEAR(dataw, PCI_PM_CTRL_DATA_SCALE_MASK);
+}
+
+static void ahci_test_hba_spec(QPCIDevice *ahci, void *hba_base)
+{
+ HBACap hcap;
+ unsigned i;
+ uint32_t cap, cap2, reg;
+ uint32_t ports;
+ uint8_t nports_impl;
+ uint8_t maxports;
+
+ g_assert(ahci != 0);
+ g_assert(hba_base != 0);
+
+ /*
+ * Note that the AHCI spec does expect the BIOS to set up a few things:
+ * CAP.SSS - Support for staggered spin-up (t/f)
+ * CAP.SMPS - Support for mechanical presence switches (t/f)
+ * PI - Ports Implemented (1-32)
+ * PxCMD.HPCP - Hot Plug Capable Port
+ * PxCMD.MPSP - Mechanical Presence Switch Present
+ * PxCMD.CPD - Cold Presence Detection support
+ *
+ * Additional items are touched if CAP.SSS is on, see AHCI 10.1.1 p.97:
+ * Foreach Port Implemented:
+ * -PxCMD.ST, PxCMD.CR, PxCMD.FRE, PxCMD.FR, PxSCTL.DET are 0
+ * -PxCLB/U and PxFB/U are set to valid regions in memory
+ * -PxSUD is set to 1.
+ * -PxSSTS.DET is polled for presence; if detected, we continue:
+ * -PxSERR is cleared with 1's.
+ * -If PxTFD.STS.BSY, PxTFD.STS.DRQ, and PxTFD.STS.ERR are all zero,
+ * the device is ready.
+ */
+
+ /* 1 CAP - Capabilities Register */
+ cap = AHCI_RREG(AHCI_CAP);
+ ASSERT_BIT_CLEAR(cap, AHCI_CAP_RESERVED);
+
+ /* 2 GHC - Global Host Control */
+ reg = AHCI_RREG(AHCI_GHC);
+ ASSERT_BIT_CLEAR(reg, AHCI_GHC_HR);
+ ASSERT_BIT_CLEAR(reg, AHCI_GHC_IE);
+ ASSERT_BIT_CLEAR(reg, AHCI_GHC_MRSM);
+ if (BITSET(cap, AHCI_CAP_SAM)) {
+ g_test_message("Supports AHCI-Only Mode: GHC_AE is Read-Only.");
+ ASSERT_BIT_SET(reg, AHCI_GHC_AE);
+ } else {
+ g_test_message("Supports AHCI/Legacy mix.");
+ ASSERT_BIT_CLEAR(reg, AHCI_GHC_AE);
+ }
+
+ /* 3 IS - Interrupt Status */
+ reg = AHCI_RREG(AHCI_IS);
+ g_assert_cmphex(reg, ==, 0);
+
+ /* 4 PI - Ports Implemented */
+ ports = AHCI_RREG(AHCI_PI);
+ /* Ports Implemented must be non-zero. */
+ g_assert_cmphex(ports, !=, 0);
+ /* Ports Implemented must be <= Number of Ports. */
+ nports_impl = ctpopl(ports);
+ g_assert_cmpuint(((AHCI_CAP_NP & cap) + 1), >=, nports_impl);
+
+ g_assert_cmphex(barsize, >, 0);
+ /* Ports must be within the proper range. Given a mapping of SIZE,
+ * 256 bytes are used for global HBA control, and the rest is used
+ * for ports data, at 0x80 bytes each. */
+ maxports = (barsize - HBA_DATA_REGION_SIZE) / HBA_PORT_DATA_SIZE;
+ /* e.g, 30 ports for 4K of memory. (4096 - 256) / 128 = 30 */
+ g_assert_cmphex((reg >> maxports), ==, 0);
+
+ /* 5 AHCI Version */
+ reg = AHCI_RREG(AHCI_VS);
+ switch (reg) {
+ case AHCI_VERSION_0_95:
+ case AHCI_VERSION_1_0:
+ case AHCI_VERSION_1_1:
+ case AHCI_VERSION_1_2:
+ case AHCI_VERSION_1_3:
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /* 6 Command Completion Coalescing Control: depends on CAP.CCCS. */
+ reg = AHCI_RREG(AHCI_CCCCTL);
+ if (BITSET(cap, AHCI_CAP_CCCS)) {
+ ASSERT_BIT_CLEAR(reg, AHCI_CCCCTL_EN);
+ ASSERT_BIT_CLEAR(reg, AHCI_CCCCTL_RESERVED);
+ ASSERT_BIT_SET(reg, AHCI_CCCCTL_CC);
+ ASSERT_BIT_SET(reg, AHCI_CCCCTL_TV);
+ } else {
+ g_assert_cmphex(reg, ==, 0);
+ }
+
+ /* 7 CCC_PORTS */
+ reg = AHCI_RREG(AHCI_CCCPORTS);
+ /* Must be zeroes initially regardless of CAP.CCCS */
+ g_assert_cmphex(reg, ==, 0);
+
+ /* 8 EM_LOC */
+ reg = AHCI_RREG(AHCI_EMLOC);
+ if (BITCLR(cap, AHCI_CAP_EMS)) {
+ g_assert_cmphex(reg, ==, 0);
+ }
+
+ /* 9 EM_CTL */
+ reg = AHCI_RREG(AHCI_EMCTL);
+ if (BITSET(cap, AHCI_CAP_EMS)) {
+ ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_STSMR);
+ ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_CTLTM);
+ ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_CTLRST);
+ ASSERT_BIT_CLEAR(reg, AHCI_EMCTL_RESERVED);
+ } else {
+ g_assert_cmphex(reg, ==, 0);
+ }
+
+ /* 10 CAP2 -- Capabilities Extended */
+ cap2 = AHCI_RREG(AHCI_CAP2);
+ ASSERT_BIT_CLEAR(cap2, AHCI_CAP2_RESERVED);
+
+ /* 11 BOHC -- Bios/OS Handoff Control */
+ reg = AHCI_RREG(AHCI_BOHC);
+ g_assert_cmphex(reg, ==, 0);
+
+ /* 12 -- 23: Reserved */
+ g_test_message("Verifying HBA reserved area is empty.");
+ for (i = AHCI_RESERVED; i < AHCI_NVMHCI; ++i) {
+ reg = AHCI_RREG(i);
+ g_assert_cmphex(reg, ==, 0);
+ }
+
+ /* 24 -- 39: NVMHCI */
+ if (BITCLR(cap2, AHCI_CAP2_NVMP)) {
+ g_test_message("Verifying HBA/NVMHCI area is empty.");
+ for (i = AHCI_NVMHCI; i < AHCI_VENDOR; ++i) {
+ reg = AHCI_RREG(i);
+ g_assert_cmphex(reg, ==, 0);
+ }
+ }
+
+ /* 40 -- 63: Vendor */
+ g_test_message("Verifying HBA/Vendor area is empty.");
+ for (i = AHCI_VENDOR; i < AHCI_PORTS; ++i) {
+ reg = AHCI_RREG(i);
+ g_assert_cmphex(reg, ==, 0);
+ }
+
+ /* 64 -- XX: Port Space */
+ hcap.cap = cap;
+ hcap.cap2 = cap2;
+ for (i = 0; ports || (i < maxports); ports >>= 1, ++i) {
+ if (BITSET(ports, 0x1)) {
+ g_test_message("Testing port %u for spec", i);
+ ahci_test_port_spec(ahci, hba_base, &hcap, i);
+ } else {
+ uint16_t j;
+ uint16_t low = AHCI_PORTS + (32 * i);
+ uint16_t high = AHCI_PORTS + (32 * (i + 1));
+ g_test_message("Asserting unimplemented port %u "
+ "(reg [%u-%u]) is empty.",
+ i, low, high - 1);
+ for (j = low; j < high; ++j) {
+ reg = AHCI_RREG(j);
+ g_assert_cmphex(reg, ==, 0);
+ }
+ }
+ }
+}
+
+/**
+ * Test the memory space for one port for specification adherence.
+ */
+static void ahci_test_port_spec(QPCIDevice *ahci, void *hba_base,
+ HBACap *hcap, uint8_t port)
+{
+ uint32_t reg;
+ unsigned i;
+
+ /* (0) CLB */
+ reg = PX_RREG(port, AHCI_PX_CLB);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CLB_RESERVED);
+
+ /* (1) CLBU */
+ if (BITCLR(hcap->cap, AHCI_CAP_S64A)) {
+ reg = PX_RREG(port, AHCI_PX_CLBU);
+ g_assert_cmphex(reg, ==, 0);
+ }
+
+ /* (2) FB */
+ reg = PX_RREG(port, AHCI_PX_FB);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_FB_RESERVED);
+
+ /* (3) FBU */
+ if (BITCLR(hcap->cap, AHCI_CAP_S64A)) {
+ reg = PX_RREG(port, AHCI_PX_FBU);
+ g_assert_cmphex(reg, ==, 0);
+ }
+
+ /* (4) IS */
+ reg = PX_RREG(port, AHCI_PX_IS);
+ g_assert_cmphex(reg, ==, 0);
+
+ /* (5) IE */
+ reg = PX_RREG(port, AHCI_PX_IE);
+ g_assert_cmphex(reg, ==, 0);
+
+ /* (6) CMD */
+ reg = PX_RREG(port, AHCI_PX_CMD);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FRE);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_RESERVED);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CCS);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FR);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CR);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_PMA); /* And RW only if CAP.SPM */
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_APSTE); /* RW only if CAP2.APST */
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_ATAPI);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_DLAE);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_ALPE); /* RW only if CAP.SALP */
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_ASP); /* RW only if CAP.SALP */
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_ICC);
+ /* If CPDetect support does not exist, CPState must be off. */
+ if (BITCLR(reg, AHCI_PX_CMD_CPD)) {
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_CPS);
+ }
+ /* If MPSPresence is not set, MPSState must be off. */
+ if (BITCLR(reg, AHCI_PX_CMD_MPSP)) {
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_MPSS);
+ }
+ /* If we do not support MPS, MPSS and MPSP must be off. */
+ if (BITCLR(hcap->cap, AHCI_CAP_SMPS)) {
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_MPSS);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_MPSP);
+ }
+ /* If, via CPD or MPSP we detect a drive, HPCP must be on. */
+ if (BITANY(reg, AHCI_PX_CMD_CPD || AHCI_PX_CMD_MPSP)) {
+ ASSERT_BIT_SET(reg, AHCI_PX_CMD_HPCP);
+ }
+ /* HPCP and ESP cannot both be active. */
+ g_assert(!BITSET(reg, AHCI_PX_CMD_HPCP | AHCI_PX_CMD_ESP));
+ /* If CAP.FBSS is not set, FBSCP must not be set. */
+ if (BITCLR(hcap->cap, AHCI_CAP_FBSS)) {
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_CMD_FBSCP);
+ }
+
+ /* (7) RESERVED */
+ reg = PX_RREG(port, AHCI_PX_RES1);
+ g_assert_cmphex(reg, ==, 0);
+
+ /* (8) TFD */
+ reg = PX_RREG(port, AHCI_PX_TFD);
+ /* At boot, prior to an FIS being received, the TFD register should be 0x7F,
+ * which breaks down as follows, as seen in AHCI 1.3 sec 3.3.8, p. 27. */
+ ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_ERR);
+ ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_CS1);
+ ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_DRQ);
+ ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_CS2);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_STS_BSY);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_ERR);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_RESERVED);
+
+ /* (9) SIG */
+ /* Though AHCI specifies the boot value should be 0xFFFFFFFF,
+ * Even when GHC.ST is zero, the AHCI HBA may receive the initial
+ * D2H register FIS and update the signature asynchronously,
+ * so we cannot expect a value here. AHCI 1.3, sec 3.3.9, pp 27-28 */
+
+ /* (10) SSTS / SCR0: SStatus */
+ reg = PX_RREG(port, AHCI_PX_SSTS);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_SSTS_RESERVED);
+ /* Even though the register should be 0 at boot, it is asynchronous and
+ * prone to change, so we cannot test any well known value. */
+
+ /* (11) SCTL / SCR2: SControl */
+ reg = PX_RREG(port, AHCI_PX_SCTL);
+ g_assert_cmphex(reg, ==, 0);
+
+ /* (12) SERR / SCR1: SError */
+ reg = PX_RREG(port, AHCI_PX_SERR);
+ g_assert_cmphex(reg, ==, 0);
+
+ /* (13) SACT / SCR3: SActive */
+ reg = PX_RREG(port, AHCI_PX_SACT);
+ g_assert_cmphex(reg, ==, 0);
+
+ /* (14) CI */
+ reg = PX_RREG(port, AHCI_PX_CI);
+ g_assert_cmphex(reg, ==, 0);
+
+ /* (15) SNTF */
+ reg = PX_RREG(port, AHCI_PX_SNTF);
+ g_assert_cmphex(reg, ==, 0);
+
+ /* (16) FBS */
+ reg = PX_RREG(port, AHCI_PX_FBS);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_EN);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_DEC);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_SDE);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_DEV);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_DWE);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_FBS_RESERVED);
+ if (BITSET(hcap->cap, AHCI_CAP_FBSS)) {
+ /* if Port-Multiplier FIS-based switching avail, ADO must >= 2 */
+ g_assert((reg & AHCI_PX_FBS_ADO) >> ctzl(AHCI_PX_FBS_ADO) >= 2);
+ }
+
+ /* [17 -- 27] RESERVED */
+ for (i = AHCI_PX_RES2; i < AHCI_PX_VS; ++i) {
+ reg = PX_RREG(port, i);
+ g_assert_cmphex(reg, ==, 0);
+ }
+
+ /* [28 -- 31] Vendor-Specific */
+ for (i = AHCI_PX_VS; i < 32; ++i) {
+ reg = PX_RREG(port, i);
+ if (reg) {
+ g_test_message("INFO: Vendor register %u non-empty", i);
+ }
+ }
+}
+
+/**
+ * Utilizing an initialized AHCI HBA, issue an IDENTIFY command to the first
+ * device we see, then read and check the response.
+ */
+static void ahci_test_identify(QPCIDevice *ahci, void *hba_base)
+{
+ RegD2HFIS *d2h = g_malloc0(0x20);
+ RegD2HFIS *pio = g_malloc0(0x20);
+ RegH2DFIS fis;
+ AHCICommand cmd;
+ PRD prd;
+ uint32_t ports, reg, clb, table, fb, data_ptr;
+ uint16_t buff[256];
+ unsigned i;
+ int rc;
+
+ g_assert(ahci != NULL);
+ g_assert(hba_base != NULL);
+
+ /* We need to:
+ * (1) Create a Command Table Buffer and update the Command List Slot #0
+ * to point to this buffer.
+ * (2) Construct an FIS host-to-device command structure, and write it to
+ * the top of the command table buffer.
+ * (3) Create a data buffer for the IDENTIFY response to be sent to
+ * (4) Create a Physical Region Descriptor that points to the data buffer,
+ * and write it to the bottom (offset 0x80) of the command table.
+ * (5) Now, PxCLB points to the command list, command 0 points to
+ * our table, and our table contains an FIS instruction and a
+ * PRD that points to our rx buffer.
+ * (6) We inform the HBA via PxCI that there is a command ready in slot #0.
+ */
+
+ /* Pick the first implemented and running port */
+ ports = AHCI_RREG(AHCI_PI);
+ for (i = 0; i < 32; ports >>= 1, ++i) {
+ if (ports == 0) {
+ i = 32;
+ }
+
+ if (!(ports & 0x01)) {
+ continue;
+ }
+
+ reg = PX_RREG(i, AHCI_PX_CMD);
+ if (BITSET(reg, AHCI_PX_CMD_ST)) {
+ break;
+ }
+ }
+ g_assert_cmphex(i, <, 32);
+ g_test_message("Selected port %u for test", i);
+
+ /* Clear out this port's interrupts (ignore the init register d2h fis) */
+ reg = PX_RREG(i, AHCI_PX_IS);
+ PX_WREG(i, AHCI_PX_IS, reg);
+ g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0);
+
+ /* Wipe the FIS-Recieve Buffer */
+ fb = PX_RREG(i, AHCI_PX_FB);
+ g_assert_cmphex(fb, !=, 0);
+ qmemset(fb, 0x00, 0x100);
+
+ /* Create a Command Table buffer. 0x80 is the smallest with a PRDTL of 0. */
+ /* We need at least one PRD, so round up to the nearest 0x80 multiple. */
+ table = guest_alloc(guest_malloc, CMD_TBL_SIZ(1));
+ g_assert(table);
+ ASSERT_BIT_CLEAR(table, 0x7F);
+
+ /* Create a data buffer ... where we will dump the IDENTIFY data to. */
+ data_ptr = guest_alloc(guest_malloc, 512);
+ g_assert(data_ptr);
+
+ /* Grab the Command List Buffer pointer */
+ clb = PX_RREG(i, AHCI_PX_CLB);
+ g_assert(clb);
+
+ /* Copy the existing Command #0 structure from the CLB into local memory,
+ * and build a new command #0. */
+ memread(clb, &cmd, sizeof(cmd));
+ cmd.b1 = 5; /* reg_h2d_fis is 5 double-words long */
+ cmd.b2 = 0x04; /* clear PxTFD.STS.BSY when done */
+ cmd.prdtl = cpu_to_le16(1); /* One PRD table entry. */
+ cmd.prdbc = 0;
+ cmd.ctba = cpu_to_le32(table);
+ cmd.ctbau = 0;
+
+ /* Construct our PRD, noting that DBC is 0-indexed. */
+ prd.dba = cpu_to_le32(data_ptr);
+ prd.dbau = 0;
+ prd.res = 0;
+ /* 511+1 bytes, request DPS interrupt */
+ prd.dbc = cpu_to_le32(511 | 0x80000000);
+
+ /* Construct our Command FIS, Based on http://wiki.osdev.org/AHCI */
+ memset(&fis, 0x00, sizeof(fis));
+ fis.fis_type = 0x27; /* Register Host-to-Device FIS */
+ fis.command = 0xEC; /* IDENTIFY */
+ fis.device = 0;
+ fis.flags = 0x80; /* Indicate this is a command FIS */
+
+ /* We've committed nothing yet, no interrupts should be posted yet. */
+ g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0);
+
+ /* Commit the Command FIS to the Command Table */
+ memwrite(table, &fis, sizeof(fis));
+
+ /* Commit the PRD entry to the Command Table */
+ memwrite(table + 0x80, &prd, sizeof(prd));
+
+ /* Commit Command #0, pointing to the Table, to the Command List Buffer. */
+ memwrite(clb, &cmd, sizeof(cmd));
+
+ /* Everything is in place, but we haven't given the go-ahead yet. */
+ g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0);
+
+ /* Issue Command #0 via PxCI */
+ PX_WREG(i, AHCI_PX_CI, (1 << 0));
+ while (BITSET(PX_RREG(i, AHCI_PX_TFD), AHCI_PX_TFD_STS_BSY)) {
+ usleep(50);
+ }
+
+ /* Check for expected interrupts */
+ reg = PX_RREG(i, AHCI_PX_IS);
+ ASSERT_BIT_SET(reg, AHCI_PX_IS_DHRS);
+ ASSERT_BIT_SET(reg, AHCI_PX_IS_PSS);
+ /* BUG: we expect AHCI_PX_IS_DPS to be set. */
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_IS_DPS);
+
+ /* Clear expected interrupts and assert all interrupts now cleared. */
+ PX_WREG(i, AHCI_PX_IS, AHCI_PX_IS_DHRS | AHCI_PX_IS_PSS | AHCI_PX_IS_DPS);
+ g_assert_cmphex(PX_RREG(i, AHCI_PX_IS), ==, 0);
+
+ /* Check for errors. */
+ reg = PX_RREG(i, AHCI_PX_SERR);
+ g_assert_cmphex(reg, ==, 0);
+ reg = PX_RREG(i, AHCI_PX_TFD);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_STS_ERR);
+ ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_ERR);
+
+ /* Investigate CMD #0, assert that we read 512 bytes */
+ memread(clb, &cmd, sizeof(cmd));
+ g_assert_cmphex(512, ==, le32_to_cpu(cmd.prdbc));
+
+ /* Investigate FIS responses */
+ memread(fb + 0x20, pio, 0x20);
+ memread(fb + 0x40, d2h, 0x20);
+ g_assert_cmphex(pio->fis_type, ==, 0x5f);
+ g_assert_cmphex(d2h->fis_type, ==, 0x34);
+ g_assert_cmphex(pio->flags, ==, d2h->flags);
+ g_assert_cmphex(pio->status, ==, d2h->status);
+ g_assert_cmphex(pio->error, ==, d2h->error);
+
+ reg = PX_RREG(i, AHCI_PX_TFD);
+ g_assert_cmphex((reg & AHCI_PX_TFD_ERR), ==, pio->error);
+ g_assert_cmphex((reg & AHCI_PX_TFD_STS), ==, pio->status);
+ /* The PIO Setup FIS contains a "bytes read" field, which is a
+ * 16-bit value. The Physical Region Descriptor Byte Count is
+ * 32-bit, but for small transfers using one PRD, it should match. */
+ g_assert_cmphex(le16_to_cpu(pio->res4), ==, le32_to_cpu(cmd.prdbc));
+
+ /* Last, but not least: Investigate the IDENTIFY response data. */
+ memread(data_ptr, &buff, 512);
+
+ /* Check serial number/version in the buffer */
+ /* NB: IDENTIFY strings are packed in 16bit little endian chunks.
+ * Since we copy byte-for-byte in ahci-test, on both LE and BE, we need to
+ * unchunk this data. By contrast, ide-test copies 2 bytes at a time, and
+ * as a consequence, only needs to unchunk the data on LE machines. */
+ string_bswap16(&buff[10], 20);
+ rc = memcmp(&buff[10], "testdisk ", 20);
+ g_assert_cmphex(rc, ==, 0);
+
+ string_bswap16(&buff[23], 8);
+ rc = memcmp(&buff[23], "version ", 8);
+ g_assert_cmphex(rc, ==, 0);
+
+ g_free(d2h);
+ g_free(pio);
+}
+
+/******************************************************************************/
+/* Test Interfaces */
+/******************************************************************************/
+
+/**
+ * Basic sanity test to boot a machine, find an AHCI device, and shutdown.
+ */
+static void test_sanity(void)
+{
+ QPCIDevice *ahci;
+ ahci = ahci_boot();
+ ahci_shutdown(ahci);
+}
+
+/**
+ * Ensure that the PCI configuration space for the AHCI device is in-line with
+ * the AHCI 1.3 specification for initial values.
+ */
+static void test_pci_spec(void)
+{
+ QPCIDevice *ahci;
+ ahci = ahci_boot();
+ ahci_test_pci_spec(ahci);
+ ahci_shutdown(ahci);
+}
+
+/**
+ * Engage the PCI AHCI device and sanity check the response.
+ * Perform additional PCI config space bringup for the HBA.
+ */
+static void test_pci_enable(void)
+{
+ QPCIDevice *ahci;
+ void *hba_base;
+ ahci = ahci_boot();
+ ahci_pci_enable(ahci, &hba_base);
+ ahci_shutdown(ahci);
+}
+
+/**
+ * Investigate the memory mapped regions of the HBA,
+ * and test them for AHCI specification adherence.
+ */
+static void test_hba_spec(void)
+{
+ QPCIDevice *ahci;
+ void *hba_base;
+
+ ahci = ahci_boot();
+ ahci_pci_enable(ahci, &hba_base);
+ ahci_test_hba_spec(ahci, hba_base);
+ ahci_shutdown(ahci);
+}
+
+/**
+ * Engage the HBA functionality of the AHCI PCI device,
+ * and bring it into a functional idle state.
+ */
+static void test_hba_enable(void)
+{
+ QPCIDevice *ahci;
+ void *hba_base;
+
+ ahci = ahci_boot();
+ ahci_pci_enable(ahci, &hba_base);
+ ahci_hba_enable(ahci, hba_base);
+ ahci_shutdown(ahci);
+}
+
+/**
+ * Bring up the device and issue an IDENTIFY command.
+ * Inspect the state of the HBA device and the data returned.
+ */
+static void test_identify(void)
+{
+ QPCIDevice *ahci;
+ void *hba_base;
+
+ ahci = ahci_boot();
+ ahci_pci_enable(ahci, &hba_base);
+ ahci_hba_enable(ahci, hba_base);
+ ahci_test_identify(ahci, hba_base);
+ ahci_shutdown(ahci);
+}
+
+/******************************************************************************/
+
+int main(int argc, char **argv)
+{
+ const char *arch;
+ int fd;
+ int ret;
+ int c;
+
+ static struct option long_options[] = {
+ {"pedantic", no_argument, 0, 'p' },
+ {0, 0, 0, 0},
+ };
+
+ /* Should be first to utilize g_test functionality, So we can see errors. */
+ g_test_init(&argc, &argv, NULL);
+
+ while (1) {
+ c = getopt_long(argc, argv, "", long_options, NULL);
+ if (c == -1) {
+ break;
+ }
+ switch (c) {
+ case -1:
+ break;
+ case 'p':
+ ahci_pedantic = 1;
+ break;
+ default:
+ fprintf(stderr, "Unrecognized ahci_test option.\n");
+ g_assert_not_reached();
+ }
+ }
+
+ /* Check architecture */
+ arch = qtest_get_arch();
+ if (strcmp(arch, "i386") && strcmp(arch, "x86_64")) {
+ g_test_message("Skipping test for non-x86");
+ return 0;
+ }
+
+ /* Create a temporary raw image */
+ fd = mkstemp(tmp_path);
+ g_assert(fd >= 0);
+ ret = ftruncate(fd, TEST_IMAGE_SIZE);
+ g_assert(ret == 0);
+ close(fd);
+
+ /* Run the tests */
+ qtest_add_func("/ahci/sanity", test_sanity);
+ qtest_add_func("/ahci/pci_spec", test_pci_spec);
+ qtest_add_func("/ahci/pci_enable", test_pci_enable);
+ qtest_add_func("/ahci/hba_spec", test_hba_spec);
+ qtest_add_func("/ahci/hba_enable", test_hba_enable);
+ qtest_add_func("/ahci/identify", test_identify);
+
+ ret = g_test_run();
+
+ /* Cleanup */
+ unlink(tmp_path);
+
+ return ret;
+}
diff --git a/tests/image-fuzzer/qcow2/fuzz.py b/tests/image-fuzzer/qcow2/fuzz.py
index 57527f9b4a..20eba6bc1b 100644
--- a/tests/image-fuzzer/qcow2/fuzz.py
+++ b/tests/image-fuzzer/qcow2/fuzz.py
@@ -18,8 +18,8 @@
import random
-
UINT8 = 0xff
+UINT16 = 0xffff
UINT32 = 0xffffffff
UINT64 = 0xffffffffffffffff
# Most significant bit orders
@@ -28,6 +28,8 @@ UINT64_M = 63
# Fuzz vectors
UINT8_V = [0, 0x10, UINT8/4, UINT8/2 - 1, UINT8/2, UINT8/2 + 1, UINT8 - 1,
UINT8]
+UINT16_V = [0, 0x100, 0x1000, UINT16/4, UINT16/2 - 1, UINT16/2, UINT16/2 + 1,
+ UINT16 - 1, UINT16]
UINT32_V = [0, 0x100, 0x1000, 0x10000, 0x100000, UINT32/4, UINT32/2 - 1,
UINT32/2, UINT32/2 + 1, UINT32 - 1, UINT32]
UINT64_V = UINT32_V + [0x1000000, 0x10000000, 0x100000000, UINT64/4,
@@ -332,9 +334,8 @@ def l1_entry(current):
constraints = UINT64_V
# Reserved bits are ignored
# Added a possibility when only flags are fuzzed
- offset = 0x7fffffffffffffff & random.choice([selector(current,
- constraints),
- current])
+ offset = 0x7fffffffffffffff & \
+ random.choice([selector(current, constraints), current])
is_cow = random.randint(0, 1)
return offset + (is_cow << UINT64_M)
@@ -344,12 +345,23 @@ def l2_entry(current):
constraints = UINT64_V
# Reserved bits are ignored
# Add a possibility when only flags are fuzzed
- offset = 0x3ffffffffffffffe & random.choice([selector(current,
- constraints),
- current])
+ offset = 0x3ffffffffffffffe & \
+ random.choice([selector(current, constraints), current])
is_compressed = random.randint(0, 1)
is_cow = random.randint(0, 1)
is_zero = random.randint(0, 1)
value = offset + (is_cow << UINT64_M) + \
(is_compressed << UINT64_M - 1) + is_zero
return value
+
+
+def refcount_table_entry(current):
+ """Fuzz an entry of the refcount table."""
+ constraints = UINT64_V
+ return selector(current, constraints)
+
+
+def refcount_block_entry(current):
+ """Fuzz an entry of a refcount block."""
+ constraints = UINT16_V
+ return selector(current, constraints)
diff --git a/tests/image-fuzzer/qcow2/layout.py b/tests/image-fuzzer/qcow2/layout.py
index 730c771d3c..63e801f4e8 100644
--- a/tests/image-fuzzer/qcow2/layout.py
+++ b/tests/image-fuzzer/qcow2/layout.py
@@ -102,6 +102,8 @@ class Image(object):
self.end_of_extension_area = FieldsList()
self.l2_tables = FieldsList()
self.l1_table = FieldsList()
+ self.refcount_table = FieldsList()
+ self.refcount_blocks = FieldsList()
self.ext_offset = 0
self.create_header(cluster_bits, backing_file_name)
self.set_backing_file_name(backing_file_name)
@@ -113,7 +115,8 @@ class Image(object):
def __iter__(self):
return chain(self.header, self.backing_file_format,
self.feature_name_table, self.end_of_extension_area,
- self.backing_file_name, self.l1_table, self.l2_tables)
+ self.backing_file_name, self.l1_table, self.l2_tables,
+ self.refcount_table, self.refcount_blocks)
def create_header(self, cluster_bits, backing_file_name=None):
"""Generate a random valid header."""
@@ -330,6 +333,138 @@ class Image(object):
float(self.cluster_size**2)))
self.header['l1_table_offset'][0].value = l1_offset
+ def create_refcount_structures(self):
+ """Generate random refcount blocks and refcount table."""
+ def allocate_rfc_blocks(data, size):
+ """Return indices of clusters allocated for refcount blocks."""
+ cluster_ids = set()
+ diff = block_ids = set([x / size for x in data])
+ while len(diff) != 0:
+ # Allocate all yet not allocated clusters
+ new = self._get_available_clusters(data | cluster_ids,
+ len(diff))
+ # Indices of new refcount blocks necessary to cover clusters
+ # in 'new'
+ diff = set([x / size for x in new]) - block_ids
+ cluster_ids |= new
+ block_ids |= diff
+ return cluster_ids, block_ids
+
+ def allocate_rfc_table(data, init_blocks, block_size):
+ """Return indices of clusters allocated for the refcount table
+ and updated indices of clusters allocated for blocks and indices
+ of blocks.
+ """
+ blocks = set(init_blocks)
+ clusters = set()
+ # Number of entries in one cluster of the refcount table
+ size = self.cluster_size / UINT64_S
+ # Number of clusters necessary for the refcount table based on
+ # the current number of refcount blocks
+ table_size = int(ceil((max(blocks) + 1) / float(size)))
+ # Index of the first cluster of the refcount table
+ table_start = self._get_adjacent_clusters(data, table_size + 1)
+ # Clusters allocated for the current length of the refcount table
+ table_clusters = set(range(table_start, table_start + table_size))
+ # Clusters allocated for the refcount table including
+ # last optional one for potential l1 growth
+ table_clusters_allocated = set(range(table_start, table_start +
+ table_size + 1))
+ # New refcount blocks necessary for clusters occupied by the
+ # refcount table
+ diff = set([c / block_size for c in table_clusters]) - blocks
+ blocks |= diff
+ while len(diff) != 0:
+ # Allocate clusters for new refcount blocks
+ new = self._get_available_clusters((data | clusters) |
+ table_clusters_allocated,
+ len(diff))
+ # Indices of new refcount blocks necessary to cover
+ # clusters in 'new'
+ diff = set([x / block_size for x in new]) - blocks
+ clusters |= new
+ blocks |= diff
+ # Check if the refcount table needs one more cluster
+ if int(ceil((max(blocks) + 1) / float(size))) > table_size:
+ new_block_id = (table_start + table_size) / block_size
+ # Check if the additional table cluster needs
+ # one more refcount block
+ if new_block_id not in blocks:
+ diff.add(new_block_id)
+ table_clusters.add(table_start + table_size)
+ table_size += 1
+ return table_clusters, blocks, clusters
+
+ def create_table_entry(table_offset, block_cluster, block_size,
+ cluster):
+ """Generate a refcount table entry."""
+ offset = table_offset + UINT64_S * (cluster / block_size)
+ return ['>Q', offset, block_cluster * self.cluster_size,
+ 'refcount_table_entry']
+
+ def create_block_entry(block_cluster, block_size, cluster):
+ """Generate a list of entries for the current block."""
+ entry_size = self.cluster_size / block_size
+ offset = block_cluster * self.cluster_size
+ entry_offset = offset + entry_size * (cluster % block_size)
+ # While snapshots are not supported all refcounts are set to 1
+ return ['>H', entry_offset, 1, 'refcount_block_entry']
+ # Size of a block entry in bits
+ refcount_bits = 1 << self.header['refcount_order'][0].value
+ # Number of refcount entries per refcount block
+ # Convert self.cluster_size from bytes to bits to have the same
+ # base for the numerator and denominator
+ block_size = self.cluster_size * 8 / refcount_bits
+ meta_data = self._get_metadata()
+ if len(self.data_clusters) == 0:
+ # All metadata for an empty guest image needs 4 clusters:
+ # header, rfc table, rfc block, L1 table.
+ # Header takes cluster #0, other clusters ##1-3 can be used
+ block_clusters = set([random.choice(list(set(range(1, 4)) -
+ meta_data))])
+ block_ids = set([0])
+ table_clusters = set([random.choice(list(set(range(1, 4)) -
+ meta_data -
+ block_clusters))])
+ else:
+ block_clusters, block_ids = \
+ allocate_rfc_blocks(self.data_clusters |
+ meta_data, block_size)
+ table_clusters, block_ids, new_clusters = \
+ allocate_rfc_table(self.data_clusters |
+ meta_data |
+ block_clusters,
+ block_ids,
+ block_size)
+ block_clusters |= new_clusters
+
+ meta_data |= block_clusters | table_clusters
+ table_offset = min(table_clusters) * self.cluster_size
+ block_id = None
+ # Clusters allocated for refcount blocks
+ block_clusters = list(block_clusters)
+ # Indices of refcount blocks
+ block_ids = list(block_ids)
+ # Refcount table entries
+ rfc_table = []
+ # Refcount entries
+ rfc_blocks = []
+
+ for cluster in sorted(self.data_clusters | meta_data):
+ if cluster / block_size != block_id:
+ block_id = cluster / block_size
+ block_cluster = block_clusters[block_ids.index(block_id)]
+ rfc_table.append(create_table_entry(table_offset,
+ block_cluster,
+ block_size, cluster))
+ rfc_blocks.append(create_block_entry(block_cluster, block_size,
+ cluster))
+ self.refcount_table = FieldsList(rfc_table)
+ self.refcount_blocks = FieldsList(rfc_blocks)
+
+ self.header['refcount_table_offset'][0].value = table_offset
+ self.header['refcount_table_clusters'][0].value = len(table_clusters)
+
def fuzz(self, fields_to_fuzz=None):
"""Fuzz an image by corrupting values of a random subset of its fields.
@@ -471,6 +606,7 @@ def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None,
image.create_feature_name_table()
image.set_end_of_extension_area()
image.create_l_structures()
+ image.create_refcount_structures()
image.fuzz(fields_to_fuzz)
image.write(test_img_path)
return image.image_size
diff --git a/tests/image-fuzzer/runner.py b/tests/image-fuzzer/runner.py
index c903c8a342..0a8743ef41 100755
--- a/tests/image-fuzzer/runner.py
+++ b/tests/image-fuzzer/runner.py
@@ -70,7 +70,7 @@ def run_app(fd, q_args):
"""Exception for signal.alarm events."""
pass
- def handler(*arg):
+ def handler(*args):
"""Notify that an alarm event occurred."""
raise Alarm
@@ -134,8 +134,8 @@ class TestEnv(object):
self.init_path = os.getcwd()
self.work_dir = work_dir
self.current_dir = os.path.join(work_dir, 'test-' + test_id)
- self.qemu_img = os.environ.get('QEMU_IMG', 'qemu-img')\
- .strip().split(' ')
+ self.qemu_img = \
+ os.environ.get('QEMU_IMG', 'qemu-img').strip().split(' ')
self.qemu_io = os.environ.get('QEMU_IO', 'qemu-io').strip().split(' ')
self.commands = [['qemu-img', 'check', '-f', 'qcow2', '$test_img'],
['qemu-img', 'info', '-f', 'qcow2', '$test_img'],
@@ -150,8 +150,7 @@ class TestEnv(object):
'discard $off $len'],
['qemu-io', '$test_img', '-c',
'truncate $off']]
- for fmt in ['raw', 'vmdk', 'vdi', 'cow', 'qcow2', 'file',
- 'qed', 'vpc']:
+ for fmt in ['raw', 'vmdk', 'vdi', 'qcow2', 'file', 'qed', 'vpc']:
self.commands.append(
['qemu-img', 'convert', '-f', 'qcow2', '-O', fmt,
'$test_img', 'converted_image.' + fmt])
@@ -178,7 +177,7 @@ class TestEnv(object):
by 'qemu-img create'.
"""
# All formats supported by the 'qemu-img create' command.
- backing_file_fmt = random.choice(['raw', 'vmdk', 'vdi', 'cow', 'qcow2',
+ backing_file_fmt = random.choice(['raw', 'vmdk', 'vdi', 'qcow2',
'file', 'qed', 'vpc'])
backing_file_name = 'backing_img.' + backing_file_fmt
backing_file_size = random.randint(MIN_BACKING_FILE_SIZE,
@@ -212,10 +211,8 @@ class TestEnv(object):
os.chdir(self.current_dir)
backing_file_name, backing_file_fmt = self._create_backing_file()
- img_size = image_generator.create_image('test.img',
- backing_file_name,
- backing_file_fmt,
- fuzz_config)
+ img_size = image_generator.create_image(
+ 'test.img', backing_file_name, backing_file_fmt, fuzz_config)
for item in commands:
shutil.copy('test.img', 'copy.img')
# 'off' and 'len' are multiple of the sector size
@@ -228,7 +225,7 @@ class TestEnv(object):
elif item[0] == 'qemu-io':
current_cmd = list(self.qemu_io)
else:
- multilog("Warning: test command '%s' is not defined.\n" \
+ multilog("Warning: test command '%s' is not defined.\n"
% item[0], sys.stderr, self.log, self.parent_log)
continue
# Replace all placeholders with their real values
@@ -244,29 +241,28 @@ class TestEnv(object):
"Backing file: %s\n" \
% (self.seed, " ".join(current_cmd),
self.current_dir, backing_file_name)
-
temp_log = StringIO.StringIO()
try:
retcode = run_app(temp_log, current_cmd)
except OSError, e:
- multilog(test_summary + "Error: Start of '%s' failed. " \
- "Reason: %s\n\n" % (os.path.basename(
- current_cmd[0]), e[1]),
+ multilog("%sError: Start of '%s' failed. Reason: %s\n\n"
+ % (test_summary, os.path.basename(current_cmd[0]),
+ e[1]),
sys.stderr, self.log, self.parent_log)
raise TestException
if retcode < 0:
self.log.write(temp_log.getvalue())
- multilog(test_summary + "FAIL: Test terminated by signal " +
- "%s\n\n" % str_signal(-retcode), sys.stderr, self.log,
- self.parent_log)
+ multilog("%sFAIL: Test terminated by signal %s\n\n"
+ % (test_summary, str_signal(-retcode)),
+ sys.stderr, self.log, self.parent_log)
self.failed = True
else:
if self.log_all:
self.log.write(temp_log.getvalue())
- multilog(test_summary + "PASS: Application exited with" +
- " the code '%d'\n\n" % retcode, sys.stdout,
- self.log, self.parent_log)
+ multilog("%sPASS: Application exited with the code " \
+ "'%d'\n\n" % (test_summary, retcode),
+ sys.stdout, self.log, self.parent_log)
temp_log.close()
os.remove('copy.img')
@@ -286,8 +282,9 @@ if __name__ == '__main__':
Set up test environment in TEST_DIR and run a test in it. A module for
test image generation should be specified via IMG_GENERATOR.
+
Example:
- runner.py -c '[["qemu-img", "info", "$test_img"]]' /tmp/test qcow2
+ runner.py -c '[["qemu-img", "info", "$test_img"]]' /tmp/test qcow2
Optional arguments:
-h, --help display this help and exit
@@ -305,20 +302,22 @@ if __name__ == '__main__':
'--command' accepts a JSON array of commands. Each command presents
an application under test with all its paramaters as a list of strings,
- e.g.
- ["qemu-io", "$test_img", "-c", "write $off $len"]
+ e.g. ["qemu-io", "$test_img", "-c", "write $off $len"].
Supported application aliases: 'qemu-img' and 'qemu-io'.
+
Supported argument aliases: $test_img for the fuzzed image, $off
for an offset, $len for length.
Values for $off and $len will be generated based on the virtual disk
- size of the fuzzed image
+ size of the fuzzed image.
+
Paths to 'qemu-img' and 'qemu-io' are retrevied from 'QEMU_IMG' and
- 'QEMU_IO' environment variables
+ 'QEMU_IO' environment variables.
'--config' accepts a JSON array of fields to be fuzzed, e.g.
- '[["header"], ["header", "version"]]'
+ '[["header"], ["header", "version"]]'.
+
Each of the list elements can consist of a complex image element only
as ["header"] or ["feature_name_table"] or an exact field as
["header", "version"]. In the first case random portion of the element
@@ -368,7 +367,6 @@ if __name__ == '__main__':
seed = None
config = None
duration = None
-
for opt, arg in opts:
if opt in ('-h', '--help'):
usage()
diff --git a/tests/libqos/pci.c b/tests/libqos/pci.c
index d5ce683d77..4e630c250a 100644
--- a/tests/libqos/pci.c
+++ b/tests/libqos/pci.c
@@ -71,6 +71,12 @@ void qpci_device_enable(QPCIDevice *dev)
cmd = qpci_config_readw(dev, PCI_COMMAND);
cmd |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
qpci_config_writew(dev, PCI_COMMAND, cmd);
+
+ /* Verify the bits are now set. */
+ cmd = qpci_config_readw(dev, PCI_COMMAND);
+ g_assert_cmphex(cmd & PCI_COMMAND_IO, ==, PCI_COMMAND_IO);
+ g_assert_cmphex(cmd & PCI_COMMAND_MEMORY, ==, PCI_COMMAND_MEMORY);
+ g_assert_cmphex(cmd & PCI_COMMAND_MASTER, ==, PCI_COMMAND_MASTER);
}
uint8_t qpci_find_capability(QPCIDevice *dev, uint8_t id)
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index 830386fdaa..2355567951 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -169,9 +169,61 @@ echo "=== Testing unallocated image header ==="
echo
_make_test_img 64M
# Create L1/L2
-$QEMU_IO -c "$OPEN_RW" -c "write 0 64k" | _filter_qemu_io
+$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
-$QEMU_IO -c "$OPEN_RW" -c "write 64k 64k" | _filter_qemu_io
+$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "=== Testing unaligned L1 entry ==="
+echo
+_make_test_img 64M
+$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
+# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are
+# aligned or not does not matter
+poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
+$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "=== Testing unaligned L2 entry ==="
+echo
+_make_test_img 64M
+$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
+poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
+$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "=== Testing unaligned reftable entry ==="
+echo
+_make_test_img 64M
+poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
+$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "=== Testing non-fatal corruption on freeing ==="
+echo
+_make_test_img 64M
+$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
+poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
+$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
+
+echo
+echo "=== Testing read-only corruption report ==="
+echo
+_make_test_img 64M
+$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
+poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
+# Should only emit a single error message
+$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io
+
+echo
+echo "=== Testing non-fatal and then fatal corruption report ==="
+echo
+_make_test_img 64M
+$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
+poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
+poke_file "$TEST_IMG" "$(($l2_offset+8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
+# Should emit two error messages
+$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io
# success, all done
echo "*** done"
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index c27c952412..4f0c6d0c8e 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -8,7 +8,7 @@ ERROR cluster 3 refcount=1 reference=3
1 errors were found on the image.
Data may be corrupted, or further writes to the image may corrupt it.
incompatible_features 0x0
-qcow2: Preventing invalid write on metadata (overlaps with active L1 table); image marked as corrupt.
+qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with active L1 table); further corruption events will be suppressed
write failed: Input/output error
incompatible_features 0x2
qemu-io: can't open device TEST_DIR/t.IMGFMT: IMGFMT: Image is corrupt; cannot be opened read/write
@@ -24,7 +24,7 @@ ERROR cluster 2 refcount=1 reference=2
2 errors were found on the image.
Data may be corrupted, or further writes to the image may corrupt it.
incompatible_features 0x0
-qcow2: Preventing invalid write on metadata (overlaps with refcount block); image marked as corrupt.
+qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with refcount block); further corruption events will be suppressed
write failed: Input/output error
incompatible_features 0x2
Repairing refcount block 0 refcount=2
@@ -56,7 +56,7 @@ Data may be corrupted, or further writes to the image may corrupt it.
1 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
incompatible_features 0x0
-qcow2: Preventing invalid write on metadata (overlaps with inactive L2 table); image marked as corrupt.
+qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with inactive L2 table); further corruption events will be suppressed
write failed: Input/output error
incompatible_features 0x2
Repairing cluster 4 refcount=1 reference=2
@@ -88,7 +88,7 @@ wrote 65536/65536 bytes at offset 536870912
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
discard 65536/65536 bytes at offset 0
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-qcow2: Preventing invalid write on metadata (overlaps with active L2 table); image marked as corrupt.
+qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with active L2 table); further corruption events will be suppressed
blkdebug: Suspended request '0'
write failed: Input/output error
blkdebug: Resuming request '0'
@@ -99,6 +99,57 @@ aio_write failed: No medium found
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
wrote 65536/65536 bytes at offset 0
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-qcow2: Preventing invalid write on metadata (overlaps with qcow2_header); image marked as corrupt.
+qcow2: Marking image as corrupt: Preventing invalid write on metadata (overlaps with qcow2_header); further corruption events will be suppressed
write failed: Input/output error
+
+=== Testing unaligned L1 entry ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qcow2: Marking image as corrupt: L2 table offset 0x42a00 unaligned (L1 index: 0); further corruption events will be suppressed
+read failed: Input/output error
+
+=== Testing unaligned L2 entry ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qcow2: Marking image as corrupt: Data cluster offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further corruption events will be suppressed
+read failed: Input/output error
+
+=== Testing unaligned reftable entry ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+qcow2: Marking image as corrupt: Refblock offset 0x22a00 unaligned (reftable index: 0); further corruption events will be suppressed
+write failed: Input/output error
+
+=== Testing non-fatal corruption on freeing ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qcow2: Image is corrupt: Cannot free unaligned cluster 0x52a00; further non-fatal corruption events will be suppressed
+discard 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Testing read-only corruption report ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qcow2: Image is corrupt: Data cluster offset 0x52a00 unaligned (L2 offset: 0x40000, L2 index: 0); further non-fatal corruption events will be suppressed
+read failed: Input/output error
+read failed: Input/output error
+
+=== Testing non-fatal and then fatal corruption report ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qcow2: Image is corrupt: Cannot free unaligned cluster 0x52a00; further non-fatal corruption events will be suppressed
+qcow2: Marking image as corrupt: Data cluster offset 0x62a00 unaligned (L2 offset: 0x40000, L2 index: 0x1); further corruption events will be suppressed
+discard 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read failed: Input/output error
*** done
diff --git a/tests/qemu-iotests/069 b/tests/qemu-iotests/069
index e661598c4a..ce9e0541b2 100755
--- a/tests/qemu-iotests/069
+++ b/tests/qemu-iotests/069
@@ -38,7 +38,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
. ./common.rc
. ./common.filter
-_supported_fmt cow qed qcow qcow2 vmdk
+_supported_fmt qed qcow qcow2 vmdk
_supported_proto file
_supported_os Linux
_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat"
diff --git a/tests/qemu-iotests/072 b/tests/qemu-iotests/072
index 58faa8b5a7..e4a723d733 100755
--- a/tests/qemu-iotests/072
+++ b/tests/qemu-iotests/072
@@ -38,7 +38,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
. ./common.rc
. ./common.filter
-_supported_fmt vpc vmdk vhdx vdi qed qcow2 qcow cow
+_supported_fmt vpc vmdk vhdx vdi qed qcow2 qcow
_supported_proto file
_supported_os Linux
diff --git a/tests/qemu-iotests/099 b/tests/qemu-iotests/099
index a26d3d243e..ffc7ea795a 100755
--- a/tests/qemu-iotests/099
+++ b/tests/qemu-iotests/099
@@ -41,7 +41,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
# Basically all formats, but "raw" has issues with _filter_imgfmt regarding the
# raw comparison image for blkverify; also, all images have to support creation
-_supported_fmt cow qcow qcow2 qed vdi vhdx vmdk vpc
+_supported_fmt qcow qcow2 qed vdi vhdx vmdk vpc
_supported_proto file
_supported_os Linux
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index 70df659d5b..89c6dde263 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -136,7 +136,6 @@ common options
check options
-raw test raw (default)
-bochs test bochs
- -cow test cow
-cloop test cloop
-parallels test parallels
-qcow test qcow
@@ -182,11 +181,6 @@ testlist options
xpand=false
;;
- -cow)
- IMGFMT=cow
- xpand=false
- ;;
-
-cloop)
IMGFMT=cloop
IMGFMT_GENERIC=false
diff --git a/tests/test-aio.c b/tests/test-aio.c
index c6a8713e83..a7cb5c9915 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -14,6 +14,7 @@
#include "block/aio.h"
#include "qemu/timer.h"
#include "qemu/sockets.h"
+#include "qemu/error-report.h"
static AioContext *ctx;
@@ -810,11 +811,18 @@ static void test_source_timer_schedule(void)
int main(int argc, char **argv)
{
+ Error *local_error = NULL;
GSource *src;
init_clocks();
- ctx = aio_context_new();
+ ctx = aio_context_new(&local_error);
+ if (!ctx) {
+ error_report("Failed to create AIO Context: '%s'",
+ error_get_pretty(local_error));
+ error_free(local_error);
+ exit(1);
+ }
src = aio_get_g_source(ctx);
g_source_attach(src, NULL);
g_source_unref(src);
diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c
index f40b7fc170..8c4d68b345 100644
--- a/tests/test-thread-pool.c
+++ b/tests/test-thread-pool.c
@@ -4,6 +4,7 @@
#include "block/thread-pool.h"
#include "block/block.h"
#include "qemu/timer.h"
+#include "qemu/error-report.h"
static AioContext *ctx;
static ThreadPool *pool;
@@ -33,7 +34,7 @@ static int long_cb(void *opaque)
static void done_cb(void *opaque, int ret)
{
WorkerTestData *data = opaque;
- g_assert_cmpint(data->ret, ==, -EINPROGRESS);
+ g_assert(data->ret == -EINPROGRESS || data->ret == -ECANCELED);
data->ret = ret;
data->aiocb = NULL;
@@ -132,7 +133,7 @@ static void test_submit_many(void)
}
}
-static void test_cancel(void)
+static void do_test_cancel(bool sync)
{
WorkerTestData data[100];
int num_canceled;
@@ -170,18 +171,25 @@ static void test_cancel(void)
for (i = 0; i < 100; i++) {
if (atomic_cmpxchg(&data[i].n, 0, 3) == 0) {
data[i].ret = -ECANCELED;
- bdrv_aio_cancel(data[i].aiocb);
- active--;
+ if (sync) {
+ bdrv_aio_cancel(data[i].aiocb);
+ } else {
+ bdrv_aio_cancel_async(data[i].aiocb);
+ }
num_canceled++;
}
}
g_assert_cmpint(active, >, 0);
g_assert_cmpint(num_canceled, <, 100);
- /* Canceling the others will be a blocking operation. */
for (i = 0; i < 100; i++) {
if (data[i].aiocb && data[i].n != 3) {
- bdrv_aio_cancel(data[i].aiocb);
+ if (sync) {
+ /* Canceling the others will be a blocking operation. */
+ bdrv_aio_cancel(data[i].aiocb);
+ } else {
+ bdrv_aio_cancel_async(data[i].aiocb);
+ }
}
}
@@ -193,22 +201,39 @@ static void test_cancel(void)
for (i = 0; i < 100; i++) {
if (data[i].n == 3) {
g_assert_cmpint(data[i].ret, ==, -ECANCELED);
- g_assert(data[i].aiocb != NULL);
+ g_assert(data[i].aiocb == NULL);
} else {
g_assert_cmpint(data[i].n, ==, 2);
- g_assert_cmpint(data[i].ret, ==, 0);
+ g_assert(data[i].ret == 0 || data[i].ret == -ECANCELED);
g_assert(data[i].aiocb == NULL);
}
}
}
+static void test_cancel(void)
+{
+ do_test_cancel(true);
+}
+
+static void test_cancel_async(void)
+{
+ do_test_cancel(false);
+}
+
int main(int argc, char **argv)
{
int ret;
+ Error *local_error = NULL;
init_clocks();
- ctx = aio_context_new();
+ ctx = aio_context_new(&local_error);
+ if (!ctx) {
+ error_report("Failed to create AIO Context: '%s'",
+ error_get_pretty(local_error));
+ error_free(local_error);
+ exit(1);
+ }
pool = aio_get_thread_pool(ctx);
g_test_init(&argc, &argv, NULL);
@@ -217,6 +242,7 @@ int main(int argc, char **argv)
g_test_add_func("/thread-pool/submit-co", test_submit_co);
g_test_add_func("/thread-pool/submit-many", test_submit_many);
g_test_add_func("/thread-pool/cancel", test_cancel);
+ g_test_add_func("/thread-pool/cancel-async", test_cancel_async);
ret = g_test_run();
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 000ae31af9..d8ba415e43 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -14,6 +14,7 @@
#include <math.h>
#include "block/aio.h"
#include "qemu/throttle.h"
+#include "qemu/error-report.h"
static AioContext *ctx;
static LeakyBucket bkt;
@@ -492,10 +493,17 @@ static void test_accounting(void)
int main(int argc, char **argv)
{
GSource *src;
+ Error *local_error = NULL;
init_clocks();
- ctx = aio_context_new();
+ ctx = aio_context_new(&local_error);
+ if (!ctx) {
+ error_report("Failed to create AIO Context: '%s'",
+ error_get_pretty(local_error));
+ error_free(local_error);
+ exit(1);
+ }
src = aio_get_g_source(ctx);
g_source_attach(src, NULL);
g_source_unref(src);
diff --git a/thread-pool.c b/thread-pool.c
index bc07d7a1c9..86fa4a8f89 100644
--- a/thread-pool.c
+++ b/thread-pool.c
@@ -31,7 +31,6 @@ enum ThreadState {
THREAD_QUEUED,
THREAD_ACTIVE,
THREAD_DONE,
- THREAD_CANCELED,
};
struct ThreadPoolElement {
@@ -58,7 +57,6 @@ struct ThreadPool {
AioContext *ctx;
QEMUBH *completion_bh;
QemuMutex lock;
- QemuCond check_cancel;
QemuCond worker_stopped;
QemuSemaphore sem;
int max_threads;
@@ -73,7 +71,6 @@ struct ThreadPool {
int idle_threads;
int new_threads; /* backlog of threads we need to create */
int pending_threads; /* threads created but not running yet */
- int pending_cancellations; /* whether we need a cond_broadcast */
bool stopping;
};
@@ -113,9 +110,6 @@ static void *worker_thread(void *opaque)
req->state = THREAD_DONE;
qemu_mutex_lock(&pool->lock);
- if (pool->pending_cancellations) {
- qemu_cond_broadcast(&pool->check_cancel);
- }
qemu_bh_schedule(pool->completion_bh);
}
@@ -173,7 +167,7 @@ static void thread_pool_completion_bh(void *opaque)
restart:
QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
- if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
+ if (elem->state != THREAD_DONE) {
continue;
}
if (elem->state == THREAD_DONE) {
@@ -191,12 +185,12 @@ restart:
qemu_bh_schedule(pool->completion_bh);
elem->common.cb(elem->common.opaque, elem->ret);
- qemu_aio_release(elem);
+ qemu_aio_unref(elem);
goto restart;
} else {
/* remove the request */
QLIST_REMOVE(elem, all);
- qemu_aio_release(elem);
+ qemu_aio_unref(elem);
}
}
}
@@ -217,22 +211,26 @@ static void thread_pool_cancel(BlockDriverAIOCB *acb)
*/
qemu_sem_timedwait(&pool->sem, 0) == 0) {
QTAILQ_REMOVE(&pool->request_list, elem, reqs);
- elem->state = THREAD_CANCELED;
qemu_bh_schedule(pool->completion_bh);
- } else {
- pool->pending_cancellations++;
- while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
- qemu_cond_wait(&pool->check_cancel, &pool->lock);
- }
- pool->pending_cancellations--;
+
+ elem->state = THREAD_DONE;
+ elem->ret = -ECANCELED;
}
+
qemu_mutex_unlock(&pool->lock);
- thread_pool_completion_bh(pool);
+}
+
+static AioContext *thread_pool_get_aio_context(BlockDriverAIOCB *acb)
+{
+ ThreadPoolElement *elem = (ThreadPoolElement *)acb;
+ ThreadPool *pool = elem->pool;
+ return pool->ctx;
}
static const AIOCBInfo thread_pool_aiocb_info = {
.aiocb_size = sizeof(ThreadPoolElement),
- .cancel = thread_pool_cancel,
+ .cancel_async = thread_pool_cancel,
+ .get_aio_context = thread_pool_get_aio_context,
};
BlockDriverAIOCB *thread_pool_submit_aio(ThreadPool *pool,
@@ -299,7 +297,6 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
pool->ctx = ctx;
pool->completion_bh = aio_bh_new(ctx, thread_pool_completion_bh, pool);
qemu_mutex_init(&pool->lock);
- qemu_cond_init(&pool->check_cancel);
qemu_cond_init(&pool->worker_stopped);
qemu_sem_init(&pool->sem, 0);
pool->max_threads = 64;
@@ -342,7 +339,6 @@ void thread_pool_free(ThreadPool *pool)
qemu_bh_delete(pool->completion_bh);
qemu_sem_destroy(&pool->sem);
- qemu_cond_destroy(&pool->check_cancel);
qemu_cond_destroy(&pool->worker_stopped);
qemu_mutex_destroy(&pool->lock);
g_free(pool);
diff --git a/ui/cocoa.m b/ui/cocoa.m
index 3147178118..d37c29b4a4 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -857,7 +857,7 @@ QemuCocoaView *cocoaView;
[op setPrompt:@"Boot image"];
[op setMessage:@"Select the disk image you want to boot.\n\nHit the \"Cancel\" button to quit"];
NSArray *filetypes = [NSArray arrayWithObjects:@"img", @"iso", @"dmg",
- @"qcow", @"qcow2", @"cow", @"cloop", @"vmdk", nil];
+ @"qcow", @"qcow2", @"cloop", @"vmdk", nil];
#if (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6)
[op setAllowedFileTypes:filetypes];
[op beginSheetModalForWindow:normalWindow
diff --git a/vl.c b/vl.c
index 2695842a37..dbdca594b7 100644
--- a/vl.c
+++ b/vl.c
@@ -2968,6 +2968,7 @@ int main(int argc, char **argv, char **envp)
ram_addr_t maxram_size = default_ram_size;
uint64_t ram_slots = 0;
FILE *vmstate_dump_file = NULL;
+ Error *main_loop_err = NULL;
atexit(qemu_run_exit_notifiers);
error_set_progname(argv[0]);
@@ -3998,8 +3999,8 @@ int main(int argc, char **argv, char **envp)
os_daemonize();
- if (qemu_init_main_loop()) {
- fprintf(stderr, "qemu_init_main_loop failed\n");
+ if (qemu_init_main_loop(&main_loop_err)) {
+ error_report("%s", error_get_pretty(main_loop_err));
exit(1);
}