aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS8
-rw-r--r--Makefile1
-rw-r--r--Makefile.objs2
-rw-r--r--block/Makefile.objs6
-rw-r--r--block/blkdebug.c86
-rw-r--r--block/blkverify.c201
-rw-r--r--block/file-posix.c (renamed from block/raw-posix.c)0
-rw-r--r--block/file-win32.c (renamed from block/raw-win32.c)0
-rw-r--r--block/gluster.c4
-rw-r--r--block/quorum.c410
-rw-r--r--block/raw-format.c (renamed from block/raw_bsd.c)2
-rw-r--r--block/trace-events4
-rwxr-xr-xconfigure2
-rw-r--r--contrib/libvhost-user/Makefile.objs1
-rw-r--r--contrib/libvhost-user/libvhost-user.c1499
-rw-r--r--contrib/libvhost-user/libvhost-user.h435
-rw-r--r--hw/arm/pxa2xx.c4
-rw-r--r--hw/arm/tosa.c4
-rw-r--r--hw/arm/virt-acpi-build.c134
-rw-r--r--hw/arm/virt.c691
-rw-r--r--hw/arm/z2.c4
-rw-r--r--hw/audio/wm8750.c4
-rw-r--r--hw/block/m25p80.c29
-rw-r--r--hw/char/exynos4210_uart.c16
-rw-r--r--hw/display/ssd0303.c4
-rw-r--r--hw/gpio/max7310.c4
-rw-r--r--hw/i2c/core.c31
-rw-r--r--hw/i2c/i2c-ddc.c4
-rw-r--r--hw/i2c/smbus.c13
-rw-r--r--hw/i386/amd_iommu.c2
-rw-r--r--hw/i386/amd_iommu.h4
-rw-r--r--hw/input/lm832x.c4
-rw-r--r--hw/misc/tmp105.c3
-rw-r--r--hw/pci/pci.c4
-rw-r--r--hw/s390x/virtio-ccw.c4
-rw-r--r--hw/ssi/imx_spi.c11
-rw-r--r--hw/timer/ds1338.c4
-rw-r--r--hw/timer/twl92230.c4
-rw-r--r--hw/virtio/virtio-mmio.c2
-rw-r--r--include/block/block_int.h2
-rw-r--r--include/hw/acpi/acpi-defs.h33
-rw-r--r--include/hw/arm/virt-acpi-build.h47
-rw-r--r--include/hw/arm/virt.h41
-rw-r--r--include/hw/i2c/i2c.h16
-rw-r--r--include/qemu/coroutine.h6
-rw-r--r--qemu-img.c17
-rw-r--r--tests/Makefile.include2
-rw-r--r--tests/qemu-iotests/071.out8
-rw-r--r--tests/vhost-user-bridge.c1183
-rw-r--r--util/qemu-coroutine.c7
50 files changed, 3139 insertions, 1868 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 585cd5abd7..1444b26dc0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -508,7 +508,6 @@ M: Shannon Zhao <shannon.zhao@linaro.org>
L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/virt-acpi-build.c
-F: include/hw/arm/virt-acpi-build.h
STM32F205
M: Alistair Francis <alistair@alistair23.me>
@@ -885,7 +884,6 @@ F: hw/acpi/*
F: hw/smbios/*
F: hw/i386/acpi-build.[hc]
F: hw/arm/virt-acpi-build.c
-F: include/hw/arm/virt-acpi-build.h
ppc4xx
M: Alexander Graf <agraf@suse.de>
@@ -1720,9 +1718,9 @@ L: qemu-block@nongnu.org
S: Supported
F: block/linux-aio.c
F: include/block/raw-aio.h
-F: block/raw-posix.c
-F: block/raw-win32.c
-F: block/raw_bsd.c
+F: block/raw-format.c
+F: block/file-posix.c
+F: block/file-win32.c
F: block/win32-aio.c
qcow2
diff --git a/Makefile b/Makefile
index 214cbad35d..1a8bfb225c 100644
--- a/Makefile
+++ b/Makefile
@@ -149,6 +149,7 @@ dummy := $(call unnest-vars,, \
qga-obj-y \
ivshmem-client-obj-y \
ivshmem-server-obj-y \
+ libvhost-user-obj-y \
qga-vss-dll-obj-y \
block-obj-y \
block-obj-m \
diff --git a/Makefile.objs b/Makefile.objs
index 51c36a4d54..01cef866e4 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -115,7 +115,7 @@ qga-vss-dll-obj-y = qga/
# contrib
ivshmem-client-obj-y = contrib/ivshmem-client/
ivshmem-server-obj-y = contrib/ivshmem-server/
-
+libvhost-user-obj-y = contrib/libvhost-user/
######################################################################
trace-events-y = trace-events
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 67a036a1df..0b8fd06f27 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,4 +1,4 @@
-block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
+block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
@@ -6,8 +6,8 @@ block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
block-obj-y += quorum.o
block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
block-obj-y += block-backend.o snapshot.o qapi.o
-block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
-block-obj-$(CONFIG_POSIX) += raw-posix.o
+block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
+block-obj-$(CONFIG_POSIX) += file-posix.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
block-obj-y += null.o mirror.o commit.o io.o
block-obj-y += throttle-groups.o
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 4127571454..acccf85666 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -58,10 +58,6 @@ typedef struct BlkdebugSuspendedReq {
QLIST_ENTRY(BlkdebugSuspendedReq) next;
} BlkdebugSuspendedReq;
-static const AIOCBInfo blkdebug_aiocb_info = {
- .aiocb_size = sizeof(BlkdebugAIOCB),
-};
-
enum {
ACTION_INJECT_ERROR,
ACTION_SET_STATE,
@@ -77,7 +73,7 @@ typedef struct BlkdebugRule {
int error;
int immediately;
int once;
- int64_t sector;
+ int64_t offset;
} inject;
struct {
int new_state;
@@ -174,6 +170,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
const char* event_name;
BlkdebugEvent event;
struct BlkdebugRule *rule;
+ int64_t sector;
/* Find the right event for the rule */
event_name = qemu_opt_get(opts, "event");
@@ -200,7 +197,9 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
rule->options.inject.once = qemu_opt_get_bool(opts, "once", 0);
rule->options.inject.immediately =
qemu_opt_get_bool(opts, "immediately", 0);
- rule->options.inject.sector = qemu_opt_get_number(opts, "sector", -1);
+ sector = qemu_opt_get_number(opts, "sector", -1);
+ rule->options.inject.offset =
+ sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
break;
case ACTION_SET_STATE:
@@ -408,17 +407,14 @@ out:
static void error_callback_bh(void *opaque)
{
- struct BlkdebugAIOCB *acb = opaque;
- acb->common.cb(acb->common.opaque, acb->ret);
- qemu_aio_unref(acb);
+ Coroutine *co = opaque;
+ qemu_coroutine_enter(co);
}
-static BlockAIOCB *inject_error(BlockDriverState *bs,
- BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
+static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
{
BDRVBlkdebugState *s = bs->opaque;
int error = rule->options.inject.error;
- struct BlkdebugAIOCB *acb;
bool immediately = rule->options.inject.immediately;
if (rule->options.inject.once) {
@@ -426,81 +422,79 @@ static BlockAIOCB *inject_error(BlockDriverState *bs,
remove_rule(rule);
}
- if (immediately) {
- return NULL;
+ if (!immediately) {
+ aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
+ qemu_coroutine_self());
+ qemu_coroutine_yield();
}
- acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
- acb->ret = -error;
-
- aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh, acb);
-
- return &acb->common;
+ return -error;
}
-static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn
+blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVBlkdebugState *s = bs->opaque;
BlkdebugRule *rule = NULL;
QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
- if (rule->options.inject.sector == -1 ||
- (rule->options.inject.sector >= sector_num &&
- rule->options.inject.sector < sector_num + nb_sectors)) {
+ uint64_t inject_offset = rule->options.inject.offset;
+
+ if (inject_offset == -1 ||
+ (inject_offset >= offset && inject_offset < offset + bytes))
+ {
break;
}
}
if (rule && rule->options.inject.error) {
- return inject_error(bs, cb, opaque, rule);
+ return inject_error(bs, rule);
}
- return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
- cb, opaque);
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
}
-static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn
+blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVBlkdebugState *s = bs->opaque;
BlkdebugRule *rule = NULL;
QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
- if (rule->options.inject.sector == -1 ||
- (rule->options.inject.sector >= sector_num &&
- rule->options.inject.sector < sector_num + nb_sectors)) {
+ uint64_t inject_offset = rule->options.inject.offset;
+
+ if (inject_offset == -1 ||
+ (inject_offset >= offset && inject_offset < offset + bytes))
+ {
break;
}
}
if (rule && rule->options.inject.error) {
- return inject_error(bs, cb, opaque, rule);
+ return inject_error(bs, rule);
}
- return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
- cb, opaque);
+ return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
}
-static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
- BlockCompletionFunc *cb, void *opaque)
+static int blkdebug_co_flush(BlockDriverState *bs)
{
BDRVBlkdebugState *s = bs->opaque;
BlkdebugRule *rule = NULL;
QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
- if (rule->options.inject.sector == -1) {
+ if (rule->options.inject.offset == -1) {
break;
}
}
if (rule && rule->options.inject.error) {
- return inject_error(bs, cb, opaque, rule);
+ return inject_error(bs, rule);
}
- return bdrv_aio_flush(bs->file->bs, cb, opaque);
+ return bdrv_co_flush(bs->file->bs);
}
@@ -752,9 +746,9 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_refresh_filename = blkdebug_refresh_filename,
.bdrv_refresh_limits = blkdebug_refresh_limits,
- .bdrv_aio_readv = blkdebug_aio_readv,
- .bdrv_aio_writev = blkdebug_aio_writev,
- .bdrv_aio_flush = blkdebug_aio_flush,
+ .bdrv_co_preadv = blkdebug_co_preadv,
+ .bdrv_co_pwritev = blkdebug_co_pwritev,
+ .bdrv_co_flush_to_disk = blkdebug_co_flush,
.bdrv_debug_event = blkdebug_debug_event,
.bdrv_debug_breakpoint = blkdebug_debug_breakpoint,
diff --git a/block/blkverify.c b/block/blkverify.c
index 28f9af6dba..43a940c2f5 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -19,38 +19,36 @@ typedef struct {
BdrvChild *test_file;
} BDRVBlkverifyState;
-typedef struct BlkverifyAIOCB BlkverifyAIOCB;
-struct BlkverifyAIOCB {
- BlockAIOCB common;
+typedef struct BlkverifyRequest {
+ Coroutine *co;
+ BlockDriverState *bs;
/* Request metadata */
bool is_write;
- int64_t sector_num;
- int nb_sectors;
+ uint64_t offset;
+ uint64_t bytes;
+ int flags;
- int ret; /* first completed request's result */
- unsigned int done; /* completion counter */
+ int (*request_fn)(BdrvChild *, int64_t, unsigned int, QEMUIOVector *,
+ BdrvRequestFlags);
- QEMUIOVector *qiov; /* user I/O vector */
- QEMUIOVector raw_qiov; /* cloned I/O vector for raw file */
- void *buf; /* buffer for raw file I/O */
+ int ret; /* test image result */
+ int raw_ret; /* raw image result */
- void (*verify)(BlkverifyAIOCB *acb);
-};
+ unsigned int done; /* completion counter */
-static const AIOCBInfo blkverify_aiocb_info = {
- .aiocb_size = sizeof(BlkverifyAIOCB),
-};
+ QEMUIOVector *qiov; /* user I/O vector */
+ QEMUIOVector *raw_qiov; /* cloned I/O vector for raw file */
+} BlkverifyRequest;
-static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
+static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyRequest *r,
const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
- fprintf(stderr, "blkverify: %s sector_num=%" PRId64 " nb_sectors=%d ",
- acb->is_write ? "write" : "read", acb->sector_num,
- acb->nb_sectors);
+ fprintf(stderr, "blkverify: %s offset=%" PRId64 " bytes=%" PRId64 " ",
+ r->is_write ? "write" : "read", r->offset, r->bytes);
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
va_end(ap);
@@ -166,113 +164,106 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
return bdrv_getlength(s->test_file->bs);
}
-static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
- int64_t sector_num, QEMUIOVector *qiov,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque)
+static void coroutine_fn blkverify_do_test_req(void *opaque)
{
- BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque);
-
- acb->is_write = is_write;
- acb->sector_num = sector_num;
- acb->nb_sectors = nb_sectors;
- acb->ret = -EINPROGRESS;
- acb->done = 0;
- acb->qiov = qiov;
- acb->buf = NULL;
- acb->verify = NULL;
- return acb;
+ BlkverifyRequest *r = opaque;
+ BDRVBlkverifyState *s = r->bs->opaque;
+
+ r->ret = r->request_fn(s->test_file, r->offset, r->bytes, r->qiov,
+ r->flags);
+ r->done++;
+ qemu_coroutine_enter_if_inactive(r->co);
}
-static void blkverify_aio_bh(void *opaque)
+static void coroutine_fn blkverify_do_raw_req(void *opaque)
{
- BlkverifyAIOCB *acb = opaque;
+ BlkverifyRequest *r = opaque;
- if (acb->buf) {
- qemu_iovec_destroy(&acb->raw_qiov);
- qemu_vfree(acb->buf);
- }
- acb->common.cb(acb->common.opaque, acb->ret);
- qemu_aio_unref(acb);
+ r->raw_ret = r->request_fn(r->bs->file, r->offset, r->bytes, r->raw_qiov,
+ r->flags);
+ r->done++;
+ qemu_coroutine_enter_if_inactive(r->co);
}
-static void blkverify_aio_cb(void *opaque, int ret)
+static int coroutine_fn
+blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, QEMUIOVector *raw_qiov,
+ int flags, bool is_write)
{
- BlkverifyAIOCB *acb = opaque;
-
- switch (++acb->done) {
- case 1:
- acb->ret = ret;
- break;
-
- case 2:
- if (acb->ret != ret) {
- blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
- }
-
- if (acb->verify) {
- acb->verify(acb);
- }
+ Coroutine *co_a, *co_b;
+
+ *r = (BlkverifyRequest) {
+ .co = qemu_coroutine_self(),
+ .bs = bs,
+ .offset = offset,
+ .bytes = bytes,
+ .qiov = qiov,
+ .raw_qiov = raw_qiov,
+ .flags = flags,
+ .is_write = is_write,
+ .request_fn = is_write ? bdrv_co_pwritev : bdrv_co_preadv,
+ };
+
+ co_a = qemu_coroutine_create(blkverify_do_test_req, r);
+ co_b = qemu_coroutine_create(blkverify_do_raw_req, r);
+
+ qemu_coroutine_enter(co_a);
+ qemu_coroutine_enter(co_b);
+
+ while (r->done < 2) {
+ qemu_coroutine_yield();
+ }
- aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
- blkverify_aio_bh, acb);
- break;
+ if (r->ret != r->raw_ret) {
+ blkverify_err(r, "return value mismatch %d != %d", r->ret, r->raw_ret);
}
+
+ return r->ret;
}
-static void blkverify_verify_readv(BlkverifyAIOCB *acb)
+static int coroutine_fn
+blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
- ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
- if (offset != -1) {
- blkverify_err(acb, "contents mismatch in sector %" PRId64,
- acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
+ BlkverifyRequest r;
+ QEMUIOVector raw_qiov;
+ void *buf;
+ ssize_t cmp_offset;
+ int ret;
+
+ buf = qemu_blockalign(bs->file->bs, qiov->size);
+ qemu_iovec_init(&raw_qiov, qiov->niov);
+ qemu_iovec_clone(&raw_qiov, qiov, buf);
+
+ ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags,
+ false);
+
+ cmp_offset = qemu_iovec_compare(qiov, &raw_qiov);
+ if (cmp_offset != -1) {
+ blkverify_err(&r, "contents mismatch at offset %" PRId64,
+ offset + cmp_offset);
}
-}
-static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- BDRVBlkverifyState *s = bs->opaque;
- BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
- nb_sectors, cb, opaque);
-
- acb->verify = blkverify_verify_readv;
- acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
- qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
- qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
-
- bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
- blkverify_aio_cb, acb);
- bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
- blkverify_aio_cb, acb);
- return &acb->common;
+ qemu_iovec_destroy(&raw_qiov);
+ qemu_vfree(buf);
+
+ return ret;
}
-static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn
+blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
- BDRVBlkverifyState *s = bs->opaque;
- BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
- nb_sectors, cb, opaque);
-
- bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
- blkverify_aio_cb, acb);
- bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
- blkverify_aio_cb, acb);
- return &acb->common;
+ BlkverifyRequest r;
+ return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true);
}
-static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
- BlockCompletionFunc *cb,
- void *opaque)
+static int blkverify_co_flush(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
/* Only flush test file, the raw file is not important */
- return bdrv_aio_flush(s->test_file->bs, cb, opaque);
+ return bdrv_co_flush(s->test_file->bs);
}
static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
@@ -332,9 +323,9 @@ static BlockDriver bdrv_blkverify = {
.bdrv_getlength = blkverify_getlength,
.bdrv_refresh_filename = blkverify_refresh_filename,
- .bdrv_aio_readv = blkverify_aio_readv,
- .bdrv_aio_writev = blkverify_aio_writev,
- .bdrv_aio_flush = blkverify_aio_flush,
+ .bdrv_co_preadv = blkverify_co_preadv,
+ .bdrv_co_pwritev = blkverify_co_pwritev,
+ .bdrv_co_flush = blkverify_co_flush,
.is_filter = true,
.bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
diff --git a/block/raw-posix.c b/block/file-posix.c
index 28b47d977b..28b47d977b 100644
--- a/block/raw-posix.c
+++ b/block/file-posix.c
diff --git a/block/raw-win32.c b/block/file-win32.c
index 800fabdd72..800fabdd72 100644
--- a/block/raw-win32.c
+++ b/block/file-win32.c
diff --git a/block/gluster.c b/block/gluster.c
index a0a74e49fd..1a22f2982d 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -1253,7 +1253,7 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs)
* If @start is in a trailing hole or beyond EOF, return -ENXIO.
* If we can't find out, return a negative errno other than -ENXIO.
*
- * (Shamefully copied from raw-posix.c, only miniscule adaptions.)
+ * (Shamefully copied from file-posix.c, only miniscule adaptions.)
*/
static int find_allocation(BlockDriverState *bs, off_t start,
off_t *data, off_t *hole)
@@ -1349,7 +1349,7 @@ exit:
* 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
* beyond the end of the disk image it will be clamped.
*
- * (Based on raw_co_get_block_status() from raw-posix.c.)
+ * (Based on raw_co_get_block_status() from file-posix.c.)
*/
static int64_t coroutine_fn qemu_gluster_co_get_block_status(
BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
diff --git a/block/quorum.c b/block/quorum.c
index d122299352..86e2072dce 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -97,7 +97,7 @@ typedef struct QuorumAIOCB QuorumAIOCB;
* $children_count QuorumChildRequest.
*/
typedef struct QuorumChildRequest {
- BlockAIOCB *aiocb;
+ BlockDriverState *bs;
QEMUIOVector qiov;
uint8_t *buf;
int ret;
@@ -110,11 +110,12 @@ typedef struct QuorumChildRequest {
* used to do operations on each children and track overall progress.
*/
struct QuorumAIOCB {
- BlockAIOCB common;
+ BlockDriverState *bs;
+ Coroutine *co;
/* Request metadata */
- uint64_t sector_num;
- int nb_sectors;
+ uint64_t offset;
+ uint64_t bytes;
QEMUIOVector *qiov; /* calling IOV */
@@ -133,32 +134,15 @@ struct QuorumAIOCB {
int children_read; /* how many children have been read from */
};
-static bool quorum_vote(QuorumAIOCB *acb);
-
-static void quorum_aio_cancel(BlockAIOCB *blockacb)
-{
- QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
- BDRVQuorumState *s = acb->common.bs->opaque;
- int i;
-
- /* cancel all callbacks */
- for (i = 0; i < s->num_children; i++) {
- if (acb->qcrs[i].aiocb) {
- bdrv_aio_cancel_async(acb->qcrs[i].aiocb);
- }
- }
-}
-
-static AIOCBInfo quorum_aiocb_info = {
- .aiocb_size = sizeof(QuorumAIOCB),
- .cancel_async = quorum_aio_cancel,
-};
+typedef struct QuorumCo {
+ QuorumAIOCB *acb;
+ int idx;
+} QuorumCo;
static void quorum_aio_finalize(QuorumAIOCB *acb)
{
- acb->common.cb(acb->common.opaque, acb->vote_ret);
g_free(acb->qcrs);
- qemu_aio_unref(acb);
+ g_free(acb);
}
static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
@@ -171,30 +155,26 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
return a->l == b->l;
}
-static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
- BlockDriverState *bs,
+static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
QEMUIOVector *qiov,
- uint64_t sector_num,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque)
+ uint64_t offset,
+ uint64_t bytes)
{
- QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
+ BDRVQuorumState *s = bs->opaque;
+ QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
int i;
- acb->common.bs->opaque = s;
- acb->sector_num = sector_num;
- acb->nb_sectors = nb_sectors;
- acb->qiov = qiov;
- acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
- acb->count = 0;
- acb->success_count = 0;
- acb->rewrite_count = 0;
- acb->votes.compare = quorum_sha256_compare;
- QLIST_INIT(&acb->votes.vote_list);
- acb->is_read = false;
- acb->vote_ret = 0;
+ *acb = (QuorumAIOCB) {
+ .co = qemu_coroutine_self(),
+ .bs = bs,
+ .offset = offset,
+ .bytes = bytes,
+ .qiov = qiov,
+ .votes.compare = quorum_sha256_compare,
+ .votes.vote_list = QLIST_HEAD_INITIALIZER(acb.votes.vote_list),
+ };
+ acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
for (i = 0; i < s->num_children; i++) {
acb->qcrs[i].buf = NULL;
acb->qcrs[i].ret = 0;
@@ -204,30 +184,37 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
return acb;
}
-static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
- int nb_sectors, char *node_name, int ret)
+static void quorum_report_bad(QuorumOpType type, uint64_t offset,
+ uint64_t bytes, char *node_name, int ret)
{
const char *msg = NULL;
+ int64_t start_sector = offset / BDRV_SECTOR_SIZE;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+
if (ret < 0) {
msg = strerror(-ret);
}
- qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
- sector_num, nb_sectors, &error_abort);
+ qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, start_sector,
+ end_sector - start_sector, &error_abort);
}
static void quorum_report_failure(QuorumAIOCB *acb)
{
- const char *reference = bdrv_get_device_or_node_name(acb->common.bs);
- qapi_event_send_quorum_failure(reference, acb->sector_num,
- acb->nb_sectors, &error_abort);
+ const char *reference = bdrv_get_device_or_node_name(acb->bs);
+ int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE;
+ int64_t end_sector = DIV_ROUND_UP(acb->offset + acb->bytes,
+ BDRV_SECTOR_SIZE);
+
+ qapi_event_send_quorum_failure(reference, start_sector,
+ end_sector - start_sector, &error_abort);
}
static int quorum_vote_error(QuorumAIOCB *acb);
static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
{
- BDRVQuorumState *s = acb->common.bs->opaque;
+ BDRVQuorumState *s = acb->bs->opaque;
if (acb->success_count < s->threshold) {
acb->vote_ret = quorum_vote_error(acb);
@@ -238,22 +225,7 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
return false;
}
-static void quorum_rewrite_aio_cb(void *opaque, int ret)
-{
- QuorumAIOCB *acb = opaque;
-
- /* one less rewrite to do */
- acb->rewrite_count--;
-
- /* wait until all rewrite callbacks have completed */
- if (acb->rewrite_count) {
- return;
- }
-
- quorum_aio_finalize(acb);
-}
-
-static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb);
+static int read_fifo_child(QuorumAIOCB *acb);
static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
{
@@ -272,70 +244,7 @@ static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret)
{
QuorumAIOCB *acb = sacb->parent;
QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
- quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
- sacb->aiocb->bs->node_name, ret);
-}
-
-static void quorum_fifo_aio_cb(void *opaque, int ret)
-{
- QuorumChildRequest *sacb = opaque;
- QuorumAIOCB *acb = sacb->parent;
- BDRVQuorumState *s = acb->common.bs->opaque;
-
- assert(acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO);
-
- if (ret < 0) {
- quorum_report_bad_acb(sacb, ret);
-
- /* We try to read next child in FIFO order if we fail to read */
- if (acb->children_read < s->num_children) {
- read_fifo_child(acb);
- return;
- }
- }
-
- acb->vote_ret = ret;
-
- /* FIXME: rewrite failed children if acb->children_read > 1? */
- quorum_aio_finalize(acb);
-}
-
-static void quorum_aio_cb(void *opaque, int ret)
-{
- QuorumChildRequest *sacb = opaque;
- QuorumAIOCB *acb = sacb->parent;
- BDRVQuorumState *s = acb->common.bs->opaque;
- bool rewrite = false;
- int i;
-
- sacb->ret = ret;
- if (ret == 0) {
- acb->success_count++;
- } else {
- quorum_report_bad_acb(sacb, ret);
- }
- acb->count++;
- assert(acb->count <= s->num_children);
- assert(acb->success_count <= s->num_children);
- if (acb->count < s->num_children) {
- return;
- }
-
- /* Do the vote on read */
- if (acb->is_read) {
- rewrite = quorum_vote(acb);
- for (i = 0; i < s->num_children; i++) {
- qemu_vfree(acb->qcrs[i].buf);
- qemu_iovec_destroy(&acb->qcrs[i].qiov);
- }
- } else {
- quorum_has_too_much_io_failed(acb);
- }
-
- /* if no rewrite is done the code will finish right away */
- if (!rewrite) {
- quorum_aio_finalize(acb);
- }
+ quorum_report_bad(type, acb->offset, acb->bytes, sacb->bs->node_name, ret);
}
static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -350,14 +259,31 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
continue;
}
QLIST_FOREACH(item, &version->items, next) {
- quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
- acb->nb_sectors,
+ quorum_report_bad(QUORUM_OP_TYPE_READ, acb->offset, acb->bytes,
s->children[item->index]->bs->node_name, 0);
}
}
}
-static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
+static void quorum_rewrite_entry(void *opaque)
+{
+ QuorumCo *co = opaque;
+ QuorumAIOCB *acb = co->acb;
+ BDRVQuorumState *s = acb->bs->opaque;
+
+ /* Ignore any errors, it's just a correction attempt for already
+ * corrupted data. */
+ bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes,
+ acb->qiov, 0);
+
+ /* Wake up the caller after the last rewrite */
+ acb->rewrite_count--;
+ if (!acb->rewrite_count) {
+ qemu_coroutine_enter_if_inactive(acb->co);
+ }
+}
+
+static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
QuorumVoteValue *value)
{
QuorumVoteVersion *version;
@@ -376,7 +302,7 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
}
}
- /* quorum_rewrite_aio_cb will count down this to zero */
+ /* quorum_rewrite_entry will count down this to zero */
acb->rewrite_count = count;
/* now fire the correcting rewrites */
@@ -385,9 +311,14 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
continue;
}
QLIST_FOREACH(item, &version->items, next) {
- bdrv_aio_writev(s->children[item->index], acb->sector_num,
- acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
- acb);
+ Coroutine *co;
+ QuorumCo data = {
+ .acb = acb,
+ .idx = item->index,
+ };
+
+ co = qemu_coroutine_create(quorum_rewrite_entry, &data);
+ qemu_coroutine_enter(co);
}
}
@@ -507,8 +438,8 @@ static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
va_list ap;
va_start(ap, fmt);
- fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
- acb->sector_num, acb->nb_sectors);
+ fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 " ",
+ acb->offset, acb->bytes);
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
va_end(ap);
@@ -519,16 +450,15 @@ static bool quorum_compare(QuorumAIOCB *acb,
QEMUIOVector *a,
QEMUIOVector *b)
{
- BDRVQuorumState *s = acb->common.bs->opaque;
+ BDRVQuorumState *s = acb->bs->opaque;
ssize_t offset;
/* This driver will replace blkverify in this particular case */
if (s->is_blkverify) {
offset = qemu_iovec_compare(a, b);
if (offset != -1) {
- quorum_err(acb, "contents mismatch in sector %" PRId64,
- acb->sector_num +
- (uint64_t)(offset / BDRV_SECTOR_SIZE));
+ quorum_err(acb, "contents mismatch at offset %" PRIu64,
+ acb->offset + offset);
}
return true;
}
@@ -539,7 +469,7 @@ static bool quorum_compare(QuorumAIOCB *acb,
/* Do a vote to get the error code */
static int quorum_vote_error(QuorumAIOCB *acb)
{
- BDRVQuorumState *s = acb->common.bs->opaque;
+ BDRVQuorumState *s = acb->bs->opaque;
QuorumVoteVersion *winner = NULL;
QuorumVotes error_votes;
QuorumVoteValue result_value;
@@ -568,17 +498,16 @@ static int quorum_vote_error(QuorumAIOCB *acb)
return ret;
}
-static bool quorum_vote(QuorumAIOCB *acb)
+static void quorum_vote(QuorumAIOCB *acb)
{
bool quorum = true;
- bool rewrite = false;
int i, j, ret;
QuorumVoteValue hash;
- BDRVQuorumState *s = acb->common.bs->opaque;
+ BDRVQuorumState *s = acb->bs->opaque;
QuorumVoteVersion *winner;
if (quorum_has_too_much_io_failed(acb)) {
- return false;
+ return;
}
/* get the index of the first successful read */
@@ -606,7 +535,7 @@ static bool quorum_vote(QuorumAIOCB *acb)
/* Every successful read agrees */
if (quorum) {
quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
- return false;
+ return;
}
/* compute hashes for each successful read, also store indexes */
@@ -641,19 +570,46 @@ static bool quorum_vote(QuorumAIOCB *acb)
/* corruption correction is enabled */
if (s->rewrite_corrupted) {
- rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
+ quorum_rewrite_bad_versions(acb, &winner->value);
}
free_exit:
/* free lists */
quorum_free_vote_list(&acb->votes);
- return rewrite;
}
-static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
+static void read_quorum_children_entry(void *opaque)
{
- BDRVQuorumState *s = acb->common.bs->opaque;
- int i;
+ QuorumCo *co = opaque;
+ QuorumAIOCB *acb = co->acb;
+ BDRVQuorumState *s = acb->bs->opaque;
+ int i = co->idx;
+ QuorumChildRequest *sacb = &acb->qcrs[i];
+
+ sacb->bs = s->children[i]->bs;
+ sacb->ret = bdrv_co_preadv(s->children[i], acb->offset, acb->bytes,
+ &acb->qcrs[i].qiov, 0);
+
+ if (sacb->ret == 0) {
+ acb->success_count++;
+ } else {
+ quorum_report_bad_acb(sacb, sacb->ret);
+ }
+
+ acb->count++;
+ assert(acb->count <= s->num_children);
+ assert(acb->success_count <= s->num_children);
+
+ /* Wake up the caller after the last read */
+ if (acb->count == s->num_children) {
+ qemu_coroutine_enter_if_inactive(acb->co);
+ }
+}
+
+static int read_quorum_children(QuorumAIOCB *acb)
+{
+ BDRVQuorumState *s = acb->bs->opaque;
+ int i, ret;
acb->children_read = s->num_children;
for (i = 0; i < s->num_children; i++) {
@@ -663,65 +619,131 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
}
for (i = 0; i < s->num_children; i++) {
- acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num,
- &acb->qcrs[i].qiov, acb->nb_sectors,
- quorum_aio_cb, &acb->qcrs[i]);
+ Coroutine *co;
+ QuorumCo data = {
+ .acb = acb,
+ .idx = i,
+ };
+
+ co = qemu_coroutine_create(read_quorum_children_entry, &data);
+ qemu_coroutine_enter(co);
}
- return &acb->common;
+ while (acb->count < s->num_children) {
+ qemu_coroutine_yield();
+ }
+
+ /* Do the vote on read */
+ quorum_vote(acb);
+ for (i = 0; i < s->num_children; i++) {
+ qemu_vfree(acb->qcrs[i].buf);
+ qemu_iovec_destroy(&acb->qcrs[i].qiov);
+ }
+
+ while (acb->rewrite_count) {
+ qemu_coroutine_yield();
+ }
+
+ ret = acb->vote_ret;
+
+ return ret;
}
-static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
+static int read_fifo_child(QuorumAIOCB *acb)
{
- BDRVQuorumState *s = acb->common.bs->opaque;
- int n = acb->children_read++;
+ BDRVQuorumState *s = acb->bs->opaque;
+ int n, ret;
+
+ /* We try to read the next child in FIFO order if we failed to read */
+ do {
+ n = acb->children_read++;
+ acb->qcrs[n].bs = s->children[n]->bs;
+ ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes,
+ acb->qiov, 0);
+ if (ret < 0) {
+ quorum_report_bad_acb(&acb->qcrs[n], ret);
+ }
+ } while (ret < 0 && acb->children_read < s->num_children);
- acb->qcrs[n].aiocb = bdrv_aio_readv(s->children[n], acb->sector_num,
- acb->qiov, acb->nb_sectors,
- quorum_fifo_aio_cb, &acb->qcrs[n]);
+ /* FIXME: rewrite failed children if acb->children_read > 1? */
- return &acb->common;
+ return ret;
}
-static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque)
+static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int flags)
{
BDRVQuorumState *s = bs->opaque;
- QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
- nb_sectors, cb, opaque);
+ QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+ int ret;
+
acb->is_read = true;
acb->children_read = 0;
if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
- return read_quorum_children(acb);
+ ret = read_quorum_children(acb);
+ } else {
+ ret = read_fifo_child(acb);
+ }
+ quorum_aio_finalize(acb);
+
+ return ret;
+}
+
+static void write_quorum_entry(void *opaque)
+{
+ QuorumCo *co = opaque;
+ QuorumAIOCB *acb = co->acb;
+ BDRVQuorumState *s = acb->bs->opaque;
+ int i = co->idx;
+ QuorumChildRequest *sacb = &acb->qcrs[i];
+
+ sacb->bs = s->children[i]->bs;
+ sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
+ acb->qiov, 0);
+ if (sacb->ret == 0) {
+ acb->success_count++;
+ } else {
+ quorum_report_bad_acb(sacb, sacb->ret);
}
+ acb->count++;
+ assert(acb->count <= s->num_children);
+ assert(acb->success_count <= s->num_children);
- return read_fifo_child(acb);
+ /* Wake up the caller after the last write */
+ if (acb->count == s->num_children) {
+ qemu_coroutine_enter_if_inactive(acb->co);
+ }
}
-static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque)
+static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int flags)
{
BDRVQuorumState *s = bs->opaque;
- QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
- cb, opaque);
- int i;
+ QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+ int i, ret;
for (i = 0; i < s->num_children; i++) {
- acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i], sector_num,
- qiov, nb_sectors, &quorum_aio_cb,
- &acb->qcrs[i]);
+ Coroutine *co;
+ QuorumCo data = {
+ .acb = acb,
+ .idx = i,
+ };
+
+ co = qemu_coroutine_create(write_quorum_entry, &data);
+ qemu_coroutine_enter(co);
+ }
+
+ while (acb->count < s->num_children) {
+ qemu_coroutine_yield();
}
- return &acb->common;
+ quorum_has_too_much_io_failed(acb);
+
+ ret = acb->vote_ret;
+ quorum_aio_finalize(acb);
+
+ return ret;
}
static int64_t quorum_getlength(BlockDriverState *bs)
@@ -765,7 +787,7 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
result = bdrv_co_flush(s->children[i]->bs);
if (result) {
quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
- bdrv_nb_sectors(s->children[i]->bs),
+ bdrv_getlength(s->children[i]->bs),
s->children[i]->bs->node_name, result);
result_value.l = result;
quorum_count_vote(&error_votes, &result_value, i);
@@ -1098,8 +1120,8 @@ static BlockDriver bdrv_quorum = {
.bdrv_getlength = quorum_getlength,
- .bdrv_aio_readv = quorum_aio_readv,
- .bdrv_aio_writev = quorum_aio_writev,
+ .bdrv_co_preadv = quorum_co_preadv,
+ .bdrv_co_pwritev = quorum_co_pwritev,
.bdrv_add_child = quorum_add_child,
.bdrv_del_child = quorum_del_child,
diff --git a/block/raw_bsd.c b/block/raw-format.c
index 8a5b9b0424..8404a82e0c 100644
--- a/block/raw_bsd.c
+++ b/block/raw-format.c
@@ -1,4 +1,4 @@
-/* BlockDriver implementation for "raw"
+/* BlockDriver implementation for "raw" format driver
*
* Copyright (C) 2010-2016 Red Hat, Inc.
* Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
diff --git a/block/trace-events b/block/trace-events
index cfc05f2478..671a6a851c 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -53,8 +53,8 @@ qmp_block_job_resume(void *job) "job %p"
qmp_block_job_complete(void *job) "job %p"
qmp_block_stream(void *bs, void *job) "bs %p job %p"
-# block/raw-win32.c
-# block/raw-posix.c
+# block/file-win32.c
+# block/file-posix.c
paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d"
paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"
diff --git a/configure b/configure
index 218df87d21..86f5214dd0 100755
--- a/configure
+++ b/configure
@@ -2750,7 +2750,7 @@ if compile_prog "" "" ; then
fi
##########################################
-# xfsctl() probe, used for raw-posix
+# xfsctl() probe, used for file-posix.c
if test "$xfs" != "no" ; then
cat > $TMPC << EOF
#include <stddef.h> /* NULL */
diff --git a/contrib/libvhost-user/Makefile.objs b/contrib/libvhost-user/Makefile.objs
new file mode 100644
index 0000000000..cef1ad6e31
--- /dev/null
+++ b/contrib/libvhost-user/Makefile.objs
@@ -0,0 +1 @@
+libvhost-user-obj-y = libvhost-user.o
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
new file mode 100644
index 0000000000..af4faad60b
--- /dev/null
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -0,0 +1,1499 @@
+/*
+ * Vhost User library
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Marc-André Lureau <mlureau@redhat.com>
+ * Victor Kaplansky <victork@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include <qemu/osdep.h>
+#include <sys/eventfd.h>
+#include <linux/vhost.h>
+
+#include "qemu/atomic.h"
+
+#include "libvhost-user.h"
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION 1
+#define LIBVHOST_USER_DEBUG 0
+
+#define DPRINT(...) \
+ do { \
+ if (LIBVHOST_USER_DEBUG) { \
+ fprintf(stderr, __VA_ARGS__); \
+ } \
+ } while (0)
+
+static const char *
+vu_request_to_string(int req)
+{
+#define REQ(req) [req] = #req
+ static const char *vu_request_str[] = {
+ REQ(VHOST_USER_NONE),
+ REQ(VHOST_USER_GET_FEATURES),
+ REQ(VHOST_USER_SET_FEATURES),
+ REQ(VHOST_USER_NONE),
+ REQ(VHOST_USER_GET_FEATURES),
+ REQ(VHOST_USER_SET_FEATURES),
+ REQ(VHOST_USER_SET_OWNER),
+ REQ(VHOST_USER_RESET_OWNER),
+ REQ(VHOST_USER_SET_MEM_TABLE),
+ REQ(VHOST_USER_SET_LOG_BASE),
+ REQ(VHOST_USER_SET_LOG_FD),
+ REQ(VHOST_USER_SET_VRING_NUM),
+ REQ(VHOST_USER_SET_VRING_ADDR),
+ REQ(VHOST_USER_SET_VRING_BASE),
+ REQ(VHOST_USER_GET_VRING_BASE),
+ REQ(VHOST_USER_SET_VRING_KICK),
+ REQ(VHOST_USER_SET_VRING_CALL),
+ REQ(VHOST_USER_SET_VRING_ERR),
+ REQ(VHOST_USER_GET_PROTOCOL_FEATURES),
+ REQ(VHOST_USER_SET_PROTOCOL_FEATURES),
+ REQ(VHOST_USER_GET_QUEUE_NUM),
+ REQ(VHOST_USER_SET_VRING_ENABLE),
+ REQ(VHOST_USER_SEND_RARP),
+ REQ(VHOST_USER_INPUT_GET_CONFIG),
+ REQ(VHOST_USER_MAX),
+ };
+#undef REQ
+
+ if (req < VHOST_USER_MAX) {
+ return vu_request_str[req];
+ } else {
+ return "unknown";
+ }
+}
+
+static void
+vu_panic(VuDev *dev, const char *msg, ...)
+{
+ char *buf = NULL;
+ va_list ap;
+
+ va_start(ap, msg);
+ (void)vasprintf(&buf, msg, ap);
+ va_end(ap);
+
+ dev->broken = true;
+ dev->panic(dev, buf);
+ free(buf);
+
+ /* FIXME: find a way to call virtio_error? */
+}
+
+/* Translate guest physical address to our virtual address. */
+void *
+vu_gpa_to_va(VuDev *dev, uint64_t guest_addr)
+{
+ int i;
+
+ /* Find matching memory region. */
+ for (i = 0; i < dev->nregions; i++) {
+ VuDevRegion *r = &dev->regions[i];
+
+ if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
+ return (void *)(uintptr_t)
+ guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
+ }
+ }
+
+ return NULL;
+}
+
+/* Translate qemu virtual address to our virtual address. */
+static void *
+qva_to_va(VuDev *dev, uint64_t qemu_addr)
+{
+ int i;
+
+ /* Find matching memory region. */
+ for (i = 0; i < dev->nregions; i++) {
+ VuDevRegion *r = &dev->regions[i];
+
+ if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
+ return (void *)(uintptr_t)
+ qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+vmsg_close_fds(VhostUserMsg *vmsg)
+{
+ int i;
+
+ for (i = 0; i < vmsg->fd_num; i++) {
+ close(vmsg->fds[i]);
+ }
+}
+
+static bool
+vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
+{
+ char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
+ struct iovec iov = {
+ .iov_base = (char *)vmsg,
+ .iov_len = VHOST_USER_HDR_SIZE,
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = control,
+ .msg_controllen = sizeof(control),
+ };
+ size_t fd_size;
+ struct cmsghdr *cmsg;
+ int rc;
+
+ do {
+ rc = recvmsg(conn_fd, &msg, 0);
+ } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+ if (rc <= 0) {
+ vu_panic(dev, "Error while recvmsg: %s", strerror(errno));
+ return false;
+ }
+
+ vmsg->fd_num = 0;
+ for (cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&msg, cmsg))
+ {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+ fd_size = cmsg->cmsg_len - CMSG_LEN(0);
+ vmsg->fd_num = fd_size / sizeof(int);
+ memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
+ break;
+ }
+ }
+
+ if (vmsg->size > sizeof(vmsg->payload)) {
+ vu_panic(dev,
+ "Error: too big message request: %d, size: vmsg->size: %u, "
+ "while sizeof(vmsg->payload) = %zu\n",
+ vmsg->request, vmsg->size, sizeof(vmsg->payload));
+ goto fail;
+ }
+
+ if (vmsg->size) {
+ do {
+ rc = read(conn_fd, &vmsg->payload, vmsg->size);
+ } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+ if (rc <= 0) {
+ vu_panic(dev, "Error while reading: %s", strerror(errno));
+ goto fail;
+ }
+
+ assert(rc == vmsg->size);
+ }
+
+ return true;
+
+fail:
+ vmsg_close_fds(vmsg);
+
+ return false;
+}
+
+static bool
+vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
+{
+ int rc;
+ uint8_t *p = (uint8_t *)vmsg;
+
+ /* Set the version in the flags when sending the reply */
+ vmsg->flags &= ~VHOST_USER_VERSION_MASK;
+ vmsg->flags |= VHOST_USER_VERSION;
+ vmsg->flags |= VHOST_USER_REPLY_MASK;
+
+ do {
+ rc = write(conn_fd, p, VHOST_USER_HDR_SIZE);
+ } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+ do {
+ if (vmsg->data) {
+ rc = write(conn_fd, vmsg->data, vmsg->size);
+ } else {
+ rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size);
+ }
+ } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+ if (rc <= 0) {
+ vu_panic(dev, "Error while writing: %s", strerror(errno));
+ return false;
+ }
+
+ return true;
+}
+
+/* Kick the log_call_fd if required. */
+static void
+vu_log_kick(VuDev *dev)
+{
+ if (dev->log_call_fd != -1) {
+ DPRINT("Kicking the QEMU's log...\n");
+ if (eventfd_write(dev->log_call_fd, 1) < 0) {
+ vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
+ }
+ }
+}
+
+static void
+vu_log_page(uint8_t *log_table, uint64_t page)
+{
+ DPRINT("Logged dirty guest page: %"PRId64"\n", page);
+ atomic_or(&log_table[page / 8], 1 << (page % 8));
+}
+
+static void
+vu_log_write(VuDev *dev, uint64_t address, uint64_t length)
+{
+ uint64_t page;
+
+ if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
+ !dev->log_table || !length) {
+ return;
+ }
+
+ assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
+
+ page = address / VHOST_LOG_PAGE;
+ while (page * VHOST_LOG_PAGE < address + length) {
+ vu_log_page(dev->log_table, page);
+ page += VHOST_LOG_PAGE;
+ }
+
+ vu_log_kick(dev);
+}
+
+static void
+vu_kick_cb(VuDev *dev, int condition, void *data)
+{
+ int index = (intptr_t)data;
+ VuVirtq *vq = &dev->vq[index];
+ int sock = vq->kick_fd;
+ eventfd_t kick_data;
+ ssize_t rc;
+
+ rc = eventfd_read(sock, &kick_data);
+ if (rc == -1) {
+ vu_panic(dev, "kick eventfd_read(): %s", strerror(errno));
+ dev->remove_watch(dev, dev->vq[index].kick_fd);
+ } else {
+ DPRINT("Got kick_data: %016"PRIx64" handler:%p idx:%d\n",
+ kick_data, vq->handler, index);
+ if (vq->handler) {
+ vq->handler(dev, index);
+ }
+ }
+}
+
+static bool
+vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ vmsg->payload.u64 =
+ 1ULL << VHOST_F_LOG_ALL |
+ 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
+
+ if (dev->iface->get_features) {
+ vmsg->payload.u64 |= dev->iface->get_features(dev);
+ }
+
+ vmsg->size = sizeof(vmsg->payload.u64);
+
+ DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+ return true;
+}
+
+static void
+vu_set_enable_all_rings(VuDev *dev, bool enabled)
+{
+ int i;
+
+ for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
+ dev->vq[i].enable = enabled;
+ }
+}
+
+static bool
+vu_set_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+ dev->features = vmsg->payload.u64;
+
+ if (!(dev->features & VHOST_USER_F_PROTOCOL_FEATURES)) {
+ vu_set_enable_all_rings(dev, true);
+ }
+
+ if (dev->iface->set_features) {
+ dev->iface->set_features(dev, dev->features);
+ }
+
+ return false;
+}
+
+static bool
+vu_set_owner_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ return false;
+}
+
+static void
+vu_close_log(VuDev *dev)
+{
+ if (dev->log_table) {
+ if (munmap(dev->log_table, dev->log_size) != 0) {
+ perror("close log munmap() error");
+ }
+
+ dev->log_table = NULL;
+ }
+ if (dev->log_call_fd != -1) {
+ close(dev->log_call_fd);
+ dev->log_call_fd = -1;
+ }
+}
+
+static bool
+vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ vu_set_enable_all_rings(dev, false);
+
+ return false;
+}
+
+static bool
+vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int i;
+ VhostUserMemory *memory = &vmsg->payload.memory;
+ dev->nregions = memory->nregions;
+
+ DPRINT("Nregions: %d\n", memory->nregions);
+ for (i = 0; i < dev->nregions; i++) {
+ void *mmap_addr;
+ VhostUserMemoryRegion *msg_region = &memory->regions[i];
+ VuDevRegion *dev_region = &dev->regions[i];
+
+ DPRINT("Region %d\n", i);
+ DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
+ msg_region->guest_phys_addr);
+ DPRINT(" memory_size: 0x%016"PRIx64"\n",
+ msg_region->memory_size);
+ DPRINT(" userspace_addr 0x%016"PRIx64"\n",
+ msg_region->userspace_addr);
+ DPRINT(" mmap_offset 0x%016"PRIx64"\n",
+ msg_region->mmap_offset);
+
+ dev_region->gpa = msg_region->guest_phys_addr;
+ dev_region->size = msg_region->memory_size;
+ dev_region->qva = msg_region->userspace_addr;
+ dev_region->mmap_offset = msg_region->mmap_offset;
+
+ /* We don't use offset argument of mmap() since the
+ * mapped address has to be page aligned, and we use huge
+ * pages. */
+ mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ vmsg->fds[i], 0);
+
+ if (mmap_addr == MAP_FAILED) {
+ vu_panic(dev, "region mmap error: %s", strerror(errno));
+ } else {
+ dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
+ DPRINT(" mmap_addr: 0x%016"PRIx64"\n",
+ dev_region->mmap_addr);
+ }
+
+ close(vmsg->fds[i]);
+ }
+
+ return false;
+}
+
+static bool
+vu_set_log_base_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int fd;
+ uint64_t log_mmap_size, log_mmap_offset;
+ void *rc;
+
+ if (vmsg->fd_num != 1 ||
+ vmsg->size != sizeof(vmsg->payload.log)) {
+ vu_panic(dev, "Invalid log_base message");
+ return true;
+ }
+
+ fd = vmsg->fds[0];
+ log_mmap_offset = vmsg->payload.log.mmap_offset;
+ log_mmap_size = vmsg->payload.log.mmap_size;
+ DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
+ DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
+
+ rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+ log_mmap_offset);
+ if (rc == MAP_FAILED) {
+ perror("log mmap error");
+ }
+ dev->log_table = rc;
+ dev->log_size = log_mmap_size;
+
+ vmsg->size = sizeof(vmsg->payload.u64);
+
+ return true;
+}
+
+static bool
+vu_set_log_fd_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ if (vmsg->fd_num != 1) {
+ vu_panic(dev, "Invalid log_fd message");
+ return false;
+ }
+
+ if (dev->log_call_fd != -1) {
+ close(dev->log_call_fd);
+ }
+ dev->log_call_fd = vmsg->fds[0];
+ DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
+
+ return false;
+}
+
+static bool
+vu_set_vring_num_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ unsigned int index = vmsg->payload.state.index;
+ unsigned int num = vmsg->payload.state.num;
+
+ DPRINT("State.index: %d\n", index);
+ DPRINT("State.num: %d\n", num);
+ dev->vq[index].vring.num = num;
+
+ return false;
+}
+
+static bool
+vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ struct vhost_vring_addr *vra = &vmsg->payload.addr;
+ unsigned int index = vra->index;
+ VuVirtq *vq = &dev->vq[index];
+
+ DPRINT("vhost_vring_addr:\n");
+ DPRINT(" index: %d\n", vra->index);
+ DPRINT(" flags: %d\n", vra->flags);
+ DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr);
+ DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr);
+ DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr);
+ DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr);
+
+ vq->vring.flags = vra->flags;
+ vq->vring.desc = qva_to_va(dev, vra->desc_user_addr);
+ vq->vring.used = qva_to_va(dev, vra->used_user_addr);
+ vq->vring.avail = qva_to_va(dev, vra->avail_user_addr);
+ vq->vring.log_guest_addr = vra->log_guest_addr;
+
+ DPRINT("Setting virtq addresses:\n");
+ DPRINT(" vring_desc at %p\n", vq->vring.desc);
+ DPRINT(" vring_used at %p\n", vq->vring.used);
+ DPRINT(" vring_avail at %p\n", vq->vring.avail);
+
+ if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) {
+ vu_panic(dev, "Invalid vring_addr message");
+ return false;
+ }
+
+ vq->used_idx = vq->vring.used->idx;
+
+ return false;
+}
+
+static bool
+vu_set_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ unsigned int index = vmsg->payload.state.index;
+ unsigned int num = vmsg->payload.state.num;
+
+ DPRINT("State.index: %d\n", index);
+ DPRINT("State.num: %d\n", num);
+ dev->vq[index].shadow_avail_idx = dev->vq[index].last_avail_idx = num;
+
+ return false;
+}
+
+static bool
+vu_get_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ unsigned int index = vmsg->payload.state.index;
+
+ DPRINT("State.index: %d\n", index);
+ vmsg->payload.state.num = dev->vq[index].last_avail_idx;
+ vmsg->size = sizeof(vmsg->payload.state);
+
+ dev->vq[index].started = false;
+ if (dev->iface->queue_set_started) {
+ dev->iface->queue_set_started(dev, index, false);
+ }
+
+ if (dev->vq[index].call_fd != -1) {
+ close(dev->vq[index].call_fd);
+ dev->vq[index].call_fd = -1;
+ }
+ if (dev->vq[index].kick_fd != -1) {
+ dev->remove_watch(dev, dev->vq[index].kick_fd);
+ close(dev->vq[index].kick_fd);
+ dev->vq[index].kick_fd = -1;
+ }
+
+ return true;
+}
+
+static bool
+vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+ if (index >= VHOST_MAX_NR_VIRTQUEUE) {
+ vmsg_close_fds(vmsg);
+ vu_panic(dev, "Invalid queue index: %u", index);
+ return false;
+ }
+
+ if (vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK ||
+ vmsg->fd_num != 1) {
+ vmsg_close_fds(vmsg);
+ vu_panic(dev, "Invalid fds in request: %d", vmsg->request);
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+ DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+ if (!vu_check_queue_msg_file(dev, vmsg)) {
+ return false;
+ }
+
+ if (dev->vq[index].kick_fd != -1) {
+ dev->remove_watch(dev, dev->vq[index].kick_fd);
+ close(dev->vq[index].kick_fd);
+ dev->vq[index].kick_fd = -1;
+ }
+
+ if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
+ dev->vq[index].kick_fd = vmsg->fds[0];
+ DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
+ }
+
+ dev->vq[index].started = true;
+ if (dev->iface->queue_set_started) {
+ dev->iface->queue_set_started(dev, index, true);
+ }
+
+ if (dev->vq[index].kick_fd != -1 && dev->vq[index].handler) {
+ dev->set_watch(dev, dev->vq[index].kick_fd, VU_WATCH_IN,
+ vu_kick_cb, (void *)(long)index);
+
+ DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
+ dev->vq[index].kick_fd, index);
+ }
+
+ return false;
+}
+
+void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
+ vu_queue_handler_cb handler)
+{
+ int qidx = vq - dev->vq;
+
+ vq->handler = handler;
+ if (vq->kick_fd >= 0) {
+ if (handler) {
+ dev->set_watch(dev, vq->kick_fd, VU_WATCH_IN,
+ vu_kick_cb, (void *)(long)qidx);
+ } else {
+ dev->remove_watch(dev, vq->kick_fd);
+ }
+ }
+}
+
+static bool
+vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+ DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+ if (!vu_check_queue_msg_file(dev, vmsg)) {
+ return false;
+ }
+
+ if (dev->vq[index].call_fd != -1) {
+ close(dev->vq[index].call_fd);
+ dev->vq[index].call_fd = -1;
+ }
+
+ if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
+ dev->vq[index].call_fd = vmsg->fds[0];
+ }
+
+ DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
+
+ return false;
+}
+
+static bool
+vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+ DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+ if (!vu_check_queue_msg_file(dev, vmsg)) {
+ return false;
+ }
+
+ if (dev->vq[index].err_fd != -1) {
+ close(dev->vq[index].err_fd);
+ dev->vq[index].err_fd = -1;
+ }
+
+ if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
+ dev->vq[index].err_fd = vmsg->fds[0];
+ }
+
+ return false;
+}
+
+static bool
+vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
+
+ if (dev->iface->get_protocol_features) {
+ features |= dev->iface->get_protocol_features(dev);
+ }
+
+ vmsg->payload.u64 = features;
+ vmsg->size = sizeof(vmsg->payload.u64);
+
+ return true;
+}
+
+static bool
+vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ uint64_t features = vmsg->payload.u64;
+
+ DPRINT("u64: 0x%016"PRIx64"\n", features);
+
+ dev->protocol_features = vmsg->payload.u64;
+
+ if (dev->iface->set_protocol_features) {
+ dev->iface->set_protocol_features(dev, features);
+ }
+
+ return false;
+}
+
+static bool
+vu_get_queue_num_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ DPRINT("Function %s() not implemented yet.\n", __func__);
+ return false;
+}
+
+static bool
+vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+ unsigned int index = vmsg->payload.state.index;
+ unsigned int enable = vmsg->payload.state.num;
+
+ DPRINT("State.index: %d\n", index);
+ DPRINT("State.enable: %d\n", enable);
+
+ if (index >= VHOST_MAX_NR_VIRTQUEUE) {
+ vu_panic(dev, "Invalid vring_enable index: %u", index);
+ return false;
+ }
+
+ dev->vq[index].enable = enable;
+ return false;
+}
+
+static bool
+vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int do_reply = 0;
+
+ /* Print out generic part of the request. */
+ DPRINT("================ Vhost user message ================\n");
+ DPRINT("Request: %s (%d)\n", vu_request_to_string(vmsg->request),
+ vmsg->request);
+ DPRINT("Flags: 0x%x\n", vmsg->flags);
+ DPRINT("Size: %d\n", vmsg->size);
+
+ if (vmsg->fd_num) {
+ int i;
+ DPRINT("Fds:");
+ for (i = 0; i < vmsg->fd_num; i++) {
+ DPRINT(" %d", vmsg->fds[i]);
+ }
+ DPRINT("\n");
+ }
+
+ if (dev->iface->process_msg &&
+ dev->iface->process_msg(dev, vmsg, &do_reply)) {
+ return do_reply;
+ }
+
+ switch (vmsg->request) {
+ case VHOST_USER_GET_FEATURES:
+ return vu_get_features_exec(dev, vmsg);
+ case VHOST_USER_SET_FEATURES:
+ return vu_set_features_exec(dev, vmsg);
+ case VHOST_USER_GET_PROTOCOL_FEATURES:
+ return vu_get_protocol_features_exec(dev, vmsg);
+ case VHOST_USER_SET_PROTOCOL_FEATURES:
+ return vu_set_protocol_features_exec(dev, vmsg);
+ case VHOST_USER_SET_OWNER:
+ return vu_set_owner_exec(dev, vmsg);
+ case VHOST_USER_RESET_OWNER:
+ return vu_reset_device_exec(dev, vmsg);
+ case VHOST_USER_SET_MEM_TABLE:
+ return vu_set_mem_table_exec(dev, vmsg);
+ case VHOST_USER_SET_LOG_BASE:
+ return vu_set_log_base_exec(dev, vmsg);
+ case VHOST_USER_SET_LOG_FD:
+ return vu_set_log_fd_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_NUM:
+ return vu_set_vring_num_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_ADDR:
+ return vu_set_vring_addr_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_BASE:
+ return vu_set_vring_base_exec(dev, vmsg);
+ case VHOST_USER_GET_VRING_BASE:
+ return vu_get_vring_base_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_KICK:
+ return vu_set_vring_kick_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_CALL:
+ return vu_set_vring_call_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_ERR:
+ return vu_set_vring_err_exec(dev, vmsg);
+ case VHOST_USER_GET_QUEUE_NUM:
+ return vu_get_queue_num_exec(dev, vmsg);
+ case VHOST_USER_SET_VRING_ENABLE:
+ return vu_set_vring_enable_exec(dev, vmsg);
+ default:
+ vmsg_close_fds(vmsg);
+ vu_panic(dev, "Unhandled request: %d", vmsg->request);
+ }
+
+ return false;
+}
+
+bool
+vu_dispatch(VuDev *dev)
+{
+ VhostUserMsg vmsg = { 0, };
+ int reply_requested;
+ bool success = false;
+
+ if (!vu_message_read(dev, dev->sock, &vmsg)) {
+ goto end;
+ }
+
+ reply_requested = vu_process_message(dev, &vmsg);
+ if (!reply_requested) {
+ success = true;
+ goto end;
+ }
+
+ if (!vu_message_write(dev, dev->sock, &vmsg)) {
+ goto end;
+ }
+
+ success = true;
+
+end:
+ g_free(vmsg.data);
+ return success;
+}
+
+void
+vu_deinit(VuDev *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->nregions; i++) {
+ VuDevRegion *r = &dev->regions[i];
+ void *m = (void *) (uintptr_t) r->mmap_addr;
+ if (m != MAP_FAILED) {
+ munmap(m, r->size + r->mmap_offset);
+ }
+ }
+ dev->nregions = 0;
+
+ for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
+ VuVirtq *vq = &dev->vq[i];
+
+ if (vq->call_fd != -1) {
+ close(vq->call_fd);
+ vq->call_fd = -1;
+ }
+
+ if (vq->kick_fd != -1) {
+ close(vq->kick_fd);
+ vq->kick_fd = -1;
+ }
+
+ if (vq->err_fd != -1) {
+ close(vq->err_fd);
+ vq->err_fd = -1;
+ }
+ }
+
+
+ vu_close_log(dev);
+
+ if (dev->sock != -1) {
+ close(dev->sock);
+ }
+}
+
+void
+vu_init(VuDev *dev,
+ int socket,
+ vu_panic_cb panic,
+ vu_set_watch_cb set_watch,
+ vu_remove_watch_cb remove_watch,
+ const VuDevIface *iface)
+{
+ int i;
+
+ assert(socket >= 0);
+ assert(set_watch);
+ assert(remove_watch);
+ assert(iface);
+ assert(panic);
+
+ memset(dev, 0, sizeof(*dev));
+
+ dev->sock = socket;
+ dev->panic = panic;
+ dev->set_watch = set_watch;
+ dev->remove_watch = remove_watch;
+ dev->iface = iface;
+ dev->log_call_fd = -1;
+ for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
+ dev->vq[i] = (VuVirtq) {
+ .call_fd = -1, .kick_fd = -1, .err_fd = -1,
+ .notification = true,
+ };
+ }
+}
+
+VuVirtq *
+vu_get_queue(VuDev *dev, int qidx)
+{
+ assert(qidx < VHOST_MAX_NR_VIRTQUEUE);
+ return &dev->vq[qidx];
+}
+
+bool
+vu_queue_enabled(VuDev *dev, VuVirtq *vq)
+{
+ return vq->enable;
+}
+
+static inline uint16_t
+vring_avail_flags(VuVirtq *vq)
+{
+ return vq->vring.avail->flags;
+}
+
+static inline uint16_t
+vring_avail_idx(VuVirtq *vq)
+{
+ vq->shadow_avail_idx = vq->vring.avail->idx;
+
+ return vq->shadow_avail_idx;
+}
+
+static inline uint16_t
+vring_avail_ring(VuVirtq *vq, int i)
+{
+ return vq->vring.avail->ring[i];
+}
+
+static inline uint16_t
+vring_get_used_event(VuVirtq *vq)
+{
+ return vring_avail_ring(vq, vq->vring.num);
+}
+
+static int
+virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx)
+{
+ uint16_t num_heads = vring_avail_idx(vq) - idx;
+
+ /* Check it isn't doing very strange things with descriptor numbers. */
+ if (num_heads > vq->vring.num) {
+ vu_panic(dev, "Guest moved used index from %u to %u",
+ idx, vq->shadow_avail_idx);
+ return -1;
+ }
+ if (num_heads) {
+ /* On success, callers read a descriptor at vq->last_avail_idx.
+ * Make sure descriptor read does not bypass avail index read. */
+ smp_rmb();
+ }
+
+ return num_heads;
+}
+
+static bool
+virtqueue_get_head(VuDev *dev, VuVirtq *vq,
+ unsigned int idx, unsigned int *head)
+{
+ /* Grab the next descriptor number they're advertising, and increment
+ * the index we've seen. */
+ *head = vring_avail_ring(vq, idx % vq->vring.num);
+
+ /* If their number is silly, that's a fatal mistake. */
+ if (*head >= vq->vring.num) {
+ vu_panic(dev, "Guest says index %u is available", head);
+ return false;
+ }
+
+ return true;
+}
+
+enum {
+ VIRTQUEUE_READ_DESC_ERROR = -1,
+ VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */
+ VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */
+};
+
+static int
+virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
+ int i, unsigned int max, unsigned int *next)
+{
+ /* If this descriptor says it doesn't chain, we're done. */
+ if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
+ return VIRTQUEUE_READ_DESC_DONE;
+ }
+
+ /* Check they're not leading us off end of descriptors. */
+ *next = desc[i].next;
+ /* Make sure compiler knows to grab that: we don't want it changing! */
+ smp_wmb();
+
+ if (*next >= max) {
+ vu_panic(dev, "Desc next is %u", next);
+ return VIRTQUEUE_READ_DESC_ERROR;
+ }
+
+ return VIRTQUEUE_READ_DESC_MORE;
+}
+
+void
+vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
+ unsigned int *out_bytes,
+ unsigned max_in_bytes, unsigned max_out_bytes)
+{
+ unsigned int idx;
+ unsigned int total_bufs, in_total, out_total;
+ int rc;
+
+ idx = vq->last_avail_idx;
+
+ total_bufs = in_total = out_total = 0;
+ while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) {
+ unsigned int max, num_bufs, indirect = 0;
+ struct vring_desc *desc;
+ unsigned int i;
+
+ max = vq->vring.num;
+ num_bufs = total_bufs;
+ if (!virtqueue_get_head(dev, vq, idx++, &i)) {
+ goto err;
+ }
+ desc = vq->vring.desc;
+
+ if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+ if (desc[i].len % sizeof(struct vring_desc)) {
+ vu_panic(dev, "Invalid size for indirect buffer table");
+ goto err;
+ }
+
+ /* If we've got too many, that implies a descriptor loop. */
+ if (num_bufs >= max) {
+ vu_panic(dev, "Looped descriptor");
+ goto err;
+ }
+
+ /* loop over the indirect descriptor table */
+ indirect = 1;
+ max = desc[i].len / sizeof(struct vring_desc);
+ desc = vu_gpa_to_va(dev, desc[i].addr);
+ num_bufs = i = 0;
+ }
+
+ do {
+ /* If we've got too many, that implies a descriptor loop. */
+ if (++num_bufs > max) {
+ vu_panic(dev, "Looped descriptor");
+ goto err;
+ }
+
+ if (desc[i].flags & VRING_DESC_F_WRITE) {
+ in_total += desc[i].len;
+ } else {
+ out_total += desc[i].len;
+ }
+ if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
+ goto done;
+ }
+ rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
+ } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+ if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+ goto err;
+ }
+
+ if (!indirect) {
+ total_bufs = num_bufs;
+ } else {
+ total_bufs++;
+ }
+ }
+ if (rc < 0) {
+ goto err;
+ }
+done:
+ if (in_bytes) {
+ *in_bytes = in_total;
+ }
+ if (out_bytes) {
+ *out_bytes = out_total;
+ }
+ return;
+
+err:
+ in_total = out_total = 0;
+ goto done;
+}
+
+bool
+vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
+ unsigned int out_bytes)
+{
+ unsigned int in_total, out_total;
+
+ vu_queue_get_avail_bytes(dev, vq, &in_total, &out_total,
+ in_bytes, out_bytes);
+
+ return in_bytes <= in_total && out_bytes <= out_total;
+}
+
+/* Fetch avail_idx from VQ memory only when we really need to know if
+ * guest has added some buffers. */
+int
+vu_queue_empty(VuDev *dev, VuVirtq *vq)
+{
+ if (vq->shadow_avail_idx != vq->last_avail_idx) {
+ return 0;
+ }
+
+ return vring_avail_idx(vq) == vq->last_avail_idx;
+}
+
+static inline
+bool has_feature(uint64_t features, unsigned int fbit)
+{
+ assert(fbit < 64);
+ return !!(features & (1ULL << fbit));
+}
+
+static inline
+bool vu_has_feature(VuDev *dev,
+ unsigned int fbit)
+{
+ return has_feature(dev->features, fbit);
+}
+
+static bool
+vring_notify(VuDev *dev, VuVirtq *vq)
+{
+ uint16_t old, new;
+ bool v;
+
+ /* We need to expose used array entries before checking used event. */
+ smp_mb();
+
+ /* Always notify when queue is empty (when feature acknowledge) */
+ if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
+ !vq->inuse && vu_queue_empty(dev, vq)) {
+ return true;
+ }
+
+ if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+ return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
+ }
+
+ v = vq->signalled_used_valid;
+ vq->signalled_used_valid = true;
+ old = vq->signalled_used;
+ new = vq->signalled_used = vq->used_idx;
+ return !v || vring_need_event(vring_get_used_event(vq), new, old);
+}
+
+void
+vu_queue_notify(VuDev *dev, VuVirtq *vq)
+{
+ if (unlikely(dev->broken)) {
+ return;
+ }
+
+ if (!vring_notify(dev, vq)) {
+ DPRINT("skipped notify...\n");
+ return;
+ }
+
+ if (eventfd_write(vq->call_fd, 1) < 0) {
+ vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
+ }
+}
+
+static inline void
+vring_used_flags_set_bit(VuVirtq *vq, int mask)
+{
+ uint16_t *flags;
+
+ flags = (uint16_t *)((char*)vq->vring.used +
+ offsetof(struct vring_used, flags));
+ *flags |= mask;
+}
+
+static inline void
+vring_used_flags_unset_bit(VuVirtq *vq, int mask)
+{
+ uint16_t *flags;
+
+ flags = (uint16_t *)((char*)vq->vring.used +
+ offsetof(struct vring_used, flags));
+ *flags &= ~mask;
+}
+
+static inline void
+vring_set_avail_event(VuVirtq *vq, uint16_t val)
+{
+ if (!vq->notification) {
+ return;
+ }
+
+ *((uint16_t *) &vq->vring.used->ring[vq->vring.num]) = val;
+}
+
+void
+vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable)
+{
+ vq->notification = enable;
+ if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+ vring_set_avail_event(vq, vring_avail_idx(vq));
+ } else if (enable) {
+ vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
+ } else {
+ vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
+ }
+ if (enable) {
+ /* Expose avail event/used flags before caller checks the avail idx. */
+ smp_mb();
+ }
+}
+
+static void
+virtqueue_map_desc(VuDev *dev,
+ unsigned int *p_num_sg, struct iovec *iov,
+ unsigned int max_num_sg, bool is_write,
+ uint64_t pa, size_t sz)
+{
+ unsigned num_sg = *p_num_sg;
+
+ assert(num_sg <= max_num_sg);
+
+ if (!sz) {
+ vu_panic(dev, "virtio: zero sized buffers are not allowed");
+ return;
+ }
+
+ iov[num_sg].iov_base = vu_gpa_to_va(dev, pa);
+ iov[num_sg].iov_len = sz;
+ num_sg++;
+
+ *p_num_sg = num_sg;
+}
+
+/* Round number down to multiple */
+#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
+
+/* Round number up to multiple */
+#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
+
+static void *
+virtqueue_alloc_element(size_t sz,
+ unsigned out_num, unsigned in_num)
+{
+ VuVirtqElement *elem;
+ size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
+ size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
+ size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
+
+ assert(sz >= sizeof(VuVirtqElement));
+ elem = malloc(out_sg_end);
+ elem->out_num = out_num;
+ elem->in_num = in_num;
+ elem->in_sg = (void *)elem + in_sg_ofs;
+ elem->out_sg = (void *)elem + out_sg_ofs;
+ return elem;
+}
+
+void *
+vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
+{
+ unsigned int i, head, max;
+ VuVirtqElement *elem;
+ unsigned out_num, in_num;
+ struct iovec iov[VIRTQUEUE_MAX_SIZE];
+ struct vring_desc *desc;
+ int rc;
+
+ if (unlikely(dev->broken)) {
+ return NULL;
+ }
+
+ if (vu_queue_empty(dev, vq)) {
+ return NULL;
+ }
+ /* Needed after virtio_queue_empty(), see comment in
+ * virtqueue_num_heads(). */
+ smp_rmb();
+
+ /* When we start there are none of either input nor output. */
+ out_num = in_num = 0;
+
+ max = vq->vring.num;
+ if (vq->inuse >= vq->vring.num) {
+ vu_panic(dev, "Virtqueue size exceeded");
+ return NULL;
+ }
+
+ if (!virtqueue_get_head(dev, vq, vq->last_avail_idx++, &head)) {
+ return NULL;
+ }
+
+ if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+ vring_set_avail_event(vq, vq->last_avail_idx);
+ }
+
+ i = head;
+ desc = vq->vring.desc;
+ if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+ if (desc[i].len % sizeof(struct vring_desc)) {
+ vu_panic(dev, "Invalid size for indirect buffer table");
+ }
+
+ /* loop over the indirect descriptor table */
+ max = desc[i].len / sizeof(struct vring_desc);
+ desc = vu_gpa_to_va(dev, desc[i].addr);
+ i = 0;
+ }
+
+ /* Collect all the descriptors */
+ do {
+ if (desc[i].flags & VRING_DESC_F_WRITE) {
+ virtqueue_map_desc(dev, &in_num, iov + out_num,
+ VIRTQUEUE_MAX_SIZE - out_num, true,
+ desc[i].addr, desc[i].len);
+ } else {
+ if (in_num) {
+ vu_panic(dev, "Incorrect order for descriptors");
+ return NULL;
+ }
+ virtqueue_map_desc(dev, &out_num, iov,
+ VIRTQUEUE_MAX_SIZE, false,
+ desc[i].addr, desc[i].len);
+ }
+
+ /* If we've got too many, that implies a descriptor loop. */
+ if ((in_num + out_num) > max) {
+ vu_panic(dev, "Looped descriptor");
+ }
+ rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
+ } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+ if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+ return NULL;
+ }
+
+ /* Now copy what we have collected and mapped */
+ elem = virtqueue_alloc_element(sz, out_num, in_num);
+ elem->index = head;
+ for (i = 0; i < out_num; i++) {
+ elem->out_sg[i] = iov[i];
+ }
+ for (i = 0; i < in_num; i++) {
+ elem->in_sg[i] = iov[out_num + i];
+ }
+
+ vq->inuse++;
+
+ return elem;
+}
+
+bool
+vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num)
+{
+ if (num > vq->inuse) {
+ return false;
+ }
+ vq->last_avail_idx -= num;
+ vq->inuse -= num;
+ return true;
+}
+
+static inline
+void vring_used_write(VuDev *dev, VuVirtq *vq,
+ struct vring_used_elem *uelem, int i)
+{
+ struct vring_used *used = vq->vring.used;
+
+ used->ring[i] = *uelem;
+ vu_log_write(dev, vq->vring.log_guest_addr +
+ offsetof(struct vring_used, ring[i]),
+ sizeof(used->ring[i]));
+}
+
+
+static void
+vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
+ const VuVirtqElement *elem,
+ unsigned int len)
+{
+ struct vring_desc *desc = vq->vring.desc;
+ unsigned int i, max, min;
+ unsigned num_bufs = 0;
+
+ max = vq->vring.num;
+ i = elem->index;
+
+ if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+ if (desc[i].len % sizeof(struct vring_desc)) {
+ vu_panic(dev, "Invalid size for indirect buffer table");
+ }
+
+ /* loop over the indirect descriptor table */
+ max = desc[i].len / sizeof(struct vring_desc);
+ desc = vu_gpa_to_va(dev, desc[i].addr);
+ i = 0;
+ }
+
+ do {
+ if (++num_bufs > max) {
+ vu_panic(dev, "Looped descriptor");
+ return;
+ }
+
+ if (desc[i].flags & VRING_DESC_F_WRITE) {
+ min = MIN(desc[i].len, len);
+ vu_log_write(dev, desc[i].addr, min);
+ len -= min;
+ }
+
+ } while (len > 0 &&
+ (virtqueue_read_next_desc(dev, desc, i, max, &i)
+ == VIRTQUEUE_READ_DESC_MORE));
+}
+
+void
+vu_queue_fill(VuDev *dev, VuVirtq *vq,
+ const VuVirtqElement *elem,
+ unsigned int len, unsigned int idx)
+{
+ struct vring_used_elem uelem;
+
+ if (unlikely(dev->broken)) {
+ return;
+ }
+
+ vu_log_queue_fill(dev, vq, elem, len);
+
+ idx = (idx + vq->used_idx) % vq->vring.num;
+
+ uelem.id = elem->index;
+ uelem.len = len;
+ vring_used_write(dev, vq, &uelem, idx);
+}
+
+static inline
+void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
+{
+ vq->vring.used->idx = val;
+ vu_log_write(dev,
+ vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
+ sizeof(vq->vring.used->idx));
+
+ vq->used_idx = val;
+}
+
+void
+vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int count)
+{
+ uint16_t old, new;
+
+ if (unlikely(dev->broken)) {
+ return;
+ }
+
+ /* Make sure buffer is written before we update index. */
+ smp_wmb();
+
+ old = vq->used_idx;
+ new = old + count;
+ vring_used_idx_set(dev, vq, new);
+ vq->inuse -= count;
+ if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
+ vq->signalled_used_valid = false;
+ }
+}
+
+void
+vu_queue_push(VuDev *dev, VuVirtq *vq,
+ const VuVirtqElement *elem, unsigned int len)
+{
+ vu_queue_fill(dev, vq, elem, len, 0);
+ vu_queue_flush(dev, vq, 1);
+}
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
new file mode 100644
index 0000000000..156b50e989
--- /dev/null
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -0,0 +1,435 @@
+/*
+ * Vhost User library
+ *
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Victor Kaplansky <victork@redhat.com>
+ * Marc-André Lureau <mlureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#ifndef LIBVHOST_USER_H
+#define LIBVHOST_USER_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/vhost.h>
+#include "standard-headers/linux/virtio_ring.h"
+
+/* Based on qemu/hw/virtio/vhost-user.c */
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#define VHOST_LOG_PAGE 4096
+
+#define VHOST_MAX_NR_VIRTQUEUE 8
+#define VIRTQUEUE_MAX_SIZE 1024
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+enum VhostUserProtocolFeature {
+ VHOST_USER_PROTOCOL_F_MQ = 0,
+ VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
+ VHOST_USER_PROTOCOL_F_RARP = 2,
+
+ VHOST_USER_PROTOCOL_F_MAX
+};
+
+#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
+
+typedef enum VhostUserRequest {
+ VHOST_USER_NONE = 0,
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_OWNER = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+ VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+ VHOST_USER_GET_QUEUE_NUM = 17,
+ VHOST_USER_SET_VRING_ENABLE = 18,
+ VHOST_USER_SEND_RARP = 19,
+ VHOST_USER_INPUT_GET_CONFIG = 20,
+ VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+ uint64_t guest_phys_addr;
+ uint64_t memory_size;
+ uint64_t userspace_addr;
+ uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+ uint32_t nregions;
+ uint32_t padding;
+ VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+ uint64_t mmap_size;
+ uint64_t mmap_offset;
+} VhostUserLog;
+
+#if defined(_WIN32)
+# define VU_PACKED __attribute__((gcc_struct, packed))
+#else
+# define VU_PACKED __attribute__((packed))
+#endif
+
+typedef struct VhostUserMsg {
+ VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK (0x3)
+#define VHOST_USER_REPLY_MASK (0x1 << 2)
+ uint32_t flags;
+ uint32_t size; /* the following payload size */
+
+ union {
+#define VHOST_USER_VRING_IDX_MASK (0xff)
+#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
+ uint64_t u64;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ VhostUserMemory memory;
+ VhostUserLog log;
+ } payload;
+
+ int fds[VHOST_MEMORY_MAX_NREGIONS];
+ int fd_num;
+ uint8_t *data;
+} VU_PACKED VhostUserMsg;
+
+typedef struct VuDevRegion {
+ /* Guest Physical address. */
+ uint64_t gpa;
+ /* Memory region size. */
+ uint64_t size;
+ /* QEMU virtual address (userspace). */
+ uint64_t qva;
+ /* Starting offset in our mmaped space. */
+ uint64_t mmap_offset;
+ /* Start address of mmaped space. */
+ uint64_t mmap_addr;
+} VuDevRegion;
+
+typedef struct VuDev VuDev;
+
+typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
+typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
+typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
+ int *do_reply);
+typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
+
+typedef struct VuDevIface {
+ /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
+ vu_get_features_cb get_features;
+ /* enable vhost implementation features */
+ vu_set_features_cb set_features;
+ /* get the protocol feature bitmask from the underlying vhost
+ * implementation */
+ vu_get_features_cb get_protocol_features;
+ /* enable protocol features in the underlying vhost implementation. */
+ vu_set_features_cb set_protocol_features;
+ /* process_msg is called for each vhost-user message received */
+ /* skip libvhost-user processing if return value != 0 */
+ vu_process_msg_cb process_msg;
+ /* tells when queues can be processed */
+ vu_queue_set_started_cb queue_set_started;
+} VuDevIface;
+
+typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);
+
+typedef struct VuRing {
+ unsigned int num;
+ struct vring_desc *desc;
+ struct vring_avail *avail;
+ struct vring_used *used;
+ uint64_t log_guest_addr;
+ uint32_t flags;
+} VuRing;
+
+typedef struct VuVirtq {
+ VuRing vring;
+
+ /* Next head to pop */
+ uint16_t last_avail_idx;
+
+ /* Last avail_idx read from VQ. */
+ uint16_t shadow_avail_idx;
+
+ uint16_t used_idx;
+
+ /* Last used index value we have signalled on */
+ uint16_t signalled_used;
+
+ /* Last used index value we have signalled on */
+ bool signalled_used_valid;
+
+ /* Notification enabled? */
+ bool notification;
+
+ int inuse;
+
+ vu_queue_handler_cb handler;
+
+ int call_fd;
+ int kick_fd;
+ int err_fd;
+ unsigned int enable;
+ bool started;
+} VuVirtq;
+
+enum VuWatchCondtion {
+ VU_WATCH_IN = 1 << 0,
+ VU_WATCH_OUT = 1 << 1,
+ VU_WATCH_PRI = 1 << 2,
+ VU_WATCH_ERR = 1 << 3,
+ VU_WATCH_HUP = 1 << 4,
+};
+
+typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
+typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
+typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
+ vu_watch_cb cb, void *data);
+typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);
+
+struct VuDev {
+ int sock;
+ uint32_t nregions;
+ VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+ VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE];
+ int log_call_fd;
+ uint64_t log_size;
+ uint8_t *log_table;
+ uint64_t features;
+ uint64_t protocol_features;
+ bool broken;
+
+ /* @set_watch: add or update the given fd to the watch set,
+ * call cb when condition is met */
+ vu_set_watch_cb set_watch;
+
+ /* @remove_watch: remove the given fd from the watch set */
+ vu_remove_watch_cb remove_watch;
+
+ /* @panic: encountered an unrecoverable error, you may try to
+ * re-initialize */
+ vu_panic_cb panic;
+ const VuDevIface *iface;
+};
+
+typedef struct VuVirtqElement {
+ unsigned int index;
+ unsigned int out_num;
+ unsigned int in_num;
+ struct iovec *in_sg;
+ struct iovec *out_sg;
+} VuVirtqElement;
+
+/**
+ * vu_init:
+ * @dev: a VuDev context
+ * @socket: the socket connected to vhost-user master
+ * @panic: a panic callback
+ * @set_watch: a set_watch callback
+ * @remove_watch: a remove_watch callback
+ * @iface: a VuDevIface structure with vhost-user device callbacks
+ *
+ * Intializes a VuDev vhost-user context.
+ **/
+void vu_init(VuDev *dev,
+ int socket,
+ vu_panic_cb panic,
+ vu_set_watch_cb set_watch,
+ vu_remove_watch_cb remove_watch,
+ const VuDevIface *iface);
+
+
+/**
+ * vu_deinit:
+ * @dev: a VuDev context
+ *
+ * Cleans up the VuDev context
+ */
+void vu_deinit(VuDev *dev);
+
+/**
+ * vu_dispatch:
+ * @dev: a VuDev context
+ *
+ * Process one vhost-user message.
+ *
+ * Returns: TRUE on success, FALSE on failure.
+ */
+bool vu_dispatch(VuDev *dev);
+
+/**
+ * vu_gpa_to_va:
+ * @dev: a VuDev context
+ * @guest_addr: guest address
+ *
+ * Translate a guest address to a pointer. Returns NULL on failure.
+ */
+void *vu_gpa_to_va(VuDev *dev, uint64_t guest_addr);
+
+/**
+ * vu_get_queue:
+ * @dev: a VuDev context
+ * @qidx: queue index
+ *
+ * Returns the queue number @qidx.
+ */
+VuVirtq *vu_get_queue(VuDev *dev, int qidx);
+
+/**
+ * vu_set_queue_handler:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @handler: the queue handler callback
+ *
+ * Set the queue handler. This function may be called several times
+ * for the same queue. If called with NULL @handler, the handler is
+ * removed.
+ */
+void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
+ vu_queue_handler_cb handler);
+
+
+/**
+ * vu_queue_set_notification:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @enable: state
+ *
+ * Set whether the queue notifies (via event index or interrupt)
+ */
+void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);
+
+/**
+ * vu_queue_enabled:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Returns: whether the queue is enabled.
+ */
+bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_enabled:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Returns: whether the queue is empty.
+ */
+int vu_queue_empty(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_notify:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Request to notify the queue via callfd (skipped if unnecessary)
+ */
+void vu_queue_notify(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_pop:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @sz: the size of struct to return (must be >= VuVirtqElement)
+ *
+ * Returns: a VuVirtqElement filled from the queue or NULL.
+ */
+void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
+
+/**
+ * vu_queue_rewind:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @num: number of elements to push back
+ *
+ * Pretend that elements weren't popped from the virtqueue. The next
+ * virtqueue_pop() will refetch the oldest element.
+ *
+ * Returns: true on success, false if @num is greater than the number of in use
+ * elements.
+ */
+bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);
+
+/**
+ * vu_queue_fill:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @elem: a VuVirtqElement
+ * @len: length in bytes to write
+ * @idx: optional offset for the used ring index (0 in general)
+ *
+ * Fill the used ring with @elem element.
+ */
+void vu_queue_fill(VuDev *dev, VuVirtq *vq,
+ const VuVirtqElement *elem,
+ unsigned int len, unsigned int idx);
+
+/**
+ * vu_queue_push:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @elem: a VuVirtqElement
+ * @len: length in bytes to write
+ *
+ * Helper that combines vu_queue_fill() with a vu_queue_flush().
+ */
+void vu_queue_push(VuDev *dev, VuVirtq *vq,
+ const VuVirtqElement *elem, unsigned int len);
+
+/**
+ * vu_queue_flush:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @num: number of elements to flush
+ *
+ * Mark the last number of elements as done (used.idx is updated by
+ * num elements).
+*/
+void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);
+
+/**
+ * vu_queue_get_avail_bytes:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @in_bytes: in bytes
+ * @out_bytes: out bytes
+ * @max_in_bytes: stop counting after max_in_bytes
+ * @max_out_bytes: stop counting after max_out_bytes
+ *
+ * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
+ */
+void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
+ unsigned int *out_bytes,
+ unsigned max_in_bytes, unsigned max_out_bytes);
+
+/**
+ * vu_queue_avail_bytes:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @in_bytes: expected in bytes
+ * @out_bytes: expected out bytes
+ *
+ * Returns: true if in_bytes <= in_total && out_bytes <= out_total
+ */
+bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
+ unsigned int out_bytes);
+
+#endif /* LIBVHOST_USER_H */
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index bdcf6bcce7..d31b4577f0 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -1258,7 +1258,7 @@ static void pxa2xx_i2c_update(PXA2xxI2CState *s)
}
/* These are only stubs now. */
-static void pxa2xx_i2c_event(I2CSlave *i2c, enum i2c_event event)
+static int pxa2xx_i2c_event(I2CSlave *i2c, enum i2c_event event)
{
PXA2xxI2CSlaveState *slave = PXA2XX_I2C_SLAVE(i2c);
PXA2xxI2CState *s = slave->host;
@@ -1280,6 +1280,8 @@ static void pxa2xx_i2c_event(I2CSlave *i2c, enum i2c_event event)
break;
}
pxa2xx_i2c_update(s);
+
+ return 0;
}
static int pxa2xx_i2c_rx(I2CSlave *i2c)
diff --git a/hw/arm/tosa.c b/hw/arm/tosa.c
index 39d9dbbae6..c3db996930 100644
--- a/hw/arm/tosa.c
+++ b/hw/arm/tosa.c
@@ -172,7 +172,7 @@ static int tosa_dac_send(I2CSlave *i2c, uint8_t data)
return 0;
}
-static void tosa_dac_event(I2CSlave *i2c, enum i2c_event event)
+static int tosa_dac_event(I2CSlave *i2c, enum i2c_event event)
{
TosaDACState *s = TOSA_DAC(i2c);
@@ -194,6 +194,8 @@ static void tosa_dac_event(I2CSlave *i2c, enum i2c_event event)
default:
break;
}
+
+ return 0;
}
static int tosa_dac_recv(I2CSlave *s)
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 7102686882..085a611173 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -29,7 +29,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
-#include "hw/arm/virt-acpi-build.h"
#include "qemu/bitmap.h"
#include "trace.h"
#include "qom/cpu.h"
@@ -43,6 +42,7 @@
#include "hw/acpi/aml-build.h"
#include "hw/pci/pcie_host.h"
#include "hw/pci/pci.h"
+#include "hw/arm/virt.h"
#include "sysemu/numa.h"
#include "kvm_arm.h"
@@ -384,7 +384,7 @@ build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset)
}
static void
-build_iort(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_iort(GArray *table_data, BIOSLinker *linker)
{
int iort_start = table_data->len;
AcpiIortIdMapping *idmap;
@@ -439,11 +439,11 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
}
static void
-build_spcr(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_spcr(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
AcpiSerialPortConsoleRedirection *spcr;
- const MemMapEntry *uart_memmap = &guest_info->memmap[VIRT_UART];
- int irq = guest_info->irqmap[VIRT_UART] + ARM_SPI_BASE;
+ const MemMapEntry *uart_memmap = &vms->memmap[VIRT_UART];
+ int irq = vms->irqmap[VIRT_UART] + ARM_SPI_BASE;
spcr = acpi_data_push(table_data, sizeof(*spcr));
@@ -472,16 +472,16 @@ build_spcr(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
}
static void
-build_srat(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
AcpiSystemResourceAffinityTable *srat;
AcpiSratProcessorGiccAffinity *core;
AcpiSratMemoryAffinity *numamem;
int i, j, srat_start;
uint64_t mem_base;
- uint32_t *cpu_node = g_malloc0(guest_info->smp_cpus * sizeof(uint32_t));
+ uint32_t *cpu_node = g_malloc0(vms->smp_cpus * sizeof(uint32_t));
- for (i = 0; i < guest_info->smp_cpus; i++) {
+ for (i = 0; i < vms->smp_cpus; i++) {
j = numa_get_node_for_cpu(i);
if (j < nb_numa_nodes) {
cpu_node[i] = j;
@@ -492,7 +492,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
srat = acpi_data_push(table_data, sizeof(*srat));
srat->reserved1 = cpu_to_le32(1);
- for (i = 0; i < guest_info->smp_cpus; ++i) {
+ for (i = 0; i < vms->smp_cpus; ++i) {
core = acpi_data_push(table_data, sizeof(*core));
core->type = ACPI_SRAT_PROCESSOR_GICC;
core->length = sizeof(*core);
@@ -502,7 +502,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
}
g_free(cpu_node);
- mem_base = guest_info->memmap[VIRT_MEM].base;
+ mem_base = vms->memmap[VIRT_MEM].base;
for (i = 0; i < nb_numa_nodes; ++i) {
numamem = acpi_data_push(table_data, sizeof(*numamem));
build_srat_memory(numamem, mem_base, numa_info[i].node_mem, i,
@@ -515,10 +515,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
}
static void
-build_mcfg(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_mcfg(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
AcpiTableMcfg *mcfg;
- const MemMapEntry *memmap = guest_info->memmap;
+ const MemMapEntry *memmap = vms->memmap;
int len = sizeof(*mcfg) + sizeof(mcfg->allocation[0]);
mcfg = acpi_data_push(table_data, len);
@@ -535,24 +535,33 @@ build_mcfg(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
/* GTDT */
static void
-build_gtdt(GArray *table_data, BIOSLinker *linker)
+build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
+ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
int gtdt_start = table_data->len;
AcpiGenericTimerTable *gtdt;
+ uint32_t irqflags;
+
+ if (vmc->claim_edge_triggered_timers) {
+ irqflags = ACPI_GTDT_INTERRUPT_MODE_EDGE;
+ } else {
+ irqflags = ACPI_GTDT_INTERRUPT_MODE_LEVEL;
+ }
gtdt = acpi_data_push(table_data, sizeof *gtdt);
/* The interrupt values are the same with the device tree when adding 16 */
- gtdt->secure_el1_interrupt = ARCH_TIMER_S_EL1_IRQ + 16;
- gtdt->secure_el1_flags = ACPI_EDGE_SENSITIVE;
+ gtdt->secure_el1_interrupt = cpu_to_le32(ARCH_TIMER_S_EL1_IRQ + 16);
+ gtdt->secure_el1_flags = cpu_to_le32(irqflags);
- gtdt->non_secure_el1_interrupt = ARCH_TIMER_NS_EL1_IRQ + 16;
- gtdt->non_secure_el1_flags = ACPI_EDGE_SENSITIVE | ACPI_GTDT_ALWAYS_ON;
+ gtdt->non_secure_el1_interrupt = cpu_to_le32(ARCH_TIMER_NS_EL1_IRQ + 16);
+ gtdt->non_secure_el1_flags = cpu_to_le32(irqflags |
+ ACPI_GTDT_CAP_ALWAYS_ON);
- gtdt->virtual_timer_interrupt = ARCH_TIMER_VIRT_IRQ + 16;
- gtdt->virtual_timer_flags = ACPI_EDGE_SENSITIVE;
+ gtdt->virtual_timer_interrupt = cpu_to_le32(ARCH_TIMER_VIRT_IRQ + 16);
+ gtdt->virtual_timer_flags = cpu_to_le32(irqflags);
- gtdt->non_secure_el2_interrupt = ARCH_TIMER_NS_EL2_IRQ + 16;
- gtdt->non_secure_el2_flags = ACPI_EDGE_SENSITIVE;
+ gtdt->non_secure_el2_interrupt = cpu_to_le32(ARCH_TIMER_NS_EL2_IRQ + 16);
+ gtdt->non_secure_el2_flags = cpu_to_le32(irqflags);
build_header(linker, table_data,
(void *)(table_data->data + gtdt_start), "GTDT",
@@ -561,11 +570,12 @@ build_gtdt(GArray *table_data, BIOSLinker *linker)
/* MADT */
static void
-build_madt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
+ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
int madt_start = table_data->len;
- const MemMapEntry *memmap = guest_info->memmap;
- const int *irqmap = guest_info->irqmap;
+ const MemMapEntry *memmap = vms->memmap;
+ const int *irqmap = vms->irqmap;
AcpiMultipleApicTable *madt;
AcpiMadtGenericDistributor *gicd;
AcpiMadtGenericMsiFrame *gic_msi;
@@ -576,30 +586,30 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
gicd = acpi_data_push(table_data, sizeof *gicd);
gicd->type = ACPI_APIC_GENERIC_DISTRIBUTOR;
gicd->length = sizeof(*gicd);
- gicd->base_address = memmap[VIRT_GIC_DIST].base;
- gicd->version = guest_info->gic_version;
+ gicd->base_address = cpu_to_le64(memmap[VIRT_GIC_DIST].base);
+ gicd->version = vms->gic_version;
- for (i = 0; i < guest_info->smp_cpus; i++) {
- AcpiMadtGenericInterrupt *gicc = acpi_data_push(table_data,
- sizeof *gicc);
+ for (i = 0; i < vms->smp_cpus; i++) {
+ AcpiMadtGenericCpuInterface *gicc = acpi_data_push(table_data,
+ sizeof(*gicc));
ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i));
- gicc->type = ACPI_APIC_GENERIC_INTERRUPT;
+ gicc->type = ACPI_APIC_GENERIC_CPU_INTERFACE;
gicc->length = sizeof(*gicc);
- if (guest_info->gic_version == 2) {
- gicc->base_address = memmap[VIRT_GIC_CPU].base;
+ if (vms->gic_version == 2) {
+ gicc->base_address = cpu_to_le64(memmap[VIRT_GIC_CPU].base);
}
- gicc->cpu_interface_number = i;
- gicc->arm_mpidr = armcpu->mp_affinity;
- gicc->uid = i;
- gicc->flags = cpu_to_le32(ACPI_GICC_ENABLED);
+ gicc->cpu_interface_number = cpu_to_le32(i);
+ gicc->arm_mpidr = cpu_to_le64(armcpu->mp_affinity);
+ gicc->uid = cpu_to_le32(i);
+ gicc->flags = cpu_to_le32(ACPI_MADT_GICC_ENABLED);
if (arm_feature(&armcpu->env, ARM_FEATURE_PMU)) {
gicc->performance_interrupt = cpu_to_le32(PPI(VIRTUAL_PMU_IRQ));
}
}
- if (guest_info->gic_version == 3) {
+ if (vms->gic_version == 3) {
AcpiMadtGenericTranslator *gic_its;
AcpiMadtGenericRedistributor *gicr = acpi_data_push(table_data,
sizeof *gicr);
@@ -609,7 +619,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
gicr->base_address = cpu_to_le64(memmap[VIRT_GIC_REDIST].base);
gicr->range_length = cpu_to_le32(memmap[VIRT_GIC_REDIST].size);
- if (its_class_name() && !guest_info->no_its) {
+ if (its_class_name() && !vmc->no_its) {
gic_its = acpi_data_push(table_data, sizeof *gic_its);
gic_its->type = ACPI_APIC_GENERIC_TRANSLATOR;
gic_its->length = sizeof(*gic_its);
@@ -641,8 +651,8 @@ build_fadt(GArray *table_data, BIOSLinker *linker, unsigned dsdt_tbl_offset)
/* Hardware Reduced = 1 and use PSCI 0.2+ and with HVC */
fadt->flags = cpu_to_le32(1 << ACPI_FADT_F_HW_REDUCED_ACPI);
- fadt->arm_boot_flags = cpu_to_le16((1 << ACPI_FADT_ARM_USE_PSCI_G_0_2) |
- (1 << ACPI_FADT_ARM_PSCI_USE_HVC));
+ fadt->arm_boot_flags = cpu_to_le16(ACPI_FADT_ARM_PSCI_COMPLIANT |
+ ACPI_FADT_ARM_PSCI_USE_HVC);
/* ACPI v5.1 (fadt->revision.fadt->minor_revision) */
fadt->minor_revision = 0x1;
@@ -658,11 +668,11 @@ build_fadt(GArray *table_data, BIOSLinker *linker, unsigned dsdt_tbl_offset)
/* DSDT */
static void
-build_dsdt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
Aml *scope, *dsdt;
- const MemMapEntry *memmap = guest_info->memmap;
- const int *irqmap = guest_info->irqmap;
+ const MemMapEntry *memmap = vms->memmap;
+ const int *irqmap = vms->irqmap;
dsdt = init_aml_allocator();
/* Reserve space for header */
@@ -674,7 +684,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
* the RTC ACPI device at all when using UEFI.
*/
scope = aml_scope("\\_SB");
- acpi_dsdt_add_cpus(scope, guest_info->smp_cpus);
+ acpi_dsdt_add_cpus(scope, vms->smp_cpus);
acpi_dsdt_add_uart(scope, &memmap[VIRT_UART],
(irqmap[VIRT_UART] + ARM_SPI_BASE));
acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]);
@@ -682,7 +692,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
acpi_dsdt_add_virtio(scope, &memmap[VIRT_MMIO],
(irqmap[VIRT_MMIO] + ARM_SPI_BASE), NUM_VIRTIO_TRANSPORTS);
acpi_dsdt_add_pci(scope, memmap, (irqmap[VIRT_PCIE] + ARM_SPI_BASE),
- guest_info->use_highmem);
+ vms->highmem);
acpi_dsdt_add_gpio(scope, &memmap[VIRT_GPIO],
(irqmap[VIRT_GPIO] + ARM_SPI_BASE));
acpi_dsdt_add_power_button(scope);
@@ -705,12 +715,12 @@ struct AcpiBuildState {
MemoryRegion *linker_mr;
/* Is table patched? */
bool patched;
- VirtGuestInfo *guest_info;
} AcpiBuildState;
static
-void virt_acpi_build(VirtGuestInfo *guest_info, AcpiBuildTables *tables)
+void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
{
+ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
GArray *table_offsets;
unsigned dsdt, rsdt;
GArray *tables_blob = tables->table_data;
@@ -724,32 +734,32 @@ void virt_acpi_build(VirtGuestInfo *guest_info, AcpiBuildTables *tables)
/* DSDT is pointed to by FADT */
dsdt = tables_blob->len;
- build_dsdt(tables_blob, tables->linker, guest_info);
+ build_dsdt(tables_blob, tables->linker, vms);
/* FADT MADT GTDT MCFG SPCR pointed to by RSDT */
acpi_add_table(table_offsets, tables_blob);
build_fadt(tables_blob, tables->linker, dsdt);
acpi_add_table(table_offsets, tables_blob);
- build_madt(tables_blob, tables->linker, guest_info);
+ build_madt(tables_blob, tables->linker, vms);
acpi_add_table(table_offsets, tables_blob);
- build_gtdt(tables_blob, tables->linker);
+ build_gtdt(tables_blob, tables->linker, vms);
acpi_add_table(table_offsets, tables_blob);
- build_mcfg(tables_blob, tables->linker, guest_info);
+ build_mcfg(tables_blob, tables->linker, vms);
acpi_add_table(table_offsets, tables_blob);
- build_spcr(tables_blob, tables->linker, guest_info);
+ build_spcr(tables_blob, tables->linker, vms);
if (nb_numa_nodes > 0) {
acpi_add_table(table_offsets, tables_blob);
- build_srat(tables_blob, tables->linker, guest_info);
+ build_srat(tables_blob, tables->linker, vms);
}
- if (its_class_name() && !guest_info->no_its) {
+ if (its_class_name() && !vmc->no_its) {
acpi_add_table(table_offsets, tables_blob);
- build_iort(tables_blob, tables->linker, guest_info);
+ build_iort(tables_blob, tables->linker);
}
/* RSDT is pointed to by RSDP */
@@ -788,13 +798,12 @@ static void virt_acpi_build_update(void *build_opaque)
acpi_build_tables_init(&tables);
- virt_acpi_build(build_state->guest_info, &tables);
+ virt_acpi_build(VIRT_MACHINE(qdev_get_machine()), &tables);
acpi_ram_update(build_state->table_mr, tables.table_data);
acpi_ram_update(build_state->rsdp_mr, tables.rsdp);
acpi_ram_update(build_state->linker_mr, tables.linker->cmd_blob);
-
acpi_build_tables_cleanup(&tables, true);
}
@@ -822,12 +831,12 @@ static const VMStateDescription vmstate_virt_acpi_build = {
},
};
-void virt_acpi_setup(VirtGuestInfo *guest_info)
+void virt_acpi_setup(VirtMachineState *vms)
{
AcpiBuildTables tables;
AcpiBuildState *build_state;
- if (!guest_info->fw_cfg) {
+ if (!vms->fw_cfg) {
trace_virt_acpi_setup();
return;
}
@@ -838,10 +847,9 @@ void virt_acpi_setup(VirtGuestInfo *guest_info)
}
build_state = g_malloc0(sizeof *build_state);
- build_state->guest_info = guest_info;
acpi_build_tables_init(&tables);
- virt_acpi_build(build_state->guest_info, &tables);
+ virt_acpi_build(vms, &tables);
/* Now expose it all to Guest */
build_state->table_mr = acpi_add_rom_blob(build_state, tables.table_data,
@@ -853,8 +861,8 @@ void virt_acpi_setup(VirtGuestInfo *guest_info)
acpi_add_rom_blob(build_state, tables.linker->cmd_blob,
"etc/table-loader", 0);
- fw_cfg_add_file(guest_info->fw_cfg, ACPI_BUILD_TPMLOG_FILE,
- tables.tcpalog->data, acpi_data_len(tables.tcpalog));
+ fw_cfg_add_file(vms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data,
+ acpi_data_len(tables.tcpalog));
build_state->rsdp_mr = acpi_add_rom_blob(build_state, tables.rsdp,
ACPI_BUILD_RSDP_FILE, 0);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 11c53a56e0..7a03f84051 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -41,14 +41,12 @@
#include "sysemu/numa.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
-#include "hw/boards.h"
#include "hw/compat.h"
#include "hw/loader.h"
#include "exec/address-spaces.h"
#include "qemu/bitops.h"
#include "qemu/error-report.h"
#include "hw/pci-host/gpex.h"
-#include "hw/arm/virt-acpi-build.h"
#include "hw/arm/sysbus-fdt.h"
#include "hw/platform-bus.h"
#include "hw/arm/fdt.h"
@@ -59,51 +57,6 @@
#include "qapi/visitor.h"
#include "standard-headers/linux/input.h"
-/* Number of external interrupt lines to configure the GIC with */
-#define NUM_IRQS 256
-
-#define PLATFORM_BUS_NUM_IRQS 64
-
-static ARMPlatformBusSystemParams platform_bus_params;
-
-typedef struct VirtBoardInfo {
- struct arm_boot_info bootinfo;
- const char *cpu_model;
- const MemMapEntry *memmap;
- const int *irqmap;
- int smp_cpus;
- void *fdt;
- int fdt_size;
- uint32_t clock_phandle;
- uint32_t gic_phandle;
- uint32_t msi_phandle;
- bool using_psci;
-} VirtBoardInfo;
-
-typedef struct {
- MachineClass parent;
- VirtBoardInfo *daughterboard;
- bool disallow_affinity_adjustment;
- bool no_its;
- bool no_pmu;
-} VirtMachineClass;
-
-typedef struct {
- MachineState parent;
- bool secure;
- bool highmem;
- int32_t gic_version;
-} VirtMachineState;
-
-#define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt")
-#define VIRT_MACHINE(obj) \
- OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE)
-#define VIRT_MACHINE_GET_CLASS(obj) \
- OBJECT_GET_CLASS(VirtMachineClass, obj, TYPE_VIRT_MACHINE)
-#define VIRT_MACHINE_CLASS(klass) \
- OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE)
-
-
#define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
void *data) \
@@ -133,6 +86,13 @@ typedef struct {
DEFINE_VIRT_MACHINE_LATEST(major, minor, false)
+/* Number of external interrupt lines to configure the GIC with */
+#define NUM_IRQS 256
+
+#define PLATFORM_BUS_NUM_IRQS 64
+
+static ARMPlatformBusSystemParams platform_bus_params;
+
/* RAM limit in GB. Since VIRT_MEM starts at the 1GB mark, this means
* RAM can go up to the 256GB mark, leaving 256GB of the physical
* address space unallocated and free for future use between 256G and 512G.
@@ -202,51 +162,36 @@ static const int a15irqmap[] = {
[VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
};
-static VirtBoardInfo machines[] = {
- {
- .cpu_model = "cortex-a15",
- .memmap = a15memmap,
- .irqmap = a15irqmap,
- },
- {
- .cpu_model = "cortex-a53",
- .memmap = a15memmap,
- .irqmap = a15irqmap,
- },
- {
- .cpu_model = "cortex-a57",
- .memmap = a15memmap,
- .irqmap = a15irqmap,
- },
- {
- .cpu_model = "host",
- .memmap = a15memmap,
- .irqmap = a15irqmap,
- },
+static const char *valid_cpus[] = {
+ "cortex-a15",
+ "cortex-a53",
+ "cortex-a57",
+ "host",
+ NULL
};
-static VirtBoardInfo *find_machine_info(const char *cpu)
+static bool cpuname_valid(const char *cpu)
{
int i;
- for (i = 0; i < ARRAY_SIZE(machines); i++) {
- if (strcmp(cpu, machines[i].cpu_model) == 0) {
- return &machines[i];
+ for (i = 0; i < ARRAY_SIZE(valid_cpus); i++) {
+ if (strcmp(cpu, valid_cpus[i]) == 0) {
+ return true;
}
}
- return NULL;
+ return false;
}
-static void create_fdt(VirtBoardInfo *vbi)
+static void create_fdt(VirtMachineState *vms)
{
- void *fdt = create_device_tree(&vbi->fdt_size);
+ void *fdt = create_device_tree(&vms->fdt_size);
if (!fdt) {
error_report("create_device_tree() failed");
exit(1);
}
- vbi->fdt = fdt;
+ vms->fdt = fdt;
/* Header */
qemu_fdt_setprop_string(fdt, "/", "compatible", "linux,dummy-virt");
@@ -266,27 +211,27 @@ static void create_fdt(VirtBoardInfo *vbi)
* optional but in practice if you omit them the kernel refuses to
* probe for the device.
*/
- vbi->clock_phandle = qemu_fdt_alloc_phandle(fdt);
+ vms->clock_phandle = qemu_fdt_alloc_phandle(fdt);
qemu_fdt_add_subnode(fdt, "/apb-pclk");
qemu_fdt_setprop_string(fdt, "/apb-pclk", "compatible", "fixed-clock");
qemu_fdt_setprop_cell(fdt, "/apb-pclk", "#clock-cells", 0x0);
qemu_fdt_setprop_cell(fdt, "/apb-pclk", "clock-frequency", 24000000);
qemu_fdt_setprop_string(fdt, "/apb-pclk", "clock-output-names",
"clk24mhz");
- qemu_fdt_setprop_cell(fdt, "/apb-pclk", "phandle", vbi->clock_phandle);
+ qemu_fdt_setprop_cell(fdt, "/apb-pclk", "phandle", vms->clock_phandle);
}
-static void fdt_add_psci_node(const VirtBoardInfo *vbi)
+static void fdt_add_psci_node(const VirtMachineState *vms)
{
uint32_t cpu_suspend_fn;
uint32_t cpu_off_fn;
uint32_t cpu_on_fn;
uint32_t migrate_fn;
- void *fdt = vbi->fdt;
+ void *fdt = vms->fdt;
ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(0));
- if (!vbi->using_psci) {
+ if (!vms->using_psci) {
return;
}
@@ -327,41 +272,60 @@ static void fdt_add_psci_node(const VirtBoardInfo *vbi)
qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
}
-static void fdt_add_timer_nodes(const VirtBoardInfo *vbi, int gictype)
+static void fdt_add_timer_nodes(const VirtMachineState *vms)
{
- /* Note that on A15 h/w these interrupts are level-triggered,
- * but for the GIC implementation provided by both QEMU and KVM
- * they are edge-triggered.
+ /* On real hardware these interrupts are level-triggered.
+ * On KVM they were edge-triggered before host kernel version 4.4,
+ * and level-triggered afterwards.
+ * On emulated QEMU they are level-triggered.
+ *
+ * Getting the DTB info about them wrong is awkward for some
+ * guest kernels:
+ * pre-4.8 ignore the DT and leave the interrupt configured
+ * with whatever the GIC reset value (or the bootloader) left it at
+ * 4.8 before rc6 honour the incorrect data by programming it back
+ * into the GIC, causing problems
+ * 4.8rc6 and later ignore the DT and always write "level triggered"
+ * into the GIC
+ *
+ * For backwards-compatibility, virt-2.8 and earlier will continue
+ * to say these are edge-triggered, but later machines will report
+ * the correct information.
*/
ARMCPU *armcpu;
- uint32_t irqflags = GIC_FDT_IRQ_FLAGS_EDGE_LO_HI;
+ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
+ uint32_t irqflags = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
+
+ if (vmc->claim_edge_triggered_timers) {
+ irqflags = GIC_FDT_IRQ_FLAGS_EDGE_LO_HI;
+ }
- if (gictype == 2) {
+ if (vms->gic_version == 2) {
irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
GIC_FDT_IRQ_PPI_CPU_WIDTH,
- (1 << vbi->smp_cpus) - 1);
+ (1 << vms->smp_cpus) - 1);
}
- qemu_fdt_add_subnode(vbi->fdt, "/timer");
+ qemu_fdt_add_subnode(vms->fdt, "/timer");
armcpu = ARM_CPU(qemu_get_cpu(0));
if (arm_feature(&armcpu->env, ARM_FEATURE_V8)) {
const char compat[] = "arm,armv8-timer\0arm,armv7-timer";
- qemu_fdt_setprop(vbi->fdt, "/timer", "compatible",
+ qemu_fdt_setprop(vms->fdt, "/timer", "compatible",
compat, sizeof(compat));
} else {
- qemu_fdt_setprop_string(vbi->fdt, "/timer", "compatible",
+ qemu_fdt_setprop_string(vms->fdt, "/timer", "compatible",
"arm,armv7-timer");
}
- qemu_fdt_setprop(vbi->fdt, "/timer", "always-on", NULL, 0);
- qemu_fdt_setprop_cells(vbi->fdt, "/timer", "interrupts",
+ qemu_fdt_setprop(vms->fdt, "/timer", "always-on", NULL, 0);
+ qemu_fdt_setprop_cells(vms->fdt, "/timer", "interrupts",
GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_S_EL1_IRQ, irqflags,
GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL1_IRQ, irqflags,
GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_VIRT_IRQ, irqflags,
GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL2_IRQ, irqflags);
}
-static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
+static void fdt_add_cpu_nodes(const VirtMachineState *vms)
{
int cpu;
int addr_cells = 1;
@@ -380,7 +344,7 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
* The simplest way to go is to examine affinity IDs of all our CPUs. If
* at least one of them has Aff3 populated, we set #address-cells to 2.
*/
- for (cpu = 0; cpu < vbi->smp_cpus; cpu++) {
+ for (cpu = 0; cpu < vms->smp_cpus; cpu++) {
ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
if (armcpu->mp_affinity & ARM_AFF3_MASK) {
@@ -389,101 +353,101 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
}
}
- qemu_fdt_add_subnode(vbi->fdt, "/cpus");
- qemu_fdt_setprop_cell(vbi->fdt, "/cpus", "#address-cells", addr_cells);
- qemu_fdt_setprop_cell(vbi->fdt, "/cpus", "#size-cells", 0x0);
+ qemu_fdt_add_subnode(vms->fdt, "/cpus");
+ qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#address-cells", addr_cells);
+ qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#size-cells", 0x0);
- for (cpu = vbi->smp_cpus - 1; cpu >= 0; cpu--) {
+ for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) {
char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "cpu");
- qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible",
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "cpu");
+ qemu_fdt_setprop_string(vms->fdt, nodename, "compatible",
armcpu->dtb_compatible);
- if (vbi->using_psci && vbi->smp_cpus > 1) {
- qemu_fdt_setprop_string(vbi->fdt, nodename,
+ if (vms->using_psci && vms->smp_cpus > 1) {
+ qemu_fdt_setprop_string(vms->fdt, nodename,
"enable-method", "psci");
}
if (addr_cells == 2) {
- qemu_fdt_setprop_u64(vbi->fdt, nodename, "reg",
+ qemu_fdt_setprop_u64(vms->fdt, nodename, "reg",
armcpu->mp_affinity);
} else {
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "reg",
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "reg",
armcpu->mp_affinity);
}
i = numa_get_node_for_cpu(cpu);
if (i < nb_numa_nodes) {
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "numa-node-id", i);
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id", i);
}
g_free(nodename);
}
}
-static void fdt_add_its_gic_node(VirtBoardInfo *vbi)
+static void fdt_add_its_gic_node(VirtMachineState *vms)
{
- vbi->msi_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
- qemu_fdt_add_subnode(vbi->fdt, "/intc/its");
- qemu_fdt_setprop_string(vbi->fdt, "/intc/its", "compatible",
+ vms->msi_phandle = qemu_fdt_alloc_phandle(vms->fdt);
+ qemu_fdt_add_subnode(vms->fdt, "/intc/its");
+ qemu_fdt_setprop_string(vms->fdt, "/intc/its", "compatible",
"arm,gic-v3-its");
- qemu_fdt_setprop(vbi->fdt, "/intc/its", "msi-controller", NULL, 0);
- qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc/its", "reg",
- 2, vbi->memmap[VIRT_GIC_ITS].base,
- 2, vbi->memmap[VIRT_GIC_ITS].size);
- qemu_fdt_setprop_cell(vbi->fdt, "/intc/its", "phandle", vbi->msi_phandle);
+ qemu_fdt_setprop(vms->fdt, "/intc/its", "msi-controller", NULL, 0);
+ qemu_fdt_setprop_sized_cells(vms->fdt, "/intc/its", "reg",
+ 2, vms->memmap[VIRT_GIC_ITS].base,
+ 2, vms->memmap[VIRT_GIC_ITS].size);
+ qemu_fdt_setprop_cell(vms->fdt, "/intc/its", "phandle", vms->msi_phandle);
}
-static void fdt_add_v2m_gic_node(VirtBoardInfo *vbi)
+static void fdt_add_v2m_gic_node(VirtMachineState *vms)
{
- vbi->msi_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
- qemu_fdt_add_subnode(vbi->fdt, "/intc/v2m");
- qemu_fdt_setprop_string(vbi->fdt, "/intc/v2m", "compatible",
+ vms->msi_phandle = qemu_fdt_alloc_phandle(vms->fdt);
+ qemu_fdt_add_subnode(vms->fdt, "/intc/v2m");
+ qemu_fdt_setprop_string(vms->fdt, "/intc/v2m", "compatible",
"arm,gic-v2m-frame");
- qemu_fdt_setprop(vbi->fdt, "/intc/v2m", "msi-controller", NULL, 0);
- qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc/v2m", "reg",
- 2, vbi->memmap[VIRT_GIC_V2M].base,
- 2, vbi->memmap[VIRT_GIC_V2M].size);
- qemu_fdt_setprop_cell(vbi->fdt, "/intc/v2m", "phandle", vbi->msi_phandle);
+ qemu_fdt_setprop(vms->fdt, "/intc/v2m", "msi-controller", NULL, 0);
+ qemu_fdt_setprop_sized_cells(vms->fdt, "/intc/v2m", "reg",
+ 2, vms->memmap[VIRT_GIC_V2M].base,
+ 2, vms->memmap[VIRT_GIC_V2M].size);
+ qemu_fdt_setprop_cell(vms->fdt, "/intc/v2m", "phandle", vms->msi_phandle);
}
-static void fdt_add_gic_node(VirtBoardInfo *vbi, int type)
+static void fdt_add_gic_node(VirtMachineState *vms)
{
- vbi->gic_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
- qemu_fdt_setprop_cell(vbi->fdt, "/", "interrupt-parent", vbi->gic_phandle);
-
- qemu_fdt_add_subnode(vbi->fdt, "/intc");
- qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#interrupt-cells", 3);
- qemu_fdt_setprop(vbi->fdt, "/intc", "interrupt-controller", NULL, 0);
- qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#address-cells", 0x2);
- qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#size-cells", 0x2);
- qemu_fdt_setprop(vbi->fdt, "/intc", "ranges", NULL, 0);
- if (type == 3) {
- qemu_fdt_setprop_string(vbi->fdt, "/intc", "compatible",
+ vms->gic_phandle = qemu_fdt_alloc_phandle(vms->fdt);
+ qemu_fdt_setprop_cell(vms->fdt, "/", "interrupt-parent", vms->gic_phandle);
+
+ qemu_fdt_add_subnode(vms->fdt, "/intc");
+ qemu_fdt_setprop_cell(vms->fdt, "/intc", "#interrupt-cells", 3);
+ qemu_fdt_setprop(vms->fdt, "/intc", "interrupt-controller", NULL, 0);
+ qemu_fdt_setprop_cell(vms->fdt, "/intc", "#address-cells", 0x2);
+ qemu_fdt_setprop_cell(vms->fdt, "/intc", "#size-cells", 0x2);
+ qemu_fdt_setprop(vms->fdt, "/intc", "ranges", NULL, 0);
+ if (vms->gic_version == 3) {
+ qemu_fdt_setprop_string(vms->fdt, "/intc", "compatible",
"arm,gic-v3");
- qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc", "reg",
- 2, vbi->memmap[VIRT_GIC_DIST].base,
- 2, vbi->memmap[VIRT_GIC_DIST].size,
- 2, vbi->memmap[VIRT_GIC_REDIST].base,
- 2, vbi->memmap[VIRT_GIC_REDIST].size);
+ qemu_fdt_setprop_sized_cells(vms->fdt, "/intc", "reg",
+ 2, vms->memmap[VIRT_GIC_DIST].base,
+ 2, vms->memmap[VIRT_GIC_DIST].size,
+ 2, vms->memmap[VIRT_GIC_REDIST].base,
+ 2, vms->memmap[VIRT_GIC_REDIST].size);
} else {
/* 'cortex-a15-gic' means 'GIC v2' */
- qemu_fdt_setprop_string(vbi->fdt, "/intc", "compatible",
+ qemu_fdt_setprop_string(vms->fdt, "/intc", "compatible",
"arm,cortex-a15-gic");
- qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc", "reg",
- 2, vbi->memmap[VIRT_GIC_DIST].base,
- 2, vbi->memmap[VIRT_GIC_DIST].size,
- 2, vbi->memmap[VIRT_GIC_CPU].base,
- 2, vbi->memmap[VIRT_GIC_CPU].size);
+ qemu_fdt_setprop_sized_cells(vms->fdt, "/intc", "reg",
+ 2, vms->memmap[VIRT_GIC_DIST].base,
+ 2, vms->memmap[VIRT_GIC_DIST].size,
+ 2, vms->memmap[VIRT_GIC_CPU].base,
+ 2, vms->memmap[VIRT_GIC_CPU].size);
}
- qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", vbi->gic_phandle);
+ qemu_fdt_setprop_cell(vms->fdt, "/intc", "phandle", vms->gic_phandle);
}
-static void fdt_add_pmu_nodes(const VirtBoardInfo *vbi, int gictype)
+static void fdt_add_pmu_nodes(const VirtMachineState *vms)
{
CPUState *cpu;
ARMCPU *armcpu;
@@ -497,24 +461,24 @@ static void fdt_add_pmu_nodes(const VirtBoardInfo *vbi, int gictype)
}
}
- if (gictype == 2) {
+ if (vms->gic_version == 2) {
irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
GIC_FDT_IRQ_PPI_CPU_WIDTH,
- (1 << vbi->smp_cpus) - 1);
+ (1 << vms->smp_cpus) - 1);
}
armcpu = ARM_CPU(qemu_get_cpu(0));
- qemu_fdt_add_subnode(vbi->fdt, "/pmu");
+ qemu_fdt_add_subnode(vms->fdt, "/pmu");
if (arm_feature(&armcpu->env, ARM_FEATURE_V8)) {
const char compat[] = "arm,armv8-pmuv3";
- qemu_fdt_setprop(vbi->fdt, "/pmu", "compatible",
+ qemu_fdt_setprop(vms->fdt, "/pmu", "compatible",
compat, sizeof(compat));
- qemu_fdt_setprop_cells(vbi->fdt, "/pmu", "interrupts",
+ qemu_fdt_setprop_cells(vms->fdt, "/pmu", "interrupts",
GIC_FDT_IRQ_TYPE_PPI, VIRTUAL_PMU_IRQ, irqflags);
}
}
-static void create_its(VirtBoardInfo *vbi, DeviceState *gicdev)
+static void create_its(VirtMachineState *vms, DeviceState *gicdev)
{
const char *itsclass = its_class_name();
DeviceState *dev;
@@ -529,19 +493,19 @@ static void create_its(VirtBoardInfo *vbi, DeviceState *gicdev)
object_property_set_link(OBJECT(dev), OBJECT(gicdev), "parent-gicv3",
&error_abort);
qdev_init_nofail(dev);
- sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vbi->memmap[VIRT_GIC_ITS].base);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_GIC_ITS].base);
- fdt_add_its_gic_node(vbi);
+ fdt_add_its_gic_node(vms);
}
-static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_v2m(VirtMachineState *vms, qemu_irq *pic)
{
int i;
- int irq = vbi->irqmap[VIRT_GIC_V2M];
+ int irq = vms->irqmap[VIRT_GIC_V2M];
DeviceState *dev;
dev = qdev_create(NULL, "arm-gicv2m");
- sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vbi->memmap[VIRT_GIC_V2M].base);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_GIC_V2M].base);
qdev_prop_set_uint32(dev, "base-spi", irq);
qdev_prop_set_uint32(dev, "num-spi", NUM_GICV2M_SPIS);
qdev_init_nofail(dev);
@@ -550,17 +514,17 @@ static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic)
sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]);
}
- fdt_add_v2m_gic_node(vbi);
+ fdt_add_v2m_gic_node(vms);
}
-static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, int type,
- bool secure, bool no_its)
+static void create_gic(VirtMachineState *vms, qemu_irq *pic)
{
/* We create a standalone GIC */
+ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
DeviceState *gicdev;
SysBusDevice *gicbusdev;
const char *gictype;
- int i;
+ int type = vms->gic_version, i;
gictype = (type == 3) ? gicv3_class_name() : gic_class_name();
@@ -572,15 +536,15 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, int type,
*/
qdev_prop_set_uint32(gicdev, "num-irq", NUM_IRQS + 32);
if (!kvm_irqchip_in_kernel()) {
- qdev_prop_set_bit(gicdev, "has-security-extensions", secure);
+ qdev_prop_set_bit(gicdev, "has-security-extensions", vms->secure);
}
qdev_init_nofail(gicdev);
gicbusdev = SYS_BUS_DEVICE(gicdev);
- sysbus_mmio_map(gicbusdev, 0, vbi->memmap[VIRT_GIC_DIST].base);
+ sysbus_mmio_map(gicbusdev, 0, vms->memmap[VIRT_GIC_DIST].base);
if (type == 3) {
- sysbus_mmio_map(gicbusdev, 1, vbi->memmap[VIRT_GIC_REDIST].base);
+ sysbus_mmio_map(gicbusdev, 1, vms->memmap[VIRT_GIC_REDIST].base);
} else {
- sysbus_mmio_map(gicbusdev, 1, vbi->memmap[VIRT_GIC_CPU].base);
+ sysbus_mmio_map(gicbusdev, 1, vms->memmap[VIRT_GIC_CPU].base);
}
/* Wire the outputs from each CPU's generic timer to the
@@ -616,22 +580,22 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, int type,
pic[i] = qdev_get_gpio_in(gicdev, i);
}
- fdt_add_gic_node(vbi, type);
+ fdt_add_gic_node(vms);
- if (type == 3 && !no_its) {
- create_its(vbi, gicdev);
+ if (type == 3 && !vmc->no_its) {
+ create_its(vms, gicdev);
} else if (type == 2) {
- create_v2m(vbi, pic);
+ create_v2m(vms, pic);
}
}
-static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic, int uart,
+static void create_uart(const VirtMachineState *vms, qemu_irq *pic, int uart,
MemoryRegion *mem, CharDriverState *chr)
{
char *nodename;
- hwaddr base = vbi->memmap[uart].base;
- hwaddr size = vbi->memmap[uart].size;
- int irq = vbi->irqmap[uart];
+ hwaddr base = vms->memmap[uart].base;
+ hwaddr size = vms->memmap[uart].size;
+ int irq = vms->irqmap[uart];
const char compat[] = "arm,pl011\0arm,primecell";
const char clocknames[] = "uartclk\0apb_pclk";
DeviceState *dev = qdev_create(NULL, "pl011");
@@ -644,51 +608,51 @@ static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic, int uart,
sysbus_connect_irq(s, 0, pic[irq]);
nodename = g_strdup_printf("/pl011@%" PRIx64, base);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
+ qemu_fdt_add_subnode(vms->fdt, nodename);
/* Note that we can't use setprop_string because of the embedded NUL */
- qemu_fdt_setprop(vbi->fdt, nodename, "compatible",
+ qemu_fdt_setprop(vms->fdt, nodename, "compatible",
compat, sizeof(compat));
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, base, 2, size);
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
GIC_FDT_IRQ_TYPE_SPI, irq,
GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "clocks",
- vbi->clock_phandle, vbi->clock_phandle);
- qemu_fdt_setprop(vbi->fdt, nodename, "clock-names",
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "clocks",
+ vms->clock_phandle, vms->clock_phandle);
+ qemu_fdt_setprop(vms->fdt, nodename, "clock-names",
clocknames, sizeof(clocknames));
if (uart == VIRT_UART) {
- qemu_fdt_setprop_string(vbi->fdt, "/chosen", "stdout-path", nodename);
+ qemu_fdt_setprop_string(vms->fdt, "/chosen", "stdout-path", nodename);
} else {
/* Mark as not usable by the normal world */
- qemu_fdt_setprop_string(vbi->fdt, nodename, "status", "disabled");
- qemu_fdt_setprop_string(vbi->fdt, nodename, "secure-status", "okay");
+ qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
+ qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
}
g_free(nodename);
}
-static void create_rtc(const VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_rtc(const VirtMachineState *vms, qemu_irq *pic)
{
char *nodename;
- hwaddr base = vbi->memmap[VIRT_RTC].base;
- hwaddr size = vbi->memmap[VIRT_RTC].size;
- int irq = vbi->irqmap[VIRT_RTC];
+ hwaddr base = vms->memmap[VIRT_RTC].base;
+ hwaddr size = vms->memmap[VIRT_RTC].size;
+ int irq = vms->irqmap[VIRT_RTC];
const char compat[] = "arm,pl031\0arm,primecell";
sysbus_create_simple("pl031", base, pic[irq]);
nodename = g_strdup_printf("/pl031@%" PRIx64, base);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop(vbi->fdt, nodename, "compatible", compat, sizeof(compat));
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop(vms->fdt, nodename, "compatible", compat, sizeof(compat));
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, base, 2, size);
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
GIC_FDT_IRQ_TYPE_SPI, irq,
GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "clocks", vbi->clock_phandle);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "clock-names", "apb_pclk");
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "clocks", vms->clock_phandle);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "clock-names", "apb_pclk");
g_free(nodename);
}
@@ -703,45 +667,45 @@ static Notifier virt_system_powerdown_notifier = {
.notify = virt_powerdown_req
};
-static void create_gpio(const VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_gpio(const VirtMachineState *vms, qemu_irq *pic)
{
char *nodename;
DeviceState *pl061_dev;
- hwaddr base = vbi->memmap[VIRT_GPIO].base;
- hwaddr size = vbi->memmap[VIRT_GPIO].size;
- int irq = vbi->irqmap[VIRT_GPIO];
+ hwaddr base = vms->memmap[VIRT_GPIO].base;
+ hwaddr size = vms->memmap[VIRT_GPIO].size;
+ int irq = vms->irqmap[VIRT_GPIO];
const char compat[] = "arm,pl061\0arm,primecell";
pl061_dev = sysbus_create_simple("pl061", base, pic[irq]);
- uint32_t phandle = qemu_fdt_alloc_phandle(vbi->fdt);
+ uint32_t phandle = qemu_fdt_alloc_phandle(vms->fdt);
nodename = g_strdup_printf("/pl061@%" PRIx64, base);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, base, 2, size);
- qemu_fdt_setprop(vbi->fdt, nodename, "compatible", compat, sizeof(compat));
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "#gpio-cells", 2);
- qemu_fdt_setprop(vbi->fdt, nodename, "gpio-controller", NULL, 0);
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
+ qemu_fdt_setprop(vms->fdt, nodename, "compatible", compat, sizeof(compat));
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "#gpio-cells", 2);
+ qemu_fdt_setprop(vms->fdt, nodename, "gpio-controller", NULL, 0);
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
GIC_FDT_IRQ_TYPE_SPI, irq,
GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "clocks", vbi->clock_phandle);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "clock-names", "apb_pclk");
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "phandle", phandle);
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "clocks", vms->clock_phandle);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "clock-names", "apb_pclk");
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", phandle);
gpio_key_dev = sysbus_create_simple("gpio-key", -1,
qdev_get_gpio_in(pl061_dev, 3));
- qemu_fdt_add_subnode(vbi->fdt, "/gpio-keys");
- qemu_fdt_setprop_string(vbi->fdt, "/gpio-keys", "compatible", "gpio-keys");
- qemu_fdt_setprop_cell(vbi->fdt, "/gpio-keys", "#size-cells", 0);
- qemu_fdt_setprop_cell(vbi->fdt, "/gpio-keys", "#address-cells", 1);
+ qemu_fdt_add_subnode(vms->fdt, "/gpio-keys");
+ qemu_fdt_setprop_string(vms->fdt, "/gpio-keys", "compatible", "gpio-keys");
+ qemu_fdt_setprop_cell(vms->fdt, "/gpio-keys", "#size-cells", 0);
+ qemu_fdt_setprop_cell(vms->fdt, "/gpio-keys", "#address-cells", 1);
- qemu_fdt_add_subnode(vbi->fdt, "/gpio-keys/poweroff");
- qemu_fdt_setprop_string(vbi->fdt, "/gpio-keys/poweroff",
+ qemu_fdt_add_subnode(vms->fdt, "/gpio-keys/poweroff");
+ qemu_fdt_setprop_string(vms->fdt, "/gpio-keys/poweroff",
"label", "GPIO Key Poweroff");
- qemu_fdt_setprop_cell(vbi->fdt, "/gpio-keys/poweroff", "linux,code",
+ qemu_fdt_setprop_cell(vms->fdt, "/gpio-keys/poweroff", "linux,code",
KEY_POWER);
- qemu_fdt_setprop_cells(vbi->fdt, "/gpio-keys/poweroff",
+ qemu_fdt_setprop_cells(vms->fdt, "/gpio-keys/poweroff",
"gpios", phandle, 3, 0);
/* connect powerdown request */
@@ -750,10 +714,10 @@ static void create_gpio(const VirtBoardInfo *vbi, qemu_irq *pic)
g_free(nodename);
}
-static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_virtio_devices(const VirtMachineState *vms, qemu_irq *pic)
{
int i;
- hwaddr size = vbi->memmap[VIRT_MMIO].size;
+ hwaddr size = vms->memmap[VIRT_MMIO].size;
/* We create the transports in forwards order. Since qbus_realize()
* prepends (not appends) new child buses, the incrementing loop below will
@@ -783,8 +747,8 @@ static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
* of disks users must use UUIDs or similar mechanisms.
*/
for (i = 0; i < NUM_VIRTIO_TRANSPORTS; i++) {
- int irq = vbi->irqmap[VIRT_MMIO] + i;
- hwaddr base = vbi->memmap[VIRT_MMIO].base + i * size;
+ int irq = vms->irqmap[VIRT_MMIO] + i;
+ hwaddr base = vms->memmap[VIRT_MMIO].base + i * size;
sysbus_create_simple("virtio-mmio", base, pic[irq]);
}
@@ -798,16 +762,16 @@ static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
*/
for (i = NUM_VIRTIO_TRANSPORTS - 1; i >= 0; i--) {
char *nodename;
- int irq = vbi->irqmap[VIRT_MMIO] + i;
- hwaddr base = vbi->memmap[VIRT_MMIO].base + i * size;
+ int irq = vms->irqmap[VIRT_MMIO] + i;
+ hwaddr base = vms->memmap[VIRT_MMIO].base + i * size;
nodename = g_strdup_printf("/virtio_mmio@%" PRIx64, base);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename,
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename,
"compatible", "virtio,mmio");
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, base, 2, size);
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
GIC_FDT_IRQ_TYPE_SPI, irq,
GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
g_free(nodename);
@@ -870,7 +834,7 @@ static void create_one_flash(const char *name, hwaddr flashbase,
}
}
-static void create_flash(const VirtBoardInfo *vbi,
+static void create_flash(const VirtMachineState *vms,
MemoryRegion *sysmem,
MemoryRegion *secure_sysmem)
{
@@ -882,8 +846,8 @@ static void create_flash(const VirtBoardInfo *vbi,
* If sysmem == secure_sysmem this means there is no separate Secure
* address space and both flash devices are generally visible.
*/
- hwaddr flashsize = vbi->memmap[VIRT_FLASH].size / 2;
- hwaddr flashbase = vbi->memmap[VIRT_FLASH].base;
+ hwaddr flashsize = vms->memmap[VIRT_FLASH].size / 2;
+ hwaddr flashbase = vms->memmap[VIRT_FLASH].base;
char *nodename;
create_one_flash("virt.flash0", flashbase, flashsize,
@@ -894,41 +858,41 @@ static void create_flash(const VirtBoardInfo *vbi,
if (sysmem == secure_sysmem) {
/* Report both flash devices as a single node in the DT */
nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible", "cfi-flash");
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, flashbase, 2, flashsize,
2, flashbase + flashsize, 2, flashsize);
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "bank-width", 4);
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
g_free(nodename);
} else {
/* Report the devices as separate nodes so we can mark one as
* only visible to the secure world.
*/
nodename = g_strdup_printf("/secflash@%" PRIx64, flashbase);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible", "cfi-flash");
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, flashbase, 2, flashsize);
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "bank-width", 4);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "status", "disabled");
- qemu_fdt_setprop_string(vbi->fdt, nodename, "secure-status", "okay");
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
+ qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
g_free(nodename);
nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible", "cfi-flash");
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, flashbase + flashsize, 2, flashsize);
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "bank-width", 4);
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
g_free(nodename);
}
}
-static void create_fw_cfg(const VirtBoardInfo *vbi, AddressSpace *as)
+static FWCfgState *create_fw_cfg(const VirtMachineState *vms, AddressSpace *as)
{
- hwaddr base = vbi->memmap[VIRT_FW_CFG].base;
- hwaddr size = vbi->memmap[VIRT_FW_CFG].size;
+ hwaddr base = vms->memmap[VIRT_FW_CFG].base;
+ hwaddr size = vms->memmap[VIRT_FW_CFG].size;
FWCfgState *fw_cfg;
char *nodename;
@@ -936,15 +900,17 @@ static void create_fw_cfg(const VirtBoardInfo *vbi, AddressSpace *as)
fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename,
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename,
"compatible", "qemu,fw-cfg-mmio");
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, base, 2, size);
g_free(nodename);
+ return fw_cfg;
}
-static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
+static void create_pcie_irq_map(const VirtMachineState *vms,
+ uint32_t gic_phandle,
int first_irq, const char *nodename)
{
int devfn, pin;
@@ -971,28 +937,27 @@ static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
}
}
- qemu_fdt_setprop(vbi->fdt, nodename, "interrupt-map",
+ qemu_fdt_setprop(vms->fdt, nodename, "interrupt-map",
full_irq_map, sizeof(full_irq_map));
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupt-map-mask",
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupt-map-mask",
0x1800, 0, 0, /* devfn (PCI_SLOT(3)) */
0x7 /* PCI irq */);
}
-static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
- bool use_highmem)
+static void create_pcie(const VirtMachineState *vms, qemu_irq *pic)
{
- hwaddr base_mmio = vbi->memmap[VIRT_PCIE_MMIO].base;
- hwaddr size_mmio = vbi->memmap[VIRT_PCIE_MMIO].size;
- hwaddr base_mmio_high = vbi->memmap[VIRT_PCIE_MMIO_HIGH].base;
- hwaddr size_mmio_high = vbi->memmap[VIRT_PCIE_MMIO_HIGH].size;
- hwaddr base_pio = vbi->memmap[VIRT_PCIE_PIO].base;
- hwaddr size_pio = vbi->memmap[VIRT_PCIE_PIO].size;
- hwaddr base_ecam = vbi->memmap[VIRT_PCIE_ECAM].base;
- hwaddr size_ecam = vbi->memmap[VIRT_PCIE_ECAM].size;
+ hwaddr base_mmio = vms->memmap[VIRT_PCIE_MMIO].base;
+ hwaddr size_mmio = vms->memmap[VIRT_PCIE_MMIO].size;
+ hwaddr base_mmio_high = vms->memmap[VIRT_PCIE_MMIO_HIGH].base;
+ hwaddr size_mmio_high = vms->memmap[VIRT_PCIE_MMIO_HIGH].size;
+ hwaddr base_pio = vms->memmap[VIRT_PCIE_PIO].base;
+ hwaddr size_pio = vms->memmap[VIRT_PCIE_PIO].size;
+ hwaddr base_ecam = vms->memmap[VIRT_PCIE_ECAM].base;
+ hwaddr size_ecam = vms->memmap[VIRT_PCIE_ECAM].size;
hwaddr base = base_mmio;
int nr_pcie_buses = size_ecam / PCIE_MMCFG_SIZE_MIN;
- int irq = vbi->irqmap[VIRT_PCIE];
+ int irq = vms->irqmap[VIRT_PCIE];
MemoryRegion *mmio_alias;
MemoryRegion *mmio_reg;
MemoryRegion *ecam_alias;
@@ -1023,7 +988,7 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
mmio_reg, base_mmio, size_mmio);
memory_region_add_subregion(get_system_memory(), base_mmio, mmio_alias);
- if (use_highmem) {
+ if (vms->highmem) {
/* Map high MMIO space */
MemoryRegion *high_mmio_alias = g_new0(MemoryRegion, 1);
@@ -1054,26 +1019,26 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
}
nodename = g_strdup_printf("/pcie@%" PRIx64, base);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename,
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename,
"compatible", "pci-host-ecam-generic");
- qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "pci");
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "#address-cells", 3);
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "#size-cells", 2);
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "bus-range", 0,
+ qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "pci");
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "#address-cells", 3);
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "#size-cells", 2);
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "bus-range", 0,
nr_pcie_buses - 1);
- qemu_fdt_setprop(vbi->fdt, nodename, "dma-coherent", NULL, 0);
+ qemu_fdt_setprop(vms->fdt, nodename, "dma-coherent", NULL, 0);
- if (vbi->msi_phandle) {
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "msi-parent",
- vbi->msi_phandle);
+ if (vms->msi_phandle) {
+ qemu_fdt_setprop_cells(vms->fdt, nodename, "msi-parent",
+ vms->msi_phandle);
}
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
2, base_ecam, 2, size_ecam);
- if (use_highmem) {
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "ranges",
+ if (vms->highmem) {
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "ranges",
1, FDT_PCI_RANGE_IOPORT, 2, 0,
2, base_pio, 2, size_pio,
1, FDT_PCI_RANGE_MMIO, 2, base_mmio,
@@ -1082,20 +1047,20 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
2, base_mmio_high,
2, base_mmio_high, 2, size_mmio_high);
} else {
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "ranges",
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "ranges",
1, FDT_PCI_RANGE_IOPORT, 2, 0,
2, base_pio, 2, size_pio,
1, FDT_PCI_RANGE_MMIO, 2, base_mmio,
2, base_mmio, 2, size_mmio);
}
- qemu_fdt_setprop_cell(vbi->fdt, nodename, "#interrupt-cells", 1);
- create_pcie_irq_map(vbi, vbi->gic_phandle, irq, nodename);
+ qemu_fdt_setprop_cell(vms->fdt, nodename, "#interrupt-cells", 1);
+ create_pcie_irq_map(vms, vms->gic_phandle, irq, nodename);
g_free(nodename);
}
-static void create_platform_bus(VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_platform_bus(VirtMachineState *vms, qemu_irq *pic)
{
DeviceState *dev;
SysBusDevice *s;
@@ -1103,13 +1068,13 @@ static void create_platform_bus(VirtBoardInfo *vbi, qemu_irq *pic)
ARMPlatformBusFDTParams *fdt_params = g_new(ARMPlatformBusFDTParams, 1);
MemoryRegion *sysmem = get_system_memory();
- platform_bus_params.platform_bus_base = vbi->memmap[VIRT_PLATFORM_BUS].base;
- platform_bus_params.platform_bus_size = vbi->memmap[VIRT_PLATFORM_BUS].size;
- platform_bus_params.platform_bus_first_irq = vbi->irqmap[VIRT_PLATFORM_BUS];
+ platform_bus_params.platform_bus_base = vms->memmap[VIRT_PLATFORM_BUS].base;
+ platform_bus_params.platform_bus_size = vms->memmap[VIRT_PLATFORM_BUS].size;
+ platform_bus_params.platform_bus_first_irq = vms->irqmap[VIRT_PLATFORM_BUS];
platform_bus_params.platform_bus_num_irqs = PLATFORM_BUS_NUM_IRQS;
fdt_params->system_params = &platform_bus_params;
- fdt_params->binfo = &vbi->bootinfo;
+ fdt_params->binfo = &vms->bootinfo;
fdt_params->intc = "/intc";
/*
* register a machine init done notifier that creates the device tree
@@ -1136,43 +1101,44 @@ static void create_platform_bus(VirtBoardInfo *vbi, qemu_irq *pic)
sysbus_mmio_get_region(s, 0));
}
-static void create_secure_ram(VirtBoardInfo *vbi, MemoryRegion *secure_sysmem)
+static void create_secure_ram(VirtMachineState *vms,
+ MemoryRegion *secure_sysmem)
{
MemoryRegion *secram = g_new(MemoryRegion, 1);
char *nodename;
- hwaddr base = vbi->memmap[VIRT_SECURE_MEM].base;
- hwaddr size = vbi->memmap[VIRT_SECURE_MEM].size;
+ hwaddr base = vms->memmap[VIRT_SECURE_MEM].base;
+ hwaddr size = vms->memmap[VIRT_SECURE_MEM].size;
memory_region_init_ram(secram, NULL, "virt.secure-ram", size, &error_fatal);
vmstate_register_ram_global(secram);
memory_region_add_subregion(secure_sysmem, base, secram);
nodename = g_strdup_printf("/secram@%" PRIx64, base);
- qemu_fdt_add_subnode(vbi->fdt, nodename);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "memory");
- qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg", 2, base, 2, size);
- qemu_fdt_setprop_string(vbi->fdt, nodename, "status", "disabled");
- qemu_fdt_setprop_string(vbi->fdt, nodename, "secure-status", "okay");
+ qemu_fdt_add_subnode(vms->fdt, nodename);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "memory");
+ qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg", 2, base, 2, size);
+ qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
+ qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
g_free(nodename);
}
static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
{
- const VirtBoardInfo *board = (const VirtBoardInfo *)binfo;
+ const VirtMachineState *board = container_of(binfo, VirtMachineState,
+ bootinfo);
*fdt_size = board->fdt_size;
return board->fdt;
}
-static void virt_build_smbios(VirtGuestInfo *guest_info)
+static void virt_build_smbios(VirtMachineState *vms)
{
- FWCfgState *fw_cfg = guest_info->fw_cfg;
uint8_t *smbios_tables, *smbios_anchor;
size_t smbios_tables_len, smbios_anchor_len;
const char *product = "QEMU Virtual Machine";
- if (!fw_cfg) {
+ if (!vms->fw_cfg) {
return;
}
@@ -1187,20 +1153,21 @@ static void virt_build_smbios(VirtGuestInfo *guest_info)
&smbios_anchor, &smbios_anchor_len);
if (smbios_anchor) {
- fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-tables",
+ fw_cfg_add_file(vms->fw_cfg, "etc/smbios/smbios-tables",
smbios_tables, smbios_tables_len);
- fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-anchor",
+ fw_cfg_add_file(vms->fw_cfg, "etc/smbios/smbios-anchor",
smbios_anchor, smbios_anchor_len);
}
}
static
-void virt_guest_info_machine_done(Notifier *notifier, void *data)
+void virt_machine_done(Notifier *notifier, void *data)
{
- VirtGuestInfoState *guest_info_state = container_of(notifier,
- VirtGuestInfoState, machine_done);
- virt_acpi_setup(&guest_info_state->info);
- virt_build_smbios(&guest_info_state->info);
+ VirtMachineState *vms = container_of(notifier, VirtMachineState,
+ machine_done);
+
+ virt_acpi_setup(vms);
+ virt_build_smbios(vms);
}
static void machvirt_init(MachineState *machine)
@@ -1210,13 +1177,9 @@ static void machvirt_init(MachineState *machine)
qemu_irq pic[NUM_IRQS];
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *secure_sysmem = NULL;
- int gic_version = vms->gic_version;
int n, virt_max_cpus;
MemoryRegion *ram = g_new(MemoryRegion, 1);
const char *cpu_model = machine->cpu_model;
- VirtBoardInfo *vbi;
- VirtGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state);
- VirtGuestInfo *guest_info = &guest_info_state->info;
char **cpustr;
ObjectClass *oc;
const char *typename;
@@ -1232,14 +1195,14 @@ static void machvirt_init(MachineState *machine)
/* We can probe only here because during property set
* KVM is not available yet
*/
- if (!gic_version) {
+ if (!vms->gic_version) {
if (!kvm_enabled()) {
error_report("gic-version=host requires KVM");
exit(1);
}
- gic_version = kvm_arm_vgic_probe();
- if (!gic_version) {
+ vms->gic_version = kvm_arm_vgic_probe();
+ if (!vms->gic_version) {
error_report("Unable to determine GIC version supported by host");
exit(1);
}
@@ -1248,9 +1211,7 @@ static void machvirt_init(MachineState *machine)
/* Separate the actual CPU model name from any appended features */
cpustr = g_strsplit(cpu_model, ",", 2);
- vbi = find_machine_info(cpustr[0]);
-
- if (!vbi) {
+ if (!cpuname_valid(cpustr[0])) {
error_report("mach-virt: CPU %s not supported", cpustr[0]);
exit(1);
}
@@ -1262,13 +1223,13 @@ static void machvirt_init(MachineState *machine)
* let the boot ROM sort them out.
* The usual case is that we do use QEMU's PSCI implementation.
*/
- vbi->using_psci = !(vms->secure && firmware_loaded);
+ vms->using_psci = !(vms->secure && firmware_loaded);
/* The maximum number of CPUs depends on the GIC version, or on how
* many redistributors we can fit into the memory map.
*/
- if (gic_version == 3) {
- virt_max_cpus = vbi->memmap[VIRT_GIC_REDIST].size / 0x20000;
+ if (vms->gic_version == 3) {
+ virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / 0x20000;
clustersz = GICV3_TARGETLIST_BITS;
} else {
virt_max_cpus = GIC_NCPU;
@@ -1282,9 +1243,9 @@ static void machvirt_init(MachineState *machine)
exit(1);
}
- vbi->smp_cpus = smp_cpus;
+ vms->smp_cpus = smp_cpus;
- if (machine->ram_size > vbi->memmap[VIRT_MEM].size) {
+ if (machine->ram_size > vms->memmap[VIRT_MEM].size) {
error_report("mach-virt: cannot model more than %dGB RAM", RAMLIMIT_GB);
exit(1);
}
@@ -1306,7 +1267,7 @@ static void machvirt_init(MachineState *machine)
memory_region_add_subregion_overlap(secure_sysmem, 0, sysmem, -1);
}
- create_fdt(vbi);
+ create_fdt(vms);
oc = cpu_class_by_name(TYPE_ARM_CPU, cpustr[0]);
if (!oc) {
@@ -1345,7 +1306,7 @@ static void machvirt_init(MachineState *machine)
object_property_set_bool(cpuobj, false, "has_el3", NULL);
}
- if (vbi->using_psci) {
+ if (vms->using_psci) {
object_property_set_int(cpuobj, QEMU_PSCI_CONDUIT_HVC,
"psci-conduit", NULL);
@@ -1361,7 +1322,7 @@ static void machvirt_init(MachineState *machine)
}
if (object_property_find(cpuobj, "reset-cbar", NULL)) {
- object_property_set_int(cpuobj, vbi->memmap[VIRT_CPUPERIPHS].base,
+ object_property_set_int(cpuobj, vms->memmap[VIRT_CPUPERIPHS].base,
"reset-cbar", &error_abort);
}
@@ -1374,62 +1335,55 @@ static void machvirt_init(MachineState *machine)
object_property_set_bool(cpuobj, true, "realized", NULL);
}
- fdt_add_timer_nodes(vbi, gic_version);
- fdt_add_cpu_nodes(vbi);
- fdt_add_psci_node(vbi);
+ fdt_add_timer_nodes(vms);
+ fdt_add_cpu_nodes(vms);
+ fdt_add_psci_node(vms);
memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram",
machine->ram_size);
- memory_region_add_subregion(sysmem, vbi->memmap[VIRT_MEM].base, ram);
+ memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, ram);
- create_flash(vbi, sysmem, secure_sysmem ? secure_sysmem : sysmem);
+ create_flash(vms, sysmem, secure_sysmem ? secure_sysmem : sysmem);
- create_gic(vbi, pic, gic_version, vms->secure, vmc->no_its);
+ create_gic(vms, pic);
- fdt_add_pmu_nodes(vbi, gic_version);
+ fdt_add_pmu_nodes(vms);
- create_uart(vbi, pic, VIRT_UART, sysmem, serial_hds[0]);
+ create_uart(vms, pic, VIRT_UART, sysmem, serial_hds[0]);
if (vms->secure) {
- create_secure_ram(vbi, secure_sysmem);
- create_uart(vbi, pic, VIRT_SECURE_UART, secure_sysmem, serial_hds[1]);
+ create_secure_ram(vms, secure_sysmem);
+ create_uart(vms, pic, VIRT_SECURE_UART, secure_sysmem, serial_hds[1]);
}
- create_rtc(vbi, pic);
+ create_rtc(vms, pic);
- create_pcie(vbi, pic, vms->highmem);
+ create_pcie(vms, pic);
- create_gpio(vbi, pic);
+ create_gpio(vms, pic);
/* Create mmio transports, so the user can create virtio backends
* (which will be automatically plugged in to the transports). If
* no backend is created the transport will just sit harmlessly idle.
*/
- create_virtio_devices(vbi, pic);
-
- create_fw_cfg(vbi, &address_space_memory);
- rom_set_fw(fw_cfg_find());
-
- guest_info->smp_cpus = smp_cpus;
- guest_info->fw_cfg = fw_cfg_find();
- guest_info->memmap = vbi->memmap;
- guest_info->irqmap = vbi->irqmap;
- guest_info->use_highmem = vms->highmem;
- guest_info->gic_version = gic_version;
- guest_info->no_its = vmc->no_its;
- guest_info_state->machine_done.notify = virt_guest_info_machine_done;
- qemu_add_machine_init_done_notifier(&guest_info_state->machine_done);
-
- vbi->bootinfo.ram_size = machine->ram_size;
- vbi->bootinfo.kernel_filename = machine->kernel_filename;
- vbi->bootinfo.kernel_cmdline = machine->kernel_cmdline;
- vbi->bootinfo.initrd_filename = machine->initrd_filename;
- vbi->bootinfo.nb_cpus = smp_cpus;
- vbi->bootinfo.board_id = -1;
- vbi->bootinfo.loader_start = vbi->memmap[VIRT_MEM].base;
- vbi->bootinfo.get_dtb = machvirt_dtb;
- vbi->bootinfo.firmware_loaded = firmware_loaded;
- arm_load_kernel(ARM_CPU(first_cpu), &vbi->bootinfo);
+ create_virtio_devices(vms, pic);
+
+ vms->fw_cfg = create_fw_cfg(vms, &address_space_memory);
+ rom_set_fw(vms->fw_cfg);
+
+ vms->machine_done.notify = virt_machine_done;
+ qemu_add_machine_init_done_notifier(&vms->machine_done);
+
+ vms->bootinfo.ram_size = machine->ram_size;
+ vms->bootinfo.kernel_filename = machine->kernel_filename;
+ vms->bootinfo.kernel_cmdline = machine->kernel_cmdline;
+ vms->bootinfo.initrd_filename = machine->initrd_filename;
+ vms->bootinfo.nb_cpus = smp_cpus;
+ vms->bootinfo.board_id = -1;
+ vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base;
+ vms->bootinfo.get_dtb = machvirt_dtb;
+ vms->bootinfo.firmware_loaded = firmware_loaded;
+ arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo);
/*
* arm_load_kernel machine init done notifier registration must
@@ -1437,7 +1391,7 @@ static void machvirt_init(MachineState *machine)
* another notifier is registered which adds platform bus nodes.
* Notifiers are executed in registration reverse order.
*/
- create_platform_bus(vbi, pic);
+ create_platform_bus(vms, pic);
}
static bool virt_get_secure(Object *obj, Error **errp)
@@ -1556,6 +1510,9 @@ static void virt_2_9_instance_init(Object *obj)
object_property_set_description(obj, "gic-version",
"Set GIC version. "
"Valid values are 2, 3 and host", NULL);
+
+ vms->memmap = a15memmap;
+ vms->irqmap = a15irqmap;
}
static void virt_machine_2_9_options(MachineClass *mc)
@@ -1573,8 +1530,14 @@ static void virt_2_8_instance_init(Object *obj)
static void virt_machine_2_8_options(MachineClass *mc)
{
+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
+
virt_machine_2_9_options(mc);
SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_8);
+ /* For 2.8 and earlier we falsely claimed in the DT that
+ * our timers were edge-triggered, not level-triggered.
+ */
+ vmc->claim_edge_triggered_timers = true;
}
DEFINE_VIRT_MACHINE(2, 8)
diff --git a/hw/arm/z2.c b/hw/arm/z2.c
index b3a6bbd210..1607cbdb03 100644
--- a/hw/arm/z2.c
+++ b/hw/arm/z2.c
@@ -220,7 +220,7 @@ static int aer915_send(I2CSlave *i2c, uint8_t data)
return 0;
}
-static void aer915_event(I2CSlave *i2c, enum i2c_event event)
+static int aer915_event(I2CSlave *i2c, enum i2c_event event)
{
AER915State *s = AER915(i2c);
@@ -238,6 +238,8 @@ static void aer915_event(I2CSlave *i2c, enum i2c_event event)
default:
break;
}
+
+ return 0;
}
static int aer915_recv(I2CSlave *slave)
diff --git a/hw/audio/wm8750.c b/hw/audio/wm8750.c
index 0c6500e96a..f8b5bebfc2 100644
--- a/hw/audio/wm8750.c
+++ b/hw/audio/wm8750.c
@@ -303,7 +303,7 @@ static void wm8750_reset(I2CSlave *i2c)
s->i2c_len = 0;
}
-static void wm8750_event(I2CSlave *i2c, enum i2c_event event)
+static int wm8750_event(I2CSlave *i2c, enum i2c_event event)
{
WM8750State *s = WM8750(i2c);
@@ -321,6 +321,8 @@ static void wm8750_event(I2CSlave *i2c, enum i2c_event event)
default:
break;
}
+
+ return 0;
}
#define WM8750_LINVOL 0x00
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index e3c1166ea6..4c5f8c3590 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -28,6 +28,7 @@
#include "hw/ssi/ssi.h"
#include "qemu/bitops.h"
#include "qemu/log.h"
+#include "qemu/error-report.h"
#include "qapi/error.h"
#ifndef M25P80_ERR_DEBUG
@@ -377,6 +378,8 @@ typedef enum {
MAN_GENERIC,
} Manufacturer;
+#define M25P80_INTERNAL_DATA_BUFFER_SZ 16
+
typedef struct Flash {
SSISlave parent_obj;
@@ -387,7 +390,7 @@ typedef struct Flash {
int page_size;
uint8_t state;
- uint8_t data[16];
+ uint8_t data[M25P80_INTERNAL_DATA_BUFFER_SZ];
uint32_t len;
uint32_t pos;
uint8_t needed_bytes;
@@ -1115,6 +1118,17 @@ static uint32_t m25p80_transfer8(SSISlave *ss, uint32_t tx)
case STATE_COLLECTING_DATA:
case STATE_COLLECTING_VAR_LEN_DATA:
+
+ if (s->len >= M25P80_INTERNAL_DATA_BUFFER_SZ) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "M25P80: Write overrun internal data buffer. "
+ "SPI controller (QEMU emulator or guest driver) "
+ "is misbehaving\n");
+ s->len = s->pos = 0;
+ s->state = STATE_IDLE;
+ break;
+ }
+
s->data[s->len] = (uint8_t)tx;
s->len++;
@@ -1124,6 +1138,17 @@ static uint32_t m25p80_transfer8(SSISlave *ss, uint32_t tx)
break;
case STATE_READING_DATA:
+
+ if (s->pos >= M25P80_INTERNAL_DATA_BUFFER_SZ) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "M25P80: Read overrun internal data buffer. "
+ "SPI controller (QEMU emulator or guest driver) "
+ "is misbehaving\n");
+ s->len = s->pos = 0;
+ s->state = STATE_IDLE;
+ break;
+ }
+
r = s->data[s->pos];
s->pos++;
if (s->pos == s->len) {
@@ -1196,7 +1221,7 @@ static const VMStateDescription vmstate_m25p80 = {
.pre_save = m25p80_pre_save,
.fields = (VMStateField[]) {
VMSTATE_UINT8(state, Flash),
- VMSTATE_UINT8_ARRAY(data, Flash, 16),
+ VMSTATE_UINT8_ARRAY(data, Flash, M25P80_INTERNAL_DATA_BUFFER_SZ),
VMSTATE_UINT32(len, Flash),
VMSTATE_UINT32(pos, Flash),
VMSTATE_UINT8(needed_bytes, Flash),
diff --git a/hw/char/exynos4210_uart.c b/hw/char/exynos4210_uart.c
index 571c324004..820d1abeb9 100644
--- a/hw/char/exynos4210_uart.c
+++ b/hw/char/exynos4210_uart.c
@@ -629,22 +629,26 @@ DeviceState *exynos4210_uart_create(hwaddr addr,
return dev;
}
-static int exynos4210_uart_init(SysBusDevice *dev)
+static void exynos4210_uart_init(Object *obj)
{
+ SysBusDevice *dev = SYS_BUS_DEVICE(obj);
Exynos4210UartState *s = EXYNOS4210_UART(dev);
/* memory mapping */
- memory_region_init_io(&s->iomem, OBJECT(s), &exynos4210_uart_ops, s,
+ memory_region_init_io(&s->iomem, obj, &exynos4210_uart_ops, s,
"exynos4210.uart", EXYNOS4210_UART_REGS_MEM_SIZE);
sysbus_init_mmio(dev, &s->iomem);
sysbus_init_irq(dev, &s->irq);
+}
+
+static void exynos4210_uart_realize(DeviceState *dev, Error **errp)
+{
+ Exynos4210UartState *s = EXYNOS4210_UART(dev);
qemu_chr_fe_set_handlers(&s->chr, exynos4210_uart_can_receive,
exynos4210_uart_receive, exynos4210_uart_event,
s, NULL, true);
-
- return 0;
}
static Property exynos4210_uart_properties[] = {
@@ -658,9 +662,8 @@ static Property exynos4210_uart_properties[] = {
static void exynos4210_uart_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
- SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
- k->init = exynos4210_uart_init;
+ dc->realize = exynos4210_uart_realize;
dc->reset = exynos4210_uart_reset;
dc->props = exynos4210_uart_properties;
dc->vmsd = &vmstate_exynos4210_uart;
@@ -670,6 +673,7 @@ static const TypeInfo exynos4210_uart_info = {
.name = TYPE_EXYNOS4210_UART,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(Exynos4210UartState),
+ .instance_init = exynos4210_uart_init,
.class_init = exynos4210_uart_class_init,
};
diff --git a/hw/display/ssd0303.c b/hw/display/ssd0303.c
index d3017563f3..68a80b9d64 100644
--- a/hw/display/ssd0303.c
+++ b/hw/display/ssd0303.c
@@ -179,7 +179,7 @@ static int ssd0303_send(I2CSlave *i2c, uint8_t data)
return 0;
}
-static void ssd0303_event(I2CSlave *i2c, enum i2c_event event)
+static int ssd0303_event(I2CSlave *i2c, enum i2c_event event)
{
ssd0303_state *s = SSD0303(i2c);
@@ -193,6 +193,8 @@ static void ssd0303_event(I2CSlave *i2c, enum i2c_event event)
/* Nothing to do. */
break;
}
+
+ return 0;
}
static void ssd0303_update_display(void *opaque)
diff --git a/hw/gpio/max7310.c b/hw/gpio/max7310.c
index 1bd5eaf911..f82e3e6555 100644
--- a/hw/gpio/max7310.c
+++ b/hw/gpio/max7310.c
@@ -129,7 +129,7 @@ static int max7310_tx(I2CSlave *i2c, uint8_t data)
return 0;
}
-static void max7310_event(I2CSlave *i2c, enum i2c_event event)
+static int max7310_event(I2CSlave *i2c, enum i2c_event event)
{
MAX7310State *s = MAX7310(i2c);
s->len = 0;
@@ -147,6 +147,8 @@ static void max7310_event(I2CSlave *i2c, enum i2c_event event)
default:
break;
}
+
+ return 0;
}
static const VMStateDescription vmstate_max7310 = {
diff --git a/hw/i2c/core.c b/hw/i2c/core.c
index e40781ea3b..2c1234cdff 100644
--- a/hw/i2c/core.c
+++ b/hw/i2c/core.c
@@ -88,18 +88,26 @@ int i2c_bus_busy(I2CBus *bus)
return !QLIST_EMPTY(&bus->current_devs);
}
+/* TODO: Make this handle multiple masters. */
/*
- * Returns non-zero if the address is not valid. If this is called
- * again without an intervening i2c_end_transfer(), like in the SMBus
- * case where the operation is switched from write to read, this
- * function will not rescan the bus and thus cannot fail.
+ * Start or continue an i2c transaction. When this is called for the
+ * first time or after an i2c_end_transfer(), if it returns an error
+ * the bus transaction is terminated (or really never started). If
+ * this is called after another i2c_start_transfer() without an
+ * intervening i2c_end_transfer(), and it returns an error, the
+ * transaction will not be terminated. The caller must do it.
+ *
+ * This corresponds with the way real hardware works. The SMBus
+ * protocol uses a start transfer to switch from write to read mode
+ * without releasing the bus. If that fails, the bus is still
+ * in a transaction.
*/
-/* TODO: Make this handle multiple masters. */
int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv)
{
BusChild *kid;
I2CSlaveClass *sc;
I2CNode *node;
+ bool bus_scanned = false;
if (address == I2C_BROADCAST) {
/*
@@ -130,6 +138,7 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv)
}
}
}
+ bus_scanned = true;
}
if (QLIST_EMPTY(&bus->current_devs)) {
@@ -137,11 +146,21 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv)
}
QLIST_FOREACH(node, &bus->current_devs, next) {
+ int rv;
+
sc = I2C_SLAVE_GET_CLASS(node->elt);
/* If the bus is already busy, assume this is a repeated
start condition. */
+
if (sc->event) {
- sc->event(node->elt, recv ? I2C_START_RECV : I2C_START_SEND);
+ rv = sc->event(node->elt, recv ? I2C_START_RECV : I2C_START_SEND);
+ if (rv && !bus->broadcast) {
+ if (bus_scanned) {
+ /* First call, terminate the transfer. */
+ i2c_end_transfer(bus);
+ }
+ return rv;
+ }
}
}
return 0;
diff --git a/hw/i2c/i2c-ddc.c b/hw/i2c/i2c-ddc.c
index 1227212934..66899d7233 100644
--- a/hw/i2c/i2c-ddc.c
+++ b/hw/i2c/i2c-ddc.c
@@ -230,13 +230,15 @@ static void i2c_ddc_reset(DeviceState *ds)
s->reg = 0;
}
-static void i2c_ddc_event(I2CSlave *i2c, enum i2c_event event)
+static int i2c_ddc_event(I2CSlave *i2c, enum i2c_event event)
{
I2CDDCState *s = I2CDDC(i2c);
if (event == I2C_START_SEND) {
s->firstbyte = true;
}
+
+ return 0;
}
static int i2c_ddc_rx(I2CSlave *i2c)
diff --git a/hw/i2c/smbus.c b/hw/i2c/smbus.c
index 5b4dd3eba4..2d1b79a689 100644
--- a/hw/i2c/smbus.c
+++ b/hw/i2c/smbus.c
@@ -67,7 +67,7 @@ static void smbus_do_write(SMBusDevice *dev)
}
}
-static void smbus_i2c_event(I2CSlave *s, enum i2c_event event)
+static int smbus_i2c_event(I2CSlave *s, enum i2c_event event)
{
SMBusDevice *dev = SMBUS_DEVICE(s);
@@ -148,6 +148,8 @@ static void smbus_i2c_event(I2CSlave *s, enum i2c_event event)
break;
}
}
+
+ return 0;
}
static int smbus_i2c_recv(I2CSlave *s)
@@ -249,7 +251,8 @@ int smbus_read_byte(I2CBus *bus, uint8_t addr, uint8_t command)
}
i2c_send(bus, command);
if (i2c_start_transfer(bus, addr, 1)) {
- assert(0);
+ i2c_end_transfer(bus);
+ return -1;
}
data = i2c_recv(bus);
i2c_nack(bus);
@@ -276,7 +279,8 @@ int smbus_read_word(I2CBus *bus, uint8_t addr, uint8_t command)
}
i2c_send(bus, command);
if (i2c_start_transfer(bus, addr, 1)) {
- assert(0);
+ i2c_end_transfer(bus);
+ return -1;
}
data = i2c_recv(bus);
data |= i2c_recv(bus) << 8;
@@ -307,7 +311,8 @@ int smbus_read_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data)
}
i2c_send(bus, command);
if (i2c_start_transfer(bus, addr, 1)) {
- assert(0);
+ i2c_end_transfer(bus);
+ return -1;
}
len = i2c_recv(bus);
if (len > 32) {
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 47b79d9112..e0732ccaf1 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -562,7 +562,7 @@ static void amdvi_mmio_trace(hwaddr addr, unsigned size)
trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
} else {
index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
- trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
+ trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
}
}
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 884926e9e7..0d3dc6a9f2 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -49,8 +49,8 @@
#define AMDVI_CAPAB_INIT_TYPE (3 << 16)
/* No. of used MMIO registers */
-#define AMDVI_MMIO_REGS_HIGH 8
-#define AMDVI_MMIO_REGS_LOW 7
+#define AMDVI_MMIO_REGS_HIGH 7
+#define AMDVI_MMIO_REGS_LOW 8
/* MMIO registers */
#define AMDVI_MMIO_DEVICE_TABLE 0x0000
diff --git a/hw/input/lm832x.c b/hw/input/lm832x.c
index 539682cac8..2340523da0 100644
--- a/hw/input/lm832x.c
+++ b/hw/input/lm832x.c
@@ -383,7 +383,7 @@ static void lm_kbd_write(LM823KbdState *s, int reg, int byte, uint8_t value)
}
}
-static void lm_i2c_event(I2CSlave *i2c, enum i2c_event event)
+static int lm_i2c_event(I2CSlave *i2c, enum i2c_event event)
{
LM823KbdState *s = LM8323(i2c);
@@ -397,6 +397,8 @@ static void lm_i2c_event(I2CSlave *i2c, enum i2c_event event)
default:
break;
}
+
+ return 0;
}
static int lm_i2c_rx(I2CSlave *i2c)
diff --git a/hw/misc/tmp105.c b/hw/misc/tmp105.c
index f5c2472b5b..04e83787d4 100644
--- a/hw/misc/tmp105.c
+++ b/hw/misc/tmp105.c
@@ -176,7 +176,7 @@ static int tmp105_tx(I2CSlave *i2c, uint8_t data)
return 0;
}
-static void tmp105_event(I2CSlave *i2c, enum i2c_event event)
+static int tmp105_event(I2CSlave *i2c, enum i2c_event event)
{
TMP105State *s = TMP105(i2c);
@@ -185,6 +185,7 @@ static void tmp105_event(I2CSlave *i2c, enum i2c_event event)
}
s->len = 0;
+ return 0;
}
static int tmp105_post_load(void *opaque, int version_id)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 24fae1689d..637d54549e 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -982,8 +982,8 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
pci_get_function_0(pci_dev)) {
error_setg(errp, "PCI: slot %d function 0 already ocuppied by %s,"
" new func %s cannot be exposed to guest.",
- PCI_SLOT(devfn),
- bus->devices[PCI_DEVFN(PCI_SLOT(devfn), 0)]->name,
+ PCI_SLOT(pci_get_function_0(pci_dev)->devfn),
+ pci_get_function_0(pci_dev)->name,
name);
return NULL;
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index f5c1d98192..07650683f7 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -1098,7 +1098,7 @@ static int virtio_ccw_set_guest_notifier(VirtioCcwDevice *dev, int n,
* We do not support individual masking for channel devices, so we
* need to manually trigger any guest masking callbacks here.
*/
- if (k->guest_notifier_mask) {
+ if (k->guest_notifier_mask && vdev->use_guest_notifier_mask) {
k->guest_notifier_mask(vdev, n, false);
}
/* get lost events and re-inject */
@@ -1107,7 +1107,7 @@ static int virtio_ccw_set_guest_notifier(VirtioCcwDevice *dev, int n,
event_notifier_set(notifier);
}
} else {
- if (k->guest_notifier_mask) {
+ if (k->guest_notifier_mask && vdev->use_guest_notifier_mask) {
k->guest_notifier_mask(vdev, n, true);
}
if (with_irqfd) {
diff --git a/hw/ssi/imx_spi.c b/hw/ssi/imx_spi.c
index e4e395fa67..b66505ca49 100644
--- a/hw/ssi/imx_spi.c
+++ b/hw/ssi/imx_spi.c
@@ -320,9 +320,6 @@ static void imx_spi_write(void *opaque, hwaddr offset, uint64_t value,
TYPE_IMX_SPI, __func__);
break;
case ECSPI_TXDATA:
- case ECSPI_MSGDATA:
- /* Is there any difference between TXDATA and MSGDATA ? */
- /* I'll have to look in the linux driver */
if (!imx_spi_is_enabled(s)) {
/* Ignore writes if device is disabled */
break;
@@ -380,6 +377,14 @@ static void imx_spi_write(void *opaque, hwaddr offset, uint64_t value,
}
break;
+ case ECSPI_MSGDATA:
+ /* it is not clear from the spec what MSGDATA is for */
+ /* Anyway it is not used by Linux driver */
+ /* So for now we just ignore it */
+ qemu_log_mask(LOG_UNIMP,
+ "[%s]%s: Trying to write to MSGDATA, ignoring\n",
+ TYPE_IMX_SPI, __func__);
+ break;
default:
s->regs[index] = value;
diff --git a/hw/timer/ds1338.c b/hw/timer/ds1338.c
index f5d04dd5d7..3849b74a68 100644
--- a/hw/timer/ds1338.c
+++ b/hw/timer/ds1338.c
@@ -94,7 +94,7 @@ static void inc_regptr(DS1338State *s)
}
}
-static void ds1338_event(I2CSlave *i2c, enum i2c_event event)
+static int ds1338_event(I2CSlave *i2c, enum i2c_event event)
{
DS1338State *s = DS1338(i2c);
@@ -113,6 +113,8 @@ static void ds1338_event(I2CSlave *i2c, enum i2c_event event)
default:
break;
}
+
+ return 0;
}
static int ds1338_recv(I2CSlave *i2c)
diff --git a/hw/timer/twl92230.c b/hw/timer/twl92230.c
index 7ba4e9a7c9..b8d914e49b 100644
--- a/hw/timer/twl92230.c
+++ b/hw/timer/twl92230.c
@@ -713,12 +713,14 @@ static void menelaus_write(void *opaque, uint8_t addr, uint8_t value)
}
}
-static void menelaus_event(I2CSlave *i2c, enum i2c_event event)
+static int menelaus_event(I2CSlave *i2c, enum i2c_event event)
{
MenelausState *s = TWL92230(i2c);
if (event == I2C_START_SEND)
s->firstbyte = 1;
+
+ return 0;
}
static int menelaus_tx(I2CSlave *i2c, uint8_t data)
diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 17412cb7b5..60654dc19d 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -402,7 +402,7 @@ static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign,
event_notifier_cleanup(notifier);
}
- if (vdc->guest_notifier_mask) {
+ if (vdc->guest_notifier_mask && vdev->use_guest_notifier_mask) {
vdc->guest_notifier_mask(vdev, n, !assign);
}
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 83a423c580..4e4562d444 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -184,7 +184,7 @@ struct BlockDriver {
/*
* Flushes all data that was already written to the OS all the way down to
- * the disk (for example raw-posix calls fsync()).
+ * the disk (for example file-posix.c calls fsync()).
*/
int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index 154f3b82f6..d43ec005cb 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -191,10 +191,8 @@ struct AcpiFadtDescriptorRev5_1 {
typedef struct AcpiFadtDescriptorRev5_1 AcpiFadtDescriptorRev5_1;
-enum {
- ACPI_FADT_ARM_USE_PSCI_G_0_2 = 0,
- ACPI_FADT_ARM_PSCI_USE_HVC = 1,
-};
+#define ACPI_FADT_ARM_PSCI_COMPLIANT (1 << 0)
+#define ACPI_FADT_ARM_PSCI_USE_HVC (1 << 1)
/*
* Serial Port Console Redirection Table (SPCR), Rev. 1.02
@@ -290,7 +288,7 @@ typedef struct AcpiMultipleApicTable AcpiMultipleApicTable;
#define ACPI_APIC_XRUPT_SOURCE 8
#define ACPI_APIC_LOCAL_X2APIC 9
#define ACPI_APIC_LOCAL_X2APIC_NMI 10
-#define ACPI_APIC_GENERIC_INTERRUPT 11
+#define ACPI_APIC_GENERIC_CPU_INTERFACE 11
#define ACPI_APIC_GENERIC_DISTRIBUTOR 12
#define ACPI_APIC_GENERIC_MSI_FRAME 13
#define ACPI_APIC_GENERIC_REDISTRIBUTOR 14
@@ -361,7 +359,7 @@ struct AcpiMadtLocalX2ApicNmi {
} QEMU_PACKED;
typedef struct AcpiMadtLocalX2ApicNmi AcpiMadtLocalX2ApicNmi;
-struct AcpiMadtGenericInterrupt {
+struct AcpiMadtGenericCpuInterface {
ACPI_SUB_HEADER_DEF
uint16_t reserved;
uint32_t cpu_interface_number;
@@ -378,7 +376,10 @@ struct AcpiMadtGenericInterrupt {
uint64_t arm_mpidr;
} QEMU_PACKED;
-typedef struct AcpiMadtGenericInterrupt AcpiMadtGenericInterrupt;
+typedef struct AcpiMadtGenericCpuInterface AcpiMadtGenericCpuInterface;
+
+/* GICC CPU Interface Flags */
+#define ACPI_MADT_GICC_ENABLED 1
struct AcpiMadtGenericDistributor {
ACPI_SUB_HEADER_DEF
@@ -427,21 +428,9 @@ typedef struct AcpiMadtGenericTranslator AcpiMadtGenericTranslator;
/*
* Generic Timer Description Table (GTDT)
*/
-
-#define ACPI_GTDT_INTERRUPT_MODE (1 << 0)
-#define ACPI_GTDT_INTERRUPT_POLARITY (1 << 1)
-#define ACPI_GTDT_ALWAYS_ON (1 << 2)
-
-/* Triggering */
-
-#define ACPI_LEVEL_SENSITIVE ((uint8_t) 0x00)
-#define ACPI_EDGE_SENSITIVE ((uint8_t) 0x01)
-
-/* Polarity */
-
-#define ACPI_ACTIVE_HIGH ((uint8_t) 0x00)
-#define ACPI_ACTIVE_LOW ((uint8_t) 0x01)
-#define ACPI_ACTIVE_BOTH ((uint8_t) 0x02)
+#define ACPI_GTDT_INTERRUPT_MODE_LEVEL (0 << 0)
+#define ACPI_GTDT_INTERRUPT_MODE_EDGE (1 << 0)
+#define ACPI_GTDT_CAP_ALWAYS_ON (1 << 2)
struct AcpiGenericTimerTable {
ACPI_TABLE_HEADER_DEF
diff --git a/include/hw/arm/virt-acpi-build.h b/include/hw/arm/virt-acpi-build.h
deleted file mode 100644
index f5ec749b8f..0000000000
--- a/include/hw/arm/virt-acpi-build.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- *
- * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD.
- *
- * Author: Shannon Zhao <zhaoshenglong@huawei.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2 or later, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef QEMU_VIRT_ACPI_BUILD_H
-#define QEMU_VIRT_ACPI_BUILD_H
-
-#include "qemu-common.h"
-#include "hw/arm/virt.h"
-#include "qemu/notify.h"
-
-#define ACPI_GICC_ENABLED 1
-
-typedef struct VirtGuestInfo {
- int smp_cpus;
- FWCfgState *fw_cfg;
- const MemMapEntry *memmap;
- const int *irqmap;
- bool use_highmem;
- int gic_version;
- bool no_its;
-} VirtGuestInfo;
-
-
-typedef struct VirtGuestInfoState {
- VirtGuestInfo info;
- Notifier machine_done;
-} VirtGuestInfoState;
-
-void virt_acpi_setup(VirtGuestInfo *guest_info);
-
-#endif
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 9650193253..eb1c63d688 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -32,6 +32,9 @@
#include "qemu-common.h"
#include "exec/hwaddr.h"
+#include "qemu/notify.h"
+#include "hw/boards.h"
+#include "hw/arm/arm.h"
#define NUM_GICV2M_SPIS 64
#define NUM_VIRTIO_TRANSPORTS 32
@@ -74,5 +77,41 @@ typedef struct MemMapEntry {
hwaddr size;
} MemMapEntry;
+typedef struct {
+ MachineClass parent;
+ bool disallow_affinity_adjustment;
+ bool no_its;
+ bool no_pmu;
+ bool claim_edge_triggered_timers;
+} VirtMachineClass;
-#endif
+typedef struct {
+ MachineState parent;
+ Notifier machine_done;
+ FWCfgState *fw_cfg;
+ bool secure;
+ bool highmem;
+ int32_t gic_version;
+ struct arm_boot_info bootinfo;
+ const MemMapEntry *memmap;
+ const int *irqmap;
+ int smp_cpus;
+ void *fdt;
+ int fdt_size;
+ uint32_t clock_phandle;
+ uint32_t gic_phandle;
+ uint32_t msi_phandle;
+ bool using_psci;
+} VirtMachineState;
+
+#define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt")
+#define VIRT_MACHINE(obj) \
+ OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE)
+#define VIRT_MACHINE_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(VirtMachineClass, obj, TYPE_VIRT_MACHINE)
+#define VIRT_MACHINE_CLASS(klass) \
+ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE)
+
+void virt_acpi_setup(VirtMachineState *vms);
+
+#endif /* QEMU_ARM_VIRT_H */
diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h
index c4085aa366..2ce611d4c8 100644
--- a/include/hw/i2c/i2c.h
+++ b/include/hw/i2c/i2c.h
@@ -32,14 +32,22 @@ typedef struct I2CSlaveClass
/* Callbacks provided by the device. */
int (*init)(I2CSlave *dev);
- /* Master to slave. */
+ /* Master to slave. Returns non-zero for a NAK, 0 for success. */
int (*send)(I2CSlave *s, uint8_t data);
- /* Slave to master. */
+ /*
+ * Slave to master. This cannot fail, the device should always
+ * return something here. Negative values probably result in 0xff
+ * and a possible log from the driver, and shouldn't be used.
+ */
int (*recv)(I2CSlave *s);
- /* Notify the slave of a bus state change. */
- void (*event)(I2CSlave *s, enum i2c_event event);
+ /*
+ * Notify the slave of a bus state change. For start event,
+ * returns non-zero to NAK an operation. For other events the
+ * return code is not used and should be zero.
+ */
+ int (*event)(I2CSlave *s, enum i2c_event event);
} I2CSlaveClass;
struct I2CSlave
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index e6a60d55fd..12584ed1b7 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -71,6 +71,12 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque);
void qemu_coroutine_enter(Coroutine *coroutine);
/**
+ * Transfer control to a coroutine if it's not active (i.e. part of the call
+ * stack of the running coroutine). Otherwise, do nothing.
+ */
+void qemu_coroutine_enter_if_inactive(Coroutine *co);
+
+/**
* Transfer control back to a coroutine's caller
*
* This function does not return until the coroutine is re-entered using
diff --git a/qemu-img.c b/qemu-img.c
index 6949b73ca5..5df66fe661 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3559,20 +3559,23 @@ static void bench_cb(void *opaque, int ret)
}
while (b->n > b->in_flight && b->in_flight < b->nrreq) {
+ int64_t offset = b->offset;
+ /* blk_aio_* might look for completed I/Os and kick bench_cb
+ * again, so make sure this operation is counted by in_flight
+ * and b->offset is ready for the next submission.
+ */
+ b->in_flight++;
+ b->offset += b->step;
+ b->offset %= b->image_size;
if (b->write) {
- acb = blk_aio_pwritev(b->blk, b->offset, b->qiov, 0,
- bench_cb, b);
+ acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
} else {
- acb = blk_aio_preadv(b->blk, b->offset, b->qiov, 0,
- bench_cb, b);
+ acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
}
if (!acb) {
error_report("Failed to issue request");
exit(EXIT_FAILURE);
}
- b->in_flight++;
- b->offset += b->step;
- b->offset %= b->image_size;
}
}
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 4841d582a1..f776404d86 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -689,7 +689,7 @@ tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y)
tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y)
tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y)
tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y)
-tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o
+tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o contrib/libvhost-user/libvhost-user.o $(test-util-obj-y)
tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y)
tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o
diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out
index 8ff423f56b..dd879f1212 100644
--- a/tests/qemu-iotests/071.out
+++ b/tests/qemu-iotests/071.out
@@ -12,7 +12,7 @@ read 512/512 bytes at offset 229376
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
+blkverify: read offset=0 bytes=512 contents mismatch at offset 0
=== Testing blkverify through file blockref ===
@@ -26,7 +26,7 @@ read 512/512 bytes at offset 229376
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
+blkverify: read offset=0 bytes=512 contents mismatch at offset 0
=== Testing blkdebug through filename ===
@@ -56,7 +56,7 @@ QMP_VERSION
{"return": {}}
{"return": {}}
{"return": {}}
-blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
+blkverify: read offset=0 bytes=512 contents mismatch at offset 0
=== Testing blkverify on existing raw block device ===
@@ -66,7 +66,7 @@ QMP_VERSION
{"return": {}}
{"return": {}}
{"return": {}}
-blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
+blkverify: read offset=0 bytes=512 contents mismatch at offset 0
=== Testing blkdebug's set-state through QMP ===
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 775e031069..8618c20d53 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -30,17 +30,9 @@
#define _FILE_OFFSET_BITS 64
#include "qemu/osdep.h"
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/unistd.h>
-#include <sys/eventfd.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <linux/vhost.h>
-
-#include "qemu/atomic.h"
+#include "qemu/iov.h"
#include "standard-headers/linux/virtio_net.h"
-#include "standard-headers/linux/virtio_ring.h"
+#include "contrib/libvhost-user/libvhost-user.h"
#define VHOST_USER_BRIDGE_DEBUG 1
@@ -64,6 +56,17 @@ typedef struct Dispatcher {
Event events[FD_SETSIZE];
} Dispatcher;
+typedef struct VubrDev {
+ VuDev vudev;
+ Dispatcher dispatcher;
+ int backend_udp_sock;
+ struct sockaddr_in backend_udp_dest;
+ int hdrlen;
+ int sock;
+ int ready;
+ int quit;
+} VubrDev;
+
static void
vubr_die(const char *s)
{
@@ -101,8 +104,6 @@ dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
return 0;
}
-/* dispatcher_remove() is not currently in use but may be useful
- * in the future. */
static int
dispatcher_remove(Dispatcher *dispr, int sock)
{
@@ -157,1039 +158,313 @@ dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
return 0;
}
-typedef struct VubrVirtq {
- int call_fd;
- int kick_fd;
- uint32_t size;
- uint16_t last_avail_index;
- uint16_t last_used_index;
- struct vring_desc *desc;
- struct vring_avail *avail;
- struct vring_used *used;
- uint64_t log_guest_addr;
- int enable;
-} VubrVirtq;
-
-/* Based on qemu/hw/virtio/vhost-user.c */
-
-#define VHOST_MEMORY_MAX_NREGIONS 8
-#define VHOST_USER_F_PROTOCOL_FEATURES 30
-/* v1.0 compliant. */
-#define VIRTIO_F_VERSION_1 32
-
-#define VHOST_LOG_PAGE 4096
-
-enum VhostUserProtocolFeature {
- VHOST_USER_PROTOCOL_F_MQ = 0,
- VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
- VHOST_USER_PROTOCOL_F_RARP = 2,
-
- VHOST_USER_PROTOCOL_F_MAX
-};
-
-#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
-
-typedef enum VhostUserRequest {
- VHOST_USER_NONE = 0,
- VHOST_USER_GET_FEATURES = 1,
- VHOST_USER_SET_FEATURES = 2,
- VHOST_USER_SET_OWNER = 3,
- VHOST_USER_RESET_OWNER = 4,
- VHOST_USER_SET_MEM_TABLE = 5,
- VHOST_USER_SET_LOG_BASE = 6,
- VHOST_USER_SET_LOG_FD = 7,
- VHOST_USER_SET_VRING_NUM = 8,
- VHOST_USER_SET_VRING_ADDR = 9,
- VHOST_USER_SET_VRING_BASE = 10,
- VHOST_USER_GET_VRING_BASE = 11,
- VHOST_USER_SET_VRING_KICK = 12,
- VHOST_USER_SET_VRING_CALL = 13,
- VHOST_USER_SET_VRING_ERR = 14,
- VHOST_USER_GET_PROTOCOL_FEATURES = 15,
- VHOST_USER_SET_PROTOCOL_FEATURES = 16,
- VHOST_USER_GET_QUEUE_NUM = 17,
- VHOST_USER_SET_VRING_ENABLE = 18,
- VHOST_USER_SEND_RARP = 19,
- VHOST_USER_MAX
-} VhostUserRequest;
-
-typedef struct VhostUserMemoryRegion {
- uint64_t guest_phys_addr;
- uint64_t memory_size;
- uint64_t userspace_addr;
- uint64_t mmap_offset;
-} VhostUserMemoryRegion;
-
-typedef struct VhostUserMemory {
- uint32_t nregions;
- uint32_t padding;
- VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
-} VhostUserMemory;
-
-typedef struct VhostUserLog {
- uint64_t mmap_size;
- uint64_t mmap_offset;
-} VhostUserLog;
-
-typedef struct VhostUserMsg {
- VhostUserRequest request;
-
-#define VHOST_USER_VERSION_MASK (0x3)
-#define VHOST_USER_REPLY_MASK (0x1<<2)
- uint32_t flags;
- uint32_t size; /* the following payload size */
- union {
-#define VHOST_USER_VRING_IDX_MASK (0xff)
-#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
- uint64_t u64;
- struct vhost_vring_state state;
- struct vhost_vring_addr addr;
- VhostUserMemory memory;
- VhostUserLog log;
- } payload;
- int fds[VHOST_MEMORY_MAX_NREGIONS];
- int fd_num;
-} QEMU_PACKED VhostUserMsg;
-
-#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
-
-/* The version of the protocol we support */
-#define VHOST_USER_VERSION (0x1)
-
-#define MAX_NR_VIRTQUEUE (8)
-
-typedef struct VubrDevRegion {
- /* Guest Physical address. */
- uint64_t gpa;
- /* Memory region size. */
- uint64_t size;
- /* QEMU virtual address (userspace). */
- uint64_t qva;
- /* Starting offset in our mmaped space. */
- uint64_t mmap_offset;
- /* Start address of mmaped space. */
- uint64_t mmap_addr;
-} VubrDevRegion;
-
-typedef struct VubrDev {
- int sock;
- Dispatcher dispatcher;
- uint32_t nregions;
- VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
- VubrVirtq vq[MAX_NR_VIRTQUEUE];
- int log_call_fd;
- uint64_t log_size;
- uint8_t *log_table;
- int backend_udp_sock;
- struct sockaddr_in backend_udp_dest;
- int ready;
- uint64_t features;
- int hdrlen;
-} VubrDev;
-
-static const char *vubr_request_str[] = {
- [VHOST_USER_NONE] = "VHOST_USER_NONE",
- [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
- [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
- [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
- [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
- [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
- [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
- [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
- [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
- [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
- [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
- [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
- [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
- [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
- [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
- [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
- [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
- [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
- [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
- [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
- [VHOST_USER_MAX] = "VHOST_USER_MAX",
-};
-
static void
-print_buffer(uint8_t *buf, size_t len)
+vubr_handle_tx(VuDev *dev, int qidx)
{
- int i;
- printf("Raw buffer:\n");
- for (i = 0; i < len; i++) {
- if (i % 16 == 0) {
- printf("\n");
- }
- if (i % 4 == 0) {
- printf(" ");
- }
- printf("%02x ", buf[i]);
- }
- printf("\n............................................................\n");
-}
+ VuVirtq *vq = vu_get_queue(dev, qidx);
+ VubrDev *vubr = container_of(dev, VubrDev, vudev);
+ int hdrlen = vubr->hdrlen;
+ VuVirtqElement *elem = NULL;
-/* Translate guest physical address to our virtual address. */
-static uint64_t
-gpa_to_va(VubrDev *dev, uint64_t guest_addr)
-{
- int i;
-
- /* Find matching memory region. */
- for (i = 0; i < dev->nregions; i++) {
- VubrDevRegion *r = &dev->regions[i];
-
- if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
- return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
- }
- }
-
- assert(!"address not found in regions");
- return 0;
-}
-
-/* Translate qemu virtual address to our virtual address. */
-static uint64_t
-qva_to_va(VubrDev *dev, uint64_t qemu_addr)
-{
- int i;
+ assert(qidx % 2);
- /* Find matching memory region. */
- for (i = 0; i < dev->nregions; i++) {
- VubrDevRegion *r = &dev->regions[i];
+ for (;;) {
+ ssize_t ret;
+ unsigned int out_num;
+ struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg;
- if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
- return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
+ if (!elem) {
+ break;
}
- }
-
- assert(!"address not found in regions");
- return 0;
-}
-static void
-vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
-{
- char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
- struct iovec iov = {
- .iov_base = (char *)vmsg,
- .iov_len = VHOST_USER_HDR_SIZE,
- };
- struct msghdr msg = {
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = control,
- .msg_controllen = sizeof(control),
- };
- size_t fd_size;
- struct cmsghdr *cmsg;
- int rc;
-
- rc = recvmsg(conn_fd, &msg, 0);
-
- if (rc == 0) {
- vubr_die("recvmsg");
- fprintf(stderr, "Peer disconnected.\n");
- exit(1);
- }
- if (rc < 0) {
- vubr_die("recvmsg");
- }
-
- vmsg->fd_num = 0;
- for (cmsg = CMSG_FIRSTHDR(&msg);
- cmsg != NULL;
- cmsg = CMSG_NXTHDR(&msg, cmsg))
- {
- if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
- fd_size = cmsg->cmsg_len - CMSG_LEN(0);
- vmsg->fd_num = fd_size / sizeof(int);
- memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
+ out_num = elem->out_num;
+ out_sg = elem->out_sg;
+ if (out_num < 1) {
+ fprintf(stderr, "virtio-net header not in first element\n");
break;
}
- }
-
- if (vmsg->size > sizeof(vmsg->payload)) {
- fprintf(stderr,
- "Error: too big message request: %d, size: vmsg->size: %u, "
- "while sizeof(vmsg->payload) = %zu\n",
- vmsg->request, vmsg->size, sizeof(vmsg->payload));
- exit(1);
- }
-
- if (vmsg->size) {
- rc = read(conn_fd, &vmsg->payload, vmsg->size);
- if (rc == 0) {
- vubr_die("recvmsg");
- fprintf(stderr, "Peer disconnected.\n");
- exit(1);
+ if (VHOST_USER_BRIDGE_DEBUG) {
+ iov_hexdump(out_sg, out_num, stderr, "TX:", 1024);
}
- if (rc < 0) {
- vubr_die("recvmsg");
+
+ if (hdrlen) {
+ unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
+ out_sg, out_num,
+ hdrlen, -1);
+ out_num = sg_num;
+ out_sg = sg;
}
- assert(rc == vmsg->size);
- }
-}
+ struct msghdr msg = {
+ .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
+ .msg_namelen = sizeof(struct sockaddr_in),
+ .msg_iov = out_sg,
+ .msg_iovlen = out_num,
+ };
+ do {
+ ret = sendmsg(vubr->backend_udp_sock, &msg, 0);
+ } while (ret == -1 && (errno == EAGAIN || errno == EINTR));
-static void
-vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
-{
- int rc;
+ if (ret == -1) {
+ vubr_die("sendmsg()");
+ }
- do {
- rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
- } while (rc < 0 && errno == EINTR);
+ vu_queue_push(dev, vq, elem, 0);
+ vu_queue_notify(dev, vq);
- if (rc < 0) {
- vubr_die("write");
+ free(elem);
+ elem = NULL;
}
-}
-static void
-vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
-{
- int slen = sizeof(struct sockaddr_in);
-
- if (sendto(dev->backend_udp_sock, buf, len, 0,
- (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
- vubr_die("sendto()");
- }
+ free(elem);
}
-static int
-vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen)
+static void
+iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
{
- int slen = sizeof(struct sockaddr_in);
- int rc;
+ struct iovec *cur;
- rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0,
- (struct sockaddr *) &dev->backend_udp_dest,
- (socklen_t *)&slen);
- if (rc == -1) {
- vubr_die("recvfrom()");
+ for (cur = front; front != iov; cur++) {
+ bytes -= cur->iov_len;
}
- return rc;
+ cur->iov_base -= bytes;
+ cur->iov_len += bytes;
}
static void
-vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
+iov_truncate(struct iovec *iov, unsigned iovc, size_t bytes)
{
- int hdrlen = dev->hdrlen;
- DPRINT(" hdrlen = %d\n", dev->hdrlen);
+ unsigned i;
- if (VHOST_USER_BRIDGE_DEBUG) {
- print_buffer(buf, len);
- }
- vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
-}
+ for (i = 0; i < iovc; i++, iov++) {
+ if (bytes < iov->iov_len) {
+ iov->iov_len = bytes;
+ return;
+ }
-/* Kick the log_call_fd if required. */
-static void
-vubr_log_kick(VubrDev *dev)
-{
- if (dev->log_call_fd != -1) {
- DPRINT("Kicking the QEMU's log...\n");
- eventfd_write(dev->log_call_fd, 1);
+ bytes -= iov->iov_len;
}
-}
-/* Kick the guest if necessary. */
-static void
-vubr_virtqueue_kick(VubrVirtq *vq)
-{
- if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
- DPRINT("Kicking the guest...\n");
- eventfd_write(vq->call_fd, 1);
- }
+ assert(!"couldn't truncate iov");
}
static void
-vubr_log_page(uint8_t *log_table, uint64_t page)
+vubr_backend_recv_cb(int sock, void *ctx)
{
- DPRINT("Logged dirty guest page: %"PRId64"\n", page);
- atomic_or(&log_table[page / 8], 1 << (page % 8));
-}
+ VubrDev *vubr = (VubrDev *) ctx;
+ VuDev *dev = &vubr->vudev;
+ VuVirtq *vq = vu_get_queue(dev, 0);
+ VuVirtqElement *elem = NULL;
+ struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
+ struct virtio_net_hdr_mrg_rxbuf mhdr;
+ unsigned mhdr_cnt = 0;
+ int hdrlen = vubr->hdrlen;
+ int i = 0;
+ struct virtio_net_hdr hdr = {
+ .flags = 0,
+ .gso_type = VIRTIO_NET_HDR_GSO_NONE
+ };
-static void
-vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
-{
- uint64_t page;
+ DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
+ DPRINT(" hdrlen = %d\n", hdrlen);
- if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
- !dev->log_table || !length) {
+ if (!vu_queue_enabled(dev, vq) ||
+ !vu_queue_avail_bytes(dev, vq, hdrlen, 0)) {
+ DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
return;
}
- assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
-
- page = address / VHOST_LOG_PAGE;
- while (page * VHOST_LOG_PAGE < address + length) {
- vubr_log_page(dev->log_table, page);
- page += VHOST_LOG_PAGE;
- }
- vubr_log_kick(dev);
-}
-
-static void
-vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
-{
- struct vring_desc *desc = vq->desc;
- struct vring_avail *avail = vq->avail;
- struct vring_used *used = vq->used;
- uint64_t log_guest_addr = vq->log_guest_addr;
- int32_t remaining_len = len;
-
- unsigned int size = vq->size;
-
- uint16_t avail_index = atomic_mb_read(&avail->idx);
-
- /* We check the available descriptors before posting the
- * buffer, so here we assume that enough available
- * descriptors. */
- assert(vq->last_avail_index != avail_index);
- uint16_t a_index = vq->last_avail_index % size;
- uint16_t u_index = vq->last_used_index % size;
- uint16_t d_index = avail->ring[a_index];
-
- int i = d_index;
- uint32_t written_len = 0;
-
do {
- DPRINT("Post packet to guest on vq:\n");
- DPRINT(" size = %d\n", vq->size);
- DPRINT(" last_avail_index = %d\n", vq->last_avail_index);
- DPRINT(" last_used_index = %d\n", vq->last_used_index);
- DPRINT(" a_index = %d\n", a_index);
- DPRINT(" u_index = %d\n", u_index);
- DPRINT(" d_index = %d\n", d_index);
- DPRINT(" desc[%d].addr = 0x%016"PRIx64"\n", i, desc[i].addr);
- DPRINT(" desc[%d].len = %d\n", i, desc[i].len);
- DPRINT(" desc[%d].flags = %d\n", i, desc[i].flags);
- DPRINT(" avail->idx = %d\n", avail_index);
- DPRINT(" used->idx = %d\n", used->idx);
-
- if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
- /* FIXME: we should find writable descriptor. */
- fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
- exit(1);
- }
-
- void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
- uint32_t chunk_len = desc[i].len;
- uint32_t chunk_write_len = MIN(remaining_len, chunk_len);
+ struct iovec *sg;
+ ssize_t ret, total = 0;
+ unsigned int num;
- memcpy(chunk_start, buf + written_len, chunk_write_len);
- vubr_log_write(dev, desc[i].addr, chunk_write_len);
- remaining_len -= chunk_write_len;
- written_len += chunk_write_len;
-
- if ((remaining_len == 0) || !(desc[i].flags & VRING_DESC_F_NEXT)) {
+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
+ if (!elem) {
break;
}
- i = desc[i].next;
- } while (1);
-
- if (remaining_len > 0) {
- fprintf(stderr,
- "Too long packet for RX, remaining_len = %d, Dropping...\n",
- remaining_len);
- return;
- }
-
- /* Add descriptor to the used ring. */
- used->ring[u_index].id = d_index;
- used->ring[u_index].len = len;
- vubr_log_write(dev,
- log_guest_addr + offsetof(struct vring_used, ring[u_index]),
- sizeof(used->ring[u_index]));
-
- vq->last_avail_index++;
- vq->last_used_index++;
-
- atomic_mb_set(&used->idx, vq->last_used_index);
- vubr_log_write(dev,
- log_guest_addr + offsetof(struct vring_used, idx),
- sizeof(used->idx));
-
- /* Kick the guest if necessary. */
- vubr_virtqueue_kick(vq);
-}
-
-static int
-vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
-{
- struct vring_desc *desc = vq->desc;
- struct vring_avail *avail = vq->avail;
- struct vring_used *used = vq->used;
- uint64_t log_guest_addr = vq->log_guest_addr;
-
- unsigned int size = vq->size;
-
- uint16_t a_index = vq->last_avail_index % size;
- uint16_t u_index = vq->last_used_index % size;
- uint16_t d_index = avail->ring[a_index];
-
- uint32_t i, len = 0;
- size_t buf_size = 4096;
- uint8_t buf[4096];
-
- DPRINT("Chunks: ");
- i = d_index;
- do {
- void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
- uint32_t chunk_len = desc[i].len;
-
- assert(!(desc[i].flags & VRING_DESC_F_WRITE));
-
- if (len + chunk_len < buf_size) {
- memcpy(buf + len, chunk_start, chunk_len);
- DPRINT("%d ", chunk_len);
- } else {
- fprintf(stderr, "Error: too long packet. Dropping...\n");
+ if (elem->in_num < 1) {
+ fprintf(stderr, "virtio-net contains no in buffers\n");
break;
}
- len += chunk_len;
-
- if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
- break;
+ sg = elem->in_sg;
+ num = elem->in_num;
+ if (i == 0) {
+ if (hdrlen == 12) {
+ mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
+ sg, elem->in_num,
+ offsetof(typeof(mhdr), num_buffers),
+ sizeof(mhdr.num_buffers));
+ }
+ iov_from_buf(sg, elem->in_num, 0, &hdr, sizeof hdr);
+ total += hdrlen;
+ assert(iov_discard_front(&sg, &num, hdrlen) == hdrlen);
}
- i = desc[i].next;
- } while (1);
- DPRINT("\n");
-
- if (!len) {
- return -1;
- }
-
- /* Add descriptor to the used ring. */
- used->ring[u_index].id = d_index;
- used->ring[u_index].len = len;
- vubr_log_write(dev,
- log_guest_addr + offsetof(struct vring_used, ring[u_index]),
- sizeof(used->ring[u_index]));
-
- vubr_consume_raw_packet(dev, buf, len);
-
- return 0;
-}
+ struct msghdr msg = {
+ .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
+ .msg_namelen = sizeof(struct sockaddr_in),
+ .msg_iov = sg,
+ .msg_iovlen = elem->in_num,
+ .msg_flags = MSG_DONTWAIT,
+ };
+ do {
+ ret = recvmsg(vubr->backend_udp_sock, &msg, 0);
+ } while (ret == -1 && (errno == EINTR));
-static void
-vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
-{
- struct vring_avail *avail = vq->avail;
- struct vring_used *used = vq->used;
- uint64_t log_guest_addr = vq->log_guest_addr;
-
- while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
- vubr_process_desc(dev, vq);
- vq->last_avail_index++;
- vq->last_used_index++;
- }
+ if (i == 0) {
+ iov_restore_front(elem->in_sg, sg, hdrlen);
+ }
- atomic_mb_set(&used->idx, vq->last_used_index);
- vubr_log_write(dev,
- log_guest_addr + offsetof(struct vring_used, idx),
- sizeof(used->idx));
-}
+ if (ret == -1) {
+ if (errno == EWOULDBLOCK) {
+ vu_queue_rewind(dev, vq, 1);
+ break;
+ }
-static void
-vubr_backend_recv_cb(int sock, void *ctx)
-{
- VubrDev *dev = (VubrDev *) ctx;
- VubrVirtq *rx_vq = &dev->vq[0];
- uint8_t buf[4096];
- struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
- int hdrlen = dev->hdrlen;
- int buflen = sizeof(buf);
- int len;
-
- if (!dev->ready) {
- return;
- }
+ vubr_die("recvmsg()");
+ }
- DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
- DPRINT(" hdrlen = %d\n", hdrlen);
+ total += ret;
+ iov_truncate(elem->in_sg, elem->in_num, total);
+ vu_queue_fill(dev, vq, elem, total, i++);
- uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
+ free(elem);
+ elem = NULL;
+ } while (false); /* could loop if DONTWAIT worked? */
- /* If there is no available descriptors, just do nothing.
- * The buffer will be handled by next arrived UDP packet,
- * or next kick on receive virtq. */
- if (rx_vq->last_avail_index == avail_index) {
- DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
- return;
+ if (mhdr_cnt) {
+ mhdr.num_buffers = i;
+ iov_from_buf(mhdr_sg, mhdr_cnt,
+ 0,
+ &mhdr.num_buffers, sizeof mhdr.num_buffers);
}
- memset(buf, 0, hdrlen);
- /* TODO: support mergeable buffers. */
- if (hdrlen == 12)
- hdr->num_buffers = 1;
- len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);
+ vu_queue_flush(dev, vq, i);
+ vu_queue_notify(dev, vq);
- vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
+ free(elem);
}
static void
-vubr_kick_cb(int sock, void *ctx)
+vubr_receive_cb(int sock, void *ctx)
{
- VubrDev *dev = (VubrDev *) ctx;
- eventfd_t kick_data;
- ssize_t rc;
+ VubrDev *vubr = (VubrDev *)ctx;
- rc = eventfd_read(sock, &kick_data);
- if (rc == -1) {
- vubr_die("eventfd_read()");
- } else {
- DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
- vubr_process_avail(dev, &dev->vq[1]);
+ if (!vu_dispatch(&vubr->vudev)) {
+ fprintf(stderr, "Error while dispatching\n");
}
}
-static int
-vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- DPRINT("Function %s() not implemented yet.\n", __func__);
- return 0;
-}
+typedef struct WatchData {
+ VuDev *dev;
+ vu_watch_cb cb;
+ void *data;
+} WatchData;
-static int
-vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+watch_cb(int sock, void *ctx)
{
- vmsg->payload.u64 =
- ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
- (1ULL << VHOST_F_LOG_ALL) |
- (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
- (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
-
- vmsg->size = sizeof(vmsg->payload.u64);
+ struct WatchData *wd = ctx;
- DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-
- /* Reply */
- return 1;
+ wd->cb(wd->dev, VU_WATCH_IN, wd->data);
}
-static int
-vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_set_watch(VuDev *dev, int fd, int condition,
+ vu_watch_cb cb, void *data)
{
- DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-
- dev->features = vmsg->payload.u64;
- if ((dev->features & (1ULL << VIRTIO_F_VERSION_1)) ||
- (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
- dev->hdrlen = 12;
- } else {
- dev->hdrlen = 10;
- }
+ VubrDev *vubr = container_of(dev, VubrDev, vudev);
+ static WatchData watches[FD_SETSIZE];
+ struct WatchData *wd = &watches[fd];
- return 0;
-}
-
-static int
-vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- return 0;
+ wd->cb = cb;
+ wd->data = data;
+ wd->dev = dev;
+ dispatcher_add(&vubr->dispatcher, fd, wd, watch_cb);
}
static void
-vubr_close_log(VubrDev *dev)
+vubr_remove_watch(VuDev *dev, int fd)
{
- if (dev->log_table) {
- if (munmap(dev->log_table, dev->log_size) != 0) {
- vubr_die("munmap()");
- }
+ VubrDev *vubr = container_of(dev, VubrDev, vudev);
- dev->log_table = 0;
- }
- if (dev->log_call_fd != -1) {
- close(dev->log_call_fd);
- dev->log_call_fd = -1;
- }
-}
-
-static int
-vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- vubr_close_log(dev);
- dev->ready = 0;
- dev->features = 0;
- return 0;
+ dispatcher_remove(&vubr->dispatcher, fd);
}
static int
-vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
+vubr_send_rarp_exec(VuDev *dev, VhostUserMsg *vmsg)
{
- int i;
- VhostUserMemory *memory = &vmsg->payload.memory;
- dev->nregions = memory->nregions;
-
- DPRINT("Nregions: %d\n", memory->nregions);
- for (i = 0; i < dev->nregions; i++) {
- void *mmap_addr;
- VhostUserMemoryRegion *msg_region = &memory->regions[i];
- VubrDevRegion *dev_region = &dev->regions[i];
-
- DPRINT("Region %d\n", i);
- DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
- msg_region->guest_phys_addr);
- DPRINT(" memory_size: 0x%016"PRIx64"\n",
- msg_region->memory_size);
- DPRINT(" userspace_addr 0x%016"PRIx64"\n",
- msg_region->userspace_addr);
- DPRINT(" mmap_offset 0x%016"PRIx64"\n",
- msg_region->mmap_offset);
-
- dev_region->gpa = msg_region->guest_phys_addr;
- dev_region->size = msg_region->memory_size;
- dev_region->qva = msg_region->userspace_addr;
- dev_region->mmap_offset = msg_region->mmap_offset;
-
- /* We don't use offset argument of mmap() since the
- * mapped address has to be page aligned, and we use huge
- * pages. */
- mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- vmsg->fds[i], 0);
-
- if (mmap_addr == MAP_FAILED) {
- vubr_die("mmap");
- }
- dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
- DPRINT(" mmap_addr: 0x%016"PRIx64"\n", dev_region->mmap_addr);
-
- close(vmsg->fds[i]);
- }
-
+ DPRINT("Function %s() not implemented yet.\n", __func__);
return 0;
}
static int
-vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
+vubr_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
{
- int fd;
- uint64_t log_mmap_size, log_mmap_offset;
- void *rc;
-
- assert(vmsg->fd_num == 1);
- fd = vmsg->fds[0];
-
- assert(vmsg->size == sizeof(vmsg->payload.log));
- log_mmap_offset = vmsg->payload.log.mmap_offset;
- log_mmap_size = vmsg->payload.log.mmap_size;
- DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
- DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
-
- rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
- log_mmap_offset);
- if (rc == MAP_FAILED) {
- vubr_die("mmap");
+ switch (vmsg->request) {
+ case VHOST_USER_SEND_RARP:
+ *do_reply = vubr_send_rarp_exec(dev, vmsg);
+ return 1;
+ default:
+ /* let the library handle the rest */
+ return 0;
}
- dev->log_table = rc;
- dev->log_size = log_mmap_size;
- vmsg->size = sizeof(vmsg->payload.u64);
- /* Reply */
- return 1;
-}
-
-static int
-vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- assert(vmsg->fd_num == 1);
- dev->log_call_fd = vmsg->fds[0];
- DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
return 0;
}
-static int
-vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_set_features(VuDev *dev, uint64_t features)
{
- unsigned int index = vmsg->payload.state.index;
- unsigned int num = vmsg->payload.state.num;
-
- DPRINT("State.index: %d\n", index);
- DPRINT("State.num: %d\n", num);
- dev->vq[index].size = num;
- return 0;
-}
+ VubrDev *vubr = container_of(dev, VubrDev, vudev);
-static int
-vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- struct vhost_vring_addr *vra = &vmsg->payload.addr;
- unsigned int index = vra->index;
- VubrVirtq *vq = &dev->vq[index];
-
- DPRINT("vhost_vring_addr:\n");
- DPRINT(" index: %d\n", vra->index);
- DPRINT(" flags: %d\n", vra->flags);
- DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr);
- DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr);
- DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr);
- DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr);
-
- vq->desc = (struct vring_desc *)(uintptr_t)qva_to_va(dev, vra->desc_user_addr);
- vq->used = (struct vring_used *)(uintptr_t)qva_to_va(dev, vra->used_user_addr);
- vq->avail = (struct vring_avail *)(uintptr_t)qva_to_va(dev, vra->avail_user_addr);
- vq->log_guest_addr = vra->log_guest_addr;
-
- DPRINT("Setting virtq addresses:\n");
- DPRINT(" vring_desc at %p\n", vq->desc);
- DPRINT(" vring_used at %p\n", vq->used);
- DPRINT(" vring_avail at %p\n", vq->avail);
-
- vq->last_used_index = vq->used->idx;
-
- if (vq->last_avail_index != vq->used->idx) {
- DPRINT("Last avail index != used index: %d != %d, resuming",
- vq->last_avail_index, vq->used->idx);
- vq->last_avail_index = vq->used->idx;
+ if ((features & (1ULL << VIRTIO_F_VERSION_1)) ||
+ (features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
+ vubr->hdrlen = 12;
+ } else {
+ vubr->hdrlen = 10;
}
-
- return 0;
}
-static int
-vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- unsigned int index = vmsg->payload.state.index;
- unsigned int num = vmsg->payload.state.num;
-
- DPRINT("State.index: %d\n", index);
- DPRINT("State.num: %d\n", num);
- dev->vq[index].last_avail_index = num;
-
- return 0;
-}
-
-static int
-vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- unsigned int index = vmsg->payload.state.index;
-
- DPRINT("State.index: %d\n", index);
- vmsg->payload.state.num = dev->vq[index].last_avail_index;
- vmsg->size = sizeof(vmsg->payload.state);
- /* FIXME: this is a work-around for a bug in QEMU enabling
- * too early vrings. When protocol features are enabled,
- * we have to respect * VHOST_USER_SET_VRING_ENABLE request. */
- dev->ready = 0;
-
- if (dev->vq[index].call_fd != -1) {
- close(dev->vq[index].call_fd);
- dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
- dev->vq[index].call_fd = -1;
- }
- if (dev->vq[index].kick_fd != -1) {
- close(dev->vq[index].kick_fd);
- dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
- dev->vq[index].kick_fd = -1;
- }
-
- /* Reply */
- return 1;
-}
-
-static int
-vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static uint64_t
+vubr_get_features(VuDev *dev)
{
- uint64_t u64_arg = vmsg->payload.u64;
- int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
-
- DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-
- assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
- assert(vmsg->fd_num == 1);
-
- if (dev->vq[index].kick_fd != -1) {
- close(dev->vq[index].kick_fd);
- dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
- }
- dev->vq[index].kick_fd = vmsg->fds[0];
- DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
-
- if (index % 2 == 1) {
- /* TX queue. */
- dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
- dev, vubr_kick_cb);
-
- DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
- dev->vq[index].kick_fd, index);
- }
- /* We temporarily use this hack to determine that both TX and RX
- * queues are set up and ready for processing.
- * FIXME: we need to rely in VHOST_USER_SET_VRING_ENABLE and
- * actual kicks. */
- if (dev->vq[0].kick_fd != -1 &&
- dev->vq[1].kick_fd != -1) {
- dev->ready = 1;
- DPRINT("vhost-user-bridge is ready for processing queues.\n");
- }
- return 0;
-
+ return 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |
+ 1ULL << VIRTIO_NET_F_MRG_RXBUF;
}
-static int
-vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_queue_set_started(VuDev *dev, int qidx, bool started)
{
- uint64_t u64_arg = vmsg->payload.u64;
- int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
+ VuVirtq *vq = vu_get_queue(dev, qidx);
- DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
- assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
- assert(vmsg->fd_num == 1);
-
- if (dev->vq[index].call_fd != -1) {
- close(dev->vq[index].call_fd);
- dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
+ if (qidx % 2 == 1) {
+ vu_set_queue_handler(dev, vq, started ? vubr_handle_tx : NULL);
}
- dev->vq[index].call_fd = vmsg->fds[0];
- DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
-
- return 0;
-}
-
-static int
-vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
- return 0;
-}
-
-static int
-vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
- DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
- vmsg->size = sizeof(vmsg->payload.u64);
-
- /* Reply */
- return 1;
}
-static int
-vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- /* FIXME: unimplented */
- DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
- return 0;
-}
-
-static int
-vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- DPRINT("Function %s() not implemented yet.\n", __func__);
- return 0;
-}
-
-static int
-vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_panic(VuDev *dev, const char *msg)
{
- unsigned int index = vmsg->payload.state.index;
- unsigned int enable = vmsg->payload.state.num;
+ VubrDev *vubr = container_of(dev, VubrDev, vudev);
- DPRINT("State.index: %d\n", index);
- DPRINT("State.enable: %d\n", enable);
- dev->vq[index].enable = enable;
- return 0;
-}
+ fprintf(stderr, "PANIC: %s\n", msg);
-static int
-vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
- DPRINT("Function %s() not implemented yet.\n", __func__);
- return 0;
+ dispatcher_remove(&vubr->dispatcher, dev->sock);
+ vubr->quit = 1;
}
-static int
-vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg)
-{
- /* Print out generic part of the request. */
- DPRINT(
- "================== Vhost user message from QEMU ==================\n");
- DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request],
- vmsg->request);
- DPRINT("Flags: 0x%x\n", vmsg->flags);
- DPRINT("Size: %d\n", vmsg->size);
-
- if (vmsg->fd_num) {
- int i;
- DPRINT("Fds:");
- for (i = 0; i < vmsg->fd_num; i++) {
- DPRINT(" %d", vmsg->fds[i]);
- }
- DPRINT("\n");
- }
-
- switch (vmsg->request) {
- case VHOST_USER_NONE:
- return vubr_none_exec(dev, vmsg);
- case VHOST_USER_GET_FEATURES:
- return vubr_get_features_exec(dev, vmsg);
- case VHOST_USER_SET_FEATURES:
- return vubr_set_features_exec(dev, vmsg);
- case VHOST_USER_SET_OWNER:
- return vubr_set_owner_exec(dev, vmsg);
- case VHOST_USER_RESET_OWNER:
- return vubr_reset_device_exec(dev, vmsg);
- case VHOST_USER_SET_MEM_TABLE:
- return vubr_set_mem_table_exec(dev, vmsg);
- case VHOST_USER_SET_LOG_BASE:
- return vubr_set_log_base_exec(dev, vmsg);
- case VHOST_USER_SET_LOG_FD:
- return vubr_set_log_fd_exec(dev, vmsg);
- case VHOST_USER_SET_VRING_NUM:
- return vubr_set_vring_num_exec(dev, vmsg);
- case VHOST_USER_SET_VRING_ADDR:
- return vubr_set_vring_addr_exec(dev, vmsg);
- case VHOST_USER_SET_VRING_BASE:
- return vubr_set_vring_base_exec(dev, vmsg);
- case VHOST_USER_GET_VRING_BASE:
- return vubr_get_vring_base_exec(dev, vmsg);
- case VHOST_USER_SET_VRING_KICK:
- return vubr_set_vring_kick_exec(dev, vmsg);
- case VHOST_USER_SET_VRING_CALL:
- return vubr_set_vring_call_exec(dev, vmsg);
- case VHOST_USER_SET_VRING_ERR:
- return vubr_set_vring_err_exec(dev, vmsg);
- case VHOST_USER_GET_PROTOCOL_FEATURES:
- return vubr_get_protocol_features_exec(dev, vmsg);
- case VHOST_USER_SET_PROTOCOL_FEATURES:
- return vubr_set_protocol_features_exec(dev, vmsg);
- case VHOST_USER_GET_QUEUE_NUM:
- return vubr_get_queue_num_exec(dev, vmsg);
- case VHOST_USER_SET_VRING_ENABLE:
- return vubr_set_vring_enable_exec(dev, vmsg);
- case VHOST_USER_SEND_RARP:
- return vubr_send_rarp_exec(dev, vmsg);
-
- case VHOST_USER_MAX:
- assert(vmsg->request != VHOST_USER_MAX);
- }
- return 0;
-}
-
-static void
-vubr_receive_cb(int sock, void *ctx)
-{
- VubrDev *dev = (VubrDev *) ctx;
- VhostUserMsg vmsg;
- int reply_requested;
-
- vubr_message_read(sock, &vmsg);
- reply_requested = vubr_execute_request(dev, &vmsg);
- if (reply_requested) {
- /* Set the version in the flags when sending the reply */
- vmsg.flags &= ~VHOST_USER_VERSION_MASK;
- vmsg.flags |= VHOST_USER_VERSION;
- vmsg.flags |= VHOST_USER_REPLY_MASK;
- vubr_message_write(sock, &vmsg);
- }
-}
+static const VuDevIface vuiface = {
+ .get_features = vubr_get_features,
+ .set_features = vubr_set_features,
+ .process_msg = vubr_process_msg,
+ .queue_set_started = vubr_queue_set_started,
+};
static void
vubr_accept_cb(int sock, void *ctx)
@@ -1204,36 +479,26 @@ vubr_accept_cb(int sock, void *ctx)
vubr_die("accept()");
}
DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
+
+ vu_init(&dev->vudev,
+ conn_fd,
+ vubr_panic,
+ vubr_set_watch,
+ vubr_remove_watch,
+ &vuiface);
+
dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
+ dispatcher_remove(&dev->dispatcher, sock);
}
static VubrDev *
vubr_new(const char *path, bool client)
{
VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
- dev->nregions = 0;
- int i;
struct sockaddr_un un;
CallbackFunc cb;
size_t len;
- for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
- dev->vq[i] = (VubrVirtq) {
- .call_fd = -1, .kick_fd = -1,
- .size = 0,
- .last_avail_index = 0, .last_used_index = 0,
- .desc = 0, .avail = 0, .used = 0,
- .enable = 0,
- };
- }
-
- /* Init log */
- dev->log_call_fd = -1;
- dev->log_size = 0;
- dev->log_table = 0;
- dev->ready = 0;
- dev->features = 0;
-
/* Get a UNIX socket. */
dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
if (dev->sock == -1) {
@@ -1261,10 +526,17 @@ vubr_new(const char *path, bool client)
if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
vubr_die("connect");
}
+ vu_init(&dev->vudev,
+ dev->sock,
+ vubr_panic,
+ vubr_set_watch,
+ vubr_remove_watch,
+ &vuiface);
cb = vubr_receive_cb;
}
dispatcher_init(&dev->dispatcher);
+
dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);
return dev;
@@ -1345,7 +617,7 @@ vubr_backend_udp_setup(VubrDev *dev,
static void
vubr_run(VubrDev *dev)
{
- while (1) {
+ while (!dev->quit) {
/* timeout 200ms */
dispatcher_wait(&dev->dispatcher, 200000);
/* Here one can try polling strategy. */
@@ -1421,6 +693,9 @@ main(int argc, char *argv[])
vubr_backend_udp_setup(dev, lhost, lport, rhost, rport);
vubr_run(dev);
+
+ vu_deinit(&dev->vudev);
+
return 0;
out:
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 737bffa984..a5d2f6c0c3 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -131,6 +131,13 @@ void qemu_coroutine_enter(Coroutine *co)
}
}
+void qemu_coroutine_enter_if_inactive(Coroutine *co)
+{
+ if (!qemu_coroutine_entered(co)) {
+ qemu_coroutine_enter(co);
+ }
+}
+
void coroutine_fn qemu_coroutine_yield(void)
{
Coroutine *self = qemu_coroutine_self();