50 files changed, 3139 insertions, 1868 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 585cd5abd7..1444b26dc0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -508,7 +508,6 @@ M: Shannon Zhao <shannon.zhao@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/virt-acpi-build.c
-F: include/hw/arm/virt-acpi-build.h
 
 STM32F205
 M: Alistair Francis <alistair@alistair23.me>
@@ -885,7 +884,6 @@ F: hw/acpi/*
 F: hw/smbios/*
 F: hw/i386/acpi-build.[hc]
 F: hw/arm/virt-acpi-build.c
-F: include/hw/arm/virt-acpi-build.h
 
 ppc4xx
 M: Alexander Graf <agraf@suse.de>
@@ -1720,9 +1718,9 @@ L: qemu-block@nongnu.org
 S: Supported
 F: block/linux-aio.c
 F: include/block/raw-aio.h
-F: block/raw-posix.c
-F: block/raw-win32.c
-F: block/raw_bsd.c
+F: block/raw-format.c
+F: block/file-posix.c
+F: block/file-win32.c
 F: block/win32-aio.c
 
 qcow2
diff --git a/Makefile b/Makefile
index 214cbad35d..1a8bfb225c 100644
--- a/Makefile
+++ b/Makefile
@@ -149,6 +149,7 @@ dummy := $(call unnest-vars,, \
                 qga-obj-y \
                 ivshmem-client-obj-y \
                 ivshmem-server-obj-y \
+                libvhost-user-obj-y \
                 qga-vss-dll-obj-y \
                 block-obj-y \
                 block-obj-m \
diff --git a/Makefile.objs b/Makefile.objs
index 51c36a4d54..01cef866e4 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -115,7 +115,7 @@ qga-vss-dll-obj-y = qga/
 # contrib
 ivshmem-client-obj-y = contrib/ivshmem-client/
 ivshmem-server-obj-y = contrib/ivshmem-server/
-
+libvhost-user-obj-y = contrib/libvhost-user/
 
 ######################################################################
 trace-events-y = trace-events
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 67a036a1df..0b8fd06f27 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,4 +1,4 @@
-block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
+block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
@@ -6,8 +6,8 @@ block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-y += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
 block-obj-y += block-backend.o snapshot.o qapi.o
-block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
-block-obj-$(CONFIG_POSIX) += raw-posix.o
+block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
+block-obj-$(CONFIG_POSIX) += file-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 block-obj-y += null.o mirror.o commit.o io.o
 block-obj-y += throttle-groups.o
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 4127571454..acccf85666 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -58,10 +58,6 @@ typedef struct BlkdebugSuspendedReq {
     QLIST_ENTRY(BlkdebugSuspendedReq) next;
 } BlkdebugSuspendedReq;
 
-static const AIOCBInfo blkdebug_aiocb_info = {
-    .aiocb_size    = sizeof(BlkdebugAIOCB),
-};
-
 enum {
     ACTION_INJECT_ERROR,
     ACTION_SET_STATE,
@@ -77,7 +73,7 @@ typedef struct BlkdebugRule {
             int error;
             int immediately;
             int once;
-            int64_t sector;
+            int64_t offset;
         } inject;
         struct {
             int new_state;
@@ -174,6 +170,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
     const char* event_name;
     BlkdebugEvent event;
     struct BlkdebugRule *rule;
+    int64_t sector;
 
     /* Find the right event for the rule */
     event_name = qemu_opt_get(opts, "event");
@@ -200,7 +197,9 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
         rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
         rule->options.inject.immediately =
             qemu_opt_get_bool(opts, "immediately", 0);
-        rule->options.inject.sector = qemu_opt_get_number(opts, "sector", -1);
+        sector = qemu_opt_get_number(opts, "sector", -1);
+        rule->options.inject.offset =
+            sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
         break;
 
     case ACTION_SET_STATE:
@@ -408,17 +407,14 @@ out:
 
 static void error_callback_bh(void *opaque)
 {
-    struct BlkdebugAIOCB *acb = opaque;
-    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_unref(acb);
+    Coroutine *co = opaque;
+    qemu_coroutine_enter(co);
 }
 
-static BlockAIOCB *inject_error(BlockDriverState *bs,
-    BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
+static int inject_error(BlockDriverState *bs, BlkdebugRule *rule)
 {
     BDRVBlkdebugState *s = bs->opaque;
     int error = rule->options.inject.error;
-    struct BlkdebugAIOCB *acb;
     bool immediately = rule->options.inject.immediately;
 
     if (rule->options.inject.once) {
@@ -426,81 +422,79 @@ static BlockAIOCB *inject_error(BlockDriverState *bs,
         remove_rule(rule);
     }
 
-    if (immediately) {
-        return NULL;
+    if (!immediately) {
+        aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh,
+                                qemu_coroutine_self());
+        qemu_coroutine_yield();
     }
 
-    acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
-    acb->ret = -error;
-
-    aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh, acb);
-
-    return &acb->common;
+    return -error;
 }
 
-static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
-    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-    BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn
+blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                   QEMUIOVector *qiov, int flags)
 {
     BDRVBlkdebugState *s = bs->opaque;
     BlkdebugRule *rule = NULL;
 
     QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1 ||
-            (rule->options.inject.sector >= sector_num &&
-             rule->options.inject.sector < sector_num + nb_sectors)) {
+        uint64_t inject_offset = rule->options.inject.offset;
+
+        if (inject_offset == -1 ||
+            (inject_offset >= offset && inject_offset < offset + bytes))
+        {
             break;
         }
     }
 
     if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
+        return inject_error(bs, rule);
     }
 
-    return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
-                          cb, opaque);
+    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
 
-static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
-    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-    BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn
+blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                    QEMUIOVector *qiov, int flags)
 {
     BDRVBlkdebugState *s = bs->opaque;
     BlkdebugRule *rule = NULL;
 
     QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1 ||
-            (rule->options.inject.sector >= sector_num &&
-             rule->options.inject.sector < sector_num + nb_sectors)) {
+        uint64_t inject_offset = rule->options.inject.offset;
+
+        if (inject_offset == -1 ||
+            (inject_offset >= offset && inject_offset < offset + bytes))
+        {
             break;
         }
     }
 
     if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
+        return inject_error(bs, rule);
     }
 
-    return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
-                           cb, opaque);
+    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 }
 
-static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
-    BlockCompletionFunc *cb, void *opaque)
+static int blkdebug_co_flush(BlockDriverState *bs)
 {
     BDRVBlkdebugState *s = bs->opaque;
     BlkdebugRule *rule = NULL;
 
     QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1) {
+        if (rule->options.inject.offset == -1) {
             break;
         }
     }
 
     if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
+        return inject_error(bs, rule);
     }
 
-    return bdrv_aio_flush(bs->file->bs, cb, opaque);
+    return bdrv_co_flush(bs->file->bs);
 }
 
 
@@ -752,9 +746,9 @@ static BlockDriver bdrv_blkdebug = {
     .bdrv_refresh_filename  = blkdebug_refresh_filename,
     .bdrv_refresh_limits    = blkdebug_refresh_limits,
 
-    .bdrv_aio_readv         = blkdebug_aio_readv,
-    .bdrv_aio_writev        = blkdebug_aio_writev,
-    .bdrv_aio_flush         = blkdebug_aio_flush,
+    .bdrv_co_preadv         = blkdebug_co_preadv,
+    .bdrv_co_pwritev        = blkdebug_co_pwritev,
+    .bdrv_co_flush_to_disk  = blkdebug_co_flush,
 
     .bdrv_debug_event           = blkdebug_debug_event,
     .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
diff --git a/block/blkverify.c b/block/blkverify.c
index 28f9af6dba..43a940c2f5 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -19,38 +19,36 @@ typedef struct {
     BdrvChild *test_file;
 } BDRVBlkverifyState;
 
-typedef struct BlkverifyAIOCB BlkverifyAIOCB;
-struct BlkverifyAIOCB {
-    BlockAIOCB common;
+typedef struct BlkverifyRequest {
+    Coroutine *co;
+    BlockDriverState *bs;
 
     /* Request metadata */
     bool is_write;
-    int64_t sector_num;
-    int nb_sectors;
+    uint64_t offset;
+    uint64_t bytes;
+    int flags;
 
-    int ret;                    /* first completed request's result */
-    unsigned int done;          /* completion counter */
+    int (*request_fn)(BdrvChild *, int64_t, unsigned int, QEMUIOVector *,
+                      BdrvRequestFlags);
 
-    QEMUIOVector *qiov;         /* user I/O vector */
-    QEMUIOVector raw_qiov;      /* cloned I/O vector for raw file */
-    void *buf;                  /* buffer for raw file I/O */
+    int ret;                    /* test image result */
+    int raw_ret;                /* raw image result */
 
-    void (*verify)(BlkverifyAIOCB *acb);
-};
+    unsigned int done;          /* completion counter */
 
-static const AIOCBInfo blkverify_aiocb_info = {
-    .aiocb_size         = sizeof(BlkverifyAIOCB),
-};
+    QEMUIOVector *qiov;         /* user I/O vector */
+    QEMUIOVector *raw_qiov;     /* cloned I/O vector for raw file */
+} BlkverifyRequest;
 
-static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
+static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyRequest *r,
                                              const char *fmt, ...)
 {
     va_list ap;
 
     va_start(ap, fmt);
-    fprintf(stderr, "blkverify: %s sector_num=%" PRId64 " nb_sectors=%d ",
-            acb->is_write ? "write" : "read", acb->sector_num,
-            acb->nb_sectors);
+    fprintf(stderr, "blkverify: %s offset=%" PRId64 " bytes=%" PRId64 " ",
+            r->is_write ? "write" : "read", r->offset, r->bytes);
     vfprintf(stderr, fmt, ap);
     fprintf(stderr, "\n");
     va_end(ap);
@@ -166,113 +164,106 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
     return bdrv_getlength(s->test_file->bs);
 }
 
-static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
-                                         int64_t sector_num, QEMUIOVector *qiov,
-                                         int nb_sectors,
-                                         BlockCompletionFunc *cb,
-                                         void *opaque)
+static void coroutine_fn blkverify_do_test_req(void *opaque)
 {
-    BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque);
-
-    acb->is_write = is_write;
-    acb->sector_num = sector_num;
-    acb->nb_sectors = nb_sectors;
-    acb->ret = -EINPROGRESS;
-    acb->done = 0;
-    acb->qiov = qiov;
-    acb->buf = NULL;
-    acb->verify = NULL;
-    return acb;
+    BlkverifyRequest *r = opaque;
+    BDRVBlkverifyState *s = r->bs->opaque;
+
+    r->ret = r->request_fn(s->test_file, r->offset, r->bytes, r->qiov,
+                           r->flags);
+    r->done++;
+    qemu_coroutine_enter_if_inactive(r->co);
 }
 
-static void blkverify_aio_bh(void *opaque)
+static void coroutine_fn blkverify_do_raw_req(void *opaque)
 {
-    BlkverifyAIOCB *acb = opaque;
+    BlkverifyRequest *r = opaque;
 
-    if (acb->buf) {
-        qemu_iovec_destroy(&acb->raw_qiov);
-        qemu_vfree(acb->buf);
-    }
-    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_unref(acb);
+    r->raw_ret = r->request_fn(r->bs->file, r->offset, r->bytes, r->raw_qiov,
+                               r->flags);
+    r->done++;
+    qemu_coroutine_enter_if_inactive(r->co);
 }
 
-static void blkverify_aio_cb(void *opaque, int ret)
+static int coroutine_fn
+blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset,
+                  uint64_t bytes, QEMUIOVector *qiov, QEMUIOVector *raw_qiov,
+                  int flags, bool is_write)
 {
-    BlkverifyAIOCB *acb = opaque;
-
-    switch (++acb->done) {
-    case 1:
-        acb->ret = ret;
-        break;
-
-    case 2:
-        if (acb->ret != ret) {
-            blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
-        }
-
-        if (acb->verify) {
-            acb->verify(acb);
-        }
+    Coroutine *co_a, *co_b;
+
+    *r = (BlkverifyRequest) {
+        .co         = qemu_coroutine_self(),
+        .bs         = bs,
+        .offset     = offset,
+        .bytes      = bytes,
+        .qiov       = qiov,
+        .raw_qiov   = raw_qiov,
+        .flags      = flags,
+        .is_write   = is_write,
+        .request_fn = is_write ? bdrv_co_pwritev : bdrv_co_preadv,
+    };
+
+    co_a = qemu_coroutine_create(blkverify_do_test_req, r);
+    co_b = qemu_coroutine_create(blkverify_do_raw_req, r);
+
+    qemu_coroutine_enter(co_a);
+    qemu_coroutine_enter(co_b);
+
+    while (r->done < 2) {
+        qemu_coroutine_yield();
+    }
 
-        aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
-                                blkverify_aio_bh, acb);
-        break;
+    if (r->ret != r->raw_ret) {
+        blkverify_err(r, "return value mismatch %d != %d", r->ret, r->raw_ret);
     }
+
+    return r->ret;
 }
 
-static void blkverify_verify_readv(BlkverifyAIOCB *acb)
+static int coroutine_fn
+blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                    QEMUIOVector *qiov, int flags)
 {
-    ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
-    if (offset != -1) {
-        blkverify_err(acb, "contents mismatch in sector %" PRId64,
-                      acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
+    BlkverifyRequest r;
+    QEMUIOVector raw_qiov;
+    void *buf;
+    ssize_t cmp_offset;
+    int ret;
+
+    buf = qemu_blockalign(bs->file->bs, qiov->size);
+    qemu_iovec_init(&raw_qiov, qiov->niov);
+    qemu_iovec_clone(&raw_qiov, qiov, buf);
+
+    ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags,
+                            false);
+
+    cmp_offset = qemu_iovec_compare(qiov, &raw_qiov);
+    if (cmp_offset != -1) {
+        blkverify_err(&r, "contents mismatch at offset %" PRId64,
+                      offset + cmp_offset);
     }
-}
 
-static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-    BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
-                                            nb_sectors, cb, opaque);
-
-    acb->verify = blkverify_verify_readv;
-    acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
-    qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
-    qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
-
-    bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
-                   blkverify_aio_cb, acb);
-    bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
-                   blkverify_aio_cb, acb);
-    return &acb->common;
+    qemu_iovec_destroy(&raw_qiov);
+    qemu_vfree(buf);
+
+    return ret;
 }
 
-static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn
+blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                     QEMUIOVector *qiov, int flags)
 {
-    BDRVBlkverifyState *s = bs->opaque;
-    BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
-                                            nb_sectors, cb, opaque);
-
-    bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
-                    blkverify_aio_cb, acb);
-    bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
-                    blkverify_aio_cb, acb);
-    return &acb->common;
+    BlkverifyRequest r;
+    return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true);
 }
 
-static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
+static int blkverify_co_flush(BlockDriverState *bs)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
     /* Only flush test file, the raw file is not important */
-    return bdrv_aio_flush(s->test_file->bs, cb, opaque);
+    return bdrv_co_flush(s->test_file->bs);
 }
 
 static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
@@ -332,9 +323,9 @@ static BlockDriver bdrv_blkverify = {
     .bdrv_getlength                   = blkverify_getlength,
     .bdrv_refresh_filename            = blkverify_refresh_filename,
 
-    .bdrv_aio_readv                   = blkverify_aio_readv,
-    .bdrv_aio_writev                  = blkverify_aio_writev,
-    .bdrv_aio_flush                   = blkverify_aio_flush,
+    .bdrv_co_preadv                   = blkverify_co_preadv,
+    .bdrv_co_pwritev                  = blkverify_co_pwritev,
+    .bdrv_co_flush                    = blkverify_co_flush,
 
     .is_filter                        = true,
     .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
diff --git a/block/raw-posix.c b/block/file-posix.c
index 28b47d977b..28b47d977b 100644
--- a/block/raw-posix.c
+++ b/block/file-posix.c
diff --git a/block/raw-win32.c b/block/file-win32.c
index 800fabdd72..800fabdd72 100644
--- a/block/raw-win32.c
+++ b/block/file-win32.c
diff --git a/block/gluster.c b/block/gluster.c
index a0a74e49fd..1a22f2982d 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -1253,7 +1253,7 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs)
  * If @start is in a trailing hole or beyond EOF, return -ENXIO.
  * If we can't find out, return a negative errno other than -ENXIO.
  *
- * (Shamefully copied from raw-posix.c, only miniscule adaptions.)
+ * (Shamefully copied from file-posix.c, only miniscule adaptions.)
  */
 static int find_allocation(BlockDriverState *bs, off_t start,
                            off_t *data, off_t *hole)
@@ -1349,7 +1349,7 @@ exit:
  * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
  * beyond the end of the disk image it will be clamped.
  *
- * (Based on raw_co_get_block_status() from raw-posix.c.)
+ * (Based on raw_co_get_block_status() from file-posix.c.)
  */
 static int64_t coroutine_fn qemu_gluster_co_get_block_status(
         BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
diff --git a/block/quorum.c b/block/quorum.c
index d122299352..86e2072dce 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -97,7 +97,7 @@ typedef struct QuorumAIOCB QuorumAIOCB;
  * $children_count QuorumChildRequest.
  */
 typedef struct QuorumChildRequest {
-    BlockAIOCB *aiocb;
+    BlockDriverState *bs;
     QEMUIOVector qiov;
     uint8_t *buf;
     int ret;
@@ -110,11 +110,12 @@ typedef struct QuorumChildRequest {
  * used to do operations on each children and track overall progress.
  */
 struct QuorumAIOCB {
-    BlockAIOCB common;
+    BlockDriverState *bs;
+    Coroutine *co;
 
     /* Request metadata */
-    uint64_t sector_num;
-    int nb_sectors;
+    uint64_t offset;
+    uint64_t bytes;
 
     QEMUIOVector *qiov;         /* calling IOV */
 
@@ -133,32 +134,15 @@ struct QuorumAIOCB {
     int children_read;          /* how many children have been read from */
 };
 
-static bool quorum_vote(QuorumAIOCB *acb);
-
-static void quorum_aio_cancel(BlockAIOCB *blockacb)
-{
-    QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    int i;
-
-    /* cancel all callbacks */
-    for (i = 0; i < s->num_children; i++) {
-        if (acb->qcrs[i].aiocb) {
-            bdrv_aio_cancel_async(acb->qcrs[i].aiocb);
-        }
-    }
-}
-
-static AIOCBInfo quorum_aiocb_info = {
-    .aiocb_size         = sizeof(QuorumAIOCB),
-    .cancel_async       = quorum_aio_cancel,
-};
+typedef struct QuorumCo {
+    QuorumAIOCB *acb;
+    int idx;
+} QuorumCo;
 
 static void quorum_aio_finalize(QuorumAIOCB *acb)
 {
-    acb->common.cb(acb->common.opaque, acb->vote_ret);
     g_free(acb->qcrs);
-    qemu_aio_unref(acb);
+    g_free(acb);
 }
 
 static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
@@ -171,30 +155,26 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
     return a->l == b->l;
 }
 
-static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
-                                   BlockDriverState *bs,
+static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
                                    QEMUIOVector *qiov,
-                                   uint64_t sector_num,
-                                   int nb_sectors,
-                                   BlockCompletionFunc *cb,
-                                   void *opaque)
+                                   uint64_t offset,
+                                   uint64_t bytes)
 {
-    QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
+    BDRVQuorumState *s = bs->opaque;
+    QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
     int i;
 
-    acb->common.bs->opaque = s;
-    acb->sector_num = sector_num;
-    acb->nb_sectors = nb_sectors;
-    acb->qiov = qiov;
-    acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
-    acb->count = 0;
-    acb->success_count = 0;
-    acb->rewrite_count = 0;
-    acb->votes.compare = quorum_sha256_compare;
-    QLIST_INIT(&acb->votes.vote_list);
-    acb->is_read = false;
-    acb->vote_ret = 0;
+    *acb = (QuorumAIOCB) {
+        .co                 = qemu_coroutine_self(),
+        .bs                 = bs,
+        .offset             = offset,
+        .bytes              = bytes,
+        .qiov               = qiov,
+        .votes.compare      = quorum_sha256_compare,
+        .votes.vote_list    = QLIST_HEAD_INITIALIZER(acb.votes.vote_list),
+    };
 
+    acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
     for (i = 0; i < s->num_children; i++) {
         acb->qcrs[i].buf = NULL;
         acb->qcrs[i].ret = 0;
@@ -204,30 +184,37 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
     return acb;
 }
 
-static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
-                              int nb_sectors, char *node_name, int ret)
+static void quorum_report_bad(QuorumOpType type, uint64_t offset,
+                              uint64_t bytes, char *node_name, int ret)
 {
     const char *msg = NULL;
+    int64_t start_sector = offset / BDRV_SECTOR_SIZE;
+    int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+
     if (ret < 0) {
         msg = strerror(-ret);
     }
 
-    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
-                                      sector_num, nb_sectors, &error_abort);
+    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, start_sector,
+                                      end_sector - start_sector, &error_abort);
 }
 
 static void quorum_report_failure(QuorumAIOCB *acb)
 {
-    const char *reference = bdrv_get_device_or_node_name(acb->common.bs);
-    qapi_event_send_quorum_failure(reference, acb->sector_num,
-                                   acb->nb_sectors, &error_abort);
+    const char *reference = bdrv_get_device_or_node_name(acb->bs);
+    int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE;
+    int64_t end_sector = DIV_ROUND_UP(acb->offset + acb->bytes,
+                                      BDRV_SECTOR_SIZE);
+
+    qapi_event_send_quorum_failure(reference, start_sector,
+                                   end_sector - start_sector, &error_abort);
 }
 
 static int quorum_vote_error(QuorumAIOCB *acb);
 
 static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    BDRVQuorumState *s = acb->bs->opaque;
 
     if (acb->success_count < s->threshold) {
         acb->vote_ret = quorum_vote_error(acb);
@@ -238,22 +225,7 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
     return false;
 }
 
-static void quorum_rewrite_aio_cb(void *opaque, int ret)
-{
-    QuorumAIOCB *acb = opaque;
-
-    /* one less rewrite to do */
-    acb->rewrite_count--;
-
-    /* wait until all rewrite callbacks have completed */
-    if (acb->rewrite_count) {
-        return;
-    }
-
-    quorum_aio_finalize(acb);
-}
-
-static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb);
+static int read_fifo_child(QuorumAIOCB *acb);
 
 static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
 {
@@ -272,70 +244,7 @@ static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret)
 {
     QuorumAIOCB *acb = sacb->parent;
     QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
-    quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
-                      sacb->aiocb->bs->node_name, ret);
-}
-
-static void quorum_fifo_aio_cb(void *opaque, int ret)
-{
-    QuorumChildRequest *sacb = opaque;
-    QuorumAIOCB *acb = sacb->parent;
-    BDRVQuorumState *s = acb->common.bs->opaque;
-
-    assert(acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO);
-
-    if (ret < 0) {
-        quorum_report_bad_acb(sacb, ret);
-
-        /* We try to read next child in FIFO order if we fail to read */
-        if (acb->children_read < s->num_children) {
-            read_fifo_child(acb);
-            return;
-        }
-    }
-
-    acb->vote_ret = ret;
-
-    /* FIXME: rewrite failed children if acb->children_read > 1? */
-    quorum_aio_finalize(acb);
-}
-
-static void quorum_aio_cb(void *opaque, int ret)
-{
-    QuorumChildRequest *sacb = opaque;
-    QuorumAIOCB *acb = sacb->parent;
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    bool rewrite = false;
-    int i;
-
-    sacb->ret = ret;
-    if (ret == 0) {
-        acb->success_count++;
-    } else {
-        quorum_report_bad_acb(sacb, ret);
-    }
-    acb->count++;
-    assert(acb->count <= s->num_children);
-    assert(acb->success_count <= s->num_children);
-    if (acb->count < s->num_children) {
-        return;
-    }
-
-    /* Do the vote on read */
-    if (acb->is_read) {
-        rewrite = quorum_vote(acb);
-        for (i = 0; i < s->num_children; i++) {
-            qemu_vfree(acb->qcrs[i].buf);
-            qemu_iovec_destroy(&acb->qcrs[i].qiov);
-        }
-    } else {
-        quorum_has_too_much_io_failed(acb);
-    }
-
-    /* if no rewrite is done the code will finish right away */
-    if (!rewrite) {
-        quorum_aio_finalize(acb);
-    }
+    quorum_report_bad(type, acb->offset, acb->bytes, sacb->bs->node_name, ret);
 }
 
 static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -350,14 +259,31 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
             continue;
         }
         QLIST_FOREACH(item, &version->items, next) {
-            quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
-                              acb->nb_sectors,
+            quorum_report_bad(QUORUM_OP_TYPE_READ, acb->offset, acb->bytes,
                               s->children[item->index]->bs->node_name, 0);
         }
     }
 }
 
-static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
+static void quorum_rewrite_entry(void *opaque)
+{
+    QuorumCo *co = opaque;
+    QuorumAIOCB *acb = co->acb;
+    BDRVQuorumState *s = acb->bs->opaque;
+
+    /* Ignore any errors, it's just a correction attempt for already
+     * corrupted data. */
+    bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes,
+                    acb->qiov, 0);
+
+    /* Wake up the caller after the last rewrite */
+    acb->rewrite_count--;
+    if (!acb->rewrite_count) {
+        qemu_coroutine_enter_if_inactive(acb->co);
+    }
+}
+
+static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb,
                                         QuorumVoteValue *value)
 {
     QuorumVoteVersion *version;
@@ -376,7 +302,7 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
         }
     }
 
-    /* quorum_rewrite_aio_cb will count down this to zero */
+    /* quorum_rewrite_entry will count down this to zero */
     acb->rewrite_count = count;
 
     /* now fire the correcting rewrites */
@@ -385,9 +311,14 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
             continue;
         }
         QLIST_FOREACH(item, &version->items, next) {
-            bdrv_aio_writev(s->children[item->index], acb->sector_num,
-                            acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
-                            acb);
+            Coroutine *co;
+            QuorumCo data = {
+                .acb = acb,
+                .idx = item->index,
+            };
+
+            co = qemu_coroutine_create(quorum_rewrite_entry, &data);
+            qemu_coroutine_enter(co);
         }
     }
 
@@ -507,8 +438,8 @@ static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
     va_list ap;
 
     va_start(ap, fmt);
-    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
-            acb->sector_num, acb->nb_sectors);
+    fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 " ",
+            acb->offset, acb->bytes);
     vfprintf(stderr, fmt, ap);
     fprintf(stderr, "\n");
     va_end(ap);
@@ -519,16 +450,15 @@ static bool quorum_compare(QuorumAIOCB *acb,
                            QEMUIOVector *a,
                            QEMUIOVector *b)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    BDRVQuorumState *s = acb->bs->opaque;
     ssize_t offset;
 
     /* This driver will replace blkverify in this particular case */
     if (s->is_blkverify) {
         offset = qemu_iovec_compare(a, b);
         if (offset != -1) {
-            quorum_err(acb, "contents mismatch in sector %" PRId64,
-                       acb->sector_num +
-                       (uint64_t)(offset / BDRV_SECTOR_SIZE));
+            quorum_err(acb, "contents mismatch at offset %" PRIu64,
+                       acb->offset + offset);
         }
         return true;
     }
@@ -539,7 +469,7 @@ static bool quorum_compare(QuorumAIOCB *acb,
 /* Do a vote to get the error code */
 static int quorum_vote_error(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    BDRVQuorumState *s = acb->bs->opaque;
     QuorumVoteVersion *winner = NULL;
     QuorumVotes error_votes;
     QuorumVoteValue result_value;
@@ -568,17 +498,16 @@ static int quorum_vote_error(QuorumAIOCB *acb)
     return ret;
 }
 
-static bool quorum_vote(QuorumAIOCB *acb)
+static void quorum_vote(QuorumAIOCB *acb)
 {
     bool quorum = true;
-    bool rewrite = false;
     int i, j, ret;
     QuorumVoteValue hash;
-    BDRVQuorumState *s = acb->common.bs->opaque;
+    BDRVQuorumState *s = acb->bs->opaque;
     QuorumVoteVersion *winner;
 
     if (quorum_has_too_much_io_failed(acb)) {
-        return false;
+        return;
     }
 
     /* get the index of the first successful read */
@@ -606,7 +535,7 @@ static bool quorum_vote(QuorumAIOCB *acb)
     /* Every successful read agrees */
     if (quorum) {
         quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
-        return false;
+        return;
     }
 
     /* compute hashes for each successful read, also store indexes */
@@ -641,19 +570,46 @@ static bool quorum_vote(QuorumAIOCB *acb)
 
     /* corruption correction is enabled */
     if (s->rewrite_corrupted) {
-        rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
+        quorum_rewrite_bad_versions(acb, &winner->value);
     }
 
 free_exit:
     /* free lists */
     quorum_free_vote_list(&acb->votes);
-    return rewrite;
 }
 
-static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
+static void read_quorum_children_entry(void *opaque)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    int i;
+    QuorumCo *co = opaque;
+    QuorumAIOCB *acb = co->acb;
+    BDRVQuorumState *s = acb->bs->opaque;
+    int i = co->idx;
+    QuorumChildRequest *sacb = &acb->qcrs[i];
+
+    sacb->bs = s->children[i]->bs;
+    sacb->ret = bdrv_co_preadv(s->children[i], acb->offset, acb->bytes,
+                               &acb->qcrs[i].qiov, 0);
+
+    if (sacb->ret == 0) {
+        acb->success_count++;
+    } else {
+        quorum_report_bad_acb(sacb, sacb->ret);
+    }
+
+    acb->count++;
+    assert(acb->count <= s->num_children);
+    assert(acb->success_count <= s->num_children);
+
+    /* Wake up the caller after the last read */
+    if (acb->count == s->num_children) {
+        qemu_coroutine_enter_if_inactive(acb->co);
+    }
+}
+
+static int read_quorum_children(QuorumAIOCB *acb)
+{
+    BDRVQuorumState *s = acb->bs->opaque;
+    int i, ret;
 
     acb->children_read = s->num_children;
     for (i = 0; i < s->num_children; i++) {
@@ -663,65 +619,131 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
     }
 
     for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num,
-                                            &acb->qcrs[i].qiov, acb->nb_sectors,
-                                            quorum_aio_cb, &acb->qcrs[i]);
+        Coroutine *co;
+        QuorumCo data = {
+            .acb = acb,
+            .idx = i,
+        };
+
+        co = qemu_coroutine_create(read_quorum_children_entry, &data);
+        qemu_coroutine_enter(co);
     }
 
-    return &acb->common;
+    while (acb->count < s->num_children) {
+        qemu_coroutine_yield();
+    }
+
+    /* Do the vote on read */
+    quorum_vote(acb);
+    for (i = 0; i < s->num_children; i++) {
+        qemu_vfree(acb->qcrs[i].buf);
+        qemu_iovec_destroy(&acb->qcrs[i].qiov);
+    }
+
+    while (acb->rewrite_count) {
+        qemu_coroutine_yield();
+    }
+
+    ret = acb->vote_ret;
+
+    return ret;
 }
 
-static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
+static int read_fifo_child(QuorumAIOCB *acb)
 {
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    int n = acb->children_read++;
+    BDRVQuorumState *s = acb->bs->opaque;
+    int n, ret;
+
+    /* We try to read the next child in FIFO order if we failed to read */
+    do {
+        n = acb->children_read++;
+        acb->qcrs[n].bs = s->children[n]->bs;
+        ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes,
+                             acb->qiov, 0);
+        if (ret < 0) {
+            quorum_report_bad_acb(&acb->qcrs[n], ret);
+        }
+    } while (ret < 0 && acb->children_read < s->num_children);
 
-    acb->qcrs[n].aiocb = bdrv_aio_readv(s->children[n], acb->sector_num,
-                                        acb->qiov, acb->nb_sectors,
-                                        quorum_fifo_aio_cb, &acb->qcrs[n]);
+    /* FIXME: rewrite failed children if acb->children_read > 1? */
 
-    return &acb->common;
+    return ret;
 }
 
-static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs,
-                                    int64_t sector_num,
-                                    QEMUIOVector *qiov,
-                                    int nb_sectors,
-                                    BlockCompletionFunc *cb,
-                                    void *opaque)
+static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
+                            uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
     BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
-                                      nb_sectors, cb, opaque);
+    QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+    int ret;
+
     acb->is_read = true;
     acb->children_read = 0;
 
     if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
-        return read_quorum_children(acb);
+        ret = read_quorum_children(acb);
+    } else {
+        ret = read_fifo_child(acb);
+    }
+    quorum_aio_finalize(acb);
+
+    return ret;
+}
+
+static void write_quorum_entry(void *opaque)
+{
+    QuorumCo *co = opaque;
+    QuorumAIOCB *acb = co->acb;
+    BDRVQuorumState *s = acb->bs->opaque;
+    int i = co->idx;
+    QuorumChildRequest *sacb = &acb->qcrs[i];
+
+    sacb->bs = s->children[i]->bs;
+    sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
+                                acb->qiov, 0);
+    if (sacb->ret == 0) {
+        acb->success_count++;
+    } else {
+        quorum_report_bad_acb(sacb, sacb->ret);
     }
+    acb->count++;
+    assert(acb->count <= s->num_children);
+    assert(acb->success_count <= s->num_children);
 
-    return read_fifo_child(acb);
+    /* Wake up the caller after the last write */
+    if (acb->count == s->num_children) {
+        qemu_coroutine_enter_if_inactive(acb->co);
+    }
 }
 
-static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
-                                     int64_t sector_num,
-                                     QEMUIOVector *qiov,
-                                     int nb_sectors,
-                                     BlockCompletionFunc *cb,
-                                     void *opaque)
+static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                             uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
     BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
-                                      cb, opaque);
-    int i;
+    QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+    int i, ret;
 
     for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i], sector_num,
-                                             qiov, nb_sectors, &quorum_aio_cb,
-                                             &acb->qcrs[i]);
+        Coroutine *co;
+        QuorumCo data = {
+            .acb = acb,
+            .idx = i,
+        };
+
+        co = qemu_coroutine_create(write_quorum_entry, &data);
+        qemu_coroutine_enter(co);
+    }
+
+    while (acb->count < s->num_children) {
+        qemu_coroutine_yield();
     }
 
-    return &acb->common;
+    quorum_has_too_much_io_failed(acb);
+
+    ret = acb->vote_ret;
+    quorum_aio_finalize(acb);
+
+    return ret;
 }
 
 static int64_t quorum_getlength(BlockDriverState *bs)
@@ -765,7 +787,7 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
         result = bdrv_co_flush(s->children[i]->bs);
         if (result) {
             quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
-                              bdrv_nb_sectors(s->children[i]->bs),
+                              bdrv_getlength(s->children[i]->bs),
                               s->children[i]->bs->node_name, result);
             result_value.l = result;
             quorum_count_vote(&error_votes, &result_value, i);
@@ -1098,8 +1120,8 @@ static BlockDriver bdrv_quorum = {
 
     .bdrv_getlength                     = quorum_getlength,
 
-    .bdrv_aio_readv                     = quorum_aio_readv,
-    .bdrv_aio_writev                    = quorum_aio_writev,
+    .bdrv_co_preadv                     = quorum_co_preadv,
+    .bdrv_co_pwritev                    = quorum_co_pwritev,
 
     .bdrv_add_child                     = quorum_add_child,
     .bdrv_del_child                     = quorum_del_child,
diff --git a/block/raw_bsd.c b/block/raw-format.c
index 8a5b9b0424..8404a82e0c 100644
--- a/block/raw_bsd.c
+++ b/block/raw-format.c
@@ -1,4 +1,4 @@
-/* BlockDriver implementation for "raw"
+/* BlockDriver implementation for "raw" format driver
  *
  * Copyright (C) 2010-2016 Red Hat, Inc.
  * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
diff --git a/block/trace-events b/block/trace-events
index cfc05f2478..671a6a851c 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -53,8 +53,8 @@ qmp_block_job_resume(void *job) "job %p"
 qmp_block_job_complete(void *job) "job %p"
 qmp_block_stream(void *bs, void *job) "bs %p job %p"
 
-# block/raw-win32.c
-# block/raw-posix.c
+# block/file-win32.c
+# block/file-posix.c
 paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d"
 paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"
 
diff --git a/configure b/configure
index 218df87d21..86f5214dd0 100755
--- a/configure
+++ b/configure
@@ -2750,7 +2750,7 @@ if compile_prog "" "" ; then
 fi
 
 ##########################################
-# xfsctl() probe, used for raw-posix
+# xfsctl() probe, used for file-posix.c
 if test "$xfs" != "no" ; then
   cat > $TMPC << EOF
 #include <stddef.h>  /* NULL */
diff --git a/contrib/libvhost-user/Makefile.objs b/contrib/libvhost-user/Makefile.objs
new file mode 100644
index 0000000000..cef1ad6e31
--- /dev/null
+++ b/contrib/libvhost-user/Makefile.objs
@@ -0,0 +1 @@
+libvhost-user-obj-y = libvhost-user.o
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
new file mode 100644
index 0000000000..af4faad60b
--- /dev/null
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -0,0 +1,1499 @@
+/*
+ * Vhost User library
+ *
+ * Copyright IBM, Corp. 2007
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *  Anthony Liguori <aliguori@us.ibm.com>
+ *  Marc-André Lureau <mlureau@redhat.com>
+ *  Victor Kaplansky <victork@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include <qemu/osdep.h>
+#include <sys/eventfd.h>
+#include <linux/vhost.h>
+
+#include "qemu/atomic.h"
+
+#include "libvhost-user.h"
+
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION 1
+#define LIBVHOST_USER_DEBUG 0
+
+#define DPRINT(...)                             \
+    do {                                        \
+        if (LIBVHOST_USER_DEBUG) {              \
+            fprintf(stderr, __VA_ARGS__);        \
+        }                                       \
+    } while (0)
+
+static const char *
+vu_request_to_string(int req)
+{
+#define REQ(req) [req] = #req
+    static const char *vu_request_str[] = {
+        REQ(VHOST_USER_NONE),
+        REQ(VHOST_USER_GET_FEATURES),
+        REQ(VHOST_USER_SET_FEATURES),
+        REQ(VHOST_USER_NONE),
+        REQ(VHOST_USER_GET_FEATURES),
+        REQ(VHOST_USER_SET_FEATURES),
+        REQ(VHOST_USER_SET_OWNER),
+        REQ(VHOST_USER_RESET_OWNER),
+        REQ(VHOST_USER_SET_MEM_TABLE),
+        REQ(VHOST_USER_SET_LOG_BASE),
+        REQ(VHOST_USER_SET_LOG_FD),
+        REQ(VHOST_USER_SET_VRING_NUM),
+        REQ(VHOST_USER_SET_VRING_ADDR),
+        REQ(VHOST_USER_SET_VRING_BASE),
+        REQ(VHOST_USER_GET_VRING_BASE),
+        REQ(VHOST_USER_SET_VRING_KICK),
+        REQ(VHOST_USER_SET_VRING_CALL),
+        REQ(VHOST_USER_SET_VRING_ERR),
+        REQ(VHOST_USER_GET_PROTOCOL_FEATURES),
+        REQ(VHOST_USER_SET_PROTOCOL_FEATURES),
+        REQ(VHOST_USER_GET_QUEUE_NUM),
+        REQ(VHOST_USER_SET_VRING_ENABLE),
+        REQ(VHOST_USER_SEND_RARP),
+        REQ(VHOST_USER_INPUT_GET_CONFIG),
+        REQ(VHOST_USER_MAX),
+    };
+#undef REQ
+
+    if (req < VHOST_USER_MAX) {
+        return vu_request_str[req];
+    } else {
+        return "unknown";
+    }
+}
+
+static void
+vu_panic(VuDev *dev, const char *msg, ...)
+{
+    char *buf = NULL;
+    va_list ap;
+
+    va_start(ap, msg);
+    (void)vasprintf(&buf, msg, ap);
+    va_end(ap);
+
+    dev->broken = true;
+    dev->panic(dev, buf);
+    free(buf);
+
+    /* FIXME: find a way to call virtio_error? */
+}
+
+/* Translate guest physical address to our virtual address.  */
+void *
+vu_gpa_to_va(VuDev *dev, uint64_t guest_addr)
+{
+    int i;
+
+    /* Find matching memory region.  */
+    for (i = 0; i < dev->nregions; i++) {
+        VuDevRegion *r = &dev->regions[i];
+
+        if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
+            return (void *)(uintptr_t)
+                guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
+        }
+    }
+
+    return NULL;
+}
+
+/* Translate qemu virtual address to our virtual address.  */
+static void *
+qva_to_va(VuDev *dev, uint64_t qemu_addr)
+{
+    int i;
+
+    /* Find matching memory region.  */
+    for (i = 0; i < dev->nregions; i++) {
+        VuDevRegion *r = &dev->regions[i];
+
+        if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
+            return (void *)(uintptr_t)
+                qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
+        }
+    }
+
+    return NULL;
+}
+
+static void
+vmsg_close_fds(VhostUserMsg *vmsg)
+{
+    int i;
+
+    for (i = 0; i < vmsg->fd_num; i++) {
+        close(vmsg->fds[i]);
+    }
+}
+
+static bool
+vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
+{
+    char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
+    struct iovec iov = {
+        .iov_base = (char *)vmsg,
+        .iov_len = VHOST_USER_HDR_SIZE,
+    };
+    struct msghdr msg = {
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+        .msg_control = control,
+        .msg_controllen = sizeof(control),
+    };
+    size_t fd_size;
+    struct cmsghdr *cmsg;
+    int rc;
+
+    do {
+        rc = recvmsg(conn_fd, &msg, 0);
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+    if (rc <= 0) {
+        vu_panic(dev, "Error while recvmsg: %s", strerror(errno));
+        return false;
+    }
+
+    vmsg->fd_num = 0;
+    for (cmsg = CMSG_FIRSTHDR(&msg);
+         cmsg != NULL;
+         cmsg = CMSG_NXTHDR(&msg, cmsg))
+    {
+        if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+            fd_size = cmsg->cmsg_len - CMSG_LEN(0);
+            vmsg->fd_num = fd_size / sizeof(int);
+            memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
+            break;
+        }
+    }
+
+    if (vmsg->size > sizeof(vmsg->payload)) {
+        vu_panic(dev,
+                 "Error: too big message request: %d, size: vmsg->size: %u, "
+                 "while sizeof(vmsg->payload) = %zu\n",
+                 vmsg->request, vmsg->size, sizeof(vmsg->payload));
+        goto fail;
+    }
+
+    if (vmsg->size) {
+        do {
+            rc = read(conn_fd, &vmsg->payload, vmsg->size);
+        } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+        if (rc <= 0) {
+            vu_panic(dev, "Error while reading: %s", strerror(errno));
+            goto fail;
+        }
+
+        assert(rc == vmsg->size);
+    }
+
+    return true;
+
+fail:
+    vmsg_close_fds(vmsg);
+
+    return false;
+}
+
+static bool
+vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
+{
+    int rc;
+    uint8_t *p = (uint8_t *)vmsg;
+
+    /* Set the version in the flags when sending the reply */
+    vmsg->flags &= ~VHOST_USER_VERSION_MASK;
+    vmsg->flags |= VHOST_USER_VERSION;
+    vmsg->flags |= VHOST_USER_REPLY_MASK;
+
+    do {
+        rc = write(conn_fd, p, VHOST_USER_HDR_SIZE);
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+    do {
+        if (vmsg->data) {
+            rc = write(conn_fd, vmsg->data, vmsg->size);
+        } else {
+            rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size);
+        }
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+    if (rc <= 0) {
+        vu_panic(dev, "Error while writing: %s", strerror(errno));
+        return false;
+    }
+
+    return true;
+}
+
+/* Kick the log_call_fd if required. */
+static void
+vu_log_kick(VuDev *dev)
+{
+    if (dev->log_call_fd != -1) {
+        DPRINT("Kicking the QEMU's log...\n");
+        if (eventfd_write(dev->log_call_fd, 1) < 0) {
+            vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
+        }
+    }
+}
+
+static void
+vu_log_page(uint8_t *log_table, uint64_t page)
+{
+    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
+    atomic_or(&log_table[page / 8], 1 << (page % 8));
+}
+
+static void
+vu_log_write(VuDev *dev, uint64_t address, uint64_t length)
+{
+    uint64_t page;
+
+    if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
+        !dev->log_table || !length) {
+        return;
+    }
+
+    assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
+
+    page = address / VHOST_LOG_PAGE;
+    while (page * VHOST_LOG_PAGE < address + length) {
+        vu_log_page(dev->log_table, page);
+        page += VHOST_LOG_PAGE;
+    }
+
+    vu_log_kick(dev);
+}
+
+static void
+vu_kick_cb(VuDev *dev, int condition, void *data)
+{
+    int index = (intptr_t)data;
+    VuVirtq *vq = &dev->vq[index];
+    int sock = vq->kick_fd;
+    eventfd_t kick_data;
+    ssize_t rc;
+
+    rc = eventfd_read(sock, &kick_data);
+    if (rc == -1) {
+        vu_panic(dev, "kick eventfd_read(): %s", strerror(errno));
+        dev->remove_watch(dev, dev->vq[index].kick_fd);
+    } else {
+        DPRINT("Got kick_data: %016"PRIx64" handler:%p idx:%d\n",
+               kick_data, vq->handler, index);
+        if (vq->handler) {
+            vq->handler(dev, index);
+        }
+    }
+}
+
+static bool
+vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    vmsg->payload.u64 =
+        1ULL << VHOST_F_LOG_ALL |
+        1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
+
+    if (dev->iface->get_features) {
+        vmsg->payload.u64 |= dev->iface->get_features(dev);
+    }
+
+    vmsg->size = sizeof(vmsg->payload.u64);
+
+    DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+    return true;
+}
+
+static void
+vu_set_enable_all_rings(VuDev *dev, bool enabled)
+{
+    int i;
+
+    for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
+        dev->vq[i].enable = enabled;
+    }
+}
+
+static bool
+vu_set_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+    dev->features = vmsg->payload.u64;
+
+    if (!(dev->features & VHOST_USER_F_PROTOCOL_FEATURES)) {
+        vu_set_enable_all_rings(dev, true);
+    }
+
+    if (dev->iface->set_features) {
+        dev->iface->set_features(dev, dev->features);
+    }
+
+    return false;
+}
+
+static bool
+vu_set_owner_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    return false;
+}
+
+static void
+vu_close_log(VuDev *dev)
+{
+    if (dev->log_table) {
+        if (munmap(dev->log_table, dev->log_size) != 0) {
+            perror("close log munmap() error");
+        }
+
+        dev->log_table = NULL;
+    }
+    if (dev->log_call_fd != -1) {
+        close(dev->log_call_fd);
+        dev->log_call_fd = -1;
+    }
+}
+
+static bool
+vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    vu_set_enable_all_rings(dev, false);
+
+    return false;
+}
+
+static bool
+vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    int i;
+    VhostUserMemory *memory = &vmsg->payload.memory;
+    dev->nregions = memory->nregions;
+
+    DPRINT("Nregions: %d\n", memory->nregions);
+    for (i = 0; i < dev->nregions; i++) {
+        void *mmap_addr;
+        VhostUserMemoryRegion *msg_region = &memory->regions[i];
+        VuDevRegion *dev_region = &dev->regions[i];
+
+        DPRINT("Region %d\n", i);
+        DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
+               msg_region->guest_phys_addr);
+        DPRINT("    memory_size:     0x%016"PRIx64"\n",
+               msg_region->memory_size);
+        DPRINT("    userspace_addr   0x%016"PRIx64"\n",
+               msg_region->userspace_addr);
+        DPRINT("    mmap_offset      0x%016"PRIx64"\n",
+               msg_region->mmap_offset);
+
+        dev_region->gpa = msg_region->guest_phys_addr;
+        dev_region->size = msg_region->memory_size;
+        dev_region->qva = msg_region->userspace_addr;
+        dev_region->mmap_offset = msg_region->mmap_offset;
+
+        /* We don't use offset argument of mmap() since the
+         * mapped address has to be page aligned, and we use huge
+         * pages.  */
+        mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
+                         PROT_READ | PROT_WRITE, MAP_SHARED,
+                         vmsg->fds[i], 0);
+
+        if (mmap_addr == MAP_FAILED) {
+            vu_panic(dev, "region mmap error: %s", strerror(errno));
+        } else {
+            dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
+            DPRINT("    mmap_addr:       0x%016"PRIx64"\n",
+                   dev_region->mmap_addr);
+        }
+
+        close(vmsg->fds[i]);
+    }
+
+    return false;
+}
+
+static bool
+vu_set_log_base_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    int fd;
+    uint64_t log_mmap_size, log_mmap_offset;
+    void *rc;
+
+    if (vmsg->fd_num != 1 ||
+        vmsg->size != sizeof(vmsg->payload.log)) {
+        vu_panic(dev, "Invalid log_base message");
+        return true;
+    }
+
+    fd = vmsg->fds[0];
+    log_mmap_offset = vmsg->payload.log.mmap_offset;
+    log_mmap_size = vmsg->payload.log.mmap_size;
+    DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
+    DPRINT("Log mmap_size:   %"PRId64"\n", log_mmap_size);
+
+    rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+              log_mmap_offset);
+    if (rc == MAP_FAILED) {
+        perror("log mmap error");
+    }
+    dev->log_table = rc;
+    dev->log_size = log_mmap_size;
+
+    vmsg->size = sizeof(vmsg->payload.u64);
+
+    return true;
+}
+
+static bool
+vu_set_log_fd_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    if (vmsg->fd_num != 1) {
+        vu_panic(dev, "Invalid log_fd message");
+        return false;
+    }
+
+    if (dev->log_call_fd != -1) {
+        close(dev->log_call_fd);
+    }
+    dev->log_call_fd = vmsg->fds[0];
+    DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
+
+    return false;
+}
+
+static bool
+vu_set_vring_num_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    unsigned int index = vmsg->payload.state.index;
+    unsigned int num = vmsg->payload.state.num;
+
+    DPRINT("State.index: %d\n", index);
+    DPRINT("State.num:   %d\n", num);
+    dev->vq[index].vring.num = num;
+
+    return false;
+}
+
+static bool
+vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    struct vhost_vring_addr *vra = &vmsg->payload.addr;
+    unsigned int index = vra->index;
+    VuVirtq *vq = &dev->vq[index];
+
+    DPRINT("vhost_vring_addr:\n");
+    DPRINT("    index:  %d\n", vra->index);
+    DPRINT("    flags:  %d\n", vra->flags);
+    DPRINT("    desc_user_addr:   0x%016llx\n", vra->desc_user_addr);
+    DPRINT("    used_user_addr:   0x%016llx\n", vra->used_user_addr);
+    DPRINT("    avail_user_addr:  0x%016llx\n", vra->avail_user_addr);
+    DPRINT("    log_guest_addr:   0x%016llx\n", vra->log_guest_addr);
+
+    vq->vring.flags = vra->flags;
+    vq->vring.desc = qva_to_va(dev, vra->desc_user_addr);
+    vq->vring.used = qva_to_va(dev, vra->used_user_addr);
+    vq->vring.avail = qva_to_va(dev, vra->avail_user_addr);
+    vq->vring.log_guest_addr = vra->log_guest_addr;
+
+    DPRINT("Setting virtq addresses:\n");
+    DPRINT("    vring_desc  at %p\n", vq->vring.desc);
+    DPRINT("    vring_used  at %p\n", vq->vring.used);
+    DPRINT("    vring_avail at %p\n", vq->vring.avail);
+
+    if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) {
+        vu_panic(dev, "Invalid vring_addr message");
+        return false;
+    }
+
+    vq->used_idx = vq->vring.used->idx;
+
+    return false;
+}
+
+static bool
+vu_set_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    unsigned int index = vmsg->payload.state.index;
+    unsigned int num = vmsg->payload.state.num;
+
+    DPRINT("State.index: %d\n", index);
+    DPRINT("State.num:   %d\n", num);
+    dev->vq[index].shadow_avail_idx = dev->vq[index].last_avail_idx = num;
+
+    return false;
+}
+
+static bool
+vu_get_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    unsigned int index = vmsg->payload.state.index;
+
+    DPRINT("State.index: %d\n", index);
+    vmsg->payload.state.num = dev->vq[index].last_avail_idx;
+    vmsg->size = sizeof(vmsg->payload.state);
+
+    dev->vq[index].started = false;
+    if (dev->iface->queue_set_started) {
+        dev->iface->queue_set_started(dev, index, false);
+    }
+
+    if (dev->vq[index].call_fd != -1) {
+        close(dev->vq[index].call_fd);
+        dev->vq[index].call_fd = -1;
+    }
+    if (dev->vq[index].kick_fd != -1) {
+        dev->remove_watch(dev, dev->vq[index].kick_fd);
+        close(dev->vq[index].kick_fd);
+        dev->vq[index].kick_fd = -1;
+    }
+
+    return true;
+}
+
+static bool
+vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
+{
+    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+    if (index >= VHOST_MAX_NR_VIRTQUEUE) {
+        vmsg_close_fds(vmsg);
+        vu_panic(dev, "Invalid queue index: %u", index);
+        return false;
+    }
+
+    if (vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK ||
+        vmsg->fd_num != 1) {
+        vmsg_close_fds(vmsg);
+        vu_panic(dev, "Invalid fds in request: %d", vmsg->request);
+        return false;
+    }
+
+    return true;
+}
+
+static bool
+vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+    if (!vu_check_queue_msg_file(dev, vmsg)) {
+        return false;
+    }
+
+    if (dev->vq[index].kick_fd != -1) {
+        dev->remove_watch(dev, dev->vq[index].kick_fd);
+        close(dev->vq[index].kick_fd);
+        dev->vq[index].kick_fd = -1;
+    }
+
+    if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
+        dev->vq[index].kick_fd = vmsg->fds[0];
+        DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
+    }
+
+    dev->vq[index].started = true;
+    if (dev->iface->queue_set_started) {
+        dev->iface->queue_set_started(dev, index, true);
+    }
+
+    if (dev->vq[index].kick_fd != -1 && dev->vq[index].handler) {
+        dev->set_watch(dev, dev->vq[index].kick_fd, VU_WATCH_IN,
+                       vu_kick_cb, (void *)(long)index);
+
+        DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
+               dev->vq[index].kick_fd, index);
+    }
+
+    return false;
+}
+
+void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
+                          vu_queue_handler_cb handler)
+{
+    int qidx = vq - dev->vq;
+
+    vq->handler = handler;
+    if (vq->kick_fd >= 0) {
+        if (handler) {
+            dev->set_watch(dev, vq->kick_fd, VU_WATCH_IN,
+                           vu_kick_cb, (void *)(long)qidx);
+        } else {
+            dev->remove_watch(dev, vq->kick_fd);
+        }
+    }
+}
+
+static bool
+vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+    if (!vu_check_queue_msg_file(dev, vmsg)) {
+        return false;
+    }
+
+    if (dev->vq[index].call_fd != -1) {
+        close(dev->vq[index].call_fd);
+        dev->vq[index].call_fd = -1;
+    }
+
+    if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
+        dev->vq[index].call_fd = vmsg->fds[0];
+    }
+
+    DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
+
+    return false;
+}
+
+static bool
+vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+
+    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
+
+    if (!vu_check_queue_msg_file(dev, vmsg)) {
+        return false;
+    }
+
+    if (dev->vq[index].err_fd != -1) {
+        close(dev->vq[index].err_fd);
+        dev->vq[index].err_fd = -1;
+    }
+
+    if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
+        dev->vq[index].err_fd = vmsg->fds[0];
+    }
+
+    return false;
+}
+
+static bool
+vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
+
+    if (dev->iface->get_protocol_features) {
+        features |= dev->iface->get_protocol_features(dev);
+    }
+
+    vmsg->payload.u64 = features;
+    vmsg->size = sizeof(vmsg->payload.u64);
+
+    return true;
+}
+
+static bool
+vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    uint64_t features = vmsg->payload.u64;
+
+    DPRINT("u64: 0x%016"PRIx64"\n", features);
+
+    dev->protocol_features = vmsg->payload.u64;
+
+    if (dev->iface->set_protocol_features) {
+        dev->iface->set_protocol_features(dev, features);
+    }
+
+    return false;
+}
+
+static bool
+vu_get_queue_num_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    DPRINT("Function %s() not implemented yet.\n", __func__);
+    return false;
+}
+
+static bool
+vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg)
+{
+    unsigned int index = vmsg->payload.state.index;
+    unsigned int enable = vmsg->payload.state.num;
+
+    DPRINT("State.index: %d\n", index);
+    DPRINT("State.enable:   %d\n", enable);
+
+    if (index >= VHOST_MAX_NR_VIRTQUEUE) {
+        vu_panic(dev, "Invalid vring_enable index: %u", index);
+        return false;
+    }
+
+    dev->vq[index].enable = enable;
+    return false;
+}
+
+static bool
+vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
+{
+    int do_reply = 0;
+
+    /* Print out generic part of the request. */
+    DPRINT("================ Vhost user message ================\n");
+    DPRINT("Request: %s (%d)\n", vu_request_to_string(vmsg->request),
+           vmsg->request);
+    DPRINT("Flags:   0x%x\n", vmsg->flags);
+    DPRINT("Size:    %d\n", vmsg->size);
+
+    if (vmsg->fd_num) {
+        int i;
+        DPRINT("Fds:");
+        for (i = 0; i < vmsg->fd_num; i++) {
+            DPRINT(" %d", vmsg->fds[i]);
+        }
+        DPRINT("\n");
+    }
+
+    if (dev->iface->process_msg &&
+        dev->iface->process_msg(dev, vmsg, &do_reply)) {
+        return do_reply;
+    }
+
+    switch (vmsg->request) {
+    case VHOST_USER_GET_FEATURES:
+        return vu_get_features_exec(dev, vmsg);
+    case VHOST_USER_SET_FEATURES:
+        return vu_set_features_exec(dev, vmsg);
+    case VHOST_USER_GET_PROTOCOL_FEATURES:
+        return vu_get_protocol_features_exec(dev, vmsg);
+    case VHOST_USER_SET_PROTOCOL_FEATURES:
+        return vu_set_protocol_features_exec(dev, vmsg);
+    case VHOST_USER_SET_OWNER:
+        return vu_set_owner_exec(dev, vmsg);
+    case VHOST_USER_RESET_OWNER:
+        return vu_reset_device_exec(dev, vmsg);
+    case VHOST_USER_SET_MEM_TABLE:
+        return vu_set_mem_table_exec(dev, vmsg);
+    case VHOST_USER_SET_LOG_BASE:
+        return vu_set_log_base_exec(dev, vmsg);
+    case VHOST_USER_SET_LOG_FD:
+        return vu_set_log_fd_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_NUM:
+        return vu_set_vring_num_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_ADDR:
+        return vu_set_vring_addr_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_BASE:
+        return vu_set_vring_base_exec(dev, vmsg);
+    case VHOST_USER_GET_VRING_BASE:
+        return vu_get_vring_base_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_KICK:
+        return vu_set_vring_kick_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_CALL:
+        return vu_set_vring_call_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_ERR:
+        return vu_set_vring_err_exec(dev, vmsg);
+    case VHOST_USER_GET_QUEUE_NUM:
+        return vu_get_queue_num_exec(dev, vmsg);
+    case VHOST_USER_SET_VRING_ENABLE:
+        return vu_set_vring_enable_exec(dev, vmsg);
+    default:
+        vmsg_close_fds(vmsg);
+        vu_panic(dev, "Unhandled request: %d", vmsg->request);
+    }
+
+    return false;
+}
+
+bool
+vu_dispatch(VuDev *dev)
+{
+    VhostUserMsg vmsg = { 0, };
+    int reply_requested;
+    bool success = false;
+
+    if (!vu_message_read(dev, dev->sock, &vmsg)) {
+        goto end;
+    }
+
+    reply_requested = vu_process_message(dev, &vmsg);
+    if (!reply_requested) {
+        success = true;
+        goto end;
+    }
+
+    if (!vu_message_write(dev, dev->sock, &vmsg)) {
+        goto end;
+    }
+
+    success = true;
+
+end:
+    g_free(vmsg.data);
+    return success;
+}
+
+void
+vu_deinit(VuDev *dev)
+{
+    int i;
+
+    for (i = 0; i < dev->nregions; i++) {
+        VuDevRegion *r = &dev->regions[i];
+        void *m = (void *) (uintptr_t) r->mmap_addr;
+        if (m != MAP_FAILED) {
+            munmap(m, r->size + r->mmap_offset);
+        }
+    }
+    dev->nregions = 0;
+
+    for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
+        VuVirtq *vq = &dev->vq[i];
+
+        if (vq->call_fd != -1) {
+            close(vq->call_fd);
+            vq->call_fd = -1;
+        }
+
+        if (vq->kick_fd != -1) {
+            close(vq->kick_fd);
+            vq->kick_fd = -1;
+        }
+
+        if (vq->err_fd != -1) {
+            close(vq->err_fd);
+            vq->err_fd = -1;
+        }
+    }
+
+
+    vu_close_log(dev);
+
+    if (dev->sock != -1) {
+        close(dev->sock);
+    }
+}
+
+void
+vu_init(VuDev *dev,
+        int socket,
+        vu_panic_cb panic,
+        vu_set_watch_cb set_watch,
+        vu_remove_watch_cb remove_watch,
+        const VuDevIface *iface)
+{
+    int i;
+
+    assert(socket >= 0);
+    assert(set_watch);
+    assert(remove_watch);
+    assert(iface);
+    assert(panic);
+
+    memset(dev, 0, sizeof(*dev));
+
+    dev->sock = socket;
+    dev->panic = panic;
+    dev->set_watch = set_watch;
+    dev->remove_watch = remove_watch;
+    dev->iface = iface;
+    dev->log_call_fd = -1;
+    for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
+        dev->vq[i] = (VuVirtq) {
+            .call_fd = -1, .kick_fd = -1, .err_fd = -1,
+            .notification = true,
+        };
+    }
+}
+
+VuVirtq *
+vu_get_queue(VuDev *dev, int qidx)
+{
+    assert(qidx < VHOST_MAX_NR_VIRTQUEUE);
+    return &dev->vq[qidx];
+}
+
+bool
+vu_queue_enabled(VuDev *dev, VuVirtq *vq)
+{
+    return vq->enable;
+}
+
+static inline uint16_t
+vring_avail_flags(VuVirtq *vq)
+{
+    return vq->vring.avail->flags;
+}
+
+static inline uint16_t
+vring_avail_idx(VuVirtq *vq)
+{
+    vq->shadow_avail_idx = vq->vring.avail->idx;
+
+    return vq->shadow_avail_idx;
+}
+
+static inline uint16_t
+vring_avail_ring(VuVirtq *vq, int i)
+{
+    return vq->vring.avail->ring[i];
+}
+
+static inline uint16_t
+vring_get_used_event(VuVirtq *vq)
+{
+    return vring_avail_ring(vq, vq->vring.num);
+}
+
+static int
+virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx)
+{
+    uint16_t num_heads = vring_avail_idx(vq) - idx;
+
+    /* Check it isn't doing very strange things with descriptor numbers. */
+    if (num_heads > vq->vring.num) {
+        vu_panic(dev, "Guest moved used index from %u to %u",
+                 idx, vq->shadow_avail_idx);
+        return -1;
+    }
+    if (num_heads) {
+        /* On success, callers read a descriptor at vq->last_avail_idx.
+         * Make sure descriptor read does not bypass avail index read. */
+        smp_rmb();
+    }
+
+    return num_heads;
+}
+
+static bool
+virtqueue_get_head(VuDev *dev, VuVirtq *vq,
+                   unsigned int idx, unsigned int *head)
+{
+    /* Grab the next descriptor number they're advertising, and increment
+     * the index we've seen. */
+    *head = vring_avail_ring(vq, idx % vq->vring.num);
+
+    /* If their number is silly, that's a fatal mistake. */
+    if (*head >= vq->vring.num) {
+        vu_panic(dev, "Guest says index %u is available", head);
+        return false;
+    }
+
+    return true;
+}
+
+enum {
+    VIRTQUEUE_READ_DESC_ERROR = -1,
+    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
+    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
+};
+
+static int
+virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
+                         int i, unsigned int max, unsigned int *next)
+{
+    /* If this descriptor says it doesn't chain, we're done. */
+    if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
+        return VIRTQUEUE_READ_DESC_DONE;
+    }
+
+    /* Check they're not leading us off end of descriptors. */
+    *next = desc[i].next;
+    /* Make sure compiler knows to grab that: we don't want it changing! */
+    smp_wmb();
+
+    if (*next >= max) {
+        vu_panic(dev, "Desc next is %u", next);
+        return VIRTQUEUE_READ_DESC_ERROR;
+    }
+
+    return VIRTQUEUE_READ_DESC_MORE;
+}
+
+void
+vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
+                         unsigned int *out_bytes,
+                         unsigned max_in_bytes, unsigned max_out_bytes)
+{
+    unsigned int idx;
+    unsigned int total_bufs, in_total, out_total;
+    int rc;
+
+    idx = vq->last_avail_idx;
+
+    total_bufs = in_total = out_total = 0;
+    while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) {
+        unsigned int max, num_bufs, indirect = 0;
+        struct vring_desc *desc;
+        unsigned int i;
+
+        max = vq->vring.num;
+        num_bufs = total_bufs;
+        if (!virtqueue_get_head(dev, vq, idx++, &i)) {
+            goto err;
+        }
+        desc = vq->vring.desc;
+
+        if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+            if (desc[i].len % sizeof(struct vring_desc)) {
+                vu_panic(dev, "Invalid size for indirect buffer table");
+                goto err;
+            }
+
+            /* If we've got too many, that implies a descriptor loop. */
+            if (num_bufs >= max) {
+                vu_panic(dev, "Looped descriptor");
+                goto err;
+            }
+
+            /* loop over the indirect descriptor table */
+            indirect = 1;
+            max = desc[i].len / sizeof(struct vring_desc);
+            desc = vu_gpa_to_va(dev, desc[i].addr);
+            num_bufs = i = 0;
+        }
+
+        do {
+            /* If we've got too many, that implies a descriptor loop. */
+            if (++num_bufs > max) {
+                vu_panic(dev, "Looped descriptor");
+                goto err;
+            }
+
+            if (desc[i].flags & VRING_DESC_F_WRITE) {
+                in_total += desc[i].len;
+            } else {
+                out_total += desc[i].len;
+            }
+            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
+                goto done;
+            }
+            rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
+        } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+            goto err;
+        }
+
+        if (!indirect) {
+            total_bufs = num_bufs;
+        } else {
+            total_bufs++;
+        }
+    }
+    if (rc < 0) {
+        goto err;
+    }
+done:
+    if (in_bytes) {
+        *in_bytes = in_total;
+    }
+    if (out_bytes) {
+        *out_bytes = out_total;
+    }
+    return;
+
+err:
+    in_total = out_total = 0;
+    goto done;
+}
+
+bool
+vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
+                     unsigned int out_bytes)
+{
+    unsigned int in_total, out_total;
+
+    vu_queue_get_avail_bytes(dev, vq, &in_total, &out_total,
+                             in_bytes, out_bytes);
+
+    return in_bytes <= in_total && out_bytes <= out_total;
+}
+
+/* Fetch avail_idx from VQ memory only when we really need to know if
+ * guest has added some buffers. */
+int
+vu_queue_empty(VuDev *dev, VuVirtq *vq)
+{
+    if (vq->shadow_avail_idx != vq->last_avail_idx) {
+        return 0;
+    }
+
+    return vring_avail_idx(vq) == vq->last_avail_idx;
+}
+
+static inline
+bool has_feature(uint64_t features, unsigned int fbit)
+{
+    assert(fbit < 64);
+    return !!(features & (1ULL << fbit));
+}
+
+static inline
+bool vu_has_feature(VuDev *dev,
+                    unsigned int fbit)
+{
+    return has_feature(dev->features, fbit);
+}
+
+static bool
+vring_notify(VuDev *dev, VuVirtq *vq)
+{
+    uint16_t old, new;
+    bool v;
+
+    /* We need to expose used array entries before checking used event. */
+    smp_mb();
+
+    /* Always notify when queue is empty (when feature acknowledge) */
+    if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
+        !vq->inuse && vu_queue_empty(dev, vq)) {
+        return true;
+    }
+
+    if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
+    }
+
+    v = vq->signalled_used_valid;
+    vq->signalled_used_valid = true;
+    old = vq->signalled_used;
+    new = vq->signalled_used = vq->used_idx;
+    return !v || vring_need_event(vring_get_used_event(vq), new, old);
+}
+
+void
+vu_queue_notify(VuDev *dev, VuVirtq *vq)
+{
+    if (unlikely(dev->broken)) {
+        return;
+    }
+
+    if (!vring_notify(dev, vq)) {
+        DPRINT("skipped notify...\n");
+        return;
+    }
+
+    if (eventfd_write(vq->call_fd, 1) < 0) {
+        vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
+    }
+}
+
+static inline void
+vring_used_flags_set_bit(VuVirtq *vq, int mask)
+{
+    uint16_t *flags;
+
+    flags = (uint16_t *)((char*)vq->vring.used +
+                         offsetof(struct vring_used, flags));
+    *flags |= mask;
+}
+
+static inline void
+vring_used_flags_unset_bit(VuVirtq *vq, int mask)
+{
+    uint16_t *flags;
+
+    flags = (uint16_t *)((char*)vq->vring.used +
+                         offsetof(struct vring_used, flags));
+    *flags &= ~mask;
+}
+
+static inline void
+vring_set_avail_event(VuVirtq *vq, uint16_t val)
+{
+    if (!vq->notification) {
+        return;
+    }
+
+    *((uint16_t *) &vq->vring.used->ring[vq->vring.num]) = val;
+}
+
+void
+vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable)
+{
+    vq->notification = enable;
+    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+        vring_set_avail_event(vq, vring_avail_idx(vq));
+    } else if (enable) {
+        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
+    } else {
+        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
+    }
+    if (enable) {
+        /* Expose avail event/used flags before caller checks the avail idx. */
+        smp_mb();
+    }
+}
+
+static void
+virtqueue_map_desc(VuDev *dev,
+                   unsigned int *p_num_sg, struct iovec *iov,
+                   unsigned int max_num_sg, bool is_write,
+                   uint64_t pa, size_t sz)
+{
+    unsigned num_sg = *p_num_sg;
+
+    assert(num_sg <= max_num_sg);
+
+    if (!sz) {
+        vu_panic(dev, "virtio: zero sized buffers are not allowed");
+        return;
+    }
+
+    iov[num_sg].iov_base = vu_gpa_to_va(dev, pa);
+    iov[num_sg].iov_len = sz;
+    num_sg++;
+
+    *p_num_sg = num_sg;
+}
+
+/* Round number down to multiple */
+#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
+
+/* Round number up to multiple */
+#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
+
+static void *
+virtqueue_alloc_element(size_t sz,
+                                     unsigned out_num, unsigned in_num)
+{
+    VuVirtqElement *elem;
+    size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
+    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
+    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
+
+    assert(sz >= sizeof(VuVirtqElement));
+    elem = malloc(out_sg_end);
+    elem->out_num = out_num;
+    elem->in_num = in_num;
+    elem->in_sg = (void *)elem + in_sg_ofs;
+    elem->out_sg = (void *)elem + out_sg_ofs;
+    return elem;
+}
+
+void *
+vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
+{
+    unsigned int i, head, max;
+    VuVirtqElement *elem;
+    unsigned out_num, in_num;
+    struct iovec iov[VIRTQUEUE_MAX_SIZE];
+    struct vring_desc *desc;
+    int rc;
+
+    if (unlikely(dev->broken)) {
+        return NULL;
+    }
+
+    if (vu_queue_empty(dev, vq)) {
+        return NULL;
+    }
+    /* Needed after virtio_queue_empty(), see comment in
+     * virtqueue_num_heads(). */
+    smp_rmb();
+
+    /* When we start there are none of either input nor output. */
+    out_num = in_num = 0;
+
+    max = vq->vring.num;
+    if (vq->inuse >= vq->vring.num) {
+        vu_panic(dev, "Virtqueue size exceeded");
+        return NULL;
+    }
+
+    if (!virtqueue_get_head(dev, vq, vq->last_avail_idx++, &head)) {
+        return NULL;
+    }
+
+    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+        vring_set_avail_event(vq, vq->last_avail_idx);
+    }
+
+    i = head;
+    desc = vq->vring.desc;
+    if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+        if (desc[i].len % sizeof(struct vring_desc)) {
+            vu_panic(dev, "Invalid size for indirect buffer table");
+        }
+
+        /* loop over the indirect descriptor table */
+        max = desc[i].len / sizeof(struct vring_desc);
+        desc = vu_gpa_to_va(dev, desc[i].addr);
+        i = 0;
+    }
+
+    /* Collect all the descriptors */
+    do {
+        if (desc[i].flags & VRING_DESC_F_WRITE) {
+            virtqueue_map_desc(dev, &in_num, iov + out_num,
+                               VIRTQUEUE_MAX_SIZE - out_num, true,
+                               desc[i].addr, desc[i].len);
+        } else {
+            if (in_num) {
+                vu_panic(dev, "Incorrect order for descriptors");
+                return NULL;
+            }
+            virtqueue_map_desc(dev, &out_num, iov,
+                               VIRTQUEUE_MAX_SIZE, false,
+                               desc[i].addr, desc[i].len);
+        }
+
+        /* If we've got too many, that implies a descriptor loop. */
+        if ((in_num + out_num) > max) {
+            vu_panic(dev, "Looped descriptor");
+        }
+        rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
+    } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+        return NULL;
+    }
+
+    /* Now copy what we have collected and mapped */
+    elem = virtqueue_alloc_element(sz, out_num, in_num);
+    elem->index = head;
+    for (i = 0; i < out_num; i++) {
+        elem->out_sg[i] = iov[i];
+    }
+    for (i = 0; i < in_num; i++) {
+        elem->in_sg[i] = iov[out_num + i];
+    }
+
+    vq->inuse++;
+
+    return elem;
+}
+
+bool
+vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num)
+{
+    if (num > vq->inuse) {
+        return false;
+    }
+    vq->last_avail_idx -= num;
+    vq->inuse -= num;
+    return true;
+}
+
+static inline
+void vring_used_write(VuDev *dev, VuVirtq *vq,
+                      struct vring_used_elem *uelem, int i)
+{
+    struct vring_used *used = vq->vring.used;
+
+    used->ring[i] = *uelem;
+    vu_log_write(dev, vq->vring.log_guest_addr +
+                 offsetof(struct vring_used, ring[i]),
+                 sizeof(used->ring[i]));
+}
+
+
+static void
+vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
+                  const VuVirtqElement *elem,
+                  unsigned int len)
+{
+    struct vring_desc *desc = vq->vring.desc;
+    unsigned int i, max, min;
+    unsigned num_bufs = 0;
+
+    max = vq->vring.num;
+    i = elem->index;
+
+    if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+        if (desc[i].len % sizeof(struct vring_desc)) {
+            vu_panic(dev, "Invalid size for indirect buffer table");
+        }
+
+        /* loop over the indirect descriptor table */
+        max = desc[i].len / sizeof(struct vring_desc);
+        desc = vu_gpa_to_va(dev, desc[i].addr);
+        i = 0;
+    }
+
+    do {
+        if (++num_bufs > max) {
+            vu_panic(dev, "Looped descriptor");
+            return;
+        }
+
+        if (desc[i].flags & VRING_DESC_F_WRITE) {
+            min = MIN(desc[i].len, len);
+            vu_log_write(dev, desc[i].addr, min);
+            len -= min;
+        }
+
+    } while (len > 0 &&
+             (virtqueue_read_next_desc(dev, desc, i, max, &i)
+              == VIRTQUEUE_READ_DESC_MORE));
+}
+
+void
+vu_queue_fill(VuDev *dev, VuVirtq *vq,
+              const VuVirtqElement *elem,
+              unsigned int len, unsigned int idx)
+{
+    struct vring_used_elem uelem;
+
+    if (unlikely(dev->broken)) {
+        return;
+    }
+
+    vu_log_queue_fill(dev, vq, elem, len);
+
+    idx = (idx + vq->used_idx) % vq->vring.num;
+
+    uelem.id = elem->index;
+    uelem.len = len;
+    vring_used_write(dev, vq, &uelem, idx);
+}
+
+static inline
+void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
+{
+    vq->vring.used->idx = val;
+    vu_log_write(dev,
+                 vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
+                 sizeof(vq->vring.used->idx));
+
+    vq->used_idx = val;
+}
+
+void
+vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int count)
+{
+    uint16_t old, new;
+
+    if (unlikely(dev->broken)) {
+        return;
+    }
+
+    /* Make sure buffer is written before we update index. */
+    smp_wmb();
+
+    old = vq->used_idx;
+    new = old + count;
+    vring_used_idx_set(dev, vq, new);
+    vq->inuse -= count;
+    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
+        vq->signalled_used_valid = false;
+    }
+}
+
+void
+vu_queue_push(VuDev *dev, VuVirtq *vq,
+              const VuVirtqElement *elem, unsigned int len)
+{
+    vu_queue_fill(dev, vq, elem, len, 0);
+    vu_queue_flush(dev, vq, 1);
+}
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
new file mode 100644
index 0000000000..156b50e989
--- /dev/null
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -0,0 +1,435 @@
+/*
+ * Vhost User library
+ *
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ *  Victor Kaplansky <victork@redhat.com>
+ *  Marc-André Lureau <mlureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef LIBVHOST_USER_H
+#define LIBVHOST_USER_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/vhost.h>
+#include "standard-headers/linux/virtio_ring.h"
+
+/* Based on qemu/hw/virtio/vhost-user.c */
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#define VHOST_LOG_PAGE 4096
+
+#define VHOST_MAX_NR_VIRTQUEUE 8
+#define VIRTQUEUE_MAX_SIZE 1024
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+enum VhostUserProtocolFeature {
+    VHOST_USER_PROTOCOL_F_MQ = 0,
+    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
+    VHOST_USER_PROTOCOL_F_RARP = 2,
+
+    VHOST_USER_PROTOCOL_F_MAX
+};
+
+#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
+
+typedef enum VhostUserRequest {
+    VHOST_USER_NONE = 0,
+    VHOST_USER_GET_FEATURES = 1,
+    VHOST_USER_SET_FEATURES = 2,
+    VHOST_USER_SET_OWNER = 3,
+    VHOST_USER_RESET_OWNER = 4,
+    VHOST_USER_SET_MEM_TABLE = 5,
+    VHOST_USER_SET_LOG_BASE = 6,
+    VHOST_USER_SET_LOG_FD = 7,
+    VHOST_USER_SET_VRING_NUM = 8,
+    VHOST_USER_SET_VRING_ADDR = 9,
+    VHOST_USER_SET_VRING_BASE = 10,
+    VHOST_USER_GET_VRING_BASE = 11,
+    VHOST_USER_SET_VRING_KICK = 12,
+    VHOST_USER_SET_VRING_CALL = 13,
+    VHOST_USER_SET_VRING_ERR = 14,
+    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+    VHOST_USER_GET_QUEUE_NUM = 17,
+    VHOST_USER_SET_VRING_ENABLE = 18,
+    VHOST_USER_SEND_RARP = 19,
+    VHOST_USER_INPUT_GET_CONFIG = 20,
+    VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+    uint64_t guest_phys_addr;
+    uint64_t memory_size;
+    uint64_t userspace_addr;
+    uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+    uint32_t nregions;
+    uint32_t padding;
+    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserLog {
+    uint64_t mmap_size;
+    uint64_t mmap_offset;
+} VhostUserLog;
+
+#if defined(_WIN32)
+# define VU_PACKED __attribute__((gcc_struct, packed))
+#else
+# define VU_PACKED __attribute__((packed))
+#endif
+
+typedef struct VhostUserMsg {
+    VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK     (0x3)
+#define VHOST_USER_REPLY_MASK       (0x1 << 2)
+    uint32_t flags;
+    uint32_t size; /* the following payload size */
+
+    union {
+#define VHOST_USER_VRING_IDX_MASK   (0xff)
+#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
+        uint64_t u64;
+        struct vhost_vring_state state;
+        struct vhost_vring_addr addr;
+        VhostUserMemory memory;
+        VhostUserLog log;
+    } payload;
+
+    int fds[VHOST_MEMORY_MAX_NREGIONS];
+    int fd_num;
+    uint8_t *data;
+} VU_PACKED VhostUserMsg;
+
+typedef struct VuDevRegion {
+    /* Guest Physical address. */
+    uint64_t gpa;
+    /* Memory region size. */
+    uint64_t size;
+    /* QEMU virtual address (userspace). */
+    uint64_t qva;
+    /* Starting offset in our mmaped space. */
+    uint64_t mmap_offset;
+    /* Start address of mmaped space. */
+    uint64_t mmap_addr;
+} VuDevRegion;
+
+typedef struct VuDev VuDev;
+
+typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
+typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
+typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
+                                  int *do_reply);
+typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
+
+typedef struct VuDevIface {
+    /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
+    vu_get_features_cb get_features;
+    /* enable vhost implementation features */
+    vu_set_features_cb set_features;
+    /* get the protocol feature bitmask from the underlying vhost
+     * implementation */
+    vu_get_features_cb get_protocol_features;
+    /* enable protocol features in the underlying vhost implementation. */
+    vu_set_features_cb set_protocol_features;
+    /* process_msg is called for each vhost-user message received */
+    /* skip libvhost-user processing if return value != 0 */
+    vu_process_msg_cb process_msg;
+    /* tells when queues can be processed */
+    vu_queue_set_started_cb queue_set_started;
+} VuDevIface;
+
+typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);
+
+typedef struct VuRing {
+    unsigned int num;
+    struct vring_desc *desc;
+    struct vring_avail *avail;
+    struct vring_used *used;
+    uint64_t log_guest_addr;
+    uint32_t flags;
+} VuRing;
+
+typedef struct VuVirtq {
+    VuRing vring;
+
+    /* Next head to pop */
+    uint16_t last_avail_idx;
+
+    /* Last avail_idx read from VQ. */
+    uint16_t shadow_avail_idx;
+
+    uint16_t used_idx;
+
+    /* Last used index value we have signalled on */
+    uint16_t signalled_used;
+
+    /* Last used index value we have signalled on */
+    bool signalled_used_valid;
+
+    /* Notification enabled? */
+    bool notification;
+
+    int inuse;
+
+    vu_queue_handler_cb handler;
+
+    int call_fd;
+    int kick_fd;
+    int err_fd;
+    unsigned int enable;
+    bool started;
+} VuVirtq;
+
+enum VuWatchCondtion {
+    VU_WATCH_IN = 1 << 0,
+    VU_WATCH_OUT = 1 << 1,
+    VU_WATCH_PRI = 1 << 2,
+    VU_WATCH_ERR = 1 << 3,
+    VU_WATCH_HUP = 1 << 4,
+};
+
+typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
+typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
+typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
+                                 vu_watch_cb cb, void *data);
+typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);
+
+struct VuDev {
+    int sock;
+    uint32_t nregions;
+    VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+    VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE];
+    int log_call_fd;
+    uint64_t log_size;
+    uint8_t *log_table;
+    uint64_t features;
+    uint64_t protocol_features;
+    bool broken;
+
+    /* @set_watch: add or update the given fd to the watch set,
+     * call cb when condition is met */
+    vu_set_watch_cb set_watch;
+
+    /* @remove_watch: remove the given fd from the watch set */
+    vu_remove_watch_cb remove_watch;
+
+    /* @panic: encountered an unrecoverable error, you may try to
+     * re-initialize */
+    vu_panic_cb panic;
+    const VuDevIface *iface;
+};
+
+typedef struct VuVirtqElement {
+    unsigned int index;
+    unsigned int out_num;
+    unsigned int in_num;
+    struct iovec *in_sg;
+    struct iovec *out_sg;
+} VuVirtqElement;
+
+/**
+ * vu_init:
+ * @dev: a VuDev context
+ * @socket: the socket connected to vhost-user master
+ * @panic: a panic callback
+ * @set_watch: a set_watch callback
+ * @remove_watch: a remove_watch callback
+ * @iface: a VuDevIface structure with vhost-user device callbacks
+ *
+ * Intializes a VuDev vhost-user context.
+ **/
+void vu_init(VuDev *dev,
+             int socket,
+             vu_panic_cb panic,
+             vu_set_watch_cb set_watch,
+             vu_remove_watch_cb remove_watch,
+             const VuDevIface *iface);
+
+
+/**
+ * vu_deinit:
+ * @dev: a VuDev context
+ *
+ * Cleans up the VuDev context
+ */
+void vu_deinit(VuDev *dev);
+
+/**
+ * vu_dispatch:
+ * @dev: a VuDev context
+ *
+ * Process one vhost-user message.
+ *
+ * Returns: TRUE on success, FALSE on failure.
+ */
+bool vu_dispatch(VuDev *dev);
+
+/**
+ * vu_gpa_to_va:
+ * @dev: a VuDev context
+ * @guest_addr: guest address
+ *
+ * Translate a guest address to a pointer. Returns NULL on failure.
+ */
+void *vu_gpa_to_va(VuDev *dev, uint64_t guest_addr);
+
+/**
+ * vu_get_queue:
+ * @dev: a VuDev context
+ * @qidx: queue index
+ *
+ * Returns the queue number @qidx.
+ */
+VuVirtq *vu_get_queue(VuDev *dev, int qidx);
+
+/**
+ * vu_set_queue_handler:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @handler: the queue handler callback
+ *
+ * Set the queue handler. This function may be called several times
+ * for the same queue. If called with NULL @handler, the handler is
+ * removed.
+ */
+void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
+                          vu_queue_handler_cb handler);
+
+
+/**
+ * vu_queue_set_notification:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @enable: state
+ *
+ * Set whether the queue notifies (via event index or interrupt)
+ */
+void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);
+
+/**
+ * vu_queue_enabled:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Returns: whether the queue is enabled.
+ */
+bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_enabled:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Returns: whether the queue is empty.
+ */
+int vu_queue_empty(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_notify:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ *
+ * Request to notify the queue via callfd (skipped if unnecessary)
+ */
+void vu_queue_notify(VuDev *dev, VuVirtq *vq);
+
+/**
+ * vu_queue_pop:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @sz: the size of struct to return (must be >= VuVirtqElement)
+ *
+ * Returns: a VuVirtqElement filled from the queue or NULL.
+ */
+void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
+
+/**
+ * vu_queue_rewind:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @num: number of elements to push back
+ *
+ * Pretend that elements weren't popped from the virtqueue.  The next
+ * virtqueue_pop() will refetch the oldest element.
+ *
+ * Returns: true on success, false if @num is greater than the number of in use
+ * elements.
+ */
+bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);
+
+/**
+ * vu_queue_fill:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @elem: a VuVirtqElement
+ * @len: length in bytes to write
+ * @idx: optional offset for the used ring index (0 in general)
+ *
+ * Fill the used ring with @elem element.
+ */
+void vu_queue_fill(VuDev *dev, VuVirtq *vq,
+                   const VuVirtqElement *elem,
+                   unsigned int len, unsigned int idx);
+
+/**
+ * vu_queue_push:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @elem: a VuVirtqElement
+ * @len: length in bytes to write
+ *
+ * Helper that combines vu_queue_fill() with a vu_queue_flush().
+ */
+void vu_queue_push(VuDev *dev, VuVirtq *vq,
+                   const VuVirtqElement *elem, unsigned int len);
+
+/**
+ * vu_queue_flush:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @num: number of elements to flush
+ *
+ * Mark the last number of elements as done (used.idx is updated by
+ * num elements).
+*/
+void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);
+
+/**
+ * vu_queue_get_avail_bytes:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @in_bytes: in bytes
+ * @out_bytes: out bytes
+ * @max_in_bytes: stop counting after max_in_bytes
+ * @max_out_bytes: stop counting after max_out_bytes
+ *
+ * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
+ */
+void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
+                              unsigned int *out_bytes,
+                              unsigned max_in_bytes, unsigned max_out_bytes);
+
+/**
+ * vu_queue_avail_bytes:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @in_bytes: expected in bytes
+ * @out_bytes: expected out bytes
+ *
+ * Returns: true if in_bytes <= in_total && out_bytes <= out_total
+ */
+bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
+                          unsigned int out_bytes);
+
+#endif /* LIBVHOST_USER_H */
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index bdcf6bcce7..d31b4577f0 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -1258,7 +1258,7 @@ static void pxa2xx_i2c_update(PXA2xxI2CState *s)
 }
 
 /* These are only stubs now.  */
-static void pxa2xx_i2c_event(I2CSlave *i2c, enum i2c_event event)
+static int pxa2xx_i2c_event(I2CSlave *i2c, enum i2c_event event)
 {
     PXA2xxI2CSlaveState *slave = PXA2XX_I2C_SLAVE(i2c);
     PXA2xxI2CState *s = slave->host;
@@ -1280,6 +1280,8 @@ static void pxa2xx_i2c_event(I2CSlave *i2c, enum i2c_event event)
         break;
     }
     pxa2xx_i2c_update(s);
+
+    return 0;
 }
 
 static int pxa2xx_i2c_rx(I2CSlave *i2c)
diff --git a/hw/arm/tosa.c b/hw/arm/tosa.c
index 39d9dbbae6..c3db996930 100644
--- a/hw/arm/tosa.c
+++ b/hw/arm/tosa.c
@@ -172,7 +172,7 @@ static int tosa_dac_send(I2CSlave *i2c, uint8_t data)
     return 0;
 }
 
-static void tosa_dac_event(I2CSlave *i2c, enum i2c_event event)
+static int tosa_dac_event(I2CSlave *i2c, enum i2c_event event)
 {
     TosaDACState *s = TOSA_DAC(i2c);
 
@@ -194,6 +194,8 @@ static void tosa_dac_event(I2CSlave *i2c, enum i2c_event event)
     default:
         break;
     }
+
+    return 0;
 }
 
 static int tosa_dac_recv(I2CSlave *s)
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 7102686882..085a611173 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -29,7 +29,6 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "hw/arm/virt-acpi-build.h"
 #include "qemu/bitmap.h"
 #include "trace.h"
 #include "qom/cpu.h"
@@ -43,6 +42,7 @@
 #include "hw/acpi/aml-build.h"
 #include "hw/pci/pcie_host.h"
 #include "hw/pci/pci.h"
+#include "hw/arm/virt.h"
 #include "sysemu/numa.h"
 #include "kvm_arm.h"
 
@@ -384,7 +384,7 @@ build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset)
 }
 
 static void
-build_iort(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_iort(GArray *table_data, BIOSLinker *linker)
 {
     int iort_start = table_data->len;
     AcpiIortIdMapping *idmap;
@@ -439,11 +439,11 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
 }
 
 static void
-build_spcr(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_spcr(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
 {
     AcpiSerialPortConsoleRedirection *spcr;
-    const MemMapEntry *uart_memmap = &guest_info->memmap[VIRT_UART];
-    int irq = guest_info->irqmap[VIRT_UART] + ARM_SPI_BASE;
+    const MemMapEntry *uart_memmap = &vms->memmap[VIRT_UART];
+    int irq = vms->irqmap[VIRT_UART] + ARM_SPI_BASE;
 
     spcr = acpi_data_push(table_data, sizeof(*spcr));
 
@@ -472,16 +472,16 @@ build_spcr(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
 }
 
 static void
-build_srat(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
 {
     AcpiSystemResourceAffinityTable *srat;
     AcpiSratProcessorGiccAffinity *core;
     AcpiSratMemoryAffinity *numamem;
     int i, j, srat_start;
     uint64_t mem_base;
-    uint32_t *cpu_node = g_malloc0(guest_info->smp_cpus * sizeof(uint32_t));
+    uint32_t *cpu_node = g_malloc0(vms->smp_cpus * sizeof(uint32_t));
 
-    for (i = 0; i < guest_info->smp_cpus; i++) {
+    for (i = 0; i < vms->smp_cpus; i++) {
         j = numa_get_node_for_cpu(i);
         if (j < nb_numa_nodes) {
                 cpu_node[i] = j;
@@ -492,7 +492,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
     srat = acpi_data_push(table_data, sizeof(*srat));
     srat->reserved1 = cpu_to_le32(1);
 
-    for (i = 0; i < guest_info->smp_cpus; ++i) {
+    for (i = 0; i < vms->smp_cpus; ++i) {
         core = acpi_data_push(table_data, sizeof(*core));
         core->type = ACPI_SRAT_PROCESSOR_GICC;
         core->length = sizeof(*core);
@@ -502,7 +502,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
     }
     g_free(cpu_node);
 
-    mem_base = guest_info->memmap[VIRT_MEM].base;
+    mem_base = vms->memmap[VIRT_MEM].base;
     for (i = 0; i < nb_numa_nodes; ++i) {
         numamem = acpi_data_push(table_data, sizeof(*numamem));
         build_srat_memory(numamem, mem_base, numa_info[i].node_mem, i,
@@ -515,10 +515,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
 }
 
 static void
-build_mcfg(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_mcfg(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
 {
     AcpiTableMcfg *mcfg;
-    const MemMapEntry *memmap = guest_info->memmap;
+    const MemMapEntry *memmap = vms->memmap;
     int len = sizeof(*mcfg) + sizeof(mcfg->allocation[0]);
 
     mcfg = acpi_data_push(table_data, len);
@@ -535,24 +535,33 @@ build_mcfg(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
 
 /* GTDT */
 static void
-build_gtdt(GArray *table_data, BIOSLinker *linker)
+build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
 {
+    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
     int gtdt_start = table_data->len;
     AcpiGenericTimerTable *gtdt;
+    uint32_t irqflags;
+
+    if (vmc->claim_edge_triggered_timers) {
+        irqflags = ACPI_GTDT_INTERRUPT_MODE_EDGE;
+    } else {
+        irqflags = ACPI_GTDT_INTERRUPT_MODE_LEVEL;
+    }
 
     gtdt = acpi_data_push(table_data, sizeof *gtdt);
     /* The interrupt values are the same with the device tree when adding 16 */
-    gtdt->secure_el1_interrupt = ARCH_TIMER_S_EL1_IRQ + 16;
-    gtdt->secure_el1_flags = ACPI_EDGE_SENSITIVE;
+    gtdt->secure_el1_interrupt = cpu_to_le32(ARCH_TIMER_S_EL1_IRQ + 16);
+    gtdt->secure_el1_flags = cpu_to_le32(irqflags);
 
-    gtdt->non_secure_el1_interrupt = ARCH_TIMER_NS_EL1_IRQ + 16;
-    gtdt->non_secure_el1_flags = ACPI_EDGE_SENSITIVE | ACPI_GTDT_ALWAYS_ON;
+    gtdt->non_secure_el1_interrupt = cpu_to_le32(ARCH_TIMER_NS_EL1_IRQ + 16);
+    gtdt->non_secure_el1_flags = cpu_to_le32(irqflags |
+                                             ACPI_GTDT_CAP_ALWAYS_ON);
 
-    gtdt->virtual_timer_interrupt = ARCH_TIMER_VIRT_IRQ + 16;
-    gtdt->virtual_timer_flags = ACPI_EDGE_SENSITIVE;
+    gtdt->virtual_timer_interrupt = cpu_to_le32(ARCH_TIMER_VIRT_IRQ + 16);
+    gtdt->virtual_timer_flags = cpu_to_le32(irqflags);
 
-    gtdt->non_secure_el2_interrupt = ARCH_TIMER_NS_EL2_IRQ + 16;
-    gtdt->non_secure_el2_flags = ACPI_EDGE_SENSITIVE;
+    gtdt->non_secure_el2_interrupt = cpu_to_le32(ARCH_TIMER_NS_EL2_IRQ + 16);
+    gtdt->non_secure_el2_flags = cpu_to_le32(irqflags);
 
     build_header(linker, table_data,
                  (void *)(table_data->data + gtdt_start), "GTDT",
@@ -561,11 +570,12 @@ build_gtdt(GArray *table_data, BIOSLinker *linker)
 
 /* MADT */
 static void
-build_madt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
 {
+    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
     int madt_start = table_data->len;
-    const MemMapEntry *memmap = guest_info->memmap;
-    const int *irqmap = guest_info->irqmap;
+    const MemMapEntry *memmap = vms->memmap;
+    const int *irqmap = vms->irqmap;
     AcpiMultipleApicTable *madt;
     AcpiMadtGenericDistributor *gicd;
     AcpiMadtGenericMsiFrame *gic_msi;
@@ -576,30 +586,30 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
     gicd = acpi_data_push(table_data, sizeof *gicd);
     gicd->type = ACPI_APIC_GENERIC_DISTRIBUTOR;
     gicd->length = sizeof(*gicd);
-    gicd->base_address = memmap[VIRT_GIC_DIST].base;
-    gicd->version = guest_info->gic_version;
+    gicd->base_address = cpu_to_le64(memmap[VIRT_GIC_DIST].base);
+    gicd->version = vms->gic_version;
 
-    for (i = 0; i < guest_info->smp_cpus; i++) {
-        AcpiMadtGenericInterrupt *gicc = acpi_data_push(table_data,
-                                                     sizeof *gicc);
+    for (i = 0; i < vms->smp_cpus; i++) {
+        AcpiMadtGenericCpuInterface *gicc = acpi_data_push(table_data,
+                                                           sizeof(*gicc));
         ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i));
 
-        gicc->type = ACPI_APIC_GENERIC_INTERRUPT;
+        gicc->type = ACPI_APIC_GENERIC_CPU_INTERFACE;
         gicc->length = sizeof(*gicc);
-        if (guest_info->gic_version == 2) {
-            gicc->base_address = memmap[VIRT_GIC_CPU].base;
+        if (vms->gic_version == 2) {
+            gicc->base_address = cpu_to_le64(memmap[VIRT_GIC_CPU].base);
         }
-        gicc->cpu_interface_number = i;
-        gicc->arm_mpidr = armcpu->mp_affinity;
-        gicc->uid = i;
-        gicc->flags = cpu_to_le32(ACPI_GICC_ENABLED);
+        gicc->cpu_interface_number = cpu_to_le32(i);
+        gicc->arm_mpidr = cpu_to_le64(armcpu->mp_affinity);
+        gicc->uid = cpu_to_le32(i);
+        gicc->flags = cpu_to_le32(ACPI_MADT_GICC_ENABLED);
 
         if (arm_feature(&armcpu->env, ARM_FEATURE_PMU)) {
             gicc->performance_interrupt = cpu_to_le32(PPI(VIRTUAL_PMU_IRQ));
         }
     }
 
-    if (guest_info->gic_version == 3) {
+    if (vms->gic_version == 3) {
         AcpiMadtGenericTranslator *gic_its;
         AcpiMadtGenericRedistributor *gicr = acpi_data_push(table_data,
                                                          sizeof *gicr);
@@ -609,7 +619,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
         gicr->base_address = cpu_to_le64(memmap[VIRT_GIC_REDIST].base);
         gicr->range_length = cpu_to_le32(memmap[VIRT_GIC_REDIST].size);
 
-        if (its_class_name() && !guest_info->no_its) {
+        if (its_class_name() && !vmc->no_its) {
             gic_its = acpi_data_push(table_data, sizeof *gic_its);
             gic_its->type = ACPI_APIC_GENERIC_TRANSLATOR;
             gic_its->length = sizeof(*gic_its);
@@ -641,8 +651,8 @@ build_fadt(GArray *table_data, BIOSLinker *linker, unsigned dsdt_tbl_offset)
 
     /* Hardware Reduced = 1 and use PSCI 0.2+ and with HVC */
     fadt->flags = cpu_to_le32(1 << ACPI_FADT_F_HW_REDUCED_ACPI);
-    fadt->arm_boot_flags = cpu_to_le16((1 << ACPI_FADT_ARM_USE_PSCI_G_0_2) |
-                                       (1 << ACPI_FADT_ARM_PSCI_USE_HVC));
+    fadt->arm_boot_flags = cpu_to_le16(ACPI_FADT_ARM_PSCI_COMPLIANT |
+                                       ACPI_FADT_ARM_PSCI_USE_HVC);
 
     /* ACPI v5.1 (fadt->revision.fadt->minor_revision) */
     fadt->minor_revision = 0x1;
@@ -658,11 +668,11 @@ build_fadt(GArray *table_data, BIOSLinker *linker, unsigned dsdt_tbl_offset)
 
 /* DSDT */
 static void
-build_dsdt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
+build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
 {
     Aml *scope, *dsdt;
-    const MemMapEntry *memmap = guest_info->memmap;
-    const int *irqmap = guest_info->irqmap;
+    const MemMapEntry *memmap = vms->memmap;
+    const int *irqmap = vms->irqmap;
 
     dsdt = init_aml_allocator();
     /* Reserve space for header */
@@ -674,7 +684,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
      * the RTC ACPI device at all when using UEFI.
      */
     scope = aml_scope("\\_SB");
-    acpi_dsdt_add_cpus(scope, guest_info->smp_cpus);
+    acpi_dsdt_add_cpus(scope, vms->smp_cpus);
     acpi_dsdt_add_uart(scope, &memmap[VIRT_UART],
                        (irqmap[VIRT_UART] + ARM_SPI_BASE));
     acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]);
@@ -682,7 +692,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info)
     acpi_dsdt_add_virtio(scope, &memmap[VIRT_MMIO],
                     (irqmap[VIRT_MMIO] + ARM_SPI_BASE), NUM_VIRTIO_TRANSPORTS);
     acpi_dsdt_add_pci(scope, memmap, (irqmap[VIRT_PCIE] + ARM_SPI_BASE),
-                      guest_info->use_highmem);
+                      vms->highmem);
     acpi_dsdt_add_gpio(scope, &memmap[VIRT_GPIO],
                        (irqmap[VIRT_GPIO] + ARM_SPI_BASE));
     acpi_dsdt_add_power_button(scope);
@@ -705,12 +715,12 @@ struct AcpiBuildState {
     MemoryRegion *linker_mr;
     /* Is table patched? */
     bool patched;
-    VirtGuestInfo *guest_info;
 } AcpiBuildState;
 
 static
-void virt_acpi_build(VirtGuestInfo *guest_info, AcpiBuildTables *tables)
+void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
 {
+    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
     GArray *table_offsets;
     unsigned dsdt, rsdt;
     GArray *tables_blob = tables->table_data;
@@ -724,32 +734,32 @@ void virt_acpi_build(VirtGuestInfo *guest_info, AcpiBuildTables *tables)
 
     /* DSDT is pointed to by FADT */
     dsdt = tables_blob->len;
-    build_dsdt(tables_blob, tables->linker, guest_info);
+    build_dsdt(tables_blob, tables->linker, vms);
 
     /* FADT MADT GTDT MCFG SPCR pointed to by RSDT */
     acpi_add_table(table_offsets, tables_blob);
     build_fadt(tables_blob, tables->linker, dsdt);
 
     acpi_add_table(table_offsets, tables_blob);
-    build_madt(tables_blob, tables->linker, guest_info);
+    build_madt(tables_blob, tables->linker, vms);
 
     acpi_add_table(table_offsets, tables_blob);
-    build_gtdt(tables_blob, tables->linker);
+    build_gtdt(tables_blob, tables->linker, vms);
 
     acpi_add_table(table_offsets, tables_blob);
-    build_mcfg(tables_blob, tables->linker, guest_info);
+    build_mcfg(tables_blob, tables->linker, vms);
 
     acpi_add_table(table_offsets, tables_blob);
-    build_spcr(tables_blob, tables->linker, guest_info);
+    build_spcr(tables_blob, tables->linker, vms);
 
     if (nb_numa_nodes > 0) {
         acpi_add_table(table_offsets, tables_blob);
-        build_srat(tables_blob, tables->linker, guest_info);
+        build_srat(tables_blob, tables->linker, vms);
     }
 
-    if (its_class_name() && !guest_info->no_its) {
+    if (its_class_name() && !vmc->no_its) {
         acpi_add_table(table_offsets, tables_blob);
-        build_iort(tables_blob, tables->linker, guest_info);
+        build_iort(tables_blob, tables->linker);
     }
 
     /* RSDT is pointed to by RSDP */
@@ -788,13 +798,12 @@ static void virt_acpi_build_update(void *build_opaque)
 
     acpi_build_tables_init(&tables);
 
-    virt_acpi_build(build_state->guest_info, &tables);
+    virt_acpi_build(VIRT_MACHINE(qdev_get_machine()), &tables);
 
     acpi_ram_update(build_state->table_mr, tables.table_data);
     acpi_ram_update(build_state->rsdp_mr, tables.rsdp);
     acpi_ram_update(build_state->linker_mr, tables.linker->cmd_blob);
 
-
     acpi_build_tables_cleanup(&tables, true);
 }
 
@@ -822,12 +831,12 @@ static const VMStateDescription vmstate_virt_acpi_build = {
     },
 };
 
-void virt_acpi_setup(VirtGuestInfo *guest_info)
+void virt_acpi_setup(VirtMachineState *vms)
 {
     AcpiBuildTables tables;
     AcpiBuildState *build_state;
 
-    if (!guest_info->fw_cfg) {
+    if (!vms->fw_cfg) {
         trace_virt_acpi_setup();
         return;
     }
@@ -838,10 +847,9 @@ void virt_acpi_setup(VirtGuestInfo *guest_info)
     }
 
     build_state = g_malloc0(sizeof *build_state);
-    build_state->guest_info = guest_info;
 
     acpi_build_tables_init(&tables);
-    virt_acpi_build(build_state->guest_info, &tables);
+    virt_acpi_build(vms, &tables);
 
     /* Now expose it all to Guest */
     build_state->table_mr = acpi_add_rom_blob(build_state, tables.table_data,
@@ -853,8 +861,8 @@ void virt_acpi_setup(VirtGuestInfo *guest_info)
         acpi_add_rom_blob(build_state, tables.linker->cmd_blob,
                           "etc/table-loader", 0);
 
-    fw_cfg_add_file(guest_info->fw_cfg, ACPI_BUILD_TPMLOG_FILE,
-                    tables.tcpalog->data, acpi_data_len(tables.tcpalog));
+    fw_cfg_add_file(vms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data,
+                    acpi_data_len(tables.tcpalog));
 
     build_state->rsdp_mr = acpi_add_rom_blob(build_state, tables.rsdp,
                                               ACPI_BUILD_RSDP_FILE, 0);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 11c53a56e0..7a03f84051 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -41,14 +41,12 @@
 #include "sysemu/numa.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
-#include "hw/boards.h"
 #include "hw/compat.h"
 #include "hw/loader.h"
 #include "exec/address-spaces.h"
 #include "qemu/bitops.h"
 #include "qemu/error-report.h"
 #include "hw/pci-host/gpex.h"
-#include "hw/arm/virt-acpi-build.h"
 #include "hw/arm/sysbus-fdt.h"
 #include "hw/platform-bus.h"
 #include "hw/arm/fdt.h"
@@ -59,51 +57,6 @@
 #include "qapi/visitor.h"
 #include "standard-headers/linux/input.h"
 
-/* Number of external interrupt lines to configure the GIC with */
-#define NUM_IRQS 256
-
-#define PLATFORM_BUS_NUM_IRQS 64
-
-static ARMPlatformBusSystemParams platform_bus_params;
-
-typedef struct VirtBoardInfo {
-    struct arm_boot_info bootinfo;
-    const char *cpu_model;
-    const MemMapEntry *memmap;
-    const int *irqmap;
-    int smp_cpus;
-    void *fdt;
-    int fdt_size;
-    uint32_t clock_phandle;
-    uint32_t gic_phandle;
-    uint32_t msi_phandle;
-    bool using_psci;
-} VirtBoardInfo;
-
-typedef struct {
-    MachineClass parent;
-    VirtBoardInfo *daughterboard;
-    bool disallow_affinity_adjustment;
-    bool no_its;
-    bool no_pmu;
-} VirtMachineClass;
-
-typedef struct {
-    MachineState parent;
-    bool secure;
-    bool highmem;
-    int32_t gic_version;
-} VirtMachineState;
-
-#define TYPE_VIRT_MACHINE   MACHINE_TYPE_NAME("virt")
-#define VIRT_MACHINE(obj) \
-    OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE)
-#define VIRT_MACHINE_GET_CLASS(obj) \
-    OBJECT_GET_CLASS(VirtMachineClass, obj, TYPE_VIRT_MACHINE)
-#define VIRT_MACHINE_CLASS(klass) \
-    OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE)
-
-
 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
     static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
                                                     void *data) \
@@ -133,6 +86,13 @@ typedef struct {
     DEFINE_VIRT_MACHINE_LATEST(major, minor, false)
 
 
+/* Number of external interrupt lines to configure the GIC with */
+#define NUM_IRQS 256
+
+#define PLATFORM_BUS_NUM_IRQS 64
+
+static ARMPlatformBusSystemParams platform_bus_params;
+
 /* RAM limit in GB. Since VIRT_MEM starts at the 1GB mark, this means
  * RAM can go up to the 256GB mark, leaving 256GB of the physical
  * address space unallocated and free for future use between 256G and 512G.
@@ -202,51 +162,36 @@ static const int a15irqmap[] = {
     [VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
 };
 
-static VirtBoardInfo machines[] = {
-    {
-        .cpu_model = "cortex-a15",
-        .memmap = a15memmap,
-        .irqmap = a15irqmap,
-    },
-    {
-        .cpu_model = "cortex-a53",
-        .memmap = a15memmap,
-        .irqmap = a15irqmap,
-    },
-    {
-        .cpu_model = "cortex-a57",
-        .memmap = a15memmap,
-        .irqmap = a15irqmap,
-    },
-    {
-        .cpu_model = "host",
-        .memmap = a15memmap,
-        .irqmap = a15irqmap,
-    },
+static const char *valid_cpus[] = {
+    "cortex-a15",
+    "cortex-a53",
+    "cortex-a57",
+    "host",
+    NULL
 };
 
-static VirtBoardInfo *find_machine_info(const char *cpu)
+static bool cpuname_valid(const char *cpu)
 {
     int i;
 
-    for (i = 0; i < ARRAY_SIZE(machines); i++) {
-        if (strcmp(cpu, machines[i].cpu_model) == 0) {
-            return &machines[i];
+    for (i = 0; i < ARRAY_SIZE(valid_cpus); i++) {
+        if (strcmp(cpu, valid_cpus[i]) == 0) {
+            return true;
         }
     }
-    return NULL;
+    return false;
 }
 
-static void create_fdt(VirtBoardInfo *vbi)
+static void create_fdt(VirtMachineState *vms)
 {
-    void *fdt = create_device_tree(&vbi->fdt_size);
+    void *fdt = create_device_tree(&vms->fdt_size);
 
     if (!fdt) {
         error_report("create_device_tree() failed");
         exit(1);
     }
 
-    vbi->fdt = fdt;
+    vms->fdt = fdt;
 
     /* Header */
     qemu_fdt_setprop_string(fdt, "/", "compatible", "linux,dummy-virt");
@@ -266,27 +211,27 @@ static void create_fdt(VirtBoardInfo *vbi)
      * optional but in practice if you omit them the kernel refuses to
      * probe for the device.
      */
-    vbi->clock_phandle = qemu_fdt_alloc_phandle(fdt);
+    vms->clock_phandle = qemu_fdt_alloc_phandle(fdt);
     qemu_fdt_add_subnode(fdt, "/apb-pclk");
     qemu_fdt_setprop_string(fdt, "/apb-pclk", "compatible", "fixed-clock");
     qemu_fdt_setprop_cell(fdt, "/apb-pclk", "#clock-cells", 0x0);
     qemu_fdt_setprop_cell(fdt, "/apb-pclk", "clock-frequency", 24000000);
     qemu_fdt_setprop_string(fdt, "/apb-pclk", "clock-output-names",
                                 "clk24mhz");
-    qemu_fdt_setprop_cell(fdt, "/apb-pclk", "phandle", vbi->clock_phandle);
+    qemu_fdt_setprop_cell(fdt, "/apb-pclk", "phandle", vms->clock_phandle);
 
 }
 
-static void fdt_add_psci_node(const VirtBoardInfo *vbi)
+static void fdt_add_psci_node(const VirtMachineState *vms)
 {
     uint32_t cpu_suspend_fn;
     uint32_t cpu_off_fn;
     uint32_t cpu_on_fn;
     uint32_t migrate_fn;
-    void *fdt = vbi->fdt;
+    void *fdt = vms->fdt;
     ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(0));
 
-    if (!vbi->using_psci) {
+    if (!vms->using_psci) {
         return;
     }
 
@@ -327,41 +272,60 @@ static void fdt_add_psci_node(const VirtBoardInfo *vbi)
     qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
 }
 
-static void fdt_add_timer_nodes(const VirtBoardInfo *vbi, int gictype)
+static void fdt_add_timer_nodes(const VirtMachineState *vms)
 {
-    /* Note that on A15 h/w these interrupts are level-triggered,
-     * but for the GIC implementation provided by both QEMU and KVM
-     * they are edge-triggered.
+    /* On real hardware these interrupts are level-triggered.
+     * On KVM they were edge-triggered before host kernel version 4.4,
+     * and level-triggered afterwards.
+     * On emulated QEMU they are level-triggered.
+     *
+     * Getting the DTB info about them wrong is awkward for some
+     * guest kernels:
+     *  pre-4.8 ignore the DT and leave the interrupt configured
+     *   with whatever the GIC reset value (or the bootloader) left it at
+     *  4.8 before rc6 honour the incorrect data by programming it back
+     *   into the GIC, causing problems
+     *  4.8rc6 and later ignore the DT and always write "level triggered"
+     *   into the GIC
+     *
+     * For backwards-compatibility, virt-2.8 and earlier will continue
+     * to say these are edge-triggered, but later machines will report
+     * the correct information.
      */
     ARMCPU *armcpu;
-    uint32_t irqflags = GIC_FDT_IRQ_FLAGS_EDGE_LO_HI;
+    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
+    uint32_t irqflags = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
+
+    if (vmc->claim_edge_triggered_timers) {
+        irqflags = GIC_FDT_IRQ_FLAGS_EDGE_LO_HI;
+    }
 
-    if (gictype == 2) {
+    if (vms->gic_version == 2) {
         irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
                              GIC_FDT_IRQ_PPI_CPU_WIDTH,
-                             (1 << vbi->smp_cpus) - 1);
+                             (1 << vms->smp_cpus) - 1);
     }
 
-    qemu_fdt_add_subnode(vbi->fdt, "/timer");
+    qemu_fdt_add_subnode(vms->fdt, "/timer");
 
     armcpu = ARM_CPU(qemu_get_cpu(0));
     if (arm_feature(&armcpu->env, ARM_FEATURE_V8)) {
         const char compat[] = "arm,armv8-timer\0arm,armv7-timer";
-        qemu_fdt_setprop(vbi->fdt, "/timer", "compatible",
+        qemu_fdt_setprop(vms->fdt, "/timer", "compatible",
                          compat, sizeof(compat));
     } else {
-        qemu_fdt_setprop_string(vbi->fdt, "/timer", "compatible",
+        qemu_fdt_setprop_string(vms->fdt, "/timer", "compatible",
                                 "arm,armv7-timer");
     }
-    qemu_fdt_setprop(vbi->fdt, "/timer", "always-on", NULL, 0);
-    qemu_fdt_setprop_cells(vbi->fdt, "/timer", "interrupts",
+    qemu_fdt_setprop(vms->fdt, "/timer", "always-on", NULL, 0);
+    qemu_fdt_setprop_cells(vms->fdt, "/timer", "interrupts",
                        GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_S_EL1_IRQ, irqflags,
                        GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL1_IRQ, irqflags,
                        GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_VIRT_IRQ, irqflags,
                        GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL2_IRQ, irqflags);
 }
 
-static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
+static void fdt_add_cpu_nodes(const VirtMachineState *vms)
 {
     int cpu;
     int addr_cells = 1;
@@ -380,7 +344,7 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
      *  The simplest way to go is to examine affinity IDs of all our CPUs. If
      *  at least one of them has Aff3 populated, we set #address-cells to 2.
      */
-    for (cpu = 0; cpu < vbi->smp_cpus; cpu++) {
+    for (cpu = 0; cpu < vms->smp_cpus; cpu++) {
         ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
 
         if (armcpu->mp_affinity & ARM_AFF3_MASK) {
@@ -389,101 +353,101 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
         }
     }
 
-    qemu_fdt_add_subnode(vbi->fdt, "/cpus");
-    qemu_fdt_setprop_cell(vbi->fdt, "/cpus", "#address-cells", addr_cells);
-    qemu_fdt_setprop_cell(vbi->fdt, "/cpus", "#size-cells", 0x0);
+    qemu_fdt_add_subnode(vms->fdt, "/cpus");
+    qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#address-cells", addr_cells);
+    qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#size-cells", 0x0);
 
-    for (cpu = vbi->smp_cpus - 1; cpu >= 0; cpu--) {
+    for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) {
         char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
         ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
 
-        qemu_fdt_add_subnode(vbi->fdt, nodename);
-        qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "cpu");
-        qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible",
+        qemu_fdt_add_subnode(vms->fdt, nodename);
+        qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "cpu");
+        qemu_fdt_setprop_string(vms->fdt, nodename, "compatible",
                                     armcpu->dtb_compatible);
 
-        if (vbi->using_psci && vbi->smp_cpus > 1) {
-            qemu_fdt_setprop_string(vbi->fdt, nodename,
+        if (vms->using_psci && vms->smp_cpus > 1) {
+            qemu_fdt_setprop_string(vms->fdt, nodename,
                                         "enable-method", "psci");
         }
 
         if (addr_cells == 2) {
-            qemu_fdt_setprop_u64(vbi->fdt, nodename, "reg",
+            qemu_fdt_setprop_u64(vms->fdt, nodename, "reg",
                                  armcpu->mp_affinity);
         } else {
-            qemu_fdt_setprop_cell(vbi->fdt, nodename, "reg",
+            qemu_fdt_setprop_cell(vms->fdt, nodename, "reg",
                                   armcpu->mp_affinity);
         }
 
         i = numa_get_node_for_cpu(cpu);
         if (i < nb_numa_nodes) {
-            qemu_fdt_setprop_cell(vbi->fdt, nodename, "numa-node-id", i);
+            qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id", i);
         }
 
         g_free(nodename);
     }
 }
 
-static void fdt_add_its_gic_node(VirtBoardInfo *vbi)
+static void fdt_add_its_gic_node(VirtMachineState *vms)
 {
-    vbi->msi_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
-    qemu_fdt_add_subnode(vbi->fdt, "/intc/its");
-    qemu_fdt_setprop_string(vbi->fdt, "/intc/its", "compatible",
+    vms->msi_phandle = qemu_fdt_alloc_phandle(vms->fdt);
+    qemu_fdt_add_subnode(vms->fdt, "/intc/its");
+    qemu_fdt_setprop_string(vms->fdt, "/intc/its", "compatible",
                             "arm,gic-v3-its");
-    qemu_fdt_setprop(vbi->fdt, "/intc/its", "msi-controller", NULL, 0);
-    qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc/its", "reg",
-                                 2, vbi->memmap[VIRT_GIC_ITS].base,
-                                 2, vbi->memmap[VIRT_GIC_ITS].size);
-    qemu_fdt_setprop_cell(vbi->fdt, "/intc/its", "phandle", vbi->msi_phandle);
+    qemu_fdt_setprop(vms->fdt, "/intc/its", "msi-controller", NULL, 0);
+    qemu_fdt_setprop_sized_cells(vms->fdt, "/intc/its", "reg",
+                                 2, vms->memmap[VIRT_GIC_ITS].base,
+                                 2, vms->memmap[VIRT_GIC_ITS].size);
+    qemu_fdt_setprop_cell(vms->fdt, "/intc/its", "phandle", vms->msi_phandle);
 }
 
-static void fdt_add_v2m_gic_node(VirtBoardInfo *vbi)
+static void fdt_add_v2m_gic_node(VirtMachineState *vms)
 {
-    vbi->msi_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
-    qemu_fdt_add_subnode(vbi->fdt, "/intc/v2m");
-    qemu_fdt_setprop_string(vbi->fdt, "/intc/v2m", "compatible",
+    vms->msi_phandle = qemu_fdt_alloc_phandle(vms->fdt);
+    qemu_fdt_add_subnode(vms->fdt, "/intc/v2m");
+    qemu_fdt_setprop_string(vms->fdt, "/intc/v2m", "compatible",
                             "arm,gic-v2m-frame");
-    qemu_fdt_setprop(vbi->fdt, "/intc/v2m", "msi-controller", NULL, 0);
-    qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc/v2m", "reg",
-                                 2, vbi->memmap[VIRT_GIC_V2M].base,
-                                 2, vbi->memmap[VIRT_GIC_V2M].size);
-    qemu_fdt_setprop_cell(vbi->fdt, "/intc/v2m", "phandle", vbi->msi_phandle);
+    qemu_fdt_setprop(vms->fdt, "/intc/v2m", "msi-controller", NULL, 0);
+    qemu_fdt_setprop_sized_cells(vms->fdt, "/intc/v2m", "reg",
+                                 2, vms->memmap[VIRT_GIC_V2M].base,
+                                 2, vms->memmap[VIRT_GIC_V2M].size);
+    qemu_fdt_setprop_cell(vms->fdt, "/intc/v2m", "phandle", vms->msi_phandle);
 }
 
-static void fdt_add_gic_node(VirtBoardInfo *vbi, int type)
+static void fdt_add_gic_node(VirtMachineState *vms)
 {
-    vbi->gic_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
-    qemu_fdt_setprop_cell(vbi->fdt, "/", "interrupt-parent", vbi->gic_phandle);
-
-    qemu_fdt_add_subnode(vbi->fdt, "/intc");
-    qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#interrupt-cells", 3);
-    qemu_fdt_setprop(vbi->fdt, "/intc", "interrupt-controller", NULL, 0);
-    qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#address-cells", 0x2);
-    qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#size-cells", 0x2);
-    qemu_fdt_setprop(vbi->fdt, "/intc", "ranges", NULL, 0);
-    if (type == 3) {
-        qemu_fdt_setprop_string(vbi->fdt, "/intc", "compatible",
+    vms->gic_phandle = qemu_fdt_alloc_phandle(vms->fdt);
+    qemu_fdt_setprop_cell(vms->fdt, "/", "interrupt-parent", vms->gic_phandle);
+
+    qemu_fdt_add_subnode(vms->fdt, "/intc");
+    qemu_fdt_setprop_cell(vms->fdt, "/intc", "#interrupt-cells", 3);
+    qemu_fdt_setprop(vms->fdt, "/intc", "interrupt-controller", NULL, 0);
+    qemu_fdt_setprop_cell(vms->fdt, "/intc", "#address-cells", 0x2);
+    qemu_fdt_setprop_cell(vms->fdt, "/intc", "#size-cells", 0x2);
+    qemu_fdt_setprop(vms->fdt, "/intc", "ranges", NULL, 0);
+    if (vms->gic_version == 3) {
+        qemu_fdt_setprop_string(vms->fdt, "/intc", "compatible",
                                 "arm,gic-v3");
-        qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc", "reg",
-                                     2, vbi->memmap[VIRT_GIC_DIST].base,
-                                     2, vbi->memmap[VIRT_GIC_DIST].size,
-                                     2, vbi->memmap[VIRT_GIC_REDIST].base,
-                                     2, vbi->memmap[VIRT_GIC_REDIST].size);
+        qemu_fdt_setprop_sized_cells(vms->fdt, "/intc", "reg",
+                                     2, vms->memmap[VIRT_GIC_DIST].base,
+                                     2, vms->memmap[VIRT_GIC_DIST].size,
+                                     2, vms->memmap[VIRT_GIC_REDIST].base,
+                                     2, vms->memmap[VIRT_GIC_REDIST].size);
     } else {
         /* 'cortex-a15-gic' means 'GIC v2' */
-        qemu_fdt_setprop_string(vbi->fdt, "/intc", "compatible",
+        qemu_fdt_setprop_string(vms->fdt, "/intc", "compatible",
                                 "arm,cortex-a15-gic");
-        qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc", "reg",
-                                      2, vbi->memmap[VIRT_GIC_DIST].base,
-                                      2, vbi->memmap[VIRT_GIC_DIST].size,
-                                      2, vbi->memmap[VIRT_GIC_CPU].base,
-                                      2, vbi->memmap[VIRT_GIC_CPU].size);
+        qemu_fdt_setprop_sized_cells(vms->fdt, "/intc", "reg",
+                                      2, vms->memmap[VIRT_GIC_DIST].base,
+                                      2, vms->memmap[VIRT_GIC_DIST].size,
+                                      2, vms->memmap[VIRT_GIC_CPU].base,
+                                      2, vms->memmap[VIRT_GIC_CPU].size);
     }
 
-    qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", vbi->gic_phandle);
+    qemu_fdt_setprop_cell(vms->fdt, "/intc", "phandle", vms->gic_phandle);
 }
 
-static void fdt_add_pmu_nodes(const VirtBoardInfo *vbi, int gictype)
+static void fdt_add_pmu_nodes(const VirtMachineState *vms)
 {
     CPUState *cpu;
     ARMCPU *armcpu;
@@ -497,24 +461,24 @@ static void fdt_add_pmu_nodes(const VirtBoardInfo *vbi, int gictype)
         }
     }
 
-    if (gictype == 2) {
+    if (vms->gic_version == 2) {
         irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
                              GIC_FDT_IRQ_PPI_CPU_WIDTH,
-                             (1 << vbi->smp_cpus) - 1);
+                             (1 << vms->smp_cpus) - 1);
     }
 
     armcpu = ARM_CPU(qemu_get_cpu(0));
-    qemu_fdt_add_subnode(vbi->fdt, "/pmu");
+    qemu_fdt_add_subnode(vms->fdt, "/pmu");
     if (arm_feature(&armcpu->env, ARM_FEATURE_V8)) {
         const char compat[] = "arm,armv8-pmuv3";
-        qemu_fdt_setprop(vbi->fdt, "/pmu", "compatible",
+        qemu_fdt_setprop(vms->fdt, "/pmu", "compatible",
                          compat, sizeof(compat));
-        qemu_fdt_setprop_cells(vbi->fdt, "/pmu", "interrupts",
+        qemu_fdt_setprop_cells(vms->fdt, "/pmu", "interrupts",
                                GIC_FDT_IRQ_TYPE_PPI, VIRTUAL_PMU_IRQ, irqflags);
     }
 }
 
-static void create_its(VirtBoardInfo *vbi, DeviceState *gicdev)
+static void create_its(VirtMachineState *vms, DeviceState *gicdev)
 {
     const char *itsclass = its_class_name();
     DeviceState *dev;
@@ -529,19 +493,19 @@ static void create_its(VirtBoardInfo *vbi, DeviceState *gicdev)
     object_property_set_link(OBJECT(dev), OBJECT(gicdev), "parent-gicv3",
                              &error_abort);
     qdev_init_nofail(dev);
-    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vbi->memmap[VIRT_GIC_ITS].base);
+    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_GIC_ITS].base);
 
-    fdt_add_its_gic_node(vbi);
+    fdt_add_its_gic_node(vms);
 }
 
-static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_v2m(VirtMachineState *vms, qemu_irq *pic)
 {
     int i;
-    int irq = vbi->irqmap[VIRT_GIC_V2M];
+    int irq = vms->irqmap[VIRT_GIC_V2M];
     DeviceState *dev;
 
     dev = qdev_create(NULL, "arm-gicv2m");
-    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vbi->memmap[VIRT_GIC_V2M].base);
+    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_GIC_V2M].base);
     qdev_prop_set_uint32(dev, "base-spi", irq);
     qdev_prop_set_uint32(dev, "num-spi", NUM_GICV2M_SPIS);
     qdev_init_nofail(dev);
@@ -550,17 +514,17 @@ static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic)
         sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]);
     }
 
-    fdt_add_v2m_gic_node(vbi);
+    fdt_add_v2m_gic_node(vms);
 }
 
-static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, int type,
-                       bool secure, bool no_its)
+static void create_gic(VirtMachineState *vms, qemu_irq *pic)
 {
     /* We create a standalone GIC */
+    VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
     DeviceState *gicdev;
     SysBusDevice *gicbusdev;
     const char *gictype;
-    int i;
+    int type = vms->gic_version, i;
 
     gictype = (type == 3) ? gicv3_class_name() : gic_class_name();
 
@@ -572,15 +536,15 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, int type,
      */
     qdev_prop_set_uint32(gicdev, "num-irq", NUM_IRQS + 32);
     if (!kvm_irqchip_in_kernel()) {
-        qdev_prop_set_bit(gicdev, "has-security-extensions", secure);
+        qdev_prop_set_bit(gicdev, "has-security-extensions", vms->secure);
     }
     qdev_init_nofail(gicdev);
     gicbusdev = SYS_BUS_DEVICE(gicdev);
-    sysbus_mmio_map(gicbusdev, 0, vbi->memmap[VIRT_GIC_DIST].base);
+    sysbus_mmio_map(gicbusdev, 0, vms->memmap[VIRT_GIC_DIST].base);
     if (type == 3) {
-        sysbus_mmio_map(gicbusdev, 1, vbi->memmap[VIRT_GIC_REDIST].base);
+        sysbus_mmio_map(gicbusdev, 1, vms->memmap[VIRT_GIC_REDIST].base);
     } else {
-        sysbus_mmio_map(gicbusdev, 1, vbi->memmap[VIRT_GIC_CPU].base);
+        sysbus_mmio_map(gicbusdev, 1, vms->memmap[VIRT_GIC_CPU].base);
     }
 
     /* Wire the outputs from each CPU's generic timer to the
@@ -616,22 +580,22 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, int type,
         pic[i] = qdev_get_gpio_in(gicdev, i);
     }
 
-    fdt_add_gic_node(vbi, type);
+    fdt_add_gic_node(vms);
 
-    if (type == 3 && !no_its) {
-        create_its(vbi, gicdev);
+    if (type == 3 && !vmc->no_its) {
+        create_its(vms, gicdev);
     } else if (type == 2) {
-        create_v2m(vbi, pic);
+        create_v2m(vms, pic);
     }
 }
 
-static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic, int uart,
+static void create_uart(const VirtMachineState *vms, qemu_irq *pic, int uart,
                         MemoryRegion *mem, CharDriverState *chr)
 {
     char *nodename;
-    hwaddr base = vbi->memmap[uart].base;
-    hwaddr size = vbi->memmap[uart].size;
-    int irq = vbi->irqmap[uart];
+    hwaddr base = vms->memmap[uart].base;
+    hwaddr size = vms->memmap[uart].size;
+    int irq = vms->irqmap[uart];
     const char compat[] = "arm,pl011\0arm,primecell";
     const char clocknames[] = "uartclk\0apb_pclk";
     DeviceState *dev = qdev_create(NULL, "pl011");
@@ -644,51 +608,51 @@ static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic, int uart,
     sysbus_connect_irq(s, 0, pic[irq]);
 
     nodename = g_strdup_printf("/pl011@%" PRIx64, base);
-    qemu_fdt_add_subnode(vbi->fdt, nodename);
+    qemu_fdt_add_subnode(vms->fdt, nodename);
     /* Note that we can't use setprop_string because of the embedded NUL */
-    qemu_fdt_setprop(vbi->fdt, nodename, "compatible",
+    qemu_fdt_setprop(vms->fdt, nodename, "compatible",
                          compat, sizeof(compat));
-    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                      2, base, 2, size);
-    qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
+    qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
                                GIC_FDT_IRQ_TYPE_SPI, irq,
                                GIC_FDT_IRQ_FLAGS_LEVEL_HI);
-    qemu_fdt_setprop_cells(vbi->fdt, nodename, "clocks",
-                               vbi->clock_phandle, vbi->clock_phandle);
-    qemu_fdt_setprop(vbi->fdt, nodename, "clock-names",
+    qemu_fdt_setprop_cells(vms->fdt, nodename, "clocks",
+                               vms->clock_phandle, vms->clock_phandle);
+    qemu_fdt_setprop(vms->fdt, nodename, "clock-names",
                          clocknames, sizeof(clocknames));
 
     if (uart == VIRT_UART) {
-        qemu_fdt_setprop_string(vbi->fdt, "/chosen", "stdout-path", nodename);
+        qemu_fdt_setprop_string(vms->fdt, "/chosen", "stdout-path", nodename);
     } else {
         /* Mark as not usable by the normal world */
-        qemu_fdt_setprop_string(vbi->fdt, nodename, "status", "disabled");
-        qemu_fdt_setprop_string(vbi->fdt, nodename, "secure-status", "okay");
+        qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
+        qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
     }
 
     g_free(nodename);
 }
 
-static void create_rtc(const VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_rtc(const VirtMachineState *vms, qemu_irq *pic)
 {
     char *nodename;
-    hwaddr base = vbi->memmap[VIRT_RTC].base;
-    hwaddr size = vbi->memmap[VIRT_RTC].size;
-    int irq = vbi->irqmap[VIRT_RTC];
+    hwaddr base = vms->memmap[VIRT_RTC].base;
+    hwaddr size = vms->memmap[VIRT_RTC].size;
+    int irq = vms->irqmap[VIRT_RTC];
     const char compat[] = "arm,pl031\0arm,primecell";
 
     sysbus_create_simple("pl031", base, pic[irq]);
 
     nodename = g_strdup_printf("/pl031@%" PRIx64, base);
-    qemu_fdt_add_subnode(vbi->fdt, nodename);
-    qemu_fdt_setprop(vbi->fdt, nodename, "compatible", compat, sizeof(compat));
-    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+    qemu_fdt_add_subnode(vms->fdt, nodename);
+    qemu_fdt_setprop(vms->fdt, nodename, "compatible", compat, sizeof(compat));
+    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                  2, base, 2, size);
-    qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
+    qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
                            GIC_FDT_IRQ_TYPE_SPI, irq,
                            GIC_FDT_IRQ_FLAGS_LEVEL_HI);
-    qemu_fdt_setprop_cell(vbi->fdt, nodename, "clocks", vbi->clock_phandle);
-    qemu_fdt_setprop_string(vbi->fdt, nodename, "clock-names", "apb_pclk");
+    qemu_fdt_setprop_cell(vms->fdt, nodename, "clocks", vms->clock_phandle);
+    qemu_fdt_setprop_string(vms->fdt, nodename, "clock-names", "apb_pclk");
     g_free(nodename);
 }
 
@@ -703,45 +667,45 @@ static Notifier virt_system_powerdown_notifier = {
     .notify = virt_powerdown_req
 };
 
-static void create_gpio(const VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_gpio(const VirtMachineState *vms, qemu_irq *pic)
 {
     char *nodename;
     DeviceState *pl061_dev;
-    hwaddr base = vbi->memmap[VIRT_GPIO].base;
-    hwaddr size = vbi->memmap[VIRT_GPIO].size;
-    int irq = vbi->irqmap[VIRT_GPIO];
+    hwaddr base = vms->memmap[VIRT_GPIO].base;
+    hwaddr size = vms->memmap[VIRT_GPIO].size;
+    int irq = vms->irqmap[VIRT_GPIO];
     const char compat[] = "arm,pl061\0arm,primecell";
 
     pl061_dev = sysbus_create_simple("pl061", base, pic[irq]);
 
-    uint32_t phandle = qemu_fdt_alloc_phandle(vbi->fdt);
+    uint32_t phandle = qemu_fdt_alloc_phandle(vms->fdt);
     nodename = g_strdup_printf("/pl061@%" PRIx64, base);
-    qemu_fdt_add_subnode(vbi->fdt, nodename);
-    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+    qemu_fdt_add_subnode(vms->fdt, nodename);
+    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                  2, base, 2, size);
-    qemu_fdt_setprop(vbi->fdt, nodename, "compatible", compat, sizeof(compat));
-    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#gpio-cells", 2);
-    qemu_fdt_setprop(vbi->fdt, nodename, "gpio-controller", NULL, 0);
-    qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
+    qemu_fdt_setprop(vms->fdt, nodename, "compatible", compat, sizeof(compat));
+    qemu_fdt_setprop_cell(vms->fdt, nodename, "#gpio-cells", 2);
+    qemu_fdt_setprop(vms->fdt, nodename, "gpio-controller", NULL, 0);
+    qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
                            GIC_FDT_IRQ_TYPE_SPI, irq,
                            GIC_FDT_IRQ_FLAGS_LEVEL_HI);
-    qemu_fdt_setprop_cell(vbi->fdt, nodename, "clocks", vbi->clock_phandle);
-    qemu_fdt_setprop_string(vbi->fdt, nodename, "clock-names", "apb_pclk");
-    qemu_fdt_setprop_cell(vbi->fdt, nodename, "phandle", phandle);
+    qemu_fdt_setprop_cell(vms->fdt, nodename, "clocks", vms->clock_phandle);
+    qemu_fdt_setprop_string(vms->fdt, nodename, "clock-names", "apb_pclk");
+    qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", phandle);
 
     gpio_key_dev = sysbus_create_simple("gpio-key", -1,
                                         qdev_get_gpio_in(pl061_dev, 3));
-    qemu_fdt_add_subnode(vbi->fdt, "/gpio-keys");
-    qemu_fdt_setprop_string(vbi->fdt, "/gpio-keys", "compatible", "gpio-keys");
-    qemu_fdt_setprop_cell(vbi->fdt, "/gpio-keys", "#size-cells", 0);
-    qemu_fdt_setprop_cell(vbi->fdt, "/gpio-keys", "#address-cells", 1);
+    qemu_fdt_add_subnode(vms->fdt, "/gpio-keys");
+    qemu_fdt_setprop_string(vms->fdt, "/gpio-keys", "compatible", "gpio-keys");
+    qemu_fdt_setprop_cell(vms->fdt, "/gpio-keys", "#size-cells", 0);
+    qemu_fdt_setprop_cell(vms->fdt, "/gpio-keys", "#address-cells", 1);
 
-    qemu_fdt_add_subnode(vbi->fdt, "/gpio-keys/poweroff");
-    qemu_fdt_setprop_string(vbi->fdt, "/gpio-keys/poweroff",
+    qemu_fdt_add_subnode(vms->fdt, "/gpio-keys/poweroff");
+    qemu_fdt_setprop_string(vms->fdt, "/gpio-keys/poweroff",
                             "label", "GPIO Key Poweroff");
-    qemu_fdt_setprop_cell(vbi->fdt, "/gpio-keys/poweroff", "linux,code",
+    qemu_fdt_setprop_cell(vms->fdt, "/gpio-keys/poweroff", "linux,code",
                           KEY_POWER);
-    qemu_fdt_setprop_cells(vbi->fdt, "/gpio-keys/poweroff",
+    qemu_fdt_setprop_cells(vms->fdt, "/gpio-keys/poweroff",
                            "gpios", phandle, 3, 0);
 
     /* connect powerdown request */
@@ -750,10 +714,10 @@ static void create_gpio(const VirtBoardInfo *vbi, qemu_irq *pic)
     g_free(nodename);
 }
 
-static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_virtio_devices(const VirtMachineState *vms, qemu_irq *pic)
 {
     int i;
-    hwaddr size = vbi->memmap[VIRT_MMIO].size;
+    hwaddr size = vms->memmap[VIRT_MMIO].size;
 
     /* We create the transports in forwards order. Since qbus_realize()
      * prepends (not appends) new child buses, the incrementing loop below will
@@ -783,8 +747,8 @@ static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
      * of disks users must use UUIDs or similar mechanisms.
      */
     for (i = 0; i < NUM_VIRTIO_TRANSPORTS; i++) {
-        int irq = vbi->irqmap[VIRT_MMIO] + i;
-        hwaddr base = vbi->memmap[VIRT_MMIO].base + i * size;
+        int irq = vms->irqmap[VIRT_MMIO] + i;
+        hwaddr base = vms->memmap[VIRT_MMIO].base + i * size;
 
         sysbus_create_simple("virtio-mmio", base, pic[irq]);
     }
@@ -798,16 +762,16 @@ static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
      */
     for (i = NUM_VIRTIO_TRANSPORTS - 1; i >= 0; i--) {
         char *nodename;
-        int irq = vbi->irqmap[VIRT_MMIO] + i;
-        hwaddr base = vbi->memmap[VIRT_MMIO].base + i * size;
+        int irq = vms->irqmap[VIRT_MMIO] + i;
+        hwaddr base = vms->memmap[VIRT_MMIO].base + i * size;
 
         nodename = g_strdup_printf("/virtio_mmio@%" PRIx64, base);
-        qemu_fdt_add_subnode(vbi->fdt, nodename);
-        qemu_fdt_setprop_string(vbi->fdt, nodename,
+        qemu_fdt_add_subnode(vms->fdt, nodename);
+        qemu_fdt_setprop_string(vms->fdt, nodename,
                                 "compatible", "virtio,mmio");
-        qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                      2, base, 2, size);
-        qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
+        qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupts",
                                GIC_FDT_IRQ_TYPE_SPI, irq,
                                GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
         g_free(nodename);
@@ -870,7 +834,7 @@ static void create_one_flash(const char *name, hwaddr flashbase,
     }
 }
 
-static void create_flash(const VirtBoardInfo *vbi,
+static void create_flash(const VirtMachineState *vms,
                          MemoryRegion *sysmem,
                          MemoryRegion *secure_sysmem)
 {
@@ -882,8 +846,8 @@ static void create_flash(const VirtBoardInfo *vbi,
      * If sysmem == secure_sysmem this means there is no separate Secure
      * address space and both flash devices are generally visible.
      */
-    hwaddr flashsize = vbi->memmap[VIRT_FLASH].size / 2;
-    hwaddr flashbase = vbi->memmap[VIRT_FLASH].base;
+    hwaddr flashsize = vms->memmap[VIRT_FLASH].size / 2;
+    hwaddr flashbase = vms->memmap[VIRT_FLASH].base;
     char *nodename;
 
     create_one_flash("virt.flash0", flashbase, flashsize,
@@ -894,41 +858,41 @@ static void create_flash(const VirtBoardInfo *vbi,
     if (sysmem == secure_sysmem) {
         /* Report both flash devices as a single node in the DT */
         nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
-        qemu_fdt_add_subnode(vbi->fdt, nodename);
-        qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible", "cfi-flash");
-        qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+        qemu_fdt_add_subnode(vms->fdt, nodename);
+        qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
+        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                      2, flashbase, 2, flashsize,
                                      2, flashbase + flashsize, 2, flashsize);
-        qemu_fdt_setprop_cell(vbi->fdt, nodename, "bank-width", 4);
+        qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
         g_free(nodename);
     } else {
         /* Report the devices as separate nodes so we can mark one as
          * only visible to the secure world.
          */
         nodename = g_strdup_printf("/secflash@%" PRIx64, flashbase);
-        qemu_fdt_add_subnode(vbi->fdt, nodename);
-        qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible", "cfi-flash");
-        qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+        qemu_fdt_add_subnode(vms->fdt, nodename);
+        qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
+        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                      2, flashbase, 2, flashsize);
-        qemu_fdt_setprop_cell(vbi->fdt, nodename, "bank-width", 4);
-        qemu_fdt_setprop_string(vbi->fdt, nodename, "status", "disabled");
-        qemu_fdt_setprop_string(vbi->fdt, nodename, "secure-status", "okay");
+        qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
+        qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
+        qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
         g_free(nodename);
 
         nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
-        qemu_fdt_add_subnode(vbi->fdt, nodename);
-        qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible", "cfi-flash");
-        qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+        qemu_fdt_add_subnode(vms->fdt, nodename);
+        qemu_fdt_setprop_string(vms->fdt, nodename, "compatible", "cfi-flash");
+        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                      2, flashbase + flashsize, 2, flashsize);
-        qemu_fdt_setprop_cell(vbi->fdt, nodename, "bank-width", 4);
+        qemu_fdt_setprop_cell(vms->fdt, nodename, "bank-width", 4);
         g_free(nodename);
     }
 }
 
-static void create_fw_cfg(const VirtBoardInfo *vbi, AddressSpace *as)
+static FWCfgState *create_fw_cfg(const VirtMachineState *vms, AddressSpace *as)
 {
-    hwaddr base = vbi->memmap[VIRT_FW_CFG].base;
-    hwaddr size = vbi->memmap[VIRT_FW_CFG].size;
+    hwaddr base = vms->memmap[VIRT_FW_CFG].base;
+    hwaddr size = vms->memmap[VIRT_FW_CFG].size;
     FWCfgState *fw_cfg;
     char *nodename;
 
@@ -936,15 +900,17 @@ static void create_fw_cfg(const VirtBoardInfo *vbi, AddressSpace *as)
     fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
 
     nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
-    qemu_fdt_add_subnode(vbi->fdt, nodename);
-    qemu_fdt_setprop_string(vbi->fdt, nodename,
+    qemu_fdt_add_subnode(vms->fdt, nodename);
+    qemu_fdt_setprop_string(vms->fdt, nodename,
                             "compatible", "qemu,fw-cfg-mmio");
-    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                  2, base, 2, size);
     g_free(nodename);
+    return fw_cfg;
 }
 
-static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
+static void create_pcie_irq_map(const VirtMachineState *vms,
+                                uint32_t gic_phandle,
                                 int first_irq, const char *nodename)
 {
     int devfn, pin;
@@ -971,28 +937,27 @@ static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
         }
     }
 
-    qemu_fdt_setprop(vbi->fdt, nodename, "interrupt-map",
+    qemu_fdt_setprop(vms->fdt, nodename, "interrupt-map",
                      full_irq_map, sizeof(full_irq_map));
 
-    qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupt-map-mask",
+    qemu_fdt_setprop_cells(vms->fdt, nodename, "interrupt-map-mask",
                            0x1800, 0, 0, /* devfn (PCI_SLOT(3)) */
                            0x7           /* PCI irq */);
 }
 
-static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
-                        bool use_highmem)
+static void create_pcie(const VirtMachineState *vms, qemu_irq *pic)
 {
-    hwaddr base_mmio = vbi->memmap[VIRT_PCIE_MMIO].base;
-    hwaddr size_mmio = vbi->memmap[VIRT_PCIE_MMIO].size;
-    hwaddr base_mmio_high = vbi->memmap[VIRT_PCIE_MMIO_HIGH].base;
-    hwaddr size_mmio_high = vbi->memmap[VIRT_PCIE_MMIO_HIGH].size;
-    hwaddr base_pio = vbi->memmap[VIRT_PCIE_PIO].base;
-    hwaddr size_pio = vbi->memmap[VIRT_PCIE_PIO].size;
-    hwaddr base_ecam = vbi->memmap[VIRT_PCIE_ECAM].base;
-    hwaddr size_ecam = vbi->memmap[VIRT_PCIE_ECAM].size;
+    hwaddr base_mmio = vms->memmap[VIRT_PCIE_MMIO].base;
+    hwaddr size_mmio = vms->memmap[VIRT_PCIE_MMIO].size;
+    hwaddr base_mmio_high = vms->memmap[VIRT_PCIE_MMIO_HIGH].base;
+    hwaddr size_mmio_high = vms->memmap[VIRT_PCIE_MMIO_HIGH].size;
+    hwaddr base_pio = vms->memmap[VIRT_PCIE_PIO].base;
+    hwaddr size_pio = vms->memmap[VIRT_PCIE_PIO].size;
+    hwaddr base_ecam = vms->memmap[VIRT_PCIE_ECAM].base;
+    hwaddr size_ecam = vms->memmap[VIRT_PCIE_ECAM].size;
     hwaddr base = base_mmio;
     int nr_pcie_buses = size_ecam / PCIE_MMCFG_SIZE_MIN;
-    int irq = vbi->irqmap[VIRT_PCIE];
+    int irq = vms->irqmap[VIRT_PCIE];
     MemoryRegion *mmio_alias;
     MemoryRegion *mmio_reg;
     MemoryRegion *ecam_alias;
@@ -1023,7 +988,7 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
                              mmio_reg, base_mmio, size_mmio);
     memory_region_add_subregion(get_system_memory(), base_mmio, mmio_alias);
 
-    if (use_highmem) {
+    if (vms->highmem) {
         /* Map high MMIO space */
         MemoryRegion *high_mmio_alias = g_new0(MemoryRegion, 1);
 
@@ -1054,26 +1019,26 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
     }
 
     nodename = g_strdup_printf("/pcie@%" PRIx64, base);
-    qemu_fdt_add_subnode(vbi->fdt, nodename);
-    qemu_fdt_setprop_string(vbi->fdt, nodename,
+    qemu_fdt_add_subnode(vms->fdt, nodename);
+    qemu_fdt_setprop_string(vms->fdt, nodename,
                             "compatible", "pci-host-ecam-generic");
-    qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "pci");
-    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#address-cells", 3);
-    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#size-cells", 2);
-    qemu_fdt_setprop_cells(vbi->fdt, nodename, "bus-range", 0,
+    qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "pci");
+    qemu_fdt_setprop_cell(vms->fdt, nodename, "#address-cells", 3);
+    qemu_fdt_setprop_cell(vms->fdt, nodename, "#size-cells", 2);
+    qemu_fdt_setprop_cells(vms->fdt, nodename, "bus-range", 0,
                            nr_pcie_buses - 1);
-    qemu_fdt_setprop(vbi->fdt, nodename, "dma-coherent", NULL, 0);
+    qemu_fdt_setprop(vms->fdt, nodename, "dma-coherent", NULL, 0);
 
-    if (vbi->msi_phandle) {
-        qemu_fdt_setprop_cells(vbi->fdt, nodename, "msi-parent",
-                               vbi->msi_phandle);
+    if (vms->msi_phandle) {
+        qemu_fdt_setprop_cells(vms->fdt, nodename, "msi-parent",
+                               vms->msi_phandle);
     }
 
-    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg",
                                  2, base_ecam, 2, size_ecam);
 
-    if (use_highmem) {
-        qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "ranges",
+    if (vms->highmem) {
+        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "ranges",
                                      1, FDT_PCI_RANGE_IOPORT, 2, 0,
                                      2, base_pio, 2, size_pio,
                                      1, FDT_PCI_RANGE_MMIO, 2, base_mmio,
@@ -1082,20 +1047,20 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
                                      2, base_mmio_high,
                                      2, base_mmio_high, 2, size_mmio_high);
     } else {
-        qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "ranges",
+        qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "ranges",
                                      1, FDT_PCI_RANGE_IOPORT, 2, 0,
                                      2, base_pio, 2, size_pio,
                                      1, FDT_PCI_RANGE_MMIO, 2, base_mmio,
                                      2, base_mmio, 2, size_mmio);
     }
 
-    qemu_fdt_setprop_cell(vbi->fdt, nodename, "#interrupt-cells", 1);
-    create_pcie_irq_map(vbi, vbi->gic_phandle, irq, nodename);
+    qemu_fdt_setprop_cell(vms->fdt, nodename, "#interrupt-cells", 1);
+    create_pcie_irq_map(vms, vms->gic_phandle, irq, nodename);
 
     g_free(nodename);
 }
 
-static void create_platform_bus(VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_platform_bus(VirtMachineState *vms, qemu_irq *pic)
 {
     DeviceState *dev;
     SysBusDevice *s;
@@ -1103,13 +1068,13 @@ static void create_platform_bus(VirtBoardInfo *vbi, qemu_irq *pic)
     ARMPlatformBusFDTParams *fdt_params = g_new(ARMPlatformBusFDTParams, 1);
     MemoryRegion *sysmem = get_system_memory();
 
-    platform_bus_params.platform_bus_base = vbi->memmap[VIRT_PLATFORM_BUS].base;
-    platform_bus_params.platform_bus_size = vbi->memmap[VIRT_PLATFORM_BUS].size;
-    platform_bus_params.platform_bus_first_irq = vbi->irqmap[VIRT_PLATFORM_BUS];
+    platform_bus_params.platform_bus_base = vms->memmap[VIRT_PLATFORM_BUS].base;
+    platform_bus_params.platform_bus_size = vms->memmap[VIRT_PLATFORM_BUS].size;
+    platform_bus_params.platform_bus_first_irq = vms->irqmap[VIRT_PLATFORM_BUS];
     platform_bus_params.platform_bus_num_irqs = PLATFORM_BUS_NUM_IRQS;
 
     fdt_params->system_params = &platform_bus_params;
-    fdt_params->binfo = &vbi->bootinfo;
+    fdt_params->binfo = &vms->bootinfo;
     fdt_params->intc = "/intc";
     /*
      * register a machine init done notifier that creates the device tree
@@ -1136,43 +1101,44 @@ static void create_platform_bus(VirtBoardInfo *vbi, qemu_irq *pic)
                                 sysbus_mmio_get_region(s, 0));
 }
 
-static void create_secure_ram(VirtBoardInfo *vbi, MemoryRegion *secure_sysmem)
+static void create_secure_ram(VirtMachineState *vms,
+                              MemoryRegion *secure_sysmem)
 {
     MemoryRegion *secram = g_new(MemoryRegion, 1);
     char *nodename;
-    hwaddr base = vbi->memmap[VIRT_SECURE_MEM].base;
-    hwaddr size = vbi->memmap[VIRT_SECURE_MEM].size;
+    hwaddr base = vms->memmap[VIRT_SECURE_MEM].base;
+    hwaddr size = vms->memmap[VIRT_SECURE_MEM].size;
 
     memory_region_init_ram(secram, NULL, "virt.secure-ram", size, &error_fatal);
     vmstate_register_ram_global(secram);
     memory_region_add_subregion(secure_sysmem, base, secram);
 
     nodename = g_strdup_printf("/secram@%" PRIx64, base);
-    qemu_fdt_add_subnode(vbi->fdt, nodename);
-    qemu_fdt_setprop_string(vbi->fdt, nodename, "device_type", "memory");
-    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg", 2, base, 2, size);
-    qemu_fdt_setprop_string(vbi->fdt, nodename, "status", "disabled");
-    qemu_fdt_setprop_string(vbi->fdt, nodename, "secure-status", "okay");
+    qemu_fdt_add_subnode(vms->fdt, nodename);
+    qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "memory");
+    qemu_fdt_setprop_sized_cells(vms->fdt, nodename, "reg", 2, base, 2, size);
+    qemu_fdt_setprop_string(vms->fdt, nodename, "status", "disabled");
+    qemu_fdt_setprop_string(vms->fdt, nodename, "secure-status", "okay");
 
     g_free(nodename);
 }
 
 static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
 {
-    const VirtBoardInfo *board = (const VirtBoardInfo *)binfo;
+    const VirtMachineState *board = container_of(binfo, VirtMachineState,
+                                                 bootinfo);
 
     *fdt_size = board->fdt_size;
     return board->fdt;
 }
 
-static void virt_build_smbios(VirtGuestInfo *guest_info)
+static void virt_build_smbios(VirtMachineState *vms)
 {
-    FWCfgState *fw_cfg = guest_info->fw_cfg;
     uint8_t *smbios_tables, *smbios_anchor;
     size_t smbios_tables_len, smbios_anchor_len;
     const char *product = "QEMU Virtual Machine";
 
-    if (!fw_cfg) {
+    if (!vms->fw_cfg) {
         return;
     }
 
@@ -1187,20 +1153,21 @@ static void virt_build_smbios(VirtGuestInfo *guest_info)
                       &smbios_anchor, &smbios_anchor_len);
 
     if (smbios_anchor) {
-        fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-tables",
+        fw_cfg_add_file(vms->fw_cfg, "etc/smbios/smbios-tables",
                         smbios_tables, smbios_tables_len);
-        fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-anchor",
+        fw_cfg_add_file(vms->fw_cfg, "etc/smbios/smbios-anchor",
                         smbios_anchor, smbios_anchor_len);
     }
 }
 
 static
-void virt_guest_info_machine_done(Notifier *notifier, void *data)
+void virt_machine_done(Notifier *notifier, void *data)
 {
-    VirtGuestInfoState *guest_info_state = container_of(notifier,
-                                              VirtGuestInfoState, machine_done);
-    virt_acpi_setup(&guest_info_state->info);
-    virt_build_smbios(&guest_info_state->info);
+    VirtMachineState *vms = container_of(notifier, VirtMachineState,
+                                         machine_done);
+
+    virt_acpi_setup(vms);
+    virt_build_smbios(vms);
 }
 
 static void machvirt_init(MachineState *machine)
@@ -1210,13 +1177,9 @@ static void machvirt_init(MachineState *machine)
     qemu_irq pic[NUM_IRQS];
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *secure_sysmem = NULL;
-    int gic_version = vms->gic_version;
     int n, virt_max_cpus;
     MemoryRegion *ram = g_new(MemoryRegion, 1);
     const char *cpu_model = machine->cpu_model;
-    VirtBoardInfo *vbi;
-    VirtGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state);
-    VirtGuestInfo *guest_info = &guest_info_state->info;
     char **cpustr;
     ObjectClass *oc;
     const char *typename;
@@ -1232,14 +1195,14 @@ static void machvirt_init(MachineState *machine)
     /* We can probe only here because during property set
      * KVM is not available yet
      */
-    if (!gic_version) {
+    if (!vms->gic_version) {
         if (!kvm_enabled()) {
             error_report("gic-version=host requires KVM");
             exit(1);
         }
 
-        gic_version = kvm_arm_vgic_probe();
-        if (!gic_version) {
+        vms->gic_version = kvm_arm_vgic_probe();
+        if (!vms->gic_version) {
             error_report("Unable to determine GIC version supported by host");
             exit(1);
         }
@@ -1248,9 +1211,7 @@ static void machvirt_init(MachineState *machine)
     /* Separate the actual CPU model name from any appended features */
     cpustr = g_strsplit(cpu_model, ",", 2);
 
-    vbi = find_machine_info(cpustr[0]);
-
-    if (!vbi) {
+    if (!cpuname_valid(cpustr[0])) {
         error_report("mach-virt: CPU %s not supported", cpustr[0]);
         exit(1);
     }
@@ -1262,13 +1223,13 @@ static void machvirt_init(MachineState *machine)
      * let the boot ROM sort them out.
      * The usual case is that we do use QEMU's PSCI implementation.
      */
-    vbi->using_psci = !(vms->secure && firmware_loaded);
+    vms->using_psci = !(vms->secure && firmware_loaded);
 
     /* The maximum number of CPUs depends on the GIC version, or on how
      * many redistributors we can fit into the memory map.
      */
-    if (gic_version == 3) {
-        virt_max_cpus = vbi->memmap[VIRT_GIC_REDIST].size / 0x20000;
+    if (vms->gic_version == 3) {
+        virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / 0x20000;
         clustersz = GICV3_TARGETLIST_BITS;
     } else {
         virt_max_cpus = GIC_NCPU;
@@ -1282,9 +1243,9 @@ static void machvirt_init(MachineState *machine)
         exit(1);
     }
 
-    vbi->smp_cpus = smp_cpus;
+    vms->smp_cpus = smp_cpus;
 
-    if (machine->ram_size > vbi->memmap[VIRT_MEM].size) {
+    if (machine->ram_size > vms->memmap[VIRT_MEM].size) {
         error_report("mach-virt: cannot model more than %dGB RAM", RAMLIMIT_GB);
         exit(1);
     }
@@ -1306,7 +1267,7 @@ static void machvirt_init(MachineState *machine)
         memory_region_add_subregion_overlap(secure_sysmem, 0, sysmem, -1);
     }
 
-    create_fdt(vbi);
+    create_fdt(vms);
 
     oc = cpu_class_by_name(TYPE_ARM_CPU, cpustr[0]);
     if (!oc) {
@@ -1345,7 +1306,7 @@ static void machvirt_init(MachineState *machine)
             object_property_set_bool(cpuobj, false, "has_el3", NULL);
         }
 
-        if (vbi->using_psci) {
+        if (vms->using_psci) {
             object_property_set_int(cpuobj, QEMU_PSCI_CONDUIT_HVC,
                                     "psci-conduit", NULL);
 
@@ -1361,7 +1322,7 @@ static void machvirt_init(MachineState *machine)
         }
 
         if (object_property_find(cpuobj, "reset-cbar", NULL)) {
-            object_property_set_int(cpuobj, vbi->memmap[VIRT_CPUPERIPHS].base,
+            object_property_set_int(cpuobj, vms->memmap[VIRT_CPUPERIPHS].base,
                                     "reset-cbar", &error_abort);
         }
 
@@ -1374,62 +1335,55 @@ static void machvirt_init(MachineState *machine)
 
         object_property_set_bool(cpuobj, true, "realized", NULL);
     }
-    fdt_add_timer_nodes(vbi, gic_version);
-    fdt_add_cpu_nodes(vbi);
-    fdt_add_psci_node(vbi);
+    fdt_add_timer_nodes(vms);
+    fdt_add_cpu_nodes(vms);
+    fdt_add_psci_node(vms);
 
     memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram",
                                          machine->ram_size);
-    memory_region_add_subregion(sysmem, vbi->memmap[VIRT_MEM].base, ram);
+    memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, ram);
 
-    create_flash(vbi, sysmem, secure_sysmem ? secure_sysmem : sysmem);
+    create_flash(vms, sysmem, secure_sysmem ? secure_sysmem : sysmem);
 
-    create_gic(vbi, pic, gic_version, vms->secure, vmc->no_its);
+    create_gic(vms, pic);
 
-    fdt_add_pmu_nodes(vbi, gic_version);
+    fdt_add_pmu_nodes(vms);
 
-    create_uart(vbi, pic, VIRT_UART, sysmem, serial_hds[0]);
+    create_uart(vms, pic, VIRT_UART, sysmem, serial_hds[0]);
 
     if (vms->secure) {
-        create_secure_ram(vbi, secure_sysmem);
-        create_uart(vbi, pic, VIRT_SECURE_UART, secure_sysmem, serial_hds[1]);
+        create_secure_ram(vms, secure_sysmem);
+        create_uart(vms, pic, VIRT_SECURE_UART, secure_sysmem, serial_hds[1]);
     }
 
-    create_rtc(vbi, pic);
+    create_rtc(vms, pic);
 
-    create_pcie(vbi, pic, vms->highmem);
+    create_pcie(vms, pic);
 
-    create_gpio(vbi, pic);
+    create_gpio(vms, pic);
 
     /* Create mmio transports, so the user can create virtio backends
      * (which will be automatically plugged in to the transports). If
      * no backend is created the transport will just sit harmlessly idle.
      */
-    create_virtio_devices(vbi, pic);
-
-    create_fw_cfg(vbi, &address_space_memory);
-    rom_set_fw(fw_cfg_find());
-
-    guest_info->smp_cpus = smp_cpus;
-    guest_info->fw_cfg = fw_cfg_find();
-    guest_info->memmap = vbi->memmap;
-    guest_info->irqmap = vbi->irqmap;
-    guest_info->use_highmem = vms->highmem;
-    guest_info->gic_version = gic_version;
-    guest_info->no_its = vmc->no_its;
-    guest_info_state->machine_done.notify = virt_guest_info_machine_done;
-    qemu_add_machine_init_done_notifier(&guest_info_state->machine_done);
-
-    vbi->bootinfo.ram_size = machine->ram_size;
-    vbi->bootinfo.kernel_filename = machine->kernel_filename;
-    vbi->bootinfo.kernel_cmdline = machine->kernel_cmdline;
-    vbi->bootinfo.initrd_filename = machine->initrd_filename;
-    vbi->bootinfo.nb_cpus = smp_cpus;
-    vbi->bootinfo.board_id = -1;
-    vbi->bootinfo.loader_start = vbi->memmap[VIRT_MEM].base;
-    vbi->bootinfo.get_dtb = machvirt_dtb;
-    vbi->bootinfo.firmware_loaded = firmware_loaded;
-    arm_load_kernel(ARM_CPU(first_cpu), &vbi->bootinfo);
+    create_virtio_devices(vms, pic);
+
+    vms->fw_cfg = create_fw_cfg(vms, &address_space_memory);
+    rom_set_fw(vms->fw_cfg);
+
+    vms->machine_done.notify = virt_machine_done;
+    qemu_add_machine_init_done_notifier(&vms->machine_done);
+
+    vms->bootinfo.ram_size = machine->ram_size;
+    vms->bootinfo.kernel_filename = machine->kernel_filename;
+    vms->bootinfo.kernel_cmdline = machine->kernel_cmdline;
+    vms->bootinfo.initrd_filename = machine->initrd_filename;
+    vms->bootinfo.nb_cpus = smp_cpus;
+    vms->bootinfo.board_id = -1;
+    vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base;
+    vms->bootinfo.get_dtb = machvirt_dtb;
+    vms->bootinfo.firmware_loaded = firmware_loaded;
+    arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo);
 
     /*
      * arm_load_kernel machine init done notifier registration must
@@ -1437,7 +1391,7 @@ static void machvirt_init(MachineState *machine)
      * another notifier is registered which adds platform bus nodes.
      * Notifiers are executed in registration reverse order.
      */
-    create_platform_bus(vbi, pic);
+    create_platform_bus(vms, pic);
 }
 
 static bool virt_get_secure(Object *obj, Error **errp)
@@ -1556,6 +1510,9 @@ static void virt_2_9_instance_init(Object *obj)
     object_property_set_description(obj, "gic-version",
                                     "Set GIC version. "
                                     "Valid values are 2, 3 and host", NULL);
+
+    vms->memmap = a15memmap;
+    vms->irqmap = a15irqmap;
 }
 
 static void virt_machine_2_9_options(MachineClass *mc)
@@ -1573,8 +1530,14 @@ static void virt_2_8_instance_init(Object *obj)
 
 static void virt_machine_2_8_options(MachineClass *mc)
 {
+    VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
+
     virt_machine_2_9_options(mc);
     SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_8);
+    /* For 2.8 and earlier we falsely claimed in the DT that
+     * our timers were edge-triggered, not level-triggered.
+     */
+    vmc->claim_edge_triggered_timers = true;
 }
 DEFINE_VIRT_MACHINE(2, 8)
 
diff --git a/hw/arm/z2.c b/hw/arm/z2.c
index b3a6bbd210..1607cbdb03 100644
--- a/hw/arm/z2.c
+++ b/hw/arm/z2.c
@@ -220,7 +220,7 @@ static int aer915_send(I2CSlave *i2c, uint8_t data)
     return 0;
 }
 
-static void aer915_event(I2CSlave *i2c, enum i2c_event event)
+static int aer915_event(I2CSlave *i2c, enum i2c_event event)
 {
     AER915State *s = AER915(i2c);
 
@@ -238,6 +238,8 @@ static void aer915_event(I2CSlave *i2c, enum i2c_event event)
     default:
         break;
     }
+
+    return 0;
 }
 
 static int aer915_recv(I2CSlave *slave)
diff --git a/hw/audio/wm8750.c b/hw/audio/wm8750.c
index 0c6500e96a..f8b5bebfc2 100644
--- a/hw/audio/wm8750.c
+++ b/hw/audio/wm8750.c
@@ -303,7 +303,7 @@ static void wm8750_reset(I2CSlave *i2c)
     s->i2c_len = 0;
 }
 
-static void wm8750_event(I2CSlave *i2c, enum i2c_event event)
+static int wm8750_event(I2CSlave *i2c, enum i2c_event event)
 {
     WM8750State *s = WM8750(i2c);
 
@@ -321,6 +321,8 @@ static void wm8750_event(I2CSlave *i2c, enum i2c_event event)
     default:
         break;
     }
+
+    return 0;
 }
 
 #define WM8750_LINVOL	0x00
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index e3c1166ea6..4c5f8c3590 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -28,6 +28,7 @@
 #include "hw/ssi/ssi.h"
 #include "qemu/bitops.h"
 #include "qemu/log.h"
+#include "qemu/error-report.h"
 #include "qapi/error.h"
 
 #ifndef M25P80_ERR_DEBUG
@@ -377,6 +378,8 @@ typedef enum {
     MAN_GENERIC,
 } Manufacturer;
 
+#define M25P80_INTERNAL_DATA_BUFFER_SZ 16
+
 typedef struct Flash {
     SSISlave parent_obj;
 
@@ -387,7 +390,7 @@ typedef struct Flash {
     int page_size;
 
     uint8_t state;
-    uint8_t data[16];
+    uint8_t data[M25P80_INTERNAL_DATA_BUFFER_SZ];
     uint32_t len;
     uint32_t pos;
     uint8_t needed_bytes;
@@ -1115,6 +1118,17 @@ static uint32_t m25p80_transfer8(SSISlave *ss, uint32_t tx)
 
     case STATE_COLLECTING_DATA:
     case STATE_COLLECTING_VAR_LEN_DATA:
+
+        if (s->len >= M25P80_INTERNAL_DATA_BUFFER_SZ) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "M25P80: Write overrun internal data buffer. "
+                          "SPI controller (QEMU emulator or guest driver) "
+                          "is misbehaving\n");
+            s->len = s->pos = 0;
+            s->state = STATE_IDLE;
+            break;
+        }
+
         s->data[s->len] = (uint8_t)tx;
         s->len++;
 
@@ -1124,6 +1138,17 @@ static uint32_t m25p80_transfer8(SSISlave *ss, uint32_t tx)
         break;
 
     case STATE_READING_DATA:
+
+        if (s->pos >= M25P80_INTERNAL_DATA_BUFFER_SZ) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "M25P80: Read overrun internal data buffer. "
+                          "SPI controller (QEMU emulator or guest driver) "
+                          "is misbehaving\n");
+            s->len = s->pos = 0;
+            s->state = STATE_IDLE;
+            break;
+        }
+
         r = s->data[s->pos];
         s->pos++;
         if (s->pos == s->len) {
@@ -1196,7 +1221,7 @@ static const VMStateDescription vmstate_m25p80 = {
     .pre_save = m25p80_pre_save,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(state, Flash),
-        VMSTATE_UINT8_ARRAY(data, Flash, 16),
+        VMSTATE_UINT8_ARRAY(data, Flash, M25P80_INTERNAL_DATA_BUFFER_SZ),
         VMSTATE_UINT32(len, Flash),
         VMSTATE_UINT32(pos, Flash),
         VMSTATE_UINT8(needed_bytes, Flash),
diff --git a/hw/char/exynos4210_uart.c b/hw/char/exynos4210_uart.c
index 571c324004..820d1abeb9 100644
--- a/hw/char/exynos4210_uart.c
+++ b/hw/char/exynos4210_uart.c
@@ -629,22 +629,26 @@ DeviceState *exynos4210_uart_create(hwaddr addr,
     return dev;
 }
 
-static int exynos4210_uart_init(SysBusDevice *dev)
+static void exynos4210_uart_init(Object *obj)
 {
+    SysBusDevice *dev = SYS_BUS_DEVICE(obj);
     Exynos4210UartState *s = EXYNOS4210_UART(dev);
 
     /* memory mapping */
-    memory_region_init_io(&s->iomem, OBJECT(s), &exynos4210_uart_ops, s,
+    memory_region_init_io(&s->iomem, obj, &exynos4210_uart_ops, s,
                           "exynos4210.uart", EXYNOS4210_UART_REGS_MEM_SIZE);
     sysbus_init_mmio(dev, &s->iomem);
 
     sysbus_init_irq(dev, &s->irq);
+}
+
+static void exynos4210_uart_realize(DeviceState *dev, Error **errp)
+{
+    Exynos4210UartState *s = EXYNOS4210_UART(dev);
 
     qemu_chr_fe_set_handlers(&s->chr, exynos4210_uart_can_receive,
                              exynos4210_uart_receive, exynos4210_uart_event,
                              s, NULL, true);
-
-    return 0;
 }
 
 static Property exynos4210_uart_properties[] = {
@@ -658,9 +662,8 @@ static Property exynos4210_uart_properties[] = {
 static void exynos4210_uart_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-    k->init = exynos4210_uart_init;
+    dc->realize = exynos4210_uart_realize;
     dc->reset = exynos4210_uart_reset;
     dc->props = exynos4210_uart_properties;
     dc->vmsd = &vmstate_exynos4210_uart;
@@ -670,6 +673,7 @@ static const TypeInfo exynos4210_uart_info = {
     .name          = TYPE_EXYNOS4210_UART,
     .parent        = TYPE_SYS_BUS_DEVICE,
     .instance_size = sizeof(Exynos4210UartState),
+    .instance_init = exynos4210_uart_init,
     .class_init    = exynos4210_uart_class_init,
 };
 
diff --git a/hw/display/ssd0303.c b/hw/display/ssd0303.c
index d3017563f3..68a80b9d64 100644
--- a/hw/display/ssd0303.c
+++ b/hw/display/ssd0303.c
@@ -179,7 +179,7 @@ static int ssd0303_send(I2CSlave *i2c, uint8_t data)
     return 0;
 }
 
-static void ssd0303_event(I2CSlave *i2c, enum i2c_event event)
+static int ssd0303_event(I2CSlave *i2c, enum i2c_event event)
 {
     ssd0303_state *s = SSD0303(i2c);
 
@@ -193,6 +193,8 @@ static void ssd0303_event(I2CSlave *i2c, enum i2c_event event)
         /* Nothing to do.  */
         break;
     }
+
+    return 0;
 }
 
 static void ssd0303_update_display(void *opaque)
diff --git a/hw/gpio/max7310.c b/hw/gpio/max7310.c
index 1bd5eaf911..f82e3e6555 100644
--- a/hw/gpio/max7310.c
+++ b/hw/gpio/max7310.c
@@ -129,7 +129,7 @@ static int max7310_tx(I2CSlave *i2c, uint8_t data)
     return 0;
 }
 
-static void max7310_event(I2CSlave *i2c, enum i2c_event event)
+static int max7310_event(I2CSlave *i2c, enum i2c_event event)
 {
     MAX7310State *s = MAX7310(i2c);
     s->len = 0;
@@ -147,6 +147,8 @@ static void max7310_event(I2CSlave *i2c, enum i2c_event event)
     default:
         break;
     }
+
+    return 0;
 }
 
 static const VMStateDescription vmstate_max7310 = {
diff --git a/hw/i2c/core.c b/hw/i2c/core.c
index e40781ea3b..2c1234cdff 100644
--- a/hw/i2c/core.c
+++ b/hw/i2c/core.c
@@ -88,18 +88,26 @@ int i2c_bus_busy(I2CBus *bus)
     return !QLIST_EMPTY(&bus->current_devs);
 }
 
+/* TODO: Make this handle multiple masters.  */
 /*
- * Returns non-zero if the address is not valid.  If this is called
- * again without an intervening i2c_end_transfer(), like in the SMBus
- * case where the operation is switched from write to read, this
- * function will not rescan the bus and thus cannot fail.
+ * Start or continue an i2c transaction.  When this is called for the
+ * first time or after an i2c_end_transfer(), if it returns an error
+ * the bus transaction is terminated (or really never started).  If
+ * this is called after another i2c_start_transfer() without an
+ * intervening i2c_end_transfer(), and it returns an error, the
+ * transaction will not be terminated.  The caller must do it.
+ *
+ * This corresponds with the way real hardware works.  The SMBus
+ * protocol uses a start transfer to switch from write to read mode
+ * without releasing the bus.  If that fails, the bus is still
+ * in a transaction.
  */
-/* TODO: Make this handle multiple masters.  */
 int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv)
 {
     BusChild *kid;
     I2CSlaveClass *sc;
     I2CNode *node;
+    bool bus_scanned = false;
 
     if (address == I2C_BROADCAST) {
         /*
@@ -130,6 +138,7 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv)
                 }
             }
         }
+        bus_scanned = true;
     }
 
     if (QLIST_EMPTY(&bus->current_devs)) {
@@ -137,11 +146,21 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv)
     }
 
     QLIST_FOREACH(node, &bus->current_devs, next) {
+        int rv;
+
         sc = I2C_SLAVE_GET_CLASS(node->elt);
         /* If the bus is already busy, assume this is a repeated
            start condition.  */
+
         if (sc->event) {
-            sc->event(node->elt, recv ? I2C_START_RECV : I2C_START_SEND);
+            rv = sc->event(node->elt, recv ? I2C_START_RECV : I2C_START_SEND);
+            if (rv && !bus->broadcast) {
+                if (bus_scanned) {
+                    /* First call, terminate the transfer. */
+                    i2c_end_transfer(bus);
+                }
+                return rv;
+            }
         }
     }
     return 0;
diff --git a/hw/i2c/i2c-ddc.c b/hw/i2c/i2c-ddc.c
index 1227212934..66899d7233 100644
--- a/hw/i2c/i2c-ddc.c
+++ b/hw/i2c/i2c-ddc.c
@@ -230,13 +230,15 @@ static void i2c_ddc_reset(DeviceState *ds)
     s->reg = 0;
 }
 
-static void i2c_ddc_event(I2CSlave *i2c, enum i2c_event event)
+static int i2c_ddc_event(I2CSlave *i2c, enum i2c_event event)
 {
     I2CDDCState *s = I2CDDC(i2c);
 
     if (event == I2C_START_SEND) {
         s->firstbyte = true;
     }
+
+    return 0;
 }
 
 static int i2c_ddc_rx(I2CSlave *i2c)
diff --git a/hw/i2c/smbus.c b/hw/i2c/smbus.c
index 5b4dd3eba4..2d1b79a689 100644
--- a/hw/i2c/smbus.c
+++ b/hw/i2c/smbus.c
@@ -67,7 +67,7 @@ static void smbus_do_write(SMBusDevice *dev)
     }
 }
 
-static void smbus_i2c_event(I2CSlave *s, enum i2c_event event)
+static int smbus_i2c_event(I2CSlave *s, enum i2c_event event)
 {
     SMBusDevice *dev = SMBUS_DEVICE(s);
 
@@ -148,6 +148,8 @@ static void smbus_i2c_event(I2CSlave *s, enum i2c_event event)
             break;
         }
     }
+
+    return 0;
 }
 
 static int smbus_i2c_recv(I2CSlave *s)
@@ -249,7 +251,8 @@ int smbus_read_byte(I2CBus *bus, uint8_t addr, uint8_t command)
     }
     i2c_send(bus, command);
     if (i2c_start_transfer(bus, addr, 1)) {
-        assert(0);
+        i2c_end_transfer(bus);
+        return -1;
     }
     data = i2c_recv(bus);
     i2c_nack(bus);
@@ -276,7 +279,8 @@ int smbus_read_word(I2CBus *bus, uint8_t addr, uint8_t command)
     }
     i2c_send(bus, command);
     if (i2c_start_transfer(bus, addr, 1)) {
-        assert(0);
+        i2c_end_transfer(bus);
+        return -1;
     }
     data = i2c_recv(bus);
     data |= i2c_recv(bus) << 8;
@@ -307,7 +311,8 @@ int smbus_read_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data)
     }
     i2c_send(bus, command);
     if (i2c_start_transfer(bus, addr, 1)) {
-        assert(0);
+        i2c_end_transfer(bus);
+        return -1;
     }
     len = i2c_recv(bus);
     if (len > 32) {
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 47b79d9112..e0732ccaf1 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -562,7 +562,7 @@ static void amdvi_mmio_trace(hwaddr addr, unsigned size)
         trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
     } else {
         index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
-        trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
+        trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
     }
 }
 
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 884926e9e7..0d3dc6a9f2 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -49,8 +49,8 @@
 #define AMDVI_CAPAB_INIT_TYPE         (3 << 16)
 
 /* No. of used MMIO registers */
-#define AMDVI_MMIO_REGS_HIGH  8
-#define AMDVI_MMIO_REGS_LOW   7
+#define AMDVI_MMIO_REGS_HIGH  7
+#define AMDVI_MMIO_REGS_LOW   8
 
 /* MMIO registers */
 #define AMDVI_MMIO_DEVICE_TABLE       0x0000
diff --git a/hw/input/lm832x.c b/hw/input/lm832x.c
index 539682cac8..2340523da0 100644
--- a/hw/input/lm832x.c
+++ b/hw/input/lm832x.c
@@ -383,7 +383,7 @@ static void lm_kbd_write(LM823KbdState *s, int reg, int byte, uint8_t value)
     }
 }
 
-static void lm_i2c_event(I2CSlave *i2c, enum i2c_event event)
+static int lm_i2c_event(I2CSlave *i2c, enum i2c_event event)
 {
     LM823KbdState *s = LM8323(i2c);
 
@@ -397,6 +397,8 @@ static void lm_i2c_event(I2CSlave *i2c, enum i2c_event event)
     default:
         break;
     }
+
+    return 0;
 }
 
 static int lm_i2c_rx(I2CSlave *i2c)
diff --git a/hw/misc/tmp105.c b/hw/misc/tmp105.c
index f5c2472b5b..04e83787d4 100644
--- a/hw/misc/tmp105.c
+++ b/hw/misc/tmp105.c
@@ -176,7 +176,7 @@ static int tmp105_tx(I2CSlave *i2c, uint8_t data)
     return 0;
 }
 
-static void tmp105_event(I2CSlave *i2c, enum i2c_event event)
+static int tmp105_event(I2CSlave *i2c, enum i2c_event event)
 {
     TMP105State *s = TMP105(i2c);
 
@@ -185,6 +185,7 @@ static void tmp105_event(I2CSlave *i2c, enum i2c_event event)
     }
 
     s->len = 0;
+    return 0;
 }
 
 static int tmp105_post_load(void *opaque, int version_id)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 24fae1689d..637d54549e 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -982,8 +982,8 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                pci_get_function_0(pci_dev)) {
         error_setg(errp, "PCI: slot %d function 0 already ocuppied by %s,"
                    " new func %s cannot be exposed to guest.",
-                   PCI_SLOT(devfn),
-                   bus->devices[PCI_DEVFN(PCI_SLOT(devfn), 0)]->name,
+                   PCI_SLOT(pci_get_function_0(pci_dev)->devfn),
+                   pci_get_function_0(pci_dev)->name,
                    name);
 
        return NULL;
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index f5c1d98192..07650683f7 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -1098,7 +1098,7 @@ static int virtio_ccw_set_guest_notifier(VirtioCcwDevice *dev, int n,
          * We do not support individual masking for channel devices, so we
          * need to manually trigger any guest masking callbacks here.
          */
-        if (k->guest_notifier_mask) {
+        if (k->guest_notifier_mask && vdev->use_guest_notifier_mask) {
             k->guest_notifier_mask(vdev, n, false);
         }
         /* get lost events and re-inject */
@@ -1107,7 +1107,7 @@ static int virtio_ccw_set_guest_notifier(VirtioCcwDevice *dev, int n,
             event_notifier_set(notifier);
         }
     } else {
-        if (k->guest_notifier_mask) {
+        if (k->guest_notifier_mask && vdev->use_guest_notifier_mask) {
             k->guest_notifier_mask(vdev, n, true);
         }
         if (with_irqfd) {
diff --git a/hw/ssi/imx_spi.c b/hw/ssi/imx_spi.c
index e4e395fa67..b66505ca49 100644
--- a/hw/ssi/imx_spi.c
+++ b/hw/ssi/imx_spi.c
@@ -320,9 +320,6 @@ static void imx_spi_write(void *opaque, hwaddr offset, uint64_t value,
                       TYPE_IMX_SPI, __func__);
         break;
     case ECSPI_TXDATA:
-    case ECSPI_MSGDATA:
-        /* Is there any difference between TXDATA and MSGDATA ? */
-        /* I'll have to look in the linux driver */
         if (!imx_spi_is_enabled(s)) {
             /* Ignore writes if device is disabled */
             break;
@@ -380,6 +377,14 @@ static void imx_spi_write(void *opaque, hwaddr offset, uint64_t value,
         }
 
         break;
+    case ECSPI_MSGDATA:
+        /* it is not clear from the spec what MSGDATA is for */
+        /* Anyway it is not used by Linux driver */
+        /* So for now we just ignore it */
+        qemu_log_mask(LOG_UNIMP,
+                      "[%s]%s: Trying to write to MSGDATA, ignoring\n",
+                      TYPE_IMX_SPI, __func__);
+        break;
     default:
         s->regs[index] = value;
 
diff --git a/hw/timer/ds1338.c b/hw/timer/ds1338.c
index f5d04dd5d7..3849b74a68 100644
--- a/hw/timer/ds1338.c
+++ b/hw/timer/ds1338.c
@@ -94,7 +94,7 @@ static void inc_regptr(DS1338State *s)
     }
 }
 
-static void ds1338_event(I2CSlave *i2c, enum i2c_event event)
+static int ds1338_event(I2CSlave *i2c, enum i2c_event event)
 {
     DS1338State *s = DS1338(i2c);
 
@@ -113,6 +113,8 @@ static void ds1338_event(I2CSlave *i2c, enum i2c_event event)
     default:
         break;
     }
+
+    return 0;
 }
 
 static int ds1338_recv(I2CSlave *i2c)
diff --git a/hw/timer/twl92230.c b/hw/timer/twl92230.c
index 7ba4e9a7c9..b8d914e49b 100644
--- a/hw/timer/twl92230.c
+++ b/hw/timer/twl92230.c
@@ -713,12 +713,14 @@ static void menelaus_write(void *opaque, uint8_t addr, uint8_t value)
     }
 }
 
-static void menelaus_event(I2CSlave *i2c, enum i2c_event event)
+static int menelaus_event(I2CSlave *i2c, enum i2c_event event)
 {
     MenelausState *s = TWL92230(i2c);
 
     if (event == I2C_START_SEND)
         s->firstbyte = 1;
+
+    return 0;
 }
 
 static int menelaus_tx(I2CSlave *i2c, uint8_t data)
diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 17412cb7b5..60654dc19d 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -402,7 +402,7 @@ static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign,
         event_notifier_cleanup(notifier);
     }
 
-    if (vdc->guest_notifier_mask) {
+    if (vdc->guest_notifier_mask && vdev->use_guest_notifier_mask) {
         vdc->guest_notifier_mask(vdev, n, !assign);
     }
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 83a423c580..4e4562d444 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -184,7 +184,7 @@ struct BlockDriver {
 
     /*
      * Flushes all data that was already written to the OS all the way down to
-     * the disk (for example raw-posix calls fsync()).
+     * the disk (for example file-posix.c calls fsync()).
      */
     int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
 
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index 154f3b82f6..d43ec005cb 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -191,10 +191,8 @@ struct AcpiFadtDescriptorRev5_1 {
 
 typedef struct AcpiFadtDescriptorRev5_1 AcpiFadtDescriptorRev5_1;
 
-enum {
-    ACPI_FADT_ARM_USE_PSCI_G_0_2 = 0,
-    ACPI_FADT_ARM_PSCI_USE_HVC = 1,
-};
+#define ACPI_FADT_ARM_PSCI_COMPLIANT  (1 << 0)
+#define ACPI_FADT_ARM_PSCI_USE_HVC    (1 << 1)
 
 /*
  * Serial Port Console Redirection Table (SPCR), Rev. 1.02
@@ -290,7 +288,7 @@ typedef struct AcpiMultipleApicTable AcpiMultipleApicTable;
 #define ACPI_APIC_XRUPT_SOURCE       8
 #define ACPI_APIC_LOCAL_X2APIC       9
 #define ACPI_APIC_LOCAL_X2APIC_NMI      10
-#define ACPI_APIC_GENERIC_INTERRUPT     11
+#define ACPI_APIC_GENERIC_CPU_INTERFACE 11
 #define ACPI_APIC_GENERIC_DISTRIBUTOR   12
 #define ACPI_APIC_GENERIC_MSI_FRAME     13
 #define ACPI_APIC_GENERIC_REDISTRIBUTOR 14
@@ -361,7 +359,7 @@ struct AcpiMadtLocalX2ApicNmi {
 } QEMU_PACKED;
 typedef struct AcpiMadtLocalX2ApicNmi AcpiMadtLocalX2ApicNmi;
 
-struct AcpiMadtGenericInterrupt {
+struct AcpiMadtGenericCpuInterface {
     ACPI_SUB_HEADER_DEF
     uint16_t reserved;
     uint32_t cpu_interface_number;
@@ -378,7 +376,10 @@ struct AcpiMadtGenericInterrupt {
     uint64_t arm_mpidr;
 } QEMU_PACKED;
 
-typedef struct AcpiMadtGenericInterrupt AcpiMadtGenericInterrupt;
+typedef struct AcpiMadtGenericCpuInterface AcpiMadtGenericCpuInterface;
+
+/* GICC CPU Interface Flags */
+#define ACPI_MADT_GICC_ENABLED 1
 
 struct AcpiMadtGenericDistributor {
     ACPI_SUB_HEADER_DEF
@@ -427,21 +428,9 @@ typedef struct AcpiMadtGenericTranslator AcpiMadtGenericTranslator;
 /*
  * Generic Timer Description Table (GTDT)
  */
-
-#define ACPI_GTDT_INTERRUPT_MODE        (1 << 0)
-#define ACPI_GTDT_INTERRUPT_POLARITY    (1 << 1)
-#define ACPI_GTDT_ALWAYS_ON             (1 << 2)
-
-/* Triggering */
-
-#define ACPI_LEVEL_SENSITIVE            ((uint8_t) 0x00)
-#define ACPI_EDGE_SENSITIVE             ((uint8_t) 0x01)
-
-/* Polarity */
-
-#define ACPI_ACTIVE_HIGH                ((uint8_t) 0x00)
-#define ACPI_ACTIVE_LOW                 ((uint8_t) 0x01)
-#define ACPI_ACTIVE_BOTH                ((uint8_t) 0x02)
+#define ACPI_GTDT_INTERRUPT_MODE_LEVEL    (0 << 0)
+#define ACPI_GTDT_INTERRUPT_MODE_EDGE     (1 << 0)
+#define ACPI_GTDT_CAP_ALWAYS_ON           (1 << 2)
 
 struct AcpiGenericTimerTable {
     ACPI_TABLE_HEADER_DEF
diff --git a/include/hw/arm/virt-acpi-build.h b/include/hw/arm/virt-acpi-build.h
deleted file mode 100644
index f5ec749b8f..0000000000
--- a/include/hw/arm/virt-acpi-build.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- *
- * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD.
- *
- * Author: Shannon Zhao <zhaoshenglong@huawei.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2 or later, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef QEMU_VIRT_ACPI_BUILD_H
-#define QEMU_VIRT_ACPI_BUILD_H
-
-#include "qemu-common.h"
-#include "hw/arm/virt.h"
-#include "qemu/notify.h"
-
-#define ACPI_GICC_ENABLED 1
-
-typedef struct VirtGuestInfo {
-    int smp_cpus;
-    FWCfgState *fw_cfg;
-    const MemMapEntry *memmap;
-    const int *irqmap;
-    bool use_highmem;
-    int gic_version;
-    bool no_its;
-} VirtGuestInfo;
-
-
-typedef struct VirtGuestInfoState {
-    VirtGuestInfo info;
-    Notifier machine_done;
-} VirtGuestInfoState;
-
-void virt_acpi_setup(VirtGuestInfo *guest_info);
-
-#endif
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 9650193253..eb1c63d688 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -32,6 +32,9 @@
 
 #include "qemu-common.h"
 #include "exec/hwaddr.h"
+#include "qemu/notify.h"
+#include "hw/boards.h"
+#include "hw/arm/arm.h"
 
 #define NUM_GICV2M_SPIS       64
 #define NUM_VIRTIO_TRANSPORTS 32
@@ -74,5 +77,41 @@ typedef struct MemMapEntry {
     hwaddr size;
 } MemMapEntry;
 
+typedef struct {
+    MachineClass parent;
+    bool disallow_affinity_adjustment;
+    bool no_its;
+    bool no_pmu;
+    bool claim_edge_triggered_timers;
+} VirtMachineClass;
 
-#endif
+typedef struct {
+    MachineState parent;
+    Notifier machine_done;
+    FWCfgState *fw_cfg;
+    bool secure;
+    bool highmem;
+    int32_t gic_version;
+    struct arm_boot_info bootinfo;
+    const MemMapEntry *memmap;
+    const int *irqmap;
+    int smp_cpus;
+    void *fdt;
+    int fdt_size;
+    uint32_t clock_phandle;
+    uint32_t gic_phandle;
+    uint32_t msi_phandle;
+    bool using_psci;
+} VirtMachineState;
+
+#define TYPE_VIRT_MACHINE   MACHINE_TYPE_NAME("virt")
+#define VIRT_MACHINE(obj) \
+    OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE)
+#define VIRT_MACHINE_GET_CLASS(obj) \
+    OBJECT_GET_CLASS(VirtMachineClass, obj, TYPE_VIRT_MACHINE)
+#define VIRT_MACHINE_CLASS(klass) \
+    OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE)
+
+void virt_acpi_setup(VirtMachineState *vms);
+
+#endif /* QEMU_ARM_VIRT_H */
diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h
index c4085aa366..2ce611d4c8 100644
--- a/include/hw/i2c/i2c.h
+++ b/include/hw/i2c/i2c.h
@@ -32,14 +32,22 @@ typedef struct I2CSlaveClass
     /* Callbacks provided by the device.  */
     int (*init)(I2CSlave *dev);
 
-    /* Master to slave.  */
+    /* Master to slave. Returns non-zero for a NAK, 0 for success. */
     int (*send)(I2CSlave *s, uint8_t data);
 
-    /* Slave to master.  */
+    /*
+     * Slave to master.  This cannot fail, the device should always
+     * return something here.  Negative values probably result in 0xff
+     * and a possible log from the driver, and shouldn't be used.
+     */
     int (*recv)(I2CSlave *s);
 
-    /* Notify the slave of a bus state change.  */
-    void (*event)(I2CSlave *s, enum i2c_event event);
+    /*
+     * Notify the slave of a bus state change.  For start event,
+     * returns non-zero to NAK an operation.  For other events the
+     * return code is not used and should be zero.
+     */
+    int (*event)(I2CSlave *s, enum i2c_event event);
 } I2CSlaveClass;
 
 struct I2CSlave
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index e6a60d55fd..12584ed1b7 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -71,6 +71,12 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque);
 void qemu_coroutine_enter(Coroutine *coroutine);
 
 /**
+ * Transfer control to a coroutine if it's not active (i.e. part of the call
+ * stack of the running coroutine). Otherwise, do nothing.
+ */
+void qemu_coroutine_enter_if_inactive(Coroutine *co);
+
+/**
  * Transfer control back to a coroutine's caller
  *
  * This function does not return until the coroutine is re-entered using
diff --git a/qemu-img.c b/qemu-img.c
index 6949b73ca5..5df66fe661 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3559,20 +3559,23 @@ static void bench_cb(void *opaque, int ret)
     }
 
     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
+        int64_t offset = b->offset;
+        /* blk_aio_* might look for completed I/Os and kick bench_cb
+         * again, so make sure this operation is counted by in_flight
+         * and b->offset is ready for the next submission.
+         */
+        b->in_flight++;
+        b->offset += b->step;
+        b->offset %= b->image_size;
         if (b->write) {
-            acb = blk_aio_pwritev(b->blk, b->offset, b->qiov, 0,
-                                  bench_cb, b);
+            acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
         } else {
-            acb = blk_aio_preadv(b->blk, b->offset, b->qiov, 0,
-                                 bench_cb, b);
+            acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
         }
         if (!acb) {
             error_report("Failed to issue request");
             exit(EXIT_FAILURE);
         }
-        b->in_flight++;
-        b->offset += b->step;
-        b->offset %= b->image_size;
     }
 }
 
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 4841d582a1..f776404d86 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -689,7 +689,7 @@ tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y)
 tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y)
 tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y)
 tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y)
-tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o
+tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o contrib/libvhost-user/libvhost-user.o $(test-util-obj-y)
 tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y)
 tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o
 
diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out
index 8ff423f56b..dd879f1212 100644
--- a/tests/qemu-iotests/071.out
+++ b/tests/qemu-iotests/071.out
@@ -12,7 +12,7 @@ read 512/512 bytes at offset 229376
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
+blkverify: read offset=0 bytes=512 contents mismatch at offset 0
 
 === Testing blkverify through file blockref ===
 
@@ -26,7 +26,7 @@ read 512/512 bytes at offset 229376
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
+blkverify: read offset=0 bytes=512 contents mismatch at offset 0
 
 === Testing blkdebug through filename ===
 
@@ -56,7 +56,7 @@ QMP_VERSION
 {"return": {}}
 {"return": {}}
 {"return": {}}
-blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
+blkverify: read offset=0 bytes=512 contents mismatch at offset 0
 
 
 === Testing blkverify on existing raw block device ===
@@ -66,7 +66,7 @@ QMP_VERSION
 {"return": {}}
 {"return": {}}
 {"return": {}}
-blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
+blkverify: read offset=0 bytes=512 contents mismatch at offset 0
 
 
 === Testing blkdebug's set-state through QMP ===
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 775e031069..8618c20d53 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -30,17 +30,9 @@
 #define _FILE_OFFSET_BITS 64
 
 #include "qemu/osdep.h"
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/unistd.h>
-#include <sys/eventfd.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <linux/vhost.h>
-
-#include "qemu/atomic.h"
+#include "qemu/iov.h"
 #include "standard-headers/linux/virtio_net.h"
-#include "standard-headers/linux/virtio_ring.h"
+#include "contrib/libvhost-user/libvhost-user.h"
 
 #define VHOST_USER_BRIDGE_DEBUG 1
 
@@ -64,6 +56,17 @@ typedef struct Dispatcher {
     Event events[FD_SETSIZE];
 } Dispatcher;
 
+typedef struct VubrDev {
+    VuDev vudev;
+    Dispatcher dispatcher;
+    int backend_udp_sock;
+    struct sockaddr_in backend_udp_dest;
+    int hdrlen;
+    int sock;
+    int ready;
+    int quit;
+} VubrDev;
+
 static void
 vubr_die(const char *s)
 {
@@ -101,8 +104,6 @@ dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
     return 0;
 }
 
-/* dispatcher_remove() is not currently in use but may be useful
- * in the future. */
 static int
 dispatcher_remove(Dispatcher *dispr, int sock)
 {
@@ -157,1039 +158,313 @@ dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
     return 0;
 }
 
-typedef struct VubrVirtq {
-    int call_fd;
-    int kick_fd;
-    uint32_t size;
-    uint16_t last_avail_index;
-    uint16_t last_used_index;
-    struct vring_desc *desc;
-    struct vring_avail *avail;
-    struct vring_used *used;
-    uint64_t log_guest_addr;
-    int enable;
-} VubrVirtq;
-
-/* Based on qemu/hw/virtio/vhost-user.c */
-
-#define VHOST_MEMORY_MAX_NREGIONS    8
-#define VHOST_USER_F_PROTOCOL_FEATURES 30
-/* v1.0 compliant. */
-#define VIRTIO_F_VERSION_1		32
-
-#define VHOST_LOG_PAGE 4096
-
-enum VhostUserProtocolFeature {
-    VHOST_USER_PROTOCOL_F_MQ = 0,
-    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
-    VHOST_USER_PROTOCOL_F_RARP = 2,
-
-    VHOST_USER_PROTOCOL_F_MAX
-};
-
-#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
-
-typedef enum VhostUserRequest {
-    VHOST_USER_NONE = 0,
-    VHOST_USER_GET_FEATURES = 1,
-    VHOST_USER_SET_FEATURES = 2,
-    VHOST_USER_SET_OWNER = 3,
-    VHOST_USER_RESET_OWNER = 4,
-    VHOST_USER_SET_MEM_TABLE = 5,
-    VHOST_USER_SET_LOG_BASE = 6,
-    VHOST_USER_SET_LOG_FD = 7,
-    VHOST_USER_SET_VRING_NUM = 8,
-    VHOST_USER_SET_VRING_ADDR = 9,
-    VHOST_USER_SET_VRING_BASE = 10,
-    VHOST_USER_GET_VRING_BASE = 11,
-    VHOST_USER_SET_VRING_KICK = 12,
-    VHOST_USER_SET_VRING_CALL = 13,
-    VHOST_USER_SET_VRING_ERR = 14,
-    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
-    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
-    VHOST_USER_GET_QUEUE_NUM = 17,
-    VHOST_USER_SET_VRING_ENABLE = 18,
-    VHOST_USER_SEND_RARP = 19,
-    VHOST_USER_MAX
-} VhostUserRequest;
-
-typedef struct VhostUserMemoryRegion {
-    uint64_t guest_phys_addr;
-    uint64_t memory_size;
-    uint64_t userspace_addr;
-    uint64_t mmap_offset;
-} VhostUserMemoryRegion;
-
-typedef struct VhostUserMemory {
-    uint32_t nregions;
-    uint32_t padding;
-    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
-} VhostUserMemory;
-
-typedef struct VhostUserLog {
-    uint64_t mmap_size;
-    uint64_t mmap_offset;
-} VhostUserLog;
-
-typedef struct VhostUserMsg {
-    VhostUserRequest request;
-
-#define VHOST_USER_VERSION_MASK     (0x3)
-#define VHOST_USER_REPLY_MASK       (0x1<<2)
-    uint32_t flags;
-    uint32_t size; /* the following payload size */
-    union {
-#define VHOST_USER_VRING_IDX_MASK   (0xff)
-#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
-        uint64_t u64;
-        struct vhost_vring_state state;
-        struct vhost_vring_addr addr;
-        VhostUserMemory memory;
-        VhostUserLog log;
-    } payload;
-    int fds[VHOST_MEMORY_MAX_NREGIONS];
-    int fd_num;
-} QEMU_PACKED VhostUserMsg;
-
-#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
-
-/* The version of the protocol we support */
-#define VHOST_USER_VERSION    (0x1)
-
-#define MAX_NR_VIRTQUEUE (8)
-
-typedef struct VubrDevRegion {
-    /* Guest Physical address. */
-    uint64_t gpa;
-    /* Memory region size. */
-    uint64_t size;
-    /* QEMU virtual address (userspace). */
-    uint64_t qva;
-    /* Starting offset in our mmaped space. */
-    uint64_t mmap_offset;
-    /* Start address of mmaped space. */
-    uint64_t mmap_addr;
-} VubrDevRegion;
-
-typedef struct VubrDev {
-    int sock;
-    Dispatcher dispatcher;
-    uint32_t nregions;
-    VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
-    VubrVirtq vq[MAX_NR_VIRTQUEUE];
-    int log_call_fd;
-    uint64_t log_size;
-    uint8_t *log_table;
-    int backend_udp_sock;
-    struct sockaddr_in backend_udp_dest;
-    int ready;
-    uint64_t features;
-    int hdrlen;
-} VubrDev;
-
-static const char *vubr_request_str[] = {
-    [VHOST_USER_NONE]                   =  "VHOST_USER_NONE",
-    [VHOST_USER_GET_FEATURES]           =  "VHOST_USER_GET_FEATURES",
-    [VHOST_USER_SET_FEATURES]           =  "VHOST_USER_SET_FEATURES",
-    [VHOST_USER_SET_OWNER]              =  "VHOST_USER_SET_OWNER",
-    [VHOST_USER_RESET_OWNER]           =  "VHOST_USER_RESET_OWNER",
-    [VHOST_USER_SET_MEM_TABLE]          =  "VHOST_USER_SET_MEM_TABLE",
-    [VHOST_USER_SET_LOG_BASE]           =  "VHOST_USER_SET_LOG_BASE",
-    [VHOST_USER_SET_LOG_FD]             =  "VHOST_USER_SET_LOG_FD",
-    [VHOST_USER_SET_VRING_NUM]          =  "VHOST_USER_SET_VRING_NUM",
-    [VHOST_USER_SET_VRING_ADDR]         =  "VHOST_USER_SET_VRING_ADDR",
-    [VHOST_USER_SET_VRING_BASE]         =  "VHOST_USER_SET_VRING_BASE",
-    [VHOST_USER_GET_VRING_BASE]         =  "VHOST_USER_GET_VRING_BASE",
-    [VHOST_USER_SET_VRING_KICK]         =  "VHOST_USER_SET_VRING_KICK",
-    [VHOST_USER_SET_VRING_CALL]         =  "VHOST_USER_SET_VRING_CALL",
-    [VHOST_USER_SET_VRING_ERR]          =  "VHOST_USER_SET_VRING_ERR",
-    [VHOST_USER_GET_PROTOCOL_FEATURES]  =  "VHOST_USER_GET_PROTOCOL_FEATURES",
-    [VHOST_USER_SET_PROTOCOL_FEATURES]  =  "VHOST_USER_SET_PROTOCOL_FEATURES",
-    [VHOST_USER_GET_QUEUE_NUM]          =  "VHOST_USER_GET_QUEUE_NUM",
-    [VHOST_USER_SET_VRING_ENABLE]       =  "VHOST_USER_SET_VRING_ENABLE",
-    [VHOST_USER_SEND_RARP]              =  "VHOST_USER_SEND_RARP",
-    [VHOST_USER_MAX]                    =  "VHOST_USER_MAX",
-};
-
 static void
-print_buffer(uint8_t *buf, size_t len)
+vubr_handle_tx(VuDev *dev, int qidx)
 {
-    int i;
-    printf("Raw buffer:\n");
-    for (i = 0; i < len; i++) {
-        if (i % 16 == 0) {
-            printf("\n");
-        }
-        if (i % 4 == 0) {
-            printf("   ");
-        }
-        printf("%02x ", buf[i]);
-    }
-    printf("\n............................................................\n");
-}
+    VuVirtq *vq = vu_get_queue(dev, qidx);
+    VubrDev *vubr = container_of(dev, VubrDev, vudev);
+    int hdrlen = vubr->hdrlen;
+    VuVirtqElement *elem = NULL;
 
-/* Translate guest physical address to our virtual address.  */
-static uint64_t
-gpa_to_va(VubrDev *dev, uint64_t guest_addr)
-{
-    int i;
-
-    /* Find matching memory region.  */
-    for (i = 0; i < dev->nregions; i++) {
-        VubrDevRegion *r = &dev->regions[i];
-
-        if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
-            return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
-        }
-    }
-
-    assert(!"address not found in regions");
-    return 0;
-}
-
-/* Translate qemu virtual address to our virtual address.  */
-static uint64_t
-qva_to_va(VubrDev *dev, uint64_t qemu_addr)
-{
-    int i;
+    assert(qidx % 2);
 
-    /* Find matching memory region.  */
-    for (i = 0; i < dev->nregions; i++) {
-        VubrDevRegion *r = &dev->regions[i];
+    for (;;) {
+        ssize_t ret;
+        unsigned int out_num;
+        struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg;
 
-        if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
-            return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
+        elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
+        if (!elem) {
+            break;
         }
-    }
-
-    assert(!"address not found in regions");
-    return 0;
-}
 
-static void
-vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
-{
-    char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
-    struct iovec iov = {
-        .iov_base = (char *)vmsg,
-        .iov_len = VHOST_USER_HDR_SIZE,
-    };
-    struct msghdr msg = {
-        .msg_iov = &iov,
-        .msg_iovlen = 1,
-        .msg_control = control,
-        .msg_controllen = sizeof(control),
-    };
-    size_t fd_size;
-    struct cmsghdr *cmsg;
-    int rc;
-
-    rc = recvmsg(conn_fd, &msg, 0);
-
-    if (rc == 0) {
-        vubr_die("recvmsg");
-        fprintf(stderr, "Peer disconnected.\n");
-        exit(1);
-    }
-    if (rc < 0) {
-        vubr_die("recvmsg");
-    }
-
-    vmsg->fd_num = 0;
-    for (cmsg = CMSG_FIRSTHDR(&msg);
-         cmsg != NULL;
-         cmsg = CMSG_NXTHDR(&msg, cmsg))
-    {
-        if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
-            fd_size = cmsg->cmsg_len - CMSG_LEN(0);
-            vmsg->fd_num = fd_size / sizeof(int);
-            memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
+        out_num = elem->out_num;
+        out_sg = elem->out_sg;
+        if (out_num < 1) {
+            fprintf(stderr, "virtio-net header not in first element\n");
             break;
         }
-    }
-
-    if (vmsg->size > sizeof(vmsg->payload)) {
-        fprintf(stderr,
-                "Error: too big message request: %d, size: vmsg->size: %u, "
-                "while sizeof(vmsg->payload) = %zu\n",
-                vmsg->request, vmsg->size, sizeof(vmsg->payload));
-        exit(1);
-    }
-
-    if (vmsg->size) {
-        rc = read(conn_fd, &vmsg->payload, vmsg->size);
-        if (rc == 0) {
-            vubr_die("recvmsg");
-            fprintf(stderr, "Peer disconnected.\n");
-            exit(1);
+        if (VHOST_USER_BRIDGE_DEBUG) {
+            iov_hexdump(out_sg, out_num, stderr, "TX:", 1024);
         }
-        if (rc < 0) {
-            vubr_die("recvmsg");
+
+        if (hdrlen) {
+            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
+                                       out_sg, out_num,
+                                       hdrlen, -1);
+            out_num = sg_num;
+            out_sg = sg;
         }
 
-        assert(rc == vmsg->size);
-    }
-}
+        struct msghdr msg = {
+            .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
+            .msg_namelen = sizeof(struct sockaddr_in),
+            .msg_iov = out_sg,
+            .msg_iovlen = out_num,
+        };
+        do {
+            ret = sendmsg(vubr->backend_udp_sock, &msg, 0);
+        } while (ret == -1 && (errno == EAGAIN || errno == EINTR));
 
-static void
-vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
-{
-    int rc;
+        if (ret == -1) {
+            vubr_die("sendmsg()");
+        }
 
-    do {
-        rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
-    } while (rc < 0 && errno == EINTR);
+        vu_queue_push(dev, vq, elem, 0);
+        vu_queue_notify(dev, vq);
 
-    if (rc < 0) {
-        vubr_die("write");
+        free(elem);
+        elem = NULL;
     }
-}
 
-static void
-vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
-{
-    int slen = sizeof(struct sockaddr_in);
-
-    if (sendto(dev->backend_udp_sock, buf, len, 0,
-               (struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
-        vubr_die("sendto()");
-    }
+    free(elem);
 }
 
-static int
-vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen)
+static void
+iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
 {
-    int slen = sizeof(struct sockaddr_in);
-    int rc;
+    struct iovec *cur;
 
-    rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0,
-                  (struct sockaddr *) &dev->backend_udp_dest,
-                  (socklen_t *)&slen);
-    if (rc == -1) {
-        vubr_die("recvfrom()");
+    for (cur = front; front != iov; cur++) {
+        bytes -= cur->iov_len;
     }
 
-    return rc;
+    cur->iov_base -= bytes;
+    cur->iov_len += bytes;
 }
 
 static void
-vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
+iov_truncate(struct iovec *iov, unsigned iovc, size_t bytes)
 {
-    int hdrlen = dev->hdrlen;
-    DPRINT("    hdrlen = %d\n", dev->hdrlen);
+    unsigned i;
 
-    if (VHOST_USER_BRIDGE_DEBUG) {
-        print_buffer(buf, len);
-    }
-    vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
-}
+    for (i = 0; i < iovc; i++, iov++) {
+        if (bytes < iov->iov_len) {
+            iov->iov_len = bytes;
+            return;
+        }
 
-/* Kick the log_call_fd if required. */
-static void
-vubr_log_kick(VubrDev *dev)
-{
-    if (dev->log_call_fd != -1) {
-        DPRINT("Kicking the QEMU's log...\n");
-        eventfd_write(dev->log_call_fd, 1);
+        bytes -= iov->iov_len;
     }
-}
 
-/* Kick the guest if necessary. */
-static void
-vubr_virtqueue_kick(VubrVirtq *vq)
-{
-    if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
-        DPRINT("Kicking the guest...\n");
-        eventfd_write(vq->call_fd, 1);
-    }
+    assert(!"couldn't truncate iov");
 }
 
 static void
-vubr_log_page(uint8_t *log_table, uint64_t page)
+vubr_backend_recv_cb(int sock, void *ctx)
 {
-    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
-    atomic_or(&log_table[page / 8], 1 << (page % 8));
-}
+    VubrDev *vubr = (VubrDev *) ctx;
+    VuDev *dev = &vubr->vudev;
+    VuVirtq *vq = vu_get_queue(dev, 0);
+    VuVirtqElement *elem = NULL;
+    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
+    struct virtio_net_hdr_mrg_rxbuf mhdr;
+    unsigned mhdr_cnt = 0;
+    int hdrlen = vubr->hdrlen;
+    int i = 0;
+    struct virtio_net_hdr hdr = {
+        .flags = 0,
+        .gso_type = VIRTIO_NET_HDR_GSO_NONE
+    };
 
-static void
-vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
-{
-    uint64_t page;
+    DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
+    DPRINT("    hdrlen = %d\n", hdrlen);
 
-    if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
-        !dev->log_table || !length) {
+    if (!vu_queue_enabled(dev, vq) ||
+        !vu_queue_avail_bytes(dev, vq, hdrlen, 0)) {
+        DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
         return;
     }
 
-    assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
-
-    page = address / VHOST_LOG_PAGE;
-    while (page * VHOST_LOG_PAGE < address + length) {
-        vubr_log_page(dev->log_table, page);
-        page += VHOST_LOG_PAGE;
-    }
-    vubr_log_kick(dev);
-}
-
-static void
-vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
-{
-    struct vring_desc *desc = vq->desc;
-    struct vring_avail *avail = vq->avail;
-    struct vring_used *used = vq->used;
-    uint64_t log_guest_addr = vq->log_guest_addr;
-    int32_t remaining_len = len;
-
-    unsigned int size = vq->size;
-
-    uint16_t avail_index = atomic_mb_read(&avail->idx);
-
-    /* We check the available descriptors before posting the
-     * buffer, so here we assume that enough available
-     * descriptors. */
-    assert(vq->last_avail_index != avail_index);
-    uint16_t a_index = vq->last_avail_index % size;
-    uint16_t u_index = vq->last_used_index % size;
-    uint16_t d_index = avail->ring[a_index];
-
-    int i = d_index;
-    uint32_t written_len = 0;
-
     do {
-        DPRINT("Post packet to guest on vq:\n");
-        DPRINT("    size             = %d\n", vq->size);
-        DPRINT("    last_avail_index = %d\n", vq->last_avail_index);
-        DPRINT("    last_used_index  = %d\n", vq->last_used_index);
-        DPRINT("    a_index = %d\n", a_index);
-        DPRINT("    u_index = %d\n", u_index);
-        DPRINT("    d_index = %d\n", d_index);
-        DPRINT("    desc[%d].addr    = 0x%016"PRIx64"\n", i, desc[i].addr);
-        DPRINT("    desc[%d].len     = %d\n", i, desc[i].len);
-        DPRINT("    desc[%d].flags   = %d\n", i, desc[i].flags);
-        DPRINT("    avail->idx = %d\n", avail_index);
-        DPRINT("    used->idx  = %d\n", used->idx);
-
-        if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
-            /* FIXME: we should find writable descriptor. */
-            fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
-            exit(1);
-        }
-
-        void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
-        uint32_t chunk_len = desc[i].len;
-        uint32_t chunk_write_len = MIN(remaining_len, chunk_len);
+        struct iovec *sg;
+        ssize_t ret, total = 0;
+        unsigned int num;
 
-        memcpy(chunk_start, buf + written_len, chunk_write_len);
-        vubr_log_write(dev, desc[i].addr, chunk_write_len);
-        remaining_len -= chunk_write_len;
-        written_len += chunk_write_len;
-
-        if ((remaining_len == 0) || !(desc[i].flags & VRING_DESC_F_NEXT)) {
+        elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
+        if (!elem) {
             break;
         }
 
-        i = desc[i].next;
-    } while (1);
-
-    if (remaining_len > 0) {
-            fprintf(stderr,
-                    "Too long packet for RX, remaining_len = %d, Dropping...\n",
-                    remaining_len);
-            return;
-    }
-
-    /* Add descriptor to the used ring. */
-    used->ring[u_index].id = d_index;
-    used->ring[u_index].len = len;
-    vubr_log_write(dev,
-                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
-                   sizeof(used->ring[u_index]));
-
-    vq->last_avail_index++;
-    vq->last_used_index++;
-
-    atomic_mb_set(&used->idx, vq->last_used_index);
-    vubr_log_write(dev,
-                   log_guest_addr + offsetof(struct vring_used, idx),
-                   sizeof(used->idx));
-
-    /* Kick the guest if necessary. */
-    vubr_virtqueue_kick(vq);
-}
-
-static int
-vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
-{
-    struct vring_desc *desc = vq->desc;
-    struct vring_avail *avail = vq->avail;
-    struct vring_used *used = vq->used;
-    uint64_t log_guest_addr = vq->log_guest_addr;
-
-    unsigned int size = vq->size;
-
-    uint16_t a_index = vq->last_avail_index % size;
-    uint16_t u_index = vq->last_used_index % size;
-    uint16_t d_index = avail->ring[a_index];
-
-    uint32_t i, len = 0;
-    size_t buf_size = 4096;
-    uint8_t buf[4096];
-
-    DPRINT("Chunks: ");
-    i = d_index;
-    do {
-        void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
-        uint32_t chunk_len = desc[i].len;
-
-        assert(!(desc[i].flags & VRING_DESC_F_WRITE));
-
-        if (len + chunk_len < buf_size) {
-            memcpy(buf + len, chunk_start, chunk_len);
-            DPRINT("%d ", chunk_len);
-        } else {
-            fprintf(stderr, "Error: too long packet. Dropping...\n");
+        if (elem->in_num < 1) {
+            fprintf(stderr, "virtio-net contains no in buffers\n");
             break;
         }
 
-        len += chunk_len;
-
-        if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
-            break;
+        sg = elem->in_sg;
+        num = elem->in_num;
+        if (i == 0) {
+            if (hdrlen == 12) {
+                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
+                                    sg, elem->in_num,
+                                    offsetof(typeof(mhdr), num_buffers),
+                                    sizeof(mhdr.num_buffers));
+            }
+            iov_from_buf(sg, elem->in_num, 0, &hdr, sizeof hdr);
+            total += hdrlen;
+            assert(iov_discard_front(&sg, &num, hdrlen) == hdrlen);
         }
 
-        i = desc[i].next;
-    } while (1);
-    DPRINT("\n");
-
-    if (!len) {
-        return -1;
-    }
-
-    /* Add descriptor to the used ring. */
-    used->ring[u_index].id = d_index;
-    used->ring[u_index].len = len;
-    vubr_log_write(dev,
-                   log_guest_addr + offsetof(struct vring_used, ring[u_index]),
-                   sizeof(used->ring[u_index]));
-
-    vubr_consume_raw_packet(dev, buf, len);
-
-    return 0;
-}
+        struct msghdr msg = {
+            .msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
+            .msg_namelen = sizeof(struct sockaddr_in),
+            .msg_iov = sg,
+            .msg_iovlen = elem->in_num,
+            .msg_flags = MSG_DONTWAIT,
+        };
+        do {
+            ret = recvmsg(vubr->backend_udp_sock, &msg, 0);
+        } while (ret == -1 && (errno == EINTR));
 
-static void
-vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
-{
-    struct vring_avail *avail = vq->avail;
-    struct vring_used *used = vq->used;
-    uint64_t log_guest_addr = vq->log_guest_addr;
-
-    while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
-        vubr_process_desc(dev, vq);
-        vq->last_avail_index++;
-        vq->last_used_index++;
-    }
+        if (i == 0) {
+            iov_restore_front(elem->in_sg, sg, hdrlen);
+        }
 
-    atomic_mb_set(&used->idx, vq->last_used_index);
-    vubr_log_write(dev,
-                   log_guest_addr + offsetof(struct vring_used, idx),
-                   sizeof(used->idx));
-}
+        if (ret == -1) {
+            if (errno == EWOULDBLOCK) {
+                vu_queue_rewind(dev, vq, 1);
+                break;
+            }
 
-static void
-vubr_backend_recv_cb(int sock, void *ctx)
-{
-    VubrDev *dev = (VubrDev *) ctx;
-    VubrVirtq *rx_vq = &dev->vq[0];
-    uint8_t buf[4096];
-    struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
-    int hdrlen = dev->hdrlen;
-    int buflen = sizeof(buf);
-    int len;
-
-    if (!dev->ready) {
-        return;
-    }
+            vubr_die("recvmsg()");
+        }
 
-    DPRINT("\n\n   ***   IN UDP RECEIVE CALLBACK    ***\n\n");
-    DPRINT("    hdrlen = %d\n", hdrlen);
+        total += ret;
+        iov_truncate(elem->in_sg, elem->in_num, total);
+        vu_queue_fill(dev, vq, elem, total, i++);
 
-    uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
+        free(elem);
+        elem = NULL;
+    } while (false); /* could loop if DONTWAIT worked? */
 
-    /* If there is no available descriptors, just do nothing.
-     * The buffer will be handled by next arrived UDP packet,
-     * or next kick on receive virtq. */
-    if (rx_vq->last_avail_index == avail_index) {
-        DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
-        return;
+    if (mhdr_cnt) {
+        mhdr.num_buffers = i;
+        iov_from_buf(mhdr_sg, mhdr_cnt,
+                     0,
+                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
     }
 
-    memset(buf, 0, hdrlen);
-    /* TODO: support mergeable buffers. */
-    if (hdrlen == 12)
-        hdr->num_buffers = 1;
-    len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);
+    vu_queue_flush(dev, vq, i);
+    vu_queue_notify(dev, vq);
 
-    vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
+    free(elem);
 }
 
 static void
-vubr_kick_cb(int sock, void *ctx)
+vubr_receive_cb(int sock, void *ctx)
 {
-    VubrDev *dev = (VubrDev *) ctx;
-    eventfd_t kick_data;
-    ssize_t rc;
+    VubrDev *vubr = (VubrDev *)ctx;
 
-    rc = eventfd_read(sock, &kick_data);
-    if (rc == -1) {
-        vubr_die("eventfd_read()");
-    } else {
-        DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
-        vubr_process_avail(dev, &dev->vq[1]);
+    if (!vu_dispatch(&vubr->vudev)) {
+        fprintf(stderr, "Error while dispatching\n");
     }
 }
 
-static int
-vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
-}
+typedef struct WatchData {
+    VuDev *dev;
+    vu_watch_cb cb;
+    void *data;
+} WatchData;
 
-static int
-vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+watch_cb(int sock, void *ctx)
 {
-    vmsg->payload.u64 =
-            ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
-             (1ULL << VHOST_F_LOG_ALL) |
-             (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-             (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
-
-    vmsg->size = sizeof(vmsg->payload.u64);
+    struct WatchData *wd = ctx;
 
-    DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-
-    /* Reply */
-    return 1;
+    wd->cb(wd->dev, VU_WATCH_IN, wd->data);
 }
 
-static int
-vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_set_watch(VuDev *dev, int fd, int condition,
+               vu_watch_cb cb, void *data)
 {
-    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-
-    dev->features = vmsg->payload.u64;
-    if ((dev->features & (1ULL << VIRTIO_F_VERSION_1)) ||
-        (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
-        dev->hdrlen = 12;
-    } else {
-        dev->hdrlen = 10;
-    }
+    VubrDev *vubr = container_of(dev, VubrDev, vudev);
+    static WatchData watches[FD_SETSIZE];
+    struct WatchData *wd = &watches[fd];
 
-    return 0;
-}
-
-static int
-vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    return 0;
+    wd->cb = cb;
+    wd->data = data;
+    wd->dev = dev;
+    dispatcher_add(&vubr->dispatcher, fd, wd, watch_cb);
 }
 
 static void
-vubr_close_log(VubrDev *dev)
+vubr_remove_watch(VuDev *dev, int fd)
 {
-    if (dev->log_table) {
-        if (munmap(dev->log_table, dev->log_size) != 0) {
-            vubr_die("munmap()");
-        }
+    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 
-        dev->log_table = 0;
-    }
-    if (dev->log_call_fd != -1) {
-        close(dev->log_call_fd);
-        dev->log_call_fd = -1;
-    }
-}
-
-static int
-vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    vubr_close_log(dev);
-    dev->ready = 0;
-    dev->features = 0;
-    return 0;
+    dispatcher_remove(&vubr->dispatcher, fd);
 }
 
 static int
-vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
+vubr_send_rarp_exec(VuDev *dev, VhostUserMsg *vmsg)
 {
-    int i;
-    VhostUserMemory *memory = &vmsg->payload.memory;
-    dev->nregions = memory->nregions;
-
-    DPRINT("Nregions: %d\n", memory->nregions);
-    for (i = 0; i < dev->nregions; i++) {
-        void *mmap_addr;
-        VhostUserMemoryRegion *msg_region = &memory->regions[i];
-        VubrDevRegion *dev_region = &dev->regions[i];
-
-        DPRINT("Region %d\n", i);
-        DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
-               msg_region->guest_phys_addr);
-        DPRINT("    memory_size:     0x%016"PRIx64"\n",
-               msg_region->memory_size);
-        DPRINT("    userspace_addr   0x%016"PRIx64"\n",
-               msg_region->userspace_addr);
-        DPRINT("    mmap_offset      0x%016"PRIx64"\n",
-               msg_region->mmap_offset);
-
-        dev_region->gpa = msg_region->guest_phys_addr;
-        dev_region->size = msg_region->memory_size;
-        dev_region->qva = msg_region->userspace_addr;
-        dev_region->mmap_offset = msg_region->mmap_offset;
-
-        /* We don't use offset argument of mmap() since the
-         * mapped address has to be page aligned, and we use huge
-         * pages.  */
-        mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
-                         PROT_READ | PROT_WRITE, MAP_SHARED,
-                         vmsg->fds[i], 0);
-
-        if (mmap_addr == MAP_FAILED) {
-            vubr_die("mmap");
-        }
-        dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
-        DPRINT("    mmap_addr:       0x%016"PRIx64"\n", dev_region->mmap_addr);
-
-        close(vmsg->fds[i]);
-    }
-
+    DPRINT("Function %s() not implemented yet.\n", __func__);
     return 0;
 }
 
 static int
-vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
+vubr_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
 {
-    int fd;
-    uint64_t log_mmap_size, log_mmap_offset;
-    void *rc;
-
-    assert(vmsg->fd_num == 1);
-    fd = vmsg->fds[0];
-
-    assert(vmsg->size == sizeof(vmsg->payload.log));
-    log_mmap_offset = vmsg->payload.log.mmap_offset;
-    log_mmap_size = vmsg->payload.log.mmap_size;
-    DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
-    DPRINT("Log mmap_size:   %"PRId64"\n", log_mmap_size);
-
-    rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
-              log_mmap_offset);
-    if (rc == MAP_FAILED) {
-        vubr_die("mmap");
+    switch (vmsg->request) {
+    case VHOST_USER_SEND_RARP:
+        *do_reply = vubr_send_rarp_exec(dev, vmsg);
+        return 1;
+    default:
+        /* let the library handle the rest */
+        return 0;
     }
-    dev->log_table = rc;
-    dev->log_size = log_mmap_size;
 
-    vmsg->size = sizeof(vmsg->payload.u64);
-    /* Reply */
-    return 1;
-}
-
-static int
-vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    assert(vmsg->fd_num == 1);
-    dev->log_call_fd = vmsg->fds[0];
-    DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
     return 0;
 }
 
-static int
-vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_set_features(VuDev *dev, uint64_t features)
 {
-    unsigned int index = vmsg->payload.state.index;
-    unsigned int num = vmsg->payload.state.num;
-
-    DPRINT("State.index: %d\n", index);
-    DPRINT("State.num:   %d\n", num);
-    dev->vq[index].size = num;
-    return 0;
-}
+    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 
-static int
-vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    struct vhost_vring_addr *vra = &vmsg->payload.addr;
-    unsigned int index = vra->index;
-    VubrVirtq *vq = &dev->vq[index];
-
-    DPRINT("vhost_vring_addr:\n");
-    DPRINT("    index:  %d\n", vra->index);
-    DPRINT("    flags:  %d\n", vra->flags);
-    DPRINT("    desc_user_addr:   0x%016llx\n", vra->desc_user_addr);
-    DPRINT("    used_user_addr:   0x%016llx\n", vra->used_user_addr);
-    DPRINT("    avail_user_addr:  0x%016llx\n", vra->avail_user_addr);
-    DPRINT("    log_guest_addr:   0x%016llx\n", vra->log_guest_addr);
-
-    vq->desc = (struct vring_desc *)(uintptr_t)qva_to_va(dev, vra->desc_user_addr);
-    vq->used = (struct vring_used *)(uintptr_t)qva_to_va(dev, vra->used_user_addr);
-    vq->avail = (struct vring_avail *)(uintptr_t)qva_to_va(dev, vra->avail_user_addr);
-    vq->log_guest_addr = vra->log_guest_addr;
-
-    DPRINT("Setting virtq addresses:\n");
-    DPRINT("    vring_desc  at %p\n", vq->desc);
-    DPRINT("    vring_used  at %p\n", vq->used);
-    DPRINT("    vring_avail at %p\n", vq->avail);
-
-    vq->last_used_index = vq->used->idx;
-
-    if (vq->last_avail_index != vq->used->idx) {
-        DPRINT("Last avail index != used index: %d != %d, resuming",
-               vq->last_avail_index, vq->used->idx);
-        vq->last_avail_index = vq->used->idx;
+    if ((features & (1ULL << VIRTIO_F_VERSION_1)) ||
+        (features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
+        vubr->hdrlen = 12;
+    } else {
+        vubr->hdrlen = 10;
     }
-
-    return 0;
 }
 
-static int
-vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    unsigned int index = vmsg->payload.state.index;
-    unsigned int num = vmsg->payload.state.num;
-
-    DPRINT("State.index: %d\n", index);
-    DPRINT("State.num:   %d\n", num);
-    dev->vq[index].last_avail_index = num;
-
-    return 0;
-}
-
-static int
-vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    unsigned int index = vmsg->payload.state.index;
-
-    DPRINT("State.index: %d\n", index);
-    vmsg->payload.state.num = dev->vq[index].last_avail_index;
-    vmsg->size = sizeof(vmsg->payload.state);
-    /* FIXME: this is a work-around for a bug in QEMU enabling
-     * too early vrings. When protocol features are enabled,
-     * we have to respect * VHOST_USER_SET_VRING_ENABLE request. */
-    dev->ready = 0;
-
-    if (dev->vq[index].call_fd != -1) {
-        close(dev->vq[index].call_fd);
-        dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
-        dev->vq[index].call_fd = -1;
-    }
-    if (dev->vq[index].kick_fd != -1) {
-        close(dev->vq[index].kick_fd);
-        dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
-        dev->vq[index].kick_fd = -1;
-    }
-
-    /* Reply */
-    return 1;
-}
-
-static int
-vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static uint64_t
+vubr_get_features(VuDev *dev)
 {
-    uint64_t u64_arg = vmsg->payload.u64;
-    int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
-
-    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-
-    assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
-    assert(vmsg->fd_num == 1);
-
-    if (dev->vq[index].kick_fd != -1) {
-        close(dev->vq[index].kick_fd);
-        dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
-    }
-    dev->vq[index].kick_fd = vmsg->fds[0];
-    DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
-
-    if (index % 2 == 1) {
-        /* TX queue. */
-        dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
-                       dev, vubr_kick_cb);
-
-        DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
-               dev->vq[index].kick_fd, index);
-    }
-    /* We temporarily use this hack to determine that both TX and RX
-     * queues are set up and ready for processing.
-     * FIXME: we need to rely in VHOST_USER_SET_VRING_ENABLE and
-     * actual kicks. */
-    if (dev->vq[0].kick_fd != -1 &&
-        dev->vq[1].kick_fd != -1) {
-        dev->ready = 1;
-        DPRINT("vhost-user-bridge is ready for processing queues.\n");
-    }
-    return 0;
-
+    return 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |
+        1ULL << VIRTIO_NET_F_MRG_RXBUF;
 }
 
-static int
-vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_queue_set_started(VuDev *dev, int qidx, bool started)
 {
-    uint64_t u64_arg = vmsg->payload.u64;
-    int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
+    VuVirtq *vq = vu_get_queue(dev, qidx);
 
-    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-    assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
-    assert(vmsg->fd_num == 1);
-
-    if (dev->vq[index].call_fd != -1) {
-        close(dev->vq[index].call_fd);
-        dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
+    if (qidx % 2 == 1) {
+        vu_set_queue_handler(dev, vq, started ? vubr_handle_tx : NULL);
     }
-    dev->vq[index].call_fd = vmsg->fds[0];
-    DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
-
-    return 0;
-}
-
-static int
-vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-    return 0;
-}
-
-static int
-vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
-    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-    vmsg->size = sizeof(vmsg->payload.u64);
-
-    /* Reply */
-    return 1;
 }
 
-static int
-vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    /* FIXME: unimplented */
-    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
-    return 0;
-}
-
-static int
-vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
-}
-
-static int
-vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
+static void
+vubr_panic(VuDev *dev, const char *msg)
 {
-    unsigned int index = vmsg->payload.state.index;
-    unsigned int enable = vmsg->payload.state.num;
+    VubrDev *vubr = container_of(dev, VubrDev, vudev);
 
-    DPRINT("State.index: %d\n", index);
-    DPRINT("State.enable:   %d\n", enable);
-    dev->vq[index].enable = enable;
-    return 0;
-}
+    fprintf(stderr, "PANIC: %s\n", msg);
 
-static int
-vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    DPRINT("Function %s() not implemented yet.\n", __func__);
-    return 0;
+    dispatcher_remove(&vubr->dispatcher, dev->sock);
+    vubr->quit = 1;
 }
 
-static int
-vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg)
-{
-    /* Print out generic part of the request. */
-    DPRINT(
-           "==================   Vhost user message from QEMU   ==================\n");
-    DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request],
-           vmsg->request);
-    DPRINT("Flags:   0x%x\n", vmsg->flags);
-    DPRINT("Size:    %d\n", vmsg->size);
-
-    if (vmsg->fd_num) {
-        int i;
-        DPRINT("Fds:");
-        for (i = 0; i < vmsg->fd_num; i++) {
-            DPRINT(" %d", vmsg->fds[i]);
-        }
-        DPRINT("\n");
-    }
-
-    switch (vmsg->request) {
-    case VHOST_USER_NONE:
-        return vubr_none_exec(dev, vmsg);
-    case VHOST_USER_GET_FEATURES:
-        return vubr_get_features_exec(dev, vmsg);
-    case VHOST_USER_SET_FEATURES:
-        return vubr_set_features_exec(dev, vmsg);
-    case VHOST_USER_SET_OWNER:
-        return vubr_set_owner_exec(dev, vmsg);
-    case VHOST_USER_RESET_OWNER:
-        return vubr_reset_device_exec(dev, vmsg);
-    case VHOST_USER_SET_MEM_TABLE:
-        return vubr_set_mem_table_exec(dev, vmsg);
-    case VHOST_USER_SET_LOG_BASE:
-        return vubr_set_log_base_exec(dev, vmsg);
-    case VHOST_USER_SET_LOG_FD:
-        return vubr_set_log_fd_exec(dev, vmsg);
-    case VHOST_USER_SET_VRING_NUM:
-        return vubr_set_vring_num_exec(dev, vmsg);
-    case VHOST_USER_SET_VRING_ADDR:
-        return vubr_set_vring_addr_exec(dev, vmsg);
-    case VHOST_USER_SET_VRING_BASE:
-        return vubr_set_vring_base_exec(dev, vmsg);
-    case VHOST_USER_GET_VRING_BASE:
-        return vubr_get_vring_base_exec(dev, vmsg);
-    case VHOST_USER_SET_VRING_KICK:
-        return vubr_set_vring_kick_exec(dev, vmsg);
-    case VHOST_USER_SET_VRING_CALL:
-        return vubr_set_vring_call_exec(dev, vmsg);
-    case VHOST_USER_SET_VRING_ERR:
-        return vubr_set_vring_err_exec(dev, vmsg);
-    case VHOST_USER_GET_PROTOCOL_FEATURES:
-        return vubr_get_protocol_features_exec(dev, vmsg);
-    case VHOST_USER_SET_PROTOCOL_FEATURES:
-        return vubr_set_protocol_features_exec(dev, vmsg);
-    case VHOST_USER_GET_QUEUE_NUM:
-        return vubr_get_queue_num_exec(dev, vmsg);
-    case VHOST_USER_SET_VRING_ENABLE:
-        return vubr_set_vring_enable_exec(dev, vmsg);
-    case VHOST_USER_SEND_RARP:
-        return vubr_send_rarp_exec(dev, vmsg);
-
-    case VHOST_USER_MAX:
-        assert(vmsg->request != VHOST_USER_MAX);
-    }
-    return 0;
-}
-
-static void
-vubr_receive_cb(int sock, void *ctx)
-{
-    VubrDev *dev = (VubrDev *) ctx;
-    VhostUserMsg vmsg;
-    int reply_requested;
-
-    vubr_message_read(sock, &vmsg);
-    reply_requested = vubr_execute_request(dev, &vmsg);
-    if (reply_requested) {
-        /* Set the version in the flags when sending the reply */
-        vmsg.flags &= ~VHOST_USER_VERSION_MASK;
-        vmsg.flags |= VHOST_USER_VERSION;
-        vmsg.flags |= VHOST_USER_REPLY_MASK;
-        vubr_message_write(sock, &vmsg);
-    }
-}
+static const VuDevIface vuiface = {
+    .get_features = vubr_get_features,
+    .set_features = vubr_set_features,
+    .process_msg = vubr_process_msg,
+    .queue_set_started = vubr_queue_set_started,
+};
 
 static void
 vubr_accept_cb(int sock, void *ctx)
@@ -1204,36 +479,26 @@ vubr_accept_cb(int sock, void *ctx)
         vubr_die("accept()");
     }
     DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
+
+    vu_init(&dev->vudev,
+            conn_fd,
+            vubr_panic,
+            vubr_set_watch,
+            vubr_remove_watch,
+            &vuiface);
+
     dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
+    dispatcher_remove(&dev->dispatcher, sock);
 }
 
 static VubrDev *
 vubr_new(const char *path, bool client)
 {
     VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
-    dev->nregions = 0;
-    int i;
     struct sockaddr_un un;
     CallbackFunc cb;
     size_t len;
 
-    for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
-        dev->vq[i] = (VubrVirtq) {
-            .call_fd = -1, .kick_fd = -1,
-            .size = 0,
-            .last_avail_index = 0, .last_used_index = 0,
-            .desc = 0, .avail = 0, .used = 0,
-            .enable = 0,
-        };
-    }
-
-    /* Init log */
-    dev->log_call_fd = -1;
-    dev->log_size = 0;
-    dev->log_table = 0;
-    dev->ready = 0;
-    dev->features = 0;
-
     /* Get a UNIX socket. */
     dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
     if (dev->sock == -1) {
@@ -1261,10 +526,17 @@ vubr_new(const char *path, bool client)
         if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
             vubr_die("connect");
         }
+        vu_init(&dev->vudev,
+                dev->sock,
+                vubr_panic,
+                vubr_set_watch,
+                vubr_remove_watch,
+                &vuiface);
         cb = vubr_receive_cb;
     }
 
     dispatcher_init(&dev->dispatcher);
+
     dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);
 
     return dev;
@@ -1345,7 +617,7 @@ vubr_backend_udp_setup(VubrDev *dev,
 static void
 vubr_run(VubrDev *dev)
 {
-    while (1) {
+    while (!dev->quit) {
         /* timeout 200ms */
         dispatcher_wait(&dev->dispatcher, 200000);
         /* Here one can try polling strategy. */
@@ -1421,6 +693,9 @@ main(int argc, char *argv[])
 
     vubr_backend_udp_setup(dev, lhost, lport, rhost, rport);
     vubr_run(dev);
+
+    vu_deinit(&dev->vudev);
+
     return 0;
 
 out:
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 737bffa984..a5d2f6c0c3 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -131,6 +131,13 @@ void qemu_coroutine_enter(Coroutine *co)
     }
 }
 
+void qemu_coroutine_enter_if_inactive(Coroutine *co)
+{
+    if (!qemu_coroutine_entered(co)) {
+        qemu_coroutine_enter(co);
+    }
+}
+
 void coroutine_fn qemu_coroutine_yield(void)
 {
     Coroutine *self = qemu_coroutine_self();