author     Peter Maydell <peter.maydell@linaro.org>  2018-01-19 10:17:20 +0000
committer  Peter Maydell <peter.maydell@linaro.org>  2018-01-19 10:17:20 +0000
commit     3e5bdc6573edf0585e4085e6a4e349b135abf3b4 (patch)
tree       d5a30ac7e2374d362cc7a927665d1938ed1e03ba
parent     b4d6ed1c5ae519d3efb5297be3ef6625ca2a20f4 (diff)
parent     f4bf56fb78ed0e9f60fa1ed656c14ff4c494da5a (diff)
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc, pci, virtio: features, fixes, cleanups

A bunch of fixes, cleanups and new features all over the place.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu 18 Jan 2018 20:41:03 GMT
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (29 commits)
  vhost: remove assertion to prevent crash
  vhost-user: fix misaligned access to payload
  vhost-user: factor out msg head and payload
  tests: acpi: add comments to fetch_rsdt_referenced_tables/data->tables usage
  tests: acpi: rename test_acpi_tables()/test_dst_table() to reflect its usage
  tests: acpi: init table descriptor in test_dst_table()
  tests: acpi: move tested tables array allocation outside of test_acpi_dsdt_table()
  x86_iommu: check if machine has PCI bus
  x86_iommu: Move machine check to x86_iommu_realize()
  vhost-user-test: use init_virtio_dev in multiqueue test
  vhost-user-test: make features mask an init_virtio_dev() argument
  vhost-user-test: setup virtqueues in all tests
  vhost-user-test: extract read-guest-mem test from main loop
  vhost-user-test: fix features mask
  hw/acpi-build: Make next_base easy to follow
  ACPI/unit-test: Add a testcase for RAM allocation in numa node
  hw/pci-bridge: fix QEMU crash because of pcie-root-port
  intel-iommu: Extend address width to 48 bits
  intel-iommu: Redefine macros to enable supporting 48 bit address width
  vhost-user: fix multiple queue specification
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--  .gitignore  1
-rw-r--r--  MAINTAINERS  1
-rw-r--r--  Makefile  3
-rw-r--r--  Makefile.objs  1
-rw-r--r--  accel/kvm/kvm-all.c  4
-rw-r--r--  contrib/libvhost-user/libvhost-user.c  42
-rw-r--r--  contrib/libvhost-user/libvhost-user.h  33
-rw-r--r--  contrib/vhost-user-blk/Makefile.objs  1
-rw-r--r--  contrib/vhost-user-blk/vhost-user-blk.c  545
-rw-r--r--  default-configs/pci.mak  1
-rw-r--r--  default-configs/s390x-softmmu.mak  1
-rw-r--r--  docs/interop/vhost-user.txt  59
-rw-r--r--  hw/block/Makefile.objs  3
-rw-r--r--  hw/block/vhost-user-blk.c  359
-rw-r--r--  hw/i386/acpi-build.c  5
-rw-r--r--  hw/i386/amd_iommu.c  13
-rw-r--r--  hw/i386/intel_iommu.c  136
-rw-r--r--  hw/i386/intel_iommu_internal.h  43
-rw-r--r--  hw/i386/x86-iommu.c  13
-rw-r--r--  hw/pci-bridge/gen_pcie_root_port.c  7
-rw-r--r--  hw/pci/shpc.c  13
-rw-r--r--  hw/virtio/vhost-user.c  318
-rw-r--r--  hw/virtio/vhost.c  32
-rw-r--r--  hw/virtio/virtio-bus.c  19
-rw-r--r--  hw/virtio/virtio-pci.c  55
-rw-r--r--  hw/virtio/virtio-pci.h  18
-rw-r--r--  hw/virtio/virtio.c  5
-rw-r--r--  include/hw/i386/intel_iommu.h  7
-rw-r--r--  include/hw/virtio/vhost-backend.h  12
-rw-r--r--  include/hw/virtio/vhost-user-blk.h  41
-rw-r--r--  include/hw/virtio/vhost.h  15
-rw-r--r--  include/qemu/event_notifier.h  1
-rw-r--r--  include/qemu/host-utils.h  10
-rw-r--r--  tests/acpi-test-data/pc/DSDT.numamem  bin 0 -> 5150 bytes
-rw-r--r--  tests/acpi-test-data/pc/SRAT.numamem  bin 0 -> 224 bytes
-rw-r--r--  tests/acpi-test-data/q35/DSDT.numamem  bin 0 -> 7834 bytes
-rw-r--r--  tests/acpi-test-data/q35/SRAT.numamem  bin 0 -> 224 bytes
-rw-r--r--  tests/bios-tables-test.c  50
-rw-r--r--  tests/vhost-user-test.c  171
-rw-r--r--  util/event_notifier-posix.c  5
-rw-r--r--  util/event_notifier-win32.c  2
41 files changed, 1739 insertions, 306 deletions
diff --git a/.gitignore b/.gitignore
index 433f64f429..704b22285d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,6 +54,7 @@
/module_block.h
/scsi/qemu-pr-helper
/vhost-user-scsi
+/vhost-user-blk
/fsdev/virtfs-proxy-helper
*.tmp
*.[1-9]
diff --git a/MAINTAINERS b/MAINTAINERS
index 4770f105d4..753e7996ce 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -852,6 +852,7 @@ X86 Machines
------------
PC
M: Michael S. Tsirkin <mst@redhat.com>
+M: Marcel Apfelbaum <marcel@redhat.com>
S: Supported
F: include/hw/i386/
F: hw/i386/
diff --git a/Makefile b/Makefile
index f26ef1b1df..d835bb92e7 100644
--- a/Makefile
+++ b/Makefile
@@ -334,6 +334,7 @@ dummy := $(call unnest-vars,, \
ivshmem-server-obj-y \
libvhost-user-obj-y \
vhost-user-scsi-obj-y \
+ vhost-user-blk-obj-y \
qga-vss-dll-obj-y \
block-obj-y \
block-obj-m \
@@ -565,6 +566,8 @@ ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS)
endif
vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y) libvhost-user.a
$(call LINK, $^)
+vhost-user-blk$(EXESUF): $(vhost-user-blk-obj-y) libvhost-user.a
+ $(call LINK, $^)
module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
$(call quiet-command,$(PYTHON) $< $@ \
diff --git a/Makefile.objs b/Makefile.objs
index c8b1bba593..669d8d684d 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -115,6 +115,7 @@ libvhost-user-obj-y = contrib/libvhost-user/
vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS)
vhost-user-scsi.o-libs := $(LIBISCSI_LIBS)
vhost-user-scsi-obj-y = contrib/vhost-user-scsi/
+vhost-user-blk-obj-y = contrib/vhost-user-blk/
######################################################################
trace-events-subdirs =
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f290f487a5..071f4f57c0 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -812,6 +812,10 @@ static void kvm_mem_ioeventfd_del(MemoryListener *listener,
if (r < 0) {
abort();
}
+
+ if (e->cleanup) {
+ e->cleanup(e);
+ }
}
static void kvm_io_ioeventfd_add(MemoryListener *listener,
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
index f409bd3d41..27cc59791b 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -84,6 +84,8 @@ vu_request_to_string(unsigned int req)
REQ(VHOST_USER_SET_SLAVE_REQ_FD),
REQ(VHOST_USER_IOTLB_MSG),
REQ(VHOST_USER_SET_VRING_ENDIAN),
+ REQ(VHOST_USER_GET_CONFIG),
+ REQ(VHOST_USER_SET_CONFIG),
REQ(VHOST_USER_MAX),
};
#undef REQ
@@ -798,6 +800,42 @@ vu_set_slave_req_fd(VuDev *dev, VhostUserMsg *vmsg)
}
static bool
+vu_get_config(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int ret = -1;
+
+ if (dev->iface->get_config) {
+ ret = dev->iface->get_config(dev, vmsg->payload.config.region,
+ vmsg->payload.config.size);
+ }
+
+ if (ret) {
+ /* resize to zero to indicate an error to master */
+ vmsg->size = 0;
+ }
+
+ return true;
+}
+
+static bool
+vu_set_config(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int ret = -1;
+
+ if (dev->iface->set_config) {
+ ret = dev->iface->set_config(dev, vmsg->payload.config.region,
+ vmsg->payload.config.offset,
+ vmsg->payload.config.size,
+ vmsg->payload.config.flags);
+ if (ret) {
+ vu_panic(dev, "Set virtio configuration space failed");
+ }
+ }
+
+ return false;
+}
+
+static bool
vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
{
int do_reply = 0;
@@ -862,6 +900,10 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
return vu_set_vring_enable_exec(dev, vmsg);
case VHOST_USER_SET_SLAVE_REQ_FD:
return vu_set_slave_req_fd(dev, vmsg);
+ case VHOST_USER_GET_CONFIG:
+ return vu_get_config(dev, vmsg);
+ case VHOST_USER_SET_CONFIG:
+ return vu_set_config(dev, vmsg);
case VHOST_USER_NONE:
break;
default:
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
index 2f5864b5c4..f8a730b725 100644
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -30,6 +30,16 @@
#define VHOST_MEMORY_MAX_NREGIONS 8
+typedef enum VhostSetConfigType {
+ VHOST_SET_CONFIG_TYPE_MASTER = 0,
+ VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
+} VhostSetConfigType;
+
+/*
+ * Maximum size of virtio device config space
+ */
+#define VHOST_USER_MAX_CONFIG_SIZE 256
+
enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_MQ = 0,
VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
@@ -69,6 +79,8 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_SLAVE_REQ_FD = 21,
VHOST_USER_IOTLB_MSG = 22,
VHOST_USER_SET_VRING_ENDIAN = 23,
+ VHOST_USER_GET_CONFIG = 24,
+ VHOST_USER_SET_CONFIG = 25,
VHOST_USER_MAX
} VhostUserRequest;
@@ -90,6 +102,18 @@ typedef struct VhostUserLog {
uint64_t mmap_offset;
} VhostUserLog;
+typedef struct VhostUserConfig {
+ uint32_t offset;
+ uint32_t size;
+ uint32_t flags;
+ uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+} VhostUserConfig;
+
+static VhostUserConfig c __attribute__ ((unused));
+#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
+ + sizeof(c.size) \
+ + sizeof(c.flags))
+
#if defined(_WIN32)
# define VU_PACKED __attribute__((gcc_struct, packed))
#else
@@ -112,6 +136,7 @@ typedef struct VhostUserMsg {
struct vhost_vring_addr addr;
VhostUserMemory memory;
VhostUserLog log;
+ VhostUserConfig config;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
@@ -140,6 +165,10 @@ typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
int *do_reply);
typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx);
+typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len);
+typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data,
+ uint32_t offset, uint32_t size,
+ uint32_t flags);
typedef struct VuDevIface {
/* called by VHOST_USER_GET_FEATURES to get the features bitmask */
@@ -162,6 +191,10 @@ typedef struct VuDevIface {
* on unmanaged exit/crash.
*/
vu_queue_is_processed_in_order_cb queue_is_processed_in_order;
+ /* get the config space of the device */
+ vu_get_config_cb get_config;
+ /* set the config space of the device */
+ vu_set_config_cb set_config;
} VuDevIface;
typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);
diff --git a/contrib/vhost-user-blk/Makefile.objs b/contrib/vhost-user-blk/Makefile.objs
new file mode 100644
index 0000000000..72e2cdc3ad
--- /dev/null
+++ b/contrib/vhost-user-blk/Makefile.objs
@@ -0,0 +1 @@
+vhost-user-blk-obj-y = vhost-user-blk.o
diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c
new file mode 100644
index 0000000000..67dac8155a
--- /dev/null
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
@@ -0,0 +1,545 @@
+/*
+ * vhost-user-blk sample application
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Author:
+ * Changpeng Liu <changpeng.liu@intel.com>
+ *
+ * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
+ * implementation by:
+ * Felipe Franciosi <felipe@nutanix.com>
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 only.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "standard-headers/linux/virtio_blk.h"
+#include "contrib/libvhost-user/libvhost-user-glib.h"
+#include "contrib/libvhost-user/libvhost-user.h"
+
+#include <glib.h>
+
+struct virtio_blk_inhdr {
+ unsigned char status;
+};
+
+/* vhost user block device */
+typedef struct VubDev {
+ VugDev parent;
+ int blk_fd;
+ struct virtio_blk_config blkcfg;
+ char *blk_name;
+ GMainLoop *loop;
+} VubDev;
+
+typedef struct VubReq {
+ VuVirtqElement *elem;
+ int64_t sector_num;
+ size_t size;
+ struct virtio_blk_inhdr *in;
+ struct virtio_blk_outhdr *out;
+ VubDev *vdev_blk;
+ struct VuVirtq *vq;
+} VubReq;
+
+/* refer util/iov.c */
+static size_t vub_iov_size(const struct iovec *iov,
+ const unsigned int iov_cnt)
+{
+ size_t len;
+ unsigned int i;
+
+ len = 0;
+ for (i = 0; i < iov_cnt; i++) {
+ len += iov[i].iov_len;
+ }
+ return len;
+}
+
+static void vub_panic_cb(VuDev *vu_dev, const char *buf)
+{
+ VugDev *gdev;
+ VubDev *vdev_blk;
+
+ assert(vu_dev);
+
+ gdev = container_of(vu_dev, VugDev, parent);
+ vdev_blk = container_of(gdev, VubDev, parent);
+ if (buf) {
+ g_warning("vu_panic: %s", buf);
+ }
+
+ g_main_loop_quit(vdev_blk->loop);
+}
+
+static void vub_req_complete(VubReq *req)
+{
+ VugDev *gdev = &req->vdev_blk->parent;
+ VuDev *vu_dev = &gdev->parent;
+
+ /* IO size with 1 extra status byte */
+ vu_queue_push(vu_dev, req->vq, req->elem,
+ req->size + 1);
+ vu_queue_notify(vu_dev, req->vq);
+
+ if (req->elem) {
+ free(req->elem);
+ }
+
+ g_free(req);
+}
+
+static int vub_open(const char *file_name, bool wce)
+{
+ int fd;
+ int flags = O_RDWR;
+
+ if (!wce) {
+ flags |= O_DIRECT;
+ }
+
+ fd = open(file_name, flags);
+ if (fd < 0) {
+ fprintf(stderr, "Cannot open file %s, %s\n", file_name,
+ strerror(errno));
+ return -1;
+ }
+
+ return fd;
+}
+
+static ssize_t
+vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
+{
+ VubDev *vdev_blk = req->vdev_blk;
+ ssize_t rc;
+
+ if (!iovcnt) {
+ fprintf(stderr, "Invalid Read IOV count\n");
+ return -1;
+ }
+
+ req->size = vub_iov_size(iov, iovcnt);
+ rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
+ if (rc < 0) {
+ fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
+ vdev_blk->blk_name, req->sector_num, req->size,
+ strerror(errno));
+ return -1;
+ }
+
+ return rc;
+}
+
+static ssize_t
+vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
+{
+ VubDev *vdev_blk = req->vdev_blk;
+ ssize_t rc;
+
+ if (!iovcnt) {
+ fprintf(stderr, "Invalid Write IOV count\n");
+ return -1;
+ }
+
+ req->size = vub_iov_size(iov, iovcnt);
+ rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
+ if (rc < 0) {
+ fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
+ vdev_blk->blk_name, req->sector_num, req->size,
+ strerror(errno));
+ return -1;
+ }
+
+ return rc;
+}
+
+static void
+vub_flush(VubReq *req)
+{
+ VubDev *vdev_blk = req->vdev_blk;
+
+ fdatasync(vdev_blk->blk_fd);
+}
+
+static int vub_virtio_process_req(VubDev *vdev_blk,
+ VuVirtq *vq)
+{
+ VugDev *gdev = &vdev_blk->parent;
+ VuDev *vu_dev = &gdev->parent;
+ VuVirtqElement *elem;
+ uint32_t type;
+ unsigned in_num;
+ unsigned out_num;
+ VubReq *req;
+
+ elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
+ if (!elem) {
+ return -1;
+ }
+
+ /* refer to hw/block/virtio_blk.c */
+ if (elem->out_num < 1 || elem->in_num < 1) {
+ fprintf(stderr, "virtio-blk request missing headers\n");
+ free(elem);
+ return -1;
+ }
+
+ req = g_new0(VubReq, 1);
+ req->vdev_blk = vdev_blk;
+ req->vq = vq;
+ req->elem = elem;
+
+ in_num = elem->in_num;
+ out_num = elem->out_num;
+
+ /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
+ if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
+ fprintf(stderr, "Invalid outhdr size\n");
+ goto err;
+ }
+ req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
+ out_num--;
+
+ if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
+ fprintf(stderr, "Invalid inhdr size\n");
+ goto err;
+ }
+ req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
+ in_num--;
+
+ type = le32toh(req->out->type);
+ switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
+ case VIRTIO_BLK_T_IN: {
+ ssize_t ret = 0;
+ bool is_write = type & VIRTIO_BLK_T_OUT;
+ req->sector_num = le64toh(req->out->sector);
+ if (is_write) {
+ ret = vub_writev(req, &elem->out_sg[1], out_num);
+ } else {
+ ret = vub_readv(req, &elem->in_sg[0], in_num);
+ }
+ if (ret >= 0) {
+ req->in->status = VIRTIO_BLK_S_OK;
+ } else {
+ req->in->status = VIRTIO_BLK_S_IOERR;
+ }
+ vub_req_complete(req);
+ break;
+ }
+ case VIRTIO_BLK_T_FLUSH: {
+ vub_flush(req);
+ req->in->status = VIRTIO_BLK_S_OK;
+ vub_req_complete(req);
+ break;
+ }
+ case VIRTIO_BLK_T_GET_ID: {
+ size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
+ VIRTIO_BLK_ID_BYTES);
+ snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
+ req->in->status = VIRTIO_BLK_S_OK;
+ req->size = elem->in_sg[0].iov_len;
+ vub_req_complete(req);
+ break;
+ }
+ default: {
+ req->in->status = VIRTIO_BLK_S_UNSUPP;
+ vub_req_complete(req);
+ break;
+ }
+ }
+
+ return 0;
+
+err:
+ free(elem);
+ g_free(req);
+ return -1;
+}
+
+static void vub_process_vq(VuDev *vu_dev, int idx)
+{
+ VugDev *gdev;
+ VubDev *vdev_blk;
+ VuVirtq *vq;
+ int ret;
+
+ if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
+ fprintf(stderr, "VQ Index out of range: %d\n", idx);
+ vub_panic_cb(vu_dev, NULL);
+ return;
+ }
+
+ gdev = container_of(vu_dev, VugDev, parent);
+ vdev_blk = container_of(gdev, VubDev, parent);
+ assert(vdev_blk);
+
+ vq = vu_get_queue(vu_dev, idx);
+ assert(vq);
+
+ while (1) {
+ ret = vub_virtio_process_req(vdev_blk, vq);
+ if (ret) {
+ break;
+ }
+ }
+}
+
+static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
+{
+ VuVirtq *vq;
+
+ assert(vu_dev);
+
+ vq = vu_get_queue(vu_dev, idx);
+ vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
+}
+
+static uint64_t
+vub_get_features(VuDev *dev)
+{
+ return 1ull << VIRTIO_BLK_F_SIZE_MAX |
+ 1ull << VIRTIO_BLK_F_SEG_MAX |
+ 1ull << VIRTIO_BLK_F_TOPOLOGY |
+ 1ull << VIRTIO_BLK_F_BLK_SIZE |
+ 1ull << VIRTIO_BLK_F_FLUSH |
+ 1ull << VIRTIO_BLK_F_CONFIG_WCE |
+ 1ull << VIRTIO_F_VERSION_1 |
+ 1ull << VHOST_USER_F_PROTOCOL_FEATURES;
+}
+
+static int
+vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
+{
+ VugDev *gdev;
+ VubDev *vdev_blk;
+
+ gdev = container_of(vu_dev, VugDev, parent);
+ vdev_blk = container_of(gdev, VubDev, parent);
+ memcpy(config, &vdev_blk->blkcfg, len);
+
+ return 0;
+}
+
+static int
+vub_set_config(VuDev *vu_dev, const uint8_t *data,
+ uint32_t offset, uint32_t size, uint32_t flags)
+{
+ VugDev *gdev;
+ VubDev *vdev_blk;
+ uint8_t wce;
+ int fd;
+
+ /* don't support live migration */
+ if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
+ return -1;
+ }
+
+ gdev = container_of(vu_dev, VugDev, parent);
+ vdev_blk = container_of(gdev, VubDev, parent);
+
+ if (offset != offsetof(struct virtio_blk_config, wce) ||
+ size != 1) {
+ return -1;
+ }
+
+ wce = *data;
+ if (wce == vdev_blk->blkcfg.wce) {
+ /* Do nothing as same with old configuration */
+ return 0;
+ }
+
+ vdev_blk->blkcfg.wce = wce;
+ fprintf(stdout, "Write Cache Policy Changed\n");
+ if (vdev_blk->blk_fd >= 0) {
+ close(vdev_blk->blk_fd);
+ vdev_blk->blk_fd = -1;
+ }
+
+ fd = vub_open(vdev_blk->blk_name, wce);
+ if (fd < 0) {
+ fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
+ vdev_blk->blk_fd = -1;
+ return -1;
+ }
+ vdev_blk->blk_fd = fd;
+
+ return 0;
+}
+
+static const VuDevIface vub_iface = {
+ .get_features = vub_get_features,
+ .queue_set_started = vub_queue_set_started,
+ .get_config = vub_get_config,
+ .set_config = vub_set_config,
+};
+
+static int unix_sock_new(char *unix_fn)
+{
+ int sock;
+ struct sockaddr_un un;
+ size_t len;
+
+ assert(unix_fn);
+
+ sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (sock <= 0) {
+ perror("socket");
+ return -1;
+ }
+
+ un.sun_family = AF_UNIX;
+ (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
+ len = sizeof(un.sun_family) + strlen(un.sun_path);
+
+ (void)unlink(unix_fn);
+ if (bind(sock, (struct sockaddr *)&un, len) < 0) {
+ perror("bind");
+ goto fail;
+ }
+
+ if (listen(sock, 1) < 0) {
+ perror("listen");
+ goto fail;
+ }
+
+ return sock;
+
+fail:
+ (void)close(sock);
+
+ return -1;
+}
+
+static void vub_free(struct VubDev *vdev_blk)
+{
+ if (!vdev_blk) {
+ return;
+ }
+
+ g_main_loop_unref(vdev_blk->loop);
+ if (vdev_blk->blk_fd >= 0) {
+ close(vdev_blk->blk_fd);
+ }
+ g_free(vdev_blk);
+}
+
+static uint32_t
+vub_get_blocksize(int fd)
+{
+ uint32_t blocksize = 512;
+
+#if defined(__linux__) && defined(BLKSSZGET)
+ if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
+ return blocksize;
+ }
+#endif
+
+ return blocksize;
+}
+
+static void
+vub_initialize_config(int fd, struct virtio_blk_config *config)
+{
+ off64_t capacity;
+
+ capacity = lseek64(fd, 0, SEEK_END);
+ config->capacity = capacity >> 9;
+ config->blk_size = vub_get_blocksize(fd);
+ config->size_max = 65536;
+ config->seg_max = 128 - 2;
+ config->min_io_size = 1;
+ config->opt_io_size = 1;
+ config->num_queues = 1;
+}
+
+static VubDev *
+vub_new(char *blk_file)
+{
+ VubDev *vdev_blk;
+
+ vdev_blk = g_new0(VubDev, 1);
+ vdev_blk->loop = g_main_loop_new(NULL, FALSE);
+ vdev_blk->blk_fd = vub_open(blk_file, 0);
+ if (vdev_blk->blk_fd < 0) {
+ fprintf(stderr, "Error to open block device %s\n", blk_file);
+ vub_free(vdev_blk);
+ return NULL;
+ }
+ vdev_blk->blkcfg.wce = 0;
+ vdev_blk->blk_name = blk_file;
+
+ /* fill virtio_blk_config with block parameters */
+ vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
+
+ return vdev_blk;
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+ char *unix_socket = NULL;
+ char *blk_file = NULL;
+ int lsock = -1, csock = -1;
+ VubDev *vdev_blk = NULL;
+
+ while ((opt = getopt(argc, argv, "b:s:h")) != -1) {
+ switch (opt) {
+ case 'b':
+ blk_file = g_strdup(optarg);
+ break;
+ case 's':
+ unix_socket = g_strdup(optarg);
+ break;
+ case 'h':
+ default:
+ printf("Usage: %s [-b block device or file, -s UNIX domain socket]"
+ " | [ -h ]\n", argv[0]);
+ return 0;
+ }
+ }
+
+ if (!unix_socket || !blk_file) {
+ printf("Usage: %s [-b block device or file, -s UNIX domain socket] |"
+ " [ -h ]\n", argv[0]);
+ return -1;
+ }
+
+ lsock = unix_sock_new(unix_socket);
+ if (lsock < 0) {
+ goto err;
+ }
+
+ csock = accept(lsock, (void *)0, (void *)0);
+ if (csock < 0) {
+ fprintf(stderr, "Accept error %s\n", strerror(errno));
+ goto err;
+ }
+
+ vdev_blk = vub_new(blk_file);
+ if (!vdev_blk) {
+ goto err;
+ }
+
+ vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface);
+
+ g_main_loop_run(vdev_blk->loop);
+
+ vug_deinit(&vdev_blk->parent);
+
+err:
+ vub_free(vdev_blk);
+ if (csock >= 0) {
+ close(csock);
+ }
+ if (lsock >= 0) {
+ close(lsock);
+ }
+ g_free(unix_socket);
+ g_free(blk_file);
+
+ return 0;
+}
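The new contrib/vhost-user-blk binary above exports a raw image file (or block
device) over a vhost-user UNIX socket, taking exactly the two options parsed in
main(); a typical invocation (paths are illustrative) would be:

    ./vhost-user-blk -b /path/to/disk.img -s /tmp/vhost-user-blk.sock

The process accepts a single connection on the socket and serves virtio-blk
requests until the connection goes away or an error occurs; the QEMU-side
wiring is sketched after the hw/block/vhost-user-blk.c diff below.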
diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index e514bdef42..49a0f285ac 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -43,3 +43,4 @@ CONFIG_VGA_PCI=y
CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM)
CONFIG_ROCKER=y
CONFIG_VHOST_USER_SCSI=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX))
+CONFIG_VHOST_USER_BLK=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX))
diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak
index 444bf16b80..2f4bfe73b4 100644
--- a/default-configs/s390x-softmmu.mak
+++ b/default-configs/s390x-softmmu.mak
@@ -1,6 +1,7 @@
CONFIG_PCI=y
CONFIG_VIRTIO_PCI=$(CONFIG_PCI)
CONFIG_VHOST_USER_SCSI=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX))
+CONFIG_VHOST_USER_BLK=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX))
CONFIG_VIRTIO=y
CONFIG_SCLPCONSOLE=y
CONFIG_TERMINAL3270=y
diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
index d49444e037..9fcf48d611 100644
--- a/docs/interop/vhost-user.txt
+++ b/docs/interop/vhost-user.txt
@@ -119,6 +119,19 @@ Depending on the request type, payload can be:
- 3: IOTLB invalidate
- 4: IOTLB access fail
+ * Virtio device config space
+ -----------------------------------
+ | offset | size | flags | payload |
+ -----------------------------------
+
+ Offset: a 32-bit offset of virtio device's configuration space
+ Size: a 32-bit configuration space access size in bytes
+ Flags: a 32-bit value:
+ - 0: Vhost master messages used for writeable fields
+ - 1: Vhost master messages used for live migration
+ Payload: Size bytes array holding the contents of the virtio
+ device's configuration space
+
In QEMU the vhost-user message is implemented with the following struct:
typedef struct VhostUserMsg {
@@ -132,6 +145,7 @@ typedef struct VhostUserMsg {
VhostUserMemory memory;
VhostUserLog log;
struct vhost_iotlb_msg iotlb;
+ VhostUserConfig config;
};
} QEMU_PACKED VhostUserMsg;
@@ -214,8 +228,8 @@ Multiple queue is treated as a protocol extension, hence the slave has to
implement protocol features first. The multiple queues feature is supported
only when the protocol feature VHOST_USER_PROTOCOL_F_MQ (bit 0) is set.
-The max number of queues the slave supports can be queried with message
-VHOST_USER_GET_PROTOCOL_FEATURES. Master should stop when the number of
+The max number of queue pairs the slave supports can be queried with message
+VHOST_USER_GET_QUEUE_NUM. Master should stop when the number of
requested queues is bigger than that.
As all queues share one connection, the master uses a unique index for each
@@ -623,6 +637,32 @@ Master message types
and expect this message once (per VQ) during device configuration
(ie. before the master starts the VQ).
+ * VHOST_USER_GET_CONFIG
+
+ Id: 24
+ Equivalent ioctl: N/A
+ Master payload: virtio device config space
+ Slave payload: virtio device config space
+
+ Submitted by the vhost-user master to fetch the contents of the virtio
+ device configuration space. The vhost-user slave's payload size MUST match
+ the master's request; the slave uses a zero-length payload to
+ indicate an error to the vhost-user master. The vhost-user master may
+ cache the contents to avoid repeated VHOST_USER_GET_CONFIG calls.
+
+* VHOST_USER_SET_CONFIG
+
+ Id: 25
+ Equivalent ioctl: N/A
+ Master payload: virtio device config space
+ Slave payload: N/A
+
+ Submitted by the vhost-user master when the Guest changes the virtio
+ device configuration space and also can be used for live migration
+ on the destination host. The vhost-user slave must check the flags
+ field, and slaves MUST NOT accept SET_CONFIG for read-only
+ configuration space fields unless the live migration bit is set.
+
Slave message types
-------------------
@@ -641,6 +681,21 @@ Slave message types
This request should be send only when VIRTIO_F_IOMMU_PLATFORM feature
has been successfully negotiated.
+* VHOST_USER_SLAVE_CONFIG_CHANGE_MSG
+
+ Id: 2
+ Equivalent ioctl: N/A
+ Slave payload: N/A
+ Master payload: N/A
+
+ The vhost-user slave sends such messages to notify the master that the
+ virtio device's configuration space has changed; for host devices that
+ support this feature, the host driver can then send a VHOST_USER_GET_CONFIG
+ message to the slave to fetch the latest content. If
+ VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated and the slave sets the
+ VHOST_USER_NEED_REPLY flag, the master must respond with zero when the
+ operation completes successfully, or non-zero otherwise.
+
VHOST_USER_PROTOCOL_F_REPLY_ACK:
-------------------------------
The original vhost-user specification only demands replies for certain
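To make the new config-space framing concrete, here is a minimal, illustrative
C sketch (not code from this series) of a master-side VHOST_USER_GET_CONFIG
request built with the VhostUserConfig layout added to this document and to
hw/virtio/vhost-user.c below; the virtio-blk payload size is only an example:

    /* Sketch: request the whole virtio-blk config space (illustrative). */
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_CONFIG,      /* id 24 */
        .hdr.flags   = VHOST_USER_VERSION,         /* protocol version 1 */
        .hdr.size    = VHOST_USER_CONFIG_HDR_SIZE
                       + sizeof(struct virtio_blk_config),
        .payload.config.offset = 0,
        .payload.config.size   = sizeof(struct virtio_blk_config),
        .payload.config.flags  = 0,
    };
    /* The slave answers with the same framing: payload.config.region holds
     * the config bytes on success, or the payload size is shrunk to 0 to
     * signal an error, as described for VHOST_USER_GET_CONFIG above. */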
diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
index e0ed980c90..4c19a583c8 100644
--- a/hw/block/Makefile.objs
+++ b/hw/block/Makefile.objs
@@ -13,3 +13,6 @@ obj-$(CONFIG_SH4) += tc58128.o
obj-$(CONFIG_VIRTIO) += virtio-blk.o
obj-$(CONFIG_VIRTIO) += dataplane/
+ifeq ($(CONFIG_VIRTIO),y)
+obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o
+endif
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
new file mode 100644
index 0000000000..b53b4c9c57
--- /dev/null
+++ b/hw/block/vhost-user-blk.c
@@ -0,0 +1,359 @@
+/*
+ * vhost-user-blk host device
+ *
+ * Copyright(C) 2017 Intel Corporation.
+ *
+ * Authors:
+ * Changpeng Liu <changpeng.liu@intel.com>
+ *
+ * Largely based on the "vhost-user-scsi.c" and "vhost-scsi.c" implemented by:
+ * Felipe Franciosi <felipe@nutanix.com>
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Nicholas Bellinger <nab@risingtidesystems.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/typedefs.h"
+#include "qemu/cutils.h"
+#include "qom/object.h"
+#include "hw/qdev-core.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user-blk.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
+
+static const int user_feature_bits[] = {
+ VIRTIO_BLK_F_SIZE_MAX,
+ VIRTIO_BLK_F_SEG_MAX,
+ VIRTIO_BLK_F_GEOMETRY,
+ VIRTIO_BLK_F_BLK_SIZE,
+ VIRTIO_BLK_F_TOPOLOGY,
+ VIRTIO_BLK_F_MQ,
+ VIRTIO_BLK_F_RO,
+ VIRTIO_BLK_F_FLUSH,
+ VIRTIO_BLK_F_CONFIG_WCE,
+ VIRTIO_F_VERSION_1,
+ VIRTIO_RING_F_INDIRECT_DESC,
+ VIRTIO_RING_F_EVENT_IDX,
+ VIRTIO_F_NOTIFY_ON_EMPTY,
+ VHOST_INVALID_FEATURE_BIT
+};
+
+static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config)
+{
+ VHostUserBlk *s = VHOST_USER_BLK(vdev);
+
+ memcpy(config, &s->blkcfg, sizeof(struct virtio_blk_config));
+}
+
+static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
+{
+ VHostUserBlk *s = VHOST_USER_BLK(vdev);
+ struct virtio_blk_config *blkcfg = (struct virtio_blk_config *)config;
+ int ret;
+
+ if (blkcfg->wce == s->blkcfg.wce) {
+ return;
+ }
+
+ ret = vhost_dev_set_config(&s->dev, &blkcfg->wce,
+ offsetof(struct virtio_blk_config, wce),
+ sizeof(blkcfg->wce),
+ VHOST_SET_CONFIG_TYPE_MASTER);
+ if (ret) {
+ error_report("set device config space failed");
+ return;
+ }
+
+ s->blkcfg.wce = blkcfg->wce;
+}
+
+static int vhost_user_blk_handle_config_change(struct vhost_dev *dev)
+{
+ int ret;
+ struct virtio_blk_config blkcfg;
+ VHostUserBlk *s = VHOST_USER_BLK(dev->vdev);
+
+ ret = vhost_dev_get_config(dev, (uint8_t *)&blkcfg,
+ sizeof(struct virtio_blk_config));
+ if (ret < 0) {
+ error_report("get config space failed");
+ return -1;
+ }
+
+ /* valid for resize only */
+ if (blkcfg.capacity != s->blkcfg.capacity) {
+ s->blkcfg.capacity = blkcfg.capacity;
+ memcpy(dev->vdev->config, &s->blkcfg, sizeof(struct virtio_blk_config));
+ virtio_notify_config(dev->vdev);
+ }
+
+ return 0;
+}
+
+const VhostDevConfigOps blk_ops = {
+ .vhost_dev_config_notifier = vhost_user_blk_handle_config_change,
+};
+
+static void vhost_user_blk_start(VirtIODevice *vdev)
+{
+ VHostUserBlk *s = VHOST_USER_BLK(vdev);
+ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+ int i, ret;
+
+ if (!k->set_guest_notifiers) {
+ error_report("binding does not support guest notifiers");
+ return;
+ }
+
+ ret = vhost_dev_enable_notifiers(&s->dev, vdev);
+ if (ret < 0) {
+ error_report("Error enabling host notifiers: %d", -ret);
+ return;
+ }
+
+ ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
+ if (ret < 0) {
+ error_report("Error binding guest notifier: %d", -ret);
+ goto err_host_notifiers;
+ }
+
+ s->dev.acked_features = vdev->guest_features;
+ ret = vhost_dev_start(&s->dev, vdev);
+ if (ret < 0) {
+ error_report("Error starting vhost: %d", -ret);
+ goto err_guest_notifiers;
+ }
+
+ /* guest_notifier_mask/pending not used yet, so just unmask
+ * everything here. virtio-pci will do the right thing by
+ * enabling/disabling irqfd.
+ */
+ for (i = 0; i < s->dev.nvqs; i++) {
+ vhost_virtqueue_mask(&s->dev, vdev, i, false);
+ }
+
+ return;
+
+err_guest_notifiers:
+ k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
+err_host_notifiers:
+ vhost_dev_disable_notifiers(&s->dev, vdev);
+}
+
+static void vhost_user_blk_stop(VirtIODevice *vdev)
+{
+ VHostUserBlk *s = VHOST_USER_BLK(vdev);
+ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+ int ret;
+
+ if (!k->set_guest_notifiers) {
+ return;
+ }
+
+ vhost_dev_stop(&s->dev, vdev);
+
+ ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
+ if (ret < 0) {
+ error_report("vhost guest notifier cleanup failed: %d", ret);
+ return;
+ }
+
+ vhost_dev_disable_notifiers(&s->dev, vdev);
+}
+
+static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
+{
+ VHostUserBlk *s = VHOST_USER_BLK(vdev);
+ bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
+
+ if (!vdev->vm_running) {
+ should_start = false;
+ }
+
+ if (s->dev.started == should_start) {
+ return;
+ }
+
+ if (should_start) {
+ vhost_user_blk_start(vdev);
+ } else {
+ vhost_user_blk_stop(vdev);
+ }
+
+}
+
+static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
+ uint64_t features,
+ Error **errp)
+{
+ VHostUserBlk *s = VHOST_USER_BLK(vdev);
+ uint64_t get_features;
+
+ /* Turn on pre-defined features */
+ virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX);
+ virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
+ virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
+ virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
+ virtio_add_feature(&features, VIRTIO_BLK_F_FLUSH);
+
+ if (s->config_wce) {
+ virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE);
+ }
+ if (s->config_ro) {
+ virtio_add_feature(&features, VIRTIO_BLK_F_RO);
+ }
+ if (s->num_queues > 1) {
+ virtio_add_feature(&features, VIRTIO_BLK_F_MQ);
+ }
+
+ get_features = vhost_get_features(&s->dev, user_feature_bits, features);
+
+ return get_features;
+}
+
+static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+
+}
+
+static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+ VHostUserBlk *s = VHOST_USER_BLK(vdev);
+ int i, ret;
+
+ if (!s->chardev.chr) {
+ error_setg(errp, "vhost-user-blk: chardev is mandatory");
+ return;
+ }
+
+ if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) {
+ error_setg(errp, "vhost-user-blk: invalid number of IO queues");
+ return;
+ }
+
+ if (!s->queue_size) {
+ error_setg(errp, "vhost-user-blk: queue size must be non-zero");
+ return;
+ }
+
+ virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
+ sizeof(struct virtio_blk_config));
+
+ for (i = 0; i < s->num_queues; i++) {
+ virtio_add_queue(vdev, s->queue_size,
+ vhost_user_blk_handle_output);
+ }
+
+ s->dev.nvqs = s->num_queues;
+ s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
+ s->dev.vq_index = 0;
+ s->dev.backend_features = 0;
+
+ ret = vhost_dev_init(&s->dev, &s->chardev, VHOST_BACKEND_TYPE_USER, 0);
+ if (ret < 0) {
+ error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
+ strerror(-ret));
+ goto virtio_err;
+ }
+
+ ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
+ sizeof(struct virtio_blk_config));
+ if (ret < 0) {
+ error_setg(errp, "vhost-user-blk: get block config failed");
+ goto vhost_err;
+ }
+
+ if (s->blkcfg.num_queues != s->num_queues) {
+ s->blkcfg.num_queues = s->num_queues;
+ }
+
+ vhost_dev_set_config_notifier(&s->dev, &blk_ops);
+
+ return;
+
+vhost_err:
+ vhost_dev_cleanup(&s->dev);
+virtio_err:
+ g_free(s->dev.vqs);
+ virtio_cleanup(vdev);
+}
+
+static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+ VHostUserBlk *s = VHOST_USER_BLK(dev);
+
+ vhost_user_blk_set_status(vdev, 0);
+ vhost_dev_cleanup(&s->dev);
+ g_free(s->dev.vqs);
+ virtio_cleanup(vdev);
+}
+
+static void vhost_user_blk_instance_init(Object *obj)
+{
+ VHostUserBlk *s = VHOST_USER_BLK(obj);
+
+ device_add_bootindex_property(obj, &s->bootindex, "bootindex",
+ "/disk@0,0", DEVICE(obj), NULL);
+}
+
+static const VMStateDescription vmstate_vhost_user_blk = {
+ .name = "vhost-user-blk",
+ .minimum_version_id = 1,
+ .version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_VIRTIO_DEVICE,
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static Property vhost_user_blk_properties[] = {
+ DEFINE_PROP_CHR("chardev", VHostUserBlk, chardev),
+ DEFINE_PROP_UINT16("num-queues", VHostUserBlk, num_queues, 1),
+ DEFINE_PROP_UINT32("queue-size", VHostUserBlk, queue_size, 128),
+ DEFINE_PROP_BIT("config-wce", VHostUserBlk, config_wce, 0, true),
+ DEFINE_PROP_BIT("config-ro", VHostUserBlk, config_ro, 0, false),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+
+ dc->props = vhost_user_blk_properties;
+ dc->vmsd = &vmstate_vhost_user_blk;
+ set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+ vdc->realize = vhost_user_blk_device_realize;
+ vdc->unrealize = vhost_user_blk_device_unrealize;
+ vdc->get_config = vhost_user_blk_update_config;
+ vdc->set_config = vhost_user_blk_set_config;
+ vdc->get_features = vhost_user_blk_get_features;
+ vdc->set_status = vhost_user_blk_set_status;
+}
+
+static const TypeInfo vhost_user_blk_info = {
+ .name = TYPE_VHOST_USER_BLK,
+ .parent = TYPE_VIRTIO_DEVICE,
+ .instance_size = sizeof(VHostUserBlk),
+ .instance_init = vhost_user_blk_instance_init,
+ .class_init = vhost_user_blk_class_init,
+};
+
+static void virtio_register_types(void)
+{
+ type_register_static(&vhost_user_blk_info);
+}
+
+type_init(virtio_register_types)
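The properties defined above (chardev, num-queues, queue-size, config-wce,
config-ro) are what gets wired up on the QEMU command line. A hedged sketch of
such a command line, assuming the virtio-pci proxy added elsewhere in this
series is exposed as "vhost-user-blk-pci" and the usual vhost-user requirement
of shareable guest memory (names and paths are illustrative):

    qemu-system-x86_64 ... \
        -object memory-backend-file,id=mem0,size=1G,mem-path=/dev/shm,share=on \
        -numa node,memdev=mem0 \
        -chardev socket,id=blk0,path=/tmp/vhost-user-blk.sock \
        -device vhost-user-blk-pci,chardev=blk0,num-queues=1,queue-size=128

The chardev connects to the socket served by the contrib backend shown earlier.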
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 18b939e469..dc4b2b9ffe 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2394,7 +2394,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
}
mem_base = 1ULL << 32;
mem_len = next_base - pcms->below_4g_mem_size;
- next_base += (1ULL << 32) - pcms->below_4g_mem_size;
+ next_base = mem_base + mem_len;
}
numamem = acpi_data_push(table_data, sizeof *numamem);
build_srat_memory(numamem, mem_base, mem_len, i - 1,
@@ -2473,6 +2473,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
AcpiDmarDeviceScope *scope = NULL;
/* Root complex IOAPIC use one path[0] only */
size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]);
+ IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu);
assert(iommu);
if (iommu->intr_supported) {
@@ -2480,7 +2481,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
}
dmar = acpi_data_push(table_data, sizeof(*dmar));
- dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
+ dmar->host_address_width = intel_iommu->aw_bits - 1;
dmar->flags = dmar_flags;
/* DMAR Remapping Hardware Unit Definition structure */
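For the build_srat() hunk above, the rewritten assignment is arithmetically
identical to the old accumulation, which is the point of "hw/acpi-build: Make
next_base easy to follow": at that spot in the loop

    mem_base = 1ULL << 32;
    mem_len  = next_base - pcms->below_4g_mem_size;
    /* therefore */
    mem_base + mem_len == next_base + (1ULL << 32) - pcms->below_4g_mem_size
    /* which is exactly what "next_base += (1ULL << 32) - pcms->below_4g_mem_size"
     * used to compute, so the emitted SRAT entries do not change. */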
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index eeaf0e0aa8..63d46ff6ee 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1144,18 +1144,9 @@ static void amdvi_realize(DeviceState *dev, Error **err)
AMDVIState *s = AMD_IOMMU_DEVICE(dev);
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
MachineState *ms = MACHINE(qdev_get_machine());
- MachineClass *mc = MACHINE_GET_CLASS(ms);
- PCMachineState *pcms =
- PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE));
- PCIBus *bus;
-
- if (!pcms) {
- error_setg(err, "Machine-type '%s' not supported by amd-iommu",
- mc->name);
- return;
- }
+ PCMachineState *pcms = PC_MACHINE(ms);
+ PCIBus *bus = pcms->bus;
- bus = pcms->bus;
s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
amdvi_uint64_equal, g_free, g_free);
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index fe15d3ba84..2e841cde27 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -521,9 +521,9 @@ static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce)
return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
}
-static inline uint64_t vtd_get_slpte_addr(uint64_t slpte)
+static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw)
{
- return slpte & VTD_SL_PT_BASE_ADDR_MASK;
+ return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw);
}
/* Whether the pte indicates the address of the page frame */
@@ -608,35 +608,29 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu,
return true;
}
-static inline uint64_t vtd_iova_limit(VTDContextEntry *ce)
+static inline uint64_t vtd_iova_limit(VTDContextEntry *ce, uint8_t aw)
{
uint32_t ce_agaw = vtd_ce_get_agaw(ce);
- return 1ULL << MIN(ce_agaw, VTD_MGAW);
+ return 1ULL << MIN(ce_agaw, aw);
}
/* Return true if IOVA passes range check, otherwise false. */
-static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce)
+static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce,
+ uint8_t aw)
{
/*
* Check if @iova is above 2^X-1, where X is the minimum of MGAW
* in CAP_REG and AW in context-entry.
*/
- return !(iova & ~(vtd_iova_limit(ce) - 1));
-}
-
-static const uint64_t vtd_paging_entry_rsvd_field[] = {
- [0] = ~0ULL,
- /* For not large page */
- [1] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
- [2] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
- [3] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
- [4] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
- /* For large page */
- [5] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
- [6] = 0x1ff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
- [7] = 0x3ffff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
- [8] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
-};
+ return !(iova & ~(vtd_iova_limit(ce, aw) - 1));
+}
+
+/*
+ * Rsvd field masks for spte:
+ * Index [1] to [4] 4k pages
+ * Index [5] to [8] large pages
+ */
+static uint64_t vtd_paging_entry_rsvd_field[9];
static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
{
@@ -676,7 +670,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
*/
static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
uint64_t *slptep, uint32_t *slpte_level,
- bool *reads, bool *writes)
+ bool *reads, bool *writes, uint8_t aw_bits)
{
dma_addr_t addr = vtd_ce_get_slpt_base(ce);
uint32_t level = vtd_ce_get_level(ce);
@@ -684,7 +678,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
uint64_t slpte;
uint64_t access_right_check;
- if (!vtd_iova_range_check(iova, ce)) {
+ if (!vtd_iova_range_check(iova, ce, aw_bits)) {
trace_vtd_err_dmar_iova_overflow(iova);
return -VTD_FR_ADDR_BEYOND_MGAW;
}
@@ -721,7 +715,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
*slpte_level = level;
return 0;
}
- addr = vtd_get_slpte_addr(slpte);
+ addr = vtd_get_slpte_addr(slpte, aw_bits);
level--;
}
}
@@ -739,11 +733,12 @@ typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private);
* @read: whether parent level has read permission
* @write: whether parent level has write permission
* @notify_unmap: whether we should notify invalid entries
+ * @aw: maximum address width
*/
static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
uint64_t end, vtd_page_walk_hook hook_fn,
- void *private, uint32_t level,
- bool read, bool write, bool notify_unmap)
+ void *private, uint32_t level, bool read,
+ bool write, bool notify_unmap, uint8_t aw)
{
bool read_cur, write_cur, entry_valid;
uint32_t offset;
@@ -790,7 +785,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
entry.target_as = &address_space_memory;
entry.iova = iova & subpage_mask;
/* NOTE: this is only meaningful if entry_valid == true */
- entry.translated_addr = vtd_get_slpte_addr(slpte);
+ entry.translated_addr = vtd_get_slpte_addr(slpte, aw);
entry.addr_mask = ~subpage_mask;
entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
if (!entry_valid && !notify_unmap) {
@@ -810,10 +805,10 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
trace_vtd_page_walk_skip_perm(iova, iova_next);
goto next;
}
- ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte), iova,
+ ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, aw), iova,
MIN(iova_next, end), hook_fn, private,
level - 1, read_cur, write_cur,
- notify_unmap);
+ notify_unmap, aw);
if (ret < 0) {
return ret;
}
@@ -834,25 +829,26 @@ next:
* @end: IOVA range end address (start <= addr < end)
* @hook_fn: the hook that to be called for each detected area
* @private: private data for the hook function
+ * @aw: maximum address width
*/
static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end,
vtd_page_walk_hook hook_fn, void *private,
- bool notify_unmap)
+ bool notify_unmap, uint8_t aw)
{
dma_addr_t addr = vtd_ce_get_slpt_base(ce);
uint32_t level = vtd_ce_get_level(ce);
- if (!vtd_iova_range_check(start, ce)) {
+ if (!vtd_iova_range_check(start, ce, aw)) {
return -VTD_FR_ADDR_BEYOND_MGAW;
}
- if (!vtd_iova_range_check(end, ce)) {
+ if (!vtd_iova_range_check(end, ce, aw)) {
/* Fix end so that it reaches the maximum */
- end = vtd_iova_limit(ce);
+ end = vtd_iova_limit(ce, aw);
}
return vtd_page_walk_level(addr, start, end, hook_fn, private,
- level, true, true, notify_unmap);
+ level, true, true, notify_unmap, aw);
}
/* Map a device to its corresponding domain (context-entry) */
@@ -874,7 +870,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
return -VTD_FR_ROOT_ENTRY_P;
}
- if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) {
+ if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(s->aw_bits))) {
trace_vtd_re_invalid(re.rsvd, re.val);
return -VTD_FR_ROOT_ENTRY_RSVD;
}
@@ -891,7 +887,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
}
if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
- (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
+ (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->aw_bits))) {
trace_vtd_ce_invalid(ce->hi, ce->lo);
return -VTD_FR_CONTEXT_ENTRY_RSVD;
}
@@ -1173,7 +1169,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
}
ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
- &reads, &writes);
+ &reads, &writes, s->aw_bits);
if (ret_fr) {
ret_fr = -ret_fr;
if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
@@ -1190,7 +1186,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
access_flags, level);
out:
entry->iova = addr & page_mask;
- entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask;
+ entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask;
entry->addr_mask = ~page_mask;
entry->perm = access_flags;
return true;
@@ -1207,7 +1203,7 @@ static void vtd_root_table_setup(IntelIOMMUState *s)
{
s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG);
s->root_extended = s->root & VTD_RTADDR_RTT;
- s->root &= VTD_RTADDR_ADDR_MASK;
+ s->root &= VTD_RTADDR_ADDR_MASK(s->aw_bits);
trace_vtd_reg_dmar_root(s->root, s->root_extended);
}
@@ -1223,7 +1219,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
uint64_t value = 0;
value = vtd_get_quad_raw(s, DMAR_IRTA_REG);
s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1);
- s->intr_root = value & VTD_IRTA_ADDR_MASK;
+ s->intr_root = value & VTD_IRTA_ADDR_MASK(s->aw_bits);
s->intr_eime = value & VTD_IRTA_EIME;
/* Notify global invalidation */
@@ -1399,7 +1395,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE,
vtd_page_invalidate_notify_hook,
- (void *)&vtd_as->iommu, true);
+ (void *)&vtd_as->iommu, true, s->aw_bits);
}
}
}
@@ -1479,7 +1475,7 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
trace_vtd_inv_qi_enable(en);
if (en) {
- s->iq = iqa_val & VTD_IQA_IQA_MASK;
+ s->iq = iqa_val & VTD_IQA_IQA_MASK(s->aw_bits);
/* 2^(x+8) entries */
s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
s->qi_enabled = true;
@@ -2410,6 +2406,8 @@ static Property vtd_properties[] = {
DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
ON_OFF_AUTO_AUTO),
DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
+ DEFINE_PROP_UINT8("x-aw-bits", IntelIOMMUState, aw_bits,
+ VTD_HOST_ADDRESS_WIDTH),
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
DEFINE_PROP_END_OF_LIST(),
};
@@ -2765,6 +2763,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
hwaddr size;
hwaddr start = n->start;
hwaddr end = n->end;
+ IntelIOMMUState *s = as->iommu_state;
/*
* Note: all the codes in this function has a assumption that IOVA
@@ -2772,12 +2771,12 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
* VT-d spec), otherwise we need to consider overflow of 64 bits.
*/
- if (end > VTD_ADDRESS_SIZE) {
+ if (end > VTD_ADDRESS_SIZE(s->aw_bits)) {
/*
* Don't need to unmap regions that is bigger than the whole
* VT-d supported address space size
*/
- end = VTD_ADDRESS_SIZE;
+ end = VTD_ADDRESS_SIZE(s->aw_bits);
}
assert(start <= end);
@@ -2789,9 +2788,9 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
* suite the minimum available mask.
*/
int n = 64 - clz64(size);
- if (n > VTD_MGAW) {
+ if (n > s->aw_bits) {
/* should not happen, but in case it happens, limit it */
- n = VTD_MGAW;
+ n = s->aw_bits;
}
size = 1ULL << n;
}
@@ -2851,7 +2850,8 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
PCI_FUNC(vtd_as->devfn),
VTD_CONTEXT_ENTRY_DID(ce.hi),
ce.hi, ce.lo);
- vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false);
+ vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false,
+ s->aw_bits);
} else {
trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
PCI_FUNC(vtd_as->devfn));
@@ -2882,10 +2882,27 @@ static void vtd_init(IntelIOMMUState *s)
s->qi_enabled = false;
s->iq_last_desc_type = VTD_INV_DESC_NONE;
s->next_frcd_reg = 0;
- s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
- VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
+ s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
+ VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
+ VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
+ if (s->aw_bits == VTD_HOST_AW_48BIT) {
+ s->cap |= VTD_CAP_SAGAW_48bit;
+ }
s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
+ /*
+ * Rsvd field masks for spte
+ */
+ vtd_paging_entry_rsvd_field[0] = ~0ULL;
+ vtd_paging_entry_rsvd_field[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits);
+ vtd_paging_entry_rsvd_field[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
+ vtd_paging_entry_rsvd_field[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
+ vtd_paging_entry_rsvd_field[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
+ vtd_paging_entry_rsvd_field[5] = VTD_SPTE_LPAGE_L1_RSVD_MASK(s->aw_bits);
+ vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits);
+ vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits);
+ vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->aw_bits);
+
if (x86_iommu->intr_supported) {
s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
if (s->intr_eim == ON_OFF_AUTO_ON) {
@@ -3021,26 +3038,25 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
}
}
+ /* Currently only address widths supported are 39 and 48 bits */
+ if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
+ (s->aw_bits != VTD_HOST_AW_48BIT)) {
+ error_setg(errp, "Supported values for x-aw-bits are: %d, %d",
+ VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT);
+ return false;
+ }
+
return true;
}
static void vtd_realize(DeviceState *dev, Error **errp)
{
MachineState *ms = MACHINE(qdev_get_machine());
- MachineClass *mc = MACHINE_GET_CLASS(ms);
- PCMachineState *pcms =
- PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE));
- PCIBus *bus;
+ PCMachineState *pcms = PC_MACHINE(ms);
+ PCIBus *bus = pcms->bus;
IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
- if (!pcms) {
- error_setg(errp, "Machine-type '%s' not supported by intel-iommu",
- mc->name);
- return;
- }
-
- bus = pcms->bus;
x86_iommu->type = TYPE_INTEL;
if (!vtd_decide_config(s, errp)) {
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 0e73a65bf2..d084099ed9 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -131,7 +131,7 @@
#define VTD_TLB_DID(val) (((val) >> 32) & VTD_DOMAIN_ID_MASK)
/* IVA_REG */
-#define VTD_IVA_ADDR(val) ((val) & ~0xfffULL & ((1ULL << VTD_MGAW) - 1))
+#define VTD_IVA_ADDR(val) ((val) & ~0xfffULL)
#define VTD_IVA_AM(val) ((val) & 0x3fULL)
/* GCMD_REG */
@@ -172,10 +172,10 @@
/* RTADDR_REG */
#define VTD_RTADDR_RTT (1ULL << 11)
-#define VTD_RTADDR_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL)
+#define VTD_RTADDR_ADDR_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL)
/* IRTA_REG */
-#define VTD_IRTA_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL)
+#define VTD_IRTA_ADDR_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL)
#define VTD_IRTA_EIME (1ULL << 11)
#define VTD_IRTA_SIZE_MASK (0xfULL)
@@ -197,9 +197,8 @@
#define VTD_DOMAIN_ID_SHIFT 16 /* 16-bit domain id for 64K domains */
#define VTD_DOMAIN_ID_MASK ((1UL << VTD_DOMAIN_ID_SHIFT) - 1)
#define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
-#define VTD_MGAW 39 /* Maximum Guest Address Width */
-#define VTD_ADDRESS_SIZE (1ULL << VTD_MGAW)
-#define VTD_CAP_MGAW (((VTD_MGAW - 1) & 0x3fULL) << 16)
+#define VTD_ADDRESS_SIZE(aw) (1ULL << (aw))
+#define VTD_CAP_MGAW(aw) ((((aw) - 1) & 0x3fULL) << 16)
#define VTD_MAMV 18ULL
#define VTD_CAP_MAMV (VTD_MAMV << 48)
#define VTD_CAP_PSI (1ULL << 39)
@@ -213,13 +212,12 @@
#define VTD_CAP_SAGAW_39bit (0x2ULL << VTD_CAP_SAGAW_SHIFT)
/* 48-bit AGAW, 4-level page-table */
#define VTD_CAP_SAGAW_48bit (0x4ULL << VTD_CAP_SAGAW_SHIFT)
-#define VTD_CAP_SAGAW VTD_CAP_SAGAW_39bit
/* IQT_REG */
#define VTD_IQT_QT(val) (((val) >> 4) & 0x7fffULL)
/* IQA_REG */
-#define VTD_IQA_IQA_MASK (VTD_HAW_MASK ^ 0xfffULL)
+#define VTD_IQA_IQA_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL)
#define VTD_IQA_QS 0x7ULL
/* IQH_REG */
@@ -252,7 +250,7 @@
#define VTD_FRCD_SID_MASK 0xffffULL
#define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK)
/* For the low 64-bit of 128-bit */
-#define VTD_FRCD_FI(val) ((val) & (((1ULL << VTD_MGAW) - 1) ^ 0xfffULL))
+#define VTD_FRCD_FI(val) ((val) & ~0xfffULL)
/* DMA Remapping Fault Conditions */
typedef enum VTDFaultReason {
@@ -360,8 +358,7 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_IOTLB_DOMAIN (2ULL << 4)
#define VTD_INV_DESC_IOTLB_PAGE (3ULL << 4)
#define VTD_INV_DESC_IOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK)
-#define VTD_INV_DESC_IOTLB_ADDR(val) ((val) & ~0xfffULL & \
- ((1ULL << VTD_MGAW) - 1))
+#define VTD_INV_DESC_IOTLB_ADDR(val) ((val) & ~0xfffULL)
#define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL)
#define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL
#define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL
@@ -373,6 +370,24 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
#define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8
+/* Rsvd field masks for spte */
+#define VTD_SPTE_PAGE_L1_RSVD_MASK(aw) \
+ (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_PAGE_L2_RSVD_MASK(aw) \
+ (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_PAGE_L3_RSVD_MASK(aw) \
+ (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_PAGE_L4_RSVD_MASK(aw) \
+ (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_LPAGE_L1_RSVD_MASK(aw) \
+ (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_LPAGE_L2_RSVD_MASK(aw) \
+ (0x1ff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \
+ (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_LPAGE_L4_RSVD_MASK(aw) \
+ (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+
/* Information about page-selective IOTLB invalidate */
struct VTDIOTLBPageInvInfo {
uint16_t domain_id;
@@ -403,7 +418,7 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_ROOT_ENTRY_CTP (~0xfffULL)
#define VTD_ROOT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(VTDRootEntry))
-#define VTD_ROOT_ENTRY_RSVD (0xffeULL | ~VTD_HAW_MASK)
+#define VTD_ROOT_ENTRY_RSVD(aw) (0xffeULL | ~VTD_HAW_MASK(aw))
/* Masks for struct VTDContextEntry */
/* lo */
@@ -415,7 +430,7 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_CONTEXT_TT_PASS_THROUGH (2ULL << 2)
/* Second Level Page Translation Pointer*/
#define VTD_CONTEXT_ENTRY_SLPTPTR (~0xfffULL)
-#define VTD_CONTEXT_ENTRY_RSVD_LO (0xff0ULL | ~VTD_HAW_MASK)
+#define VTD_CONTEXT_ENTRY_RSVD_LO(aw) (0xff0ULL | ~VTD_HAW_MASK(aw))
/* hi */
#define VTD_CONTEXT_ENTRY_AW 7ULL /* Adjusted guest-address-width */
#define VTD_CONTEXT_ENTRY_DID(val) (((val) >> 8) & VTD_DOMAIN_ID_MASK)
@@ -439,7 +454,7 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_SL_RW_MASK 3ULL
#define VTD_SL_R 1ULL
#define VTD_SL_W (1ULL << 1)
-#define VTD_SL_PT_BASE_ADDR_MASK (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK)
+#define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw))
#define VTD_SL_IGN_COM 0xbff0000000000000ULL
#endif
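The macros above are now parameterized by the guest address width so that the
39-bit (3-level) and 48-bit (4-level) configurations share one set of
definitions. A small self-contained sketch of what the two accepted x-aw-bits
values imply; VTD_HAW_MASK() is not part of this hunk, so its expansion here is
an assumption:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumption: VTD_HAW_MASK(aw) expands to ((1ULL << (aw)) - 1). */
    #define VTD_HAW_MASK(aw)         ((1ULL << (aw)) - 1)
    /* These three follow the hunk above. */
    #define VTD_ADDRESS_SIZE(aw)     (1ULL << (aw))
    #define VTD_RTADDR_ADDR_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL)
    #define VTD_CAP_MGAW(aw)         ((((aw) - 1) & 0x3fULL) << 16)

    int main(void)
    {
        /* 39-bit, 3-level tables: 512 GiB of IOVA space */
        printf("39: size 0x%llx mask 0x%llx\n",
               VTD_ADDRESS_SIZE(39), VTD_RTADDR_ADDR_MASK(39));
        /* 48-bit, 4-level tables: 256 TiB of IOVA space */
        printf("48: size 0x%llx mask 0x%llx\n",
               VTD_ADDRESS_SIZE(48), VTD_RTADDR_ADDR_MASK(48));
        /* MGAW is encoded as (aw - 1) in the capability register */
        printf("CAP_MGAW(48) field value: %llu\n", VTD_CAP_MGAW(48) >> 16);
        return 0;
    }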
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index 293caf83ef..8a01a2dd25 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -21,6 +21,8 @@
#include "hw/sysbus.h"
#include "hw/boards.h"
#include "hw/i386/x86-iommu.h"
+#include "hw/i386/pc.h"
+#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"
@@ -80,7 +82,18 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
{
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
X86IOMMUClass *x86_class = X86_IOMMU_GET_CLASS(dev);
+ MachineState *ms = MACHINE(qdev_get_machine());
+ MachineClass *mc = MACHINE_GET_CLASS(ms);
+ PCMachineState *pcms =
+ PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE));
QLIST_INIT(&x86_iommu->iec_notifiers);
+
+ if (!pcms || !pcms->bus) {
+ error_setg(errp, "Machine-type '%s' not supported by IOMMU",
+ mc->name);
+ return;
+ }
+
if (x86_class->realize) {
x86_class->realize(dev, errp);
}
diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c
index ad4e6aa7ff..0e2f2e8bf1 100644
--- a/hw/pci-bridge/gen_pcie_root_port.c
+++ b/hw/pci-bridge/gen_pcie_root_port.c
@@ -74,8 +74,13 @@ static void gen_rp_realize(DeviceState *dev, Error **errp)
PCIDevice *d = PCI_DEVICE(dev);
GenPCIERootPort *grp = GEN_PCIE_ROOT_PORT(d);
PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(d);
+ Error *local_err = NULL;
- rpc->parent_realize(dev, errp);
+ rpc->parent_realize(dev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
int rc = pci_bridge_qemu_reserve_cap_init(d, 0, grp->bus_reserve,
grp->io_reserve, grp->mem_reserve, grp->pref32_reserve,
diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
index 69fc14b218..a8462d48bb 100644
--- a/hw/pci/shpc.c
+++ b/hw/pci/shpc.c
@@ -1,6 +1,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
+#include "qemu/host-utils.h"
#include "qemu/range.h"
#include "qemu/error-report.h"
#include "hw/pci/shpc.h"
@@ -122,16 +123,6 @@
#define SHPC_PCI_TO_IDX(pci_slot) ((pci_slot) - 1)
#define SHPC_IDX_TO_PHYSICAL(slot) ((slot) + 1)
-static int roundup_pow_of_two(int x)
-{
- x |= (x >> 1);
- x |= (x >> 2);
- x |= (x >> 4);
- x |= (x >> 8);
- x |= (x >> 16);
- return x + 1;
-}
-
static uint16_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk)
{
uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot);
@@ -656,7 +647,7 @@ int shpc_init(PCIDevice *d, PCIBus *sec_bus, MemoryRegion *bar,
int shpc_bar_size(PCIDevice *d)
{
- return roundup_pow_of_two(SHPC_SLOT_REG(SHPC_MAX_SLOTS));
+ return pow2roundup32(SHPC_SLOT_REG(SHPC_MAX_SLOTS));
}
void shpc_cleanup(PCIDevice *d, MemoryRegion *bar)
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 093675ed98..6eb97980ad 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -26,6 +26,11 @@
#define VHOST_MEMORY_MAX_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
+/*
+ * Maximum size of virtio device config space
+ */
+#define VHOST_USER_MAX_CONFIG_SIZE 256
+
enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_MQ = 0,
VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
@@ -65,12 +70,15 @@ typedef enum VhostUserRequest {
VHOST_USER_SET_SLAVE_REQ_FD = 21,
VHOST_USER_IOTLB_MSG = 22,
VHOST_USER_SET_VRING_ENDIAN = 23,
+ VHOST_USER_GET_CONFIG = 24,
+ VHOST_USER_SET_CONFIG = 25,
VHOST_USER_MAX
} VhostUserRequest;
typedef enum VhostUserSlaveRequest {
VHOST_USER_SLAVE_NONE = 0,
VHOST_USER_SLAVE_IOTLB_MSG = 1,
+ VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
@@ -92,7 +100,19 @@ typedef struct VhostUserLog {
uint64_t mmap_offset;
} VhostUserLog;
-typedef struct VhostUserMsg {
+typedef struct VhostUserConfig {
+ uint32_t offset;
+ uint32_t size;
+ uint32_t flags;
+ uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+} VhostUserConfig;
+
+static VhostUserConfig c __attribute__ ((unused));
+#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
+ + sizeof(c.size) \
+ + sizeof(c.flags))
+
+typedef struct {
VhostUserRequest request;
#define VHOST_USER_VERSION_MASK (0x3)
@@ -100,7 +120,9 @@ typedef struct VhostUserMsg {
#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
uint32_t flags;
uint32_t size; /* the following payload size */
- union {
+} QEMU_PACKED VhostUserHeader;
+
+typedef union {
#define VHOST_USER_VRING_IDX_MASK (0xff)
#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
uint64_t u64;
@@ -109,15 +131,18 @@ typedef struct VhostUserMsg {
VhostUserMemory memory;
VhostUserLog log;
struct vhost_iotlb_msg iotlb;
- } payload;
+ VhostUserConfig config;
+} VhostUserPayload;
+
+typedef struct VhostUserMsg {
+ VhostUserHeader hdr;
+ VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;
static VhostUserMsg m __attribute__ ((unused));
-#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
- + sizeof(m.flags) \
- + sizeof(m.size))
+#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
-#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
+#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1)
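
Editor's aside, not part of the patch: the refactoring above splits the old monolithic VhostUserMsg into a fixed 12-byte VhostUserHeader plus a VhostUserPayload union, which is what lets slave_read() further down receive the two parts into separate buffers. A trimmed-down, self-contained model of the size arithmetic; only a small subset of the real payload members is reproduced here, so the payload size printed is illustrative only:

#include <stdint.h>
#include <stdio.h>

typedef struct __attribute__((packed)) {
    uint32_t request;
    uint32_t flags;
    uint32_t size;      /* size of the payload that follows on the wire */
} VhostUserHeader;

typedef union {
    uint64_t u64;
    struct {
        uint32_t offset;
        uint32_t size;
        uint32_t flags;
        uint8_t  region[256];   /* VHOST_USER_MAX_CONFIG_SIZE */
    } config;
} VhostUserPayload;

int main(void)
{
    /* The header is always 12 bytes; the payload cap is the size of the
     * largest union member, so adding VhostUserConfig only grows the cap. */
    printf("hdr = %zu, max payload = %zu\n",
           sizeof(VhostUserHeader), sizeof(VhostUserPayload));
    return 0;
}
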
@@ -142,33 +167,33 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
r = qemu_chr_fe_read_all(chr, p, size);
if (r != size) {
error_report("Failed to read msg header. Read %d instead of %d."
- " Original request %d.", r, size, msg->request);
+ " Original request %d.", r, size, msg->hdr.request);
goto fail;
}
/* validate received flags */
- if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
+ if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
error_report("Failed to read msg header."
- " Flags 0x%x instead of 0x%x.", msg->flags,
+ " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
goto fail;
}
/* validate message size is sane */
- if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
+ if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
error_report("Failed to read msg header."
- " Size %d exceeds the maximum %zu.", msg->size,
+ " Size %d exceeds the maximum %zu.", msg->hdr.size,
VHOST_USER_PAYLOAD_SIZE);
goto fail;
}
- if (msg->size) {
+ if (msg->hdr.size) {
p += VHOST_USER_HDR_SIZE;
- size = msg->size;
+ size = msg->hdr.size;
r = qemu_chr_fe_read_all(chr, p, size);
if (r != size) {
error_report("Failed to read msg payload."
- " Read %d instead of %d.", r, msg->size);
+ " Read %d instead of %d.", r, msg->hdr.size);
goto fail;
}
}
@@ -184,7 +209,7 @@ static int process_message_reply(struct vhost_dev *dev,
{
VhostUserMsg msg_reply;
- if ((msg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
+ if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
return 0;
}
@@ -192,10 +217,10 @@ static int process_message_reply(struct vhost_dev *dev,
return -1;
}
- if (msg_reply.request != msg->request) {
+ if (msg_reply.hdr.request != msg->hdr.request) {
error_report("Received unexpected msg type."
"Expected %d received %d",
- msg->request, msg_reply.request);
+ msg->hdr.request, msg_reply.hdr.request);
return -1;
}
@@ -222,15 +247,15 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
{
struct vhost_user *u = dev->opaque;
CharBackend *chr = u->chr;
- int ret, size = VHOST_USER_HDR_SIZE + msg->size;
+ int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
/*
     * For non-vring-specific requests, like VHOST_USER_SET_MEM_TABLE,
     * we only need to send it once, the first time. Later such
     * requests are simply ignored.
*/
- if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) {
- msg->flags &= ~VHOST_USER_NEED_REPLY_MASK;
+ if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
+ msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
return 0;
}
@@ -257,11 +282,11 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
bool shmfd = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_LOG_SHMFD);
VhostUserMsg msg = {
- .request = VHOST_USER_SET_LOG_BASE,
- .flags = VHOST_USER_VERSION,
+ .hdr.request = VHOST_USER_SET_LOG_BASE,
+ .hdr.flags = VHOST_USER_VERSION,
.payload.log.mmap_size = log->size * sizeof(*(log->log)),
.payload.log.mmap_offset = 0,
- .size = sizeof(msg.payload.log),
+ .hdr.size = sizeof(msg.payload.log),
};
if (shmfd && log->fd != -1) {
@@ -273,15 +298,15 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
}
if (shmfd) {
- msg.size = 0;
+ msg.hdr.size = 0;
if (vhost_user_read(dev, &msg) < 0) {
return -1;
}
- if (msg.request != VHOST_USER_SET_LOG_BASE) {
+ if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
error_report("Received unexpected msg type. "
"Expected %d received %d",
- VHOST_USER_SET_LOG_BASE, msg.request);
+ VHOST_USER_SET_LOG_BASE, msg.hdr.request);
return -1;
}
}
@@ -299,12 +324,12 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
VHOST_USER_PROTOCOL_F_REPLY_ACK);
VhostUserMsg msg = {
- .request = VHOST_USER_SET_MEM_TABLE,
- .flags = VHOST_USER_VERSION,
+ .hdr.request = VHOST_USER_SET_MEM_TABLE,
+ .hdr.flags = VHOST_USER_VERSION,
};
if (reply_supported) {
- msg.flags |= VHOST_USER_NEED_REPLY_MASK;
+ msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
}
for (i = 0; i < dev->mem->nregions; ++i) {
@@ -317,11 +342,14 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
&offset);
fd = memory_region_get_fd(mr);
if (fd > 0) {
+ if (fd_num == VHOST_MEMORY_MAX_NREGIONS) {
+ error_report("Failed preparing vhost-user memory table msg");
+ return -1;
+ }
msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
msg.payload.memory.regions[fd_num].mmap_offset = offset;
- assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
fds[fd_num++] = fd;
}
}
@@ -334,9 +362,9 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
return -1;
}
- msg.size = sizeof(msg.payload.memory.nregions);
- msg.size += sizeof(msg.payload.memory.padding);
- msg.size += fd_num * sizeof(VhostUserMemoryRegion);
+ msg.hdr.size = sizeof(msg.payload.memory.nregions);
+ msg.hdr.size += sizeof(msg.payload.memory.padding);
+ msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
return -1;
@@ -353,10 +381,10 @@ static int vhost_user_set_vring_addr(struct vhost_dev *dev,
struct vhost_vring_addr *addr)
{
VhostUserMsg msg = {
- .request = VHOST_USER_SET_VRING_ADDR,
- .flags = VHOST_USER_VERSION,
+ .hdr.request = VHOST_USER_SET_VRING_ADDR,
+ .hdr.flags = VHOST_USER_VERSION,
.payload.addr = *addr,
- .size = sizeof(msg.payload.addr),
+ .hdr.size = sizeof(msg.payload.addr),
};
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
@@ -372,10 +400,10 @@ static int vhost_user_set_vring_endian(struct vhost_dev *dev,
bool cross_endian = virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
VhostUserMsg msg = {
- .request = VHOST_USER_SET_VRING_ENDIAN,
- .flags = VHOST_USER_VERSION,
+ .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
+ .hdr.flags = VHOST_USER_VERSION,
.payload.state = *ring,
- .size = sizeof(msg.payload.state),
+ .hdr.size = sizeof(msg.payload.state),
};
if (!cross_endian) {
@@ -395,10 +423,10 @@ static int vhost_set_vring(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
VhostUserMsg msg = {
- .request = request,
- .flags = VHOST_USER_VERSION,
+ .hdr.request = request,
+ .hdr.flags = VHOST_USER_VERSION,
.payload.state = *ring,
- .size = sizeof(msg.payload.state),
+ .hdr.size = sizeof(msg.payload.state),
};
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
@@ -444,10 +472,10 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
VhostUserMsg msg = {
- .request = VHOST_USER_GET_VRING_BASE,
- .flags = VHOST_USER_VERSION,
+ .hdr.request = VHOST_USER_GET_VRING_BASE,
+ .hdr.flags = VHOST_USER_VERSION,
.payload.state = *ring,
- .size = sizeof(msg.payload.state),
+ .hdr.size = sizeof(msg.payload.state),
};
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
@@ -458,13 +486,13 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
return -1;
}
- if (msg.request != VHOST_USER_GET_VRING_BASE) {
+ if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
error_report("Received unexpected msg type. Expected %d received %d",
- VHOST_USER_GET_VRING_BASE, msg.request);
+ VHOST_USER_GET_VRING_BASE, msg.hdr.request);
return -1;
}
- if (msg.size != sizeof(msg.payload.state)) {
+ if (msg.hdr.size != sizeof(msg.payload.state)) {
error_report("Received bad msg size.");
return -1;
}
@@ -481,10 +509,10 @@ static int vhost_set_vring_file(struct vhost_dev *dev,
int fds[VHOST_MEMORY_MAX_NREGIONS];
size_t fd_num = 0;
VhostUserMsg msg = {
- .request = request,
- .flags = VHOST_USER_VERSION,
+ .hdr.request = request,
+ .hdr.flags = VHOST_USER_VERSION,
.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
- .size = sizeof(msg.payload.u64),
+ .hdr.size = sizeof(msg.payload.u64),
};
if (ioeventfd_enabled() && file->fd > 0) {
@@ -515,10 +543,10 @@ static int vhost_user_set_vring_call(struct vhost_dev *dev,
static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
{
VhostUserMsg msg = {
- .request = request,
- .flags = VHOST_USER_VERSION,
+ .hdr.request = request,
+ .hdr.flags = VHOST_USER_VERSION,
.payload.u64 = u64,
- .size = sizeof(msg.payload.u64),
+ .hdr.size = sizeof(msg.payload.u64),
};
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
@@ -543,8 +571,8 @@ static int vhost_user_set_protocol_features(struct vhost_dev *dev,
static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
VhostUserMsg msg = {
- .request = request,
- .flags = VHOST_USER_VERSION,
+ .hdr.request = request,
+ .hdr.flags = VHOST_USER_VERSION,
};
if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
@@ -559,13 +587,13 @@ static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
return -1;
}
- if (msg.request != request) {
+ if (msg.hdr.request != request) {
error_report("Received unexpected msg type. Expected %d received %d",
- request, msg.request);
+ request, msg.hdr.request);
return -1;
}
- if (msg.size != sizeof(msg.payload.u64)) {
+ if (msg.hdr.size != sizeof(msg.payload.u64)) {
error_report("Received bad msg size.");
return -1;
}
@@ -583,8 +611,8 @@ static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
static int vhost_user_set_owner(struct vhost_dev *dev)
{
VhostUserMsg msg = {
- .request = VHOST_USER_SET_OWNER,
- .flags = VHOST_USER_VERSION,
+ .hdr.request = VHOST_USER_SET_OWNER,
+ .hdr.flags = VHOST_USER_VERSION,
};
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
@@ -597,8 +625,8 @@ static int vhost_user_set_owner(struct vhost_dev *dev)
static int vhost_user_reset_device(struct vhost_dev *dev)
{
VhostUserMsg msg = {
- .request = VHOST_USER_RESET_OWNER,
- .flags = VHOST_USER_VERSION,
+ .hdr.request = VHOST_USER_RESET_OWNER,
+ .hdr.flags = VHOST_USER_VERSION,
};
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
@@ -608,37 +636,56 @@ static int vhost_user_reset_device(struct vhost_dev *dev)
return 0;
}
+static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
+{
+ int ret = -1;
+
+ if (!dev->config_ops) {
+ return -1;
+ }
+
+ if (dev->config_ops->vhost_dev_config_notifier) {
+ ret = dev->config_ops->vhost_dev_config_notifier(dev);
+ }
+
+ return ret;
+}
+
static void slave_read(void *opaque)
{
struct vhost_dev *dev = opaque;
struct vhost_user *u = dev->opaque;
- VhostUserMsg msg = { 0, };
+ VhostUserHeader hdr = { 0, };
+ VhostUserPayload payload = { 0, };
int size, ret = 0;
/* Read header */
- size = read(u->slave_fd, &msg, VHOST_USER_HDR_SIZE);
+ size = read(u->slave_fd, &hdr, VHOST_USER_HDR_SIZE);
if (size != VHOST_USER_HDR_SIZE) {
error_report("Failed to read from slave.");
goto err;
}
- if (msg.size > VHOST_USER_PAYLOAD_SIZE) {
+ if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
error_report("Failed to read msg header."
- " Size %d exceeds the maximum %zu.", msg.size,
+ " Size %d exceeds the maximum %zu.", hdr.size,
VHOST_USER_PAYLOAD_SIZE);
goto err;
}
/* Read payload */
- size = read(u->slave_fd, &msg.payload, msg.size);
- if (size != msg.size) {
+ size = read(u->slave_fd, &payload, hdr.size);
+ if (size != hdr.size) {
error_report("Failed to read payload from slave.");
goto err;
}
- switch (msg.request) {
+ switch (hdr.request) {
case VHOST_USER_SLAVE_IOTLB_MSG:
- ret = vhost_backend_handle_iotlb_msg(dev, &msg.payload.iotlb);
+ ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
+ break;
+    case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
+ ret = vhost_user_slave_handle_config_change(dev);
break;
default:
error_report("Received unexpected msg type.");
@@ -649,15 +696,23 @@ static void slave_read(void *opaque)
     * REPLY_ACK feature handling. Other reply types have to be managed
* directly in their request handlers.
*/
- if (msg.flags & VHOST_USER_NEED_REPLY_MASK) {
- msg.flags &= ~VHOST_USER_NEED_REPLY_MASK;
- msg.flags |= VHOST_USER_REPLY_MASK;
+ if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
+ struct iovec iovec[2];
+
+ hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
+ hdr.flags |= VHOST_USER_REPLY_MASK;
+
+ payload.u64 = !!ret;
+ hdr.size = sizeof(payload.u64);
- msg.payload.u64 = !!ret;
- msg.size = sizeof(msg.payload.u64);
+ iovec[0].iov_base = &hdr;
+ iovec[0].iov_len = VHOST_USER_HDR_SIZE;
+ iovec[1].iov_base = &payload;
+ iovec[1].iov_len = hdr.size;
- size = write(u->slave_fd, &msg, VHOST_USER_HDR_SIZE + msg.size);
- if (size != VHOST_USER_HDR_SIZE + msg.size) {
+ size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
+ if (size != VHOST_USER_HDR_SIZE + hdr.size) {
error_report("Failed to send msg reply to slave.");
goto err;
}
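
Editor's aside, not part of the patch: because header and payload now live in separate local variables, the reply is assembled as a two-element iovec and sent with a single writev() instead of one contiguous write(). A minimal, self-contained demonstration of that scatter-gather pattern (the buffers and the stdout target are placeholders, not vhost-user data):

#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
    char hdr[12] = "HDR........";    /* stands in for VhostUserHeader */
    char payload[8] = "PAYLOAD";     /* stands in for payload.u64 */
    struct iovec iov[2] = {
        { .iov_base = hdr,     .iov_len = sizeof(hdr) },
        { .iov_base = payload, .iov_len = sizeof(payload) },
    };

    /* Both buffers go out in one syscall, in order, with no copy into a
     * single contiguous message. */
    ssize_t n = writev(STDOUT_FILENO, iov, 2);
    if (n != (ssize_t)(sizeof(hdr) + sizeof(payload))) {
        fprintf(stderr, "short write\n");
        return 1;
    }
    return 0;
}
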
@@ -675,8 +730,8 @@ err:
static int vhost_setup_slave_channel(struct vhost_dev *dev)
{
VhostUserMsg msg = {
- .request = VHOST_USER_SET_SLAVE_REQ_FD,
- .flags = VHOST_USER_VERSION,
+ .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
+ .hdr.flags = VHOST_USER_VERSION,
};
struct vhost_user *u = dev->opaque;
int sv[2], ret = 0;
@@ -697,7 +752,7 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev)
qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev);
if (reply_supported) {
- msg.flags |= VHOST_USER_NEED_REPLY_MASK;
+ msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
}
ret = vhost_user_write(dev, &msg, &sv[1], 1);
@@ -842,10 +897,10 @@ static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
/* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
if (virtio_has_feature(dev->protocol_features,
VHOST_USER_PROTOCOL_F_RARP)) {
- msg.request = VHOST_USER_SEND_RARP;
- msg.flags = VHOST_USER_VERSION;
+ msg.hdr.request = VHOST_USER_SEND_RARP;
+ msg.hdr.flags = VHOST_USER_VERSION;
memcpy((char *)&msg.payload.u64, mac_addr, 6);
- msg.size = sizeof(msg.payload.u64);
+ msg.hdr.size = sizeof(msg.payload.u64);
return vhost_user_write(dev, &msg, NULL, 0);
}
@@ -879,12 +934,12 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
return 0;
}
- msg.request = VHOST_USER_NET_SET_MTU;
+ msg.hdr.request = VHOST_USER_NET_SET_MTU;
msg.payload.u64 = mtu;
- msg.size = sizeof(msg.payload.u64);
- msg.flags = VHOST_USER_VERSION;
+ msg.hdr.size = sizeof(msg.payload.u64);
+ msg.hdr.flags = VHOST_USER_VERSION;
if (reply_supported) {
- msg.flags |= VHOST_USER_NEED_REPLY_MASK;
+ msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
}
if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
@@ -903,9 +958,9 @@ static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
struct vhost_iotlb_msg *imsg)
{
VhostUserMsg msg = {
- .request = VHOST_USER_IOTLB_MSG,
- .size = sizeof(msg.payload.iotlb),
- .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
+ .hdr.request = VHOST_USER_IOTLB_MSG,
+ .hdr.size = sizeof(msg.payload.iotlb),
+ .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
.payload.iotlb = *imsg,
};
@@ -922,6 +977,83 @@ static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
/* No-op as the receive channel is not dedicated to IOTLB messages. */
}
+static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
+ uint32_t config_len)
+{
+ VhostUserMsg msg = {
+ .hdr.request = VHOST_USER_GET_CONFIG,
+ .hdr.flags = VHOST_USER_VERSION,
+ .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
+ };
+
+ if (config_len > VHOST_USER_MAX_CONFIG_SIZE) {
+ return -1;
+ }
+
+ msg.payload.config.offset = 0;
+ msg.payload.config.size = config_len;
+ if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
+ return -1;
+ }
+
+ if (vhost_user_read(dev, &msg) < 0) {
+ return -1;
+ }
+
+ if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
+ error_report("Received unexpected msg type. Expected %d received %d",
+ VHOST_USER_GET_CONFIG, msg.hdr.request);
+ return -1;
+ }
+
+ if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
+ error_report("Received bad msg size.");
+ return -1;
+ }
+
+ memcpy(config, msg.payload.config.region, config_len);
+
+ return 0;
+}
+
+static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
+ uint32_t offset, uint32_t size, uint32_t flags)
+{
+ uint8_t *p;
+ bool reply_supported = virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_REPLY_ACK);
+
+ VhostUserMsg msg = {
+ .hdr.request = VHOST_USER_SET_CONFIG,
+ .hdr.flags = VHOST_USER_VERSION,
+ .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
+ };
+
+ if (reply_supported) {
+ msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
+ }
+
+ if (size > VHOST_USER_MAX_CONFIG_SIZE) {
+ return -1;
+ }
+
+    msg.payload.config.offset = offset;
+    msg.payload.config.size = size;
+    msg.payload.config.flags = flags;
+ p = msg.payload.config.region;
+ memcpy(p, data, size);
+
+ if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
+ return -1;
+ }
+
+ if (reply_supported) {
+ return process_message_reply(dev, &msg);
+ }
+
+ return 0;
+}
+
const VhostOps user_ops = {
.backend_type = VHOST_BACKEND_TYPE_USER,
.vhost_backend_init = vhost_user_init,
@@ -948,4 +1080,6 @@ const VhostOps user_ops = {
.vhost_net_set_mtu = vhost_user_net_set_mtu,
.vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
.vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
+ .vhost_get_config = vhost_user_get_config,
+ .vhost_set_config = vhost_user_set_config,
};
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index e4290ce93d..386aef85be 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1505,6 +1505,38 @@ void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
}
}
+int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
+ uint32_t config_len)
+{
+ assert(hdev->vhost_ops);
+
+ if (hdev->vhost_ops->vhost_get_config) {
+ return hdev->vhost_ops->vhost_get_config(hdev, config, config_len);
+ }
+
+ return -1;
+}
+
+int vhost_dev_set_config(struct vhost_dev *hdev, const uint8_t *data,
+ uint32_t offset, uint32_t size, uint32_t flags)
+{
+ assert(hdev->vhost_ops);
+
+ if (hdev->vhost_ops->vhost_set_config) {
+ return hdev->vhost_ops->vhost_set_config(hdev, data, offset,
+ size, flags);
+ }
+
+ return -1;
+}
+
+void vhost_dev_set_config_notifier(struct vhost_dev *hdev,
+ const VhostDevConfigOps *ops)
+{
+ assert(hdev->vhost_ops);
+ hdev->config_ops = ops;
+}
+
/* Host notifiers must be enabled at this point. */
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index 3042232daf..8106346927 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -256,6 +256,15 @@ bool virtio_bus_ioeventfd_enabled(VirtioBusState *bus)
return k->ioeventfd_assign && k->ioeventfd_enabled(proxy);
}
+static void virtio_bus_cleanup_event_notifier(EventNotifier *notifier)
+{
+ /* Test and clear notifier after disabling event,
+ * in case poll callback didn't have time to run.
+ */
+ virtio_queue_host_notifier_read(notifier);
+ event_notifier_cleanup(notifier);
+}
+
/*
* This function switches ioeventfd on/off in the device.
* The caller must set or clear the handlers for the EventNotifier.
@@ -283,19 +292,13 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign)
r = k->ioeventfd_assign(proxy, notifier, n, true);
if (r < 0) {
error_report("%s: unable to assign ioeventfd: %d", __func__, r);
- goto cleanup_event_notifier;
+ virtio_bus_cleanup_event_notifier(notifier);
}
- return 0;
} else {
+ notifier->cleanup = virtio_bus_cleanup_event_notifier;
k->ioeventfd_assign(proxy, notifier, n, false);
}
-cleanup_event_notifier:
- /* Test and clear notifier after disabling event,
- * in case poll callback didn't have time to run.
- */
- virtio_queue_host_notifier_read(notifier);
- event_notifier_cleanup(notifier);
return r;
}
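
Editor's aside, not part of the patch: instead of tearing the notifier down inline on deassign, the bus now records a cleanup hook in the notifier (see the new EventNotifier::cleanup field later in this series) so the teardown can run once it is safe. A self-contained toy showing that deferred-cleanup shape; Notifier, deassign() and infrastructure_flush() are made-up stand-ins:

#include <stdio.h>

typedef struct Notifier {
    int fd;                              /* stands in for the eventfd */
    void (*cleanup)(struct Notifier *);  /* mirrors EventNotifier::cleanup */
} Notifier;

static void notifier_cleanup(Notifier *n)
{
    printf("closing fd %d\n", n->fd);
    n->fd = -1;
    n->cleanup = NULL;
}

static void deassign(Notifier *n)
{
    /* Don't clean up yet: just remember how to, much like
     * notifier->cleanup = virtio_bus_cleanup_event_notifier above. */
    n->cleanup = notifier_cleanup;
}

static void infrastructure_flush(Notifier *n)
{
    if (n->cleanup) {
        n->cleanup(n);   /* runs once no poll callback can still be in flight */
    }
}

int main(void)
{
    Notifier n = { .fd = 42, .cleanup = NULL };

    deassign(&n);
    infrastructure_flush(&n);
    return 0;
}
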
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 6c75cca88a..9ae10f0cdd 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1978,6 +1978,58 @@ static const TypeInfo virtio_blk_pci_info = {
.class_init = virtio_blk_pci_class_init,
};
+#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
+/* vhost-user-blk */
+
+static Property vhost_user_blk_pci_properties[] = {
+ DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
+ DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vhost_user_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+ VHostUserBlkPCI *dev = VHOST_USER_BLK_PCI(vpci_dev);
+ DeviceState *vdev = DEVICE(&dev->vdev);
+
+ qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
+ object_property_set_bool(OBJECT(vdev), true, "realized", errp);
+}
+
+static void vhost_user_blk_pci_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+ PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+
+ set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+ dc->props = vhost_user_blk_pci_properties;
+ k->realize = vhost_user_blk_pci_realize;
+ pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+ pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_BLOCK;
+ pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
+ pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI;
+}
+
+static void vhost_user_blk_pci_instance_init(Object *obj)
+{
+ VHostUserBlkPCI *dev = VHOST_USER_BLK_PCI(obj);
+
+ virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+ TYPE_VHOST_USER_BLK);
+ object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev),
+ "bootindex", &error_abort);
+}
+
+static const TypeInfo vhost_user_blk_pci_info = {
+ .name = TYPE_VHOST_USER_BLK_PCI,
+ .parent = TYPE_VIRTIO_PCI,
+ .instance_size = sizeof(VHostUserBlkPCI),
+ .instance_init = vhost_user_blk_pci_instance_init,
+ .class_init = vhost_user_blk_pci_class_init,
+};
+#endif
+
/* virtio-scsi-pci */
static Property virtio_scsi_pci_properties[] = {
@@ -2624,6 +2676,9 @@ static void virtio_pci_register_types(void)
type_register_static(&virtio_9p_pci_info);
#endif
type_register_static(&virtio_blk_pci_info);
+#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
+ type_register_static(&vhost_user_blk_pci_info);
+#endif
type_register_static(&virtio_scsi_pci_info);
type_register_static(&virtio_balloon_pci_info);
type_register_static(&virtio_serial_pci_info);
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index 12d3a90686..813082b0d7 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -27,6 +27,9 @@
#include "hw/virtio/virtio-gpu.h"
#include "hw/virtio/virtio-crypto.h"
#include "hw/virtio/vhost-user-scsi.h"
+#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
+#include "hw/virtio/vhost-user-blk.h"
+#endif
#ifdef CONFIG_VIRTFS
#include "hw/9pfs/virtio-9p.h"
@@ -46,6 +49,7 @@ typedef struct VirtIOSerialPCI VirtIOSerialPCI;
typedef struct VirtIONetPCI VirtIONetPCI;
typedef struct VHostSCSIPCI VHostSCSIPCI;
typedef struct VHostUserSCSIPCI VHostUserSCSIPCI;
+typedef struct VHostUserBlkPCI VHostUserBlkPCI;
typedef struct VirtIORngPCI VirtIORngPCI;
typedef struct VirtIOInputPCI VirtIOInputPCI;
typedef struct VirtIOInputHIDPCI VirtIOInputHIDPCI;
@@ -244,6 +248,20 @@ struct VHostUserSCSIPCI {
VHostUserSCSI vdev;
};
+#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
+/*
+ * vhost-user-blk-pci: This extends VirtioPCIProxy.
+ */
+#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci"
+#define VHOST_USER_BLK_PCI(obj) \
+ OBJECT_CHECK(VHostUserBlkPCI, (obj), TYPE_VHOST_USER_BLK_PCI)
+
+struct VHostUserBlkPCI {
+ VirtIOPCIProxy parent_obj;
+ VHostUserBlk vdev;
+};
+#endif
+
/*
* virtio-blk-pci: This extends VirtioPCIProxy.
*/
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index d6002ee550..3ac3491bee 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2574,6 +2574,7 @@ static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
int n, r, err;
+ memory_region_transaction_begin();
for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
VirtQueue *vq = &vdev->vq[n];
if (!virtio_queue_get_num(vdev, n)) {
@@ -2596,6 +2597,7 @@ static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
}
event_notifier_set(&vq->host_notifier);
}
+ memory_region_transaction_commit();
return 0;
assign_error:
@@ -2609,6 +2611,7 @@ assign_error:
r = virtio_bus_set_host_notifier(qbus, n, false);
assert(r >= 0);
}
+ memory_region_transaction_commit();
return err;
}
@@ -2625,6 +2628,7 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
int n, r;
+ memory_region_transaction_begin();
for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
VirtQueue *vq = &vdev->vq[n];
@@ -2635,6 +2639,7 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
r = virtio_bus_set_host_notifier(qbus, n, false);
assert(r >= 0);
}
+ memory_region_transaction_commit();
}
void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index ac15e6be14..45ec8919b6 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -46,8 +46,10 @@
#define VTD_SID_TO_DEVFN(sid) ((sid) & 0xff)
#define DMAR_REG_SIZE 0x230
-#define VTD_HOST_ADDRESS_WIDTH 39
-#define VTD_HAW_MASK ((1ULL << VTD_HOST_ADDRESS_WIDTH) - 1)
+#define VTD_HOST_AW_39BIT 39
+#define VTD_HOST_AW_48BIT 48
+#define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_39BIT
+#define VTD_HAW_MASK(aw) ((1ULL << (aw)) - 1)
#define DMAR_REPORT_F_INTR (1)
@@ -302,6 +304,7 @@ struct IntelIOMMUState {
bool intr_eime; /* Extended interrupt mode enabled */
    OnOffAuto intr_eim;             /* Toggle for EIM capability */
bool buggy_eim; /* Force buggy EIM unless eim=off */
+ uint8_t aw_bits; /* Host/IOVA address width (in bits) */
};
/* Find the VTD Address space associated with the given bus pointer,
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index a7a5f22bc6..592254f40d 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -20,6 +20,11 @@ typedef enum VhostBackendType {
VHOST_BACKEND_TYPE_MAX = 3,
} VhostBackendType;
+typedef enum VhostSetConfigType {
+ VHOST_SET_CONFIG_TYPE_MASTER = 0,
+ VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
+} VhostSetConfigType;
+
struct vhost_dev;
struct vhost_log;
struct vhost_memory;
@@ -84,6 +89,11 @@ typedef void (*vhost_set_iotlb_callback_op)(struct vhost_dev *dev,
int enabled);
typedef int (*vhost_send_device_iotlb_msg_op)(struct vhost_dev *dev,
struct vhost_iotlb_msg *imsg);
+typedef int (*vhost_set_config_op)(struct vhost_dev *dev, const uint8_t *data,
+ uint32_t offset, uint32_t size,
+ uint32_t flags);
+typedef int (*vhost_get_config_op)(struct vhost_dev *dev, uint8_t *config,
+ uint32_t config_len);
typedef struct VhostOps {
VhostBackendType backend_type;
@@ -118,6 +128,8 @@ typedef struct VhostOps {
vhost_vsock_set_running_op vhost_vsock_set_running;
vhost_set_iotlb_callback_op vhost_set_iotlb_callback;
vhost_send_device_iotlb_msg_op vhost_send_device_iotlb_msg;
+ vhost_get_config_op vhost_get_config;
+ vhost_set_config_op vhost_set_config;
} VhostOps;
extern const VhostOps user_ops;
diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h
new file mode 100644
index 0000000000..5804cc904a
--- /dev/null
+++ b/include/hw/virtio/vhost-user-blk.h
@@ -0,0 +1,41 @@
+/*
+ * vhost-user-blk host device
+ * Copyright(C) 2017 Intel Corporation.
+ *
+ * Authors:
+ * Changpeng Liu <changpeng.liu@intel.com>
+ *
+ * Based on vhost-scsi.h, Copyright IBM, Corp. 2011
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_USER_BLK_H
+#define VHOST_USER_BLK_H
+
+#include "standard-headers/linux/virtio_blk.h"
+#include "qemu-common.h"
+#include "hw/qdev.h"
+#include "hw/block/block.h"
+#include "chardev/char-fe.h"
+#include "hw/virtio/vhost.h"
+
+#define TYPE_VHOST_USER_BLK "vhost-user-blk"
+#define VHOST_USER_BLK(obj) \
+ OBJECT_CHECK(VHostUserBlk, (obj), TYPE_VHOST_USER_BLK)
+
+typedef struct VHostUserBlk {
+ VirtIODevice parent_obj;
+ CharBackend chardev;
+ int32_t bootindex;
+ struct virtio_blk_config blkcfg;
+ uint16_t num_queues;
+ uint32_t queue_size;
+ uint32_t config_wce;
+ uint32_t config_ro;
+ struct vhost_dev dev;
+} VHostUserBlk;
+
+#endif
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 467dc7794b..1dc2d73d76 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -46,6 +46,12 @@ struct vhost_iommu {
QLIST_ENTRY(vhost_iommu) iommu_next;
};
+typedef struct VhostDevConfigOps {
+    /* Callback invoked when the vhost device config space changes
+     */
+ int (*vhost_dev_config_notifier)(struct vhost_dev *dev);
+} VhostDevConfigOps;
+
struct vhost_memory;
struct vhost_dev {
VirtIODevice *vdev;
@@ -76,6 +82,7 @@ struct vhost_dev {
QLIST_ENTRY(vhost_dev) entry;
QLIST_HEAD(, vhost_iommu) iommu_list;
IOMMUNotifier n;
+ const VhostDevConfigOps *config_ops;
};
int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
@@ -106,4 +113,12 @@ int vhost_net_set_backend(struct vhost_dev *hdev,
struct vhost_vring_file *file);
int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write);
+int vhost_dev_get_config(struct vhost_dev *dev, uint8_t *config,
+ uint32_t config_len);
+int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data,
+ uint32_t offset, uint32_t size, uint32_t flags);
+/* Notifier callback invoked when the vhost device config space changes
+ */
+void vhost_dev_set_config_notifier(struct vhost_dev *dev,
+ const VhostDevConfigOps *ops);
#endif
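
Editor's aside, not part of the patch: a device model consumes these hooks by registering a VhostDevConfigOps through vhost_dev_set_config_notifier() and by going through vhost_dev_get_config()/vhost_dev_set_config() to reach the backend's config space. A hedged sketch of the registration pattern; my_config_notifier, my_config_ops and the 64-byte buffer are invented for illustration and do not appear in this series:

static int my_config_notifier(struct vhost_dev *dev)
{
    uint8_t cfg[64];

    /* Re-read the backend's device config space when it reports a change. */
    return vhost_dev_get_config(dev, cfg, sizeof(cfg));
}

static const VhostDevConfigOps my_config_ops = {
    .vhost_dev_config_notifier = my_config_notifier,
};

/* At realize time, once vhost_dev_init() has succeeded:
 *     vhost_dev_set_config_notifier(&s->dev, &my_config_ops);
 */
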
diff --git a/include/qemu/event_notifier.h b/include/qemu/event_notifier.h
index 599c99f1a5..b30a45474f 100644
--- a/include/qemu/event_notifier.h
+++ b/include/qemu/event_notifier.h
@@ -26,6 +26,7 @@ struct EventNotifier {
int rfd;
int wfd;
#endif
+ void (*cleanup)(EventNotifier *);
};
typedef void EventNotifierHandler(EventNotifier *);
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 5ac621cf1f..38da849be9 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -400,6 +400,16 @@ static inline uint64_t pow2ceil(uint64_t value)
return 0x8000000000000000ull >> (n - 1);
}
+static inline uint32_t pow2roundup32(uint32_t x)
+{
+ x |= (x >> 1);
+ x |= (x >> 2);
+ x |= (x >> 4);
+ x |= (x >> 8);
+ x |= (x >> 16);
+ return x + 1;
+}
+
/**
* urshift - 128-bit Unsigned Right Shift.
* @plow: in/out - lower 64-bit integer.
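
Editor's aside, not part of the patch: pow2roundup32() is the bit-smearing helper moved out of shpc.c. Like the private copy it replaces, an exact power of two is rounded up to the next one (8 becomes 16). A self-contained check of the arithmetic:

#include <stdint.h>
#include <stdio.h>

static inline uint32_t pow2roundup32(uint32_t x)
{
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    return x + 1;
}

int main(void)
{
    /* prints: 8 16 128 */
    printf("%u %u %u\n", pow2roundup32(5), pow2roundup32(8), pow2roundup32(100));
    return 0;
}
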
diff --git a/tests/acpi-test-data/pc/DSDT.numamem b/tests/acpi-test-data/pc/DSDT.numamem
new file mode 100644
index 0000000000..224cfdd9e9
--- /dev/null
+++ b/tests/acpi-test-data/pc/DSDT.numamem
Binary files differ
diff --git a/tests/acpi-test-data/pc/SRAT.numamem b/tests/acpi-test-data/pc/SRAT.numamem
new file mode 100644
index 0000000000..dbc595d9cb
--- /dev/null
+++ b/tests/acpi-test-data/pc/SRAT.numamem
Binary files differ
diff --git a/tests/acpi-test-data/q35/DSDT.numamem b/tests/acpi-test-data/q35/DSDT.numamem
new file mode 100644
index 0000000000..8c9fa445b0
--- /dev/null
+++ b/tests/acpi-test-data/q35/DSDT.numamem
Binary files differ
diff --git a/tests/acpi-test-data/q35/SRAT.numamem b/tests/acpi-test-data/q35/SRAT.numamem
new file mode 100644
index 0000000000..dbc595d9cb
--- /dev/null
+++ b/tests/acpi-test-data/q35/SRAT.numamem
Binary files differ
diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index e28e0c98cf..b354aaafe6 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -210,10 +210,15 @@ static void test_acpi_facs_table(test_data *data)
ACPI_ASSERT_CMP(facs_table->signature, "FACS");
}
-static void test_dst_table(AcpiSdtTable *sdt_table, uint32_t addr)
+/** fetch_table
+ * Load the ACPI table at @addr into the table descriptor @sdt_table
+ * and check that the header checksum matches the actual one.
+ */
+static void fetch_table(AcpiSdtTable *sdt_table, uint32_t addr)
{
uint8_t checksum;
+ memset(sdt_table, 0, sizeof(*sdt_table));
ACPI_READ_TABLE_HEADER(&sdt_table->header, addr);
sdt_table->aml_len = le32_to_cpu(sdt_table->header.length)
@@ -233,17 +238,15 @@ static void test_acpi_dsdt_table(test_data *data)
AcpiSdtTable dsdt_table;
uint32_t addr = le32_to_cpu(data->fadt_table.dsdt);
- memset(&dsdt_table, 0, sizeof(dsdt_table));
- data->tables = g_array_new(false, true, sizeof(AcpiSdtTable));
-
- test_dst_table(&dsdt_table, addr);
+ fetch_table(&dsdt_table, addr);
ACPI_ASSERT_CMP(dsdt_table.header.signature, "DSDT");
- /* Place DSDT first */
+    /* Since the DSDT isn't in the RSDT, add it to the ASL test tables list manually */
g_array_append_val(data->tables, dsdt_table);
}
-static void test_acpi_tables(test_data *data)
+/* Load all tables directly referenced by the RSDT and add them to the test list */
+static void fetch_rsdt_referenced_tables(test_data *data)
{
int tables_nr = data->rsdt_tables_nr - 1; /* fadt is first */
int i;
@@ -252,9 +255,10 @@ static void test_acpi_tables(test_data *data)
AcpiSdtTable ssdt_table;
uint32_t addr;
- memset(&ssdt_table, 0, sizeof(ssdt_table));
addr = le32_to_cpu(data->rsdt_tables_addr[i + 1]); /* fadt is first */
- test_dst_table(&ssdt_table, addr);
+ fetch_table(&ssdt_table, addr);
+
+        /* Add the table to the ASL test tables list */
g_array_append_val(data->tables, ssdt_table);
}
}
@@ -425,6 +429,7 @@ try_again:
return exp_tables;
}
+/* test the list of tables in @data->tables against reference tables */
static void test_acpi_asl(test_data *data)
{
int i;
@@ -636,13 +641,14 @@ static void test_acpi_one(const char *params, test_data *data)
boot_sector_test();
+ data->tables = g_array_new(false, true, sizeof(AcpiSdtTable));
test_acpi_rsdp_address(data);
test_acpi_rsdp_table(data);
test_acpi_rsdt_table(data);
test_acpi_fadt_table(data);
test_acpi_facs_table(data);
test_acpi_dsdt_table(data);
- test_acpi_tables(data);
+ fetch_rsdt_referenced_tables(data);
if (iasl) {
if (getenv(ACPI_REBUILD_EXPECTED_AML)) {
@@ -810,6 +816,28 @@ static void test_acpi_piix4_tcg_memhp(void)
free_test_data(&data);
}
+static void test_acpi_q35_tcg_numamem(void)
+{
+ test_data data;
+
+ memset(&data, 0, sizeof(data));
+ data.machine = MACHINE_Q35;
+ data.variant = ".numamem";
+ test_acpi_one(" -numa node -numa node,mem=128", &data);
+ free_test_data(&data);
+}
+
+static void test_acpi_piix4_tcg_numamem(void)
+{
+ test_data data;
+
+ memset(&data, 0, sizeof(data));
+ data.machine = MACHINE_PC;
+ data.variant = ".numamem";
+ test_acpi_one(" -numa node -numa node,mem=128", &data);
+ free_test_data(&data);
+}
+
int main(int argc, char *argv[])
{
const char *arch = qtest_get_arch();
@@ -832,6 +860,8 @@ int main(int argc, char *argv[])
qtest_add_func("acpi/q35/cpuhp", test_acpi_q35_tcg_cphp);
qtest_add_func("acpi/piix4/memhp", test_acpi_piix4_tcg_memhp);
qtest_add_func("acpi/q35/memhp", test_acpi_q35_tcg_memhp);
+ qtest_add_func("acpi/piix4/numamem", test_acpi_piix4_tcg_numamem);
+ qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem);
}
ret = g_test_run();
boot_sector_cleanup(disk);
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index e2c89ed376..ec6ac9dc9e 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -55,6 +55,7 @@
/*********** FROM hw/virtio/vhost-user.c *************************************/
#define VHOST_MEMORY_MAX_NREGIONS 8
+#define VHOST_MAX_VIRTQUEUES 0x100
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_USER_PROTOCOL_F_MQ 0
@@ -141,6 +142,8 @@ enum {
typedef struct TestServer {
QPCIBus *bus;
+ QVirtioPCIDevice *dev;
+ QVirtQueue *vq[VHOST_MAX_VIRTQUEUES];
gchar *socket_path;
gchar *mig_path;
gchar *chr_name;
@@ -155,33 +158,51 @@ typedef struct TestServer {
bool test_fail;
int test_flags;
int queues;
+ QGuestAllocator *alloc;
} TestServer;
static const char *tmpfs;
static const char *root;
-static void init_virtio_dev(TestServer *s)
+static void init_virtio_dev(TestServer *s, uint32_t features_mask)
{
- QVirtioPCIDevice *dev;
uint32_t features;
+ int i;
s->bus = qpci_init_pc(NULL);
g_assert_nonnull(s->bus);
- dev = qvirtio_pci_device_find(s->bus, VIRTIO_ID_NET);
- g_assert_nonnull(dev);
+ s->dev = qvirtio_pci_device_find(s->bus, VIRTIO_ID_NET);
+ g_assert_nonnull(s->dev);
+
+ qvirtio_pci_device_enable(s->dev);
+ qvirtio_reset(&s->dev->vdev);
+ qvirtio_set_acknowledge(&s->dev->vdev);
+ qvirtio_set_driver(&s->dev->vdev);
- qvirtio_pci_device_enable(dev);
- qvirtio_reset(&dev->vdev);
- qvirtio_set_acknowledge(&dev->vdev);
- qvirtio_set_driver(&dev->vdev);
+ s->alloc = pc_alloc_init();
- features = qvirtio_get_features(&dev->vdev);
- features = features & VIRTIO_NET_F_MAC;
- qvirtio_set_features(&dev->vdev, features);
+ for (i = 0; i < s->queues * 2; i++) {
+ s->vq[i] = qvirtqueue_setup(&s->dev->vdev, s->alloc, i);
+ }
+
+ features = qvirtio_get_features(&s->dev->vdev);
+ features = features & features_mask;
+ qvirtio_set_features(&s->dev->vdev, features);
- qvirtio_set_driver_ok(&dev->vdev);
- qvirtio_pci_device_free(dev);
+ qvirtio_set_driver_ok(&s->dev->vdev);
+}
+
+static void uninit_virtio_dev(TestServer *s)
+{
+ int i;
+
+ for (i = 0; i < s->queues * 2; i++) {
+ qvirtqueue_cleanup(s->dev->vdev.bus, s->vq[i], s->alloc);
+ }
+ pc_alloc_uninit(s->alloc);
+
+ qvirtio_pci_device_free(s->dev);
}
static void wait_for_fds(TestServer *s)
@@ -617,6 +638,30 @@ GSourceFuncs test_migrate_source_funcs = {
.check = test_migrate_source_check,
};
+static void test_read_guest_mem(void)
+{
+ TestServer *server = NULL;
+ char *qemu_cmd = NULL;
+ QTestState *s = NULL;
+
+ server = test_server_new("test");
+ test_server_listen(server);
+
+ qemu_cmd = GET_QEMU_CMD(server);
+
+ s = qtest_start(qemu_cmd);
+ g_free(qemu_cmd);
+
+ init_virtio_dev(server, 1u << VIRTIO_NET_F_MAC);
+
+ read_guest_mem(server);
+
+ uninit_virtio_dev(server);
+
+ qtest_quit(s);
+ test_server_free(server);
+}
+
static void test_migrate(void)
{
TestServer *s = test_server_new("src");
@@ -636,7 +681,7 @@ static void test_migrate(void)
from = qtest_start(cmd);
g_free(cmd);
- init_virtio_dev(s);
+ init_virtio_dev(s, 1u << VIRTIO_NET_F_MAC);
wait_for_fds(s);
size = get_log_size(s);
g_assert_cmpint(size, ==, (2 * 1024 * 1024) / (VHOST_LOG_PAGE * 8));
@@ -689,6 +734,8 @@ static void test_migrate(void)
read_guest_mem(dest);
+ uninit_virtio_dev(s);
+
g_source_destroy(source);
g_source_unref(source);
@@ -756,7 +803,7 @@ static void test_reconnect_subprocess(void)
qtest_start(cmd);
g_free(cmd);
- init_virtio_dev(s);
+ init_virtio_dev(s, 1u << VIRTIO_NET_F_MAC);
wait_for_fds(s);
wait_for_rings_started(s, 2);
@@ -767,6 +814,8 @@ static void test_reconnect_subprocess(void)
wait_for_fds(s);
wait_for_rings_started(s, 2);
+ uninit_virtio_dev(s);
+
qtest_end();
test_server_free(s);
return;
@@ -792,10 +841,12 @@ static void test_connect_fail_subprocess(void)
qtest_start(cmd);
g_free(cmd);
- init_virtio_dev(s);
+ init_virtio_dev(s, 1u << VIRTIO_NET_F_MAC);
wait_for_fds(s);
wait_for_rings_started(s, 2);
+ uninit_virtio_dev(s);
+
qtest_end();
test_server_free(s);
}
@@ -820,10 +871,12 @@ static void test_flags_mismatch_subprocess(void)
qtest_start(cmd);
g_free(cmd);
- init_virtio_dev(s);
+ init_virtio_dev(s, 1u << VIRTIO_NET_F_MAC);
wait_for_fds(s);
wait_for_rings_started(s, 2);
+ uninit_virtio_dev(s);
+
qtest_end();
test_server_free(s);
}
@@ -839,79 +892,30 @@ static void test_flags_mismatch(void)
#endif
-static QVirtioPCIDevice *virtio_net_pci_init(QPCIBus *bus, int slot)
-{
- QVirtioPCIDevice *dev;
-
- dev = qvirtio_pci_device_find(bus, VIRTIO_ID_NET);
- g_assert(dev != NULL);
- g_assert_cmphex(dev->vdev.device_type, ==, VIRTIO_ID_NET);
-
- qvirtio_pci_device_enable(dev);
- qvirtio_reset(&dev->vdev);
- qvirtio_set_acknowledge(&dev->vdev);
- qvirtio_set_driver(&dev->vdev);
-
- return dev;
-}
-
-static void driver_init(QVirtioDevice *dev)
-{
- uint32_t features;
-
- features = qvirtio_get_features(dev);
- features = features & ~(QVIRTIO_F_BAD_FEATURE |
- (1u << VIRTIO_RING_F_INDIRECT_DESC) |
- (1u << VIRTIO_RING_F_EVENT_IDX));
- qvirtio_set_features(dev, features);
-
- qvirtio_set_driver_ok(dev);
-}
-
-#define PCI_SLOT 0x04
-
static void test_multiqueue(void)
{
- const int queues = 2;
TestServer *s = test_server_new("mq");
- QVirtioPCIDevice *dev;
- QPCIBus *bus;
- QVirtQueuePCI *vq[queues * 2];
- QGuestAllocator *alloc;
char *cmd;
- int i;
-
- s->queues = queues;
+ uint32_t features_mask = ~(QVIRTIO_F_BAD_FEATURE |
+ (1u << VIRTIO_RING_F_INDIRECT_DESC) |
+ (1u << VIRTIO_RING_F_EVENT_IDX));
+ s->queues = 2;
test_server_listen(s);
cmd = g_strdup_printf(QEMU_CMD_MEM QEMU_CMD_CHR QEMU_CMD_NETDEV ",queues=%d "
"-device virtio-net-pci,netdev=net0,mq=on,vectors=%d",
512, 512, root, s->chr_name,
s->socket_path, "", s->chr_name,
- queues, queues * 2 + 2);
+ s->queues, s->queues * 2 + 2);
qtest_start(cmd);
g_free(cmd);
- bus = qpci_init_pc(NULL);
- dev = virtio_net_pci_init(bus, PCI_SLOT);
+ init_virtio_dev(s, features_mask);
- alloc = pc_alloc_init();
- for (i = 0; i < queues * 2; i++) {
- vq[i] = (QVirtQueuePCI *)qvirtqueue_setup(&dev->vdev, alloc, i);
- }
+ wait_for_rings_started(s, s->queues * 2);
- driver_init(&dev->vdev);
- wait_for_rings_started(s, queues * 2);
+ uninit_virtio_dev(s);
- /* End test */
- for (i = 0; i < queues * 2; i++) {
- qvirtqueue_cleanup(dev->vdev.bus, &vq[i]->vq, alloc);
- }
- pc_alloc_uninit(alloc);
- qvirtio_pci_device_disable(dev);
- g_free(dev->pdev);
- g_free(dev);
- qpci_free_pc(bus);
qtest_end();
test_server_free(s);
@@ -919,10 +923,7 @@ static void test_multiqueue(void)
int main(int argc, char **argv)
{
- QTestState *s = NULL;
- TestServer *server = NULL;
const char *hugefs;
- char *qemu_cmd = NULL;
int ret;
char template[] = "/tmp/vhost-test-XXXXXX";
GMainLoop *loop;
@@ -947,20 +948,11 @@ int main(int argc, char **argv)
root = tmpfs;
}
- server = test_server_new("test");
- test_server_listen(server);
-
loop = g_main_loop_new(NULL, FALSE);
/* run the main loop thread so the chardev may operate */
thread = g_thread_new(NULL, thread_function, loop);
- qemu_cmd = GET_QEMU_CMD(server);
-
- s = qtest_start(qemu_cmd);
- g_free(qemu_cmd);
- init_virtio_dev(server);
-
- qtest_add_data_func("/vhost-user/read-guest-mem", server, read_guest_mem);
+ qtest_add_func("/vhost-user/read-guest-mem", test_read_guest_mem);
qtest_add_func("/vhost-user/migrate", test_migrate);
qtest_add_func("/vhost-user/multiqueue", test_multiqueue);
@@ -978,12 +970,7 @@ int main(int argc, char **argv)
ret = g_test_run();
- if (s) {
- qtest_quit(s);
- }
-
/* cleanup */
- test_server_free(server);
/* finish the helper thread and dispatch pending sources */
g_main_loop_quit(loop);
diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c
index 73c4046b58..652566634a 100644
--- a/util/event_notifier-posix.c
+++ b/util/event_notifier-posix.c
@@ -29,6 +29,7 @@ void event_notifier_init_fd(EventNotifier *e, int fd)
{
e->rfd = fd;
e->wfd = fd;
+ e->cleanup = NULL;
}
#endif
@@ -65,6 +66,7 @@ int event_notifier_init(EventNotifier *e, int active)
e->rfd = fds[0];
e->wfd = fds[1];
}
+ e->cleanup = NULL;
if (active) {
event_notifier_set(e);
}
@@ -80,10 +82,11 @@ void event_notifier_cleanup(EventNotifier *e)
{
if (e->rfd != e->wfd) {
close(e->rfd);
- e->rfd = -1;
}
close(e->wfd);
+ e->rfd = -1;
e->wfd = -1;
+ e->cleanup = NULL;
}
int event_notifier_get_fd(const EventNotifier *e)
diff --git a/util/event_notifier-win32.c b/util/event_notifier-win32.c
index 62c53b0a99..eff86701ad 100644
--- a/util/event_notifier-win32.c
+++ b/util/event_notifier-win32.c
@@ -19,6 +19,7 @@ int event_notifier_init(EventNotifier *e, int active)
{
e->event = CreateEvent(NULL, TRUE, FALSE, NULL);
assert(e->event);
+ e->cleanup = NULL;
return 0;
}
@@ -26,6 +27,7 @@ void event_notifier_cleanup(EventNotifier *e)
{
CloseHandle(e->event);
e->event = NULL;
+ e->cleanup = NULL;
}
HANDLE event_notifier_get_handle(EventNotifier *e)