aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/9pfs/virtio-9p-device.c2
-rw-r--r--hw/Kconfig1
-rw-r--r--hw/acpi/Kconfig5
-rw-r--r--hw/acpi/cxl-stub.c12
-rw-r--r--hw/acpi/cxl.c257
-rw-r--r--hw/acpi/meson.build4
-rw-r--r--hw/arm/Kconfig1
-rw-r--r--hw/block/vhost-user-blk.c9
-rw-r--r--hw/block/virtio-blk.c2
-rw-r--r--hw/char/virtio-serial-bus.c3
-rw-r--r--hw/core/machine.c28
-rw-r--r--hw/cxl/Kconfig3
-rw-r--r--hw/cxl/cxl-component-utils.c396
-rw-r--r--hw/cxl/cxl-device-utils.c265
-rw-r--r--hw/cxl/cxl-host-stubs.c16
-rw-r--r--hw/cxl/cxl-host.c222
-rw-r--r--hw/cxl/cxl-mailbox-utils.c478
-rw-r--r--hw/cxl/meson.build12
-rw-r--r--hw/display/vhost-user-gpu.c7
-rw-r--r--hw/display/virtio-gpu-base.c2
-rw-r--r--hw/i386/acpi-build.c57
-rw-r--r--hw/i386/amd_iommu.c24
-rw-r--r--hw/i386/intel_iommu.c95
-rw-r--r--hw/i386/intel_iommu_internal.h9
-rw-r--r--hw/i386/microvm.c54
-rw-r--r--hw/i386/pc.c89
-rw-r--r--hw/i386/pc_piix.c4
-rw-r--r--hw/i386/pc_q35.c4
-rw-r--r--hw/i386/x86.c66
-rw-r--r--hw/input/vhost-user-input.c7
-rw-r--r--hw/input/virtio-input.c3
-rw-r--r--hw/mem/Kconfig5
-rw-r--r--hw/mem/cxl_type3.c371
-rw-r--r--hw/mem/meson.build1
-rw-r--r--hw/meson.build1
-rw-r--r--hw/net/vhost_net.c8
-rw-r--r--hw/net/virtio-net.c63
-rw-r--r--hw/pci-bridge/Kconfig5
-rw-r--r--hw/pci-bridge/cxl_root_port.c236
-rw-r--r--hw/pci-bridge/meson.build1
-rw-r--r--hw/pci-bridge/pci_expander_bridge.c168
-rw-r--r--hw/pci-bridge/pcie_root_port.c6
-rw-r--r--hw/pci-host/gpex-acpi.c20
-rw-r--r--hw/pci/pci.c21
-rw-r--r--hw/pci/pcie_port.c25
-rw-r--r--hw/scsi/vhost-scsi.c8
-rw-r--r--hw/scsi/vhost-user-scsi.c1
-rw-r--r--hw/scsi/virtio-scsi.c3
-rw-r--r--hw/virtio/trace-events10
-rw-r--r--hw/virtio/vhost-scsi-pci.c2
-rw-r--r--hw/virtio/vhost-shadow-virtqueue.c35
-rw-r--r--hw/virtio/vhost-shadow-virtqueue.h6
-rw-r--r--hw/virtio/vhost-user-blk-pci.c2
-rw-r--r--hw/virtio/vhost-user-fs-pci.c2
-rw-r--r--hw/virtio/vhost-user-fs.c10
-rw-r--r--hw/virtio/vhost-user-i2c-pci.c2
-rw-r--r--hw/virtio/vhost-user-i2c.c7
-rw-r--r--hw/virtio/vhost-user-input-pci.c2
-rw-r--r--hw/virtio/vhost-user-rng-pci.c2
-rw-r--r--hw/virtio/vhost-user-rng.c9
-rw-r--r--hw/virtio/vhost-user-scsi-pci.c2
-rw-r--r--hw/virtio/vhost-user-vsock-pci.c2
-rw-r--r--hw/virtio/vhost-user-vsock.c2
-rw-r--r--hw/virtio/vhost-user.c133
-rw-r--r--hw/virtio/vhost-vdpa.c29
-rw-r--r--hw/virtio/vhost-vsock-common.c12
-rw-r--r--hw/virtio/vhost-vsock-pci.c2
-rw-r--r--hw/virtio/vhost-vsock.c2
-rw-r--r--hw/virtio/vhost.c4
-rw-r--r--hw/virtio/virtio-9p-pci.c2
-rw-r--r--hw/virtio/virtio-balloon-pci.c2
-rw-r--r--hw/virtio/virtio-balloon.c3
-rw-r--r--hw/virtio/virtio-blk-pci.c2
-rw-r--r--hw/virtio/virtio-bus.c22
-rw-r--r--hw/virtio/virtio-crypto.c18
-rw-r--r--hw/virtio/virtio-input-host-pci.c2
-rw-r--r--hw/virtio/virtio-input-pci.c2
-rw-r--r--hw/virtio/virtio-iommu-pci.c2
-rw-r--r--hw/virtio/virtio-iommu.c3
-rw-r--r--hw/virtio/virtio-mem.c3
-rw-r--r--hw/virtio/virtio-net-pci.c2
-rw-r--r--hw/virtio/virtio-pci.c5
-rw-r--r--hw/virtio/virtio-pci.h255
-rw-r--r--hw/virtio/virtio-pmem.c3
-rw-r--r--hw/virtio/virtio-rng-pci.c2
-rw-r--r--hw/virtio/virtio-rng.c2
-rw-r--r--hw/virtio/virtio-scsi-pci.c2
-rw-r--r--hw/virtio/virtio-serial-pci.c2
-rw-r--r--hw/virtio/virtio.c56
89 files changed, 3211 insertions, 536 deletions
diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index 54ee93b71f..5f522e68e9 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -216,7 +216,7 @@ static void virtio_9p_device_realize(DeviceState *dev, Error **errp)
}
v->config_size = sizeof(struct virtio_9p_config) + strlen(s->fsconf.tag);
- virtio_init(vdev, "virtio-9p", VIRTIO_ID_9P, v->config_size);
+ virtio_init(vdev, VIRTIO_ID_9P, v->config_size);
v->vq = virtio_add_queue(vdev, MAX_REQ, handle_9p_output);
}
diff --git a/hw/Kconfig b/hw/Kconfig
index ad20cce0a9..50e0952889 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -6,6 +6,7 @@ source audio/Kconfig
source block/Kconfig
source char/Kconfig
source core/Kconfig
+source cxl/Kconfig
source display/Kconfig
source dma/Kconfig
source gpio/Kconfig
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 19caebde6c..3703aca212 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -5,6 +5,7 @@ config ACPI_X86
bool
select ACPI
select ACPI_NVDIMM
+ select ACPI_CXL
select ACPI_CPU_HOTPLUG
select ACPI_MEMORY_HOTPLUG
select ACPI_HMAT
@@ -66,3 +67,7 @@ config ACPI_ERST
bool
default y
depends on ACPI && PCI
+
+config ACPI_CXL
+ bool
+ depends on ACPI
diff --git a/hw/acpi/cxl-stub.c b/hw/acpi/cxl-stub.c
new file mode 100644
index 0000000000..15bc21076b
--- /dev/null
+++ b/hw/acpi/cxl-stub.c
@@ -0,0 +1,12 @@
+
+/*
+ * Stubs for ACPI platforms that don't support CXl
+ */
+#include "qemu/osdep.h"
+#include "hw/acpi/aml-build.h"
+#include "hw/acpi/cxl.h"
+
+void build_cxl_osc_method(Aml *dev)
+{
+ g_assert_not_reached();
+}
diff --git a/hw/acpi/cxl.c b/hw/acpi/cxl.c
new file mode 100644
index 0000000000..31d5235136
--- /dev/null
+++ b/hw/acpi/cxl.c
@@ -0,0 +1,257 @@
+/*
+ * CXL ACPI Implementation
+ *
+ * Copyright(C) 2020 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/pci_host.h"
+#include "hw/cxl/cxl.h"
+#include "hw/mem/memory-device.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/aml-build.h"
+#include "hw/acpi/bios-linker-loader.h"
+#include "hw/acpi/cxl.h"
+#include "qapi/error.h"
+#include "qemu/uuid.h"
+
+static void cedt_build_chbs(GArray *table_data, PXBDev *cxl)
+{
+ SysBusDevice *sbd = SYS_BUS_DEVICE(cxl->cxl.cxl_host_bridge);
+ struct MemoryRegion *mr = sbd->mmio[0].memory;
+
+ /* Type */
+ build_append_int_noprefix(table_data, 0, 1);
+
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 1);
+
+ /* Record Length */
+ build_append_int_noprefix(table_data, 32, 2);
+
+ /* UID - currently equal to bus number */
+ build_append_int_noprefix(table_data, cxl->bus_nr, 4);
+
+ /* Version */
+ build_append_int_noprefix(table_data, 1, 4);
+
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 4);
+
+ /* Base - subregion within a container that is in PA space */
+ build_append_int_noprefix(table_data, mr->container->addr + mr->addr, 8);
+
+ /* Length */
+ build_append_int_noprefix(table_data, memory_region_size(mr), 8);
+}
+
+/*
+ * CFMWS entries in CXL 2.0 ECN: CEDT CFMWS & QTG _DSM.
+ * Interleave ways encoding in CXL 2.0 ECN: 3, 6, 12 and 16-way memory
+ * interleaving.
+ */
+static void cedt_build_cfmws(GArray *table_data, MachineState *ms)
+{
+ CXLState *cxls = ms->cxl_devices_state;
+ GList *it;
+
+ for (it = cxls->fixed_windows; it; it = it->next) {
+ CXLFixedWindow *fw = it->data;
+ int i;
+
+ /* Type */
+ build_append_int_noprefix(table_data, 1, 1);
+
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 1);
+
+ /* Record Length */
+ build_append_int_noprefix(table_data, 36 + 4 * fw->num_targets, 2);
+
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 4);
+
+ /* Base HPA */
+ build_append_int_noprefix(table_data, fw->mr.addr, 8);
+
+ /* Window Size */
+ build_append_int_noprefix(table_data, fw->size, 8);
+
+ /* Host Bridge Interleave Ways */
+ build_append_int_noprefix(table_data, fw->enc_int_ways, 1);
+
+ /* Host Bridge Interleave Arithmetic */
+ build_append_int_noprefix(table_data, 0, 1);
+
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
+
+ /* Host Bridge Interleave Granularity */
+ build_append_int_noprefix(table_data, fw->enc_int_gran, 4);
+
+ /* Window Restrictions */
+ build_append_int_noprefix(table_data, 0x0f, 2); /* No restrictions */
+
+ /* QTG ID */
+ build_append_int_noprefix(table_data, 0, 2);
+
+ /* Host Bridge List (list of UIDs - currently bus_nr) */
+ for (i = 0; i < fw->num_targets; i++) {
+ g_assert(fw->target_hbs[i]);
+ build_append_int_noprefix(table_data, fw->target_hbs[i]->bus_nr, 4);
+ }
+ }
+}
+
+static int cxl_foreach_pxb_hb(Object *obj, void *opaque)
+{
+ Aml *cedt = opaque;
+
+ if (object_dynamic_cast(obj, TYPE_PXB_CXL_DEVICE)) {
+ cedt_build_chbs(cedt->buf, PXB_CXL_DEV(obj));
+ }
+
+ return 0;
+}
+
+void cxl_build_cedt(MachineState *ms, GArray *table_offsets, GArray *table_data,
+ BIOSLinker *linker, const char *oem_id,
+ const char *oem_table_id)
+{
+ Aml *cedt;
+ AcpiTable table = { .sig = "CEDT", .rev = 1, .oem_id = oem_id,
+ .oem_table_id = oem_table_id };
+
+ acpi_add_table(table_offsets, table_data);
+ acpi_table_begin(&table, table_data);
+ cedt = init_aml_allocator();
+
+ /* reserve space for CEDT header */
+
+ object_child_foreach_recursive(object_get_root(), cxl_foreach_pxb_hb, cedt);
+ cedt_build_cfmws(cedt->buf, ms);
+
+ /* copy AML table into ACPI tables blob and patch header there */
+ g_array_append_vals(table_data, cedt->buf->data, cedt->buf->len);
+ free_aml_allocator();
+
+ acpi_table_end(linker, &table);
+}
+
+static Aml *__build_cxl_osc_method(void)
+{
+ Aml *method, *if_uuid, *else_uuid, *if_arg1_not_1, *if_cxl, *if_caps_masked;
+ Aml *a_ctrl = aml_local(0);
+ Aml *a_cdw1 = aml_name("CDW1");
+
+ method = aml_method("_OSC", 4, AML_NOTSERIALIZED);
+ /* CDW1 is used for the return value so is present whether or not a match occurs */
+ aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1"));
+
+ /*
+ * Generate shared section between:
+ * CXL 2.0 - 9.14.2.1.4 and
+ * PCI Firmware Specification 3.0
+ * 4.5.1. _OSC Interface for PCI Host Bridge Devices
+ * The _OSC interface for a PCI/PCI-X/PCI Express hierarchy is
+ * identified by the Universal Unique IDentifier (UUID)
+ * 33DB4D5B-1FF7-401C-9657-7441C03DD766
+ * The _OSC interface for a CXL Host bridge is
+ * identified by the UUID 68F2D50B-C469-4D8A-BD3D-941A103FD3FC
+ * A CXL Host bridge is compatible with a PCI host bridge so
+ * for the shared section match both.
+ */
+ if_uuid = aml_if(
+ aml_lor(aml_equal(aml_arg(0),
+ aml_touuid("33DB4D5B-1FF7-401C-9657-7441C03DD766")),
+ aml_equal(aml_arg(0),
+ aml_touuid("68F2D50B-C469-4D8A-BD3D-941A103FD3FC"))));
+ aml_append(if_uuid, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2"));
+ aml_append(if_uuid, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3"));
+
+ aml_append(if_uuid, aml_store(aml_name("CDW3"), a_ctrl));
+
+ /*
+ *
+ * Allows OS control for all 5 features:
+ * PCIeHotplug SHPCHotplug PME AER PCIeCapability
+ */
+ aml_append(if_uuid, aml_and(a_ctrl, aml_int(0x1F), a_ctrl));
+
+ /*
+ * Check _OSC revision.
+ * PCI Firmware specification 3.3 and CXL 2.0 both use revision 1
+ * Unknown Revision is CDW1 - BIT (3)
+ */
+ if_arg1_not_1 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(0x1))));
+ aml_append(if_arg1_not_1, aml_or(a_cdw1, aml_int(0x08), a_cdw1));
+ aml_append(if_uuid, if_arg1_not_1);
+
+ if_caps_masked = aml_if(aml_lnot(aml_equal(aml_name("CDW3"), a_ctrl)));
+
+ /* Capability bits were masked */
+ aml_append(if_caps_masked, aml_or(a_cdw1, aml_int(0x10), a_cdw1));
+ aml_append(if_uuid, if_caps_masked);
+
+ aml_append(if_uuid, aml_store(aml_name("CDW2"), aml_name("SUPP")));
+ aml_append(if_uuid, aml_store(aml_name("CDW3"), aml_name("CTRL")));
+
+ /* Update DWORD3 (the return value) */
+ aml_append(if_uuid, aml_store(a_ctrl, aml_name("CDW3")));
+
+ /* CXL only section as per CXL 2.0 - 9.14.2.1.4 */
+ if_cxl = aml_if(aml_equal(
+ aml_arg(0), aml_touuid("68F2D50B-C469-4D8A-BD3D-941A103FD3FC")));
+ /* CXL support field */
+ aml_append(if_cxl, aml_create_dword_field(aml_arg(3), aml_int(12), "CDW4"));
+ /* CXL capabilities */
+ aml_append(if_cxl, aml_create_dword_field(aml_arg(3), aml_int(16), "CDW5"));
+ aml_append(if_cxl, aml_store(aml_name("CDW4"), aml_name("SUPC")));
+ aml_append(if_cxl, aml_store(aml_name("CDW5"), aml_name("CTRC")));
+
+ /* CXL 2.0 Port/Device Register access */
+ aml_append(if_cxl,
+ aml_or(aml_name("CDW5"), aml_int(0x1), aml_name("CDW5")));
+ aml_append(if_uuid, if_cxl);
+
+ aml_append(if_uuid, aml_return(aml_arg(3)));
+ aml_append(method, if_uuid);
+
+ /*
+ * If no UUID matched, return Unrecognized UUID via Arg3 DWord 1
+ * ACPI 6.4 - 6.2.11
+ * Unrecognised UUID - BIT(2)
+ */
+ else_uuid = aml_else();
+
+ aml_append(else_uuid,
+ aml_or(aml_name("CDW1"), aml_int(0x4), aml_name("CDW1")));
+ aml_append(else_uuid, aml_return(aml_arg(3)));
+ aml_append(method, else_uuid);
+
+ return method;
+}
+
+void build_cxl_osc_method(Aml *dev)
+{
+ aml_append(dev, aml_name_decl("SUPP", aml_int(0)));
+ aml_append(dev, aml_name_decl("CTRL", aml_int(0)));
+ aml_append(dev, aml_name_decl("SUPC", aml_int(0)));
+ aml_append(dev, aml_name_decl("CTRC", aml_int(0)));
+ aml_append(dev, __build_cxl_osc_method());
+}
diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build
index 8bea2e6933..cea2f5f93a 100644
--- a/hw/acpi/meson.build
+++ b/hw/acpi/meson.build
@@ -13,6 +13,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_MEMORY_HOTPLUG', if_false: files('acpi-mem-hotplu
acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_true: files('nvdimm.c'))
acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_false: files('acpi-nvdimm-stub.c'))
acpi_ss.add(when: 'CONFIG_ACPI_PCI', if_true: files('pci.c'))
+acpi_ss.add(when: 'CONFIG_ACPI_CXL', if_true: files('cxl.c'), if_false: files('cxl-stub.c'))
acpi_ss.add(when: 'CONFIG_ACPI_VMGENID', if_true: files('vmgenid.c'))
acpi_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device.c'))
acpi_ss.add(when: 'CONFIG_ACPI_HMAT', if_true: files('hmat.c'))
@@ -33,4 +34,5 @@ softmmu_ss.add_all(when: 'CONFIG_ACPI', if_true: acpi_ss)
softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('acpi-stub.c', 'aml-build-stub.c',
'acpi-x86-stub.c', 'ipmi-stub.c', 'ghes-stub.c',
'acpi-mem-hotplug-stub.c', 'acpi-cpu-hotplug-stub.c',
- 'acpi-pci-hotplug-stub.c', 'acpi-nvdimm-stub.c'))
+ 'acpi-pci-hotplug-stub.c', 'acpi-nvdimm-stub.c',
+ 'cxl-stub.c'))
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 97f3b38019..219262a8da 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -29,6 +29,7 @@ config ARM_VIRT
select ACPI_APEI
select ACPI_VIOT
select VIRTIO_MEM_SUPPORTED
+ select ACPI_CXL
config CHEETAH
bool
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 1a42ae9187..5dca4eab09 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -491,7 +491,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
return;
}
- virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
+ virtio_init(vdev, VIRTIO_ID_BLOCK,
sizeof(struct virtio_blk_config));
s->virtqs = g_new(VirtQueue *, s->num_queues);
@@ -569,6 +569,12 @@ static void vhost_user_blk_instance_init(Object *obj)
"/disk@0,0", DEVICE(obj));
}
+static struct vhost_dev *vhost_user_blk_get_vhost(VirtIODevice *vdev)
+{
+ VHostUserBlk *s = VHOST_USER_BLK(vdev);
+ return &s->dev;
+}
+
static const VMStateDescription vmstate_vhost_user_blk = {
.name = "vhost-user-blk",
.minimum_version_id = 1,
@@ -603,6 +609,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
vdc->get_features = vhost_user_blk_get_features;
vdc->set_status = vhost_user_blk_set_status;
vdc->reset = vhost_user_blk_reset;
+ vdc->get_vhost = vhost_user_blk_get_vhost;
}
static const TypeInfo vhost_user_blk_info = {
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 6a1cc41877..cd804795c6 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1206,7 +1206,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
virtio_blk_set_config_size(s, s->host_features);
- virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, s->config_size);
+ virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);
s->blk = conf->conf.blk;
s->rq = NULL;
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index 6048d408b8..7d4601cb5d 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -1044,8 +1044,7 @@ static void virtio_serial_device_realize(DeviceState *dev, Error **errp)
VIRTIO_CONSOLE_F_EMERG_WRITE)) {
config_size = offsetof(struct virtio_console_config, emerg_wr);
}
- virtio_init(vdev, "virtio-serial", VIRTIO_ID_CONSOLE,
- config_size);
+ virtio_init(vdev, VIRTIO_ID_CONSOLE, config_size);
/* Spawn a new virtio-serial bus on which the ports will ride as devices */
qbus_init(&vser->bus, sizeof(vser->bus), TYPE_VIRTIO_SERIAL_BUS,
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 3264c1e11d..b03d9192ba 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -33,6 +33,7 @@
#include "sysemu/qtest.h"
#include "hw/pci/pci.h"
#include "hw/mem/nvdimm.h"
+#include "hw/cxl/cxl.h"
#include "migration/global_state.h"
#include "migration/vmstate.h"
#include "exec/confidential-guest-support.h"
@@ -625,6 +626,20 @@ static void machine_set_nvdimm_persistence(Object *obj, const char *value,
nvdimms_state->persistence_string = g_strdup(value);
}
+static bool machine_get_cxl(Object *obj, Error **errp)
+{
+ MachineState *ms = MACHINE(obj);
+
+ return ms->cxl_devices_state->is_enabled;
+}
+
+static void machine_set_cxl(Object *obj, bool value, Error **errp)
+{
+ MachineState *ms = MACHINE(obj);
+
+ ms->cxl_devices_state->is_enabled = value;
+}
+
void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type)
{
QAPI_LIST_PREPEND(mc->allowed_dynamic_sysbus_devices, g_strdup(type));
@@ -911,6 +926,8 @@ static void machine_class_init(ObjectClass *oc, void *data)
mc->default_ram_size = 128 * MiB;
mc->rom_file_has_mr = true;
+ /* Few machines support CXL, so default to off */
+ mc->cxl_supported = false;
/* numa node memory size aligned on 8MB by default.
* On Linux, each node's border has to be 8MB aligned
*/
@@ -1071,6 +1088,16 @@ static void machine_initfn(Object *obj)
"Valid values are cpu, mem-ctrl");
}
+ if (mc->cxl_supported) {
+ Object *obj = OBJECT(ms);
+
+ ms->cxl_devices_state = g_new0(CXLState, 1);
+ object_property_add_bool(obj, "cxl", machine_get_cxl, machine_set_cxl);
+ object_property_set_description(obj, "cxl",
+ "Set on/off to enable/disable "
+ "CXL instantiation");
+ }
+
if (mc->cpu_index_to_instance_props && mc->get_default_cpu_node_id) {
ms->numa_state = g_new0(NumaState, 1);
object_property_add_bool(obj, "hmat",
@@ -1108,6 +1135,7 @@ static void machine_finalize(Object *obj)
g_free(ms->device_memory);
g_free(ms->nvdimms_state);
g_free(ms->numa_state);
+ g_free(ms->cxl_devices_state);
}
bool machine_usb(MachineState *machine)
diff --git a/hw/cxl/Kconfig b/hw/cxl/Kconfig
new file mode 100644
index 0000000000..8e67519b16
--- /dev/null
+++ b/hw/cxl/Kconfig
@@ -0,0 +1,3 @@
+config CXL
+ bool
+ default y if PCI_EXPRESS
diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
new file mode 100644
index 0000000000..7985c9bfca
--- /dev/null
+++ b/hw/cxl/cxl-component-utils.c
@@ -0,0 +1,396 @@
+/*
+ * CXL Utility library for components
+ *
+ * Copyright(C) 2020 Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "hw/pci/pci.h"
+#include "hw/cxl/cxl.h"
+
+static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset,
+ unsigned size)
+{
+ CXLComponentState *cxl_cstate = opaque;
+ ComponentRegisters *cregs = &cxl_cstate->crb;
+
+ if (size == 8) {
+ qemu_log_mask(LOG_UNIMP,
+ "CXL 8 byte cache mem registers not implemented\n");
+ return 0;
+ }
+
+ if (cregs->special_ops && cregs->special_ops->read) {
+ return cregs->special_ops->read(cxl_cstate, offset, size);
+ } else {
+ return cregs->cache_mem_registers[offset / sizeof(*cregs->cache_mem_registers)];
+ }
+}
+
+static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset,
+ uint32_t value)
+{
+ ComponentRegisters *cregs = &cxl_cstate->crb;
+ uint32_t *cache_mem = cregs->cache_mem_registers;
+ bool should_commit = false;
+
+ switch (offset) {
+ case A_CXL_HDM_DECODER0_CTRL:
+ should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
+ break;
+ default:
+ break;
+ }
+
+ memory_region_transaction_begin();
+ stl_le_p((uint8_t *)cache_mem + offset, value);
+ if (should_commit) {
+ ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMIT, 0);
+ ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, ERR, 0);
+ ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
+ }
+ memory_region_transaction_commit();
+}
+
+static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value,
+ unsigned size)
+{
+ CXLComponentState *cxl_cstate = opaque;
+ ComponentRegisters *cregs = &cxl_cstate->crb;
+ uint32_t mask;
+
+ if (size == 8) {
+ qemu_log_mask(LOG_UNIMP,
+ "CXL 8 byte cache mem registers not implemented\n");
+ return;
+ }
+ mask = cregs->cache_mem_regs_write_mask[offset / sizeof(*cregs->cache_mem_regs_write_mask)];
+ value &= mask;
+ /* RO bits should remain constant. Done by reading existing value */
+ value |= ~mask & cregs->cache_mem_registers[offset / sizeof(*cregs->cache_mem_registers)];
+ if (cregs->special_ops && cregs->special_ops->write) {
+ cregs->special_ops->write(cxl_cstate, offset, value, size);
+ return;
+ }
+
+ if (offset >= A_CXL_HDM_DECODER_CAPABILITY &&
+ offset <= A_CXL_HDM_DECODER0_TARGET_LIST_HI) {
+ dumb_hdm_handler(cxl_cstate, offset, value);
+ } else {
+ cregs->cache_mem_registers[offset / sizeof(*cregs->cache_mem_registers)] = value;
+ }
+}
+
+/*
+ * 8.2.3
+ * The access restrictions specified in Section 8.2.2 also apply to CXL 2.0
+ * Component Registers.
+ *
+ * 8.2.2
+ * • A 32 bit register shall be accessed as a 4 Bytes quantity. Partial
+ * reads are not permitted.
+ * • A 64 bit register shall be accessed as a 8 Bytes quantity. Partial
+ * reads are not permitted.
+ *
+ * As of the spec defined today, only 4 byte registers exist.
+ */
+static const MemoryRegionOps cache_mem_ops = {
+ .read = cxl_cache_mem_read_reg,
+ .write = cxl_cache_mem_write_reg,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ .unaligned = false,
+ },
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ },
+};
+
+void cxl_component_register_block_init(Object *obj,
+ CXLComponentState *cxl_cstate,
+ const char *type)
+{
+ ComponentRegisters *cregs = &cxl_cstate->crb;
+
+ memory_region_init(&cregs->component_registers, obj, type,
+ CXL2_COMPONENT_BLOCK_SIZE);
+
+ /* io registers controls link which we don't care about in QEMU */
+ memory_region_init_io(&cregs->io, obj, NULL, cregs, ".io",
+ CXL2_COMPONENT_IO_REGION_SIZE);
+ memory_region_init_io(&cregs->cache_mem, obj, &cache_mem_ops, cregs,
+ ".cache_mem", CXL2_COMPONENT_CM_REGION_SIZE);
+
+ memory_region_add_subregion(&cregs->component_registers, 0, &cregs->io);
+ memory_region_add_subregion(&cregs->component_registers,
+ CXL2_COMPONENT_IO_REGION_SIZE,
+ &cregs->cache_mem);
+}
+
+static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk)
+{
+ /*
+ * Error status is RW1C but given bits are not yet set, it can
+ * be handled as RO.
+ */
+ reg_state[R_CXL_RAS_UNC_ERR_STATUS] = 0;
+ /* Bits 12-13 and 17-31 reserved in CXL 2.0 */
+ reg_state[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff;
+ write_msk[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff;
+ reg_state[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff;
+ write_msk[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff;
+ reg_state[R_CXL_RAS_COR_ERR_STATUS] = 0;
+ reg_state[R_CXL_RAS_COR_ERR_MASK] = 0x7f;
+ write_msk[R_CXL_RAS_COR_ERR_MASK] = 0x7f;
+ /* CXL switches and devices must set */
+ reg_state[R_CXL_RAS_ERR_CAP_CTRL] = 0x00;
+}
+
+static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk)
+{
+ int decoder_count = 1;
+ int i;
+
+ ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, DECODER_COUNT,
+ cxl_decoder_count_enc(decoder_count));
+ ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 1);
+ ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_256B, 1);
+ ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_4K, 1);
+ ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, POISON_ON_ERR_CAP, 0);
+ ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_GLOBAL_CONTROL,
+ HDM_DECODER_ENABLE, 0);
+ write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3;
+ for (i = 0; i < decoder_count; i++) {
+ write_msk[R_CXL_HDM_DECODER0_BASE_LO + i * 0x20] = 0xf0000000;
+ write_msk[R_CXL_HDM_DECODER0_BASE_HI + i * 0x20] = 0xffffffff;
+ write_msk[R_CXL_HDM_DECODER0_SIZE_LO + i * 0x20] = 0xf0000000;
+ write_msk[R_CXL_HDM_DECODER0_SIZE_HI + i * 0x20] = 0xffffffff;
+ write_msk[R_CXL_HDM_DECODER0_CTRL + i * 0x20] = 0x13ff;
+ }
+}
+
+void cxl_component_register_init_common(uint32_t *reg_state, uint32_t *write_msk,
+ enum reg_type type)
+{
+ int caps = 0;
+
+ /*
+ * In CXL 2.0 the capabilities required for each CXL component are such that,
+ * with the ordering chosen here, a single number can be used to define
+ * which capabilities should be provided.
+ */
+ switch (type) {
+ case CXL2_DOWNSTREAM_PORT:
+ case CXL2_DEVICE:
+ /* RAS, Link */
+ caps = 2;
+ break;
+ case CXL2_UPSTREAM_PORT:
+ case CXL2_TYPE3_DEVICE:
+ case CXL2_LOGICAL_DEVICE:
+ /* + HDM */
+ caps = 3;
+ break;
+ case CXL2_ROOT_PORT:
+ /* + Extended Security, + Snoop */
+ caps = 5;
+ break;
+ default:
+ abort();
+ }
+
+ memset(reg_state, 0, CXL2_COMPONENT_CM_REGION_SIZE);
+
+ /* CXL Capability Header Register */
+ ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ID, 1);
+ ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, VERSION, 1);
+ ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, CACHE_MEM_VERSION, 1);
+ ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ARRAY_SIZE, caps);
+
+#define init_cap_reg(reg, id, version) \
+ QEMU_BUILD_BUG_ON(CXL_##reg##_REGISTERS_OFFSET == 0); \
+ do { \
+ int which = R_CXL_##reg##_CAPABILITY_HEADER; \
+ reg_state[which] = FIELD_DP32(reg_state[which], \
+ CXL_##reg##_CAPABILITY_HEADER, ID, id); \
+ reg_state[which] = \
+ FIELD_DP32(reg_state[which], CXL_##reg##_CAPABILITY_HEADER, \
+ VERSION, version); \
+ reg_state[which] = \
+ FIELD_DP32(reg_state[which], CXL_##reg##_CAPABILITY_HEADER, PTR, \
+ CXL_##reg##_REGISTERS_OFFSET); \
+ } while (0)
+
+ init_cap_reg(RAS, 2, 2);
+ ras_init_common(reg_state, write_msk);
+
+ init_cap_reg(LINK, 4, 2);
+
+ if (caps < 3) {
+ return;
+ }
+
+ init_cap_reg(HDM, 5, 1);
+ hdm_init_common(reg_state, write_msk);
+
+ if (caps < 5) {
+ return;
+ }
+
+ init_cap_reg(EXTSEC, 6, 1);
+ init_cap_reg(SNOOP, 8, 1);
+
+#undef init_cap_reg
+}
+
+/*
+ * Helper to creates a DVSEC header for a CXL entity. The caller is responsible
+ * for tracking the valid offset.
+ *
+ * This function will build the DVSEC header on behalf of the caller and then
+ * copy in the remaining data for the vendor specific bits.
+ * It will also set up appropriate write masks.
+ */
+void cxl_component_create_dvsec(CXLComponentState *cxl,
+ enum reg_type cxl_dev_type, uint16_t length,
+ uint16_t type, uint8_t rev, uint8_t *body)
+{
+ PCIDevice *pdev = cxl->pdev;
+ uint16_t offset = cxl->dvsec_offset;
+ uint8_t *wmask = pdev->wmask;
+
+ assert(offset >= PCI_CFG_SPACE_SIZE &&
+ ((offset + length) < PCI_CFG_SPACE_EXP_SIZE));
+ assert((length & 0xf000) == 0);
+ assert((rev & ~0xf) == 0);
+
+ /* Create the DVSEC in the MCFG space */
+ pcie_add_capability(pdev, PCI_EXT_CAP_ID_DVSEC, 1, offset, length);
+ pci_set_long(pdev->config + offset + PCIE_DVSEC_HEADER1_OFFSET,
+ (length << 20) | (rev << 16) | CXL_VENDOR_ID);
+ pci_set_word(pdev->config + offset + PCIE_DVSEC_ID_OFFSET, type);
+ memcpy(pdev->config + offset + sizeof(DVSECHeader),
+ body + sizeof(DVSECHeader),
+ length - sizeof(DVSECHeader));
+
+ /* Configure write masks */
+ switch (type) {
+ case PCIE_CXL_DEVICE_DVSEC:
+ /* Cntrl RW Lock - so needs explicit blocking when lock is set */
+ wmask[offset + offsetof(CXLDVSECDevice, ctrl)] = 0xFD;
+ wmask[offset + offsetof(CXLDVSECDevice, ctrl) + 1] = 0x4F;
+ /* Status is RW1CS */
+ wmask[offset + offsetof(CXLDVSECDevice, ctrl2)] = 0x0F;
+ /* Lock is RW Once */
+ wmask[offset + offsetof(CXLDVSECDevice, lock)] = 0x01;
+ /* range1/2_base_high/low is RW Lock */
+ wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi)] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi) + 1] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi) + 2] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi) + 3] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECDevice, range1_base_lo) + 3] = 0xF0;
+ wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi)] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi) + 1] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi) + 2] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi) + 3] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECDevice, range2_base_lo) + 3] = 0xF0;
+ break;
+ case NON_CXL_FUNCTION_MAP_DVSEC:
+ break; /* Not yet implemented */
+ case EXTENSIONS_PORT_DVSEC:
+ wmask[offset + offsetof(CXLDVSECPortExtensions, control)] = 0x0F;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, control) + 1] = 0x40;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_bus_base)] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_bus_limit)] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_base)] = 0xF0;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_base) + 1] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_limit)] = 0xF0;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_limit) + 1] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base)] = 0xF0;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base) + 1] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit)] = 0xF0;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit) + 1] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high)] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high) + 1] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high) + 2] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high) + 3] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high)] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high) + 1] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high) + 2] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high) + 3] = 0xFF;
+ break;
+ case GPF_PORT_DVSEC:
+ wmask[offset + offsetof(CXLDVSECPortGPF, phase1_ctrl)] = 0x0F;
+ wmask[offset + offsetof(CXLDVSECPortGPF, phase1_ctrl) + 1] = 0x0F;
+ wmask[offset + offsetof(CXLDVSECPortGPF, phase2_ctrl)] = 0x0F;
+ wmask[offset + offsetof(CXLDVSECPortGPF, phase2_ctrl) + 1] = 0x0F;
+ break;
+ case GPF_DEVICE_DVSEC:
+ wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_duration)] = 0x0F;
+ wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_duration) + 1] = 0x0F;
+ wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power)] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power) + 1] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power) + 2] = 0xFF;
+ wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power) + 3] = 0xFF;
+ break;
+ case PCIE_FLEXBUS_PORT_DVSEC:
+ switch (cxl_dev_type) {
+ case CXL2_ROOT_PORT:
+ /* No MLD */
+ wmask[offset + offsetof(CXLDVSECPortFlexBus, ctrl)] = 0xbd;
+ break;
+ case CXL2_DOWNSTREAM_PORT:
+ wmask[offset + offsetof(CXLDVSECPortFlexBus, ctrl)] = 0xfd;
+ break;
+ default: /* Registers are RO for other component types */
+ break;
+ }
+ /* There are rw1cs bits in the status register but never set currently */
+ break;
+ }
+
+ /* Update state for future DVSEC additions */
+ range_init_nofail(&cxl->dvsecs[type], cxl->dvsec_offset, length);
+ cxl->dvsec_offset += length;
+}
+
+uint8_t cxl_interleave_ways_enc(int iw, Error **errp)
+{
+ switch (iw) {
+ case 1: return 0x0;
+ case 2: return 0x1;
+ case 4: return 0x2;
+ case 8: return 0x3;
+ case 16: return 0x4;
+ case 3: return 0x8;
+ case 6: return 0x9;
+ case 12: return 0xa;
+ default:
+ error_setg(errp, "Interleave ways: %d not supported", iw);
+ return 0;
+ }
+}
+
+uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp)
+{
+ switch (gran) {
+ case 256: return 0;
+ case 512: return 1;
+ case 1024: return 2;
+ case 2048: return 3;
+ case 4096: return 4;
+ case 8192: return 5;
+ case 16384: return 6;
+ default:
+ error_setg(errp, "Interleave granularity: %" PRIu64 " invalid", gran);
+ return 0;
+ }
+}
diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c
new file mode 100644
index 0000000000..687759b301
--- /dev/null
+++ b/hw/cxl/cxl-device-utils.c
@@ -0,0 +1,265 @@
+/*
+ * CXL Utility library for devices
+ *
+ * Copyright(C) 2020 Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "hw/cxl/cxl.h"
+
+/*
+ * Device registers have no restrictions per the spec, and so fall back to the
+ * default memory mapped register rules in 8.2:
+ * Software shall use CXL.io Memory Read and Write to access memory mapped
+ * register defined in this section. Unless otherwise specified, software
+ * shall restrict the accesses width based on the following:
+ * • A 32 bit register shall be accessed as a 1 Byte, 2 Bytes or 4 Bytes
+ * quantity.
+ * • A 64 bit register shall be accessed as a 1 Byte, 2 Bytes, 4 Bytes or 8
+ * Bytes
+ * • The address shall be a multiple of the access width, e.g. when
+ * accessing a register as a 4 Byte quantity, the address shall be
+ * multiple of 4.
+ * • The accesses shall map to contiguous bytes.If these rules are not
+ * followed, the behavior is undefined
+ */
+
+static uint64_t caps_reg_read(void *opaque, hwaddr offset, unsigned size)
+{
+ CXLDeviceState *cxl_dstate = opaque;
+
+ if (size == 4) {
+ return cxl_dstate->caps_reg_state32[offset / sizeof(*cxl_dstate->caps_reg_state32)];
+ } else {
+ return cxl_dstate->caps_reg_state64[offset / sizeof(*cxl_dstate->caps_reg_state64)];
+ }
+}
+
+static uint64_t dev_reg_read(void *opaque, hwaddr offset, unsigned size)
+{
+ return 0;
+}
+
+static uint64_t mailbox_reg_read(void *opaque, hwaddr offset, unsigned size)
+{
+ CXLDeviceState *cxl_dstate = opaque;
+
+ switch (size) {
+ case 1:
+ return cxl_dstate->mbox_reg_state[offset];
+ case 2:
+ return cxl_dstate->mbox_reg_state16[offset / size];
+ case 4:
+ return cxl_dstate->mbox_reg_state32[offset / size];
+ case 8:
+ return cxl_dstate->mbox_reg_state64[offset / size];
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void mailbox_mem_writel(uint32_t *reg_state, hwaddr offset,
+ uint64_t value)
+{
+ switch (offset) {
+ case A_CXL_DEV_MAILBOX_CTRL:
+ /* fallthrough */
+ case A_CXL_DEV_MAILBOX_CAP:
+ /* RO register */
+ break;
+ default:
+ qemu_log_mask(LOG_UNIMP,
+ "%s Unexpected 32-bit access to 0x%" PRIx64 " (WI)\n",
+ __func__, offset);
+ return;
+ }
+
+ reg_state[offset / sizeof(*reg_state)] = value;
+}
+
+static void mailbox_mem_writeq(uint64_t *reg_state, hwaddr offset,
+ uint64_t value)
+{
+ switch (offset) {
+ case A_CXL_DEV_MAILBOX_CMD:
+ break;
+ case A_CXL_DEV_BG_CMD_STS:
+ /* BG not supported */
+ /* fallthrough */
+ case A_CXL_DEV_MAILBOX_STS:
+ /* Read only register, will get updated by the state machine */
+ return;
+ default:
+ qemu_log_mask(LOG_UNIMP,
+ "%s Unexpected 64-bit access to 0x%" PRIx64 " (WI)\n",
+ __func__, offset);
+ return;
+ }
+
+
+ reg_state[offset / sizeof(*reg_state)] = value;
+}
+
+static void mailbox_reg_write(void *opaque, hwaddr offset, uint64_t value,
+ unsigned size)
+{
+ CXLDeviceState *cxl_dstate = opaque;
+
+ if (offset >= A_CXL_DEV_CMD_PAYLOAD) {
+ memcpy(cxl_dstate->mbox_reg_state + offset, &value, size);
+ return;
+ }
+
+ switch (size) {
+ case 4:
+ mailbox_mem_writel(cxl_dstate->mbox_reg_state32, offset, value);
+ break;
+ case 8:
+ mailbox_mem_writeq(cxl_dstate->mbox_reg_state64, offset, value);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (ARRAY_FIELD_EX32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CTRL,
+ DOORBELL)) {
+ cxl_process_mailbox(cxl_dstate);
+ }
+}
+
+static uint64_t mdev_reg_read(void *opaque, hwaddr offset, unsigned size)
+{
+ uint64_t retval = 0;
+
+ retval = FIELD_DP64(retval, CXL_MEM_DEV_STS, MEDIA_STATUS, 1);
+ retval = FIELD_DP64(retval, CXL_MEM_DEV_STS, MBOX_READY, 1);
+
+ return retval;
+}
+
+static const MemoryRegionOps mdev_ops = {
+ .read = mdev_reg_read,
+ .write = NULL, /* memory device register is read only */
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ .unaligned = false,
+ },
+ .impl = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+};
+
+static const MemoryRegionOps mailbox_ops = {
+ .read = mailbox_reg_read,
+ .write = mailbox_reg_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ .unaligned = false,
+ },
+ .impl = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ },
+};
+
+static const MemoryRegionOps dev_ops = {
+ .read = dev_reg_read,
+ .write = NULL, /* status register is read only */
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ .unaligned = false,
+ },
+ .impl = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ },
+};
+
+static const MemoryRegionOps caps_ops = {
+ .read = caps_reg_read,
+ .write = NULL, /* caps registers are read only */
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ .unaligned = false,
+ },
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ },
+};
+
+void cxl_device_register_block_init(Object *obj, CXLDeviceState *cxl_dstate)
+{
+ /* This will be a BAR, so needs to be rounded up to pow2 for PCI spec */
+ memory_region_init(&cxl_dstate->device_registers, obj, "device-registers",
+ pow2ceil(CXL_MMIO_SIZE));
+
+ memory_region_init_io(&cxl_dstate->caps, obj, &caps_ops, cxl_dstate,
+ "cap-array", CXL_CAPS_SIZE);
+ memory_region_init_io(&cxl_dstate->device, obj, &dev_ops, cxl_dstate,
+ "device-status", CXL_DEVICE_STATUS_REGISTERS_LENGTH);
+ memory_region_init_io(&cxl_dstate->mailbox, obj, &mailbox_ops, cxl_dstate,
+ "mailbox", CXL_MAILBOX_REGISTERS_LENGTH);
+ memory_region_init_io(&cxl_dstate->memory_device, obj, &mdev_ops,
+ cxl_dstate, "memory device caps",
+ CXL_MEMORY_DEVICE_REGISTERS_LENGTH);
+
+ memory_region_add_subregion(&cxl_dstate->device_registers, 0,
+ &cxl_dstate->caps);
+ memory_region_add_subregion(&cxl_dstate->device_registers,
+ CXL_DEVICE_STATUS_REGISTERS_OFFSET,
+ &cxl_dstate->device);
+ memory_region_add_subregion(&cxl_dstate->device_registers,
+ CXL_MAILBOX_REGISTERS_OFFSET,
+ &cxl_dstate->mailbox);
+ memory_region_add_subregion(&cxl_dstate->device_registers,
+ CXL_MEMORY_DEVICE_REGISTERS_OFFSET,
+ &cxl_dstate->memory_device);
+}
+
+static void device_reg_init_common(CXLDeviceState *cxl_dstate) { }
+
+static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
+{
+ /* 2048 payload size, with no interrupt or background support */
+ ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP,
+ PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT);
+ cxl_dstate->payload_size = CXL_MAILBOX_MAX_PAYLOAD_SIZE;
+}
+
+static void memdev_reg_init_common(CXLDeviceState *cxl_dstate) { }
+
+void cxl_device_register_init_common(CXLDeviceState *cxl_dstate)
+{
+ uint64_t *cap_hdrs = cxl_dstate->caps_reg_state64;
+ const int cap_count = 3;
+
+ /* CXL Device Capabilities Array Register */
+ ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_ID, 0);
+ ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_VERSION, 1);
+ ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_COUNT, cap_count);
+
+ cxl_device_cap_init(cxl_dstate, DEVICE_STATUS, 1);
+ device_reg_init_common(cxl_dstate);
+
+ cxl_device_cap_init(cxl_dstate, MAILBOX, 2);
+ mailbox_reg_init_common(cxl_dstate);
+
+ cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000);
+ memdev_reg_init_common(cxl_dstate);
+
+ assert(cxl_initialize_mailbox(cxl_dstate) == 0);
+}
diff --git a/hw/cxl/cxl-host-stubs.c b/hw/cxl/cxl-host-stubs.c
new file mode 100644
index 0000000000..24465a52ab
--- /dev/null
+++ b/hw/cxl/cxl-host-stubs.c
@@ -0,0 +1,16 @@
+/*
+ * CXL host parameter parsing routine stubs
+ *
+ * Copyright (c) 2022 Huawei
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/cxl/cxl.h"
+
+void cxl_fixed_memory_window_config(MachineState *ms,
+ CXLFixedMemoryWindowOptions *object,
+ Error **errp) {};
+
+void cxl_fixed_memory_window_link_targets(Error **errp) {};
+
+const MemoryRegionOps cfmws_ops;
diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
new file mode 100644
index 0000000000..469b3c4ced
--- /dev/null
+++ b/hw/cxl/cxl-host.c
@@ -0,0 +1,222 @@
+/*
+ * CXL host parameter parsing routines
+ *
+ * Copyright (c) 2022 Huawei
+ * Modeled loosely on the NUMA options handling in hw/core/numa.c
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/bitmap.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "sysemu/qtest.h"
+#include "hw/boards.h"
+
+#include "qapi/qapi-visit-machine.h"
+#include "hw/cxl/cxl.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/pci_host.h"
+#include "hw/pci/pcie_port.h"
+
+void cxl_fixed_memory_window_config(MachineState *ms,
+ CXLFixedMemoryWindowOptions *object,
+ Error **errp)
+{
+ CXLFixedWindow *fw = g_malloc0(sizeof(*fw));
+ strList *target;
+ int i;
+
+ for (target = object->targets; target; target = target->next) {
+ fw->num_targets++;
+ }
+
+ fw->enc_int_ways = cxl_interleave_ways_enc(fw->num_targets, errp);
+ if (*errp) {
+ return;
+ }
+
+ fw->targets = g_malloc0_n(fw->num_targets, sizeof(*fw->targets));
+ for (i = 0, target = object->targets; target; i++, target = target->next) {
+ /* This link cannot be resolved yet, so stash the name for now */
+ fw->targets[i] = g_strdup(target->value);
+ }
+
+ if (object->size % (256 * MiB)) {
+ error_setg(errp,
+ "Size of a CXL fixed memory window must my a multiple of 256MiB");
+ return;
+ }
+ fw->size = object->size;
+
+ if (object->has_interleave_granularity) {
+ fw->enc_int_gran =
+ cxl_interleave_granularity_enc(object->interleave_granularity,
+ errp);
+ if (*errp) {
+ return;
+ }
+ } else {
+ /* Default to 256 byte interleave */
+ fw->enc_int_gran = 0;
+ }
+
+ ms->cxl_devices_state->fixed_windows =
+ g_list_append(ms->cxl_devices_state->fixed_windows, fw);
+
+ return;
+}
+
+void cxl_fixed_memory_window_link_targets(Error **errp)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+
+ if (ms->cxl_devices_state && ms->cxl_devices_state->fixed_windows) {
+ GList *it;
+
+ for (it = ms->cxl_devices_state->fixed_windows; it; it = it->next) {
+ CXLFixedWindow *fw = it->data;
+ int i;
+
+ for (i = 0; i < fw->num_targets; i++) {
+ Object *o;
+ bool ambig;
+
+ o = object_resolve_path_type(fw->targets[i],
+ TYPE_PXB_CXL_DEVICE,
+ &ambig);
+ if (!o) {
+ error_setg(errp, "Could not resolve CXLFM target %s",
+ fw->targets[i]);
+ return;
+ }
+ fw->target_hbs[i] = PXB_CXL_DEV(o);
+ }
+ }
+ }
+}
+
+/* TODO: support, multiple hdm decoders */
+static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr,
+ uint8_t *target)
+{
+ uint32_t ctrl;
+ uint32_t ig_enc;
+ uint32_t iw_enc;
+ uint32_t target_reg;
+ uint32_t target_idx;
+
+ ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL];
+ if (!FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) {
+ return false;
+ }
+
+ ig_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IG);
+ iw_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IW);
+ target_idx = (addr / cxl_decode_ig(ig_enc)) % (1 << iw_enc);
+
+ if (target_idx > 4) {
+ target_reg = cache_mem[R_CXL_HDM_DECODER0_TARGET_LIST_LO];
+ target_reg >>= target_idx * 8;
+ } else {
+ target_reg = cache_mem[R_CXL_HDM_DECODER0_TARGET_LIST_LO];
+ target_reg >>= (target_idx - 4) * 8;
+ }
+ *target = target_reg & 0xff;
+
+ return true;
+}
+
+static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr)
+{
+ CXLComponentState *hb_cstate;
+ PCIHostState *hb;
+ int rb_index;
+ uint32_t *cache_mem;
+ uint8_t target;
+ bool target_found;
+ PCIDevice *rp, *d;
+
+ /* Address is relative to memory region. Convert to HPA */
+ addr += fw->base;
+
+ rb_index = (addr / cxl_decode_ig(fw->enc_int_gran)) % fw->num_targets;
+ hb = PCI_HOST_BRIDGE(fw->target_hbs[rb_index]->cxl.cxl_host_bridge);
+ if (!hb || !hb->bus || !pci_bus_is_cxl(hb->bus)) {
+ return NULL;
+ }
+
+ hb_cstate = cxl_get_hb_cstate(hb);
+ if (!hb_cstate) {
+ return NULL;
+ }
+
+ cache_mem = hb_cstate->crb.cache_mem_registers;
+
+ target_found = cxl_hdm_find_target(cache_mem, addr, &target);
+ if (!target_found) {
+ return NULL;
+ }
+
+ rp = pcie_find_port_by_pn(hb->bus, target);
+ if (!rp) {
+ return NULL;
+ }
+
+ d = pci_bridge_get_sec_bus(PCI_BRIDGE(rp))->devices[0];
+
+ if (!d || !object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) {
+ return NULL;
+ }
+
+ return d;
+}
+
+static MemTxResult cxl_read_cfmws(void *opaque, hwaddr addr, uint64_t *data,
+ unsigned size, MemTxAttrs attrs)
+{
+ CXLFixedWindow *fw = opaque;
+ PCIDevice *d;
+
+ d = cxl_cfmws_find_device(fw, addr);
+ if (d == NULL) {
+ *data = 0;
+ /* Reads to invalid address return poison */
+ return MEMTX_ERROR;
+ }
+
+ return cxl_type3_read(d, addr + fw->base, data, size, attrs);
+}
+
+static MemTxResult cxl_write_cfmws(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size,
+ MemTxAttrs attrs)
+{
+ CXLFixedWindow *fw = opaque;
+ PCIDevice *d;
+
+ d = cxl_cfmws_find_device(fw, addr);
+ if (d == NULL) {
+ /* Writes to invalid address are silent */
+ return MEMTX_OK;
+ }
+
+ return cxl_type3_write(d, addr + fw->base, data, size, attrs);
+}
+
+const MemoryRegionOps cfmws_ops = {
+ .read_with_attrs = cxl_read_cfmws,
+ .write_with_attrs = cxl_write_cfmws,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ .unaligned = true,
+ },
+ .impl = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ .unaligned = true,
+ },
+};
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
new file mode 100644
index 0000000000..bb66c765a5
--- /dev/null
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -0,0 +1,478 @@
+/*
+ * CXL Utility library for mailbox interface
+ *
+ * Copyright(C) 2020 Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/cxl/cxl.h"
+#include "hw/pci/pci.h"
+#include "qemu/cutils.h"
+#include "qemu/log.h"
+#include "qemu/uuid.h"
+
+/*
+ * How to add a new command, example. The command set FOO, with cmd BAR.
+ * 1. Add the command set and cmd to the enum.
+ * FOO = 0x7f,
+ * #define BAR 0
+ * 2. Implement the handler
+ * static ret_code cmd_foo_bar(struct cxl_cmd *cmd,
+ * CXLDeviceState *cxl_dstate, uint16_t *len)
+ * 3. Add the command to the cxl_cmd_set[][]
+ * [FOO][BAR] = { "FOO_BAR", cmd_foo_bar, x, y },
+ * 4. Implement your handler
+ * define_mailbox_handler(FOO_BAR) { ... return CXL_MBOX_SUCCESS; }
+ *
+ *
+ * Writing the handler:
+ * The handler will provide the &struct cxl_cmd, the &CXLDeviceState, and the
+ * in/out length of the payload. The handler is responsible for consuming the
+ * payload from cmd->payload and operating upon it as necessary. It must then
+ * fill the output data into cmd->payload (overwriting what was there),
+ * setting the length, and returning a valid return code.
+ *
+ * XXX: The handler need not worry about endianess. The payload is read out of
+ * a register interface that already deals with it.
+ */
+
+enum {
+ EVENTS = 0x01,
+ #define GET_RECORDS 0x0
+ #define CLEAR_RECORDS 0x1
+ #define GET_INTERRUPT_POLICY 0x2
+ #define SET_INTERRUPT_POLICY 0x3
+ FIRMWARE_UPDATE = 0x02,
+ #define GET_INFO 0x0
+ TIMESTAMP = 0x03,
+ #define GET 0x0
+ #define SET 0x1
+ LOGS = 0x04,
+ #define GET_SUPPORTED 0x0
+ #define GET_LOG 0x1
+ IDENTIFY = 0x40,
+ #define MEMORY_DEVICE 0x0
+ CCLS = 0x41,
+ #define GET_PARTITION_INFO 0x0
+ #define GET_LSA 0x2
+ #define SET_LSA 0x3
+};
+
+/* 8.2.8.4.5.1 Command Return Codes */
+typedef enum {
+ CXL_MBOX_SUCCESS = 0x0,
+ CXL_MBOX_BG_STARTED = 0x1,
+ CXL_MBOX_INVALID_INPUT = 0x2,
+ CXL_MBOX_UNSUPPORTED = 0x3,
+ CXL_MBOX_INTERNAL_ERROR = 0x4,
+ CXL_MBOX_RETRY_REQUIRED = 0x5,
+ CXL_MBOX_BUSY = 0x6,
+ CXL_MBOX_MEDIA_DISABLED = 0x7,
+ CXL_MBOX_FW_XFER_IN_PROGRESS = 0x8,
+ CXL_MBOX_FW_XFER_OUT_OF_ORDER = 0x9,
+ CXL_MBOX_FW_AUTH_FAILED = 0xa,
+ CXL_MBOX_FW_INVALID_SLOT = 0xb,
+ CXL_MBOX_FW_ROLLEDBACK = 0xc,
+ CXL_MBOX_FW_REST_REQD = 0xd,
+ CXL_MBOX_INVALID_HANDLE = 0xe,
+ CXL_MBOX_INVALID_PA = 0xf,
+ CXL_MBOX_INJECT_POISON_LIMIT = 0x10,
+ CXL_MBOX_PERMANENT_MEDIA_FAILURE = 0x11,
+ CXL_MBOX_ABORTED = 0x12,
+ CXL_MBOX_INVALID_SECURITY_STATE = 0x13,
+ CXL_MBOX_INCORRECT_PASSPHRASE = 0x14,
+ CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15,
+ CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16,
+ CXL_MBOX_MAX = 0x17
+} ret_code;
+
+struct cxl_cmd;
+typedef ret_code (*opcode_handler)(struct cxl_cmd *cmd,
+ CXLDeviceState *cxl_dstate, uint16_t *len);
+struct cxl_cmd {
+ const char *name;
+ opcode_handler handler;
+ ssize_t in;
+ uint16_t effect; /* Reported in CEL */
+ uint8_t *payload;
+};
+
+#define DEFINE_MAILBOX_HANDLER_ZEROED(name, size) \
+ uint16_t __zero##name = size; \
+ static ret_code cmd_##name(struct cxl_cmd *cmd, \
+ CXLDeviceState *cxl_dstate, uint16_t *len) \
+ { \
+ *len = __zero##name; \
+ memset(cmd->payload, 0, *len); \
+ return CXL_MBOX_SUCCESS; \
+ }
+#define DEFINE_MAILBOX_HANDLER_NOP(name) \
+ static ret_code cmd_##name(struct cxl_cmd *cmd, \
+ CXLDeviceState *cxl_dstate, uint16_t *len) \
+ { \
+ return CXL_MBOX_SUCCESS; \
+ }
+
+DEFINE_MAILBOX_HANDLER_ZEROED(events_get_records, 0x20);
+DEFINE_MAILBOX_HANDLER_NOP(events_clear_records);
+DEFINE_MAILBOX_HANDLER_ZEROED(events_get_interrupt_policy, 4);
+DEFINE_MAILBOX_HANDLER_NOP(events_set_interrupt_policy);
+
+/* 8.2.9.2.1 */
+static ret_code cmd_firmware_update_get_info(struct cxl_cmd *cmd,
+ CXLDeviceState *cxl_dstate,
+ uint16_t *len)
+{
+ struct {
+ uint8_t slots_supported;
+ uint8_t slot_info;
+ uint8_t caps;
+ uint8_t rsvd[0xd];
+ char fw_rev1[0x10];
+ char fw_rev2[0x10];
+ char fw_rev3[0x10];
+ char fw_rev4[0x10];
+ } QEMU_PACKED *fw_info;
+ QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50);
+
+ if (cxl_dstate->pmem_size < (256 << 20)) {
+ return CXL_MBOX_INTERNAL_ERROR;
+ }
+
+ fw_info = (void *)cmd->payload;
+ memset(fw_info, 0, sizeof(*fw_info));
+
+ fw_info->slots_supported = 2;
+ fw_info->slot_info = BIT(0) | BIT(3);
+ fw_info->caps = 0;
+ pstrcpy(fw_info->fw_rev1, sizeof(fw_info->fw_rev1), "BWFW VERSION 0");
+
+ *len = sizeof(*fw_info);
+ return CXL_MBOX_SUCCESS;
+}
+
+/* 8.2.9.3.1 */
+static ret_code cmd_timestamp_get(struct cxl_cmd *cmd,
+ CXLDeviceState *cxl_dstate,
+ uint16_t *len)
+{
+ uint64_t time, delta;
+ uint64_t final_time = 0;
+
+ if (cxl_dstate->timestamp.set) {
+ /* First find the delta from the last time the host set the time. */
+ time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ delta = time - cxl_dstate->timestamp.last_set;
+ final_time = cxl_dstate->timestamp.host_set + delta;
+ }
+
+ /* Then adjust the actual time */
+ stq_le_p(cmd->payload, final_time);
+ *len = 8;
+
+ return CXL_MBOX_SUCCESS;
+}
+
+/* 8.2.9.3.2 */
+static ret_code cmd_timestamp_set(struct cxl_cmd *cmd,
+ CXLDeviceState *cxl_dstate,
+ uint16_t *len)
+{
+ cxl_dstate->timestamp.set = true;
+ cxl_dstate->timestamp.last_set = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+ cxl_dstate->timestamp.host_set = le64_to_cpu(*(uint64_t *)cmd->payload);
+
+ *len = 0;
+ return CXL_MBOX_SUCCESS;
+}
+
+static QemuUUID cel_uuid;
+
+/* 8.2.9.4.1 */
+static ret_code cmd_logs_get_supported(struct cxl_cmd *cmd,
+ CXLDeviceState *cxl_dstate,
+ uint16_t *len)
+{
+ struct {
+ uint16_t entries;
+ uint8_t rsvd[6];
+ struct {
+ QemuUUID uuid;
+ uint32_t size;
+ } log_entries[1];
+ } QEMU_PACKED *supported_logs = (void *)cmd->payload;
+ QEMU_BUILD_BUG_ON(sizeof(*supported_logs) != 0x1c);
+
+ supported_logs->entries = 1;
+ supported_logs->log_entries[0].uuid = cel_uuid;
+ supported_logs->log_entries[0].size = 4 * cxl_dstate->cel_size;
+
+ *len = sizeof(*supported_logs);
+ return CXL_MBOX_SUCCESS;
+}
+
+/* 8.2.9.4.2 */
+static ret_code cmd_logs_get_log(struct cxl_cmd *cmd,
+ CXLDeviceState *cxl_dstate,
+ uint16_t *len)
+{
+ struct {
+ QemuUUID uuid;
+ uint32_t offset;
+ uint32_t length;
+ } QEMU_PACKED QEMU_ALIGNED(16) *get_log = (void *)cmd->payload;
+
+ /*
+ * 8.2.9.4.2
+ * The device shall return Invalid Parameter if the Offset or Length
+ * fields attempt to access beyond the size of the log as reported by Get
+ * Supported Logs.
+ *
+ * XXX: Spec is wrong, "Invalid Parameter" isn't a thing.
+ * XXX: Spec doesn't address incorrect UUID incorrectness.
+ *
+ * The CEL buffer is large enough to fit all commands in the emulation, so
+ * the only possible failure would be if the mailbox itself isn't big
+ * enough.
+ */
+ if (get_log->offset + get_log->length > cxl_dstate->payload_size) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ if (!qemu_uuid_is_equal(&get_log->uuid, &cel_uuid)) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ /* Store off everything to local variables so we can wipe out the payload */
+ *len = get_log->length;
+
+ memmove(cmd->payload, cxl_dstate->cel_log + get_log->offset,
+ get_log->length);
+
+ return CXL_MBOX_SUCCESS;
+}
+
+/* 8.2.9.5.1.1 */
+static ret_code cmd_identify_memory_device(struct cxl_cmd *cmd,
+ CXLDeviceState *cxl_dstate,
+ uint16_t *len)
+{
+ struct {
+ char fw_revision[0x10];
+ uint64_t total_capacity;
+ uint64_t volatile_capacity;
+ uint64_t persistent_capacity;
+ uint64_t partition_align;
+ uint16_t info_event_log_size;
+ uint16_t warning_event_log_size;
+ uint16_t failure_event_log_size;
+ uint16_t fatal_event_log_size;
+ uint32_t lsa_size;
+ uint8_t poison_list_max_mer[3];
+ uint16_t inject_poison_limit;
+ uint8_t poison_caps;
+ uint8_t qos_telemetry_caps;
+ } QEMU_PACKED *id;
+ QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43);
+
+ CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
+ CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
+ uint64_t size = cxl_dstate->pmem_size;
+
+ if (!QEMU_IS_ALIGNED(size, 256 << 20)) {
+ return CXL_MBOX_INTERNAL_ERROR;
+ }
+
+ id = (void *)cmd->payload;
+ memset(id, 0, sizeof(*id));
+
+ /* PMEM only */
+ snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
+
+ id->total_capacity = size / (256 << 20);
+ id->persistent_capacity = size / (256 << 20);
+ id->lsa_size = cvc->get_lsa_size(ct3d);
+
+ *len = sizeof(*id);
+ return CXL_MBOX_SUCCESS;
+}
+
+static ret_code cmd_ccls_get_partition_info(struct cxl_cmd *cmd,
+ CXLDeviceState *cxl_dstate,
+ uint16_t *len)
+{
+ struct {
+ uint64_t active_vmem;
+ uint64_t active_pmem;
+ uint64_t next_vmem;
+ uint64_t next_pmem;
+ } QEMU_PACKED *part_info = (void *)cmd->payload;
+ QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20);
+ uint64_t size = cxl_dstate->pmem_size;
+
+ if (!QEMU_IS_ALIGNED(size, 256 << 20)) {
+ return CXL_MBOX_INTERNAL_ERROR;
+ }
+
+ /* PMEM only */
+ part_info->active_vmem = 0;
+ part_info->next_vmem = 0;
+ part_info->active_pmem = size / (256 << 20);
+ part_info->next_pmem = 0;
+
+ *len = sizeof(*part_info);
+ return CXL_MBOX_SUCCESS;
+}
+
+static ret_code cmd_ccls_get_lsa(struct cxl_cmd *cmd,
+ CXLDeviceState *cxl_dstate,
+ uint16_t *len)
+{
+ struct {
+ uint32_t offset;
+ uint32_t length;
+ } QEMU_PACKED *get_lsa;
+ CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
+ CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
+ uint32_t offset, length;
+
+ get_lsa = (void *)cmd->payload;
+ offset = get_lsa->offset;
+ length = get_lsa->length;
+
+ if (offset + length > cvc->get_lsa_size(ct3d)) {
+ *len = 0;
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ *len = cvc->get_lsa(ct3d, get_lsa, length, offset);
+ return CXL_MBOX_SUCCESS;
+}
+
+static ret_code cmd_ccls_set_lsa(struct cxl_cmd *cmd,
+ CXLDeviceState *cxl_dstate,
+ uint16_t *len)
+{
+ struct set_lsa_pl {
+ uint32_t offset;
+ uint32_t rsvd;
+ uint8_t data[];
+ } QEMU_PACKED;
+ struct set_lsa_pl *set_lsa_payload = (void *)cmd->payload;
+ CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
+ CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
+ const size_t hdr_len = offsetof(struct set_lsa_pl, data);
+ uint16_t plen = *len;
+
+ *len = 0;
+ if (!plen) {
+ return CXL_MBOX_SUCCESS;
+ }
+
+ if (set_lsa_payload->offset + plen > cvc->get_lsa_size(ct3d) + hdr_len) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ plen -= hdr_len;
+
+ cvc->set_lsa(ct3d, set_lsa_payload->data, plen, set_lsa_payload->offset);
+ return CXL_MBOX_SUCCESS;
+}
+
+#define IMMEDIATE_CONFIG_CHANGE (1 << 1)
+#define IMMEDIATE_DATA_CHANGE (1 << 2)
+#define IMMEDIATE_POLICY_CHANGE (1 << 3)
+#define IMMEDIATE_LOG_CHANGE (1 << 4)
+
+static struct cxl_cmd cxl_cmd_set[256][256] = {
+ [EVENTS][GET_RECORDS] = { "EVENTS_GET_RECORDS",
+ cmd_events_get_records, 1, 0 },
+ [EVENTS][CLEAR_RECORDS] = { "EVENTS_CLEAR_RECORDS",
+ cmd_events_clear_records, ~0, IMMEDIATE_LOG_CHANGE },
+ [EVENTS][GET_INTERRUPT_POLICY] = { "EVENTS_GET_INTERRUPT_POLICY",
+ cmd_events_get_interrupt_policy, 0, 0 },
+ [EVENTS][SET_INTERRUPT_POLICY] = { "EVENTS_SET_INTERRUPT_POLICY",
+ cmd_events_set_interrupt_policy, 4, IMMEDIATE_CONFIG_CHANGE },
+ [FIRMWARE_UPDATE][GET_INFO] = { "FIRMWARE_UPDATE_GET_INFO",
+ cmd_firmware_update_get_info, 0, 0 },
+ [TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 },
+ [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, 8, IMMEDIATE_POLICY_CHANGE },
+ [LOGS][GET_SUPPORTED] = { "LOGS_GET_SUPPORTED", cmd_logs_get_supported, 0, 0 },
+ [LOGS][GET_LOG] = { "LOGS_GET_LOG", cmd_logs_get_log, 0x18, 0 },
+ [IDENTIFY][MEMORY_DEVICE] = { "IDENTIFY_MEMORY_DEVICE",
+ cmd_identify_memory_device, 0, 0 },
+ [CCLS][GET_PARTITION_INFO] = { "CCLS_GET_PARTITION_INFO",
+ cmd_ccls_get_partition_info, 0, 0 },
+ [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 0, 0 },
+ [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa,
+ ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE },
+};
+
+void cxl_process_mailbox(CXLDeviceState *cxl_dstate)
+{
+ uint16_t ret = CXL_MBOX_SUCCESS;
+ struct cxl_cmd *cxl_cmd;
+ uint64_t status_reg;
+ opcode_handler h;
+ uint64_t command_reg = cxl_dstate->mbox_reg_state64[R_CXL_DEV_MAILBOX_CMD];
+
+ uint8_t set = FIELD_EX64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND_SET);
+ uint8_t cmd = FIELD_EX64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND);
+ uint16_t len = FIELD_EX64(command_reg, CXL_DEV_MAILBOX_CMD, LENGTH);
+ cxl_cmd = &cxl_cmd_set[set][cmd];
+ h = cxl_cmd->handler;
+ if (h) {
+ if (len == cxl_cmd->in) {
+ cxl_cmd->payload = cxl_dstate->mbox_reg_state +
+ A_CXL_DEV_CMD_PAYLOAD;
+ ret = (*h)(cxl_cmd, cxl_dstate, &len);
+ assert(len <= cxl_dstate->payload_size);
+ } else {
+ ret = CXL_MBOX_INVALID_PAYLOAD_LENGTH;
+ }
+ } else {
+ qemu_log_mask(LOG_UNIMP, "Command %04xh not implemented\n",
+ set << 8 | cmd);
+ ret = CXL_MBOX_UNSUPPORTED;
+ }
+
+ /* Set the return code */
+ status_reg = FIELD_DP64(0, CXL_DEV_MAILBOX_STS, ERRNO, ret);
+
+ /* Set the return length */
+ command_reg = FIELD_DP64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND_SET, 0);
+ command_reg = FIELD_DP64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND, 0);
+ command_reg = FIELD_DP64(command_reg, CXL_DEV_MAILBOX_CMD, LENGTH, len);
+
+ cxl_dstate->mbox_reg_state64[R_CXL_DEV_MAILBOX_CMD] = command_reg;
+ cxl_dstate->mbox_reg_state64[R_CXL_DEV_MAILBOX_STS] = status_reg;
+
+ /* Tell the host we're done */
+ ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CTRL,
+ DOORBELL, 0);
+}
+
+int cxl_initialize_mailbox(CXLDeviceState *cxl_dstate)
+{
+ /* CXL 2.0: Table 169 Get Supported Logs Log Entry */
+ const char *cel_uuidstr = "0da9c0b5-bf41-4b78-8f79-96b1623b3f17";
+
+ for (int set = 0; set < 256; set++) {
+ for (int cmd = 0; cmd < 256; cmd++) {
+ if (cxl_cmd_set[set][cmd].handler) {
+ struct cxl_cmd *c = &cxl_cmd_set[set][cmd];
+ struct cel_log *log =
+ &cxl_dstate->cel_log[cxl_dstate->cel_size];
+
+ log->opcode = (set << 8) | cmd;
+ log->effect = c->effect;
+ cxl_dstate->cel_size++;
+ }
+ }
+ }
+
+ return qemu_uuid_parse(cel_uuidstr, &cel_uuid);
+}
diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build
new file mode 100644
index 0000000000..f117b99949
--- /dev/null
+++ b/hw/cxl/meson.build
@@ -0,0 +1,12 @@
+softmmu_ss.add(when: 'CONFIG_CXL',
+ if_true: files(
+ 'cxl-component-utils.c',
+ 'cxl-device-utils.c',
+ 'cxl-mailbox-utils.c',
+ 'cxl-host.c',
+ ),
+ if_false: files(
+ 'cxl-host-stubs.c',
+ ))
+
+softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('cxl-host-stubs.c'))
diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c
index 09818231bd..96e56c4467 100644
--- a/hw/display/vhost-user-gpu.c
+++ b/hw/display/vhost-user-gpu.c
@@ -565,6 +565,12 @@ vhost_user_gpu_device_realize(DeviceState *qdev, Error **errp)
g->vhost_gpu_fd = -1;
}
+static struct vhost_dev *vhost_user_gpu_get_vhost(VirtIODevice *vdev)
+{
+ VhostUserGPU *g = VHOST_USER_GPU(vdev);
+ return &g->vhost->dev;
+}
+
static Property vhost_user_gpu_properties[] = {
VIRTIO_GPU_BASE_PROPERTIES(VhostUserGPU, parent_obj.conf),
DEFINE_PROP_END_OF_LIST(),
@@ -586,6 +592,7 @@ vhost_user_gpu_class_init(ObjectClass *klass, void *data)
vdc->guest_notifier_pending = vhost_user_gpu_guest_notifier_pending;
vdc->get_config = vhost_user_gpu_get_config;
vdc->set_config = vhost_user_gpu_set_config;
+ vdc->get_vhost = vhost_user_gpu_get_vhost;
device_class_set_props(dc, vhost_user_gpu_properties);
}
diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c
index fff0fb4a82..8ba5da4312 100644
--- a/hw/display/virtio-gpu-base.c
+++ b/hw/display/virtio-gpu-base.c
@@ -173,7 +173,7 @@ virtio_gpu_base_device_realize(DeviceState *qdev,
}
g->virtio_config.num_scanouts = cpu_to_le32(g->conf.max_outputs);
- virtio_init(VIRTIO_DEVICE(g), "virtio-gpu", VIRTIO_ID_GPU,
+ virtio_init(VIRTIO_DEVICE(g), VIRTIO_ID_GPU,
sizeof(struct virtio_gpu_config));
if (virtio_gpu_virgl_enabled(g->conf)) {
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index dcf6ece3d0..c125939ed6 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -28,6 +28,7 @@
#include "qemu/bitmap.h"
#include "qemu/error-report.h"
#include "hw/pci/pci.h"
+#include "hw/cxl/cxl.h"
#include "hw/core/cpu.h"
#include "target/i386/cpu.h"
#include "hw/misc/pvpanic.h"
@@ -66,6 +67,7 @@
#include "hw/acpi/aml-build.h"
#include "hw/acpi/utils.h"
#include "hw/acpi/pci.h"
+#include "hw/acpi/cxl.h"
#include "qom/qom-qobject.h"
#include "hw/i386/amd_iommu.h"
@@ -75,6 +77,7 @@
#include "hw/acpi/ipmi.h"
#include "hw/acpi/hmat.h"
#include "hw/acpi/viot.h"
+#include "hw/acpi/cxl.h"
#include CONFIG_DEVICES
@@ -1409,6 +1412,22 @@ static void build_smb0(Aml *table, I2CBus *smbus, int devnr, int func)
aml_append(table, scope);
}
+static void build_acpi0017(Aml *table)
+{
+ Aml *dev, *scope, *method;
+
+ scope = aml_scope("_SB");
+ dev = aml_device("CXLM");
+ aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0017")));
+
+ method = aml_method("_STA", 0, AML_NOTSERIALIZED);
+ aml_append(method, aml_return(aml_int(0x01)));
+ aml_append(dev, method);
+
+ aml_append(scope, dev);
+ aml_append(table, scope);
+}
+
static void
build_dsdt(GArray *table_data, BIOSLinker *linker,
AcpiPmInfo *pm, AcpiMiscInfo *misc,
@@ -1428,6 +1447,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
#ifdef CONFIG_TPM
TPMIf *tpm = tpm_find();
#endif
+ bool cxl_present = false;
int i;
VMBusBridge *vmbus_bridge = vmbus_bridge_find();
AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = x86ms->oem_id,
@@ -1572,10 +1592,25 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
}
scope = aml_scope("\\_SB");
- dev = aml_device("PC%.02X", bus_num);
+
+ if (pci_bus_is_cxl(bus)) {
+ dev = aml_device("CL%.02X", bus_num);
+ } else {
+ dev = aml_device("PC%.02X", bus_num);
+ }
aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num)));
- if (pci_bus_is_express(bus)) {
+ if (pci_bus_is_cxl(bus)) {
+ struct Aml *pkg = aml_package(2);
+
+ aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0016")));
+ aml_append(pkg, aml_eisaid("PNP0A08"));
+ aml_append(pkg, aml_eisaid("PNP0A03"));
+ aml_append(dev, aml_name_decl("_CID", pkg));
+ aml_append(dev, aml_name_decl("_ADR", aml_int(0)));
+ aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
+ build_cxl_osc_method(dev);
+ } else if (pci_bus_is_express(bus)) {
aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08")));
aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
@@ -1595,9 +1630,23 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(dev, aml_name_decl("_CRS", crs));
aml_append(scope, dev);
aml_append(dsdt, scope);
+
+ /* Handle the ranges for the PXB expanders */
+ if (pci_bus_is_cxl(bus)) {
+ MemoryRegion *mr = &machine->cxl_devices_state->host_mr;
+ uint64_t base = mr->addr;
+
+ cxl_present = true;
+ crs_range_insert(crs_range_set.mem_ranges, base,
+ base + memory_region_size(mr) - 1);
+ }
}
}
+ if (cxl_present) {
+ build_acpi0017(dsdt);
+ }
+
/*
* At this point crs_range_set has all the ranges used by pci
* busses *other* than PCI0. These ranges will be excluded from
@@ -2662,6 +2711,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
machine->nvdimms_state, machine->ram_slots,
x86ms->oem_id, x86ms->oem_table_id);
}
+ if (machine->cxl_devices_state->is_enabled) {
+ cxl_build_cedt(machine, table_offsets, tables_blob, tables->linker,
+ x86ms->oem_id, x86ms->oem_table_id);
+ }
acpi_add_table(table_offsets, tables_blob);
build_waet(tables_blob, tables->linker, x86ms->oem_id, x86ms->oem_table_id);
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index ea8eaeb330..725f69095b 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -201,15 +201,18 @@ static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
/*
* AMDVi event structure
* 0:15 -> DeviceID
- * 55:63 -> event type + miscellaneous info
- * 63:127 -> related address
+ * 48:63 -> event type + miscellaneous info
+ * 64:127 -> related address
*/
static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
uint16_t info)
{
+ evt[0] = 0;
+ evt[1] = 0;
+
amdvi_setevent_bits(evt, devid, 0, 16);
- amdvi_setevent_bits(evt, info, 55, 8);
- amdvi_setevent_bits(evt, addr, 63, 64);
+ amdvi_setevent_bits(evt, info, 48, 16);
+ amdvi_setevent_bits(evt, addr, 64, 64);
}
/* log an error encountered during a page walk
*
@@ -218,7 +221,7 @@ static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr,
static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
hwaddr addr, uint16_t info)
{
- uint64_t evt[4];
+ uint64_t evt[2];
info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF;
amdvi_encode_event(evt, devid, addr, info);
@@ -234,7 +237,7 @@ static void amdvi_page_fault(AMDVIState *s, uint16_t devid,
static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
hwaddr devtab, uint16_t info)
{
- uint64_t evt[4];
+ uint64_t evt[2];
info |= AMDVI_EVENT_DEV_TAB_HW_ERROR;
@@ -248,7 +251,8 @@ static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid,
*/
static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
{
- uint64_t evt[4], info = AMDVI_EVENT_COMMAND_HW_ERROR;
+ uint64_t evt[2];
+ uint16_t info = AMDVI_EVENT_COMMAND_HW_ERROR;
amdvi_encode_event(evt, 0, addr, info);
amdvi_log_event(s, evt);
@@ -261,7 +265,7 @@ static void amdvi_log_command_error(AMDVIState *s, hwaddr addr)
static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
hwaddr addr)
{
- uint64_t evt[4];
+ uint64_t evt[2];
info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR;
amdvi_encode_event(evt, 0, addr, info);
@@ -276,7 +280,7 @@ static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info,
static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
hwaddr addr, uint16_t info)
{
- uint64_t evt[4];
+ uint64_t evt[2];
info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY;
amdvi_encode_event(evt, devid, addr, info);
@@ -288,7 +292,7 @@ static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid,
static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid,
hwaddr addr, uint16_t info)
{
- uint64_t evt[4];
+ uint64_t evt[2];
info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR;
amdvi_encode_event(evt, devid, addr, info);
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index c64aa81a83..2162394e08 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -181,6 +181,18 @@ static void vtd_update_scalable_state(IntelIOMMUState *s)
}
}
+static void vtd_update_iq_dw(IntelIOMMUState *s)
+{
+ uint64_t val = vtd_get_quad_raw(s, DMAR_IQA_REG);
+
+ if (s->ecap & VTD_ECAP_SMTS &&
+ val & VTD_IQA_DW_MASK) {
+ s->iq_dw = true;
+ } else {
+ s->iq_dw = false;
+ }
+}
+
/* Whether the address space needs to notify new mappings */
static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as)
{
@@ -469,11 +481,6 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
assert(fault < VTD_FR_MAX);
- if (fault == VTD_FR_RESERVED_ERR) {
- /* This is not a normal fault reason case. Drop it. */
- return;
- }
-
trace_vtd_dmar_fault(source_id, fault, addr, is_write);
if (fsts_reg & VTD_FSTS_PFO) {
@@ -1025,6 +1032,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
uint32_t offset;
uint64_t slpte;
uint64_t access_right_check;
+ uint64_t xlat, size;
if (!vtd_iova_range_check(s, iova, ce, aw_bits)) {
error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")",
@@ -1069,11 +1077,33 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
if (vtd_is_last_slpte(slpte, level)) {
*slptep = slpte;
*slpte_level = level;
- return 0;
+ break;
}
addr = vtd_get_slpte_addr(slpte, aw_bits);
level--;
}
+
+ xlat = vtd_get_slpte_addr(*slptep, aw_bits);
+ size = ~vtd_slpt_level_page_mask(level) + 1;
+
+ /*
+ * From VT-d spec 3.14: Untranslated requests and translation
+ * requests that result in an address in the interrupt range will be
+ * blocked with condition code LGN.4 or SGN.8.
+ */
+ if ((xlat > VTD_INTERRUPT_ADDR_LAST ||
+ xlat + size - 1 < VTD_INTERRUPT_ADDR_FIRST)) {
+ return 0;
+ } else {
+ error_report_once("%s: xlat address is in interrupt range "
+ "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
+ "slpte=0x%" PRIx64 ", write=%d, "
+ "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ")",
+ __func__, iova, level, slpte, is_write,
+ xlat, size);
+ return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR :
+ -VTD_FR_INTERRUPT_ADDR;
+ }
}
typedef int (*vtd_page_walk_hook)(IOMMUTLBEvent *event, void *private);
@@ -1633,11 +1663,12 @@ static const bool vtd_qualified_faults[] = {
[VTD_FR_PAGING_ENTRY_INV] = true,
[VTD_FR_ROOT_TABLE_INV] = false,
[VTD_FR_CONTEXT_TABLE_INV] = false,
+ [VTD_FR_INTERRUPT_ADDR] = true,
[VTD_FR_ROOT_ENTRY_RSVD] = false,
[VTD_FR_PAGING_ENTRY_RSVD] = true,
[VTD_FR_CONTEXT_ENTRY_TT] = true,
[VTD_FR_PASID_TABLE_INV] = false,
- [VTD_FR_RESERVED_ERR] = false,
+ [VTD_FR_SM_INTERRUPT_ADDR] = true,
[VTD_FR_MAX] = false,
};
@@ -2209,12 +2240,13 @@ static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en)
/* Handle write to Global Command Register */
static void vtd_handle_gcmd_write(IntelIOMMUState *s)
{
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG);
uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG);
uint32_t changed = status ^ val;
trace_vtd_reg_write_gcmd(status, val);
- if (changed & VTD_GCMD_TE) {
+ if ((changed & VTD_GCMD_TE) && s->dma_translation) {
/* Translation enable/disable */
vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
}
@@ -2230,7 +2262,8 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
/* Set/update the interrupt remapping root-table pointer */
vtd_handle_gcmd_sirtp(s);
}
- if (changed & VTD_GCMD_IRE) {
+ if ((changed & VTD_GCMD_IRE) &&
+ x86_iommu_ir_supported(x86_iommu)) {
/* Interrupt remap enable/disable */
vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE);
}
@@ -2883,12 +2916,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
} else {
vtd_set_quad(s, addr, val);
}
- if (s->ecap & VTD_ECAP_SMTS &&
- val & VTD_IQA_DW_MASK) {
- s->iq_dw = true;
- } else {
- s->iq_dw = false;
- }
+ vtd_update_iq_dw(s);
break;
case DMAR_IQA_REG_HI:
@@ -3032,7 +3060,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
/* TODO: add support for VFIO and vhost users */
if (s->snoop_control) {
- error_setg_errno(errp, -ENOTSUP,
+ error_setg_errno(errp, ENOTSUP,
"Snoop Control with vhost or VFIO is not supported");
return -ENOTSUP;
}
@@ -3053,13 +3081,6 @@ static int vtd_post_load(void *opaque, int version_id)
IntelIOMMUState *iommu = opaque;
/*
- * Memory regions are dynamically turned on/off depending on
- * context entry configurations from the guest. After migration,
- * we need to make sure the memory regions are still correct.
- */
- vtd_switch_address_space_all(iommu);
-
- /*
* We don't need to migrate the root_scalable because we can
* simply do the calculation after the loading is complete. We
* can actually do similar things with root, dmar_enabled, etc.
@@ -3068,6 +3089,15 @@ static int vtd_post_load(void *opaque, int version_id)
*/
vtd_update_scalable_state(iommu);
+ vtd_update_iq_dw(iommu);
+
+ /*
+ * Memory regions are dynamically turned on/off depending on
+ * context entry configurations from the guest. After migration,
+ * we need to make sure the memory regions are still correct.
+ */
+ vtd_switch_address_space_all(iommu);
+
return 0;
}
@@ -3122,6 +3152,7 @@ static Property vtd_properties[] = {
DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
+ DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true),
DEFINE_PROP_END_OF_LIST(),
};
@@ -3627,12 +3658,17 @@ static void vtd_init(IntelIOMMUState *s)
s->next_frcd_reg = 0;
s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
- VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
+ VTD_CAP_MGAW(s->aw_bits);
if (s->dma_drain) {
s->cap |= VTD_CAP_DRAIN;
}
- if (s->aw_bits == VTD_HOST_AW_48BIT) {
- s->cap |= VTD_CAP_SAGAW_48bit;
+ if (s->dma_translation) {
+ if (s->aw_bits >= VTD_HOST_AW_39BIT) {
+ s->cap |= VTD_CAP_SAGAW_39bit;
+ }
+ if (s->aw_bits >= VTD_HOST_AW_48BIT) {
+ s->cap |= VTD_CAP_SAGAW_48bit;
+ }
}
s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
@@ -3778,15 +3814,10 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
}
if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
- if (!kvm_irqchip_in_kernel()) {
+ if (!kvm_irqchip_is_split()) {
error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
return false;
}
- if (!kvm_enable_x2apic()) {
- error_setg(errp, "eim=on requires support on the KVM side"
- "(X2APIC_API, first shipped in v4.7)");
- return false;
- }
}
/* Currently only address widths supported are 39 and 48 bits */
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 1ff13b40f9..930ce61feb 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -289,6 +289,8 @@ typedef enum VTDFaultReason {
* context-entry.
*/
VTD_FR_CONTEXT_ENTRY_TT,
+ /* Output address in the interrupt address range */
+ VTD_FR_INTERRUPT_ADDR = 0xE,
/* Interrupt remapping transition faults */
VTD_FR_IR_REQ_RSVD = 0x20, /* One or more IR request reserved
@@ -304,11 +306,8 @@ typedef enum VTDFaultReason {
VTD_FR_PASID_TABLE_INV = 0x58, /*Invalid PASID table entry */
- /* This is not a normal fault reason. We use this to indicate some faults
- * that are not referenced by the VT-d specification.
- * Fault event with such reason should not be recorded.
- */
- VTD_FR_RESERVED_ERR,
+ /* Output address in the interrupt address range for scalable mode */
+ VTD_FR_SM_INTERRUPT_ADDR = 0x87,
VTD_FR_MAX, /* Guard */
} VTDFaultReason;
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index 4b3b1dd262..754f1d0593 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -247,7 +247,7 @@ static void microvm_devices_init(MicrovmMachineState *mms)
x86ms->pci_irq_mask = 0;
}
- if (mms->pic == ON_OFF_AUTO_ON || mms->pic == ON_OFF_AUTO_AUTO) {
+ if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
qemu_irq *i8259;
i8259 = i8259_init(isa_bus, x86_allocate_cpu_irq());
@@ -257,7 +257,7 @@ static void microvm_devices_init(MicrovmMachineState *mms)
g_free(i8259);
}
- if (mms->pit == ON_OFF_AUTO_ON || mms->pit == ON_OFF_AUTO_AUTO) {
+ if (x86ms->pit == ON_OFF_AUTO_ON || x86ms->pit == ON_OFF_AUTO_AUTO) {
if (kvm_pit_in_kernel()) {
kvm_pit_init(isa_bus, 0x40);
} else {
@@ -491,40 +491,6 @@ static void microvm_machine_reset(MachineState *machine)
}
}
-static void microvm_machine_get_pic(Object *obj, Visitor *v, const char *name,
- void *opaque, Error **errp)
-{
- MicrovmMachineState *mms = MICROVM_MACHINE(obj);
- OnOffAuto pic = mms->pic;
-
- visit_type_OnOffAuto(v, name, &pic, errp);
-}
-
-static void microvm_machine_set_pic(Object *obj, Visitor *v, const char *name,
- void *opaque, Error **errp)
-{
- MicrovmMachineState *mms = MICROVM_MACHINE(obj);
-
- visit_type_OnOffAuto(v, name, &mms->pic, errp);
-}
-
-static void microvm_machine_get_pit(Object *obj, Visitor *v, const char *name,
- void *opaque, Error **errp)
-{
- MicrovmMachineState *mms = MICROVM_MACHINE(obj);
- OnOffAuto pit = mms->pit;
-
- visit_type_OnOffAuto(v, name, &pit, errp);
-}
-
-static void microvm_machine_set_pit(Object *obj, Visitor *v, const char *name,
- void *opaque, Error **errp)
-{
- MicrovmMachineState *mms = MICROVM_MACHINE(obj);
-
- visit_type_OnOffAuto(v, name, &mms->pit, errp);
-}
-
static void microvm_machine_get_rtc(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
@@ -649,8 +615,6 @@ static void microvm_machine_initfn(Object *obj)
MicrovmMachineState *mms = MICROVM_MACHINE(obj);
/* Configuration */
- mms->pic = ON_OFF_AUTO_AUTO;
- mms->pit = ON_OFF_AUTO_AUTO;
mms->rtc = ON_OFF_AUTO_AUTO;
mms->pcie = ON_OFF_AUTO_AUTO;
mms->ioapic2 = ON_OFF_AUTO_AUTO;
@@ -702,20 +666,6 @@ static void microvm_class_init(ObjectClass *oc, void *data)
x86mc->fwcfg_dma_enabled = true;
- object_class_property_add(oc, MICROVM_MACHINE_PIC, "OnOffAuto",
- microvm_machine_get_pic,
- microvm_machine_set_pic,
- NULL, NULL);
- object_class_property_set_description(oc, MICROVM_MACHINE_PIC,
- "Enable i8259 PIC");
-
- object_class_property_add(oc, MICROVM_MACHINE_PIT, "OnOffAuto",
- microvm_machine_get_pit,
- microvm_machine_set_pit,
- NULL, NULL);
- object_class_property_set_description(oc, MICROVM_MACHINE_PIT,
- "Enable i8254 PIT");
-
object_class_property_add(oc, MICROVM_MACHINE_RTC, "OnOffAuto",
microvm_machine_get_rtc,
microvm_machine_set_rtc,
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 305d2c0820..7c39c91335 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -75,6 +75,7 @@
#include "acpi-build.h"
#include "hw/mem/pc-dimm.h"
#include "hw/mem/nvdimm.h"
+#include "hw/cxl/cxl.h"
#include "qapi/error.h"
#include "qapi/qapi-visit-common.h"
#include "qapi/qapi-visit-machine.h"
@@ -743,14 +744,6 @@ void pc_machine_done(Notifier *notifier, void *data)
/* update FW_CFG_NB_CPUS to account for -device added CPUs */
fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
}
-
-
- if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
- !kvm_irqchip_in_kernel()) {
- error_report("current -smp configuration requires kernel "
- "irqchip support.");
- exit(EXIT_FAILURE);
- }
}
void pc_guest_info_init(PCMachineState *pcms)
@@ -816,6 +809,7 @@ void pc_memory_init(PCMachineState *pcms,
MachineClass *mc = MACHINE_GET_CLASS(machine);
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
X86MachineState *x86ms = X86_MACHINE(pcms);
+ hwaddr cxl_base, cxl_resv_end = 0;
assert(machine->ram_size == x86ms->below_4g_mem_size +
x86ms->above_4g_mem_size);
@@ -905,6 +899,44 @@ void pc_memory_init(PCMachineState *pcms,
&machine->device_memory->mr);
}
+ if (machine->cxl_devices_state->is_enabled) {
+ MemoryRegion *mr = &machine->cxl_devices_state->host_mr;
+ hwaddr cxl_size = MiB;
+
+ if (pcmc->has_reserved_memory && machine->device_memory->base) {
+ cxl_base = machine->device_memory->base;
+ if (!pcmc->broken_reserved_end) {
+ cxl_base += memory_region_size(&machine->device_memory->mr);
+ }
+ } else if (pcms->sgx_epc.size != 0) {
+ cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc);
+ } else {
+ cxl_base = 0x100000000ULL + x86ms->above_4g_mem_size;
+ }
+
+ e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
+ memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
+ memory_region_add_subregion(system_memory, cxl_base, mr);
+ cxl_resv_end = cxl_base + cxl_size;
+ if (machine->cxl_devices_state->fixed_windows) {
+ hwaddr cxl_fmw_base;
+ GList *it;
+
+ cxl_fmw_base = ROUND_UP(cxl_base + cxl_size, 256 * MiB);
+ for (it = machine->cxl_devices_state->fixed_windows; it; it = it->next) {
+ CXLFixedWindow *fw = it->data;
+
+ fw->base = cxl_fmw_base;
+ memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw,
+ "cxl-fixed-memory-region", fw->size);
+ memory_region_add_subregion(system_memory, fw->base, &fw->mr);
+ e820_add_entry(fw->base, fw->size, E820_RESERVED);
+ cxl_fmw_base += fw->size;
+ cxl_resv_end = cxl_fmw_base;
+ }
+ }
+ }
+
/* Initialize PC system firmware */
pc_system_firmware_init(pcms, rom_memory);
@@ -932,6 +964,10 @@ void pc_memory_init(PCMachineState *pcms,
if (!pcmc->broken_reserved_end) {
res_mem_end += memory_region_size(&machine->device_memory->mr);
}
+
+ if (machine->cxl_devices_state->is_enabled) {
+ res_mem_end = cxl_resv_end;
+ }
*val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB));
fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
}
@@ -965,7 +1001,17 @@ uint64_t pc_pci_hole64_start(void)
X86MachineState *x86ms = X86_MACHINE(pcms);
uint64_t hole64_start = 0;
- if (pcmc->has_reserved_memory && ms->device_memory->base) {
+ if (ms->cxl_devices_state->host_mr.addr) {
+ hole64_start = ms->cxl_devices_state->host_mr.addr +
+ memory_region_size(&ms->cxl_devices_state->host_mr);
+ if (ms->cxl_devices_state->fixed_windows) {
+ GList *it;
+ for (it = ms->cxl_devices_state->fixed_windows; it; it = it->next) {
+ CXLFixedWindow *fw = it->data;
+ hole64_start = fw->mr.addr + memory_region_size(&fw->mr);
+ }
+ }
+ } else if (pcmc->has_reserved_memory && ms->device_memory->base) {
hole64_start = ms->device_memory->base;
if (!pcmc->broken_reserved_end) {
hole64_start += memory_region_size(&ms->device_memory->mr);
@@ -1077,6 +1123,7 @@ void pc_basic_device_init(struct PCMachineState *pcms,
ISADevice *pit = NULL;
MemoryRegion *ioport80_io = g_new(MemoryRegion, 1);
MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1);
+ X86MachineState *x86ms = X86_MACHINE(pcms);
memory_region_init_io(ioport80_io, NULL, &ioport80_io_ops, NULL, "ioport80", 1);
memory_region_add_subregion(isa_bus->address_space_io, 0x80, ioport80_io);
@@ -1121,7 +1168,8 @@ void pc_basic_device_init(struct PCMachineState *pcms,
qemu_register_boot_set(pc_boot_set, *rtc_state);
- if (!xen_enabled() && pcms->pit_enabled) {
+ if (!xen_enabled() &&
+ (x86ms->pit == ON_OFF_AUTO_AUTO || x86ms->pit == ON_OFF_AUTO_ON)) {
if (kvm_pit_in_kernel()) {
pit = kvm_pit_init(isa_bus, 0x40);
} else {
@@ -1491,20 +1539,6 @@ static void pc_machine_set_sata(Object *obj, bool value, Error **errp)
pcms->sata_enabled = value;
}
-static bool pc_machine_get_pit(Object *obj, Error **errp)
-{
- PCMachineState *pcms = PC_MACHINE(obj);
-
- return pcms->pit_enabled;
-}
-
-static void pc_machine_set_pit(Object *obj, bool value, Error **errp)
-{
- PCMachineState *pcms = PC_MACHINE(obj);
-
- pcms->pit_enabled = value;
-}
-
static bool pc_machine_get_hpet(Object *obj, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(obj);
@@ -1661,7 +1695,6 @@ static void pc_machine_initfn(Object *obj)
pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
pcms->smbus_enabled = true;
pcms->sata_enabled = true;
- pcms->pit_enabled = true;
pcms->i8042_enabled = true;
pcms->max_fw_size = 8 * MiB;
#ifdef CONFIG_HPET
@@ -1761,6 +1794,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
mc->default_cpu_type = TARGET_DEFAULT_CPU_TYPE;
mc->nvdimm_supported = true;
mc->smp_props.dies_supported = true;
+ mc->cxl_supported = true;
mc->default_ram_id = "pc.ram";
object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
@@ -1789,11 +1823,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
object_class_property_set_description(oc, PC_MACHINE_SATA,
"Enable/disable Serial ATA bus");
- object_class_property_add_bool(oc, PC_MACHINE_PIT,
- pc_machine_get_pit, pc_machine_set_pit);
- object_class_property_set_description(oc, PC_MACHINE_PIT,
- "Enable/disable Intel 8254 programmable interval timer emulation");
-
object_class_property_add_bool(oc, "hpet",
pc_machine_get_hpet, pc_machine_set_hpet);
object_class_property_set_description(oc, "hpet",
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index f843dd906f..578e537b35 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -218,7 +218,9 @@ static void pc_init1(MachineState *machine,
}
isa_bus_irqs(isa_bus, x86ms->gsi);
- pc_i8259_create(isa_bus, gsi_state->i8259_irq);
+ if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
+ pc_i8259_create(isa_bus, gsi_state->i8259_irq);
+ }
if (pcmc->pci_enabled) {
ioapic_init_gsi(gsi_state, "i440fx");
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 302288342a..42eb8b9707 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -265,7 +265,9 @@ static void pc_q35_init(MachineState *machine)
pci_bus_set_route_irq_fn(host_bus, ich9_route_intx_pin_to_irq);
isa_bus = ich9_lpc->isa_bus;
- pc_i8259_create(isa_bus, gsi_state->i8259_irq);
+ if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
+ pc_i8259_create(isa_bus, gsi_state->i8259_irq);
+ }
if (pcmc->pci_enabled) {
ioapic_init_gsi(gsi_state, "q35");
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 79ebdface6..78b05ab7a2 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -38,6 +38,7 @@
#include "sysemu/replay.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpu-timers.h"
+#include "sysemu/xen.h"
#include "trace.h"
#include "hw/i386/x86.h"
@@ -122,6 +123,21 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
*/
x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms,
ms->smp.max_cpus - 1) + 1;
+
+ /*
+ * Can we support APIC ID 255 or higher?
+ *
+ * Under Xen: yes.
+ * With userspace emulated lapic: no
+ * With KVM's in-kernel lapic: only if X2APIC API is enabled.
+ */
+ if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
+ (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+ error_report("current -smp configuration requires kernel "
+ "irqchip and X2APIC API support.");
+ exit(EXIT_FAILURE);
+ }
+
possible_cpus = mc->possible_cpu_arch_ids(ms);
for (i = 0; i < ms->smp.cpus; i++) {
x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
@@ -1228,6 +1244,40 @@ static void x86_machine_set_acpi(Object *obj, Visitor *v, const char *name,
visit_type_OnOffAuto(v, name, &x86ms->acpi, errp);
}
+static void x86_machine_get_pit(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ X86MachineState *x86ms = X86_MACHINE(obj);
+ OnOffAuto pit = x86ms->pit;
+
+ visit_type_OnOffAuto(v, name, &pit, errp);
+}
+
+static void x86_machine_set_pit(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ X86MachineState *x86ms = X86_MACHINE(obj);;
+
+ visit_type_OnOffAuto(v, name, &x86ms->pit, errp);
+}
+
+static void x86_machine_get_pic(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ X86MachineState *x86ms = X86_MACHINE(obj);
+ OnOffAuto pic = x86ms->pic;
+
+ visit_type_OnOffAuto(v, name, &pic, errp);
+}
+
+static void x86_machine_set_pic(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ X86MachineState *x86ms = X86_MACHINE(obj);
+
+ visit_type_OnOffAuto(v, name, &x86ms->pic, errp);
+}
+
static char *x86_machine_get_oem_id(Object *obj, Error **errp)
{
X86MachineState *x86ms = X86_MACHINE(obj);
@@ -1317,6 +1367,8 @@ static void x86_machine_initfn(Object *obj)
x86ms->smm = ON_OFF_AUTO_AUTO;
x86ms->acpi = ON_OFF_AUTO_AUTO;
+ x86ms->pit = ON_OFF_AUTO_AUTO;
+ x86ms->pic = ON_OFF_AUTO_AUTO;
x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS;
x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
@@ -1348,6 +1400,20 @@ static void x86_machine_class_init(ObjectClass *oc, void *data)
object_class_property_set_description(oc, X86_MACHINE_ACPI,
"Enable ACPI");
+ object_class_property_add(oc, X86_MACHINE_PIT, "OnOffAuto",
+ x86_machine_get_pit,
+ x86_machine_set_pit,
+ NULL, NULL);
+ object_class_property_set_description(oc, X86_MACHINE_PIT,
+ "Enable i8254 PIT");
+
+ object_class_property_add(oc, X86_MACHINE_PIC, "OnOffAuto",
+ x86_machine_get_pic,
+ x86_machine_set_pic,
+ NULL, NULL);
+ object_class_property_set_description(oc, X86_MACHINE_PIC,
+ "Enable i8259 PIC");
+
object_class_property_add_str(oc, X86_MACHINE_OEM_ID,
x86_machine_get_oem_id,
x86_machine_set_oem_id);
diff --git a/hw/input/vhost-user-input.c b/hw/input/vhost-user-input.c
index aeb0624fe5..1352e372ff 100644
--- a/hw/input/vhost-user-input.c
+++ b/hw/input/vhost-user-input.c
@@ -78,6 +78,12 @@ static void vhost_input_set_config(VirtIODevice *vdev,
virtio_notify_config(vdev);
}
+static struct vhost_dev *vhost_input_get_vhost(VirtIODevice *vdev)
+{
+ VHostUserInput *vhi = VHOST_USER_INPUT(vdev);
+ return &vhi->vhost->dev;
+}
+
static const VMStateDescription vmstate_vhost_input = {
.name = "vhost-user-input",
.unmigratable = 1,
@@ -92,6 +98,7 @@ static void vhost_input_class_init(ObjectClass *klass, void *data)
dc->vmsd = &vmstate_vhost_input;
vdc->get_config = vhost_input_get_config;
vdc->set_config = vhost_input_set_config;
+ vdc->get_vhost = vhost_input_get_vhost;
vic->realize = vhost_input_realize;
vic->change_active = vhost_input_change_active;
}
diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c
index 54bcb46c74..5b5398b3ca 100644
--- a/hw/input/virtio-input.c
+++ b/hw/input/virtio-input.c
@@ -257,8 +257,7 @@ static void virtio_input_device_realize(DeviceState *dev, Error **errp)
vinput->cfg_size += 8;
assert(vinput->cfg_size <= sizeof(virtio_input_config));
- virtio_init(vdev, "virtio-input", VIRTIO_ID_INPUT,
- vinput->cfg_size);
+ virtio_init(vdev, VIRTIO_ID_INPUT, vinput->cfg_size);
vinput->evt = virtio_add_queue(vdev, 64, virtio_input_handle_evt);
vinput->sts = virtio_add_queue(vdev, 64, virtio_input_handle_sts);
}
diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig
index 03dbb3c7df..73c5ae8ad9 100644
--- a/hw/mem/Kconfig
+++ b/hw/mem/Kconfig
@@ -11,3 +11,8 @@ config NVDIMM
config SPARSE_MEM
bool
+
+config CXL_MEM_DEVICE
+ bool
+ default y if CXL
+ select MEM_DEVICE
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
new file mode 100644
index 0000000000..3bf2869573
--- /dev/null
+++ b/hw/mem/cxl_type3.c
@@ -0,0 +1,371 @@
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/error-report.h"
+#include "hw/mem/memory-device.h"
+#include "hw/mem/pc-dimm.h"
+#include "hw/pci/pci.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/pmem.h"
+#include "qemu/range.h"
+#include "qemu/rcu.h"
+#include "sysemu/hostmem.h"
+#include "hw/cxl/cxl.h"
+
+static void build_dvsecs(CXLType3Dev *ct3d)
+{
+ CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
+ uint8_t *dvsec;
+
+ dvsec = (uint8_t *)&(CXLDVSECDevice){
+ .cap = 0x1e,
+ .ctrl = 0x2,
+ .status2 = 0x2,
+ .range1_size_hi = ct3d->hostmem->size >> 32,
+ .range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
+ (ct3d->hostmem->size & 0xF0000000),
+ .range1_base_hi = 0,
+ .range1_base_lo = 0,
+ };
+ cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
+ PCIE_CXL_DEVICE_DVSEC_LENGTH,
+ PCIE_CXL_DEVICE_DVSEC,
+ PCIE_CXL2_DEVICE_DVSEC_REVID, dvsec);
+
+ dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){
+ .rsvd = 0,
+ .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX,
+ .reg0_base_hi = 0,
+ .reg1_base_lo = RBI_CXL_DEVICE_REG | CXL_DEVICE_REG_BAR_IDX,
+ .reg1_base_hi = 0,
+ };
+ cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
+ REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC,
+ REG_LOC_DVSEC_REVID, dvsec);
+ dvsec = (uint8_t *)&(CXLDVSECDeviceGPF){
+ .phase2_duration = 0x603, /* 3 seconds */
+ .phase2_power = 0x33, /* 0x33 miliwatts */
+ };
+ cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE,
+ GPF_DEVICE_DVSEC_LENGTH, GPF_PORT_DVSEC,
+ GPF_DEVICE_DVSEC_REVID, dvsec);
+}
+
+static void hdm_decoder_commit(CXLType3Dev *ct3d, int which)
+{
+ ComponentRegisters *cregs = &ct3d->cxl_cstate.crb;
+ uint32_t *cache_mem = cregs->cache_mem_registers;
+
+ assert(which == 0);
+
+ /* TODO: Sanity checks that the decoder is possible */
+ ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMIT, 0);
+ ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, ERR, 0);
+
+ ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
+}
+
+static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
+ unsigned size)
+{
+ CXLComponentState *cxl_cstate = opaque;
+ ComponentRegisters *cregs = &cxl_cstate->crb;
+ CXLType3Dev *ct3d = container_of(cxl_cstate, CXLType3Dev, cxl_cstate);
+ uint32_t *cache_mem = cregs->cache_mem_registers;
+ bool should_commit = false;
+ int which_hdm = -1;
+
+ assert(size == 4);
+ g_assert(offset < CXL2_COMPONENT_CM_REGION_SIZE);
+
+ switch (offset) {
+ case A_CXL_HDM_DECODER0_CTRL:
+ should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
+ which_hdm = 0;
+ break;
+ default:
+ break;
+ }
+
+ stl_le_p((uint8_t *)cache_mem + offset, value);
+ if (should_commit) {
+ hdm_decoder_commit(ct3d, which_hdm);
+ }
+}
+
+static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
+{
+ DeviceState *ds = DEVICE(ct3d);
+ MemoryRegion *mr;
+ char *name;
+
+ if (!ct3d->hostmem) {
+ error_setg(errp, "memdev property must be set");
+ return false;
+ }
+
+ mr = host_memory_backend_get_memory(ct3d->hostmem);
+ if (!mr) {
+ error_setg(errp, "memdev property must be set");
+ return false;
+ }
+ memory_region_set_nonvolatile(mr, true);
+ memory_region_set_enabled(mr, true);
+ host_memory_backend_set_mapped(ct3d->hostmem, true);
+
+ if (ds->id) {
+ name = g_strdup_printf("cxl-type3-dpa-space:%s", ds->id);
+ } else {
+ name = g_strdup("cxl-type3-dpa-space");
+ }
+ address_space_init(&ct3d->hostmem_as, mr, name);
+ g_free(name);
+
+ ct3d->cxl_dstate.pmem_size = ct3d->hostmem->size;
+
+ if (!ct3d->lsa) {
+ error_setg(errp, "lsa property must be set");
+ return false;
+ }
+
+ return true;
+}
+
+static void ct3_realize(PCIDevice *pci_dev, Error **errp)
+{
+ CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
+ CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
+ ComponentRegisters *regs = &cxl_cstate->crb;
+ MemoryRegion *mr = &regs->component_registers;
+ uint8_t *pci_conf = pci_dev->config;
+
+ if (!cxl_setup_memory(ct3d, errp)) {
+ return;
+ }
+
+ pci_config_set_prog_interface(pci_conf, 0x10);
+ pci_config_set_class(pci_conf, PCI_CLASS_MEMORY_CXL);
+
+ pcie_endpoint_cap_init(pci_dev, 0x80);
+ cxl_cstate->dvsec_offset = 0x100;
+
+ ct3d->cxl_cstate.pdev = pci_dev;
+ build_dvsecs(ct3d);
+
+ regs->special_ops = g_new0(MemoryRegionOps, 1);
+ regs->special_ops->write = ct3d_reg_write;
+
+ cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate,
+ TYPE_CXL_TYPE3);
+
+ pci_register_bar(
+ pci_dev, CXL_COMPONENT_REG_BAR_IDX,
+ PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, mr);
+
+ cxl_device_register_block_init(OBJECT(pci_dev), &ct3d->cxl_dstate);
+ pci_register_bar(pci_dev, CXL_DEVICE_REG_BAR_IDX,
+ PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64,
+ &ct3d->cxl_dstate.device_registers);
+}
+
+static void ct3_exit(PCIDevice *pci_dev)
+{
+ CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
+ CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
+ ComponentRegisters *regs = &cxl_cstate->crb;
+
+ g_free(regs->special_ops);
+ address_space_destroy(&ct3d->hostmem_as);
+}
+
+/* TODO: Support multiple HDM decoders and DPA skip */
+static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
+{
+ uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
+ uint64_t decoder_base, decoder_size, hpa_offset;
+ uint32_t hdm0_ctrl;
+ int ig, iw;
+
+ decoder_base = (((uint64_t)cache_mem[R_CXL_HDM_DECODER0_BASE_HI] << 32) |
+ cache_mem[R_CXL_HDM_DECODER0_BASE_LO]);
+ if ((uint64_t)host_addr < decoder_base) {
+ return false;
+ }
+
+ hpa_offset = (uint64_t)host_addr - decoder_base;
+
+ decoder_size = ((uint64_t)cache_mem[R_CXL_HDM_DECODER0_SIZE_HI] << 32) |
+ cache_mem[R_CXL_HDM_DECODER0_SIZE_LO];
+ if (hpa_offset >= decoder_size) {
+ return false;
+ }
+
+ hdm0_ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL];
+ iw = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IW);
+ ig = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IG);
+
+ *dpa = (MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) |
+ ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset) >> iw);
+
+ return true;
+}
+
+MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
+ unsigned size, MemTxAttrs attrs)
+{
+ CXLType3Dev *ct3d = CXL_TYPE3(d);
+ uint64_t dpa_offset;
+ MemoryRegion *mr;
+
+ /* TODO support volatile region */
+ mr = host_memory_backend_get_memory(ct3d->hostmem);
+ if (!mr) {
+ return MEMTX_ERROR;
+ }
+
+ if (!cxl_type3_dpa(ct3d, host_addr, &dpa_offset)) {
+ return MEMTX_ERROR;
+ }
+
+ if (dpa_offset > int128_get64(mr->size)) {
+ return MEMTX_ERROR;
+ }
+
+ return address_space_read(&ct3d->hostmem_as, dpa_offset, attrs, data, size);
+}
+
+MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
+ unsigned size, MemTxAttrs attrs)
+{
+ CXLType3Dev *ct3d = CXL_TYPE3(d);
+ uint64_t dpa_offset;
+ MemoryRegion *mr;
+
+ mr = host_memory_backend_get_memory(ct3d->hostmem);
+ if (!mr) {
+ return MEMTX_OK;
+ }
+
+ if (!cxl_type3_dpa(ct3d, host_addr, &dpa_offset)) {
+ return MEMTX_OK;
+ }
+
+ if (dpa_offset > int128_get64(mr->size)) {
+ return MEMTX_OK;
+ }
+ return address_space_write(&ct3d->hostmem_as, dpa_offset, attrs,
+ &data, size);
+}
+
+static void ct3d_reset(DeviceState *dev)
+{
+ CXLType3Dev *ct3d = CXL_TYPE3(dev);
+ uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
+ uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask;
+
+ cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE);
+ cxl_device_register_init_common(&ct3d->cxl_dstate);
+}
+
+static Property ct3_props[] = {
+ DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND,
+ HostMemoryBackend *),
+ DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND,
+ HostMemoryBackend *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static uint64_t get_lsa_size(CXLType3Dev *ct3d)
+{
+ MemoryRegion *mr;
+
+ mr = host_memory_backend_get_memory(ct3d->lsa);
+ return memory_region_size(mr);
+}
+
+static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
+ uint64_t offset)
+{
+ assert(offset + size <= memory_region_size(mr));
+ assert(offset + size > offset);
+}
+
+static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size,
+ uint64_t offset)
+{
+ MemoryRegion *mr;
+ void *lsa;
+
+ mr = host_memory_backend_get_memory(ct3d->lsa);
+ validate_lsa_access(mr, size, offset);
+
+ lsa = memory_region_get_ram_ptr(mr) + offset;
+ memcpy(buf, lsa, size);
+
+ return size;
+}
+
+static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
+ uint64_t offset)
+{
+ MemoryRegion *mr;
+ void *lsa;
+
+ mr = host_memory_backend_get_memory(ct3d->lsa);
+ validate_lsa_access(mr, size, offset);
+
+ lsa = memory_region_get_ram_ptr(mr) + offset;
+ memcpy(lsa, buf, size);
+ memory_region_set_dirty(mr, offset, size);
+
+ /*
+ * Just like the PMEM, if the guest is not allowed to exit gracefully, label
+ * updates will get lost.
+ */
+}
+
+static void ct3_class_init(ObjectClass *oc, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(oc);
+ PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
+ CXLType3Class *cvc = CXL_TYPE3_CLASS(oc);
+
+ pc->realize = ct3_realize;
+ pc->exit = ct3_exit;
+ pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
+ pc->vendor_id = PCI_VENDOR_ID_INTEL;
+ pc->device_id = 0xd93; /* LVF for now */
+ pc->revision = 1;
+
+ set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+ dc->desc = "CXL PMEM Device (Type 3)";
+ dc->reset = ct3d_reset;
+ device_class_set_props(dc, ct3_props);
+
+ cvc->get_lsa_size = get_lsa_size;
+ cvc->get_lsa = get_lsa;
+ cvc->set_lsa = set_lsa;
+}
+
+static const TypeInfo ct3d_info = {
+ .name = TYPE_CXL_TYPE3,
+ .parent = TYPE_PCI_DEVICE,
+ .class_size = sizeof(struct CXLType3Class),
+ .class_init = ct3_class_init,
+ .instance_size = sizeof(CXLType3Dev),
+ .interfaces = (InterfaceInfo[]) {
+ { INTERFACE_CXL_DEVICE },
+ { INTERFACE_PCIE_DEVICE },
+ {}
+ },
+};
+
+static void ct3d_registers(void)
+{
+ type_register_static(&ct3d_info);
+}
+
+type_init(ct3d_registers);
diff --git a/hw/mem/meson.build b/hw/mem/meson.build
index 82f86d117e..609b2b36fc 100644
--- a/hw/mem/meson.build
+++ b/hw/mem/meson.build
@@ -3,6 +3,7 @@ mem_ss.add(files('memory-device.c'))
mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c'))
mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c'))
mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c'))
+mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c'))
softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss)
diff --git a/hw/meson.build b/hw/meson.build
index b3366c888e..9992c5101e 100644
--- a/hw/meson.build
+++ b/hw/meson.build
@@ -6,6 +6,7 @@ subdir('block')
subdir('char')
subdir('core')
subdir('cpu')
+subdir('cxl')
subdir('display')
subdir('dma')
subdir('gpio')
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 30379d2ca4..ccac5b7a64 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -201,7 +201,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
}
if (~net->dev.features & net->dev.backend_features) {
- fprintf(stderr, "vhost lacks feature mask %" PRIu64
+ fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
" for backend\n",
(uint64_t)(~net->dev.features & net->dev.backend_features));
goto fail;
@@ -213,7 +213,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
features = vhost_user_get_acked_features(net->nc);
if (~net->dev.features & features) {
- fprintf(stderr, "vhost lacks feature mask %" PRIu64
+ fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
" for backend\n",
(uint64_t)(~net->dev.features & features));
goto fail;
@@ -381,6 +381,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
r = vhost_set_vring_enable(peer, peer->vring_enable);
if (r < 0) {
+ vhost_net_stop_one(get_vhost_net(peer), dev);
goto err_start;
}
}
@@ -390,7 +391,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
err_start:
while (--i >= 0) {
- peer = qemu_get_peer(ncs , i);
+ peer = qemu_get_peer(ncs, i < data_queue_pairs ?
+ i : n->max_queue_pairs);
vhost_net_stop_one(get_vhost_net(peer), dev);
}
e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 1067e72b39..7ad948ee7c 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -14,6 +14,7 @@
#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
+#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
@@ -245,7 +246,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
VirtIODevice *vdev = VIRTIO_DEVICE(n);
NetClientState *nc = qemu_get_queue(n->nic);
int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
- int cvq = n->max_ncs - n->max_queue_pairs;
+ int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
+ n->max_ncs - n->max_queue_pairs : 0;
if (!get_vhost_net(nc->peer)) {
return;
@@ -1379,6 +1381,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
{
VirtIODevice *vdev = VIRTIO_DEVICE(n);
uint16_t queue_pairs;
+ NetClientState *nc = qemu_get_queue(n->nic);
virtio_net_disable_rss(n);
if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
@@ -1410,6 +1413,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
return VIRTIO_NET_ERR;
}
+ /* Avoid changing the number of queue_pairs for vdpa device in
+ * userspace handler. A future fix is needed to handle the mq
+ * change in userspace handler with vhost-vdpa. Let's disable
+ * the mq handling from userspace for now and only allow get
+ * done through the kernel. Ripples may be seen when falling
+ * back to userspace, but without doing it qemu process would
+ * crash on a recursive entry to virtio_net_set_status().
+ */
+ if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
+ return VIRTIO_NET_ERR;
+ }
+
n->curr_queue_pairs = queue_pairs;
/* stop the backend before changing the number of queue_pairs to avoid handling a
* disabled queue */
@@ -1443,7 +1458,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
}
iov_cnt = elem->out_num;
- iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
+ iov2 = iov = g_memdup2(elem->out_sg,
+ sizeof(struct iovec) * elem->out_num);
s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
if (s != sizeof(ctrl)) {
@@ -3170,8 +3186,22 @@ static NetClientInfo net_virtio_info = {
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
VirtIONet *n = VIRTIO_NET(vdev);
- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
+ NetClientState *nc;
assert(n->vhost_started);
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
+ /* Must guard against invalid features and bogus queue index
+ * from being set by malicious guest, or penetrated through
+ * buggy migration stream.
+ */
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: bogus vq index ignored\n", __func__);
+ return false;
+ }
+ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
+ } else {
+ nc = qemu_get_subqueue(n->nic, vq2q(idx));
+ }
return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
@@ -3179,8 +3209,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
bool mask)
{
VirtIONet *n = VIRTIO_NET(vdev);
- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
+ NetClientState *nc;
assert(n->vhost_started);
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
+ /* Must guard against invalid features and bogus queue index
+ * from being set by malicious guest, or penetrated through
+ * buggy migration stream.
+ */
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: bogus vq index ignored\n", __func__);
+ return;
+ }
+ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
+ } else {
+ nc = qemu_get_subqueue(n->nic, vq2q(idx));
+ }
vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
vdev, idx, mask);
}
@@ -3392,7 +3436,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
}
virtio_net_set_config_size(n, n->host_features);
- virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
+ virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
/*
* We set a lower limit on RX queue size to what it always was.
@@ -3619,6 +3663,14 @@ static bool dev_unplug_pending(void *opaque)
return vdc->primary_unplug_pending(dev);
}
+static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
+{
+ VirtIONet *n = VIRTIO_NET(vdev);
+ NetClientState *nc = qemu_get_queue(n->nic);
+ struct vhost_net *net = get_vhost_net(nc->peer);
+ return &net->dev;
+}
+
static const VMStateDescription vmstate_virtio_net = {
.name = "virtio-net",
.minimum_version_id = VIRTIO_NET_VM_VERSION,
@@ -3721,6 +3773,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data)
vdc->post_load = virtio_net_post_load_virtio;
vdc->vmsd = &vmstate_virtio_net_device;
vdc->primary_unplug_pending = primary_unplug_pending;
+ vdc->get_vhost = virtio_net_get_vhost;
}
static const TypeInfo virtio_net_info = {
diff --git a/hw/pci-bridge/Kconfig b/hw/pci-bridge/Kconfig
index f8df4315ba..02614f49aa 100644
--- a/hw/pci-bridge/Kconfig
+++ b/hw/pci-bridge/Kconfig
@@ -27,3 +27,8 @@ config DEC_PCI
config SIMBA
bool
+
+config CXL
+ bool
+ default y if PCI_EXPRESS && PXB
+ depends on PCI_EXPRESS && MSI_NONBROKEN && PXB
diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c
new file mode 100644
index 0000000000..fb213fa06e
--- /dev/null
+++ b/hw/pci-bridge/cxl_root_port.c
@@ -0,0 +1,236 @@
+/*
+ * CXL 2.0 Root Port Implementation
+ *
+ * Copyright(C) 2020 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/range.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/pcie_port.h"
+#include "hw/qdev-properties.h"
+#include "hw/sysbus.h"
+#include "qapi/error.h"
+#include "hw/cxl/cxl.h"
+
+#define CXL_ROOT_PORT_DID 0x7075
+
+/* Copied from the gen root port which we derive */
+#define GEN_PCIE_ROOT_PORT_AER_OFFSET 0x100
+#define GEN_PCIE_ROOT_PORT_ACS_OFFSET \
+ (GEN_PCIE_ROOT_PORT_AER_OFFSET + PCI_ERR_SIZEOF)
+#define CXL_ROOT_PORT_DVSEC_OFFSET \
+ (GEN_PCIE_ROOT_PORT_ACS_OFFSET + PCI_ACS_SIZEOF)
+
+typedef struct CXLRootPort {
+ /*< private >*/
+ PCIESlot parent_obj;
+
+ CXLComponentState cxl_cstate;
+ PCIResReserve res_reserve;
+} CXLRootPort;
+
+#define TYPE_CXL_ROOT_PORT "cxl-rp"
+DECLARE_INSTANCE_CHECKER(CXLRootPort, CXL_ROOT_PORT, TYPE_CXL_ROOT_PORT)
+
+static void latch_registers(CXLRootPort *crp)
+{
+ uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers;
+ uint32_t *write_msk = crp->cxl_cstate.crb.cache_mem_regs_write_mask;
+
+ cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
+}
+
+static void build_dvsecs(CXLComponentState *cxl)
+{
+ uint8_t *dvsec;
+
+ dvsec = (uint8_t *)&(CXLDVSECPortExtensions){ 0 };
+ cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT,
+ EXTENSIONS_PORT_DVSEC_LENGTH,
+ EXTENSIONS_PORT_DVSEC,
+ EXTENSIONS_PORT_DVSEC_REVID, dvsec);
+
+ dvsec = (uint8_t *)&(CXLDVSECPortGPF){
+ .rsvd = 0,
+ .phase1_ctrl = 1, /* 1μs timeout */
+ .phase2_ctrl = 1, /* 1μs timeout */
+ };
+ cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT,
+ GPF_PORT_DVSEC_LENGTH, GPF_PORT_DVSEC,
+ GPF_PORT_DVSEC_REVID, dvsec);
+
+ dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){
+ .cap = 0x26, /* IO, Mem, non-MLD */
+ .ctrl = 0x2,
+ .status = 0x26, /* same */
+ .rcvd_mod_ts_data_phase1 = 0xef,
+ };
+ cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT,
+ PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0,
+ PCIE_FLEXBUS_PORT_DVSEC,
+ PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0, dvsec);
+
+ dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){
+ .rsvd = 0,
+ .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX,
+ .reg0_base_hi = 0,
+ };
+ cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT,
+ REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC,
+ REG_LOC_DVSEC_REVID, dvsec);
+}
+
+static void cxl_rp_realize(DeviceState *dev, Error **errp)
+{
+ PCIDevice *pci_dev = PCI_DEVICE(dev);
+ PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
+ CXLRootPort *crp = CXL_ROOT_PORT(dev);
+ CXLComponentState *cxl_cstate = &crp->cxl_cstate;
+ ComponentRegisters *cregs = &cxl_cstate->crb;
+ MemoryRegion *component_bar = &cregs->component_registers;
+ Error *local_err = NULL;
+
+ rpc->parent_realize(dev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ int rc =
+ pci_bridge_qemu_reserve_cap_init(pci_dev, 0, crp->res_reserve, errp);
+ if (rc < 0) {
+ rpc->parent_class.exit(pci_dev);
+ return;
+ }
+
+ if (!crp->res_reserve.io || crp->res_reserve.io == -1) {
+ pci_word_test_and_clear_mask(pci_dev->wmask + PCI_COMMAND,
+ PCI_COMMAND_IO);
+ pci_dev->wmask[PCI_IO_BASE] = 0;
+ pci_dev->wmask[PCI_IO_LIMIT] = 0;
+ }
+
+ cxl_cstate->dvsec_offset = CXL_ROOT_PORT_DVSEC_OFFSET;
+ cxl_cstate->pdev = pci_dev;
+ build_dvsecs(&crp->cxl_cstate);
+
+ cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate,
+ TYPE_CXL_ROOT_PORT);
+
+ pci_register_bar(pci_dev, CXL_COMPONENT_REG_BAR_IDX,
+ PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64,
+ component_bar);
+}
+
+static void cxl_rp_reset(DeviceState *dev)
+{
+ PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
+ CXLRootPort *crp = CXL_ROOT_PORT(dev);
+
+ rpc->parent_reset(dev);
+
+ latch_registers(crp);
+}
+
+static Property gen_rp_props[] = {
+ DEFINE_PROP_UINT32("bus-reserve", CXLRootPort, res_reserve.bus, -1),
+ DEFINE_PROP_SIZE("io-reserve", CXLRootPort, res_reserve.io, -1),
+ DEFINE_PROP_SIZE("mem-reserve", CXLRootPort, res_reserve.mem_non_pref, -1),
+ DEFINE_PROP_SIZE("pref32-reserve", CXLRootPort, res_reserve.mem_pref_32,
+ -1),
+ DEFINE_PROP_SIZE("pref64-reserve", CXLRootPort, res_reserve.mem_pref_64,
+ -1),
+ DEFINE_PROP_END_OF_LIST()
+};
+
+static void cxl_rp_dvsec_write_config(PCIDevice *dev, uint32_t addr,
+ uint32_t val, int len)
+{
+ CXLRootPort *crp = CXL_ROOT_PORT(dev);
+
+ if (range_contains(&crp->cxl_cstate.dvsecs[EXTENSIONS_PORT_DVSEC], addr)) {
+ uint8_t *reg = &dev->config[addr];
+ addr -= crp->cxl_cstate.dvsecs[EXTENSIONS_PORT_DVSEC].lob;
+ if (addr == PORT_CONTROL_OFFSET) {
+ if (pci_get_word(reg) & PORT_CONTROL_UNMASK_SBR) {
+ /* unmask SBR */
+ qemu_log_mask(LOG_UNIMP, "SBR mask control is not supported\n");
+ }
+ if (pci_get_word(reg) & PORT_CONTROL_ALT_MEMID_EN) {
+ /* Alt Memory & ID Space Enable */
+ qemu_log_mask(LOG_UNIMP,
+ "Alt Memory & ID space is not supported\n");
+ }
+ }
+ }
+}
+
+static void cxl_rp_write_config(PCIDevice *d, uint32_t address, uint32_t val,
+ int len)
+{
+ uint16_t slt_ctl, slt_sta;
+
+ pcie_cap_slot_get(d, &slt_ctl, &slt_sta);
+ pci_bridge_write_config(d, address, val, len);
+ pcie_cap_flr_write_config(d, address, val, len);
+ pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len);
+ pcie_aer_write_config(d, address, val, len);
+
+ cxl_rp_dvsec_write_config(d, address, val, len);
+}
+
+static void cxl_root_port_class_init(ObjectClass *oc, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(oc);
+ PCIDeviceClass *k = PCI_DEVICE_CLASS(oc);
+ PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(oc);
+
+ k->vendor_id = PCI_VENDOR_ID_INTEL;
+ k->device_id = CXL_ROOT_PORT_DID;
+ dc->desc = "CXL Root Port";
+ k->revision = 0;
+ device_class_set_props(dc, gen_rp_props);
+ k->config_write = cxl_rp_write_config;
+
+ device_class_set_parent_realize(dc, cxl_rp_realize, &rpc->parent_realize);
+ device_class_set_parent_reset(dc, cxl_rp_reset, &rpc->parent_reset);
+
+ rpc->aer_offset = GEN_PCIE_ROOT_PORT_AER_OFFSET;
+ rpc->acs_offset = GEN_PCIE_ROOT_PORT_ACS_OFFSET;
+
+ dc->hotpluggable = false;
+}
+
+static const TypeInfo cxl_root_port_info = {
+ .name = TYPE_CXL_ROOT_PORT,
+ .parent = TYPE_PCIE_ROOT_PORT,
+ .instance_size = sizeof(CXLRootPort),
+ .class_init = cxl_root_port_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { INTERFACE_CXL_DEVICE },
+ { }
+ },
+};
+
+static void cxl_register(void)
+{
+ type_register_static(&cxl_root_port_info);
+}
+
+type_init(cxl_register);
diff --git a/hw/pci-bridge/meson.build b/hw/pci-bridge/meson.build
index daab8acf2a..b6d26a03d5 100644
--- a/hw/pci-bridge/meson.build
+++ b/hw/pci-bridge/meson.build
@@ -5,6 +5,7 @@ pci_ss.add(when: 'CONFIG_IOH3420', if_true: files('ioh3420.c'))
pci_ss.add(when: 'CONFIG_PCIE_PORT', if_true: files('pcie_root_port.c', 'gen_pcie_root_port.c', 'pcie_pci_bridge.c'))
pci_ss.add(when: 'CONFIG_PXB', if_true: files('pci_expander_bridge.c'))
pci_ss.add(when: 'CONFIG_XIO3130', if_true: files('xio3130_upstream.c', 'xio3130_downstream.c'))
+pci_ss.add(when: 'CONFIG_CXL', if_true: files('cxl_root_port.c'))
# NewWorld PowerMac
pci_ss.add(when: 'CONFIG_DEC_PCI', if_true: files('dec.c'))
diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
index de932286b5..69244decdb 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -17,6 +17,7 @@
#include "hw/pci/pci_host.h"
#include "hw/qdev-properties.h"
#include "hw/pci/pci_bridge.h"
+#include "hw/cxl/cxl.h"
#include "qemu/range.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
@@ -24,6 +25,8 @@
#include "hw/boards.h"
#include "qom/object.h"
+enum BusType { PCI, PCIE, CXL };
+
#define TYPE_PXB_BUS "pxb-bus"
typedef struct PXBBus PXBBus;
DECLARE_INSTANCE_CHECKER(PXBBus, PXB_BUS,
@@ -33,6 +36,10 @@ DECLARE_INSTANCE_CHECKER(PXBBus, PXB_BUS,
DECLARE_INSTANCE_CHECKER(PXBBus, PXB_PCIE_BUS,
TYPE_PXB_PCIE_BUS)
+#define TYPE_PXB_CXL_BUS "pxb-cxl-bus"
+DECLARE_INSTANCE_CHECKER(PXBBus, PXB_CXL_BUS,
+ TYPE_PXB_CXL_BUS)
+
struct PXBBus {
/*< private >*/
PCIBus parent_obj;
@@ -50,18 +57,13 @@ DECLARE_INSTANCE_CHECKER(PXBDev, PXB_DEV,
DECLARE_INSTANCE_CHECKER(PXBDev, PXB_PCIE_DEV,
TYPE_PXB_PCIE_DEVICE)
-struct PXBDev {
- /*< private >*/
- PCIDevice parent_obj;
- /*< public >*/
-
- uint8_t bus_nr;
- uint16_t numa_node;
- bool bypass_iommu;
-};
-
static PXBDev *convert_to_pxb(PCIDevice *dev)
{
+ /* A CXL PXB's parent bus is PCIe, so the normal check won't work */
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PXB_CXL_DEVICE)) {
+ return PXB_CXL_DEV(dev);
+ }
+
return pci_bus_is_express(pci_get_bus(dev))
? PXB_PCIE_DEV(dev) : PXB_DEV(dev);
}
@@ -70,6 +72,13 @@ static GList *pxb_dev_list;
#define TYPE_PXB_HOST "pxb-host"
+CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb)
+{
+ CXLHost *host = PXB_CXL_HOST(hb);
+
+ return &host->cxl_cstate;
+}
+
static int pxb_bus_num(PCIBus *bus)
{
PXBDev *pxb = convert_to_pxb(bus->parent_dev);
@@ -106,11 +115,20 @@ static const TypeInfo pxb_pcie_bus_info = {
.class_init = pxb_bus_class_init,
};
+static const TypeInfo pxb_cxl_bus_info = {
+ .name = TYPE_PXB_CXL_BUS,
+ .parent = TYPE_CXL_BUS,
+ .instance_size = sizeof(PXBBus),
+ .class_init = pxb_bus_class_init,
+};
+
static const char *pxb_host_root_bus_path(PCIHostState *host_bridge,
PCIBus *rootbus)
{
- PXBBus *bus = pci_bus_is_express(rootbus) ?
- PXB_PCIE_BUS(rootbus) : PXB_BUS(rootbus);
+ PXBBus *bus = pci_bus_is_cxl(rootbus) ?
+ PXB_CXL_BUS(rootbus) :
+ pci_bus_is_express(rootbus) ? PXB_PCIE_BUS(rootbus) :
+ PXB_BUS(rootbus);
snprintf(bus->bus_path, 8, "0000:%02x", pxb_bus_num(rootbus));
return bus->bus_path;
@@ -166,6 +184,52 @@ static const TypeInfo pxb_host_info = {
.class_init = pxb_host_class_init,
};
+static void pxb_cxl_realize(DeviceState *dev, Error **errp)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+ CXLHost *cxl = PXB_CXL_HOST(dev);
+ CXLComponentState *cxl_cstate = &cxl->cxl_cstate;
+ struct MemoryRegion *mr = &cxl_cstate->crb.component_registers;
+ hwaddr offset;
+
+ cxl_component_register_block_init(OBJECT(dev), cxl_cstate,
+ TYPE_PXB_CXL_HOST);
+ sysbus_init_mmio(sbd, mr);
+
+ offset = memory_region_size(mr) * ms->cxl_devices_state->next_mr_idx;
+ if (offset > memory_region_size(&ms->cxl_devices_state->host_mr)) {
+ error_setg(errp, "Insufficient space for pxb cxl host register space");
+ return;
+ }
+
+ memory_region_add_subregion(&ms->cxl_devices_state->host_mr, offset, mr);
+ ms->cxl_devices_state->next_mr_idx++;
+}
+
+static void pxb_cxl_host_class_init(ObjectClass *class, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(class);
+ PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(class);
+
+ hc->root_bus_path = pxb_host_root_bus_path;
+ dc->fw_name = "cxl";
+ dc->realize = pxb_cxl_realize;
+ /* Reason: Internal part of the pxb/pxb-pcie device, not usable by itself */
+ dc->user_creatable = false;
+}
+
+/*
+ * This is a device to handle the MMIO for a CXL host bridge. It does nothing
+ * else.
+ */
+static const TypeInfo cxl_host_info = {
+ .name = TYPE_PXB_CXL_HOST,
+ .parent = TYPE_PCI_HOST_BRIDGE,
+ .instance_size = sizeof(CXLHost),
+ .class_init = pxb_cxl_host_class_init,
+};
+
/*
* Registers the PXB bus as a child of pci host root bus.
*/
@@ -212,6 +276,17 @@ static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin)
return pin - PCI_SLOT(pxb->devfn);
}
+static void pxb_dev_reset(DeviceState *dev)
+{
+ CXLHost *cxl = PXB_CXL_DEV(dev)->cxl.cxl_host_bridge;
+ CXLComponentState *cxl_cstate = &cxl->cxl_cstate;
+ uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers;
+ uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask;
+
+ cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
+ ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
+}
+
static gint pxb_compare(gconstpointer a, gconstpointer b)
{
const PXBDev *pxb_a = a, *pxb_b = b;
@@ -221,7 +296,8 @@ static gint pxb_compare(gconstpointer a, gconstpointer b)
0;
}
-static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp)
+static void pxb_dev_realize_common(PCIDevice *dev, enum BusType type,
+ Error **errp)
{
PXBDev *pxb = convert_to_pxb(dev);
DeviceState *ds, *bds = NULL;
@@ -245,9 +321,13 @@ static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp)
dev_name = dev->qdev.id;
}
- ds = qdev_new(TYPE_PXB_HOST);
- if (pcie) {
+ ds = qdev_new(type == CXL ? TYPE_PXB_CXL_HOST : TYPE_PXB_HOST);
+ if (type == PCIE) {
bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_PCIE_BUS);
+ } else if (type == CXL) {
+ bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_CXL_BUS);
+ bus->flags |= PCI_BUS_CXL;
+ PXB_CXL_DEV(dev)->cxl.cxl_host_bridge = PXB_CXL_HOST(ds);
} else {
bus = pci_root_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS);
bds = qdev_new("pci-bridge");
@@ -295,7 +375,7 @@ static void pxb_dev_realize(PCIDevice *dev, Error **errp)
return;
}
- pxb_dev_realize_common(dev, false, errp);
+ pxb_dev_realize_common(dev, PCI, errp);
}
static void pxb_dev_exitfn(PCIDevice *pci_dev)
@@ -348,7 +428,7 @@ static void pxb_pcie_dev_realize(PCIDevice *dev, Error **errp)
return;
}
- pxb_dev_realize_common(dev, true, errp);
+ pxb_dev_realize_common(dev, PCIE, errp);
}
static void pxb_pcie_dev_class_init(ObjectClass *klass, void *data)
@@ -379,13 +459,67 @@ static const TypeInfo pxb_pcie_dev_info = {
},
};
+static void pxb_cxl_dev_realize(PCIDevice *dev, Error **errp)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+
+ /* A CXL PXB's parent bus is still PCIe */
+ if (!pci_bus_is_express(pci_get_bus(dev))) {
+ error_setg(errp, "pxb-cxl devices cannot reside on a PCI bus");
+ return;
+ }
+ if (!ms->cxl_devices_state->is_enabled) {
+ error_setg(errp, "Machine does not have cxl=on");
+ return;
+ }
+
+ pxb_dev_realize_common(dev, CXL, errp);
+ pxb_dev_reset(DEVICE(dev));
+}
+
+static void pxb_cxl_dev_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+ k->realize = pxb_cxl_dev_realize;
+ k->exit = pxb_dev_exitfn;
+ /*
+ * XXX: These types of bridges don't actually show up in the hierarchy so
+ * vendor, device, class, etc. ids are intentionally left out.
+ */
+
+ dc->desc = "CXL Host Bridge";
+ device_class_set_props(dc, pxb_dev_properties);
+ set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+
+ /* Host bridges aren't hotpluggable. FIXME: spec reference */
+ dc->hotpluggable = false;
+ dc->reset = pxb_dev_reset;
+}
+
+static const TypeInfo pxb_cxl_dev_info = {
+ .name = TYPE_PXB_CXL_DEVICE,
+ .parent = TYPE_PCI_DEVICE,
+ .instance_size = sizeof(PXBDev),
+ .class_init = pxb_cxl_dev_class_init,
+ .interfaces =
+ (InterfaceInfo[]){
+ { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+ {},
+ },
+};
+
static void pxb_register_types(void)
{
type_register_static(&pxb_bus_info);
type_register_static(&pxb_pcie_bus_info);
+ type_register_static(&pxb_cxl_bus_info);
type_register_static(&pxb_host_info);
+ type_register_static(&cxl_host_info);
type_register_static(&pxb_dev_info);
type_register_static(&pxb_pcie_dev_info);
+ type_register_static(&pxb_cxl_dev_info);
}
type_init(pxb_register_types)
diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c
index f1cfe9d14a..460e48269d 100644
--- a/hw/pci-bridge/pcie_root_port.c
+++ b/hw/pci-bridge/pcie_root_port.c
@@ -67,7 +67,11 @@ static void rp_realize(PCIDevice *d, Error **errp)
int rc;
pci_config_set_interrupt_pin(d->config, 1);
- pci_bridge_initfn(d, TYPE_PCIE_BUS);
+ if (d->cap_present & QEMU_PCIE_CAP_CXL) {
+ pci_bridge_initfn(d, TYPE_CXL_BUS);
+ } else {
+ pci_bridge_initfn(d, TYPE_PCIE_BUS);
+ }
pcie_port_init_reg(d);
rc = pci_bridge_ssvid_init(d, rpc->ssvid_offset, dc->vendor_id,
diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c
index e7e162a00a..7c7316bc96 100644
--- a/hw/pci-host/gpex-acpi.c
+++ b/hw/pci-host/gpex-acpi.c
@@ -5,6 +5,7 @@
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pcie_host.h"
+#include "hw/acpi/cxl.h"
static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq)
{
@@ -139,6 +140,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
QLIST_FOREACH(bus, &bus->child, sibling) {
uint8_t bus_num = pci_bus_num(bus);
uint8_t numa_node = pci_bus_numa_node(bus);
+ bool is_cxl = pci_bus_is_cxl(bus);
if (!pci_bus_is_root(bus)) {
continue;
@@ -154,8 +156,16 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
}
dev = aml_device("PC%.02X", bus_num);
- aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A08")));
- aml_append(dev, aml_name_decl("_CID", aml_string("PNP0A03")));
+ if (is_cxl) {
+ struct Aml *pkg = aml_package(2);
+ aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0016")));
+ aml_append(pkg, aml_eisaid("PNP0A08"));
+ aml_append(pkg, aml_eisaid("PNP0A03"));
+ aml_append(dev, aml_name_decl("_CID", pkg));
+ } else {
+ aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A08")));
+ aml_append(dev, aml_name_decl("_CID", aml_string("PNP0A03")));
+ }
aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num)));
aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
aml_append(dev, aml_name_decl("_STR", aml_unicode("pxb Device")));
@@ -175,7 +185,11 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
cfg->pio.base, 0, 0, 0);
aml_append(dev, aml_name_decl("_CRS", crs));
- acpi_dsdt_add_pci_osc(dev);
+ if (is_cxl) {
+ build_cxl_osc_method(dev);
+ } else {
+ acpi_dsdt_add_pci_osc(dev);
+ }
aml_append(scope, dev);
}
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 9c58f02853..a9b37f8000 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -200,6 +200,11 @@ static const TypeInfo pci_bus_info = {
.class_init = pci_bus_class_init,
};
+static const TypeInfo cxl_interface_info = {
+ .name = INTERFACE_CXL_DEVICE,
+ .parent = TYPE_INTERFACE,
+};
+
static const TypeInfo pcie_interface_info = {
.name = INTERFACE_PCIE_DEVICE,
.parent = TYPE_INTERFACE,
@@ -223,6 +228,12 @@ static const TypeInfo pcie_bus_info = {
.class_init = pcie_bus_class_init,
};
+static const TypeInfo cxl_bus_info = {
+ .name = TYPE_CXL_BUS,
+ .parent = TYPE_PCIE_BUS,
+ .class_init = pcie_bus_class_init,
+};
+
static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num);
static void pci_update_mappings(PCIDevice *d);
static void pci_irq_handler(void *opaque, int irq_num, int level);
@@ -2182,6 +2193,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
+ if (object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE)) {
+ pci_dev->cap_present |= QEMU_PCIE_CAP_CXL;
+ }
+
pci_dev = do_pci_register_device(pci_dev,
object_get_typename(OBJECT(qdev)),
pci_dev->devfn, errp);
@@ -2747,7 +2762,9 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data)
object_class_dynamic_cast(klass, INTERFACE_CONVENTIONAL_PCI_DEVICE);
ObjectClass *pcie =
object_class_dynamic_cast(klass, INTERFACE_PCIE_DEVICE);
- assert(conventional || pcie);
+ ObjectClass *cxl =
+ object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE);
+ assert(conventional || pcie || cxl);
}
}
@@ -2937,7 +2954,9 @@ static void pci_register_types(void)
{
type_register_static(&pci_bus_info);
type_register_static(&pcie_bus_info);
+ type_register_static(&cxl_bus_info);
type_register_static(&conventional_pci_interface_info);
+ type_register_static(&cxl_interface_info);
type_register_static(&pcie_interface_info);
type_register_static(&pci_device_type_info);
}
diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
index e95c1e5519..687e4e763a 100644
--- a/hw/pci/pcie_port.c
+++ b/hw/pci/pcie_port.c
@@ -136,6 +136,31 @@ static void pcie_port_class_init(ObjectClass *oc, void *data)
device_class_set_props(dc, pcie_port_props);
}
+PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn)
+{
+ int devfn;
+
+ for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+ PCIDevice *d = bus->devices[devfn];
+ PCIEPort *port;
+
+ if (!d || !pci_is_express(d) || !d->exp.exp_cap) {
+ continue;
+ }
+
+ if (!object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) {
+ continue;
+ }
+
+ port = PCIE_PORT(d);
+ if (port->port == pn) {
+ return d;
+ }
+ }
+
+ return NULL;
+}
+
static const TypeInfo pcie_port_type_info = {
.name = TYPE_PCIE_PORT,
.parent = TYPE_PCI_BRIDGE,
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 778f43e4c1..3059068175 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -273,6 +273,13 @@ static void vhost_scsi_unrealize(DeviceState *dev)
virtio_scsi_common_unrealize(dev);
}
+static struct vhost_dev *vhost_scsi_get_vhost(VirtIODevice *vdev)
+{
+ VHostSCSI *s = VHOST_SCSI(vdev);
+ VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+ return &vsc->dev;
+}
+
static Property vhost_scsi_properties[] = {
DEFINE_PROP_STRING("vhostfd", VirtIOSCSICommon, conf.vhostfd),
DEFINE_PROP_STRING("wwpn", VirtIOSCSICommon, conf.wwpn),
@@ -307,6 +314,7 @@ static void vhost_scsi_class_init(ObjectClass *klass, void *data)
vdc->get_features = vhost_scsi_common_get_features;
vdc->set_config = vhost_scsi_common_set_config;
vdc->set_status = vhost_scsi_set_status;
+ vdc->get_vhost = vhost_scsi_get_vhost;
fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
}
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index 1b2f7eed98..9be21d07ee 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -121,6 +121,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
vsc->dev.backend_features = 0;
vqs = vsc->dev.vqs;
+ s->vhost_user.supports_config = true;
ret = vhost_dev_init(&vsc->dev, &s->vhost_user,
VHOST_BACKEND_TYPE_USER, 0, errp);
if (ret < 0) {
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index db54d104be..4141dddd51 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -1013,8 +1013,7 @@ void virtio_scsi_common_realize(DeviceState *dev,
VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(dev);
int i;
- virtio_init(vdev, "virtio-scsi", VIRTIO_ID_SCSI,
- sizeof(VirtIOSCSIConfig));
+ virtio_init(vdev, VIRTIO_ID_SCSI, sizeof(VirtIOSCSIConfig));
if (s->conf.num_queues == VIRTIO_SCSI_AUTO_NUM_QUEUES) {
s->conf.num_queues = 1;
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 333348d9d5..ab8e095b73 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -21,6 +21,9 @@ vhost_user_set_mem_table_withfd(int index, const char *name, uint64_t memory_siz
vhost_user_postcopy_waker(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64
vhost_user_postcopy_waker_found(uint64_t client_addr) "0x%"PRIx64
vhost_user_postcopy_waker_nomatch(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64
+vhost_user_read(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32""
+vhost_user_write(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32""
+vhost_user_create_notifier(int idx, void *n) "idx:%d n:%p"
# vhost-vdpa.c
vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8
@@ -89,7 +92,12 @@ virtio_mmio_guest_page(uint64_t size, int shift) "guest page size 0x%" PRIx64 "
virtio_mmio_queue_write(uint64_t value, int max_size) "mmio_queue write 0x%" PRIx64 " max %d"
virtio_mmio_setting_irq(int level) "virtio_mmio setting IRQ %d"
-# virtio-iommu.c
+# virtio-pci.c
+virtio_pci_notify(uint16_t vector) "virtio_pci_notify vec 0x%x"
+virtio_pci_notify_write(uint64_t addr, uint64_t val, unsigned int size) "0x%" PRIx64" = 0x%" PRIx64 " (%d)"
+virtio_pci_notify_write_pio(uint64_t addr, uint64_t val, unsigned int size) "0x%" PRIx64" = 0x%" PRIx64 " (%d)"
+
+# hw/virtio/virtio-iommu.c
virtio_iommu_device_reset(void) "reset!"
virtio_iommu_system_reset(void) "system reset!"
virtio_iommu_get_features(uint64_t features) "device supports features=0x%"PRIx64
diff --git a/hw/virtio/vhost-scsi-pci.c b/hw/virtio/vhost-scsi-pci.c
index cb71a294fa..08980bc23b 100644
--- a/hw/virtio/vhost-scsi-pci.c
+++ b/hw/virtio/vhost-scsi-pci.c
@@ -21,7 +21,7 @@
#include "hw/virtio/vhost-scsi.h"
#include "qapi/error.h"
#include "qemu/module.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "qom/object.h"
typedef struct VHostSCSIPCI VHostSCSIPCI;
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 1e5cfe2af6..56c96ebd13 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -138,6 +138,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
for (n = 0; n < num; n++) {
if (more_descs || (n + 1 < num)) {
descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
+ descs[i].next = cpu_to_le16(svq->desc_next[i]);
} else {
descs[i].flags = flags;
}
@@ -145,10 +146,10 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
descs[i].len = cpu_to_le32(iovec[n].iov_len);
last = i;
- i = cpu_to_le16(descs[i].next);
+ i = cpu_to_le16(svq->desc_next[i]);
}
- svq->free_head = le16_to_cpu(descs[last].next);
+ svq->free_head = le16_to_cpu(svq->desc_next[last]);
}
static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
@@ -198,11 +199,19 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
return true;
}
+/**
+ * Add an element to a SVQ.
+ *
+ * The caller must check that there is enough slots for the new element. It
+ * takes ownership of the element: In case of failure, it is free and the SVQ
+ * is considered broken.
+ */
static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
{
unsigned qemu_head;
bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
if (unlikely(!ok)) {
+ g_free(elem);
return false;
}
@@ -333,13 +342,22 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
}
+static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
+ uint16_t num, uint16_t i)
+{
+ for (uint16_t j = 0; j < (num - 1); ++j) {
+ i = le16_to_cpu(svq->desc_next[i]);
+ }
+
+ return i;
+}
+
static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
uint32_t *len)
{
- vring_desc_t *descs = svq->vring.desc;
const vring_used_t *used = svq->vring.used;
vring_used_elem_t used_elem;
- uint16_t last_used;
+ uint16_t last_used, last_used_chain, num;
if (!vhost_svq_more_used(svq)) {
return NULL;
@@ -365,7 +383,10 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
return NULL;
}
- descs[used_elem.id].next = svq->free_head;
+ num = svq->ring_id_maps[used_elem.id]->in_num +
+ svq->ring_id_maps[used_elem.id]->out_num;
+ last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
+ svq->desc_next[last_used_chain] = svq->free_head;
svq->free_head = used_elem.id;
*len = used_elem.len;
@@ -540,8 +561,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size);
memset(svq->vring.used, 0, device_size);
svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
+ svq->desc_next = g_new0(uint16_t, svq->vring.num);
for (unsigned i = 0; i < svq->vring.num - 1; i++) {
- svq->vring.desc[i].next = cpu_to_le16(i + 1);
+ svq->desc_next[i] = cpu_to_le16(i + 1);
}
}
@@ -574,6 +596,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
virtqueue_detach_element(svq->vq, next_avail_elem, 0);
}
svq->vq = NULL;
+ g_free(svq->desc_next);
g_free(svq->ring_id_maps);
qemu_vfree(svq->vring.desc);
qemu_vfree(svq->vring.used);
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index e5e24c536d..c132c994e9 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -53,6 +53,12 @@ typedef struct VhostShadowVirtqueue {
/* Next VirtQueue element that guest made available */
VirtQueueElement *next_guest_avail_elem;
+ /*
+ * Backup next field for each descriptor so we can recover securely, not
+ * needing to trust the device access.
+ */
+ uint16_t *desc_next;
+
/* Next head to expose to the device */
uint16_t shadow_avail_idx;
diff --git a/hw/virtio/vhost-user-blk-pci.c b/hw/virtio/vhost-user-blk-pci.c
index 33b404d8a2..eef8641a98 100644
--- a/hw/virtio/vhost-user-blk-pci.c
+++ b/hw/virtio/vhost-user-blk-pci.c
@@ -26,7 +26,7 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "qom/object.h"
typedef struct VHostUserBlkPCI VHostUserBlkPCI;
diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c
index 2ed8492b3f..6829b8b743 100644
--- a/hw/virtio/vhost-user-fs-pci.c
+++ b/hw/virtio/vhost-user-fs-pci.c
@@ -14,7 +14,7 @@
#include "qemu/osdep.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/vhost-user-fs.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "qom/object.h"
struct VHostUserFSPCI {
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index c595957983..e513e4fdda 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -219,8 +219,7 @@ static void vuf_device_realize(DeviceState *dev, Error **errp)
return;
}
- virtio_init(vdev, "vhost-user-fs", VIRTIO_ID_FS,
- sizeof(struct virtio_fs_config));
+ virtio_init(vdev, VIRTIO_ID_FS, sizeof(struct virtio_fs_config));
/* Hiprio queue */
fs->hiprio_vq = virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output);
@@ -277,6 +276,12 @@ static void vuf_device_unrealize(DeviceState *dev)
fs->vhost_dev.vqs = NULL;
}
+static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev)
+{
+ VHostUserFS *fs = VHOST_USER_FS(vdev);
+ return &fs->vhost_dev;
+}
+
static const VMStateDescription vuf_vmstate = {
.name = "vhost-user-fs",
.unmigratable = 1,
@@ -314,6 +319,7 @@ static void vuf_class_init(ObjectClass *klass, void *data)
vdc->set_status = vuf_set_status;
vdc->guest_notifier_mask = vuf_guest_notifier_mask;
vdc->guest_notifier_pending = vuf_guest_notifier_pending;
+ vdc->get_vhost = vuf_get_vhost;
}
static const TypeInfo vuf_info = {
diff --git a/hw/virtio/vhost-user-i2c-pci.c b/hw/virtio/vhost-user-i2c-pci.c
index 70b7b65fd9..00ac10941f 100644
--- a/hw/virtio/vhost-user-i2c-pci.c
+++ b/hw/virtio/vhost-user-i2c-pci.c
@@ -9,7 +9,7 @@
#include "qemu/osdep.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/vhost-user-i2c.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
struct VHostUserI2CPCI {
VirtIOPCIProxy parent_obj;
diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c
index 42c7f6d9e5..6020eee093 100644
--- a/hw/virtio/vhost-user-i2c.c
+++ b/hw/virtio/vhost-user-i2c.c
@@ -14,11 +14,6 @@
#include "qemu/error-report.h"
#include "standard-headers/linux/virtio_ids.h"
-/* Remove this once the header is updated in Linux kernel */
-#ifndef VIRTIO_ID_I2C_ADAPTER
-#define VIRTIO_ID_I2C_ADAPTER 34
-#endif
-
static const int feature_bits[] = {
VIRTIO_I2C_F_ZERO_LENGTH_REQUEST,
VHOST_INVALID_FEATURE_BIT
@@ -227,7 +222,7 @@ static void vu_i2c_device_realize(DeviceState *dev, Error **errp)
return;
}
- virtio_init(vdev, "vhost-user-i2c", VIRTIO_ID_I2C_ADAPTER, 0);
+ virtio_init(vdev, VIRTIO_ID_I2C_ADAPTER, 0);
i2c->vhost_dev.nvqs = 1;
i2c->vq = virtio_add_queue(vdev, 4, vu_i2c_handle_output);
diff --git a/hw/virtio/vhost-user-input-pci.c b/hw/virtio/vhost-user-input-pci.c
index c9d3e9113a..b858898a36 100644
--- a/hw/virtio/vhost-user-input-pci.c
+++ b/hw/virtio/vhost-user-input-pci.c
@@ -9,7 +9,7 @@
#include "hw/virtio/virtio-input.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "qom/object.h"
typedef struct VHostUserInputPCI VHostUserInputPCI;
diff --git a/hw/virtio/vhost-user-rng-pci.c b/hw/virtio/vhost-user-rng-pci.c
index c83dc86813..f64935453b 100644
--- a/hw/virtio/vhost-user-rng-pci.c
+++ b/hw/virtio/vhost-user-rng-pci.c
@@ -9,7 +9,7 @@
#include "qemu/osdep.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/vhost-user-rng.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
struct VHostUserRNGPCI {
VirtIOPCIProxy parent_obj;
diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c
index 209ee5bf9a..3a7bf8e32d 100644
--- a/hw/virtio/vhost-user-rng.c
+++ b/hw/virtio/vhost-user-rng.c
@@ -203,7 +203,7 @@ static void vu_rng_device_realize(DeviceState *dev, Error **errp)
return;
}
- virtio_init(vdev, "vhost-user-rng", VIRTIO_ID_RNG, 0);
+ virtio_init(vdev, VIRTIO_ID_RNG, 0);
rng->req_vq = virtio_add_queue(vdev, 4, vu_rng_handle_output);
if (!rng->req_vq) {
@@ -247,6 +247,12 @@ static void vu_rng_device_unrealize(DeviceState *dev)
vhost_user_cleanup(&rng->vhost_user);
}
+static struct vhost_dev *vu_rng_get_vhost(VirtIODevice *vdev)
+{
+ VHostUserRNG *rng = VHOST_USER_RNG(vdev);
+ return &rng->vhost_dev;
+}
+
static const VMStateDescription vu_rng_vmstate = {
.name = "vhost-user-rng",
.unmigratable = 1,
@@ -272,6 +278,7 @@ static void vu_rng_class_init(ObjectClass *klass, void *data)
vdc->set_status = vu_rng_set_status;
vdc->guest_notifier_mask = vu_rng_guest_notifier_mask;
vdc->guest_notifier_pending = vu_rng_guest_notifier_pending;
+ vdc->get_vhost = vu_rng_get_vhost;
}
static const TypeInfo vu_rng_info = {
diff --git a/hw/virtio/vhost-user-scsi-pci.c b/hw/virtio/vhost-user-scsi-pci.c
index d5343412a1..75882e3cf9 100644
--- a/hw/virtio/vhost-user-scsi-pci.c
+++ b/hw/virtio/vhost-user-scsi-pci.c
@@ -30,7 +30,7 @@
#include "hw/pci/msix.h"
#include "hw/loader.h"
#include "sysemu/kvm.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "qom/object.h"
typedef struct VHostUserSCSIPCI VHostUserSCSIPCI;
diff --git a/hw/virtio/vhost-user-vsock-pci.c b/hw/virtio/vhost-user-vsock-pci.c
index 72a96199cd..e5a86e8013 100644
--- a/hw/virtio/vhost-user-vsock-pci.c
+++ b/hw/virtio/vhost-user-vsock-pci.c
@@ -10,7 +10,7 @@
#include "qemu/osdep.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/vhost-user-vsock.h"
#include "qom/object.h"
diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c
index 52bd682c34..0f8ff99f85 100644
--- a/hw/virtio/vhost-user-vsock.c
+++ b/hw/virtio/vhost-user-vsock.c
@@ -107,7 +107,7 @@ static void vuv_device_realize(DeviceState *dev, Error **errp)
return;
}
- vhost_vsock_common_realize(vdev, "vhost-user-vsock");
+ vhost_vsock_common_realize(vdev);
vhost_dev_set_config_notifier(&vvc->vhost_dev, &vsock_ops);
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index afd51f79b3..b040c1ad2b 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -489,6 +489,8 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
return ret < 0 ? -saved_errno : -EIO;
}
+ trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);
+
return 0;
}
@@ -542,6 +544,8 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
}
}
+ trace_vhost_user_read(msg.hdr.request, msg.hdr.flags);
+
return 0;
}
@@ -1170,14 +1174,16 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
n->unmap_addr = NULL;
}
-static void vhost_user_host_notifier_remove(VhostUserState *user,
- VirtIODevice *vdev, int queue_idx)
+/*
+ * clean-up function for notifier, will finally free the structure
+ * under rcu.
+ */
+static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
+ VirtIODevice *vdev)
{
- VhostUserHostNotifier *n = &user->notifier[queue_idx];
-
if (n->addr) {
if (vdev) {
- virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
+ virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
}
assert(!n->unmap_addr);
n->unmap_addr = n->addr;
@@ -1221,6 +1227,15 @@ static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
return 0;
}
+static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
+ int idx)
+{
+ if (idx >= u->notifiers->len) {
+ return NULL;
+ }
+ return g_ptr_array_index(u->notifiers, idx);
+}
+
static int vhost_user_get_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
@@ -1233,7 +1248,10 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
};
struct vhost_user *u = dev->opaque;
- vhost_user_host_notifier_remove(u->user, dev->vdev, ring->index);
+ VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
+ if (n) {
+ vhost_user_host_notifier_remove(n, dev->vdev);
+ }
ret = vhost_user_write(dev, &msg, NULL, 0);
if (ret < 0) {
@@ -1498,6 +1516,29 @@ static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
return dev->config_ops->vhost_dev_config_notifier(dev);
}
+/*
+ * Fetch or create the notifier for a given idx. Newly created
+ * notifiers are added to the pointer array that tracks them.
+ */
+static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
+ int idx)
+{
+ VhostUserHostNotifier *n = NULL;
+ if (idx >= u->notifiers->len) {
+ g_ptr_array_set_size(u->notifiers, idx);
+ }
+
+ n = g_ptr_array_index(u->notifiers, idx);
+ if (!n) {
+ n = g_new0(VhostUserHostNotifier, 1);
+ n->idx = idx;
+ g_ptr_array_insert(u->notifiers, idx, n);
+ trace_vhost_user_create_notifier(idx, n);
+ }
+
+ return n;
+}
+
static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
VhostUserVringArea *area,
int fd)
@@ -1517,9 +1558,12 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
return -EINVAL;
}
- n = &user->notifier[queue_idx];
-
- vhost_user_host_notifier_remove(user, vdev, queue_idx);
+ /*
+ * Fetch notifier and invalidate any old data before setting up
+ * new mapped address.
+ */
+ n = fetch_or_create_notifier(user, queue_idx);
+ vhost_user_host_notifier_remove(n, vdev);
if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
return 0;
@@ -1945,14 +1989,15 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
Error **errp)
{
- uint64_t features, protocol_features, ram_slots;
+ uint64_t features, ram_slots;
struct vhost_user *u;
+ VhostUserState *vus = (VhostUserState *) opaque;
int err;
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
u = g_new0(struct vhost_user, 1);
- u->user = opaque;
+ u->user = vus;
u->dev = dev;
dev->opaque = u;
@@ -1963,6 +2008,10 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
}
if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
+ bool supports_f_config = vus->supports_config ||
+ (dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
+ uint64_t protocol_features;
+
dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
@@ -1972,19 +2021,34 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
return -EPROTO;
}
- dev->protocol_features =
- protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
-
- if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
- /* Don't acknowledge CONFIG feature if device doesn't support it */
- dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
- } else if (!(protocol_features &
- (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
- error_setg(errp, "Device expects VHOST_USER_PROTOCOL_F_CONFIG "
- "but backend does not support it.");
- return -EINVAL;
+ /*
+ * We will use all the protocol features we support - although
+ * we suppress F_CONFIG if we know QEMUs internal code can not support
+ * it.
+ */
+ protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;
+
+ if (supports_f_config) {
+ if (!virtio_has_feature(protocol_features,
+ VHOST_USER_PROTOCOL_F_CONFIG)) {
+ error_setg(errp, "vhost-user device %s expecting "
+ "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does "
+ "not support it.", dev->vdev->name);
+ return -EPROTO;
+ }
+ } else {
+ if (virtio_has_feature(protocol_features,
+ VHOST_USER_PROTOCOL_F_CONFIG)) {
+ warn_reportf_err(*errp, "vhost-user backend supports "
+ "VHOST_USER_PROTOCOL_F_CONFIG for "
+ "device %s but QEMU does not.",
+ dev->vdev->name);
+ protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
+ }
}
+ /* final set of protocol features */
+ dev->protocol_features = protocol_features;
err = vhost_user_set_protocol_features(dev, dev->protocol_features);
if (err < 0) {
error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
@@ -2502,6 +2566,20 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
return vhost_user_write(dev, &msg, &inflight->fd, 1);
}
+static void vhost_user_state_destroy(gpointer data)
+{
+ VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
+ if (n) {
+ vhost_user_host_notifier_remove(n, NULL);
+ object_unparent(OBJECT(&n->mr));
+ /*
+ * We can't free until vhost_user_host_notifier_remove has
+ * done it's thing so schedule the free with RCU.
+ */
+ g_free_rcu(n, rcu);
+ }
+}
+
bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
{
if (user->chr) {
@@ -2510,23 +2588,18 @@ bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
}
user->chr = chr;
user->memory_slots = 0;
+ user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
+ &vhost_user_state_destroy);
return true;
}
void vhost_user_cleanup(VhostUserState *user)
{
- int i;
- VhostUserHostNotifier *n;
-
if (!user->chr) {
return;
}
memory_region_transaction_begin();
- for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
- n = &user->notifier[i];
- vhost_user_host_notifier_remove(user, NULL, i);
- object_unparent(OBJECT(&n->mr));
- }
+ user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
memory_region_transaction_commit();
user->chr = NULL;
}
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index a30510ed17..66f054a12c 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -368,11 +368,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v)
v->iova_range.last);
}
-static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
+/*
+ * The use of this function is for requests that only need to be
+ * applied once. Typically such request occurs at the beginning
+ * of operation, and before setting up queues. It should not be
+ * used for request that performs operation until all queues are
+ * set, which would need to check dev->vq_index_end instead.
+ */
+static bool vhost_vdpa_first_dev(struct vhost_dev *dev)
{
struct vhost_vdpa *v = dev->opaque;
- return v->index != 0;
+ return v->index == 0;
}
static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
@@ -453,7 +460,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
vhost_vdpa_get_iova_range(v);
- if (vhost_vdpa_one_time_request(dev)) {
+ if (!vhost_vdpa_first_dev(dev)) {
return 0;
}
@@ -596,7 +603,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
struct vhost_memory *mem)
{
- if (vhost_vdpa_one_time_request(dev)) {
+ if (!vhost_vdpa_first_dev(dev)) {
return 0;
}
@@ -625,7 +632,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev,
struct vhost_vdpa *v = dev->opaque;
int ret;
- if (vhost_vdpa_one_time_request(dev)) {
+ if (!vhost_vdpa_first_dev(dev)) {
return 0;
}
@@ -667,7 +674,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
features &= f;
- if (vhost_vdpa_one_time_request(dev)) {
+ if (vhost_vdpa_first_dev(dev)) {
r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
if (r) {
return -EFAULT;
@@ -1018,7 +1025,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
struct vhost_vring_addr addr = {
- .index = i,
+ .index = dev->vq_index + i,
};
int r;
bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
@@ -1120,7 +1127,7 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
struct vhost_log *log)
{
struct vhost_vdpa *v = dev->opaque;
- if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) {
+ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) {
return 0;
}
@@ -1172,11 +1179,11 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
struct vhost_vdpa *v = dev->opaque;
+ int vdpa_idx = ring->index - dev->vq_index;
int ret;
if (v->shadow_vqs_enabled) {
- VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs,
- ring->index);
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
/*
* Setting base as last used idx, so destination will see as available
@@ -1242,7 +1249,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev,
static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
- if (vhost_vdpa_one_time_request(dev)) {
+ if (!vhost_vdpa_first_dev(dev)) {
return 0;
}
diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c
index ed706681ac..7394818e00 100644
--- a/hw/virtio/vhost-vsock-common.c
+++ b/hw/virtio/vhost-vsock-common.c
@@ -224,12 +224,11 @@ int vhost_vsock_common_post_load(void *opaque, int version_id)
return 0;
}
-void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name)
+void vhost_vsock_common_realize(VirtIODevice *vdev)
{
VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
- virtio_init(vdev, name, VIRTIO_ID_VSOCK,
- sizeof(struct virtio_vsock_config));
+ virtio_init(vdev, VIRTIO_ID_VSOCK, sizeof(struct virtio_vsock_config));
/* Receive and transmit queues belong to vhost */
vvc->recv_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE,
@@ -259,6 +258,12 @@ void vhost_vsock_common_unrealize(VirtIODevice *vdev)
virtio_cleanup(vdev);
}
+static struct vhost_dev *vhost_vsock_common_get_vhost(VirtIODevice *vdev)
+{
+ VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+ return &vvc->vhost_dev;
+}
+
static Property vhost_vsock_common_properties[] = {
DEFINE_PROP_ON_OFF_AUTO("seqpacket", VHostVSockCommon, seqpacket,
ON_OFF_AUTO_AUTO),
@@ -274,6 +279,7 @@ static void vhost_vsock_common_class_init(ObjectClass *klass, void *data)
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
vdc->guest_notifier_mask = vhost_vsock_common_guest_notifier_mask;
vdc->guest_notifier_pending = vhost_vsock_common_guest_notifier_pending;
+ vdc->get_vhost = vhost_vsock_common_get_vhost;
}
static const TypeInfo vhost_vsock_common_info = {
diff --git a/hw/virtio/vhost-vsock-pci.c b/hw/virtio/vhost-vsock-pci.c
index 205da8d1f5..9f34414d38 100644
--- a/hw/virtio/vhost-vsock-pci.c
+++ b/hw/virtio/vhost-vsock-pci.c
@@ -13,7 +13,7 @@
#include "qemu/osdep.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/vhost-vsock.h"
#include "qemu/module.h"
diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c
index 714046210b..0338de892f 100644
--- a/hw/virtio/vhost-vsock.c
+++ b/hw/virtio/vhost-vsock.c
@@ -169,7 +169,7 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp)
}
}
- vhost_vsock_common_realize(vdev, "vhost-vsock");
+ vhost_vsock_common_realize(vdev);
ret = vhost_dev_init(&vvc->vhost_dev, (void *)(uintptr_t)vhostfd,
VHOST_BACKEND_TYPE_KERNEL, 0, errp);
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 2bc72c27c5..dd3263df56 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1738,6 +1738,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
/* should only be called after backend is connected */
assert(hdev->vhost_ops);
+ vdev->vhost_started = true;
hdev->started = true;
hdev->vdev = vdev;
@@ -1810,7 +1811,7 @@ fail_vq:
fail_mem:
fail_features:
-
+ vdev->vhost_started = false;
hdev->started = false;
return r;
}
@@ -1841,6 +1842,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
}
vhost_log_put(hdev, true);
hdev->started = false;
+ vdev->vhost_started = false;
hdev->vdev = NULL;
}
diff --git a/hw/virtio/virtio-9p-pci.c b/hw/virtio/virtio-9p-pci.c
index e07adcd9ea..94c14f0b98 100644
--- a/hw/virtio/virtio-9p-pci.c
+++ b/hw/virtio/virtio-9p-pci.c
@@ -15,7 +15,7 @@
#include "qemu/osdep.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "hw/9pfs/virtio-9p.h"
#include "hw/qdev-properties.h"
#include "qemu/module.h"
diff --git a/hw/virtio/virtio-balloon-pci.c b/hw/virtio/virtio-balloon-pci.c
index 79a3ba979a..ce2645ba71 100644
--- a/hw/virtio/virtio-balloon-pci.c
+++ b/hw/virtio/virtio-balloon-pci.c
@@ -14,7 +14,7 @@
#include "qemu/osdep.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-balloon.h"
#include "qapi/error.h"
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 8f1b38ef5c..73ac5eb675 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -882,8 +882,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
VirtIOBalloon *s = VIRTIO_BALLOON(dev);
int ret;
- virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON,
- virtio_balloon_config_size(s));
+ virtio_init(vdev, VIRTIO_ID_BALLOON, virtio_balloon_config_size(s));
ret = qemu_add_balloon_handler(virtio_balloon_to_target,
virtio_balloon_stat, s);
diff --git a/hw/virtio/virtio-blk-pci.c b/hw/virtio/virtio-blk-pci.c
index 9d5795810c..9743bee965 100644
--- a/hw/virtio/virtio-blk-pci.c
+++ b/hw/virtio/virtio-blk-pci.c
@@ -19,7 +19,7 @@
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-blk.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "qapi/error.h"
#include "qemu/module.h"
#include "qom/object.h"
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index 0f69d1c742..d7ec023adf 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -78,17 +78,23 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp)
return;
}
- vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
- if (klass->get_dma_as != NULL && has_iommu) {
+ vdev->dma_as = &address_space_memory;
+ if (has_iommu) {
+ vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+ /*
+ * Present IOMMU_PLATFORM to the driver iff iommu_plattform=on and
+ * device operational. If the driver does not accept IOMMU_PLATFORM
+ * we fail the device.
+ */
virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM);
- vdev->dma_as = klass->get_dma_as(qbus->parent);
- if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) {
- error_setg(errp,
+ if (klass->get_dma_as) {
+ vdev->dma_as = klass->get_dma_as(qbus->parent);
+ if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) {
+ error_setg(errp,
"iommu_platform=true is not supported by the device");
- return;
+ return;
+ }
}
- } else {
- vdev->dma_as = &address_space_memory;
}
}
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index dcd80b904d..c3829e7498 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -242,7 +242,7 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
}
out_num = elem->out_num;
- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num);
+ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num);
out_iov = out_iov_copy;
in_num = elem->in_num;
@@ -605,11 +605,11 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request)
}
out_num = elem->out_num;
- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num);
+ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num);
out_iov = out_iov_copy;
in_num = elem->in_num;
- in_iov_copy = g_memdup(elem->in_sg, sizeof(in_iov[0]) * in_num);
+ in_iov_copy = g_memdup2(elem->in_sg, sizeof(in_iov[0]) * in_num);
in_iov = in_iov_copy;
if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req))
@@ -810,7 +810,7 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
return;
}
- virtio_init(vdev, "virtio-crypto", VIRTIO_ID_CRYPTO, vcrypto->config_size);
+ virtio_init(vdev, VIRTIO_ID_CRYPTO, vcrypto->config_size);
vcrypto->curr_queues = 1;
vcrypto->vqs = g_new0(VirtIOCryptoQueue, vcrypto->max_queues);
for (i = 0; i < vcrypto->max_queues; i++) {
@@ -961,6 +961,15 @@ static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx)
return cryptodev_vhost_virtqueue_pending(vdev, queue, idx);
}
+static struct vhost_dev *virtio_crypto_get_vhost(VirtIODevice *vdev)
+{
+ VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
+ CryptoDevBackend *b = vcrypto->cryptodev;
+ CryptoDevBackendClient *cc = b->conf.peers.ccs[0];
+ CryptoDevBackendVhost *vhost_crypto = cryptodev_get_vhost(cc, b, 0);
+ return &vhost_crypto->dev;
+}
+
static void virtio_crypto_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -977,6 +986,7 @@ static void virtio_crypto_class_init(ObjectClass *klass, void *data)
vdc->set_status = virtio_crypto_set_status;
vdc->guest_notifier_mask = virtio_crypto_guest_notifier_mask;
vdc->guest_notifier_pending = virtio_crypto_guest_notifier_pending;
+ vdc->get_vhost = virtio_crypto_get_vhost;
}
static void virtio_crypto_instance_init(Object *obj)
diff --git a/hw/virtio/virtio-input-host-pci.c b/hw/virtio/virtio-input-host-pci.c
index 0ac360de4f..cf8a9cf9e8 100644
--- a/hw/virtio/virtio-input-host-pci.c
+++ b/hw/virtio/virtio-input-host-pci.c
@@ -8,7 +8,7 @@
#include "qemu/osdep.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "hw/virtio/virtio-input.h"
#include "qemu/module.h"
#include "qom/object.h"
diff --git a/hw/virtio/virtio-input-pci.c b/hw/virtio/virtio-input-pci.c
index 48e9ff38e2..a9d0992389 100644
--- a/hw/virtio/virtio-input-pci.c
+++ b/hw/virtio/virtio-input-pci.c
@@ -8,7 +8,7 @@
#include "qemu/osdep.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-input.h"
#include "qemu/module.h"
diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c
index 6a1df7fe50..844d647704 100644
--- a/hw/virtio/virtio-iommu-pci.c
+++ b/hw/virtio/virtio-iommu-pci.c
@@ -11,7 +11,7 @@
#include "qemu/osdep.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 57c09d98a9..2597e166f9 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -1033,8 +1033,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
- virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
- sizeof(struct virtio_iommu_config));
+ virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config));
memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 5aca408726..30d03e987a 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -867,8 +867,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
vmem->block_size;
vmem->bitmap = bitmap_new(vmem->bitmap_size);
- virtio_init(vdev, TYPE_VIRTIO_MEM, VIRTIO_ID_MEM,
- sizeof(struct virtio_mem_config));
+ virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config));
vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);
host_memory_backend_set_mapped(vmem->memdev, true);
diff --git a/hw/virtio/virtio-net-pci.c b/hw/virtio/virtio-net-pci.c
index aa0b3caecb..e03543a70a 100644
--- a/hw/virtio/virtio-net-pci.c
+++ b/hw/virtio/virtio-net-pci.c
@@ -19,7 +19,7 @@
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-net.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "qapi/error.h"
#include "qemu/module.h"
#include "qom/object.h"
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 7cf1231c1c..0566ad7d00 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -33,11 +33,12 @@
#include "hw/pci/msix.h"
#include "hw/loader.h"
#include "sysemu/kvm.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "qemu/range.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/visitor.h"
#include "sysemu/replay.h"
+#include "trace.h"
#define VIRTIO_PCI_REGION_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_present(dev))
@@ -1380,6 +1381,7 @@ static void virtio_pci_notify_write(void *opaque, hwaddr addr,
unsigned queue = addr / virtio_pci_queue_mem_mult(proxy);
if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) {
+ trace_virtio_pci_notify_write(addr, val, size);
virtio_queue_notify(vdev, queue);
}
}
@@ -1393,6 +1395,7 @@ static void virtio_pci_notify_write_pio(void *opaque, hwaddr addr,
unsigned queue = val;
if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) {
+ trace_virtio_pci_notify_write_pio(addr, val, size);
virtio_queue_notify(vdev, queue);
}
}
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
deleted file mode 100644
index 2446dcd9ae..0000000000
--- a/hw/virtio/virtio-pci.h
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- * Virtio PCI Bindings
- *
- * Copyright IBM, Corp. 2007
- * Copyright (c) 2009 CodeSourcery
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- * Paul Brook <paul@codesourcery.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-
-#ifndef QEMU_VIRTIO_PCI_H
-#define QEMU_VIRTIO_PCI_H
-
-#include "hw/pci/msi.h"
-#include "hw/virtio/virtio-bus.h"
-#include "qom/object.h"
-
-
-/* virtio-pci-bus */
-
-typedef struct VirtioBusState VirtioPCIBusState;
-typedef struct VirtioBusClass VirtioPCIBusClass;
-
-#define TYPE_VIRTIO_PCI_BUS "virtio-pci-bus"
-DECLARE_OBJ_CHECKERS(VirtioPCIBusState, VirtioPCIBusClass,
- VIRTIO_PCI_BUS, TYPE_VIRTIO_PCI_BUS)
-
-enum {
- VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT,
- VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT,
- VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT,
- VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT,
- VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT,
- VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT,
- VIRTIO_PCI_FLAG_ATS_BIT,
- VIRTIO_PCI_FLAG_INIT_DEVERR_BIT,
- VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT,
- VIRTIO_PCI_FLAG_INIT_PM_BIT,
- VIRTIO_PCI_FLAG_INIT_FLR_BIT,
- VIRTIO_PCI_FLAG_AER_BIT,
- VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT,
-};
-
-/* Need to activate work-arounds for buggy guests at vmstate load. */
-#define VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION \
- (1 << VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT)
-
-/* Performance improves when virtqueue kick processing is decoupled from the
- * vcpu thread using ioeventfd for some devices. */
-#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
-
-/* virtio version flags */
-#define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT)
-
-/* migrate extra state */
-#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA (1 << VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT)
-
-/* have pio notification for modern device ? */
-#define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \
- (1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT)
-
-/* page per vq flag to be used by split drivers within guests */
-#define VIRTIO_PCI_FLAG_PAGE_PER_VQ \
- (1 << VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT)
-
-/* address space translation service */
-#define VIRTIO_PCI_FLAG_ATS (1 << VIRTIO_PCI_FLAG_ATS_BIT)
-
-/* Init error enabling flags */
-#define VIRTIO_PCI_FLAG_INIT_DEVERR (1 << VIRTIO_PCI_FLAG_INIT_DEVERR_BIT)
-
-/* Init Link Control register */
-#define VIRTIO_PCI_FLAG_INIT_LNKCTL (1 << VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT)
-
-/* Init Power Management */
-#define VIRTIO_PCI_FLAG_INIT_PM (1 << VIRTIO_PCI_FLAG_INIT_PM_BIT)
-
-/* Init Function Level Reset capability */
-#define VIRTIO_PCI_FLAG_INIT_FLR (1 << VIRTIO_PCI_FLAG_INIT_FLR_BIT)
-
-/* Advanced Error Reporting capability */
-#define VIRTIO_PCI_FLAG_AER (1 << VIRTIO_PCI_FLAG_AER_BIT)
-
-/* Page Aligned Address space Translation Service */
-#define VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED \
- (1 << VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT)
-
-typedef struct {
- MSIMessage msg;
- int virq;
- unsigned int users;
-} VirtIOIRQFD;
-
-/*
- * virtio-pci: This is the PCIDevice which has a virtio-pci-bus.
- */
-#define TYPE_VIRTIO_PCI "virtio-pci"
-OBJECT_DECLARE_TYPE(VirtIOPCIProxy, VirtioPCIClass, VIRTIO_PCI)
-
-struct VirtioPCIClass {
- PCIDeviceClass parent_class;
- DeviceRealize parent_dc_realize;
- void (*realize)(VirtIOPCIProxy *vpci_dev, Error **errp);
-};
-
-typedef struct VirtIOPCIRegion {
- MemoryRegion mr;
- uint32_t offset;
- uint32_t size;
- uint32_t type;
-} VirtIOPCIRegion;
-
-typedef struct VirtIOPCIQueue {
- uint16_t num;
- bool enabled;
- uint32_t desc[2];
- uint32_t avail[2];
- uint32_t used[2];
-} VirtIOPCIQueue;
-
-struct VirtIOPCIProxy {
- PCIDevice pci_dev;
- MemoryRegion bar;
- union {
- struct {
- VirtIOPCIRegion common;
- VirtIOPCIRegion isr;
- VirtIOPCIRegion device;
- VirtIOPCIRegion notify;
- VirtIOPCIRegion notify_pio;
- };
- VirtIOPCIRegion regs[5];
- };
- MemoryRegion modern_bar;
- MemoryRegion io_bar;
- uint32_t legacy_io_bar_idx;
- uint32_t msix_bar_idx;
- uint32_t modern_io_bar_idx;
- uint32_t modern_mem_bar_idx;
- int config_cap;
- uint32_t flags;
- bool disable_modern;
- bool ignore_backend_features;
- OnOffAuto disable_legacy;
- uint32_t class_code;
- uint32_t nvectors;
- uint32_t dfselect;
- uint32_t gfselect;
- uint32_t guest_features[2];
- VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX];
-
- VirtIOIRQFD *vector_irqfd;
- int nvqs_with_notifiers;
- VirtioBusState bus;
-};
-
-static inline bool virtio_pci_modern(VirtIOPCIProxy *proxy)
-{
- return !proxy->disable_modern;
-}
-
-static inline bool virtio_pci_legacy(VirtIOPCIProxy *proxy)
-{
- return proxy->disable_legacy == ON_OFF_AUTO_OFF;
-}
-
-static inline void virtio_pci_force_virtio_1(VirtIOPCIProxy *proxy)
-{
- proxy->disable_modern = false;
- proxy->disable_legacy = ON_OFF_AUTO_ON;
-}
-
-static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy)
-{
- proxy->disable_modern = true;
-}
-
-/*
- * virtio-input-pci: This extends VirtioPCIProxy.
- */
-#define TYPE_VIRTIO_INPUT_PCI "virtio-input-pci"
-
-/* Virtio ABI version, if we increment this, we break the guest driver. */
-#define VIRTIO_PCI_ABI_VERSION 0
-
-/* Input for virtio_pci_types_register() */
-typedef struct VirtioPCIDeviceTypeInfo {
- /*
- * Common base class for the subclasses below.
- *
- * Required only if transitional_name or non_transitional_name is set.
- *
- * We need a separate base type instead of making all types
- * inherit from generic_name for two reasons:
- * 1) generic_name implements INTERFACE_PCIE_DEVICE, but
- * transitional_name does not.
- * 2) generic_name has the "disable-legacy" and "disable-modern"
- * properties, transitional_name and non_transitional name don't.
- */
- const char *base_name;
- /*
- * Generic device type. Optional.
- *
- * Supports both transitional and non-transitional modes,
- * using the disable-legacy and disable-modern properties.
- * If disable-legacy=auto, (non-)transitional mode is selected
- * depending on the bus where the device is plugged.
- *
- * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE,
- * but PCI Express is supported only in non-transitional mode.
- *
- * The only type implemented by QEMU 3.1 and older.
- */
- const char *generic_name;
- /*
- * The transitional device type. Optional.
- *
- * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE.
- */
- const char *transitional_name;
- /*
- * The non-transitional device type. Optional.
- *
- * Implements INTERFACE_CONVENTIONAL_PCI_DEVICE only.
- */
- const char *non_transitional_name;
-
- /* Parent type. If NULL, TYPE_VIRTIO_PCI is used */
- const char *parent;
-
- /* Same as TypeInfo fields: */
- size_t instance_size;
- size_t class_size;
- void (*instance_init)(Object *obj);
- void (*class_init)(ObjectClass *klass, void *data);
- InterfaceInfo *interfaces;
-} VirtioPCIDeviceTypeInfo;
-
-/* Register virtio-pci type(s). @t must be static. */
-void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t);
-
-/**
- * virtio_pci_optimal_num_queues:
- * @fixed_queues: number of queues that are always present
- *
- * Returns: The optimal number of queues for a multi-queue device, excluding
- * @fixed_queues.
- */
-unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues);
-
-#endif
diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c
index 5dd21c2c44..a1abfe0e1b 100644
--- a/hw/virtio/virtio-pmem.c
+++ b/hw/virtio/virtio-pmem.c
@@ -122,8 +122,7 @@ static void virtio_pmem_realize(DeviceState *dev, Error **errp)
}
host_memory_backend_set_mapped(pmem->memdev, true);
- virtio_init(vdev, TYPE_VIRTIO_PMEM, VIRTIO_ID_PMEM,
- sizeof(struct virtio_pmem_config));
+ virtio_init(vdev, VIRTIO_ID_PMEM, sizeof(struct virtio_pmem_config));
pmem->rq_vq = virtio_add_queue(vdev, 128, virtio_pmem_flush);
}
diff --git a/hw/virtio/virtio-rng-pci.c b/hw/virtio/virtio-rng-pci.c
index c1f916268b..151ece6f94 100644
--- a/hw/virtio/virtio-rng-pci.c
+++ b/hw/virtio/virtio-rng-pci.c
@@ -11,7 +11,7 @@
#include "qemu/osdep.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "hw/virtio/virtio-rng.h"
#include "qapi/error.h"
#include "qemu/module.h"
diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c
index cc8e9f775d..7e12fc03bf 100644
--- a/hw/virtio/virtio-rng.c
+++ b/hw/virtio/virtio-rng.c
@@ -215,7 +215,7 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp)
return;
}
- virtio_init(vdev, "virtio-rng", VIRTIO_ID_RNG, 0);
+ virtio_init(vdev, VIRTIO_ID_RNG, 0);
vrng->vq = virtio_add_queue(vdev, 8, handle_input);
vrng->quota_remaining = vrng->conf.max_bytes;
diff --git a/hw/virtio/virtio-scsi-pci.c b/hw/virtio/virtio-scsi-pci.c
index 97fab74236..e8e3442f38 100644
--- a/hw/virtio/virtio-scsi-pci.c
+++ b/hw/virtio/virtio-scsi-pci.c
@@ -18,7 +18,7 @@
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-scsi.h"
#include "qemu/module.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "qom/object.h"
typedef struct VirtIOSCSIPCI VirtIOSCSIPCI;
diff --git a/hw/virtio/virtio-serial-pci.c b/hw/virtio/virtio-serial-pci.c
index 35bcd961c9..cea31adcc4 100644
--- a/hw/virtio/virtio-serial-pci.c
+++ b/hw/virtio/virtio-serial-pci.c
@@ -20,7 +20,7 @@
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-serial.h"
#include "qemu/module.h"
-#include "virtio-pci.h"
+#include "hw/virtio/virtio-pci.h"
#include "qom/object.h"
typedef struct VirtIOSerialPCI VirtIOSerialPCI;
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 67a873f54a..5d607aeaa0 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -132,6 +132,56 @@ struct VirtQueue
QLIST_ENTRY(VirtQueue) node;
};
+const char *virtio_device_names[] = {
+ [VIRTIO_ID_NET] = "virtio-net",
+ [VIRTIO_ID_BLOCK] = "virtio-blk",
+ [VIRTIO_ID_CONSOLE] = "virtio-serial",
+ [VIRTIO_ID_RNG] = "virtio-rng",
+ [VIRTIO_ID_BALLOON] = "virtio-balloon",
+ [VIRTIO_ID_IOMEM] = "virtio-iomem",
+ [VIRTIO_ID_RPMSG] = "virtio-rpmsg",
+ [VIRTIO_ID_SCSI] = "virtio-scsi",
+ [VIRTIO_ID_9P] = "virtio-9p",
+ [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan",
+ [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial",
+ [VIRTIO_ID_CAIF] = "virtio-caif",
+ [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon",
+ [VIRTIO_ID_GPU] = "virtio-gpu",
+ [VIRTIO_ID_CLOCK] = "virtio-clk",
+ [VIRTIO_ID_INPUT] = "virtio-input",
+ [VIRTIO_ID_VSOCK] = "vhost-vsock",
+ [VIRTIO_ID_CRYPTO] = "virtio-crypto",
+ [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal",
+ [VIRTIO_ID_PSTORE] = "virtio-pstore",
+ [VIRTIO_ID_IOMMU] = "virtio-iommu",
+ [VIRTIO_ID_MEM] = "virtio-mem",
+ [VIRTIO_ID_SOUND] = "virtio-sound",
+ [VIRTIO_ID_FS] = "virtio-user-fs",
+ [VIRTIO_ID_PMEM] = "virtio-pmem",
+ [VIRTIO_ID_RPMB] = "virtio-rpmb",
+ [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim",
+ [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder",
+ [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder",
+ [VIRTIO_ID_SCMI] = "virtio-scmi",
+ [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod",
+ [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c",
+ [VIRTIO_ID_WATCHDOG] = "virtio-watchdog",
+ [VIRTIO_ID_CAN] = "virtio-can",
+ [VIRTIO_ID_DMABUF] = "virtio-dmabuf",
+ [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv",
+ [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol",
+ [VIRTIO_ID_BT] = "virtio-bluetooth",
+ [VIRTIO_ID_GPIO] = "virtio-gpio"
+};
+
+static const char *virtio_id_to_name(uint16_t device_id)
+{
+ assert(device_id < G_N_ELEMENTS(virtio_device_names));
+ const char *name = virtio_device_names[device_id];
+ assert(name != NULL);
+ return name;
+}
+
/* Called within call_rcu(). */
static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
{
@@ -3207,8 +3257,7 @@ void virtio_instance_init_common(Object *proxy_obj, void *data,
qdev_alias_all_properties(vdev, proxy_obj);
}
-void virtio_init(VirtIODevice *vdev, const char *name,
- uint16_t device_id, size_t config_size)
+void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size)
{
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
@@ -3222,6 +3271,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
vdev->start_on_kick = false;
vdev->started = false;
+ vdev->vhost_started = false;
vdev->device_id = device_id;
vdev->status = 0;
qatomic_set(&vdev->isr, 0);
@@ -3237,7 +3287,7 @@ void virtio_init(VirtIODevice *vdev, const char *name,
vdev->vq[i].host_notifier_enabled = false;
}
- vdev->name = name;
+ vdev->name = virtio_id_to_name(device_id);
vdev->config_len = config_size;
if (vdev->config_len) {
vdev->config = g_malloc0(config_size);