diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2022-05-16 16:31:01 -0700 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2022-05-16 16:31:01 -0700 |
commit | eec398119fc6911d99412c37af06a6bc27871f85 (patch) | |
tree | a315e93a78329d2d2de9b087e890024fc6b39d29 /hw | |
parent | afdb415e67e13e8726edc21238c9883447b2c704 (diff) | |
parent | 6852c21db229c4bf4c1db772444bdfbbd027e5b8 (diff) |
Merge tag 'for_upstream' of git://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
virtio,pc,pci: fixes,cleanups,features
most of CXL support
fixes, cleanups all over the place
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
# -----BEGIN PGP SIGNATURE-----
#
# iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmKCuLIPHG1zdEByZWRo
# YXQuY29tAAoJECgfDbjSjVRpdDUH/12SmWaAo+0+SdIHgWFFxsmg3t/EdcO38fgi
# MV+GpYdbp6TlU3jdQhrMZYmFdkVVydBdxk93ujCLbFS0ixTsKj31j0IbZMfdcGgv
# SLqnV+E3JdHqnGP39q9a9rdwYWyqhkgHoldxilIFW76ngOSapaZVvnwnOMAMkf77
# 1LieL4/Xq7N9Ho86Zrs3IczQcf0czdJRDaFaSIu8GaHl8ELyuPhlSm6CSqqrEEWR
# PA/COQsLDbLOMxbfCi5v88r5aaxmGNZcGbXQbiH9qVHw65nlHyLH9UkNTdJn1du1
# f2GYwwa7eekfw/LCvvVwxO1znJrj02sfFai7aAtQYbXPvjvQiqA=
# =xdSk
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 16 May 2022 01:48:50 PM PDT
# gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg: issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [undefined]
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469
* tag 'for_upstream' of git://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (86 commits)
vhost-user-scsi: avoid unlink(NULL) with fd passing
virtio-net: don't handle mq request in userspace handler for vhost-vdpa
vhost-vdpa: change name and polarity for vhost_vdpa_one_time_request()
vhost-vdpa: backend feature should set only once
vhost-net: fix improper cleanup in vhost_net_start
vhost-vdpa: fix improper cleanup in net_init_vhost_vdpa
virtio-net: align ctrl_vq index for non-mq guest for vhost_vdpa
virtio-net: setup vhost_dev and notifiers for cvq only when feature is negotiated
hw/i386/amd_iommu: Fix IOMMU event log encoding errors
hw/i386: Make pic a property of common x86 base machine type
hw/i386: Make pit a property of common x86 base machine type
include/hw/pci/pcie_host: Correct PCIE_MMCFG_SIZE_MAX
include/hw/pci/pcie_host: Correct PCIE_MMCFG_BUS_MASK
docs/vhost-user: Clarifications for VHOST_USER_ADD/REM_MEM_REG
vhost-user: more master/slave things
virtio: add vhost support for virtio devices
virtio: drop name parameter for virtio_init()
virtio/vhost-user: dynamically assign VhostUserHostNotifiers
hw/virtio/vhost-user: don't suppress F_CONFIG when supported
include/hw: start documenting the vhost API
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'hw')
89 files changed, 3211 insertions, 536 deletions
diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 54ee93b71f..5f522e68e9 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -216,7 +216,7 @@ static void virtio_9p_device_realize(DeviceState *dev, Error **errp) } v->config_size = sizeof(struct virtio_9p_config) + strlen(s->fsconf.tag); - virtio_init(vdev, "virtio-9p", VIRTIO_ID_9P, v->config_size); + virtio_init(vdev, VIRTIO_ID_9P, v->config_size); v->vq = virtio_add_queue(vdev, MAX_REQ, handle_9p_output); } diff --git a/hw/Kconfig b/hw/Kconfig index ad20cce0a9..50e0952889 100644 --- a/hw/Kconfig +++ b/hw/Kconfig @@ -6,6 +6,7 @@ source audio/Kconfig source block/Kconfig source char/Kconfig source core/Kconfig +source cxl/Kconfig source display/Kconfig source dma/Kconfig source gpio/Kconfig diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig index 19caebde6c..3703aca212 100644 --- a/hw/acpi/Kconfig +++ b/hw/acpi/Kconfig @@ -5,6 +5,7 @@ config ACPI_X86 bool select ACPI select ACPI_NVDIMM + select ACPI_CXL select ACPI_CPU_HOTPLUG select ACPI_MEMORY_HOTPLUG select ACPI_HMAT @@ -66,3 +67,7 @@ config ACPI_ERST bool default y depends on ACPI && PCI + +config ACPI_CXL + bool + depends on ACPI diff --git a/hw/acpi/cxl-stub.c b/hw/acpi/cxl-stub.c new file mode 100644 index 0000000000..15bc21076b --- /dev/null +++ b/hw/acpi/cxl-stub.c @@ -0,0 +1,12 @@ + +/* + * Stubs for ACPI platforms that don't support CXl + */ +#include "qemu/osdep.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/cxl.h" + +void build_cxl_osc_method(Aml *dev) +{ + g_assert_not_reached(); +} diff --git a/hw/acpi/cxl.c b/hw/acpi/cxl.c new file mode 100644 index 0000000000..31d5235136 --- /dev/null +++ b/hw/acpi/cxl.c @@ -0,0 +1,257 @@ +/* + * CXL ACPI Implementation + * + * Copyright(C) 2020 Intel Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +#include "qemu/osdep.h" +#include "hw/sysbus.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_host.h" +#include "hw/cxl/cxl.h" +#include "hw/mem/memory-device.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/bios-linker-loader.h" +#include "hw/acpi/cxl.h" +#include "qapi/error.h" +#include "qemu/uuid.h" + +static void cedt_build_chbs(GArray *table_data, PXBDev *cxl) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(cxl->cxl.cxl_host_bridge); + struct MemoryRegion *mr = sbd->mmio[0].memory; + + /* Type */ + build_append_int_noprefix(table_data, 0, 1); + + /* Reserved */ + build_append_int_noprefix(table_data, 0, 1); + + /* Record Length */ + build_append_int_noprefix(table_data, 32, 2); + + /* UID - currently equal to bus number */ + build_append_int_noprefix(table_data, cxl->bus_nr, 4); + + /* Version */ + build_append_int_noprefix(table_data, 1, 4); + + /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); + + /* Base - subregion within a container that is in PA space */ + build_append_int_noprefix(table_data, mr->container->addr + mr->addr, 8); + + /* Length */ + build_append_int_noprefix(table_data, memory_region_size(mr), 8); +} + +/* + * CFMWS entries in CXL 2.0 ECN: CEDT CFMWS & QTG _DSM. + * Interleave ways encoding in CXL 2.0 ECN: 3, 6, 12 and 16-way memory + * interleaving. + */ +static void cedt_build_cfmws(GArray *table_data, MachineState *ms) +{ + CXLState *cxls = ms->cxl_devices_state; + GList *it; + + for (it = cxls->fixed_windows; it; it = it->next) { + CXLFixedWindow *fw = it->data; + int i; + + /* Type */ + build_append_int_noprefix(table_data, 1, 1); + + /* Reserved */ + build_append_int_noprefix(table_data, 0, 1); + + /* Record Length */ + build_append_int_noprefix(table_data, 36 + 4 * fw->num_targets, 2); + + /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); + + /* Base HPA */ + build_append_int_noprefix(table_data, fw->mr.addr, 8); + + /* Window Size */ + build_append_int_noprefix(table_data, fw->size, 8); + + /* Host Bridge Interleave Ways */ + build_append_int_noprefix(table_data, fw->enc_int_ways, 1); + + /* Host Bridge Interleave Arithmetic */ + build_append_int_noprefix(table_data, 0, 1); + + /* Reserved */ + build_append_int_noprefix(table_data, 0, 2); + + /* Host Bridge Interleave Granularity */ + build_append_int_noprefix(table_data, fw->enc_int_gran, 4); + + /* Window Restrictions */ + build_append_int_noprefix(table_data, 0x0f, 2); /* No restrictions */ + + /* QTG ID */ + build_append_int_noprefix(table_data, 0, 2); + + /* Host Bridge List (list of UIDs - currently bus_nr) */ + for (i = 0; i < fw->num_targets; i++) { + g_assert(fw->target_hbs[i]); + build_append_int_noprefix(table_data, fw->target_hbs[i]->bus_nr, 4); + } + } +} + +static int cxl_foreach_pxb_hb(Object *obj, void *opaque) +{ + Aml *cedt = opaque; + + if (object_dynamic_cast(obj, TYPE_PXB_CXL_DEVICE)) { + cedt_build_chbs(cedt->buf, PXB_CXL_DEV(obj)); + } + + return 0; +} + +void cxl_build_cedt(MachineState *ms, GArray *table_offsets, GArray *table_data, + BIOSLinker *linker, const char *oem_id, + const char *oem_table_id) +{ + Aml *cedt; + AcpiTable table = { .sig = "CEDT", .rev = 1, .oem_id = oem_id, + .oem_table_id = oem_table_id }; + + acpi_add_table(table_offsets, table_data); + acpi_table_begin(&table, table_data); + cedt = init_aml_allocator(); + + /* reserve space for CEDT header */ + + object_child_foreach_recursive(object_get_root(), cxl_foreach_pxb_hb, cedt); + cedt_build_cfmws(cedt->buf, ms); + + /* copy AML table into ACPI tables blob and patch header there */ + g_array_append_vals(table_data, cedt->buf->data, cedt->buf->len); + free_aml_allocator(); + + acpi_table_end(linker, &table); +} + +static Aml *__build_cxl_osc_method(void) +{ + Aml *method, *if_uuid, *else_uuid, *if_arg1_not_1, *if_cxl, *if_caps_masked; + Aml *a_ctrl = aml_local(0); + Aml *a_cdw1 = aml_name("CDW1"); + + method = aml_method("_OSC", 4, AML_NOTSERIALIZED); + /* CDW1 is used for the return value so is present whether or not a match occurs */ + aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1")); + + /* + * Generate shared section between: + * CXL 2.0 - 9.14.2.1.4 and + * PCI Firmware Specification 3.0 + * 4.5.1. _OSC Interface for PCI Host Bridge Devices + * The _OSC interface for a PCI/PCI-X/PCI Express hierarchy is + * identified by the Universal Unique IDentifier (UUID) + * 33DB4D5B-1FF7-401C-9657-7441C03DD766 + * The _OSC interface for a CXL Host bridge is + * identified by the UUID 68F2D50B-C469-4D8A-BD3D-941A103FD3FC + * A CXL Host bridge is compatible with a PCI host bridge so + * for the shared section match both. + */ + if_uuid = aml_if( + aml_lor(aml_equal(aml_arg(0), + aml_touuid("33DB4D5B-1FF7-401C-9657-7441C03DD766")), + aml_equal(aml_arg(0), + aml_touuid("68F2D50B-C469-4D8A-BD3D-941A103FD3FC")))); + aml_append(if_uuid, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2")); + aml_append(if_uuid, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3")); + + aml_append(if_uuid, aml_store(aml_name("CDW3"), a_ctrl)); + + /* + * + * Allows OS control for all 5 features: + * PCIeHotplug SHPCHotplug PME AER PCIeCapability + */ + aml_append(if_uuid, aml_and(a_ctrl, aml_int(0x1F), a_ctrl)); + + /* + * Check _OSC revision. + * PCI Firmware specification 3.3 and CXL 2.0 both use revision 1 + * Unknown Revision is CDW1 - BIT (3) + */ + if_arg1_not_1 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(0x1)))); + aml_append(if_arg1_not_1, aml_or(a_cdw1, aml_int(0x08), a_cdw1)); + aml_append(if_uuid, if_arg1_not_1); + + if_caps_masked = aml_if(aml_lnot(aml_equal(aml_name("CDW3"), a_ctrl))); + + /* Capability bits were masked */ + aml_append(if_caps_masked, aml_or(a_cdw1, aml_int(0x10), a_cdw1)); + aml_append(if_uuid, if_caps_masked); + + aml_append(if_uuid, aml_store(aml_name("CDW2"), aml_name("SUPP"))); + aml_append(if_uuid, aml_store(aml_name("CDW3"), aml_name("CTRL"))); + + /* Update DWORD3 (the return value) */ + aml_append(if_uuid, aml_store(a_ctrl, aml_name("CDW3"))); + + /* CXL only section as per CXL 2.0 - 9.14.2.1.4 */ + if_cxl = aml_if(aml_equal( + aml_arg(0), aml_touuid("68F2D50B-C469-4D8A-BD3D-941A103FD3FC"))); + /* CXL support field */ + aml_append(if_cxl, aml_create_dword_field(aml_arg(3), aml_int(12), "CDW4")); + /* CXL capabilities */ + aml_append(if_cxl, aml_create_dword_field(aml_arg(3), aml_int(16), "CDW5")); + aml_append(if_cxl, aml_store(aml_name("CDW4"), aml_name("SUPC"))); + aml_append(if_cxl, aml_store(aml_name("CDW5"), aml_name("CTRC"))); + + /* CXL 2.0 Port/Device Register access */ + aml_append(if_cxl, + aml_or(aml_name("CDW5"), aml_int(0x1), aml_name("CDW5"))); + aml_append(if_uuid, if_cxl); + + aml_append(if_uuid, aml_return(aml_arg(3))); + aml_append(method, if_uuid); + + /* + * If no UUID matched, return Unrecognized UUID via Arg3 DWord 1 + * ACPI 6.4 - 6.2.11 + * Unrecognised UUID - BIT(2) + */ + else_uuid = aml_else(); + + aml_append(else_uuid, + aml_or(aml_name("CDW1"), aml_int(0x4), aml_name("CDW1"))); + aml_append(else_uuid, aml_return(aml_arg(3))); + aml_append(method, else_uuid); + + return method; +} + +void build_cxl_osc_method(Aml *dev) +{ + aml_append(dev, aml_name_decl("SUPP", aml_int(0))); + aml_append(dev, aml_name_decl("CTRL", aml_int(0))); + aml_append(dev, aml_name_decl("SUPC", aml_int(0))); + aml_append(dev, aml_name_decl("CTRC", aml_int(0))); + aml_append(dev, __build_cxl_osc_method()); +} diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build index 8bea2e6933..cea2f5f93a 100644 --- a/hw/acpi/meson.build +++ b/hw/acpi/meson.build @@ -13,6 +13,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_MEMORY_HOTPLUG', if_false: files('acpi-mem-hotplu acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_true: files('nvdimm.c')) acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_false: files('acpi-nvdimm-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_PCI', if_true: files('pci.c')) +acpi_ss.add(when: 'CONFIG_ACPI_CXL', if_true: files('cxl.c'), if_false: files('cxl-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_VMGENID', if_true: files('vmgenid.c')) acpi_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device.c')) acpi_ss.add(when: 'CONFIG_ACPI_HMAT', if_true: files('hmat.c')) @@ -33,4 +34,5 @@ softmmu_ss.add_all(when: 'CONFIG_ACPI', if_true: acpi_ss) softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('acpi-stub.c', 'aml-build-stub.c', 'acpi-x86-stub.c', 'ipmi-stub.c', 'ghes-stub.c', 'acpi-mem-hotplug-stub.c', 'acpi-cpu-hotplug-stub.c', - 'acpi-pci-hotplug-stub.c', 'acpi-nvdimm-stub.c')) + 'acpi-pci-hotplug-stub.c', 'acpi-nvdimm-stub.c', + 'cxl-stub.c')) diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index 97f3b38019..219262a8da 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -29,6 +29,7 @@ config ARM_VIRT select ACPI_APEI select ACPI_VIOT select VIRTIO_MEM_SUPPORTED + select ACPI_CXL config CHEETAH bool diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 1a42ae9187..5dca4eab09 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -491,7 +491,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, + virtio_init(vdev, VIRTIO_ID_BLOCK, sizeof(struct virtio_blk_config)); s->virtqs = g_new(VirtQueue *, s->num_queues); @@ -569,6 +569,12 @@ static void vhost_user_blk_instance_init(Object *obj) "/disk@0,0", DEVICE(obj)); } +static struct vhost_dev *vhost_user_blk_get_vhost(VirtIODevice *vdev) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + return &s->dev; +} + static const VMStateDescription vmstate_vhost_user_blk = { .name = "vhost-user-blk", .minimum_version_id = 1, @@ -603,6 +609,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data) vdc->get_features = vhost_user_blk_get_features; vdc->set_status = vhost_user_blk_set_status; vdc->reset = vhost_user_blk_reset; + vdc->get_vhost = vhost_user_blk_get_vhost; } static const TypeInfo vhost_user_blk_info = { diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 6a1cc41877..cd804795c6 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1206,7 +1206,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) virtio_blk_set_config_size(s, s->host_features); - virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, s->config_size); + virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); s->blk = conf->conf.blk; s->rq = NULL; diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index 6048d408b8..7d4601cb5d 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -1044,8 +1044,7 @@ static void virtio_serial_device_realize(DeviceState *dev, Error **errp) VIRTIO_CONSOLE_F_EMERG_WRITE)) { config_size = offsetof(struct virtio_console_config, emerg_wr); } - virtio_init(vdev, "virtio-serial", VIRTIO_ID_CONSOLE, - config_size); + virtio_init(vdev, VIRTIO_ID_CONSOLE, config_size); /* Spawn a new virtio-serial bus on which the ports will ride as devices */ qbus_init(&vser->bus, sizeof(vser->bus), TYPE_VIRTIO_SERIAL_BUS, diff --git a/hw/core/machine.c b/hw/core/machine.c index 3264c1e11d..b03d9192ba 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -33,6 +33,7 @@ #include "sysemu/qtest.h" #include "hw/pci/pci.h" #include "hw/mem/nvdimm.h" +#include "hw/cxl/cxl.h" #include "migration/global_state.h" #include "migration/vmstate.h" #include "exec/confidential-guest-support.h" @@ -625,6 +626,20 @@ static void machine_set_nvdimm_persistence(Object *obj, const char *value, nvdimms_state->persistence_string = g_strdup(value); } +static bool machine_get_cxl(Object *obj, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + return ms->cxl_devices_state->is_enabled; +} + +static void machine_set_cxl(Object *obj, bool value, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + ms->cxl_devices_state->is_enabled = value; +} + void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type) { QAPI_LIST_PREPEND(mc->allowed_dynamic_sysbus_devices, g_strdup(type)); @@ -911,6 +926,8 @@ static void machine_class_init(ObjectClass *oc, void *data) mc->default_ram_size = 128 * MiB; mc->rom_file_has_mr = true; + /* Few machines support CXL, so default to off */ + mc->cxl_supported = false; /* numa node memory size aligned on 8MB by default. * On Linux, each node's border has to be 8MB aligned */ @@ -1071,6 +1088,16 @@ static void machine_initfn(Object *obj) "Valid values are cpu, mem-ctrl"); } + if (mc->cxl_supported) { + Object *obj = OBJECT(ms); + + ms->cxl_devices_state = g_new0(CXLState, 1); + object_property_add_bool(obj, "cxl", machine_get_cxl, machine_set_cxl); + object_property_set_description(obj, "cxl", + "Set on/off to enable/disable " + "CXL instantiation"); + } + if (mc->cpu_index_to_instance_props && mc->get_default_cpu_node_id) { ms->numa_state = g_new0(NumaState, 1); object_property_add_bool(obj, "hmat", @@ -1108,6 +1135,7 @@ static void machine_finalize(Object *obj) g_free(ms->device_memory); g_free(ms->nvdimms_state); g_free(ms->numa_state); + g_free(ms->cxl_devices_state); } bool machine_usb(MachineState *machine) diff --git a/hw/cxl/Kconfig b/hw/cxl/Kconfig new file mode 100644 index 0000000000..8e67519b16 --- /dev/null +++ b/hw/cxl/Kconfig @@ -0,0 +1,3 @@ +config CXL + bool + default y if PCI_EXPRESS diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c new file mode 100644 index 0000000000..7985c9bfca --- /dev/null +++ b/hw/cxl/cxl-component-utils.c @@ -0,0 +1,396 @@ +/* + * CXL Utility library for components + * + * Copyright(C) 2020 Intel Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "hw/pci/pci.h" +#include "hw/cxl/cxl.h" + +static uint64_t cxl_cache_mem_read_reg(void *opaque, hwaddr offset, + unsigned size) +{ + CXLComponentState *cxl_cstate = opaque; + ComponentRegisters *cregs = &cxl_cstate->crb; + + if (size == 8) { + qemu_log_mask(LOG_UNIMP, + "CXL 8 byte cache mem registers not implemented\n"); + return 0; + } + + if (cregs->special_ops && cregs->special_ops->read) { + return cregs->special_ops->read(cxl_cstate, offset, size); + } else { + return cregs->cache_mem_registers[offset / sizeof(*cregs->cache_mem_registers)]; + } +} + +static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset, + uint32_t value) +{ + ComponentRegisters *cregs = &cxl_cstate->crb; + uint32_t *cache_mem = cregs->cache_mem_registers; + bool should_commit = false; + + switch (offset) { + case A_CXL_HDM_DECODER0_CTRL: + should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); + break; + default: + break; + } + + memory_region_transaction_begin(); + stl_le_p((uint8_t *)cache_mem + offset, value); + if (should_commit) { + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMIT, 0); + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, ERR, 0); + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); + } + memory_region_transaction_commit(); +} + +static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value, + unsigned size) +{ + CXLComponentState *cxl_cstate = opaque; + ComponentRegisters *cregs = &cxl_cstate->crb; + uint32_t mask; + + if (size == 8) { + qemu_log_mask(LOG_UNIMP, + "CXL 8 byte cache mem registers not implemented\n"); + return; + } + mask = cregs->cache_mem_regs_write_mask[offset / sizeof(*cregs->cache_mem_regs_write_mask)]; + value &= mask; + /* RO bits should remain constant. Done by reading existing value */ + value |= ~mask & cregs->cache_mem_registers[offset / sizeof(*cregs->cache_mem_registers)]; + if (cregs->special_ops && cregs->special_ops->write) { + cregs->special_ops->write(cxl_cstate, offset, value, size); + return; + } + + if (offset >= A_CXL_HDM_DECODER_CAPABILITY && + offset <= A_CXL_HDM_DECODER0_TARGET_LIST_HI) { + dumb_hdm_handler(cxl_cstate, offset, value); + } else { + cregs->cache_mem_registers[offset / sizeof(*cregs->cache_mem_registers)] = value; + } +} + +/* + * 8.2.3 + * The access restrictions specified in Section 8.2.2 also apply to CXL 2.0 + * Component Registers. + * + * 8.2.2 + * • A 32 bit register shall be accessed as a 4 Bytes quantity. Partial + * reads are not permitted. + * • A 64 bit register shall be accessed as a 8 Bytes quantity. Partial + * reads are not permitted. + * + * As of the spec defined today, only 4 byte registers exist. + */ +static const MemoryRegionOps cache_mem_ops = { + .read = cxl_cache_mem_read_reg, + .write = cxl_cache_mem_write_reg, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + .unaligned = false, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + }, +}; + +void cxl_component_register_block_init(Object *obj, + CXLComponentState *cxl_cstate, + const char *type) +{ + ComponentRegisters *cregs = &cxl_cstate->crb; + + memory_region_init(&cregs->component_registers, obj, type, + CXL2_COMPONENT_BLOCK_SIZE); + + /* io registers controls link which we don't care about in QEMU */ + memory_region_init_io(&cregs->io, obj, NULL, cregs, ".io", + CXL2_COMPONENT_IO_REGION_SIZE); + memory_region_init_io(&cregs->cache_mem, obj, &cache_mem_ops, cregs, + ".cache_mem", CXL2_COMPONENT_CM_REGION_SIZE); + + memory_region_add_subregion(&cregs->component_registers, 0, &cregs->io); + memory_region_add_subregion(&cregs->component_registers, + CXL2_COMPONENT_IO_REGION_SIZE, + &cregs->cache_mem); +} + +static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk) +{ + /* + * Error status is RW1C but given bits are not yet set, it can + * be handled as RO. + */ + reg_state[R_CXL_RAS_UNC_ERR_STATUS] = 0; + /* Bits 12-13 and 17-31 reserved in CXL 2.0 */ + reg_state[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff; + write_msk[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff; + reg_state[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff; + write_msk[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff; + reg_state[R_CXL_RAS_COR_ERR_STATUS] = 0; + reg_state[R_CXL_RAS_COR_ERR_MASK] = 0x7f; + write_msk[R_CXL_RAS_COR_ERR_MASK] = 0x7f; + /* CXL switches and devices must set */ + reg_state[R_CXL_RAS_ERR_CAP_CTRL] = 0x00; +} + +static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk) +{ + int decoder_count = 1; + int i; + + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, DECODER_COUNT, + cxl_decoder_count_enc(decoder_count)); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 1); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_256B, 1); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, INTERLEAVE_4K, 1); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, POISON_ON_ERR_CAP, 0); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_GLOBAL_CONTROL, + HDM_DECODER_ENABLE, 0); + write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3; + for (i = 0; i < decoder_count; i++) { + write_msk[R_CXL_HDM_DECODER0_BASE_LO + i * 0x20] = 0xf0000000; + write_msk[R_CXL_HDM_DECODER0_BASE_HI + i * 0x20] = 0xffffffff; + write_msk[R_CXL_HDM_DECODER0_SIZE_LO + i * 0x20] = 0xf0000000; + write_msk[R_CXL_HDM_DECODER0_SIZE_HI + i * 0x20] = 0xffffffff; + write_msk[R_CXL_HDM_DECODER0_CTRL + i * 0x20] = 0x13ff; + } +} + +void cxl_component_register_init_common(uint32_t *reg_state, uint32_t *write_msk, + enum reg_type type) +{ + int caps = 0; + + /* + * In CXL 2.0 the capabilities required for each CXL component are such that, + * with the ordering chosen here, a single number can be used to define + * which capabilities should be provided. + */ + switch (type) { + case CXL2_DOWNSTREAM_PORT: + case CXL2_DEVICE: + /* RAS, Link */ + caps = 2; + break; + case CXL2_UPSTREAM_PORT: + case CXL2_TYPE3_DEVICE: + case CXL2_LOGICAL_DEVICE: + /* + HDM */ + caps = 3; + break; + case CXL2_ROOT_PORT: + /* + Extended Security, + Snoop */ + caps = 5; + break; + default: + abort(); + } + + memset(reg_state, 0, CXL2_COMPONENT_CM_REGION_SIZE); + + /* CXL Capability Header Register */ + ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ID, 1); + ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, VERSION, 1); + ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, CACHE_MEM_VERSION, 1); + ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ARRAY_SIZE, caps); + +#define init_cap_reg(reg, id, version) \ + QEMU_BUILD_BUG_ON(CXL_##reg##_REGISTERS_OFFSET == 0); \ + do { \ + int which = R_CXL_##reg##_CAPABILITY_HEADER; \ + reg_state[which] = FIELD_DP32(reg_state[which], \ + CXL_##reg##_CAPABILITY_HEADER, ID, id); \ + reg_state[which] = \ + FIELD_DP32(reg_state[which], CXL_##reg##_CAPABILITY_HEADER, \ + VERSION, version); \ + reg_state[which] = \ + FIELD_DP32(reg_state[which], CXL_##reg##_CAPABILITY_HEADER, PTR, \ + CXL_##reg##_REGISTERS_OFFSET); \ + } while (0) + + init_cap_reg(RAS, 2, 2); + ras_init_common(reg_state, write_msk); + + init_cap_reg(LINK, 4, 2); + + if (caps < 3) { + return; + } + + init_cap_reg(HDM, 5, 1); + hdm_init_common(reg_state, write_msk); + + if (caps < 5) { + return; + } + + init_cap_reg(EXTSEC, 6, 1); + init_cap_reg(SNOOP, 8, 1); + +#undef init_cap_reg +} + +/* + * Helper to creates a DVSEC header for a CXL entity. The caller is responsible + * for tracking the valid offset. + * + * This function will build the DVSEC header on behalf of the caller and then + * copy in the remaining data for the vendor specific bits. + * It will also set up appropriate write masks. + */ +void cxl_component_create_dvsec(CXLComponentState *cxl, + enum reg_type cxl_dev_type, uint16_t length, + uint16_t type, uint8_t rev, uint8_t *body) +{ + PCIDevice *pdev = cxl->pdev; + uint16_t offset = cxl->dvsec_offset; + uint8_t *wmask = pdev->wmask; + + assert(offset >= PCI_CFG_SPACE_SIZE && + ((offset + length) < PCI_CFG_SPACE_EXP_SIZE)); + assert((length & 0xf000) == 0); + assert((rev & ~0xf) == 0); + + /* Create the DVSEC in the MCFG space */ + pcie_add_capability(pdev, PCI_EXT_CAP_ID_DVSEC, 1, offset, length); + pci_set_long(pdev->config + offset + PCIE_DVSEC_HEADER1_OFFSET, + (length << 20) | (rev << 16) | CXL_VENDOR_ID); + pci_set_word(pdev->config + offset + PCIE_DVSEC_ID_OFFSET, type); + memcpy(pdev->config + offset + sizeof(DVSECHeader), + body + sizeof(DVSECHeader), + length - sizeof(DVSECHeader)); + + /* Configure write masks */ + switch (type) { + case PCIE_CXL_DEVICE_DVSEC: + /* Cntrl RW Lock - so needs explicit blocking when lock is set */ + wmask[offset + offsetof(CXLDVSECDevice, ctrl)] = 0xFD; + wmask[offset + offsetof(CXLDVSECDevice, ctrl) + 1] = 0x4F; + /* Status is RW1CS */ + wmask[offset + offsetof(CXLDVSECDevice, ctrl2)] = 0x0F; + /* Lock is RW Once */ + wmask[offset + offsetof(CXLDVSECDevice, lock)] = 0x01; + /* range1/2_base_high/low is RW Lock */ + wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi)] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi) + 2] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range1_base_hi) + 3] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range1_base_lo) + 3] = 0xF0; + wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi)] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi) + 2] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range2_base_hi) + 3] = 0xFF; + wmask[offset + offsetof(CXLDVSECDevice, range2_base_lo) + 3] = 0xF0; + break; + case NON_CXL_FUNCTION_MAP_DVSEC: + break; /* Not yet implemented */ + case EXTENSIONS_PORT_DVSEC: + wmask[offset + offsetof(CXLDVSECPortExtensions, control)] = 0x0F; + wmask[offset + offsetof(CXLDVSECPortExtensions, control) + 1] = 0x40; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_bus_base)] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_bus_limit)] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_base)] = 0xF0; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_base) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_limit)] = 0xF0; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_memory_limit) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base)] = 0xF0; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit)] = 0xF0; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high)] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high) + 2] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_base_high) + 3] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high)] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high) + 2] = 0xFF; + wmask[offset + offsetof(CXLDVSECPortExtensions, alt_prefetch_limit_high) + 3] = 0xFF; + break; + case GPF_PORT_DVSEC: + wmask[offset + offsetof(CXLDVSECPortGPF, phase1_ctrl)] = 0x0F; + wmask[offset + offsetof(CXLDVSECPortGPF, phase1_ctrl) + 1] = 0x0F; + wmask[offset + offsetof(CXLDVSECPortGPF, phase2_ctrl)] = 0x0F; + wmask[offset + offsetof(CXLDVSECPortGPF, phase2_ctrl) + 1] = 0x0F; + break; + case GPF_DEVICE_DVSEC: + wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_duration)] = 0x0F; + wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_duration) + 1] = 0x0F; + wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power)] = 0xFF; + wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power) + 1] = 0xFF; + wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power) + 2] = 0xFF; + wmask[offset + offsetof(CXLDVSECDeviceGPF, phase2_power) + 3] = 0xFF; + break; + case PCIE_FLEXBUS_PORT_DVSEC: + switch (cxl_dev_type) { + case CXL2_ROOT_PORT: + /* No MLD */ + wmask[offset + offsetof(CXLDVSECPortFlexBus, ctrl)] = 0xbd; + break; + case CXL2_DOWNSTREAM_PORT: + wmask[offset + offsetof(CXLDVSECPortFlexBus, ctrl)] = 0xfd; + break; + default: /* Registers are RO for other component types */ + break; + } + /* There are rw1cs bits in the status register but never set currently */ + break; + } + + /* Update state for future DVSEC additions */ + range_init_nofail(&cxl->dvsecs[type], cxl->dvsec_offset, length); + cxl->dvsec_offset += length; +} + +uint8_t cxl_interleave_ways_enc(int iw, Error **errp) +{ + switch (iw) { + case 1: return 0x0; + case 2: return 0x1; + case 4: return 0x2; + case 8: return 0x3; + case 16: return 0x4; + case 3: return 0x8; + case 6: return 0x9; + case 12: return 0xa; + default: + error_setg(errp, "Interleave ways: %d not supported", iw); + return 0; + } +} + +uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp) +{ + switch (gran) { + case 256: return 0; + case 512: return 1; + case 1024: return 2; + case 2048: return 3; + case 4096: return 4; + case 8192: return 5; + case 16384: return 6; + default: + error_setg(errp, "Interleave granularity: %" PRIu64 " invalid", gran); + return 0; + } +} diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c new file mode 100644 index 0000000000..687759b301 --- /dev/null +++ b/hw/cxl/cxl-device-utils.c @@ -0,0 +1,265 @@ +/* + * CXL Utility library for devices + * + * Copyright(C) 2020 Intel Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "hw/cxl/cxl.h" + +/* + * Device registers have no restrictions per the spec, and so fall back to the + * default memory mapped register rules in 8.2: + * Software shall use CXL.io Memory Read and Write to access memory mapped + * register defined in this section. Unless otherwise specified, software + * shall restrict the accesses width based on the following: + * • A 32 bit register shall be accessed as a 1 Byte, 2 Bytes or 4 Bytes + * quantity. + * • A 64 bit register shall be accessed as a 1 Byte, 2 Bytes, 4 Bytes or 8 + * Bytes + * • The address shall be a multiple of the access width, e.g. when + * accessing a register as a 4 Byte quantity, the address shall be + * multiple of 4. + * • The accesses shall map to contiguous bytes.If these rules are not + * followed, the behavior is undefined + */ + +static uint64_t caps_reg_read(void *opaque, hwaddr offset, unsigned size) +{ + CXLDeviceState *cxl_dstate = opaque; + + if (size == 4) { + return cxl_dstate->caps_reg_state32[offset / sizeof(*cxl_dstate->caps_reg_state32)]; + } else { + return cxl_dstate->caps_reg_state64[offset / sizeof(*cxl_dstate->caps_reg_state64)]; + } +} + +static uint64_t dev_reg_read(void *opaque, hwaddr offset, unsigned size) +{ + return 0; +} + +static uint64_t mailbox_reg_read(void *opaque, hwaddr offset, unsigned size) +{ + CXLDeviceState *cxl_dstate = opaque; + + switch (size) { + case 1: + return cxl_dstate->mbox_reg_state[offset]; + case 2: + return cxl_dstate->mbox_reg_state16[offset / size]; + case 4: + return cxl_dstate->mbox_reg_state32[offset / size]; + case 8: + return cxl_dstate->mbox_reg_state64[offset / size]; + default: + g_assert_not_reached(); + } +} + +static void mailbox_mem_writel(uint32_t *reg_state, hwaddr offset, + uint64_t value) +{ + switch (offset) { + case A_CXL_DEV_MAILBOX_CTRL: + /* fallthrough */ + case A_CXL_DEV_MAILBOX_CAP: + /* RO register */ + break; + default: + qemu_log_mask(LOG_UNIMP, + "%s Unexpected 32-bit access to 0x%" PRIx64 " (WI)\n", + __func__, offset); + return; + } + + reg_state[offset / sizeof(*reg_state)] = value; +} + +static void mailbox_mem_writeq(uint64_t *reg_state, hwaddr offset, + uint64_t value) +{ + switch (offset) { + case A_CXL_DEV_MAILBOX_CMD: + break; + case A_CXL_DEV_BG_CMD_STS: + /* BG not supported */ + /* fallthrough */ + case A_CXL_DEV_MAILBOX_STS: + /* Read only register, will get updated by the state machine */ + return; + default: + qemu_log_mask(LOG_UNIMP, + "%s Unexpected 64-bit access to 0x%" PRIx64 " (WI)\n", + __func__, offset); + return; + } + + + reg_state[offset / sizeof(*reg_state)] = value; +} + +static void mailbox_reg_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size) +{ + CXLDeviceState *cxl_dstate = opaque; + + if (offset >= A_CXL_DEV_CMD_PAYLOAD) { + memcpy(cxl_dstate->mbox_reg_state + offset, &value, size); + return; + } + + switch (size) { + case 4: + mailbox_mem_writel(cxl_dstate->mbox_reg_state32, offset, value); + break; + case 8: + mailbox_mem_writeq(cxl_dstate->mbox_reg_state64, offset, value); + break; + default: + g_assert_not_reached(); + } + + if (ARRAY_FIELD_EX32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CTRL, + DOORBELL)) { + cxl_process_mailbox(cxl_dstate); + } +} + +static uint64_t mdev_reg_read(void *opaque, hwaddr offset, unsigned size) +{ + uint64_t retval = 0; + + retval = FIELD_DP64(retval, CXL_MEM_DEV_STS, MEDIA_STATUS, 1); + retval = FIELD_DP64(retval, CXL_MEM_DEV_STS, MBOX_READY, 1); + + return retval; +} + +static const MemoryRegionOps mdev_ops = { + .read = mdev_reg_read, + .write = NULL, /* memory device register is read only */ + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = false, + }, + .impl = { + .min_access_size = 8, + .max_access_size = 8, + }, +}; + +static const MemoryRegionOps mailbox_ops = { + .read = mailbox_reg_read, + .write = mailbox_reg_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = false, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + }, +}; + +static const MemoryRegionOps dev_ops = { + .read = dev_reg_read, + .write = NULL, /* status register is read only */ + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = false, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + }, +}; + +static const MemoryRegionOps caps_ops = { + .read = caps_reg_read, + .write = NULL, /* caps registers are read only */ + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = false, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + }, +}; + +void cxl_device_register_block_init(Object *obj, CXLDeviceState *cxl_dstate) +{ + /* This will be a BAR, so needs to be rounded up to pow2 for PCI spec */ + memory_region_init(&cxl_dstate->device_registers, obj, "device-registers", + pow2ceil(CXL_MMIO_SIZE)); + + memory_region_init_io(&cxl_dstate->caps, obj, &caps_ops, cxl_dstate, + "cap-array", CXL_CAPS_SIZE); + memory_region_init_io(&cxl_dstate->device, obj, &dev_ops, cxl_dstate, + "device-status", CXL_DEVICE_STATUS_REGISTERS_LENGTH); + memory_region_init_io(&cxl_dstate->mailbox, obj, &mailbox_ops, cxl_dstate, + "mailbox", CXL_MAILBOX_REGISTERS_LENGTH); + memory_region_init_io(&cxl_dstate->memory_device, obj, &mdev_ops, + cxl_dstate, "memory device caps", + CXL_MEMORY_DEVICE_REGISTERS_LENGTH); + + memory_region_add_subregion(&cxl_dstate->device_registers, 0, + &cxl_dstate->caps); + memory_region_add_subregion(&cxl_dstate->device_registers, + CXL_DEVICE_STATUS_REGISTERS_OFFSET, + &cxl_dstate->device); + memory_region_add_subregion(&cxl_dstate->device_registers, + CXL_MAILBOX_REGISTERS_OFFSET, + &cxl_dstate->mailbox); + memory_region_add_subregion(&cxl_dstate->device_registers, + CXL_MEMORY_DEVICE_REGISTERS_OFFSET, + &cxl_dstate->memory_device); +} + +static void device_reg_init_common(CXLDeviceState *cxl_dstate) { } + +static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate) +{ + /* 2048 payload size, with no interrupt or background support */ + ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CAP, + PAYLOAD_SIZE, CXL_MAILBOX_PAYLOAD_SHIFT); + cxl_dstate->payload_size = CXL_MAILBOX_MAX_PAYLOAD_SIZE; +} + +static void memdev_reg_init_common(CXLDeviceState *cxl_dstate) { } + +void cxl_device_register_init_common(CXLDeviceState *cxl_dstate) +{ + uint64_t *cap_hdrs = cxl_dstate->caps_reg_state64; + const int cap_count = 3; + + /* CXL Device Capabilities Array Register */ + ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_ID, 0); + ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_VERSION, 1); + ARRAY_FIELD_DP64(cap_hdrs, CXL_DEV_CAP_ARRAY, CAP_COUNT, cap_count); + + cxl_device_cap_init(cxl_dstate, DEVICE_STATUS, 1); + device_reg_init_common(cxl_dstate); + + cxl_device_cap_init(cxl_dstate, MAILBOX, 2); + mailbox_reg_init_common(cxl_dstate); + + cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000); + memdev_reg_init_common(cxl_dstate); + + assert(cxl_initialize_mailbox(cxl_dstate) == 0); +} diff --git a/hw/cxl/cxl-host-stubs.c b/hw/cxl/cxl-host-stubs.c new file mode 100644 index 0000000000..24465a52ab --- /dev/null +++ b/hw/cxl/cxl-host-stubs.c @@ -0,0 +1,16 @@ +/* + * CXL host parameter parsing routine stubs + * + * Copyright (c) 2022 Huawei + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/cxl/cxl.h" + +void cxl_fixed_memory_window_config(MachineState *ms, + CXLFixedMemoryWindowOptions *object, + Error **errp) {}; + +void cxl_fixed_memory_window_link_targets(Error **errp) {}; + +const MemoryRegionOps cfmws_ops; diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c new file mode 100644 index 0000000000..469b3c4ced --- /dev/null +++ b/hw/cxl/cxl-host.c @@ -0,0 +1,222 @@ +/* + * CXL host parameter parsing routines + * + * Copyright (c) 2022 Huawei + * Modeled loosely on the NUMA options handling in hw/core/numa.c + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/bitmap.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "sysemu/qtest.h" +#include "hw/boards.h" + +#include "qapi/qapi-visit-machine.h" +#include "hw/cxl/cxl.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_host.h" +#include "hw/pci/pcie_port.h" + +void cxl_fixed_memory_window_config(MachineState *ms, + CXLFixedMemoryWindowOptions *object, + Error **errp) +{ + CXLFixedWindow *fw = g_malloc0(sizeof(*fw)); + strList *target; + int i; + + for (target = object->targets; target; target = target->next) { + fw->num_targets++; + } + + fw->enc_int_ways = cxl_interleave_ways_enc(fw->num_targets, errp); + if (*errp) { + return; + } + + fw->targets = g_malloc0_n(fw->num_targets, sizeof(*fw->targets)); + for (i = 0, target = object->targets; target; i++, target = target->next) { + /* This link cannot be resolved yet, so stash the name for now */ + fw->targets[i] = g_strdup(target->value); + } + + if (object->size % (256 * MiB)) { + error_setg(errp, + "Size of a CXL fixed memory window must my a multiple of 256MiB"); + return; + } + fw->size = object->size; + + if (object->has_interleave_granularity) { + fw->enc_int_gran = + cxl_interleave_granularity_enc(object->interleave_granularity, + errp); + if (*errp) { + return; + } + } else { + /* Default to 256 byte interleave */ + fw->enc_int_gran = 0; + } + + ms->cxl_devices_state->fixed_windows = + g_list_append(ms->cxl_devices_state->fixed_windows, fw); + + return; +} + +void cxl_fixed_memory_window_link_targets(Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + + if (ms->cxl_devices_state && ms->cxl_devices_state->fixed_windows) { + GList *it; + + for (it = ms->cxl_devices_state->fixed_windows; it; it = it->next) { + CXLFixedWindow *fw = it->data; + int i; + + for (i = 0; i < fw->num_targets; i++) { + Object *o; + bool ambig; + + o = object_resolve_path_type(fw->targets[i], + TYPE_PXB_CXL_DEVICE, + &ambig); + if (!o) { + error_setg(errp, "Could not resolve CXLFM target %s", + fw->targets[i]); + return; + } + fw->target_hbs[i] = PXB_CXL_DEV(o); + } + } + } +} + +/* TODO: support, multiple hdm decoders */ +static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr, + uint8_t *target) +{ + uint32_t ctrl; + uint32_t ig_enc; + uint32_t iw_enc; + uint32_t target_reg; + uint32_t target_idx; + + ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL]; + if (!FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED)) { + return false; + } + + ig_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IG); + iw_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IW); + target_idx = (addr / cxl_decode_ig(ig_enc)) % (1 << iw_enc); + + if (target_idx > 4) { + target_reg = cache_mem[R_CXL_HDM_DECODER0_TARGET_LIST_LO]; + target_reg >>= target_idx * 8; + } else { + target_reg = cache_mem[R_CXL_HDM_DECODER0_TARGET_LIST_LO]; + target_reg >>= (target_idx - 4) * 8; + } + *target = target_reg & 0xff; + + return true; +} + +static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr) +{ + CXLComponentState *hb_cstate; + PCIHostState *hb; + int rb_index; + uint32_t *cache_mem; + uint8_t target; + bool target_found; + PCIDevice *rp, *d; + + /* Address is relative to memory region. Convert to HPA */ + addr += fw->base; + + rb_index = (addr / cxl_decode_ig(fw->enc_int_gran)) % fw->num_targets; + hb = PCI_HOST_BRIDGE(fw->target_hbs[rb_index]->cxl.cxl_host_bridge); + if (!hb || !hb->bus || !pci_bus_is_cxl(hb->bus)) { + return NULL; + } + + hb_cstate = cxl_get_hb_cstate(hb); + if (!hb_cstate) { + return NULL; + } + + cache_mem = hb_cstate->crb.cache_mem_registers; + + target_found = cxl_hdm_find_target(cache_mem, addr, &target); + if (!target_found) { + return NULL; + } + + rp = pcie_find_port_by_pn(hb->bus, target); + if (!rp) { + return NULL; + } + + d = pci_bridge_get_sec_bus(PCI_BRIDGE(rp))->devices[0]; + + if (!d || !object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { + return NULL; + } + + return d; +} + +static MemTxResult cxl_read_cfmws(void *opaque, hwaddr addr, uint64_t *data, + unsigned size, MemTxAttrs attrs) +{ + CXLFixedWindow *fw = opaque; + PCIDevice *d; + + d = cxl_cfmws_find_device(fw, addr); + if (d == NULL) { + *data = 0; + /* Reads to invalid address return poison */ + return MEMTX_ERROR; + } + + return cxl_type3_read(d, addr + fw->base, data, size, attrs); +} + +static MemTxResult cxl_write_cfmws(void *opaque, hwaddr addr, + uint64_t data, unsigned size, + MemTxAttrs attrs) +{ + CXLFixedWindow *fw = opaque; + PCIDevice *d; + + d = cxl_cfmws_find_device(fw, addr); + if (d == NULL) { + /* Writes to invalid address are silent */ + return MEMTX_OK; + } + + return cxl_type3_write(d, addr + fw->base, data, size, attrs); +} + +const MemoryRegionOps cfmws_ops = { + .read_with_attrs = cxl_read_cfmws, + .write_with_attrs = cxl_write_cfmws, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = true, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = true, + }, +}; diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c new file mode 100644 index 0000000000..bb66c765a5 --- /dev/null +++ b/hw/cxl/cxl-mailbox-utils.c @@ -0,0 +1,478 @@ +/* + * CXL Utility library for mailbox interface + * + * Copyright(C) 2020 Intel Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/cxl/cxl.h" +#include "hw/pci/pci.h" +#include "qemu/cutils.h" +#include "qemu/log.h" +#include "qemu/uuid.h" + +/* + * How to add a new command, example. The command set FOO, with cmd BAR. + * 1. Add the command set and cmd to the enum. + * FOO = 0x7f, + * #define BAR 0 + * 2. Implement the handler + * static ret_code cmd_foo_bar(struct cxl_cmd *cmd, + * CXLDeviceState *cxl_dstate, uint16_t *len) + * 3. Add the command to the cxl_cmd_set[][] + * [FOO][BAR] = { "FOO_BAR", cmd_foo_bar, x, y }, + * 4. Implement your handler + * define_mailbox_handler(FOO_BAR) { ... return CXL_MBOX_SUCCESS; } + * + * + * Writing the handler: + * The handler will provide the &struct cxl_cmd, the &CXLDeviceState, and the + * in/out length of the payload. The handler is responsible for consuming the + * payload from cmd->payload and operating upon it as necessary. It must then + * fill the output data into cmd->payload (overwriting what was there), + * setting the length, and returning a valid return code. + * + * XXX: The handler need not worry about endianess. The payload is read out of + * a register interface that already deals with it. + */ + +enum { + EVENTS = 0x01, + #define GET_RECORDS 0x0 + #define CLEAR_RECORDS 0x1 + #define GET_INTERRUPT_POLICY 0x2 + #define SET_INTERRUPT_POLICY 0x3 + FIRMWARE_UPDATE = 0x02, + #define GET_INFO 0x0 + TIMESTAMP = 0x03, + #define GET 0x0 + #define SET 0x1 + LOGS = 0x04, + #define GET_SUPPORTED 0x0 + #define GET_LOG 0x1 + IDENTIFY = 0x40, + #define MEMORY_DEVICE 0x0 + CCLS = 0x41, + #define GET_PARTITION_INFO 0x0 + #define GET_LSA 0x2 + #define SET_LSA 0x3 +}; + +/* 8.2.8.4.5.1 Command Return Codes */ +typedef enum { + CXL_MBOX_SUCCESS = 0x0, + CXL_MBOX_BG_STARTED = 0x1, + CXL_MBOX_INVALID_INPUT = 0x2, + CXL_MBOX_UNSUPPORTED = 0x3, + CXL_MBOX_INTERNAL_ERROR = 0x4, + CXL_MBOX_RETRY_REQUIRED = 0x5, + CXL_MBOX_BUSY = 0x6, + CXL_MBOX_MEDIA_DISABLED = 0x7, + CXL_MBOX_FW_XFER_IN_PROGRESS = 0x8, + CXL_MBOX_FW_XFER_OUT_OF_ORDER = 0x9, + CXL_MBOX_FW_AUTH_FAILED = 0xa, + CXL_MBOX_FW_INVALID_SLOT = 0xb, + CXL_MBOX_FW_ROLLEDBACK = 0xc, + CXL_MBOX_FW_REST_REQD = 0xd, + CXL_MBOX_INVALID_HANDLE = 0xe, + CXL_MBOX_INVALID_PA = 0xf, + CXL_MBOX_INJECT_POISON_LIMIT = 0x10, + CXL_MBOX_PERMANENT_MEDIA_FAILURE = 0x11, + CXL_MBOX_ABORTED = 0x12, + CXL_MBOX_INVALID_SECURITY_STATE = 0x13, + CXL_MBOX_INCORRECT_PASSPHRASE = 0x14, + CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15, + CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16, + CXL_MBOX_MAX = 0x17 +} ret_code; + +struct cxl_cmd; +typedef ret_code (*opcode_handler)(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, uint16_t *len); +struct cxl_cmd { + const char *name; + opcode_handler handler; + ssize_t in; + uint16_t effect; /* Reported in CEL */ + uint8_t *payload; +}; + +#define DEFINE_MAILBOX_HANDLER_ZEROED(name, size) \ + uint16_t __zero##name = size; \ + static ret_code cmd_##name(struct cxl_cmd *cmd, \ + CXLDeviceState *cxl_dstate, uint16_t *len) \ + { \ + *len = __zero##name; \ + memset(cmd->payload, 0, *len); \ + return CXL_MBOX_SUCCESS; \ + } +#define DEFINE_MAILBOX_HANDLER_NOP(name) \ + static ret_code cmd_##name(struct cxl_cmd *cmd, \ + CXLDeviceState *cxl_dstate, uint16_t *len) \ + { \ + return CXL_MBOX_SUCCESS; \ + } + +DEFINE_MAILBOX_HANDLER_ZEROED(events_get_records, 0x20); +DEFINE_MAILBOX_HANDLER_NOP(events_clear_records); +DEFINE_MAILBOX_HANDLER_ZEROED(events_get_interrupt_policy, 4); +DEFINE_MAILBOX_HANDLER_NOP(events_set_interrupt_policy); + +/* 8.2.9.2.1 */ +static ret_code cmd_firmware_update_get_info(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct { + uint8_t slots_supported; + uint8_t slot_info; + uint8_t caps; + uint8_t rsvd[0xd]; + char fw_rev1[0x10]; + char fw_rev2[0x10]; + char fw_rev3[0x10]; + char fw_rev4[0x10]; + } QEMU_PACKED *fw_info; + QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50); + + if (cxl_dstate->pmem_size < (256 << 20)) { + return CXL_MBOX_INTERNAL_ERROR; + } + + fw_info = (void *)cmd->payload; + memset(fw_info, 0, sizeof(*fw_info)); + + fw_info->slots_supported = 2; + fw_info->slot_info = BIT(0) | BIT(3); + fw_info->caps = 0; + pstrcpy(fw_info->fw_rev1, sizeof(fw_info->fw_rev1), "BWFW VERSION 0"); + + *len = sizeof(*fw_info); + return CXL_MBOX_SUCCESS; +} + +/* 8.2.9.3.1 */ +static ret_code cmd_timestamp_get(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + uint64_t time, delta; + uint64_t final_time = 0; + + if (cxl_dstate->timestamp.set) { + /* First find the delta from the last time the host set the time. */ + time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + delta = time - cxl_dstate->timestamp.last_set; + final_time = cxl_dstate->timestamp.host_set + delta; + } + + /* Then adjust the actual time */ + stq_le_p(cmd->payload, final_time); + *len = 8; + + return CXL_MBOX_SUCCESS; +} + +/* 8.2.9.3.2 */ +static ret_code cmd_timestamp_set(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + cxl_dstate->timestamp.set = true; + cxl_dstate->timestamp.last_set = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + cxl_dstate->timestamp.host_set = le64_to_cpu(*(uint64_t *)cmd->payload); + + *len = 0; + return CXL_MBOX_SUCCESS; +} + +static QemuUUID cel_uuid; + +/* 8.2.9.4.1 */ +static ret_code cmd_logs_get_supported(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct { + uint16_t entries; + uint8_t rsvd[6]; + struct { + QemuUUID uuid; + uint32_t size; + } log_entries[1]; + } QEMU_PACKED *supported_logs = (void *)cmd->payload; + QEMU_BUILD_BUG_ON(sizeof(*supported_logs) != 0x1c); + + supported_logs->entries = 1; + supported_logs->log_entries[0].uuid = cel_uuid; + supported_logs->log_entries[0].size = 4 * cxl_dstate->cel_size; + + *len = sizeof(*supported_logs); + return CXL_MBOX_SUCCESS; +} + +/* 8.2.9.4.2 */ +static ret_code cmd_logs_get_log(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct { + QemuUUID uuid; + uint32_t offset; + uint32_t length; + } QEMU_PACKED QEMU_ALIGNED(16) *get_log = (void *)cmd->payload; + + /* + * 8.2.9.4.2 + * The device shall return Invalid Parameter if the Offset or Length + * fields attempt to access beyond the size of the log as reported by Get + * Supported Logs. + * + * XXX: Spec is wrong, "Invalid Parameter" isn't a thing. + * XXX: Spec doesn't address incorrect UUID incorrectness. + * + * The CEL buffer is large enough to fit all commands in the emulation, so + * the only possible failure would be if the mailbox itself isn't big + * enough. + */ + if (get_log->offset + get_log->length > cxl_dstate->payload_size) { + return CXL_MBOX_INVALID_INPUT; + } + + if (!qemu_uuid_is_equal(&get_log->uuid, &cel_uuid)) { + return CXL_MBOX_UNSUPPORTED; + } + + /* Store off everything to local variables so we can wipe out the payload */ + *len = get_log->length; + + memmove(cmd->payload, cxl_dstate->cel_log + get_log->offset, + get_log->length); + + return CXL_MBOX_SUCCESS; +} + +/* 8.2.9.5.1.1 */ +static ret_code cmd_identify_memory_device(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct { + char fw_revision[0x10]; + uint64_t total_capacity; + uint64_t volatile_capacity; + uint64_t persistent_capacity; + uint64_t partition_align; + uint16_t info_event_log_size; + uint16_t warning_event_log_size; + uint16_t failure_event_log_size; + uint16_t fatal_event_log_size; + uint32_t lsa_size; + uint8_t poison_list_max_mer[3]; + uint16_t inject_poison_limit; + uint8_t poison_caps; + uint8_t qos_telemetry_caps; + } QEMU_PACKED *id; + QEMU_BUILD_BUG_ON(sizeof(*id) != 0x43); + + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); + CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d); + uint64_t size = cxl_dstate->pmem_size; + + if (!QEMU_IS_ALIGNED(size, 256 << 20)) { + return CXL_MBOX_INTERNAL_ERROR; + } + + id = (void *)cmd->payload; + memset(id, 0, sizeof(*id)); + + /* PMEM only */ + snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0); + + id->total_capacity = size / (256 << 20); + id->persistent_capacity = size / (256 << 20); + id->lsa_size = cvc->get_lsa_size(ct3d); + + *len = sizeof(*id); + return CXL_MBOX_SUCCESS; +} + +static ret_code cmd_ccls_get_partition_info(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct { + uint64_t active_vmem; + uint64_t active_pmem; + uint64_t next_vmem; + uint64_t next_pmem; + } QEMU_PACKED *part_info = (void *)cmd->payload; + QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20); + uint64_t size = cxl_dstate->pmem_size; + + if (!QEMU_IS_ALIGNED(size, 256 << 20)) { + return CXL_MBOX_INTERNAL_ERROR; + } + + /* PMEM only */ + part_info->active_vmem = 0; + part_info->next_vmem = 0; + part_info->active_pmem = size / (256 << 20); + part_info->next_pmem = 0; + + *len = sizeof(*part_info); + return CXL_MBOX_SUCCESS; +} + +static ret_code cmd_ccls_get_lsa(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct { + uint32_t offset; + uint32_t length; + } QEMU_PACKED *get_lsa; + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); + CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d); + uint32_t offset, length; + + get_lsa = (void *)cmd->payload; + offset = get_lsa->offset; + length = get_lsa->length; + + if (offset + length > cvc->get_lsa_size(ct3d)) { + *len = 0; + return CXL_MBOX_INVALID_INPUT; + } + + *len = cvc->get_lsa(ct3d, get_lsa, length, offset); + return CXL_MBOX_SUCCESS; +} + +static ret_code cmd_ccls_set_lsa(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) +{ + struct set_lsa_pl { + uint32_t offset; + uint32_t rsvd; + uint8_t data[]; + } QEMU_PACKED; + struct set_lsa_pl *set_lsa_payload = (void *)cmd->payload; + CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); + CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d); + const size_t hdr_len = offsetof(struct set_lsa_pl, data); + uint16_t plen = *len; + + *len = 0; + if (!plen) { + return CXL_MBOX_SUCCESS; + } + + if (set_lsa_payload->offset + plen > cvc->get_lsa_size(ct3d) + hdr_len) { + return CXL_MBOX_INVALID_INPUT; + } + plen -= hdr_len; + + cvc->set_lsa(ct3d, set_lsa_payload->data, plen, set_lsa_payload->offset); + return CXL_MBOX_SUCCESS; +} + +#define IMMEDIATE_CONFIG_CHANGE (1 << 1) +#define IMMEDIATE_DATA_CHANGE (1 << 2) +#define IMMEDIATE_POLICY_CHANGE (1 << 3) +#define IMMEDIATE_LOG_CHANGE (1 << 4) + +static struct cxl_cmd cxl_cmd_set[256][256] = { + [EVENTS][GET_RECORDS] = { "EVENTS_GET_RECORDS", + cmd_events_get_records, 1, 0 }, + [EVENTS][CLEAR_RECORDS] = { "EVENTS_CLEAR_RECORDS", + cmd_events_clear_records, ~0, IMMEDIATE_LOG_CHANGE }, + [EVENTS][GET_INTERRUPT_POLICY] = { "EVENTS_GET_INTERRUPT_POLICY", + cmd_events_get_interrupt_policy, 0, 0 }, + [EVENTS][SET_INTERRUPT_POLICY] = { "EVENTS_SET_INTERRUPT_POLICY", + cmd_events_set_interrupt_policy, 4, IMMEDIATE_CONFIG_CHANGE }, + [FIRMWARE_UPDATE][GET_INFO] = { "FIRMWARE_UPDATE_GET_INFO", + cmd_firmware_update_get_info, 0, 0 }, + [TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 }, + [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, 8, IMMEDIATE_POLICY_CHANGE }, + [LOGS][GET_SUPPORTED] = { "LOGS_GET_SUPPORTED", cmd_logs_get_supported, 0, 0 }, + [LOGS][GET_LOG] = { "LOGS_GET_LOG", cmd_logs_get_log, 0x18, 0 }, + [IDENTIFY][MEMORY_DEVICE] = { "IDENTIFY_MEMORY_DEVICE", + cmd_identify_memory_device, 0, 0 }, + [CCLS][GET_PARTITION_INFO] = { "CCLS_GET_PARTITION_INFO", + cmd_ccls_get_partition_info, 0, 0 }, + [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 0, 0 }, + [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa, + ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE }, +}; + +void cxl_process_mailbox(CXLDeviceState *cxl_dstate) +{ + uint16_t ret = CXL_MBOX_SUCCESS; + struct cxl_cmd *cxl_cmd; + uint64_t status_reg; + opcode_handler h; + uint64_t command_reg = cxl_dstate->mbox_reg_state64[R_CXL_DEV_MAILBOX_CMD]; + + uint8_t set = FIELD_EX64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND_SET); + uint8_t cmd = FIELD_EX64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND); + uint16_t len = FIELD_EX64(command_reg, CXL_DEV_MAILBOX_CMD, LENGTH); + cxl_cmd = &cxl_cmd_set[set][cmd]; + h = cxl_cmd->handler; + if (h) { + if (len == cxl_cmd->in) { + cxl_cmd->payload = cxl_dstate->mbox_reg_state + + A_CXL_DEV_CMD_PAYLOAD; + ret = (*h)(cxl_cmd, cxl_dstate, &len); + assert(len <= cxl_dstate->payload_size); + } else { + ret = CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + } else { + qemu_log_mask(LOG_UNIMP, "Command %04xh not implemented\n", + set << 8 | cmd); + ret = CXL_MBOX_UNSUPPORTED; + } + + /* Set the return code */ + status_reg = FIELD_DP64(0, CXL_DEV_MAILBOX_STS, ERRNO, ret); + + /* Set the return length */ + command_reg = FIELD_DP64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND_SET, 0); + command_reg = FIELD_DP64(command_reg, CXL_DEV_MAILBOX_CMD, COMMAND, 0); + command_reg = FIELD_DP64(command_reg, CXL_DEV_MAILBOX_CMD, LENGTH, len); + + cxl_dstate->mbox_reg_state64[R_CXL_DEV_MAILBOX_CMD] = command_reg; + cxl_dstate->mbox_reg_state64[R_CXL_DEV_MAILBOX_STS] = status_reg; + + /* Tell the host we're done */ + ARRAY_FIELD_DP32(cxl_dstate->mbox_reg_state32, CXL_DEV_MAILBOX_CTRL, + DOORBELL, 0); +} + +int cxl_initialize_mailbox(CXLDeviceState *cxl_dstate) +{ + /* CXL 2.0: Table 169 Get Supported Logs Log Entry */ + const char *cel_uuidstr = "0da9c0b5-bf41-4b78-8f79-96b1623b3f17"; + + for (int set = 0; set < 256; set++) { + for (int cmd = 0; cmd < 256; cmd++) { + if (cxl_cmd_set[set][cmd].handler) { + struct cxl_cmd *c = &cxl_cmd_set[set][cmd]; + struct cel_log *log = + &cxl_dstate->cel_log[cxl_dstate->cel_size]; + + log->opcode = (set << 8) | cmd; + log->effect = c->effect; + cxl_dstate->cel_size++; + } + } + } + + return qemu_uuid_parse(cel_uuidstr, &cel_uuid); +} diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build new file mode 100644 index 0000000000..f117b99949 --- /dev/null +++ b/hw/cxl/meson.build @@ -0,0 +1,12 @@ +softmmu_ss.add(when: 'CONFIG_CXL', + if_true: files( + 'cxl-component-utils.c', + 'cxl-device-utils.c', + 'cxl-mailbox-utils.c', + 'cxl-host.c', + ), + if_false: files( + 'cxl-host-stubs.c', + )) + +softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('cxl-host-stubs.c')) diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 09818231bd..96e56c4467 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -565,6 +565,12 @@ vhost_user_gpu_device_realize(DeviceState *qdev, Error **errp) g->vhost_gpu_fd = -1; } +static struct vhost_dev *vhost_user_gpu_get_vhost(VirtIODevice *vdev) +{ + VhostUserGPU *g = VHOST_USER_GPU(vdev); + return &g->vhost->dev; +} + static Property vhost_user_gpu_properties[] = { VIRTIO_GPU_BASE_PROPERTIES(VhostUserGPU, parent_obj.conf), DEFINE_PROP_END_OF_LIST(), @@ -586,6 +592,7 @@ vhost_user_gpu_class_init(ObjectClass *klass, void *data) vdc->guest_notifier_pending = vhost_user_gpu_guest_notifier_pending; vdc->get_config = vhost_user_gpu_get_config; vdc->set_config = vhost_user_gpu_set_config; + vdc->get_vhost = vhost_user_gpu_get_vhost; device_class_set_props(dc, vhost_user_gpu_properties); } diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index fff0fb4a82..8ba5da4312 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -173,7 +173,7 @@ virtio_gpu_base_device_realize(DeviceState *qdev, } g->virtio_config.num_scanouts = cpu_to_le32(g->conf.max_outputs); - virtio_init(VIRTIO_DEVICE(g), "virtio-gpu", VIRTIO_ID_GPU, + virtio_init(VIRTIO_DEVICE(g), VIRTIO_ID_GPU, sizeof(struct virtio_gpu_config)); if (virtio_gpu_virgl_enabled(g->conf)) { diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index dcf6ece3d0..c125939ed6 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -28,6 +28,7 @@ #include "qemu/bitmap.h" #include "qemu/error-report.h" #include "hw/pci/pci.h" +#include "hw/cxl/cxl.h" #include "hw/core/cpu.h" #include "target/i386/cpu.h" #include "hw/misc/pvpanic.h" @@ -66,6 +67,7 @@ #include "hw/acpi/aml-build.h" #include "hw/acpi/utils.h" #include "hw/acpi/pci.h" +#include "hw/acpi/cxl.h" #include "qom/qom-qobject.h" #include "hw/i386/amd_iommu.h" @@ -75,6 +77,7 @@ #include "hw/acpi/ipmi.h" #include "hw/acpi/hmat.h" #include "hw/acpi/viot.h" +#include "hw/acpi/cxl.h" #include CONFIG_DEVICES @@ -1409,6 +1412,22 @@ static void build_smb0(Aml *table, I2CBus *smbus, int devnr, int func) aml_append(table, scope); } +static void build_acpi0017(Aml *table) +{ + Aml *dev, *scope, *method; + + scope = aml_scope("_SB"); + dev = aml_device("CXLM"); + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0017"))); + + method = aml_method("_STA", 0, AML_NOTSERIALIZED); + aml_append(method, aml_return(aml_int(0x01))); + aml_append(dev, method); + + aml_append(scope, dev); + aml_append(table, scope); +} + static void build_dsdt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm, AcpiMiscInfo *misc, @@ -1428,6 +1447,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, #ifdef CONFIG_TPM TPMIf *tpm = tpm_find(); #endif + bool cxl_present = false; int i; VMBusBridge *vmbus_bridge = vmbus_bridge_find(); AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = x86ms->oem_id, @@ -1572,10 +1592,25 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } scope = aml_scope("\\_SB"); - dev = aml_device("PC%.02X", bus_num); + + if (pci_bus_is_cxl(bus)) { + dev = aml_device("CL%.02X", bus_num); + } else { + dev = aml_device("PC%.02X", bus_num); + } aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); - if (pci_bus_is_express(bus)) { + if (pci_bus_is_cxl(bus)) { + struct Aml *pkg = aml_package(2); + + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0016"))); + aml_append(pkg, aml_eisaid("PNP0A08")); + aml_append(pkg, aml_eisaid("PNP0A03")); + aml_append(dev, aml_name_decl("_CID", pkg)); + aml_append(dev, aml_name_decl("_ADR", aml_int(0))); + aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); + build_cxl_osc_method(dev); + } else if (pci_bus_is_express(bus)) { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); @@ -1595,9 +1630,23 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(scope, dev); aml_append(dsdt, scope); + + /* Handle the ranges for the PXB expanders */ + if (pci_bus_is_cxl(bus)) { + MemoryRegion *mr = &machine->cxl_devices_state->host_mr; + uint64_t base = mr->addr; + + cxl_present = true; + crs_range_insert(crs_range_set.mem_ranges, base, + base + memory_region_size(mr) - 1); + } } } + if (cxl_present) { + build_acpi0017(dsdt); + } + /* * At this point crs_range_set has all the ranges used by pci * busses *other* than PCI0. These ranges will be excluded from @@ -2662,6 +2711,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) machine->nvdimms_state, machine->ram_slots, x86ms->oem_id, x86ms->oem_table_id); } + if (machine->cxl_devices_state->is_enabled) { + cxl_build_cedt(machine, table_offsets, tables_blob, tables->linker, + x86ms->oem_id, x86ms->oem_table_id); + } acpi_add_table(table_offsets, tables_blob); build_waet(tables_blob, tables->linker, x86ms->oem_id, x86ms->oem_table_id); diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index ea8eaeb330..725f69095b 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -201,15 +201,18 @@ static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start, /* * AMDVi event structure * 0:15 -> DeviceID - * 55:63 -> event type + miscellaneous info - * 63:127 -> related address + * 48:63 -> event type + miscellaneous info + * 64:127 -> related address */ static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr, uint16_t info) { + evt[0] = 0; + evt[1] = 0; + amdvi_setevent_bits(evt, devid, 0, 16); - amdvi_setevent_bits(evt, info, 55, 8); - amdvi_setevent_bits(evt, addr, 63, 64); + amdvi_setevent_bits(evt, info, 48, 16); + amdvi_setevent_bits(evt, addr, 64, 64); } /* log an error encountered during a page walk * @@ -218,7 +221,7 @@ static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr, static void amdvi_page_fault(AMDVIState *s, uint16_t devid, hwaddr addr, uint16_t info) { - uint64_t evt[4]; + uint64_t evt[2]; info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF; amdvi_encode_event(evt, devid, addr, info); @@ -234,7 +237,7 @@ static void amdvi_page_fault(AMDVIState *s, uint16_t devid, static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid, hwaddr devtab, uint16_t info) { - uint64_t evt[4]; + uint64_t evt[2]; info |= AMDVI_EVENT_DEV_TAB_HW_ERROR; @@ -248,7 +251,8 @@ static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid, */ static void amdvi_log_command_error(AMDVIState *s, hwaddr addr) { - uint64_t evt[4], info = AMDVI_EVENT_COMMAND_HW_ERROR; + uint64_t evt[2]; + uint16_t info = AMDVI_EVENT_COMMAND_HW_ERROR; amdvi_encode_event(evt, 0, addr, info); amdvi_log_event(s, evt); @@ -261,7 +265,7 @@ static void amdvi_log_command_error(AMDVIState *s, hwaddr addr) static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info, hwaddr addr) { - uint64_t evt[4]; + uint64_t evt[2]; info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR; amdvi_encode_event(evt, 0, addr, info); @@ -276,7 +280,7 @@ static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info, static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid, hwaddr addr, uint16_t info) { - uint64_t evt[4]; + uint64_t evt[2]; info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY; amdvi_encode_event(evt, devid, addr, info); @@ -288,7 +292,7 @@ static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid, static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid, hwaddr addr, uint16_t info) { - uint64_t evt[4]; + uint64_t evt[2]; info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR; amdvi_encode_event(evt, devid, addr, info); diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index c64aa81a83..2162394e08 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -181,6 +181,18 @@ static void vtd_update_scalable_state(IntelIOMMUState *s) } } +static void vtd_update_iq_dw(IntelIOMMUState *s) +{ + uint64_t val = vtd_get_quad_raw(s, DMAR_IQA_REG); + + if (s->ecap & VTD_ECAP_SMTS && + val & VTD_IQA_DW_MASK) { + s->iq_dw = true; + } else { + s->iq_dw = false; + } +} + /* Whether the address space needs to notify new mappings */ static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as) { @@ -469,11 +481,6 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, assert(fault < VTD_FR_MAX); - if (fault == VTD_FR_RESERVED_ERR) { - /* This is not a normal fault reason case. Drop it. */ - return; - } - trace_vtd_dmar_fault(source_id, fault, addr, is_write); if (fsts_reg & VTD_FSTS_PFO) { @@ -1025,6 +1032,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, uint32_t offset; uint64_t slpte; uint64_t access_right_check; + uint64_t xlat, size; if (!vtd_iova_range_check(s, iova, ce, aw_bits)) { error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")", @@ -1069,11 +1077,33 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, if (vtd_is_last_slpte(slpte, level)) { *slptep = slpte; *slpte_level = level; - return 0; + break; } addr = vtd_get_slpte_addr(slpte, aw_bits); level--; } + + xlat = vtd_get_slpte_addr(*slptep, aw_bits); + size = ~vtd_slpt_level_page_mask(level) + 1; + + /* + * From VT-d spec 3.14: Untranslated requests and translation + * requests that result in an address in the interrupt range will be + * blocked with condition code LGN.4 or SGN.8. + */ + if ((xlat > VTD_INTERRUPT_ADDR_LAST || + xlat + size - 1 < VTD_INTERRUPT_ADDR_FIRST)) { + return 0; + } else { + error_report_once("%s: xlat address is in interrupt range " + "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", " + "slpte=0x%" PRIx64 ", write=%d, " + "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ")", + __func__, iova, level, slpte, is_write, + xlat, size); + return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR : + -VTD_FR_INTERRUPT_ADDR; + } } typedef int (*vtd_page_walk_hook)(IOMMUTLBEvent *event, void *private); @@ -1633,11 +1663,12 @@ static const bool vtd_qualified_faults[] = { [VTD_FR_PAGING_ENTRY_INV] = true, [VTD_FR_ROOT_TABLE_INV] = false, [VTD_FR_CONTEXT_TABLE_INV] = false, + [VTD_FR_INTERRUPT_ADDR] = true, [VTD_FR_ROOT_ENTRY_RSVD] = false, [VTD_FR_PAGING_ENTRY_RSVD] = true, [VTD_FR_CONTEXT_ENTRY_TT] = true, [VTD_FR_PASID_TABLE_INV] = false, - [VTD_FR_RESERVED_ERR] = false, + [VTD_FR_SM_INTERRUPT_ADDR] = true, [VTD_FR_MAX] = false, }; @@ -2209,12 +2240,13 @@ static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en) /* Handle write to Global Command Register */ static void vtd_handle_gcmd_write(IntelIOMMUState *s) { + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG); uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG); uint32_t changed = status ^ val; trace_vtd_reg_write_gcmd(status, val); - if (changed & VTD_GCMD_TE) { + if ((changed & VTD_GCMD_TE) && s->dma_translation) { /* Translation enable/disable */ vtd_handle_gcmd_te(s, val & VTD_GCMD_TE); } @@ -2230,7 +2262,8 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s) /* Set/update the interrupt remapping root-table pointer */ vtd_handle_gcmd_sirtp(s); } - if (changed & VTD_GCMD_IRE) { + if ((changed & VTD_GCMD_IRE) && + x86_iommu_ir_supported(x86_iommu)) { /* Interrupt remap enable/disable */ vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE); } @@ -2883,12 +2916,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr, } else { vtd_set_quad(s, addr, val); } - if (s->ecap & VTD_ECAP_SMTS && - val & VTD_IQA_DW_MASK) { - s->iq_dw = true; - } else { - s->iq_dw = false; - } + vtd_update_iq_dw(s); break; case DMAR_IQA_REG_HI: @@ -3032,7 +3060,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, /* TODO: add support for VFIO and vhost users */ if (s->snoop_control) { - error_setg_errno(errp, -ENOTSUP, + error_setg_errno(errp, ENOTSUP, "Snoop Control with vhost or VFIO is not supported"); return -ENOTSUP; } @@ -3053,13 +3081,6 @@ static int vtd_post_load(void *opaque, int version_id) IntelIOMMUState *iommu = opaque; /* - * Memory regions are dynamically turned on/off depending on - * context entry configurations from the guest. After migration, - * we need to make sure the memory regions are still correct. - */ - vtd_switch_address_space_all(iommu); - - /* * We don't need to migrate the root_scalable because we can * simply do the calculation after the loading is complete. We * can actually do similar things with root, dmar_enabled, etc. @@ -3068,6 +3089,15 @@ static int vtd_post_load(void *opaque, int version_id) */ vtd_update_scalable_state(iommu); + vtd_update_iq_dw(iommu); + + /* + * Memory regions are dynamically turned on/off depending on + * context entry configurations from the guest. After migration, + * we need to make sure the memory regions are still correct. + */ + vtd_switch_address_space_all(iommu); + return 0; } @@ -3122,6 +3152,7 @@ static Property vtd_properties[] = { DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE), DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false), DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true), + DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true), DEFINE_PROP_END_OF_LIST(), }; @@ -3627,12 +3658,17 @@ static void vtd_init(IntelIOMMUState *s) s->next_frcd_reg = 0; s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS | - VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits); + VTD_CAP_MGAW(s->aw_bits); if (s->dma_drain) { s->cap |= VTD_CAP_DRAIN; } - if (s->aw_bits == VTD_HOST_AW_48BIT) { - s->cap |= VTD_CAP_SAGAW_48bit; + if (s->dma_translation) { + if (s->aw_bits >= VTD_HOST_AW_39BIT) { + s->cap |= VTD_CAP_SAGAW_39bit; + } + if (s->aw_bits >= VTD_HOST_AW_48BIT) { + s->cap |= VTD_CAP_SAGAW_48bit; + } } s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; @@ -3778,15 +3814,10 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; } if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) { - if (!kvm_irqchip_in_kernel()) { + if (!kvm_irqchip_is_split()) { error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split"); return false; } - if (!kvm_enable_x2apic()) { - error_setg(errp, "eim=on requires support on the KVM side" - "(X2APIC_API, first shipped in v4.7)"); - return false; - } } /* Currently only address widths supported are 39 and 48 bits */ diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 1ff13b40f9..930ce61feb 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -289,6 +289,8 @@ typedef enum VTDFaultReason { * context-entry. */ VTD_FR_CONTEXT_ENTRY_TT, + /* Output address in the interrupt address range */ + VTD_FR_INTERRUPT_ADDR = 0xE, /* Interrupt remapping transition faults */ VTD_FR_IR_REQ_RSVD = 0x20, /* One or more IR request reserved @@ -304,11 +306,8 @@ typedef enum VTDFaultReason { VTD_FR_PASID_TABLE_INV = 0x58, /*Invalid PASID table entry */ - /* This is not a normal fault reason. We use this to indicate some faults - * that are not referenced by the VT-d specification. - * Fault event with such reason should not be recorded. - */ - VTD_FR_RESERVED_ERR, + /* Output address in the interrupt address range for scalable mode */ + VTD_FR_SM_INTERRUPT_ADDR = 0x87, VTD_FR_MAX, /* Guard */ } VTDFaultReason; diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index 4b3b1dd262..754f1d0593 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -247,7 +247,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) x86ms->pci_irq_mask = 0; } - if (mms->pic == ON_OFF_AUTO_ON || mms->pic == ON_OFF_AUTO_AUTO) { + if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) { qemu_irq *i8259; i8259 = i8259_init(isa_bus, x86_allocate_cpu_irq()); @@ -257,7 +257,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) g_free(i8259); } - if (mms->pit == ON_OFF_AUTO_ON || mms->pit == ON_OFF_AUTO_AUTO) { + if (x86ms->pit == ON_OFF_AUTO_ON || x86ms->pit == ON_OFF_AUTO_AUTO) { if (kvm_pit_in_kernel()) { kvm_pit_init(isa_bus, 0x40); } else { @@ -491,40 +491,6 @@ static void microvm_machine_reset(MachineState *machine) } } -static void microvm_machine_get_pic(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - MicrovmMachineState *mms = MICROVM_MACHINE(obj); - OnOffAuto pic = mms->pic; - - visit_type_OnOffAuto(v, name, &pic, errp); -} - -static void microvm_machine_set_pic(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - MicrovmMachineState *mms = MICROVM_MACHINE(obj); - - visit_type_OnOffAuto(v, name, &mms->pic, errp); -} - -static void microvm_machine_get_pit(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - MicrovmMachineState *mms = MICROVM_MACHINE(obj); - OnOffAuto pit = mms->pit; - - visit_type_OnOffAuto(v, name, &pit, errp); -} - -static void microvm_machine_set_pit(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - MicrovmMachineState *mms = MICROVM_MACHINE(obj); - - visit_type_OnOffAuto(v, name, &mms->pit, errp); -} - static void microvm_machine_get_rtc(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -649,8 +615,6 @@ static void microvm_machine_initfn(Object *obj) MicrovmMachineState *mms = MICROVM_MACHINE(obj); /* Configuration */ - mms->pic = ON_OFF_AUTO_AUTO; - mms->pit = ON_OFF_AUTO_AUTO; mms->rtc = ON_OFF_AUTO_AUTO; mms->pcie = ON_OFF_AUTO_AUTO; mms->ioapic2 = ON_OFF_AUTO_AUTO; @@ -702,20 +666,6 @@ static void microvm_class_init(ObjectClass *oc, void *data) x86mc->fwcfg_dma_enabled = true; - object_class_property_add(oc, MICROVM_MACHINE_PIC, "OnOffAuto", - microvm_machine_get_pic, - microvm_machine_set_pic, - NULL, NULL); - object_class_property_set_description(oc, MICROVM_MACHINE_PIC, - "Enable i8259 PIC"); - - object_class_property_add(oc, MICROVM_MACHINE_PIT, "OnOffAuto", - microvm_machine_get_pit, - microvm_machine_set_pit, - NULL, NULL); - object_class_property_set_description(oc, MICROVM_MACHINE_PIT, - "Enable i8254 PIT"); - object_class_property_add(oc, MICROVM_MACHINE_RTC, "OnOffAuto", microvm_machine_get_rtc, microvm_machine_set_rtc, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 305d2c0820..7c39c91335 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -75,6 +75,7 @@ #include "acpi-build.h" #include "hw/mem/pc-dimm.h" #include "hw/mem/nvdimm.h" +#include "hw/cxl/cxl.h" #include "qapi/error.h" #include "qapi/qapi-visit-common.h" #include "qapi/qapi-visit-machine.h" @@ -743,14 +744,6 @@ void pc_machine_done(Notifier *notifier, void *data) /* update FW_CFG_NB_CPUS to account for -device added CPUs */ fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); } - - - if (x86ms->apic_id_limit > 255 && !xen_enabled() && - !kvm_irqchip_in_kernel()) { - error_report("current -smp configuration requires kernel " - "irqchip support."); - exit(EXIT_FAILURE); - } } void pc_guest_info_init(PCMachineState *pcms) @@ -816,6 +809,7 @@ void pc_memory_init(PCMachineState *pcms, MachineClass *mc = MACHINE_GET_CLASS(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); X86MachineState *x86ms = X86_MACHINE(pcms); + hwaddr cxl_base, cxl_resv_end = 0; assert(machine->ram_size == x86ms->below_4g_mem_size + x86ms->above_4g_mem_size); @@ -905,6 +899,44 @@ void pc_memory_init(PCMachineState *pcms, &machine->device_memory->mr); } + if (machine->cxl_devices_state->is_enabled) { + MemoryRegion *mr = &machine->cxl_devices_state->host_mr; + hwaddr cxl_size = MiB; + + if (pcmc->has_reserved_memory && machine->device_memory->base) { + cxl_base = machine->device_memory->base; + if (!pcmc->broken_reserved_end) { + cxl_base += memory_region_size(&machine->device_memory->mr); + } + } else if (pcms->sgx_epc.size != 0) { + cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc); + } else { + cxl_base = 0x100000000ULL + x86ms->above_4g_mem_size; + } + + e820_add_entry(cxl_base, cxl_size, E820_RESERVED); + memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); + memory_region_add_subregion(system_memory, cxl_base, mr); + cxl_resv_end = cxl_base + cxl_size; + if (machine->cxl_devices_state->fixed_windows) { + hwaddr cxl_fmw_base; + GList *it; + + cxl_fmw_base = ROUND_UP(cxl_base + cxl_size, 256 * MiB); + for (it = machine->cxl_devices_state->fixed_windows; it; it = it->next) { + CXLFixedWindow *fw = it->data; + + fw->base = cxl_fmw_base; + memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, + "cxl-fixed-memory-region", fw->size); + memory_region_add_subregion(system_memory, fw->base, &fw->mr); + e820_add_entry(fw->base, fw->size, E820_RESERVED); + cxl_fmw_base += fw->size; + cxl_resv_end = cxl_fmw_base; + } + } + } + /* Initialize PC system firmware */ pc_system_firmware_init(pcms, rom_memory); @@ -932,6 +964,10 @@ void pc_memory_init(PCMachineState *pcms, if (!pcmc->broken_reserved_end) { res_mem_end += memory_region_size(&machine->device_memory->mr); } + + if (machine->cxl_devices_state->is_enabled) { + res_mem_end = cxl_resv_end; + } *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); } @@ -965,7 +1001,17 @@ uint64_t pc_pci_hole64_start(void) X86MachineState *x86ms = X86_MACHINE(pcms); uint64_t hole64_start = 0; - if (pcmc->has_reserved_memory && ms->device_memory->base) { + if (ms->cxl_devices_state->host_mr.addr) { + hole64_start = ms->cxl_devices_state->host_mr.addr + + memory_region_size(&ms->cxl_devices_state->host_mr); + if (ms->cxl_devices_state->fixed_windows) { + GList *it; + for (it = ms->cxl_devices_state->fixed_windows; it; it = it->next) { + CXLFixedWindow *fw = it->data; + hole64_start = fw->mr.addr + memory_region_size(&fw->mr); + } + } + } else if (pcmc->has_reserved_memory && ms->device_memory->base) { hole64_start = ms->device_memory->base; if (!pcmc->broken_reserved_end) { hole64_start += memory_region_size(&ms->device_memory->mr); @@ -1077,6 +1123,7 @@ void pc_basic_device_init(struct PCMachineState *pcms, ISADevice *pit = NULL; MemoryRegion *ioport80_io = g_new(MemoryRegion, 1); MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1); + X86MachineState *x86ms = X86_MACHINE(pcms); memory_region_init_io(ioport80_io, NULL, &ioport80_io_ops, NULL, "ioport80", 1); memory_region_add_subregion(isa_bus->address_space_io, 0x80, ioport80_io); @@ -1121,7 +1168,8 @@ void pc_basic_device_init(struct PCMachineState *pcms, qemu_register_boot_set(pc_boot_set, *rtc_state); - if (!xen_enabled() && pcms->pit_enabled) { + if (!xen_enabled() && + (x86ms->pit == ON_OFF_AUTO_AUTO || x86ms->pit == ON_OFF_AUTO_ON)) { if (kvm_pit_in_kernel()) { pit = kvm_pit_init(isa_bus, 0x40); } else { @@ -1491,20 +1539,6 @@ static void pc_machine_set_sata(Object *obj, bool value, Error **errp) pcms->sata_enabled = value; } -static bool pc_machine_get_pit(Object *obj, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - return pcms->pit_enabled; -} - -static void pc_machine_set_pit(Object *obj, bool value, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - pcms->pit_enabled = value; -} - static bool pc_machine_get_hpet(Object *obj, Error **errp) { PCMachineState *pcms = PC_MACHINE(obj); @@ -1661,7 +1695,6 @@ static void pc_machine_initfn(Object *obj) pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; pcms->smbus_enabled = true; pcms->sata_enabled = true; - pcms->pit_enabled = true; pcms->i8042_enabled = true; pcms->max_fw_size = 8 * MiB; #ifdef CONFIG_HPET @@ -1761,6 +1794,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->default_cpu_type = TARGET_DEFAULT_CPU_TYPE; mc->nvdimm_supported = true; mc->smp_props.dies_supported = true; + mc->cxl_supported = true; mc->default_ram_id = "pc.ram"; object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size", @@ -1789,11 +1823,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, PC_MACHINE_SATA, "Enable/disable Serial ATA bus"); - object_class_property_add_bool(oc, PC_MACHINE_PIT, - pc_machine_get_pit, pc_machine_set_pit); - object_class_property_set_description(oc, PC_MACHINE_PIT, - "Enable/disable Intel 8254 programmable interval timer emulation"); - object_class_property_add_bool(oc, "hpet", pc_machine_get_hpet, pc_machine_set_hpet); object_class_property_set_description(oc, "hpet", diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index f843dd906f..578e537b35 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -218,7 +218,9 @@ static void pc_init1(MachineState *machine, } isa_bus_irqs(isa_bus, x86ms->gsi); - pc_i8259_create(isa_bus, gsi_state->i8259_irq); + if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) { + pc_i8259_create(isa_bus, gsi_state->i8259_irq); + } if (pcmc->pci_enabled) { ioapic_init_gsi(gsi_state, "i440fx"); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 302288342a..42eb8b9707 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -265,7 +265,9 @@ static void pc_q35_init(MachineState *machine) pci_bus_set_route_irq_fn(host_bus, ich9_route_intx_pin_to_irq); isa_bus = ich9_lpc->isa_bus; - pc_i8259_create(isa_bus, gsi_state->i8259_irq); + if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) { + pc_i8259_create(isa_bus, gsi_state->i8259_irq); + } if (pcmc->pci_enabled) { ioapic_init_gsi(gsi_state, "q35"); diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 79ebdface6..78b05ab7a2 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -38,6 +38,7 @@ #include "sysemu/replay.h" #include "sysemu/sysemu.h" #include "sysemu/cpu-timers.h" +#include "sysemu/xen.h" #include "trace.h" #include "hw/i386/x86.h" @@ -122,6 +123,21 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) */ x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, ms->smp.max_cpus - 1) + 1; + + /* + * Can we support APIC ID 255 or higher? + * + * Under Xen: yes. + * With userspace emulated lapic: no + * With KVM's in-kernel lapic: only if X2APIC API is enabled. + */ + if (x86ms->apic_id_limit > 255 && !xen_enabled() && + (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) { + error_report("current -smp configuration requires kernel " + "irqchip and X2APIC API support."); + exit(EXIT_FAILURE); + } + possible_cpus = mc->possible_cpu_arch_ids(ms); for (i = 0; i < ms->smp.cpus; i++) { x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); @@ -1228,6 +1244,40 @@ static void x86_machine_set_acpi(Object *obj, Visitor *v, const char *name, visit_type_OnOffAuto(v, name, &x86ms->acpi, errp); } +static void x86_machine_get_pit(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + OnOffAuto pit = x86ms->pit; + + visit_type_OnOffAuto(v, name, &pit, errp); +} + +static void x86_machine_set_pit(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj);; + + visit_type_OnOffAuto(v, name, &x86ms->pit, errp); +} + +static void x86_machine_get_pic(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + OnOffAuto pic = x86ms->pic; + + visit_type_OnOffAuto(v, name, &pic, errp); +} + +static void x86_machine_set_pic(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + + visit_type_OnOffAuto(v, name, &x86ms->pic, errp); +} + static char *x86_machine_get_oem_id(Object *obj, Error **errp) { X86MachineState *x86ms = X86_MACHINE(obj); @@ -1317,6 +1367,8 @@ static void x86_machine_initfn(Object *obj) x86ms->smm = ON_OFF_AUTO_AUTO; x86ms->acpi = ON_OFF_AUTO_AUTO; + x86ms->pit = ON_OFF_AUTO_AUTO; + x86ms->pic = ON_OFF_AUTO_AUTO; x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS; x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); @@ -1348,6 +1400,20 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, X86_MACHINE_ACPI, "Enable ACPI"); + object_class_property_add(oc, X86_MACHINE_PIT, "OnOffAuto", + x86_machine_get_pit, + x86_machine_set_pit, + NULL, NULL); + object_class_property_set_description(oc, X86_MACHINE_PIT, + "Enable i8254 PIT"); + + object_class_property_add(oc, X86_MACHINE_PIC, "OnOffAuto", + x86_machine_get_pic, + x86_machine_set_pic, + NULL, NULL); + object_class_property_set_description(oc, X86_MACHINE_PIC, + "Enable i8259 PIC"); + object_class_property_add_str(oc, X86_MACHINE_OEM_ID, x86_machine_get_oem_id, x86_machine_set_oem_id); diff --git a/hw/input/vhost-user-input.c b/hw/input/vhost-user-input.c index aeb0624fe5..1352e372ff 100644 --- a/hw/input/vhost-user-input.c +++ b/hw/input/vhost-user-input.c @@ -78,6 +78,12 @@ static void vhost_input_set_config(VirtIODevice *vdev, virtio_notify_config(vdev); } +static struct vhost_dev *vhost_input_get_vhost(VirtIODevice *vdev) +{ + VHostUserInput *vhi = VHOST_USER_INPUT(vdev); + return &vhi->vhost->dev; +} + static const VMStateDescription vmstate_vhost_input = { .name = "vhost-user-input", .unmigratable = 1, @@ -92,6 +98,7 @@ static void vhost_input_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_vhost_input; vdc->get_config = vhost_input_get_config; vdc->set_config = vhost_input_set_config; + vdc->get_vhost = vhost_input_get_vhost; vic->realize = vhost_input_realize; vic->change_active = vhost_input_change_active; } diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c index 54bcb46c74..5b5398b3ca 100644 --- a/hw/input/virtio-input.c +++ b/hw/input/virtio-input.c @@ -257,8 +257,7 @@ static void virtio_input_device_realize(DeviceState *dev, Error **errp) vinput->cfg_size += 8; assert(vinput->cfg_size <= sizeof(virtio_input_config)); - virtio_init(vdev, "virtio-input", VIRTIO_ID_INPUT, - vinput->cfg_size); + virtio_init(vdev, VIRTIO_ID_INPUT, vinput->cfg_size); vinput->evt = virtio_add_queue(vdev, 64, virtio_input_handle_evt); vinput->sts = virtio_add_queue(vdev, 64, virtio_input_handle_sts); } diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig index 03dbb3c7df..73c5ae8ad9 100644 --- a/hw/mem/Kconfig +++ b/hw/mem/Kconfig @@ -11,3 +11,8 @@ config NVDIMM config SPARSE_MEM bool + +config CXL_MEM_DEVICE + bool + default y if CXL + select MEM_DEVICE diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c new file mode 100644 index 0000000000..3bf2869573 --- /dev/null +++ b/hw/mem/cxl_type3.c @@ -0,0 +1,371 @@ +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/error-report.h" +#include "hw/mem/memory-device.h" +#include "hw/mem/pc-dimm.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "qemu/pmem.h" +#include "qemu/range.h" +#include "qemu/rcu.h" +#include "sysemu/hostmem.h" +#include "hw/cxl/cxl.h" + +static void build_dvsecs(CXLType3Dev *ct3d) +{ + CXLComponentState *cxl_cstate = &ct3d->cxl_cstate; + uint8_t *dvsec; + + dvsec = (uint8_t *)&(CXLDVSECDevice){ + .cap = 0x1e, + .ctrl = 0x2, + .status2 = 0x2, + .range1_size_hi = ct3d->hostmem->size >> 32, + .range1_size_lo = (2 << 5) | (2 << 2) | 0x3 | + (ct3d->hostmem->size & 0xF0000000), + .range1_base_hi = 0, + .range1_base_lo = 0, + }; + cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE, + PCIE_CXL_DEVICE_DVSEC_LENGTH, + PCIE_CXL_DEVICE_DVSEC, + PCIE_CXL2_DEVICE_DVSEC_REVID, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){ + .rsvd = 0, + .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX, + .reg0_base_hi = 0, + .reg1_base_lo = RBI_CXL_DEVICE_REG | CXL_DEVICE_REG_BAR_IDX, + .reg1_base_hi = 0, + }; + cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE, + REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC, + REG_LOC_DVSEC_REVID, dvsec); + dvsec = (uint8_t *)&(CXLDVSECDeviceGPF){ + .phase2_duration = 0x603, /* 3 seconds */ + .phase2_power = 0x33, /* 0x33 miliwatts */ + }; + cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE, + GPF_DEVICE_DVSEC_LENGTH, GPF_PORT_DVSEC, + GPF_DEVICE_DVSEC_REVID, dvsec); +} + +static void hdm_decoder_commit(CXLType3Dev *ct3d, int which) +{ + ComponentRegisters *cregs = &ct3d->cxl_cstate.crb; + uint32_t *cache_mem = cregs->cache_mem_registers; + + assert(which == 0); + + /* TODO: Sanity checks that the decoder is possible */ + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMIT, 0); + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, ERR, 0); + + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); +} + +static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size) +{ + CXLComponentState *cxl_cstate = opaque; + ComponentRegisters *cregs = &cxl_cstate->crb; + CXLType3Dev *ct3d = container_of(cxl_cstate, CXLType3Dev, cxl_cstate); + uint32_t *cache_mem = cregs->cache_mem_registers; + bool should_commit = false; + int which_hdm = -1; + + assert(size == 4); + g_assert(offset < CXL2_COMPONENT_CM_REGION_SIZE); + + switch (offset) { + case A_CXL_HDM_DECODER0_CTRL: + should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); + which_hdm = 0; + break; + default: + break; + } + + stl_le_p((uint8_t *)cache_mem + offset, value); + if (should_commit) { + hdm_decoder_commit(ct3d, which_hdm); + } +} + +static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) +{ + DeviceState *ds = DEVICE(ct3d); + MemoryRegion *mr; + char *name; + + if (!ct3d->hostmem) { + error_setg(errp, "memdev property must be set"); + return false; + } + + mr = host_memory_backend_get_memory(ct3d->hostmem); + if (!mr) { + error_setg(errp, "memdev property must be set"); + return false; + } + memory_region_set_nonvolatile(mr, true); + memory_region_set_enabled(mr, true); + host_memory_backend_set_mapped(ct3d->hostmem, true); + + if (ds->id) { + name = g_strdup_printf("cxl-type3-dpa-space:%s", ds->id); + } else { + name = g_strdup("cxl-type3-dpa-space"); + } + address_space_init(&ct3d->hostmem_as, mr, name); + g_free(name); + + ct3d->cxl_dstate.pmem_size = ct3d->hostmem->size; + + if (!ct3d->lsa) { + error_setg(errp, "lsa property must be set"); + return false; + } + + return true; +} + +static void ct3_realize(PCIDevice *pci_dev, Error **errp) +{ + CXLType3Dev *ct3d = CXL_TYPE3(pci_dev); + CXLComponentState *cxl_cstate = &ct3d->cxl_cstate; + ComponentRegisters *regs = &cxl_cstate->crb; + MemoryRegion *mr = ®s->component_registers; + uint8_t *pci_conf = pci_dev->config; + + if (!cxl_setup_memory(ct3d, errp)) { + return; + } + + pci_config_set_prog_interface(pci_conf, 0x10); + pci_config_set_class(pci_conf, PCI_CLASS_MEMORY_CXL); + + pcie_endpoint_cap_init(pci_dev, 0x80); + cxl_cstate->dvsec_offset = 0x100; + + ct3d->cxl_cstate.pdev = pci_dev; + build_dvsecs(ct3d); + + regs->special_ops = g_new0(MemoryRegionOps, 1); + regs->special_ops->write = ct3d_reg_write; + + cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate, + TYPE_CXL_TYPE3); + + pci_register_bar( + pci_dev, CXL_COMPONENT_REG_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, mr); + + cxl_device_register_block_init(OBJECT(pci_dev), &ct3d->cxl_dstate); + pci_register_bar(pci_dev, CXL_DEVICE_REG_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, + &ct3d->cxl_dstate.device_registers); +} + +static void ct3_exit(PCIDevice *pci_dev) +{ + CXLType3Dev *ct3d = CXL_TYPE3(pci_dev); + CXLComponentState *cxl_cstate = &ct3d->cxl_cstate; + ComponentRegisters *regs = &cxl_cstate->crb; + + g_free(regs->special_ops); + address_space_destroy(&ct3d->hostmem_as); +} + +/* TODO: Support multiple HDM decoders and DPA skip */ +static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa) +{ + uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers; + uint64_t decoder_base, decoder_size, hpa_offset; + uint32_t hdm0_ctrl; + int ig, iw; + + decoder_base = (((uint64_t)cache_mem[R_CXL_HDM_DECODER0_BASE_HI] << 32) | + cache_mem[R_CXL_HDM_DECODER0_BASE_LO]); + if ((uint64_t)host_addr < decoder_base) { + return false; + } + + hpa_offset = (uint64_t)host_addr - decoder_base; + + decoder_size = ((uint64_t)cache_mem[R_CXL_HDM_DECODER0_SIZE_HI] << 32) | + cache_mem[R_CXL_HDM_DECODER0_SIZE_LO]; + if (hpa_offset >= decoder_size) { + return false; + } + + hdm0_ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL]; + iw = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IW); + ig = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IG); + + *dpa = (MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) | + ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset) >> iw); + + return true; +} + +MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, + unsigned size, MemTxAttrs attrs) +{ + CXLType3Dev *ct3d = CXL_TYPE3(d); + uint64_t dpa_offset; + MemoryRegion *mr; + + /* TODO support volatile region */ + mr = host_memory_backend_get_memory(ct3d->hostmem); + if (!mr) { + return MEMTX_ERROR; + } + + if (!cxl_type3_dpa(ct3d, host_addr, &dpa_offset)) { + return MEMTX_ERROR; + } + + if (dpa_offset > int128_get64(mr->size)) { + return MEMTX_ERROR; + } + + return address_space_read(&ct3d->hostmem_as, dpa_offset, attrs, data, size); +} + +MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, + unsigned size, MemTxAttrs attrs) +{ + CXLType3Dev *ct3d = CXL_TYPE3(d); + uint64_t dpa_offset; + MemoryRegion *mr; + + mr = host_memory_backend_get_memory(ct3d->hostmem); + if (!mr) { + return MEMTX_OK; + } + + if (!cxl_type3_dpa(ct3d, host_addr, &dpa_offset)) { + return MEMTX_OK; + } + + if (dpa_offset > int128_get64(mr->size)) { + return MEMTX_OK; + } + return address_space_write(&ct3d->hostmem_as, dpa_offset, attrs, + &data, size); +} + +static void ct3d_reset(DeviceState *dev) +{ + CXLType3Dev *ct3d = CXL_TYPE3(dev); + uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers; + uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask; + + cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE); + cxl_device_register_init_common(&ct3d->cxl_dstate); +} + +static Property ct3_props[] = { + DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND, + HostMemoryBackend *), + DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND, + HostMemoryBackend *), + DEFINE_PROP_END_OF_LIST(), +}; + +static uint64_t get_lsa_size(CXLType3Dev *ct3d) +{ + MemoryRegion *mr; + + mr = host_memory_backend_get_memory(ct3d->lsa); + return memory_region_size(mr); +} + +static void validate_lsa_access(MemoryRegion *mr, uint64_t size, + uint64_t offset) +{ + assert(offset + size <= memory_region_size(mr)); + assert(offset + size > offset); +} + +static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size, + uint64_t offset) +{ + MemoryRegion *mr; + void *lsa; + + mr = host_memory_backend_get_memory(ct3d->lsa); + validate_lsa_access(mr, size, offset); + + lsa = memory_region_get_ram_ptr(mr) + offset; + memcpy(buf, lsa, size); + + return size; +} + +static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size, + uint64_t offset) +{ + MemoryRegion *mr; + void *lsa; + + mr = host_memory_backend_get_memory(ct3d->lsa); + validate_lsa_access(mr, size, offset); + + lsa = memory_region_get_ram_ptr(mr) + offset; + memcpy(lsa, buf, size); + memory_region_set_dirty(mr, offset, size); + + /* + * Just like the PMEM, if the guest is not allowed to exit gracefully, label + * updates will get lost. + */ +} + +static void ct3_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); + CXLType3Class *cvc = CXL_TYPE3_CLASS(oc); + + pc->realize = ct3_realize; + pc->exit = ct3_exit; + pc->class_id = PCI_CLASS_STORAGE_EXPRESS; + pc->vendor_id = PCI_VENDOR_ID_INTEL; + pc->device_id = 0xd93; /* LVF for now */ + pc->revision = 1; + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->desc = "CXL PMEM Device (Type 3)"; + dc->reset = ct3d_reset; + device_class_set_props(dc, ct3_props); + + cvc->get_lsa_size = get_lsa_size; + cvc->get_lsa = get_lsa; + cvc->set_lsa = set_lsa; +} + +static const TypeInfo ct3d_info = { + .name = TYPE_CXL_TYPE3, + .parent = TYPE_PCI_DEVICE, + .class_size = sizeof(struct CXLType3Class), + .class_init = ct3_class_init, + .instance_size = sizeof(CXLType3Dev), + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CXL_DEVICE }, + { INTERFACE_PCIE_DEVICE }, + {} + }, +}; + +static void ct3d_registers(void) +{ + type_register_static(&ct3d_info); +} + +type_init(ct3d_registers); diff --git a/hw/mem/meson.build b/hw/mem/meson.build index 82f86d117e..609b2b36fc 100644 --- a/hw/mem/meson.build +++ b/hw/mem/meson.build @@ -3,6 +3,7 @@ mem_ss.add(files('memory-device.c')) mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c')) mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c')) mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c')) +mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c')) softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss) diff --git a/hw/meson.build b/hw/meson.build index b3366c888e..9992c5101e 100644 --- a/hw/meson.build +++ b/hw/meson.build @@ -6,6 +6,7 @@ subdir('block') subdir('char') subdir('core') subdir('cpu') +subdir('cxl') subdir('display') subdir('dma') subdir('gpio') diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 30379d2ca4..ccac5b7a64 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -201,7 +201,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF); } if (~net->dev.features & net->dev.backend_features) { - fprintf(stderr, "vhost lacks feature mask %" PRIu64 + fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 " for backend\n", (uint64_t)(~net->dev.features & net->dev.backend_features)); goto fail; @@ -213,7 +213,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { features = vhost_user_get_acked_features(net->nc); if (~net->dev.features & features) { - fprintf(stderr, "vhost lacks feature mask %" PRIu64 + fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 " for backend\n", (uint64_t)(~net->dev.features & features)); goto fail; @@ -381,6 +381,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, r = vhost_set_vring_enable(peer, peer->vring_enable); if (r < 0) { + vhost_net_stop_one(get_vhost_net(peer), dev); goto err_start; } } @@ -390,7 +391,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, err_start: while (--i >= 0) { - peer = qemu_get_peer(ncs , i); + peer = qemu_get_peer(ncs, i < data_queue_pairs ? + i : n->max_queue_pairs); vhost_net_stop_one(get_vhost_net(peer), dev); } e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 1067e72b39..7ad948ee7c 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include "qemu/atomic.h" #include "qemu/iov.h" +#include "qemu/log.h" #include "qemu/main-loop.h" #include "qemu/module.h" #include "hw/virtio/virtio.h" @@ -245,7 +246,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) VirtIODevice *vdev = VIRTIO_DEVICE(n); NetClientState *nc = qemu_get_queue(n->nic); int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; - int cvq = n->max_ncs - n->max_queue_pairs; + int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? + n->max_ncs - n->max_queue_pairs : 0; if (!get_vhost_net(nc->peer)) { return; @@ -1379,6 +1381,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, { VirtIODevice *vdev = VIRTIO_DEVICE(n); uint16_t queue_pairs; + NetClientState *nc = qemu_get_queue(n->nic); virtio_net_disable_rss(n); if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { @@ -1410,6 +1413,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, return VIRTIO_NET_ERR; } + /* Avoid changing the number of queue_pairs for vdpa device in + * userspace handler. A future fix is needed to handle the mq + * change in userspace handler with vhost-vdpa. Let's disable + * the mq handling from userspace for now and only allow get + * done through the kernel. Ripples may be seen when falling + * back to userspace, but without doing it qemu process would + * crash on a recursive entry to virtio_net_set_status(). + */ + if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { + return VIRTIO_NET_ERR; + } + n->curr_queue_pairs = queue_pairs; /* stop the backend before changing the number of queue_pairs to avoid handling a * disabled queue */ @@ -1443,7 +1458,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) } iov_cnt = elem->out_num; - iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num); + iov2 = iov = g_memdup2(elem->out_sg, + sizeof(struct iovec) * elem->out_num); s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); if (s != sizeof(ctrl)) { @@ -3170,8 +3186,22 @@ static NetClientInfo net_virtio_info = { static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) { VirtIONet *n = VIRTIO_NET(vdev); - NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); + NetClientState *nc; assert(n->vhost_started); + if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { + /* Must guard against invalid features and bogus queue index + * from being set by malicious guest, or penetrated through + * buggy migration stream. + */ + if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: bogus vq index ignored\n", __func__); + return false; + } + nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); + } else { + nc = qemu_get_subqueue(n->nic, vq2q(idx)); + } return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); } @@ -3179,8 +3209,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) { VirtIONet *n = VIRTIO_NET(vdev); - NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); + NetClientState *nc; assert(n->vhost_started); + if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { + /* Must guard against invalid features and bogus queue index + * from being set by malicious guest, or penetrated through + * buggy migration stream. + */ + if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: bogus vq index ignored\n", __func__); + return; + } + nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); + } else { + nc = qemu_get_subqueue(n->nic, vq2q(idx)); + } vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); } @@ -3392,7 +3436,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) } virtio_net_set_config_size(n, n->host_features); - virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size); + virtio_init(vdev, VIRTIO_ID_NET, n->config_size); /* * We set a lower limit on RX queue size to what it always was. @@ -3619,6 +3663,14 @@ static bool dev_unplug_pending(void *opaque) return vdc->primary_unplug_pending(dev); } +static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) +{ + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc = qemu_get_queue(n->nic); + struct vhost_net *net = get_vhost_net(nc->peer); + return &net->dev; +} + static const VMStateDescription vmstate_virtio_net = { .name = "virtio-net", .minimum_version_id = VIRTIO_NET_VM_VERSION, @@ -3721,6 +3773,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) vdc->post_load = virtio_net_post_load_virtio; vdc->vmsd = &vmstate_virtio_net_device; vdc->primary_unplug_pending = primary_unplug_pending; + vdc->get_vhost = virtio_net_get_vhost; } static const TypeInfo virtio_net_info = { diff --git a/hw/pci-bridge/Kconfig b/hw/pci-bridge/Kconfig index f8df4315ba..02614f49aa 100644 --- a/hw/pci-bridge/Kconfig +++ b/hw/pci-bridge/Kconfig @@ -27,3 +27,8 @@ config DEC_PCI config SIMBA bool + +config CXL + bool + default y if PCI_EXPRESS && PXB + depends on PCI_EXPRESS && MSI_NONBROKEN && PXB diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c new file mode 100644 index 0000000000..fb213fa06e --- /dev/null +++ b/hw/pci-bridge/cxl_root_port.c @@ -0,0 +1,236 @@ +/* + * CXL 2.0 Root Port Implementation + * + * Copyright(C) 2020 Intel Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/range.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pcie_port.h" +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" +#include "qapi/error.h" +#include "hw/cxl/cxl.h" + +#define CXL_ROOT_PORT_DID 0x7075 + +/* Copied from the gen root port which we derive */ +#define GEN_PCIE_ROOT_PORT_AER_OFFSET 0x100 +#define GEN_PCIE_ROOT_PORT_ACS_OFFSET \ + (GEN_PCIE_ROOT_PORT_AER_OFFSET + PCI_ERR_SIZEOF) +#define CXL_ROOT_PORT_DVSEC_OFFSET \ + (GEN_PCIE_ROOT_PORT_ACS_OFFSET + PCI_ACS_SIZEOF) + +typedef struct CXLRootPort { + /*< private >*/ + PCIESlot parent_obj; + + CXLComponentState cxl_cstate; + PCIResReserve res_reserve; +} CXLRootPort; + +#define TYPE_CXL_ROOT_PORT "cxl-rp" +DECLARE_INSTANCE_CHECKER(CXLRootPort, CXL_ROOT_PORT, TYPE_CXL_ROOT_PORT) + +static void latch_registers(CXLRootPort *crp) +{ + uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers; + uint32_t *write_msk = crp->cxl_cstate.crb.cache_mem_regs_write_mask; + + cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT); +} + +static void build_dvsecs(CXLComponentState *cxl) +{ + uint8_t *dvsec; + + dvsec = (uint8_t *)&(CXLDVSECPortExtensions){ 0 }; + cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT, + EXTENSIONS_PORT_DVSEC_LENGTH, + EXTENSIONS_PORT_DVSEC, + EXTENSIONS_PORT_DVSEC_REVID, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECPortGPF){ + .rsvd = 0, + .phase1_ctrl = 1, /* 1μs timeout */ + .phase2_ctrl = 1, /* 1μs timeout */ + }; + cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT, + GPF_PORT_DVSEC_LENGTH, GPF_PORT_DVSEC, + GPF_PORT_DVSEC_REVID, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){ + .cap = 0x26, /* IO, Mem, non-MLD */ + .ctrl = 0x2, + .status = 0x26, /* same */ + .rcvd_mod_ts_data_phase1 = 0xef, + }; + cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT, + PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0, + PCIE_FLEXBUS_PORT_DVSEC, + PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){ + .rsvd = 0, + .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX, + .reg0_base_hi = 0, + }; + cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT, + REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC, + REG_LOC_DVSEC_REVID, dvsec); +} + +static void cxl_rp_realize(DeviceState *dev, Error **errp) +{ + PCIDevice *pci_dev = PCI_DEVICE(dev); + PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev); + CXLRootPort *crp = CXL_ROOT_PORT(dev); + CXLComponentState *cxl_cstate = &crp->cxl_cstate; + ComponentRegisters *cregs = &cxl_cstate->crb; + MemoryRegion *component_bar = &cregs->component_registers; + Error *local_err = NULL; + + rpc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + int rc = + pci_bridge_qemu_reserve_cap_init(pci_dev, 0, crp->res_reserve, errp); + if (rc < 0) { + rpc->parent_class.exit(pci_dev); + return; + } + + if (!crp->res_reserve.io || crp->res_reserve.io == -1) { + pci_word_test_and_clear_mask(pci_dev->wmask + PCI_COMMAND, + PCI_COMMAND_IO); + pci_dev->wmask[PCI_IO_BASE] = 0; + pci_dev->wmask[PCI_IO_LIMIT] = 0; + } + + cxl_cstate->dvsec_offset = CXL_ROOT_PORT_DVSEC_OFFSET; + cxl_cstate->pdev = pci_dev; + build_dvsecs(&crp->cxl_cstate); + + cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate, + TYPE_CXL_ROOT_PORT); + + pci_register_bar(pci_dev, CXL_COMPONENT_REG_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, + component_bar); +} + +static void cxl_rp_reset(DeviceState *dev) +{ + PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev); + CXLRootPort *crp = CXL_ROOT_PORT(dev); + + rpc->parent_reset(dev); + + latch_registers(crp); +} + +static Property gen_rp_props[] = { + DEFINE_PROP_UINT32("bus-reserve", CXLRootPort, res_reserve.bus, -1), + DEFINE_PROP_SIZE("io-reserve", CXLRootPort, res_reserve.io, -1), + DEFINE_PROP_SIZE("mem-reserve", CXLRootPort, res_reserve.mem_non_pref, -1), + DEFINE_PROP_SIZE("pref32-reserve", CXLRootPort, res_reserve.mem_pref_32, + -1), + DEFINE_PROP_SIZE("pref64-reserve", CXLRootPort, res_reserve.mem_pref_64, + -1), + DEFINE_PROP_END_OF_LIST() +}; + +static void cxl_rp_dvsec_write_config(PCIDevice *dev, uint32_t addr, + uint32_t val, int len) +{ + CXLRootPort *crp = CXL_ROOT_PORT(dev); + + if (range_contains(&crp->cxl_cstate.dvsecs[EXTENSIONS_PORT_DVSEC], addr)) { + uint8_t *reg = &dev->config[addr]; + addr -= crp->cxl_cstate.dvsecs[EXTENSIONS_PORT_DVSEC].lob; + if (addr == PORT_CONTROL_OFFSET) { + if (pci_get_word(reg) & PORT_CONTROL_UNMASK_SBR) { + /* unmask SBR */ + qemu_log_mask(LOG_UNIMP, "SBR mask control is not supported\n"); + } + if (pci_get_word(reg) & PORT_CONTROL_ALT_MEMID_EN) { + /* Alt Memory & ID Space Enable */ + qemu_log_mask(LOG_UNIMP, + "Alt Memory & ID space is not supported\n"); + } + } + } +} + +static void cxl_rp_write_config(PCIDevice *d, uint32_t address, uint32_t val, + int len) +{ + uint16_t slt_ctl, slt_sta; + + pcie_cap_slot_get(d, &slt_ctl, &slt_sta); + pci_bridge_write_config(d, address, val, len); + pcie_cap_flr_write_config(d, address, val, len); + pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len); + pcie_aer_write_config(d, address, val, len); + + cxl_rp_dvsec_write_config(d, address, val, len); +} + +static void cxl_root_port_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCIDeviceClass *k = PCI_DEVICE_CLASS(oc); + PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(oc); + + k->vendor_id = PCI_VENDOR_ID_INTEL; + k->device_id = CXL_ROOT_PORT_DID; + dc->desc = "CXL Root Port"; + k->revision = 0; + device_class_set_props(dc, gen_rp_props); + k->config_write = cxl_rp_write_config; + + device_class_set_parent_realize(dc, cxl_rp_realize, &rpc->parent_realize); + device_class_set_parent_reset(dc, cxl_rp_reset, &rpc->parent_reset); + + rpc->aer_offset = GEN_PCIE_ROOT_PORT_AER_OFFSET; + rpc->acs_offset = GEN_PCIE_ROOT_PORT_ACS_OFFSET; + + dc->hotpluggable = false; +} + +static const TypeInfo cxl_root_port_info = { + .name = TYPE_CXL_ROOT_PORT, + .parent = TYPE_PCIE_ROOT_PORT, + .instance_size = sizeof(CXLRootPort), + .class_init = cxl_root_port_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CXL_DEVICE }, + { } + }, +}; + +static void cxl_register(void) +{ + type_register_static(&cxl_root_port_info); +} + +type_init(cxl_register); diff --git a/hw/pci-bridge/meson.build b/hw/pci-bridge/meson.build index daab8acf2a..b6d26a03d5 100644 --- a/hw/pci-bridge/meson.build +++ b/hw/pci-bridge/meson.build @@ -5,6 +5,7 @@ pci_ss.add(when: 'CONFIG_IOH3420', if_true: files('ioh3420.c')) pci_ss.add(when: 'CONFIG_PCIE_PORT', if_true: files('pcie_root_port.c', 'gen_pcie_root_port.c', 'pcie_pci_bridge.c')) pci_ss.add(when: 'CONFIG_PXB', if_true: files('pci_expander_bridge.c')) pci_ss.add(when: 'CONFIG_XIO3130', if_true: files('xio3130_upstream.c', 'xio3130_downstream.c')) +pci_ss.add(when: 'CONFIG_CXL', if_true: files('cxl_root_port.c')) # NewWorld PowerMac pci_ss.add(when: 'CONFIG_DEC_PCI', if_true: files('dec.c')) diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index de932286b5..69244decdb 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -17,6 +17,7 @@ #include "hw/pci/pci_host.h" #include "hw/qdev-properties.h" #include "hw/pci/pci_bridge.h" +#include "hw/cxl/cxl.h" #include "qemu/range.h" #include "qemu/error-report.h" #include "qemu/module.h" @@ -24,6 +25,8 @@ #include "hw/boards.h" #include "qom/object.h" +enum BusType { PCI, PCIE, CXL }; + #define TYPE_PXB_BUS "pxb-bus" typedef struct PXBBus PXBBus; DECLARE_INSTANCE_CHECKER(PXBBus, PXB_BUS, @@ -33,6 +36,10 @@ DECLARE_INSTANCE_CHECKER(PXBBus, PXB_BUS, DECLARE_INSTANCE_CHECKER(PXBBus, PXB_PCIE_BUS, TYPE_PXB_PCIE_BUS) +#define TYPE_PXB_CXL_BUS "pxb-cxl-bus" +DECLARE_INSTANCE_CHECKER(PXBBus, PXB_CXL_BUS, + TYPE_PXB_CXL_BUS) + struct PXBBus { /*< private >*/ PCIBus parent_obj; @@ -50,18 +57,13 @@ DECLARE_INSTANCE_CHECKER(PXBDev, PXB_DEV, DECLARE_INSTANCE_CHECKER(PXBDev, PXB_PCIE_DEV, TYPE_PXB_PCIE_DEVICE) -struct PXBDev { - /*< private >*/ - PCIDevice parent_obj; - /*< public >*/ - - uint8_t bus_nr; - uint16_t numa_node; - bool bypass_iommu; -}; - static PXBDev *convert_to_pxb(PCIDevice *dev) { + /* A CXL PXB's parent bus is PCIe, so the normal check won't work */ + if (object_dynamic_cast(OBJECT(dev), TYPE_PXB_CXL_DEVICE)) { + return PXB_CXL_DEV(dev); + } + return pci_bus_is_express(pci_get_bus(dev)) ? PXB_PCIE_DEV(dev) : PXB_DEV(dev); } @@ -70,6 +72,13 @@ static GList *pxb_dev_list; #define TYPE_PXB_HOST "pxb-host" +CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb) +{ + CXLHost *host = PXB_CXL_HOST(hb); + + return &host->cxl_cstate; +} + static int pxb_bus_num(PCIBus *bus) { PXBDev *pxb = convert_to_pxb(bus->parent_dev); @@ -106,11 +115,20 @@ static const TypeInfo pxb_pcie_bus_info = { .class_init = pxb_bus_class_init, }; +static const TypeInfo pxb_cxl_bus_info = { + .name = TYPE_PXB_CXL_BUS, + .parent = TYPE_CXL_BUS, + .instance_size = sizeof(PXBBus), + .class_init = pxb_bus_class_init, +}; + static const char *pxb_host_root_bus_path(PCIHostState *host_bridge, PCIBus *rootbus) { - PXBBus *bus = pci_bus_is_express(rootbus) ? - PXB_PCIE_BUS(rootbus) : PXB_BUS(rootbus); + PXBBus *bus = pci_bus_is_cxl(rootbus) ? + PXB_CXL_BUS(rootbus) : + pci_bus_is_express(rootbus) ? PXB_PCIE_BUS(rootbus) : + PXB_BUS(rootbus); snprintf(bus->bus_path, 8, "0000:%02x", pxb_bus_num(rootbus)); return bus->bus_path; @@ -166,6 +184,52 @@ static const TypeInfo pxb_host_info = { .class_init = pxb_host_class_init, }; +static void pxb_cxl_realize(DeviceState *dev, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + CXLHost *cxl = PXB_CXL_HOST(dev); + CXLComponentState *cxl_cstate = &cxl->cxl_cstate; + struct MemoryRegion *mr = &cxl_cstate->crb.component_registers; + hwaddr offset; + + cxl_component_register_block_init(OBJECT(dev), cxl_cstate, + TYPE_PXB_CXL_HOST); + sysbus_init_mmio(sbd, mr); + + offset = memory_region_size(mr) * ms->cxl_devices_state->next_mr_idx; + if (offset > memory_region_size(&ms->cxl_devices_state->host_mr)) { + error_setg(errp, "Insufficient space for pxb cxl host register space"); + return; + } + + memory_region_add_subregion(&ms->cxl_devices_state->host_mr, offset, mr); + ms->cxl_devices_state->next_mr_idx++; +} + +static void pxb_cxl_host_class_init(ObjectClass *class, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(class); + PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(class); + + hc->root_bus_path = pxb_host_root_bus_path; + dc->fw_name = "cxl"; + dc->realize = pxb_cxl_realize; + /* Reason: Internal part of the pxb/pxb-pcie device, not usable by itself */ + dc->user_creatable = false; +} + +/* + * This is a device to handle the MMIO for a CXL host bridge. It does nothing + * else. + */ +static const TypeInfo cxl_host_info = { + .name = TYPE_PXB_CXL_HOST, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_size = sizeof(CXLHost), + .class_init = pxb_cxl_host_class_init, +}; + /* * Registers the PXB bus as a child of pci host root bus. */ @@ -212,6 +276,17 @@ static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin) return pin - PCI_SLOT(pxb->devfn); } +static void pxb_dev_reset(DeviceState *dev) +{ + CXLHost *cxl = PXB_CXL_DEV(dev)->cxl.cxl_host_bridge; + CXLComponentState *cxl_cstate = &cxl->cxl_cstate; + uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers; + uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask; + + cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8); +} + static gint pxb_compare(gconstpointer a, gconstpointer b) { const PXBDev *pxb_a = a, *pxb_b = b; @@ -221,7 +296,8 @@ static gint pxb_compare(gconstpointer a, gconstpointer b) 0; } -static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp) +static void pxb_dev_realize_common(PCIDevice *dev, enum BusType type, + Error **errp) { PXBDev *pxb = convert_to_pxb(dev); DeviceState *ds, *bds = NULL; @@ -245,9 +321,13 @@ static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp) dev_name = dev->qdev.id; } - ds = qdev_new(TYPE_PXB_HOST); - if (pcie) { + ds = qdev_new(type == CXL ? TYPE_PXB_CXL_HOST : TYPE_PXB_HOST); + if (type == PCIE) { bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_PCIE_BUS); + } else if (type == CXL) { + bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_CXL_BUS); + bus->flags |= PCI_BUS_CXL; + PXB_CXL_DEV(dev)->cxl.cxl_host_bridge = PXB_CXL_HOST(ds); } else { bus = pci_root_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS); bds = qdev_new("pci-bridge"); @@ -295,7 +375,7 @@ static void pxb_dev_realize(PCIDevice *dev, Error **errp) return; } - pxb_dev_realize_common(dev, false, errp); + pxb_dev_realize_common(dev, PCI, errp); } static void pxb_dev_exitfn(PCIDevice *pci_dev) @@ -348,7 +428,7 @@ static void pxb_pcie_dev_realize(PCIDevice *dev, Error **errp) return; } - pxb_dev_realize_common(dev, true, errp); + pxb_dev_realize_common(dev, PCIE, errp); } static void pxb_pcie_dev_class_init(ObjectClass *klass, void *data) @@ -379,13 +459,67 @@ static const TypeInfo pxb_pcie_dev_info = { }, }; +static void pxb_cxl_dev_realize(PCIDevice *dev, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + + /* A CXL PXB's parent bus is still PCIe */ + if (!pci_bus_is_express(pci_get_bus(dev))) { + error_setg(errp, "pxb-cxl devices cannot reside on a PCI bus"); + return; + } + if (!ms->cxl_devices_state->is_enabled) { + error_setg(errp, "Machine does not have cxl=on"); + return; + } + + pxb_dev_realize_common(dev, CXL, errp); + pxb_dev_reset(DEVICE(dev)); +} + +static void pxb_cxl_dev_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = pxb_cxl_dev_realize; + k->exit = pxb_dev_exitfn; + /* + * XXX: These types of bridges don't actually show up in the hierarchy so + * vendor, device, class, etc. ids are intentionally left out. + */ + + dc->desc = "CXL Host Bridge"; + device_class_set_props(dc, pxb_dev_properties); + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + + /* Host bridges aren't hotpluggable. FIXME: spec reference */ + dc->hotpluggable = false; + dc->reset = pxb_dev_reset; +} + +static const TypeInfo pxb_cxl_dev_info = { + .name = TYPE_PXB_CXL_DEVICE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PXBDev), + .class_init = pxb_cxl_dev_class_init, + .interfaces = + (InterfaceInfo[]){ + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + {}, + }, +}; + static void pxb_register_types(void) { type_register_static(&pxb_bus_info); type_register_static(&pxb_pcie_bus_info); + type_register_static(&pxb_cxl_bus_info); type_register_static(&pxb_host_info); + type_register_static(&cxl_host_info); type_register_static(&pxb_dev_info); type_register_static(&pxb_pcie_dev_info); + type_register_static(&pxb_cxl_dev_info); } type_init(pxb_register_types) diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c index f1cfe9d14a..460e48269d 100644 --- a/hw/pci-bridge/pcie_root_port.c +++ b/hw/pci-bridge/pcie_root_port.c @@ -67,7 +67,11 @@ static void rp_realize(PCIDevice *d, Error **errp) int rc; pci_config_set_interrupt_pin(d->config, 1); - pci_bridge_initfn(d, TYPE_PCIE_BUS); + if (d->cap_present & QEMU_PCIE_CAP_CXL) { + pci_bridge_initfn(d, TYPE_CXL_BUS); + } else { + pci_bridge_initfn(d, TYPE_PCIE_BUS); + } pcie_port_init_reg(d); rc = pci_bridge_ssvid_init(d, rpc->ssvid_offset, dc->vendor_id, diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c index e7e162a00a..7c7316bc96 100644 --- a/hw/pci-host/gpex-acpi.c +++ b/hw/pci-host/gpex-acpi.c @@ -5,6 +5,7 @@ #include "hw/pci/pci_bus.h" #include "hw/pci/pci_bridge.h" #include "hw/pci/pcie_host.h" +#include "hw/acpi/cxl.h" static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq) { @@ -139,6 +140,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) QLIST_FOREACH(bus, &bus->child, sibling) { uint8_t bus_num = pci_bus_num(bus); uint8_t numa_node = pci_bus_numa_node(bus); + bool is_cxl = pci_bus_is_cxl(bus); if (!pci_bus_is_root(bus)) { continue; @@ -154,8 +156,16 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) } dev = aml_device("PC%.02X", bus_num); - aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A08"))); - aml_append(dev, aml_name_decl("_CID", aml_string("PNP0A03"))); + if (is_cxl) { + struct Aml *pkg = aml_package(2); + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0016"))); + aml_append(pkg, aml_eisaid("PNP0A08")); + aml_append(pkg, aml_eisaid("PNP0A03")); + aml_append(dev, aml_name_decl("_CID", pkg)); + } else { + aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A08"))); + aml_append(dev, aml_name_decl("_CID", aml_string("PNP0A03"))); + } aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); aml_append(dev, aml_name_decl("_STR", aml_unicode("pxb Device"))); @@ -175,7 +185,11 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) cfg->pio.base, 0, 0, 0); aml_append(dev, aml_name_decl("_CRS", crs)); - acpi_dsdt_add_pci_osc(dev); + if (is_cxl) { + build_cxl_osc_method(dev); + } else { + acpi_dsdt_add_pci_osc(dev); + } aml_append(scope, dev); } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 9c58f02853..a9b37f8000 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -200,6 +200,11 @@ static const TypeInfo pci_bus_info = { .class_init = pci_bus_class_init, }; +static const TypeInfo cxl_interface_info = { + .name = INTERFACE_CXL_DEVICE, + .parent = TYPE_INTERFACE, +}; + static const TypeInfo pcie_interface_info = { .name = INTERFACE_PCIE_DEVICE, .parent = TYPE_INTERFACE, @@ -223,6 +228,12 @@ static const TypeInfo pcie_bus_info = { .class_init = pcie_bus_class_init, }; +static const TypeInfo cxl_bus_info = { + .name = TYPE_CXL_BUS, + .parent = TYPE_PCIE_BUS, + .class_init = pcie_bus_class_init, +}; + static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num); static void pci_update_mappings(PCIDevice *d); static void pci_irq_handler(void *opaque, int irq_num, int level); @@ -2182,6 +2193,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } + if (object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE)) { + pci_dev->cap_present |= QEMU_PCIE_CAP_CXL; + } + pci_dev = do_pci_register_device(pci_dev, object_get_typename(OBJECT(qdev)), pci_dev->devfn, errp); @@ -2747,7 +2762,9 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data) object_class_dynamic_cast(klass, INTERFACE_CONVENTIONAL_PCI_DEVICE); ObjectClass *pcie = object_class_dynamic_cast(klass, INTERFACE_PCIE_DEVICE); - assert(conventional || pcie); + ObjectClass *cxl = + object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE); + assert(conventional || pcie || cxl); } } @@ -2937,7 +2954,9 @@ static void pci_register_types(void) { type_register_static(&pci_bus_info); type_register_static(&pcie_bus_info); + type_register_static(&cxl_bus_info); type_register_static(&conventional_pci_interface_info); + type_register_static(&cxl_interface_info); type_register_static(&pcie_interface_info); type_register_static(&pci_device_type_info); } diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index e95c1e5519..687e4e763a 100644 --- a/hw/pci/pcie_port.c +++ b/hw/pci/pcie_port.c @@ -136,6 +136,31 @@ static void pcie_port_class_init(ObjectClass *oc, void *data) device_class_set_props(dc, pcie_port_props); } +PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn) +{ + int devfn; + + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { + PCIDevice *d = bus->devices[devfn]; + PCIEPort *port; + + if (!d || !pci_is_express(d) || !d->exp.exp_cap) { + continue; + } + + if (!object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) { + continue; + } + + port = PCIE_PORT(d); + if (port->port == pn) { + return d; + } + } + + return NULL; +} + static const TypeInfo pcie_port_type_info = { .name = TYPE_PCIE_PORT, .parent = TYPE_PCI_BRIDGE, diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 778f43e4c1..3059068175 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -273,6 +273,13 @@ static void vhost_scsi_unrealize(DeviceState *dev) virtio_scsi_common_unrealize(dev); } +static struct vhost_dev *vhost_scsi_get_vhost(VirtIODevice *vdev) +{ + VHostSCSI *s = VHOST_SCSI(vdev); + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + return &vsc->dev; +} + static Property vhost_scsi_properties[] = { DEFINE_PROP_STRING("vhostfd", VirtIOSCSICommon, conf.vhostfd), DEFINE_PROP_STRING("wwpn", VirtIOSCSICommon, conf.wwpn), @@ -307,6 +314,7 @@ static void vhost_scsi_class_init(ObjectClass *klass, void *data) vdc->get_features = vhost_scsi_common_get_features; vdc->set_config = vhost_scsi_common_set_config; vdc->set_status = vhost_scsi_set_status; + vdc->get_vhost = vhost_scsi_get_vhost; fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path; } diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 1b2f7eed98..9be21d07ee 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -121,6 +121,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) vsc->dev.backend_features = 0; vqs = vsc->dev.vqs; + s->vhost_user.supports_config = true; ret = vhost_dev_init(&vsc->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0, errp); if (ret < 0) { diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index db54d104be..4141dddd51 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -1013,8 +1013,7 @@ void virtio_scsi_common_realize(DeviceState *dev, VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(dev); int i; - virtio_init(vdev, "virtio-scsi", VIRTIO_ID_SCSI, - sizeof(VirtIOSCSIConfig)); + virtio_init(vdev, VIRTIO_ID_SCSI, sizeof(VirtIOSCSIConfig)); if (s->conf.num_queues == VIRTIO_SCSI_AUTO_NUM_QUEUES) { s->conf.num_queues = 1; diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 333348d9d5..ab8e095b73 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -21,6 +21,9 @@ vhost_user_set_mem_table_withfd(int index, const char *name, uint64_t memory_siz vhost_user_postcopy_waker(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64 vhost_user_postcopy_waker_found(uint64_t client_addr) "0x%"PRIx64 vhost_user_postcopy_waker_nomatch(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64 +vhost_user_read(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32"" +vhost_user_write(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32"" +vhost_user_create_notifier(int idx, void *n) "idx:%d n:%p" # vhost-vdpa.c vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 @@ -89,7 +92,12 @@ virtio_mmio_guest_page(uint64_t size, int shift) "guest page size 0x%" PRIx64 " virtio_mmio_queue_write(uint64_t value, int max_size) "mmio_queue write 0x%" PRIx64 " max %d" virtio_mmio_setting_irq(int level) "virtio_mmio setting IRQ %d" -# virtio-iommu.c +# virtio-pci.c +virtio_pci_notify(uint16_t vector) "virtio_pci_notify vec 0x%x" +virtio_pci_notify_write(uint64_t addr, uint64_t val, unsigned int size) "0x%" PRIx64" = 0x%" PRIx64 " (%d)" +virtio_pci_notify_write_pio(uint64_t addr, uint64_t val, unsigned int size) "0x%" PRIx64" = 0x%" PRIx64 " (%d)" + +# hw/virtio/virtio-iommu.c virtio_iommu_device_reset(void) "reset!" virtio_iommu_system_reset(void) "system reset!" virtio_iommu_get_features(uint64_t features) "device supports features=0x%"PRIx64 diff --git a/hw/virtio/vhost-scsi-pci.c b/hw/virtio/vhost-scsi-pci.c index cb71a294fa..08980bc23b 100644 --- a/hw/virtio/vhost-scsi-pci.c +++ b/hw/virtio/vhost-scsi-pci.c @@ -21,7 +21,7 @@ #include "hw/virtio/vhost-scsi.h" #include "qapi/error.h" #include "qemu/module.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qom/object.h" typedef struct VHostSCSIPCI VHostSCSIPCI; diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 1e5cfe2af6..56c96ebd13 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -138,6 +138,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, for (n = 0; n < num; n++) { if (more_descs || (n + 1 < num)) { descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT); + descs[i].next = cpu_to_le16(svq->desc_next[i]); } else { descs[i].flags = flags; } @@ -145,10 +146,10 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, descs[i].len = cpu_to_le32(iovec[n].iov_len); last = i; - i = cpu_to_le16(descs[i].next); + i = cpu_to_le16(svq->desc_next[i]); } - svq->free_head = le16_to_cpu(descs[last].next); + svq->free_head = le16_to_cpu(svq->desc_next[last]); } static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, @@ -198,11 +199,19 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, return true; } +/** + * Add an element to a SVQ. + * + * The caller must check that there is enough slots for the new element. It + * takes ownership of the element: In case of failure, it is free and the SVQ + * is considered broken. + */ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) { unsigned qemu_head; bool ok = vhost_svq_add_split(svq, elem, &qemu_head); if (unlikely(!ok)) { + g_free(elem); return false; } @@ -333,13 +342,22 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); } +static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq, + uint16_t num, uint16_t i) +{ + for (uint16_t j = 0; j < (num - 1); ++j) { + i = le16_to_cpu(svq->desc_next[i]); + } + + return i; +} + static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, uint32_t *len) { - vring_desc_t *descs = svq->vring.desc; const vring_used_t *used = svq->vring.used; vring_used_elem_t used_elem; - uint16_t last_used; + uint16_t last_used, last_used_chain, num; if (!vhost_svq_more_used(svq)) { return NULL; @@ -365,7 +383,10 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, return NULL; } - descs[used_elem.id].next = svq->free_head; + num = svq->ring_id_maps[used_elem.id]->in_num + + svq->ring_id_maps[used_elem.id]->out_num; + last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); + svq->desc_next[last_used_chain] = svq->free_head; svq->free_head = used_elem.id; *len = used_elem.len; @@ -540,8 +561,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size); memset(svq->vring.used, 0, device_size); svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); + svq->desc_next = g_new0(uint16_t, svq->vring.num); for (unsigned i = 0; i < svq->vring.num - 1; i++) { - svq->vring.desc[i].next = cpu_to_le16(i + 1); + svq->desc_next[i] = cpu_to_le16(i + 1); } } @@ -574,6 +596,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) virtqueue_detach_element(svq->vq, next_avail_elem, 0); } svq->vq = NULL; + g_free(svq->desc_next); g_free(svq->ring_id_maps); qemu_vfree(svq->vring.desc); qemu_vfree(svq->vring.used); diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h index e5e24c536d..c132c994e9 100644 --- a/hw/virtio/vhost-shadow-virtqueue.h +++ b/hw/virtio/vhost-shadow-virtqueue.h @@ -53,6 +53,12 @@ typedef struct VhostShadowVirtqueue { /* Next VirtQueue element that guest made available */ VirtQueueElement *next_guest_avail_elem; + /* + * Backup next field for each descriptor so we can recover securely, not + * needing to trust the device access. + */ + uint16_t *desc_next; + /* Next head to expose to the device */ uint16_t shadow_avail_idx; diff --git a/hw/virtio/vhost-user-blk-pci.c b/hw/virtio/vhost-user-blk-pci.c index 33b404d8a2..eef8641a98 100644 --- a/hw/virtio/vhost-user-blk-pci.c +++ b/hw/virtio/vhost-user-blk-pci.c @@ -26,7 +26,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/module.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qom/object.h" typedef struct VHostUserBlkPCI VHostUserBlkPCI; diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c index 2ed8492b3f..6829b8b743 100644 --- a/hw/virtio/vhost-user-fs-pci.c +++ b/hw/virtio/vhost-user-fs-pci.c @@ -14,7 +14,7 @@ #include "qemu/osdep.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost-user-fs.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qom/object.h" struct VHostUserFSPCI { diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index c595957983..e513e4fdda 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -219,8 +219,7 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, "vhost-user-fs", VIRTIO_ID_FS, - sizeof(struct virtio_fs_config)); + virtio_init(vdev, VIRTIO_ID_FS, sizeof(struct virtio_fs_config)); /* Hiprio queue */ fs->hiprio_vq = virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output); @@ -277,6 +276,12 @@ static void vuf_device_unrealize(DeviceState *dev) fs->vhost_dev.vqs = NULL; } +static struct vhost_dev *vuf_get_vhost(VirtIODevice *vdev) +{ + VHostUserFS *fs = VHOST_USER_FS(vdev); + return &fs->vhost_dev; +} + static const VMStateDescription vuf_vmstate = { .name = "vhost-user-fs", .unmigratable = 1, @@ -314,6 +319,7 @@ static void vuf_class_init(ObjectClass *klass, void *data) vdc->set_status = vuf_set_status; vdc->guest_notifier_mask = vuf_guest_notifier_mask; vdc->guest_notifier_pending = vuf_guest_notifier_pending; + vdc->get_vhost = vuf_get_vhost; } static const TypeInfo vuf_info = { diff --git a/hw/virtio/vhost-user-i2c-pci.c b/hw/virtio/vhost-user-i2c-pci.c index 70b7b65fd9..00ac10941f 100644 --- a/hw/virtio/vhost-user-i2c-pci.c +++ b/hw/virtio/vhost-user-i2c-pci.c @@ -9,7 +9,7 @@ #include "qemu/osdep.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost-user-i2c.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" struct VHostUserI2CPCI { VirtIOPCIProxy parent_obj; diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c index 42c7f6d9e5..6020eee093 100644 --- a/hw/virtio/vhost-user-i2c.c +++ b/hw/virtio/vhost-user-i2c.c @@ -14,11 +14,6 @@ #include "qemu/error-report.h" #include "standard-headers/linux/virtio_ids.h" -/* Remove this once the header is updated in Linux kernel */ -#ifndef VIRTIO_ID_I2C_ADAPTER -#define VIRTIO_ID_I2C_ADAPTER 34 -#endif - static const int feature_bits[] = { VIRTIO_I2C_F_ZERO_LENGTH_REQUEST, VHOST_INVALID_FEATURE_BIT @@ -227,7 +222,7 @@ static void vu_i2c_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, "vhost-user-i2c", VIRTIO_ID_I2C_ADAPTER, 0); + virtio_init(vdev, VIRTIO_ID_I2C_ADAPTER, 0); i2c->vhost_dev.nvqs = 1; i2c->vq = virtio_add_queue(vdev, 4, vu_i2c_handle_output); diff --git a/hw/virtio/vhost-user-input-pci.c b/hw/virtio/vhost-user-input-pci.c index c9d3e9113a..b858898a36 100644 --- a/hw/virtio/vhost-user-input-pci.c +++ b/hw/virtio/vhost-user-input-pci.c @@ -9,7 +9,7 @@ #include "hw/virtio/virtio-input.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qom/object.h" typedef struct VHostUserInputPCI VHostUserInputPCI; diff --git a/hw/virtio/vhost-user-rng-pci.c b/hw/virtio/vhost-user-rng-pci.c index c83dc86813..f64935453b 100644 --- a/hw/virtio/vhost-user-rng-pci.c +++ b/hw/virtio/vhost-user-rng-pci.c @@ -9,7 +9,7 @@ #include "qemu/osdep.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost-user-rng.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" struct VHostUserRNGPCI { VirtIOPCIProxy parent_obj; diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c index 209ee5bf9a..3a7bf8e32d 100644 --- a/hw/virtio/vhost-user-rng.c +++ b/hw/virtio/vhost-user-rng.c @@ -203,7 +203,7 @@ static void vu_rng_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, "vhost-user-rng", VIRTIO_ID_RNG, 0); + virtio_init(vdev, VIRTIO_ID_RNG, 0); rng->req_vq = virtio_add_queue(vdev, 4, vu_rng_handle_output); if (!rng->req_vq) { @@ -247,6 +247,12 @@ static void vu_rng_device_unrealize(DeviceState *dev) vhost_user_cleanup(&rng->vhost_user); } +static struct vhost_dev *vu_rng_get_vhost(VirtIODevice *vdev) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + return &rng->vhost_dev; +} + static const VMStateDescription vu_rng_vmstate = { .name = "vhost-user-rng", .unmigratable = 1, @@ -272,6 +278,7 @@ static void vu_rng_class_init(ObjectClass *klass, void *data) vdc->set_status = vu_rng_set_status; vdc->guest_notifier_mask = vu_rng_guest_notifier_mask; vdc->guest_notifier_pending = vu_rng_guest_notifier_pending; + vdc->get_vhost = vu_rng_get_vhost; } static const TypeInfo vu_rng_info = { diff --git a/hw/virtio/vhost-user-scsi-pci.c b/hw/virtio/vhost-user-scsi-pci.c index d5343412a1..75882e3cf9 100644 --- a/hw/virtio/vhost-user-scsi-pci.c +++ b/hw/virtio/vhost-user-scsi-pci.c @@ -30,7 +30,7 @@ #include "hw/pci/msix.h" #include "hw/loader.h" #include "sysemu/kvm.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qom/object.h" typedef struct VHostUserSCSIPCI VHostUserSCSIPCI; diff --git a/hw/virtio/vhost-user-vsock-pci.c b/hw/virtio/vhost-user-vsock-pci.c index 72a96199cd..e5a86e8013 100644 --- a/hw/virtio/vhost-user-vsock-pci.c +++ b/hw/virtio/vhost-user-vsock-pci.c @@ -10,7 +10,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost-user-vsock.h" #include "qom/object.h" diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c index 52bd682c34..0f8ff99f85 100644 --- a/hw/virtio/vhost-user-vsock.c +++ b/hw/virtio/vhost-user-vsock.c @@ -107,7 +107,7 @@ static void vuv_device_realize(DeviceState *dev, Error **errp) return; } - vhost_vsock_common_realize(vdev, "vhost-user-vsock"); + vhost_vsock_common_realize(vdev); vhost_dev_set_config_notifier(&vvc->vhost_dev, &vsock_ops); diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index afd51f79b3..b040c1ad2b 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -489,6 +489,8 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, return ret < 0 ? -saved_errno : -EIO; } + trace_vhost_user_write(msg->hdr.request, msg->hdr.flags); + return 0; } @@ -542,6 +544,8 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, } } + trace_vhost_user_read(msg.hdr.request, msg.hdr.flags); + return 0; } @@ -1170,14 +1174,16 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) n->unmap_addr = NULL; } -static void vhost_user_host_notifier_remove(VhostUserState *user, - VirtIODevice *vdev, int queue_idx) +/* + * clean-up function for notifier, will finally free the structure + * under rcu. + */ +static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n, + VirtIODevice *vdev) { - VhostUserHostNotifier *n = &user->notifier[queue_idx]; - if (n->addr) { if (vdev) { - virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false); + virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false); } assert(!n->unmap_addr); n->unmap_addr = n->addr; @@ -1221,6 +1227,15 @@ static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) return 0; } +static VhostUserHostNotifier *fetch_notifier(VhostUserState *u, + int idx) +{ + if (idx >= u->notifiers->len) { + return NULL; + } + return g_ptr_array_index(u->notifiers, idx); +} + static int vhost_user_get_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) { @@ -1233,7 +1248,10 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev, }; struct vhost_user *u = dev->opaque; - vhost_user_host_notifier_remove(u->user, dev->vdev, ring->index); + VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index); + if (n) { + vhost_user_host_notifier_remove(n, dev->vdev); + } ret = vhost_user_write(dev, &msg, NULL, 0); if (ret < 0) { @@ -1498,6 +1516,29 @@ static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) return dev->config_ops->vhost_dev_config_notifier(dev); } +/* + * Fetch or create the notifier for a given idx. Newly created + * notifiers are added to the pointer array that tracks them. + */ +static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, + int idx) +{ + VhostUserHostNotifier *n = NULL; + if (idx >= u->notifiers->len) { + g_ptr_array_set_size(u->notifiers, idx); + } + + n = g_ptr_array_index(u->notifiers, idx); + if (!n) { + n = g_new0(VhostUserHostNotifier, 1); + n->idx = idx; + g_ptr_array_insert(u->notifiers, idx, n); + trace_vhost_user_create_notifier(idx, n); + } + + return n; +} + static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, VhostUserVringArea *area, int fd) @@ -1517,9 +1558,12 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, return -EINVAL; } - n = &user->notifier[queue_idx]; - - vhost_user_host_notifier_remove(user, vdev, queue_idx); + /* + * Fetch notifier and invalidate any old data before setting up + * new mapped address. + */ + n = fetch_or_create_notifier(user, queue_idx); + vhost_user_host_notifier_remove(n, vdev); if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { return 0; @@ -1945,14 +1989,15 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, Error **errp) { - uint64_t features, protocol_features, ram_slots; + uint64_t features, ram_slots; struct vhost_user *u; + VhostUserState *vus = (VhostUserState *) opaque; int err; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); u = g_new0(struct vhost_user, 1); - u->user = opaque; + u->user = vus; u->dev = dev; dev->opaque = u; @@ -1963,6 +2008,10 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, } if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { + bool supports_f_config = vus->supports_config || + (dev->config_ops && dev->config_ops->vhost_dev_config_notifier); + uint64_t protocol_features; + dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, @@ -1972,19 +2021,34 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, return -EPROTO; } - dev->protocol_features = - protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; - - if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) { - /* Don't acknowledge CONFIG feature if device doesn't support it */ - dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); - } else if (!(protocol_features & - (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { - error_setg(errp, "Device expects VHOST_USER_PROTOCOL_F_CONFIG " - "but backend does not support it."); - return -EINVAL; + /* + * We will use all the protocol features we support - although + * we suppress F_CONFIG if we know QEMUs internal code can not support + * it. + */ + protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK; + + if (supports_f_config) { + if (!virtio_has_feature(protocol_features, + VHOST_USER_PROTOCOL_F_CONFIG)) { + error_setg(errp, "vhost-user device %s expecting " + "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does " + "not support it.", dev->vdev->name); + return -EPROTO; + } + } else { + if (virtio_has_feature(protocol_features, + VHOST_USER_PROTOCOL_F_CONFIG)) { + warn_reportf_err(*errp, "vhost-user backend supports " + "VHOST_USER_PROTOCOL_F_CONFIG for " + "device %s but QEMU does not.", + dev->vdev->name); + protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); + } } + /* final set of protocol features */ + dev->protocol_features = protocol_features; err = vhost_user_set_protocol_features(dev, dev->protocol_features); if (err < 0) { error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); @@ -2502,6 +2566,20 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev, return vhost_user_write(dev, &msg, &inflight->fd, 1); } +static void vhost_user_state_destroy(gpointer data) +{ + VhostUserHostNotifier *n = (VhostUserHostNotifier *) data; + if (n) { + vhost_user_host_notifier_remove(n, NULL); + object_unparent(OBJECT(&n->mr)); + /* + * We can't free until vhost_user_host_notifier_remove has + * done it's thing so schedule the free with RCU. + */ + g_free_rcu(n, rcu); + } +} + bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) { if (user->chr) { @@ -2510,23 +2588,18 @@ bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) } user->chr = chr; user->memory_slots = 0; + user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4, + &vhost_user_state_destroy); return true; } void vhost_user_cleanup(VhostUserState *user) { - int i; - VhostUserHostNotifier *n; - if (!user->chr) { return; } memory_region_transaction_begin(); - for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { - n = &user->notifier[i]; - vhost_user_host_notifier_remove(user, NULL, i); - object_unparent(OBJECT(&n->mr)); - } + user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true); memory_region_transaction_commit(); user->chr = NULL; } diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index a30510ed17..66f054a12c 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -368,11 +368,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) v->iova_range.last); } -static bool vhost_vdpa_one_time_request(struct vhost_dev *dev) +/* + * The use of this function is for requests that only need to be + * applied once. Typically such request occurs at the beginning + * of operation, and before setting up queues. It should not be + * used for request that performs operation until all queues are + * set, which would need to check dev->vq_index_end instead. + */ +static bool vhost_vdpa_first_dev(struct vhost_dev *dev) { struct vhost_vdpa *v = dev->opaque; - return v->index != 0; + return v->index == 0; } static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, @@ -453,7 +460,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) vhost_vdpa_get_iova_range(v); - if (vhost_vdpa_one_time_request(dev)) { + if (!vhost_vdpa_first_dev(dev)) { return 0; } @@ -596,7 +603,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, struct vhost_memory *mem) { - if (vhost_vdpa_one_time_request(dev)) { + if (!vhost_vdpa_first_dev(dev)) { return 0; } @@ -625,7 +632,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, struct vhost_vdpa *v = dev->opaque; int ret; - if (vhost_vdpa_one_time_request(dev)) { + if (!vhost_vdpa_first_dev(dev)) { return 0; } @@ -667,7 +674,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) features &= f; - if (vhost_vdpa_one_time_request(dev)) { + if (vhost_vdpa_first_dev(dev)) { r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); if (r) { return -EFAULT; @@ -1018,7 +1025,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); struct vhost_vring_addr addr = { - .index = i, + .index = dev->vq_index + i, }; int r; bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err); @@ -1120,7 +1127,7 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, struct vhost_log *log) { struct vhost_vdpa *v = dev->opaque; - if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) { + if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { return 0; } @@ -1172,11 +1179,11 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) { struct vhost_vdpa *v = dev->opaque; + int vdpa_idx = ring->index - dev->vq_index; int ret; if (v->shadow_vqs_enabled) { - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, - ring->index); + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); /* * Setting base as last used idx, so destination will see as available @@ -1242,7 +1249,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, static int vhost_vdpa_set_owner(struct vhost_dev *dev) { - if (vhost_vdpa_one_time_request(dev)) { + if (!vhost_vdpa_first_dev(dev)) { return 0; } diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c index ed706681ac..7394818e00 100644 --- a/hw/virtio/vhost-vsock-common.c +++ b/hw/virtio/vhost-vsock-common.c @@ -224,12 +224,11 @@ int vhost_vsock_common_post_load(void *opaque, int version_id) return 0; } -void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name) +void vhost_vsock_common_realize(VirtIODevice *vdev) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); - virtio_init(vdev, name, VIRTIO_ID_VSOCK, - sizeof(struct virtio_vsock_config)); + virtio_init(vdev, VIRTIO_ID_VSOCK, sizeof(struct virtio_vsock_config)); /* Receive and transmit queues belong to vhost */ vvc->recv_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, @@ -259,6 +258,12 @@ void vhost_vsock_common_unrealize(VirtIODevice *vdev) virtio_cleanup(vdev); } +static struct vhost_dev *vhost_vsock_common_get_vhost(VirtIODevice *vdev) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + return &vvc->vhost_dev; +} + static Property vhost_vsock_common_properties[] = { DEFINE_PROP_ON_OFF_AUTO("seqpacket", VHostVSockCommon, seqpacket, ON_OFF_AUTO_AUTO), @@ -274,6 +279,7 @@ static void vhost_vsock_common_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_MISC, dc->categories); vdc->guest_notifier_mask = vhost_vsock_common_guest_notifier_mask; vdc->guest_notifier_pending = vhost_vsock_common_guest_notifier_pending; + vdc->get_vhost = vhost_vsock_common_get_vhost; } static const TypeInfo vhost_vsock_common_info = { diff --git a/hw/virtio/vhost-vsock-pci.c b/hw/virtio/vhost-vsock-pci.c index 205da8d1f5..9f34414d38 100644 --- a/hw/virtio/vhost-vsock-pci.c +++ b/hw/virtio/vhost-vsock-pci.c @@ -13,7 +13,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/qdev-properties.h" #include "hw/virtio/vhost-vsock.h" #include "qemu/module.h" diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index 714046210b..0338de892f 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -169,7 +169,7 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) } } - vhost_vsock_common_realize(vdev, "vhost-vsock"); + vhost_vsock_common_realize(vdev); ret = vhost_dev_init(&vvc->vhost_dev, (void *)(uintptr_t)vhostfd, VHOST_BACKEND_TYPE_KERNEL, 0, errp); diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 2bc72c27c5..dd3263df56 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1738,6 +1738,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) /* should only be called after backend is connected */ assert(hdev->vhost_ops); + vdev->vhost_started = true; hdev->started = true; hdev->vdev = vdev; @@ -1810,7 +1811,7 @@ fail_vq: fail_mem: fail_features: - + vdev->vhost_started = false; hdev->started = false; return r; } @@ -1841,6 +1842,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) } vhost_log_put(hdev, true); hdev->started = false; + vdev->vhost_started = false; hdev->vdev = NULL; } diff --git a/hw/virtio/virtio-9p-pci.c b/hw/virtio/virtio-9p-pci.c index e07adcd9ea..94c14f0b98 100644 --- a/hw/virtio/virtio-9p-pci.c +++ b/hw/virtio/virtio-9p-pci.c @@ -15,7 +15,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/9pfs/virtio-9p.h" #include "hw/qdev-properties.h" #include "qemu/module.h" diff --git a/hw/virtio/virtio-balloon-pci.c b/hw/virtio/virtio-balloon-pci.c index 79a3ba979a..ce2645ba71 100644 --- a/hw/virtio/virtio-balloon-pci.c +++ b/hw/virtio/virtio-balloon-pci.c @@ -14,7 +14,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/qdev-properties.h" #include "hw/virtio/virtio-balloon.h" #include "qapi/error.h" diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 8f1b38ef5c..73ac5eb675 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -882,8 +882,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) VirtIOBalloon *s = VIRTIO_BALLOON(dev); int ret; - virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON, - virtio_balloon_config_size(s)); + virtio_init(vdev, VIRTIO_ID_BALLOON, virtio_balloon_config_size(s)); ret = qemu_add_balloon_handler(virtio_balloon_to_target, virtio_balloon_stat, s); diff --git a/hw/virtio/virtio-blk-pci.c b/hw/virtio/virtio-blk-pci.c index 9d5795810c..9743bee965 100644 --- a/hw/virtio/virtio-blk-pci.c +++ b/hw/virtio/virtio-blk-pci.c @@ -19,7 +19,7 @@ #include "hw/qdev-properties.h" #include "hw/virtio/virtio-blk.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qapi/error.h" #include "qemu/module.h" #include "qom/object.h" diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index 0f69d1c742..d7ec023adf 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -78,17 +78,23 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) return; } - vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); - if (klass->get_dma_as != NULL && has_iommu) { + vdev->dma_as = &address_space_memory; + if (has_iommu) { + vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + /* + * Present IOMMU_PLATFORM to the driver iff iommu_plattform=on and + * device operational. If the driver does not accept IOMMU_PLATFORM + * we fail the device. + */ virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM); - vdev->dma_as = klass->get_dma_as(qbus->parent); - if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) { - error_setg(errp, + if (klass->get_dma_as) { + vdev->dma_as = klass->get_dma_as(qbus->parent); + if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) { + error_setg(errp, "iommu_platform=true is not supported by the device"); - return; + return; + } } - } else { - vdev->dma_as = &address_space_memory; } } diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index dcd80b904d..c3829e7498 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -242,7 +242,7 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) } out_num = elem->out_num; - out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num); + out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num); out_iov = out_iov_copy; in_num = elem->in_num; @@ -605,11 +605,11 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) } out_num = elem->out_num; - out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num); + out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num); out_iov = out_iov_copy; in_num = elem->in_num; - in_iov_copy = g_memdup(elem->in_sg, sizeof(in_iov[0]) * in_num); + in_iov_copy = g_memdup2(elem->in_sg, sizeof(in_iov[0]) * in_num); in_iov = in_iov_copy; if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req)) @@ -810,7 +810,7 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, "virtio-crypto", VIRTIO_ID_CRYPTO, vcrypto->config_size); + virtio_init(vdev, VIRTIO_ID_CRYPTO, vcrypto->config_size); vcrypto->curr_queues = 1; vcrypto->vqs = g_new0(VirtIOCryptoQueue, vcrypto->max_queues); for (i = 0; i < vcrypto->max_queues; i++) { @@ -961,6 +961,15 @@ static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx) return cryptodev_vhost_virtqueue_pending(vdev, queue, idx); } +static struct vhost_dev *virtio_crypto_get_vhost(VirtIODevice *vdev) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + CryptoDevBackend *b = vcrypto->cryptodev; + CryptoDevBackendClient *cc = b->conf.peers.ccs[0]; + CryptoDevBackendVhost *vhost_crypto = cryptodev_get_vhost(cc, b, 0); + return &vhost_crypto->dev; +} + static void virtio_crypto_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -977,6 +986,7 @@ static void virtio_crypto_class_init(ObjectClass *klass, void *data) vdc->set_status = virtio_crypto_set_status; vdc->guest_notifier_mask = virtio_crypto_guest_notifier_mask; vdc->guest_notifier_pending = virtio_crypto_guest_notifier_pending; + vdc->get_vhost = virtio_crypto_get_vhost; } static void virtio_crypto_instance_init(Object *obj) diff --git a/hw/virtio/virtio-input-host-pci.c b/hw/virtio/virtio-input-host-pci.c index 0ac360de4f..cf8a9cf9e8 100644 --- a/hw/virtio/virtio-input-host-pci.c +++ b/hw/virtio/virtio-input-host-pci.c @@ -8,7 +8,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/virtio/virtio-input.h" #include "qemu/module.h" #include "qom/object.h" diff --git a/hw/virtio/virtio-input-pci.c b/hw/virtio/virtio-input-pci.c index 48e9ff38e2..a9d0992389 100644 --- a/hw/virtio/virtio-input-pci.c +++ b/hw/virtio/virtio-input-pci.c @@ -8,7 +8,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/qdev-properties.h" #include "hw/virtio/virtio-input.h" #include "qemu/module.h" diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c index 6a1df7fe50..844d647704 100644 --- a/hw/virtio/virtio-iommu-pci.c +++ b/hw/virtio/virtio-iommu-pci.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/virtio/virtio-iommu.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 57c09d98a9..2597e166f9 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -1033,8 +1033,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOIOMMU *s = VIRTIO_IOMMU(dev); - virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU, - sizeof(struct virtio_iommu_config)); + virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config)); memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num)); diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 5aca408726..30d03e987a 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -867,8 +867,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) vmem->block_size; vmem->bitmap = bitmap_new(vmem->bitmap_size); - virtio_init(vdev, TYPE_VIRTIO_MEM, VIRTIO_ID_MEM, - sizeof(struct virtio_mem_config)); + virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config)); vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request); host_memory_backend_set_mapped(vmem->memdev, true); diff --git a/hw/virtio/virtio-net-pci.c b/hw/virtio/virtio-net-pci.c index aa0b3caecb..e03543a70a 100644 --- a/hw/virtio/virtio-net-pci.c +++ b/hw/virtio/virtio-net-pci.c @@ -19,7 +19,7 @@ #include "hw/qdev-properties.h" #include "hw/virtio/virtio-net.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qapi/error.h" #include "qemu/module.h" #include "qom/object.h" diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 7cf1231c1c..0566ad7d00 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -33,11 +33,12 @@ #include "hw/pci/msix.h" #include "hw/loader.h" #include "sysemu/kvm.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qemu/range.h" #include "hw/virtio/virtio-bus.h" #include "qapi/visitor.h" #include "sysemu/replay.h" +#include "trace.h" #define VIRTIO_PCI_REGION_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_present(dev)) @@ -1380,6 +1381,7 @@ static void virtio_pci_notify_write(void *opaque, hwaddr addr, unsigned queue = addr / virtio_pci_queue_mem_mult(proxy); if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) { + trace_virtio_pci_notify_write(addr, val, size); virtio_queue_notify(vdev, queue); } } @@ -1393,6 +1395,7 @@ static void virtio_pci_notify_write_pio(void *opaque, hwaddr addr, unsigned queue = val; if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) { + trace_virtio_pci_notify_write_pio(addr, val, size); virtio_queue_notify(vdev, queue); } } diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h deleted file mode 100644 index 2446dcd9ae..0000000000 --- a/hw/virtio/virtio-pci.h +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Virtio PCI Bindings - * - * Copyright IBM, Corp. 2007 - * Copyright (c) 2009 CodeSourcery - * - * Authors: - * Anthony Liguori <aliguori@us.ibm.com> - * Paul Brook <paul@codesourcery.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ - -#ifndef QEMU_VIRTIO_PCI_H -#define QEMU_VIRTIO_PCI_H - -#include "hw/pci/msi.h" -#include "hw/virtio/virtio-bus.h" -#include "qom/object.h" - - -/* virtio-pci-bus */ - -typedef struct VirtioBusState VirtioPCIBusState; -typedef struct VirtioBusClass VirtioPCIBusClass; - -#define TYPE_VIRTIO_PCI_BUS "virtio-pci-bus" -DECLARE_OBJ_CHECKERS(VirtioPCIBusState, VirtioPCIBusClass, - VIRTIO_PCI_BUS, TYPE_VIRTIO_PCI_BUS) - -enum { - VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, - VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, - VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, - VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, - VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, - VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, - VIRTIO_PCI_FLAG_ATS_BIT, - VIRTIO_PCI_FLAG_INIT_DEVERR_BIT, - VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT, - VIRTIO_PCI_FLAG_INIT_PM_BIT, - VIRTIO_PCI_FLAG_INIT_FLR_BIT, - VIRTIO_PCI_FLAG_AER_BIT, - VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT, -}; - -/* Need to activate work-arounds for buggy guests at vmstate load. */ -#define VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION \ - (1 << VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT) - -/* Performance improves when virtqueue kick processing is decoupled from the - * vcpu thread using ioeventfd for some devices. */ -#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) - -/* virtio version flags */ -#define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT) - -/* migrate extra state */ -#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA (1 << VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT) - -/* have pio notification for modern device ? */ -#define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \ - (1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT) - -/* page per vq flag to be used by split drivers within guests */ -#define VIRTIO_PCI_FLAG_PAGE_PER_VQ \ - (1 << VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT) - -/* address space translation service */ -#define VIRTIO_PCI_FLAG_ATS (1 << VIRTIO_PCI_FLAG_ATS_BIT) - -/* Init error enabling flags */ -#define VIRTIO_PCI_FLAG_INIT_DEVERR (1 << VIRTIO_PCI_FLAG_INIT_DEVERR_BIT) - -/* Init Link Control register */ -#define VIRTIO_PCI_FLAG_INIT_LNKCTL (1 << VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT) - -/* Init Power Management */ -#define VIRTIO_PCI_FLAG_INIT_PM (1 << VIRTIO_PCI_FLAG_INIT_PM_BIT) - -/* Init Function Level Reset capability */ -#define VIRTIO_PCI_FLAG_INIT_FLR (1 << VIRTIO_PCI_FLAG_INIT_FLR_BIT) - -/* Advanced Error Reporting capability */ -#define VIRTIO_PCI_FLAG_AER (1 << VIRTIO_PCI_FLAG_AER_BIT) - -/* Page Aligned Address space Translation Service */ -#define VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED \ - (1 << VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT) - -typedef struct { - MSIMessage msg; - int virq; - unsigned int users; -} VirtIOIRQFD; - -/* - * virtio-pci: This is the PCIDevice which has a virtio-pci-bus. - */ -#define TYPE_VIRTIO_PCI "virtio-pci" -OBJECT_DECLARE_TYPE(VirtIOPCIProxy, VirtioPCIClass, VIRTIO_PCI) - -struct VirtioPCIClass { - PCIDeviceClass parent_class; - DeviceRealize parent_dc_realize; - void (*realize)(VirtIOPCIProxy *vpci_dev, Error **errp); -}; - -typedef struct VirtIOPCIRegion { - MemoryRegion mr; - uint32_t offset; - uint32_t size; - uint32_t type; -} VirtIOPCIRegion; - -typedef struct VirtIOPCIQueue { - uint16_t num; - bool enabled; - uint32_t desc[2]; - uint32_t avail[2]; - uint32_t used[2]; -} VirtIOPCIQueue; - -struct VirtIOPCIProxy { - PCIDevice pci_dev; - MemoryRegion bar; - union { - struct { - VirtIOPCIRegion common; - VirtIOPCIRegion isr; - VirtIOPCIRegion device; - VirtIOPCIRegion notify; - VirtIOPCIRegion notify_pio; - }; - VirtIOPCIRegion regs[5]; - }; - MemoryRegion modern_bar; - MemoryRegion io_bar; - uint32_t legacy_io_bar_idx; - uint32_t msix_bar_idx; - uint32_t modern_io_bar_idx; - uint32_t modern_mem_bar_idx; - int config_cap; - uint32_t flags; - bool disable_modern; - bool ignore_backend_features; - OnOffAuto disable_legacy; - uint32_t class_code; - uint32_t nvectors; - uint32_t dfselect; - uint32_t gfselect; - uint32_t guest_features[2]; - VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX]; - - VirtIOIRQFD *vector_irqfd; - int nvqs_with_notifiers; - VirtioBusState bus; -}; - -static inline bool virtio_pci_modern(VirtIOPCIProxy *proxy) -{ - return !proxy->disable_modern; -} - -static inline bool virtio_pci_legacy(VirtIOPCIProxy *proxy) -{ - return proxy->disable_legacy == ON_OFF_AUTO_OFF; -} - -static inline void virtio_pci_force_virtio_1(VirtIOPCIProxy *proxy) -{ - proxy->disable_modern = false; - proxy->disable_legacy = ON_OFF_AUTO_ON; -} - -static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy) -{ - proxy->disable_modern = true; -} - -/* - * virtio-input-pci: This extends VirtioPCIProxy. - */ -#define TYPE_VIRTIO_INPUT_PCI "virtio-input-pci" - -/* Virtio ABI version, if we increment this, we break the guest driver. */ -#define VIRTIO_PCI_ABI_VERSION 0 - -/* Input for virtio_pci_types_register() */ -typedef struct VirtioPCIDeviceTypeInfo { - /* - * Common base class for the subclasses below. - * - * Required only if transitional_name or non_transitional_name is set. - * - * We need a separate base type instead of making all types - * inherit from generic_name for two reasons: - * 1) generic_name implements INTERFACE_PCIE_DEVICE, but - * transitional_name does not. - * 2) generic_name has the "disable-legacy" and "disable-modern" - * properties, transitional_name and non_transitional name don't. - */ - const char *base_name; - /* - * Generic device type. Optional. - * - * Supports both transitional and non-transitional modes, - * using the disable-legacy and disable-modern properties. - * If disable-legacy=auto, (non-)transitional mode is selected - * depending on the bus where the device is plugged. - * - * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE, - * but PCI Express is supported only in non-transitional mode. - * - * The only type implemented by QEMU 3.1 and older. - */ - const char *generic_name; - /* - * The transitional device type. Optional. - * - * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE. - */ - const char *transitional_name; - /* - * The non-transitional device type. Optional. - * - * Implements INTERFACE_CONVENTIONAL_PCI_DEVICE only. - */ - const char *non_transitional_name; - - /* Parent type. If NULL, TYPE_VIRTIO_PCI is used */ - const char *parent; - - /* Same as TypeInfo fields: */ - size_t instance_size; - size_t class_size; - void (*instance_init)(Object *obj); - void (*class_init)(ObjectClass *klass, void *data); - InterfaceInfo *interfaces; -} VirtioPCIDeviceTypeInfo; - -/* Register virtio-pci type(s). @t must be static. */ -void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); - -/** - * virtio_pci_optimal_num_queues: - * @fixed_queues: number of queues that are always present - * - * Returns: The optimal number of queues for a multi-queue device, excluding - * @fixed_queues. - */ -unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues); - -#endif diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c index 5dd21c2c44..a1abfe0e1b 100644 --- a/hw/virtio/virtio-pmem.c +++ b/hw/virtio/virtio-pmem.c @@ -122,8 +122,7 @@ static void virtio_pmem_realize(DeviceState *dev, Error **errp) } host_memory_backend_set_mapped(pmem->memdev, true); - virtio_init(vdev, TYPE_VIRTIO_PMEM, VIRTIO_ID_PMEM, - sizeof(struct virtio_pmem_config)); + virtio_init(vdev, VIRTIO_ID_PMEM, sizeof(struct virtio_pmem_config)); pmem->rq_vq = virtio_add_queue(vdev, 128, virtio_pmem_flush); } diff --git a/hw/virtio/virtio-rng-pci.c b/hw/virtio/virtio-rng-pci.c index c1f916268b..151ece6f94 100644 --- a/hw/virtio/virtio-rng-pci.c +++ b/hw/virtio/virtio-rng-pci.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "hw/virtio/virtio-rng.h" #include "qapi/error.h" #include "qemu/module.h" diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c index cc8e9f775d..7e12fc03bf 100644 --- a/hw/virtio/virtio-rng.c +++ b/hw/virtio/virtio-rng.c @@ -215,7 +215,7 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp) return; } - virtio_init(vdev, "virtio-rng", VIRTIO_ID_RNG, 0); + virtio_init(vdev, VIRTIO_ID_RNG, 0); vrng->vq = virtio_add_queue(vdev, 8, handle_input); vrng->quota_remaining = vrng->conf.max_bytes; diff --git a/hw/virtio/virtio-scsi-pci.c b/hw/virtio/virtio-scsi-pci.c index 97fab74236..e8e3442f38 100644 --- a/hw/virtio/virtio-scsi-pci.c +++ b/hw/virtio/virtio-scsi-pci.c @@ -18,7 +18,7 @@ #include "hw/qdev-properties.h" #include "hw/virtio/virtio-scsi.h" #include "qemu/module.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qom/object.h" typedef struct VirtIOSCSIPCI VirtIOSCSIPCI; diff --git a/hw/virtio/virtio-serial-pci.c b/hw/virtio/virtio-serial-pci.c index 35bcd961c9..cea31adcc4 100644 --- a/hw/virtio/virtio-serial-pci.c +++ b/hw/virtio/virtio-serial-pci.c @@ -20,7 +20,7 @@ #include "hw/qdev-properties.h" #include "hw/virtio/virtio-serial.h" #include "qemu/module.h" -#include "virtio-pci.h" +#include "hw/virtio/virtio-pci.h" #include "qom/object.h" typedef struct VirtIOSerialPCI VirtIOSerialPCI; diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 67a873f54a..5d607aeaa0 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -132,6 +132,56 @@ struct VirtQueue QLIST_ENTRY(VirtQueue) node; }; +const char *virtio_device_names[] = { + [VIRTIO_ID_NET] = "virtio-net", + [VIRTIO_ID_BLOCK] = "virtio-blk", + [VIRTIO_ID_CONSOLE] = "virtio-serial", + [VIRTIO_ID_RNG] = "virtio-rng", + [VIRTIO_ID_BALLOON] = "virtio-balloon", + [VIRTIO_ID_IOMEM] = "virtio-iomem", + [VIRTIO_ID_RPMSG] = "virtio-rpmsg", + [VIRTIO_ID_SCSI] = "virtio-scsi", + [VIRTIO_ID_9P] = "virtio-9p", + [VIRTIO_ID_MAC80211_WLAN] = "virtio-mac-wlan", + [VIRTIO_ID_RPROC_SERIAL] = "virtio-rproc-serial", + [VIRTIO_ID_CAIF] = "virtio-caif", + [VIRTIO_ID_MEMORY_BALLOON] = "virtio-mem-balloon", + [VIRTIO_ID_GPU] = "virtio-gpu", + [VIRTIO_ID_CLOCK] = "virtio-clk", + [VIRTIO_ID_INPUT] = "virtio-input", + [VIRTIO_ID_VSOCK] = "vhost-vsock", + [VIRTIO_ID_CRYPTO] = "virtio-crypto", + [VIRTIO_ID_SIGNAL_DIST] = "virtio-signal", + [VIRTIO_ID_PSTORE] = "virtio-pstore", + [VIRTIO_ID_IOMMU] = "virtio-iommu", + [VIRTIO_ID_MEM] = "virtio-mem", + [VIRTIO_ID_SOUND] = "virtio-sound", + [VIRTIO_ID_FS] = "virtio-user-fs", + [VIRTIO_ID_PMEM] = "virtio-pmem", + [VIRTIO_ID_RPMB] = "virtio-rpmb", + [VIRTIO_ID_MAC80211_HWSIM] = "virtio-mac-hwsim", + [VIRTIO_ID_VIDEO_ENCODER] = "virtio-vid-encoder", + [VIRTIO_ID_VIDEO_DECODER] = "virtio-vid-decoder", + [VIRTIO_ID_SCMI] = "virtio-scmi", + [VIRTIO_ID_NITRO_SEC_MOD] = "virtio-nitro-sec-mod", + [VIRTIO_ID_I2C_ADAPTER] = "vhost-user-i2c", + [VIRTIO_ID_WATCHDOG] = "virtio-watchdog", + [VIRTIO_ID_CAN] = "virtio-can", + [VIRTIO_ID_DMABUF] = "virtio-dmabuf", + [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv", + [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol", + [VIRTIO_ID_BT] = "virtio-bluetooth", + [VIRTIO_ID_GPIO] = "virtio-gpio" +}; + +static const char *virtio_id_to_name(uint16_t device_id) +{ + assert(device_id < G_N_ELEMENTS(virtio_device_names)); + const char *name = virtio_device_names[device_id]; + assert(name != NULL); + return name; +} + /* Called within call_rcu(). */ static void virtio_free_region_cache(VRingMemoryRegionCaches *caches) { @@ -3207,8 +3257,7 @@ void virtio_instance_init_common(Object *proxy_obj, void *data, qdev_alias_all_properties(vdev, proxy_obj); } -void virtio_init(VirtIODevice *vdev, const char *name, - uint16_t device_id, size_t config_size) +void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size) { BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); @@ -3222,6 +3271,7 @@ void virtio_init(VirtIODevice *vdev, const char *name, vdev->start_on_kick = false; vdev->started = false; + vdev->vhost_started = false; vdev->device_id = device_id; vdev->status = 0; qatomic_set(&vdev->isr, 0); @@ -3237,7 +3287,7 @@ void virtio_init(VirtIODevice *vdev, const char *name, vdev->vq[i].host_notifier_enabled = false; } - vdev->name = name; + vdev->name = virtio_id_to_name(device_id); vdev->config_len = config_size; if (vdev->config_len) { vdev->config = g_malloc0(config_size); |