aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2024-07-24 09:32:04 +1000
committerRichard Henderson <richard.henderson@linaro.org>2024-07-24 09:32:04 +1000
commit5885bcef3d760e84d17eb4113e85f2aea0bd0582 (patch)
tree34b0d77bb749f5489148cdc3257ccc19c344e065
parent3b5efc553eb1ea8eaaa81d87b100036205a4525d (diff)
parent4f947b10d525958578002848a92eeb6152ffbf0d (diff)
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
virtio,pci,pc: features,fixes pci: Initial support for SPDM Responders cxl: Add support for scan media, feature commands, device patrol scrub control, DDR5 ECS control, firmware updates virtio: in-order support virtio-net: support for SR-IOV emulation (note: known issues on s390, might get reverted if not fixed) smbios: memory device size is now configurable per Machine cpu: architecture agnostic code to support vCPU Hotplug Fixes, cleanups all over the place. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # -----BEGIN PGP SIGNATURE----- # # iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmae9l8PHG1zdEByZWRo # YXQuY29tAAoJECgfDbjSjVRp8fYH/impBH9nViO/WK48io4mLSkl0EUL8Y/xrMvH # zKFCKaXq8D96VTt1Z4EGKYgwG0voBKZaCEKYU/0ARGnSlSwxINQ8ROCnBWMfn2sx # yQt08EXVMznNLtXjc6U5zCoCi6SaV85GH40No3MUFXBQt29ZSlFqO/fuHGZHYBwS # wuVKvTjjNF4EsGt3rS4Qsv6BwZWMM+dE6yXpKWk68kR8IGp+6QGxkMbWt9uEX2Md # VuemKVnFYw0XGCGy5K+ZkvoA2DGpEw0QxVSOMs8CI55Oc9SkTKz5fUSzXXGo1if+ # M1CTjOPJu6pMym6gy6XpFa8/QioDA/jE2vBQvfJ64TwhJDV159s= # =k8e9 # -----END PGP SIGNATURE----- # gpg: Signature made Tue 23 Jul 2024 10:16:31 AM AEST # gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469 # gpg: issuer "mst@redhat.com" # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [undefined] # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [undefined] # gpg: WARNING: The key's User ID is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (61 commits) hw/nvme: Add SPDM over DOE support backends: Initial support for SPDM socket support hw/pci: Add all Data Object Types defined in PCIe r6.0 tests/acpi: Add expected ACPI AML files for RISC-V tests/qtest/bios-tables-test.c: Enable basic testing for RISC-V tests/acpi: Add empty ACPI data files for RISC-V tests/qtest/bios-tables-test.c: Remove the fall back path tests/acpi: update expected DSDT blob for aarch64 and microvm acpi/gpex: Create PCI link devices outside PCI root bridge tests/acpi: Allow DSDT acpi table changes for aarch64 hw/riscv/virt-acpi-build.c: Update the HID of RISC-V UART hw/riscv/virt-acpi-build.c: Add namespace devices for PLIC and APLIC virtio-iommu: Add trace point on virtio_iommu_detach_endpoint_from_domain hw/vfio/common: Add vfio_listener_region_del_iommu trace event virtio-iommu: Remove the end point on detach virtio-iommu: Free [host_]resv_ranges on unset_iommu_devices virtio-iommu: Remove probe_done Revert "virtio-iommu: Clear IOMMUDevice when VFIO device is unplugged" gdbstub: Add helper function to unregister GDB register space physmem: Add helper function to destroy CPU AddressSpace ... Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r--MAINTAINERS9
-rw-r--r--accel/kvm/kvm-all.c95
-rw-r--r--accel/kvm/kvm-cpus.h1
-rw-r--r--accel/kvm/trace-events5
-rw-r--r--backends/Kconfig4
-rw-r--r--backends/meson.build2
-rw-r--r--backends/spdm-socket.c216
-rw-r--r--contrib/vhost-user-blk/vhost-user-blk.c2
-rw-r--r--docs/specs/acpi_hw_reduced_hotplug.rst3
-rw-r--r--docs/specs/index.rst1
-rw-r--r--docs/specs/spdm.rst134
-rw-r--r--docs/system/index.rst1
-rw-r--r--docs/system/sriov.rst36
-rw-r--r--gdbstub/gdbstub.c13
-rw-r--r--hw/acpi/acpi-cpu-hotplug-stub.c6
-rw-r--r--hw/acpi/cpu.c18
-rw-r--r--hw/acpi/generic_event_device.c50
-rw-r--r--hw/arm/virt.c1
-rw-r--r--hw/audio/virtio-snd.c13
-rw-r--r--hw/block/vhost-user-blk.c1
-rw-r--r--hw/core/cpu-common.c5
-rw-r--r--hw/core/machine.c6
-rw-r--r--hw/cxl/cxl-events.c13
-rw-r--r--hw/cxl/cxl-host.c3
-rw-r--r--hw/cxl/cxl-mailbox-utils.c966
-rw-r--r--hw/i386/acpi-build.c3
-rw-r--r--hw/i386/intel_iommu.c2
-rw-r--r--hw/i386/intel_iommu_internal.h6
-rw-r--r--hw/i386/pc_piix.c1
-rw-r--r--hw/i386/pc_q35.c1
-rw-r--r--hw/mem/cxl_type3.c64
-rw-r--r--hw/net/vhost_net.c2
-rw-r--r--hw/nvme/ctrl.c62
-rw-r--r--hw/pci-host/gpex-acpi.c13
-rw-r--r--hw/pci/pci.c68
-rw-r--r--hw/pci/pcie_sriov.c300
-rw-r--r--hw/riscv/virt-acpi-build.c34
-rw-r--r--hw/scsi/vhost-scsi.c1
-rw-r--r--hw/scsi/vhost-user-scsi.c1
-rw-r--r--hw/smbios/smbios.c11
-rw-r--r--hw/vfio/common.c3
-rw-r--r--hw/vfio/trace-events3
-rw-r--r--hw/virtio/trace-events1
-rw-r--r--hw/virtio/vhost-user-fs.c1
-rw-r--r--hw/virtio/vhost-user-vsock.c1
-rw-r--r--hw/virtio/virtio-crypto.c2
-rw-r--r--hw/virtio/virtio-iommu.c88
-rw-r--r--hw/virtio/virtio-net-pci.c1
-rw-r--r--hw/virtio/virtio-pci.c20
-rw-r--r--hw/virtio/virtio.c131
-rw-r--r--include/exec/cpu-common.h8
-rw-r--r--include/exec/gdbstub.h6
-rw-r--r--include/hw/acpi/cpu.h7
-rw-r--r--include/hw/acpi/generic_event_device.h5
-rw-r--r--include/hw/boards.h4
-rw-r--r--include/hw/core/cpu.h1
-rw-r--r--include/hw/cxl/cxl_device.h88
-rw-r--r--include/hw/cxl/cxl_mailbox.h18
-rw-r--r--include/hw/pci/pci_device.h13
-rw-r--r--include/hw/pci/pcie_doe.h5
-rw-r--r--include/hw/pci/pcie_sriov.h18
-rw-r--r--include/hw/virtio/virtio-iommu.h1
-rw-r--r--include/hw/virtio/virtio-pci.h1
-rw-r--r--include/hw/virtio/virtio.h6
-rw-r--r--include/sysemu/kvm.h25
-rw-r--r--include/sysemu/spdm-socket.h74
-rw-r--r--net/vhost-vdpa.c1
-rw-r--r--system/physmem.c29
-rw-r--r--tests/data/acpi/aarch64/virt/DSDTbin5196 -> 5196 bytes
-rw-r--r--tests/data/acpi/aarch64/virt/DSDT.acpihmatvirtbin5282 -> 5282 bytes
-rw-r--r--tests/data/acpi/aarch64/virt/DSDT.memhpbin6557 -> 6557 bytes
-rw-r--r--tests/data/acpi/aarch64/virt/DSDT.pxbbin7679 -> 7679 bytes
-rw-r--r--tests/data/acpi/aarch64/virt/DSDT.topologybin5398 -> 5398 bytes
-rw-r--r--tests/data/acpi/riscv64/virt/APICbin0 -> 116 bytes
-rw-r--r--tests/data/acpi/riscv64/virt/DSDTbin0 -> 3576 bytes
-rw-r--r--tests/data/acpi/riscv64/virt/FACPbin0 -> 276 bytes
-rw-r--r--tests/data/acpi/riscv64/virt/MCFGbin0 -> 60 bytes
-rw-r--r--tests/data/acpi/riscv64/virt/RHCTbin0 -> 332 bytes
-rw-r--r--tests/data/acpi/riscv64/virt/SPCRbin0 -> 80 bytes
-rw-r--r--tests/data/acpi/x86/microvm/DSDT.pciebin3023 -> 3023 bytes
-rw-r--r--tests/qtest/bios-tables-test.c40
81 files changed, 2486 insertions, 288 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index d5ff6c2498..6afc61979c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2009,6 +2009,7 @@ F: hw/pci-bridge/*
F: qapi/pci.json
F: docs/pci*
F: docs/specs/*pci*
+F: docs/system/sriov.rst
PCIE DOE
M: Huai-Cheng Kuo <hchkuo@avery-design.com.tw>
@@ -2208,6 +2209,7 @@ F: docs/devel/vfio-iommufd.rst
vhost
M: Michael S. Tsirkin <mst@redhat.com>
+R: Stefano Garzarella <sgarzare@redhat.com>
S: Supported
F: hw/*/*vhost*
F: docs/interop/vhost-user.json
@@ -3398,6 +3400,12 @@ F: tests/qtest/*tpm*
F: docs/specs/tpm.rst
T: git https://github.com/stefanberger/qemu-tpm.git tpm-next
+SPDM
+M: Alistair Francis <alistair.francis@wdc.com>
+S: Maintained
+F: backends/spdm-socket.c
+F: include/sysemu/spdm-socket.h
+
Checkpatch
S: Odd Fixes
F: scripts/checkpatch.pl
@@ -3657,6 +3665,7 @@ F: tests/uefi-test-tools/
VT-d Emulation
M: Michael S. Tsirkin <mst@redhat.com>
R: Jason Wang <jasowang@redhat.com>
+R: Yi Liu <yi.l.liu@intel.com>
S: Supported
F: hw/i386/intel_iommu.c
F: hw/i386/intel_iommu_internal.h
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 64bf47a033..0f110cce3e 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -340,14 +340,71 @@ err:
return ret;
}
+void kvm_park_vcpu(CPUState *cpu)
+{
+ struct KVMParkedVcpu *vcpu;
+
+ trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+
+ vcpu = g_malloc0(sizeof(*vcpu));
+ vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
+ vcpu->kvm_fd = cpu->kvm_fd;
+ QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
+}
+
+int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id)
+{
+ struct KVMParkedVcpu *cpu;
+ int kvm_fd = -ENOENT;
+
+ QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
+ if (cpu->vcpu_id == vcpu_id) {
+ QLIST_REMOVE(cpu, node);
+ kvm_fd = cpu->kvm_fd;
+ g_free(cpu);
+ }
+ }
+
+ trace_kvm_unpark_vcpu(vcpu_id, kvm_fd > 0 ? "unparked" : "!found parked");
+
+ return kvm_fd;
+}
+
+int kvm_create_vcpu(CPUState *cpu)
+{
+ unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
+ KVMState *s = kvm_state;
+ int kvm_fd;
+
+ /* check if the KVM vCPU already exist but is parked */
+ kvm_fd = kvm_unpark_vcpu(s, vcpu_id);
+ if (kvm_fd < 0) {
+ /* vCPU not parked: create a new KVM vCPU */
+ kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
+ if (kvm_fd < 0) {
+ error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id);
+ return kvm_fd;
+ }
+ }
+
+ cpu->kvm_fd = kvm_fd;
+ cpu->kvm_state = s;
+ cpu->vcpu_dirty = true;
+ cpu->dirty_pages = 0;
+ cpu->throttle_us_per_full = 0;
+
+ trace_kvm_create_vcpu(cpu->cpu_index, vcpu_id, kvm_fd);
+
+ return 0;
+}
+
static int do_kvm_destroy_vcpu(CPUState *cpu)
{
KVMState *s = kvm_state;
long mmap_size;
- struct KVMParkedVcpu *vcpu = NULL;
int ret = 0;
- trace_kvm_destroy_vcpu();
+ trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
ret = kvm_arch_destroy_vcpu(cpu);
if (ret < 0) {
@@ -373,10 +430,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
}
}
- vcpu = g_malloc0(sizeof(*vcpu));
- vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
- vcpu->kvm_fd = cpu->kvm_fd;
- QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
+ kvm_park_vcpu(cpu);
err:
return ret;
}
@@ -389,24 +443,6 @@ void kvm_destroy_vcpu(CPUState *cpu)
}
}
-static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
-{
- struct KVMParkedVcpu *cpu;
-
- QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
- if (cpu->vcpu_id == vcpu_id) {
- int kvm_fd;
-
- QLIST_REMOVE(cpu, node);
- kvm_fd = cpu->kvm_fd;
- g_free(cpu);
- return kvm_fd;
- }
- }
-
- return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
-}
-
int kvm_init_vcpu(CPUState *cpu, Error **errp)
{
KVMState *s = kvm_state;
@@ -415,19 +451,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
- ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
+ ret = kvm_create_vcpu(cpu);
if (ret < 0) {
- error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
+ error_setg_errno(errp, -ret,
+ "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
kvm_arch_vcpu_id(cpu));
goto err;
}
- cpu->kvm_fd = ret;
- cpu->kvm_state = s;
- cpu->vcpu_dirty = true;
- cpu->dirty_pages = 0;
- cpu->throttle_us_per_full = 0;
-
mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
if (mmap_size < 0) {
ret = mmap_size;
diff --git a/accel/kvm/kvm-cpus.h b/accel/kvm/kvm-cpus.h
index ca40add32c..171b22fd29 100644
--- a/accel/kvm/kvm-cpus.h
+++ b/accel/kvm/kvm-cpus.h
@@ -22,5 +22,4 @@ bool kvm_supports_guest_debug(void);
int kvm_insert_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len);
int kvm_remove_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len);
void kvm_remove_all_breakpoints(CPUState *cpu);
-
#endif /* KVM_CPUS_H */
diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
index 681ccb667d..37626c1ac5 100644
--- a/accel/kvm/trace-events
+++ b/accel/kvm/trace-events
@@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
+kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id, int kvm_fd) "index: %d, id: %lu, kvm fd: %d"
+kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
+kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
+kvm_unpark_vcpu(unsigned long arch_cpu_id, const char *msg) "id: %lu %s"
kvm_irqchip_commit_routes(void) ""
kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
@@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s"
kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)"
kvm_dirty_ring_reaper_kick(const char *reason) "%s"
kvm_dirty_ring_flush(int finished) "%d"
-kvm_destroy_vcpu(void) ""
kvm_failed_get_vcpu_mmap_size(void) ""
kvm_cpu_exec(void) ""
kvm_interrupt_exit_request(void) ""
diff --git a/backends/Kconfig b/backends/Kconfig
index 2cb23f62fa..d3dbe19868 100644
--- a/backends/Kconfig
+++ b/backends/Kconfig
@@ -3,3 +3,7 @@ source tpm/Kconfig
config IOMMUFD
bool
depends on VFIO
+
+config SPDM_SOCKET
+ bool
+ default y
diff --git a/backends/meson.build b/backends/meson.build
index 749b491f12..da714b93d1 100644
--- a/backends/meson.build
+++ b/backends/meson.build
@@ -33,4 +33,6 @@ endif
system_ss.add(when: gio, if_true: files('dbus-vmstate.c'))
system_ss.add(when: 'CONFIG_SGX', if_true: files('hostmem-epc.c'))
+system_ss.add(when: 'CONFIG_SPDM_SOCKET', if_true: files('spdm-socket.c'))
+
subdir('tpm')
diff --git a/backends/spdm-socket.c b/backends/spdm-socket.c
new file mode 100644
index 0000000000..d0663d696c
--- /dev/null
+++ b/backends/spdm-socket.c
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*
+ * QEMU SPDM socket support
+ *
+ * This is based on:
+ * https://github.com/DMTF/spdm-emu/blob/07c0a838bcc1c6207c656ac75885c0603e344b6f/spdm_emu/spdm_emu_common/command.c
+ * but has been re-written to match QEMU style
+ *
+ * Copyright (c) 2021, DMTF. All rights reserved.
+ * Copyright (c) 2023. Western Digital Corporation or its affiliates.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/spdm-socket.h"
+#include "qapi/error.h"
+
+static bool read_bytes(const int socket, uint8_t *buffer,
+ size_t number_of_bytes)
+{
+ ssize_t number_received = 0;
+ ssize_t result;
+
+ while (number_received < number_of_bytes) {
+ result = recv(socket, buffer + number_received,
+ number_of_bytes - number_received, 0);
+ if (result <= 0) {
+ return false;
+ }
+ number_received += result;
+ }
+ return true;
+}
+
+static bool read_data32(const int socket, uint32_t *data)
+{
+ bool result;
+
+ result = read_bytes(socket, (uint8_t *)data, sizeof(uint32_t));
+ if (!result) {
+ return result;
+ }
+ *data = ntohl(*data);
+ return true;
+}
+
+static bool read_multiple_bytes(const int socket, uint8_t *buffer,
+ uint32_t *bytes_received,
+ uint32_t max_buffer_length)
+{
+ uint32_t length;
+ bool result;
+
+ result = read_data32(socket, &length);
+ if (!result) {
+ return result;
+ }
+
+ if (length > max_buffer_length) {
+ return false;
+ }
+
+ if (bytes_received) {
+ *bytes_received = length;
+ }
+
+ if (length == 0) {
+ return true;
+ }
+
+ return read_bytes(socket, buffer, length);
+}
+
+static bool receive_platform_data(const int socket,
+ uint32_t transport_type,
+ uint32_t *command,
+ uint8_t *receive_buffer,
+ uint32_t *bytes_to_receive)
+{
+ bool result;
+ uint32_t response;
+ uint32_t bytes_received;
+
+ result = read_data32(socket, &response);
+ if (!result) {
+ return result;
+ }
+ *command = response;
+
+ result = read_data32(socket, &transport_type);
+ if (!result) {
+ return result;
+ }
+
+ bytes_received = 0;
+ result = read_multiple_bytes(socket, receive_buffer, &bytes_received,
+ *bytes_to_receive);
+ if (!result) {
+ return result;
+ }
+ *bytes_to_receive = bytes_received;
+
+ return result;
+}
+
+static bool write_bytes(const int socket, const uint8_t *buffer,
+ uint32_t number_of_bytes)
+{
+ ssize_t number_sent = 0;
+ ssize_t result;
+
+ while (number_sent < number_of_bytes) {
+ result = send(socket, buffer + number_sent,
+ number_of_bytes - number_sent, 0);
+ if (result == -1) {
+ return false;
+ }
+ number_sent += result;
+ }
+ return true;
+}
+
+static bool write_data32(const int socket, uint32_t data)
+{
+ data = htonl(data);
+ return write_bytes(socket, (uint8_t *)&data, sizeof(uint32_t));
+}
+
+static bool write_multiple_bytes(const int socket, const uint8_t *buffer,
+ uint32_t bytes_to_send)
+{
+ bool result;
+
+ result = write_data32(socket, bytes_to_send);
+ if (!result) {
+ return result;
+ }
+
+ return write_bytes(socket, buffer, bytes_to_send);
+}
+
+static bool send_platform_data(const int socket,
+ uint32_t transport_type, uint32_t command,
+ const uint8_t *send_buffer, size_t bytes_to_send)
+{
+ bool result;
+
+ result = write_data32(socket, command);
+ if (!result) {
+ return result;
+ }
+
+ result = write_data32(socket, transport_type);
+ if (!result) {
+ return result;
+ }
+
+ return write_multiple_bytes(socket, send_buffer, bytes_to_send);
+}
+
+int spdm_socket_connect(uint16_t port, Error **errp)
+{
+ int client_socket;
+ struct sockaddr_in server_addr;
+
+ client_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if (client_socket < 0) {
+ error_setg(errp, "cannot create socket: %s", strerror(errno));
+ return -1;
+ }
+
+ memset((char *)&server_addr, 0, sizeof(server_addr));
+ server_addr.sin_family = AF_INET;
+ server_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ server_addr.sin_port = htons(port);
+
+
+ if (connect(client_socket, (struct sockaddr *)&server_addr,
+ sizeof(server_addr)) < 0) {
+ error_setg(errp, "cannot connect: %s", strerror(errno));
+ close(client_socket);
+ return -1;
+ }
+
+ return client_socket;
+}
+
+uint32_t spdm_socket_rsp(const int socket, uint32_t transport_type,
+ void *req, uint32_t req_len,
+ void *rsp, uint32_t rsp_len)
+{
+ uint32_t command;
+ bool result;
+
+ result = send_platform_data(socket, transport_type,
+ SPDM_SOCKET_COMMAND_NORMAL,
+ req, req_len);
+ if (!result) {
+ return 0;
+ }
+
+ result = receive_platform_data(socket, transport_type, &command,
+ (uint8_t *)rsp, &rsp_len);
+ if (!result) {
+ return 0;
+ }
+
+ assert(command != 0);
+
+ return rsp_len;
+}
+
+void spdm_socket_close(const int socket, uint32_t transport_type)
+{
+ send_platform_data(socket, transport_type,
+ SPDM_SOCKET_COMMAND_SHUTDOWN, NULL, 0);
+}
diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c
index 9492146855..6cc18a1c04 100644
--- a/contrib/vhost-user-blk/vhost-user-blk.c
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
@@ -196,7 +196,7 @@ vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
VubDev *vdev_blk = req->vdev_blk;
desc = buf;
uint64_t range[2] = { le64_to_cpu(desc->sector) << 9,
- le32_to_cpu(desc->num_sectors) << 9 };
+ (uint64_t)le32_to_cpu(desc->num_sectors) << 9 };
if (type == VIRTIO_BLK_T_DISCARD) {
if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
g_free(buf);
diff --git a/docs/specs/acpi_hw_reduced_hotplug.rst b/docs/specs/acpi_hw_reduced_hotplug.rst
index 0bd3f9399f..3acd6fcd8b 100644
--- a/docs/specs/acpi_hw_reduced_hotplug.rst
+++ b/docs/specs/acpi_hw_reduced_hotplug.rst
@@ -64,7 +64,8 @@ GED IO interface (4 byte access)
0: Memory hotplug event
1: System power down event
2: NVDIMM hotplug event
- 3-31: Reserved
+ 3: CPU hotplug event
+ 4-31: Reserved
**write_access:**
diff --git a/docs/specs/index.rst b/docs/specs/index.rst
index 1484e3e760..e2d907959a 100644
--- a/docs/specs/index.rst
+++ b/docs/specs/index.rst
@@ -29,6 +29,7 @@ guest hardware that is specific to QEMU.
edu
ivshmem-spec
pvpanic
+ spdm
standard-vga
virt-ctlr
vmcoreinfo
diff --git a/docs/specs/spdm.rst b/docs/specs/spdm.rst
new file mode 100644
index 0000000000..f7de080ff0
--- /dev/null
+++ b/docs/specs/spdm.rst
@@ -0,0 +1,134 @@
+======================================================
+QEMU Security Protocols and Data Models (SPDM) Support
+======================================================
+
+SPDM enables authentication, attestation and key exchange to assist in
+providing infrastructure security enablement. It's a standard published
+by the `DMTF`_.
+
+QEMU supports connecting to a SPDM responder implementation. This allows an
+external application to emulate the SPDM responder logic for an SPDM device.
+
+Setting up a SPDM server
+========================
+
+When using QEMU with SPDM devices QEMU will connect to a server which
+implements the SPDM functionality.
+
+SPDM-Utils
+----------
+
+You can use `SPDM Utils`_ to emulate a responder. This is the simplest method.
+
+SPDM-Utils is a Linux applications to manage, test and develop devices
+supporting DMTF Security Protocol and Data Model (SPDM). It is written in Rust
+and utilises libspdm.
+
+To use SPDM-Utils you will need to do the following steps. Details are included
+in the SPDM-Utils README.
+
+ 1. `Build libspdm`_
+ 2. `Build SPDM Utils`_
+ 3. `Run it as a server`_
+
+spdm-emu
+--------
+
+You can use `spdm emu`_ to model the
+SPDM responder.
+
+.. code-block:: shell
+
+ $ cd spdm-emu
+ $ git submodule init; git submodule update --recursive
+ $ mkdir build; cd build
+ $ cmake -DARCH=x64 -DTOOLCHAIN=GCC -DTARGET=Debug -DCRYPTO=openssl ..
+ $ make -j32
+ $ make copy_sample_key # Build certificates, required for SPDM authentication.
+
+It is worth noting that the certificates should be in compliance with
+PCIe r6.1 sec 6.31.3. This means you will need to add the following to
+openssl.cnf
+
+.. code-block::
+
+ subjectAltName = otherName:2.23.147;UTF8:Vendor=1b36:Device=0010:CC=010802:REV=02:SSVID=1af4:SSID=1100
+ 2.23.147 = ASN1:OID:2.23.147
+
+and then manually regenerate some certificates with:
+
+.. code-block:: shell
+
+ $ openssl req -nodes -newkey ec:param.pem -keyout end_responder.key \
+ -out end_responder.req -sha384 -batch \
+ -subj "/CN=DMTF libspdm ECP384 responder cert"
+
+ $ openssl x509 -req -in end_responder.req -out end_responder.cert \
+ -CA inter.cert -CAkey inter.key -sha384 -days 3650 -set_serial 3 \
+ -extensions v3_end -extfile ../openssl.cnf
+
+ $ openssl asn1parse -in end_responder.cert -out end_responder.cert.der
+
+ $ cat ca.cert.der inter.cert.der end_responder.cert.der > bundle_responder.certchain.der
+
+You can use SPDM-Utils instead as it will generate the correct certificates
+automatically.
+
+The responder can then be launched with
+
+.. code-block:: shell
+
+ $ cd bin
+ $ ./spdm_responder_emu --trans PCI_DOE
+
+Connecting an SPDM NVMe device
+==============================
+
+Once a SPDM server is running we can start QEMU and connect to the server.
+
+For an NVMe device first let's setup a block we can use
+
+.. code-block:: shell
+
+ $ cd qemu-spdm/linux/image
+ $ dd if=/dev/zero of=blknvme bs=1M count=2096 # 2GB NNMe Drive
+
+Then you can add this to your QEMU command line:
+
+.. code-block:: shell
+
+ -drive file=blknvme,if=none,id=mynvme,format=raw \
+ -device nvme,drive=mynvme,serial=deadbeef,spdm_port=2323
+
+At which point QEMU will try to connect to the SPDM server.
+
+Note that if using x64-64 you will want to use the q35 machine instead
+of the default. So the entire QEMU command might look like this
+
+.. code-block:: shell
+
+ qemu-system-x86_64 -M q35 \
+ --kernel bzImage \
+ -drive file=rootfs.ext2,if=virtio,format=raw \
+ -append "root=/dev/vda console=ttyS0" \
+ -net none -nographic \
+ -drive file=blknvme,if=none,id=mynvme,format=raw \
+ -device nvme,drive=mynvme,serial=deadbeef,spdm_port=2323
+
+.. _DMTF:
+ https://www.dmtf.org/standards/SPDM
+
+.. _SPDM Utils:
+ https://github.com/westerndigitalcorporation/spdm-utils
+
+.. _spdm emu:
+ https://github.com/dmtf/spdm-emu
+
+.. _Build libspdm:
+ https://github.com/westerndigitalcorporation/spdm-utils?tab=readme-ov-file#build-libspdm
+
+.. _Build SPDM Utils:
+ https://github.com/westerndigitalcorporation/spdm-utils?tab=readme-ov-file#build-the-binary
+
+.. _Run it as a server:
+ https://github.com/westerndigitalcorporation/spdm-utils#qemu-spdm-device-emulation
diff --git a/docs/system/index.rst b/docs/system/index.rst
index c21065e519..718e9d3c56 100644
--- a/docs/system/index.rst
+++ b/docs/system/index.rst
@@ -39,3 +39,4 @@ or Hypervisor.Framework.
multi-process
confidential-guest-support
vm-templating
+ sriov
diff --git a/docs/system/sriov.rst b/docs/system/sriov.rst
new file mode 100644
index 0000000000..a851a66a4b
--- /dev/null
+++ b/docs/system/sriov.rst
@@ -0,0 +1,36 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+Compsable SR-IOV device
+=======================
+
+SR-IOV (Single Root I/O Virtualization) is an optional extended capability of a
+PCI Express device. It allows a single physical function (PF) to appear as
+multiple virtual functions (VFs) for the main purpose of eliminating software
+overhead in I/O from virtual machines.
+
+There are devices with predefined SR-IOV configurations, but it is also possible
+to compose an SR-IOV device yourself. Composing an SR-IOV device is currently
+only supported by virtio-net-pci.
+
+Users can configure an SR-IOV-capable virtio-net device by adding
+virtio-net-pci functions to a bus. Below is a command line example:
+
+.. code-block:: shell
+
+ -netdev user,id=n -netdev user,id=o
+ -netdev user,id=p -netdev user,id=q
+ -device pcie-root-port,id=b
+ -device virtio-net-pci,bus=b,addr=0x0.0x3,netdev=q,sriov-pf=f
+ -device virtio-net-pci,bus=b,addr=0x0.0x2,netdev=p,sriov-pf=f
+ -device virtio-net-pci,bus=b,addr=0x0.0x1,netdev=o,sriov-pf=f
+ -device virtio-net-pci,bus=b,addr=0x0.0x0,netdev=n,id=f
+
+The VFs specify the paired PF with ``sriov-pf`` property. The PF must be
+added after all VFs. It is the user's responsibility to ensure that VFs have
+function numbers larger than one of the PF, and that the function numbers
+have a consistent stride.
+
+You may also need to perform additional steps to activate the SR-IOV feature on
+your guest. For Linux, refer to [1]_.
+
+.. [1] https://docs.kernel.org/PCI/pci-iov-howto.html
diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
index b7be8e5a44..d08568cea0 100644
--- a/gdbstub/gdbstub.c
+++ b/gdbstub/gdbstub.c
@@ -618,6 +618,19 @@ void gdb_register_coprocessor(CPUState *cpu,
}
}
+void gdb_unregister_coprocessor_all(CPUState *cpu)
+{
+ /*
+ * Safe to nuke everything. GDBRegisterState::xml is static const char so
+ * it won't be freed
+ */
+ g_array_free(cpu->gdb_regs, true);
+
+ cpu->gdb_regs = NULL;
+ cpu->gdb_num_regs = 0;
+ cpu->gdb_num_g_regs = 0;
+}
+
static void gdb_process_breakpoint_remove_all(GDBProcess *p)
{
CPUState *cpu = gdb_get_first_cpu_in_process(p);
diff --git a/hw/acpi/acpi-cpu-hotplug-stub.c b/hw/acpi/acpi-cpu-hotplug-stub.c
index 3fc4b14c26..c6c61bb9cd 100644
--- a/hw/acpi/acpi-cpu-hotplug-stub.c
+++ b/hw/acpi/acpi-cpu-hotplug-stub.c
@@ -19,6 +19,12 @@ void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner,
return;
}
+void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner,
+ CPUHotplugState *state, hwaddr base_addr)
+{
+ return;
+}
+
void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list)
{
return;
diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c
index 2d81c1e790..5cb60ca8bc 100644
--- a/hw/acpi/cpu.c
+++ b/hw/acpi/cpu.c
@@ -7,7 +7,6 @@
#include "trace.h"
#include "sysemu/numa.h"
-#define ACPI_CPU_HOTPLUG_REG_LEN 12
#define ACPI_CPU_SELECTOR_OFFSET_WR 0
#define ACPI_CPU_FLAGS_OFFSET_RW 4
#define ACPI_CPU_CMD_OFFSET_WR 5
@@ -339,9 +338,10 @@ const VMStateDescription vmstate_cpu_hotplug = {
#define CPU_FW_EJECT_EVENT "CEJF"
void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
- build_madt_cpu_fn build_madt_cpu, hwaddr io_base,
+ build_madt_cpu_fn build_madt_cpu, hwaddr base_addr,
const char *res_root,
- const char *event_handler_method)
+ const char *event_handler_method,
+ AmlRegionSpace rs)
{
Aml *ifctx;
Aml *field;
@@ -365,14 +365,22 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
aml_name_decl("_UID", aml_string("CPU Hotplug resources")));
aml_append(cpu_ctrl_dev, aml_mutex(CPU_LOCK, 0));
+ assert((rs == AML_SYSTEM_IO) || (rs == AML_SYSTEM_MEMORY));
+
crs = aml_resource_template();
- aml_append(crs, aml_io(AML_DECODE16, io_base, io_base, 1,
+ if (rs == AML_SYSTEM_IO) {
+ aml_append(crs, aml_io(AML_DECODE16, base_addr, base_addr, 1,
ACPI_CPU_HOTPLUG_REG_LEN));
+ } else if (rs == AML_SYSTEM_MEMORY) {
+ aml_append(crs, aml_memory32_fixed(base_addr,
+ ACPI_CPU_HOTPLUG_REG_LEN, AML_READ_WRITE));
+ }
+
aml_append(cpu_ctrl_dev, aml_name_decl("_CRS", crs));
/* declare CPU hotplug MMIO region with related access fields */
aml_append(cpu_ctrl_dev,
- aml_operation_region("PRST", AML_SYSTEM_IO, aml_int(io_base),
+ aml_operation_region("PRST", rs, aml_int(base_addr),
ACPI_CPU_HOTPLUG_REG_LEN));
field = aml_field("PRST", AML_BYTE_ACC, AML_NOLOCK,
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index 2d6e91b124..15b4c3ebbf 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -25,6 +25,7 @@ static const uint32_t ged_supported_events[] = {
ACPI_GED_MEM_HOTPLUG_EVT,
ACPI_GED_PWR_DOWN_EVT,
ACPI_GED_NVDIMM_HOTPLUG_EVT,
+ ACPI_GED_CPU_HOTPLUG_EVT,
};
/*
@@ -107,6 +108,9 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev,
aml_append(if_ctx, aml_call0(MEMORY_DEVICES_CONTAINER "."
MEMORY_SLOT_SCAN_METHOD));
break;
+ case ACPI_GED_CPU_HOTPLUG_EVT:
+ aml_append(if_ctx, aml_call0(AML_GED_EVT_CPU_SCAN_METHOD));
+ break;
case ACPI_GED_PWR_DOWN_EVT:
aml_append(if_ctx,
aml_notify(aml_name(ACPI_POWER_BUTTON_DEVICE),
@@ -234,6 +238,8 @@ static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev,
} else {
acpi_memory_plug_cb(hotplug_dev, &s->memhp_state, dev, errp);
}
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+ acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
} else {
error_setg(errp, "virt: device plug request for unsupported device"
" type: %s", object_get_typename(OBJECT(dev)));
@@ -248,6 +254,8 @@ static void acpi_ged_unplug_request_cb(HotplugHandler *hotplug_dev,
if ((object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) &&
!(object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)))) {
acpi_memory_unplug_request_cb(hotplug_dev, &s->memhp_state, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+ acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
} else {
error_setg(errp, "acpi: device unplug request for unsupported device"
" type: %s", object_get_typename(OBJECT(dev)));
@@ -261,6 +269,8 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev,
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
acpi_memory_unplug_cb(&s->memhp_state, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
+ acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp);
} else {
error_setg(errp, "acpi: device unplug for unsupported device"
" type: %s", object_get_typename(OBJECT(dev)));
@@ -272,6 +282,7 @@ static void acpi_ged_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list)
AcpiGedState *s = ACPI_GED(adev);
acpi_memory_ospm_status(&s->memhp_state, list);
+ acpi_cpu_ospm_status(&s->cpuhp_state, list);
}
static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev)
@@ -286,6 +297,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev)
sel = ACPI_GED_PWR_DOWN_EVT;
} else if (ev & ACPI_NVDIMM_HOTPLUG_STATUS) {
sel = ACPI_GED_NVDIMM_HOTPLUG_EVT;
+ } else if (ev & ACPI_CPU_HOTPLUG_STATUS) {
+ sel = ACPI_GED_CPU_HOTPLUG_EVT;
} else {
/* Unknown event. Return without generating interrupt. */
warn_report("GED: Unsupported event %d. No irq injected", ev);
@@ -371,6 +384,42 @@ static const VMStateDescription vmstate_acpi_ged = {
}
};
+static void acpi_ged_realize(DeviceState *dev, Error **errp)
+{
+ SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+ AcpiGedState *s = ACPI_GED(dev);
+ uint32_t ged_events;
+ int i;
+
+ ged_events = ctpop32(s->ged_event_bitmap);
+
+ for (i = 0; i < ARRAY_SIZE(ged_supported_events) && ged_events; i++) {
+ uint32_t event = s->ged_event_bitmap & ged_supported_events[i];
+
+ if (!event) {
+ continue;
+ }
+
+ switch (event) {
+ case ACPI_GED_CPU_HOTPLUG_EVT:
+ /* initialize CPU Hotplug related regions */
+ memory_region_init(&s->container_cpuhp, OBJECT(dev),
+ "cpuhp container",
+ ACPI_CPU_HOTPLUG_REG_LEN);
+ sysbus_init_mmio(sbd, &s->container_cpuhp);
+ cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev),
+ &s->cpuhp_state, 0);
+ break;
+ }
+ ged_events--;
+ }
+
+ if (ged_events) {
+ error_report("Unsupported events specified");
+ abort();
+ }
+}
+
static void acpi_ged_initfn(Object *obj)
{
DeviceState *dev = DEVICE(obj);
@@ -411,6 +460,7 @@ static void acpi_ged_class_init(ObjectClass *class, void *data)
dc->desc = "ACPI Generic Event Device";
device_class_set_props(dc, acpi_ged_properties);
dc->vmsd = &vmstate_acpi_ged;
+ dc->realize = acpi_ged_realize;
hc->plug = acpi_ged_device_plug_cb;
hc->unplug_request = acpi_ged_unplug_request_cb;
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index b0c68d66a3..719e83e6a1 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3308,6 +3308,7 @@ DEFINE_VIRT_MACHINE_AS_LATEST(9, 1)
static void virt_machine_9_0_options(MachineClass *mc)
{
virt_machine_9_1_options(mc);
+ mc->smbios_memory_device_size = 16 * GiB;
compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len);
}
DEFINE_VIRT_MACHINE(9, 0)
diff --git a/hw/audio/virtio-snd.c b/hw/audio/virtio-snd.c
index 5993f4f040..e5196aa4bb 100644
--- a/hw/audio/virtio-snd.c
+++ b/hw/audio/virtio-snd.c
@@ -282,11 +282,13 @@ uint32_t virtio_snd_set_pcm_params(VirtIOSound *s,
error_report("Number of channels is not supported.");
return cpu_to_le32(VIRTIO_SND_S_NOT_SUPP);
}
- if (!(supported_formats & BIT(params->format))) {
+ if (BIT(params->format) > sizeof(supported_formats) ||
+ !(supported_formats & BIT(params->format))) {
error_report("Stream format is not supported.");
return cpu_to_le32(VIRTIO_SND_S_NOT_SUPP);
}
- if (!(supported_rates & BIT(params->rate))) {
+ if (BIT(params->rate) > sizeof(supported_rates) ||
+ !(supported_rates & BIT(params->rate))) {
error_report("Stream rate is not supported.");
return cpu_to_le32(VIRTIO_SND_S_NOT_SUPP);
}
@@ -1261,7 +1263,7 @@ static void virtio_snd_pcm_in_cb(void *data, int available)
{
VirtIOSoundPCMStream *stream = data;
VirtIOSoundPCMBuffer *buffer;
- size_t size;
+ size_t size, max_size;
WITH_QEMU_LOCK_GUARD(&stream->queue_mutex) {
while (!QSIMPLEQ_EMPTY(&stream->queue)) {
@@ -1275,7 +1277,12 @@ static void virtio_snd_pcm_in_cb(void *data, int available)
continue;
}
+ max_size = iov_size(buffer->elem->in_sg, buffer->elem->in_num);
for (;;) {
+ if (buffer->size >= max_size) {
+ return_rx_buffer(stream, buffer);
+ break;
+ }
size = AUD_read(stream->voice.in,
buffer->data + buffer->size,
MIN(available, (stream->params.period_bytes -
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index fdbc30b9ce..5b7f46bbb0 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -51,6 +51,7 @@ static const int user_feature_bits[] = {
VIRTIO_F_RING_PACKED,
VIRTIO_F_IOMMU_PLATFORM,
VIRTIO_F_RING_RESET,
+ VIRTIO_F_IN_ORDER,
VIRTIO_F_NOTIFICATION_DATA,
VHOST_INVALID_FEATURE_BIT
};
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index d2e3e4570a..7982ecd39a 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -282,7 +282,10 @@ static void cpu_common_finalize(Object *obj)
}
#endif
free_queued_cpu_work(cpu);
- g_array_free(cpu->gdb_regs, TRUE);
+ /* If cleanup didn't happen in context to gdb_unregister_coprocessor_all */
+ if (cpu->gdb_regs) {
+ g_array_free(cpu->gdb_regs, TRUE);
+ }
qemu_lockcnt_destroy(&cpu->in_ioctl_lock);
qemu_mutex_destroy(&cpu->work_mutex);
qemu_cond_destroy(cpu->halt_cond);
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 8a878f84d7..27dcda0248 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1005,6 +1005,12 @@ static void machine_class_init(ObjectClass *oc, void *data)
/* Default 128 MB as guest ram size */
mc->default_ram_size = 128 * MiB;
mc->rom_file_has_mr = true;
+ /*
+ * SMBIOS 3.1.0 7.18.5 Memory Device — Extended Size
+ * use max possible value that could be encoded into
+ * 'Extended Size' field (2047Tb).
+ */
+ mc->smbios_memory_device_size = 2047 * TiB;
/* numa node memory size aligned on 8MB by default.
* On Linux, each node's border has to be 8MB aligned
diff --git a/hw/cxl/cxl-events.c b/hw/cxl/cxl-events.c
index d397718b1b..12dee2e467 100644
--- a/hw/cxl/cxl-events.c
+++ b/hw/cxl/cxl-events.c
@@ -139,6 +139,19 @@ bool cxl_event_insert(CXLDeviceState *cxlds, CXLEventLogType log_type,
return cxl_event_count(log) == 1;
}
+void cxl_discard_all_event_records(CXLDeviceState *cxlds)
+{
+ CXLEventLogType log_type;
+ CXLEventLog *log;
+
+ for (log_type = 0; log_type < CXL_EVENT_TYPE_MAX; log_type++) {
+ log = &cxlds->event_logs[log_type];
+ while (!cxl_event_empty(log)) {
+ cxl_event_delete_head(cxlds, log_type, log);
+ }
+ }
+}
+
CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl,
uint8_t log_type, int max_recs,
size_t *len)
diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
index c5f5fcfd64..e9f2543c43 100644
--- a/hw/cxl/cxl-host.c
+++ b/hw/cxl/cxl-host.c
@@ -315,7 +315,8 @@ static void machine_set_cxl(Object *obj, Visitor *v, const char *name,
static void machine_get_cfmw(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
- CXLFixedMemoryWindowOptionsList **list = opaque;
+ CXLState *state = opaque;
+ CXLFixedMemoryWindowOptionsList **list = &state->cfmw_list;
visit_type_CXLFixedMemoryWindowOptionsList(v, name, list, errp);
}
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 74eeb6fde7..b752920ec8 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -12,6 +12,7 @@
#include "hw/pci/msix.h"
#include "hw/cxl/cxl.h"
#include "hw/cxl/cxl_events.h"
+#include "hw/cxl/cxl_mailbox.h"
#include "hw/pci/pci.h"
#include "hw/pci-bridge/cxl_upstream_port.h"
#include "qemu/cutils.h"
@@ -62,12 +63,18 @@ enum {
#define SET_INTERRUPT_POLICY 0x3
FIRMWARE_UPDATE = 0x02,
#define GET_INFO 0x0
+ #define TRANSFER 0x1
+ #define ACTIVATE 0x2
TIMESTAMP = 0x03,
#define GET 0x0
#define SET 0x1
LOGS = 0x04,
#define GET_SUPPORTED 0x0
#define GET_LOG 0x1
+ FEATURES = 0x05,
+ #define GET_SUPPORTED 0x0
+ #define GET_FEATURE 0x1
+ #define SET_FEATURE 0x2
IDENTIFY = 0x40,
#define MEMORY_DEVICE 0x0
CCLS = 0x41,
@@ -83,6 +90,9 @@ enum {
#define GET_POISON_LIST 0x0
#define INJECT_POISON 0x1
#define CLEAR_POISON 0x2
+ #define GET_SCAN_MEDIA_CAPABILITIES 0x3
+ #define SCAN_MEDIA 0x4
+ #define GET_SCAN_MEDIA_RESULTS 0x5
DCD_CONFIG = 0x48,
#define GET_DC_CONFIG 0x0
#define GET_DYN_CAP_EXT_LIST 0x1
@@ -235,7 +245,6 @@ static CXLRetCode cmd_events_get_records(const struct cxl_cmd *cmd,
log_type = payload_in[0];
pl = (CXLGetEventPayload *)payload_out;
- memset(pl, 0, sizeof(*pl));
max_recs = (cxlds->payload_size - CXL_EVENT_PAYLOAD_HDR_SIZE) /
CXL_EVENT_RECORD_SIZE;
@@ -273,7 +282,6 @@ static CXLRetCode cmd_events_get_interrupt_policy(const struct cxl_cmd *cmd,
CXLEventLog *log;
policy = (CXLEventInterruptPolicy *)payload_out;
- memset(policy, 0, sizeof(*policy));
log = &cxlds->event_logs[CXL_EVENT_TYPE_INFO];
if (log->irq_enabled) {
@@ -372,7 +380,6 @@ static CXLRetCode cmd_infostat_identify(const struct cxl_cmd *cmd,
QEMU_BUILD_BUG_ON(sizeof(*is_identify) != 18);
is_identify = (void *)payload_out;
- memset(is_identify, 0, sizeof(*is_identify));
is_identify->pcie_vid = class->vendor_id;
is_identify->pcie_did = class->device_id;
if (object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_USP)) {
@@ -606,7 +613,6 @@ static CXLRetCode cmd_infostat_bg_op_sts(const struct cxl_cmd *cmd,
QEMU_BUILD_BUG_ON(sizeof(*bg_op_status) != 8);
bg_op_status = (void *)payload_out;
- memset(bg_op_status, 0, sizeof(*bg_op_status));
bg_op_status->status = cci->bg.complete_pct << 1;
if (cci->bg.runtime > 0) {
bg_op_status->status |= 1U << 0;
@@ -618,6 +624,9 @@ static CXLRetCode cmd_infostat_bg_op_sts(const struct cxl_cmd *cmd,
return CXL_MBOX_SUCCESS;
}
+#define CXL_FW_SLOTS 2
+#define CXL_FW_SIZE 0x02000000 /* 32 mb */
+
/* CXL r3.1 Section 8.2.9.3.1: Get FW Info (Opcode 0200h) */
static CXLRetCode cmd_firmware_update_get_info(const struct cxl_cmd *cmd,
uint8_t *payload_in,
@@ -647,17 +656,193 @@ static CXLRetCode cmd_firmware_update_get_info(const struct cxl_cmd *cmd,
}
fw_info = (void *)payload_out;
- memset(fw_info, 0, sizeof(*fw_info));
- fw_info->slots_supported = 2;
- fw_info->slot_info = BIT(0) | BIT(3);
- fw_info->caps = 0;
- pstrcpy(fw_info->fw_rev1, sizeof(fw_info->fw_rev1), "BWFW VERSION 0");
+ fw_info->slots_supported = CXL_FW_SLOTS;
+ fw_info->slot_info = (cci->fw.active_slot & 0x7) |
+ ((cci->fw.staged_slot & 0x7) << 3);
+ fw_info->caps = BIT(0); /* online update supported */
+
+ if (cci->fw.slot[0]) {
+ pstrcpy(fw_info->fw_rev1, sizeof(fw_info->fw_rev1), "BWFW VERSION 0");
+ }
+ if (cci->fw.slot[1]) {
+ pstrcpy(fw_info->fw_rev2, sizeof(fw_info->fw_rev2), "BWFW VERSION 1");
+ }
*len_out = sizeof(*fw_info);
return CXL_MBOX_SUCCESS;
}
+/* CXL r3.1 section 8.2.9.3.2: Transfer FW (Opcode 0201h) */
+#define CXL_FW_XFER_ALIGNMENT 128
+
+#define CXL_FW_XFER_ACTION_FULL 0x0
+#define CXL_FW_XFER_ACTION_INIT 0x1
+#define CXL_FW_XFER_ACTION_CONTINUE 0x2
+#define CXL_FW_XFER_ACTION_END 0x3
+#define CXL_FW_XFER_ACTION_ABORT 0x4
+
+static CXLRetCode cmd_firmware_update_transfer(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct {
+ uint8_t action;
+ uint8_t slot;
+ uint8_t rsvd1[2];
+ uint32_t offset;
+ uint8_t rsvd2[0x78];
+ uint8_t data[];
+ } QEMU_PACKED *fw_transfer = (void *)payload_in;
+ size_t offset, length;
+
+ if (fw_transfer->action == CXL_FW_XFER_ACTION_ABORT) {
+ /*
+ * At this point there aren't any on-going transfers
+ * running in the bg - this is serialized before this
+ * call altogether. Just mark the state machine and
+ * disregard any other input.
+ */
+ cci->fw.transferring = false;
+ return CXL_MBOX_SUCCESS;
+ }
+
+ offset = fw_transfer->offset * CXL_FW_XFER_ALIGNMENT;
+ length = len - sizeof(*fw_transfer);
+ if (offset + length > CXL_FW_SIZE) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ if (cci->fw.transferring) {
+ if (fw_transfer->action == CXL_FW_XFER_ACTION_FULL ||
+ fw_transfer->action == CXL_FW_XFER_ACTION_INIT) {
+ return CXL_MBOX_FW_XFER_IN_PROGRESS;
+ }
+ /*
+ * Abort partitioned package transfer if over 30 secs
+ * between parts. As opposed to the explicit ABORT action,
+ * semantically treat this condition as an error - as
+ * if a part action were passed without a previous INIT.
+ */
+ if (difftime(time(NULL), cci->fw.last_partxfer) > 30.0) {
+ cci->fw.transferring = false;
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ } else if (fw_transfer->action == CXL_FW_XFER_ACTION_CONTINUE ||
+ fw_transfer->action == CXL_FW_XFER_ACTION_END) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ /* allow back-to-back retransmission */
+ if ((offset != cci->fw.prev_offset || length != cci->fw.prev_len) &&
+ (fw_transfer->action == CXL_FW_XFER_ACTION_CONTINUE ||
+ fw_transfer->action == CXL_FW_XFER_ACTION_END)) {
+ /* verify no overlaps */
+ if (offset < cci->fw.prev_offset + cci->fw.prev_len) {
+ return CXL_MBOX_FW_XFER_OUT_OF_ORDER;
+ }
+ }
+
+ switch (fw_transfer->action) {
+ case CXL_FW_XFER_ACTION_FULL: /* ignores offset */
+ case CXL_FW_XFER_ACTION_END:
+ if (fw_transfer->slot == 0 ||
+ fw_transfer->slot == cci->fw.active_slot ||
+ fw_transfer->slot > CXL_FW_SLOTS) {
+ return CXL_MBOX_FW_INVALID_SLOT;
+ }
+
+ /* mark the slot used upon bg completion */
+ break;
+ case CXL_FW_XFER_ACTION_INIT:
+ if (offset != 0) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ cci->fw.transferring = true;
+ cci->fw.prev_offset = offset;
+ cci->fw.prev_len = length;
+ break;
+ case CXL_FW_XFER_ACTION_CONTINUE:
+ cci->fw.prev_offset = offset;
+ cci->fw.prev_len = length;
+ break;
+ default:
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ if (fw_transfer->action == CXL_FW_XFER_ACTION_FULL) {
+ cci->bg.runtime = 10 * 1000UL;
+ } else {
+ cci->bg.runtime = 2 * 1000UL;
+ }
+ /* keep relevant context for bg completion */
+ cci->fw.curr_action = fw_transfer->action;
+ cci->fw.curr_slot = fw_transfer->slot;
+ *len_out = 0;
+
+ return CXL_MBOX_BG_STARTED;
+}
+
+static void __do_firmware_xfer(CXLCCI *cci)
+{
+ switch (cci->fw.curr_action) {
+ case CXL_FW_XFER_ACTION_FULL:
+ case CXL_FW_XFER_ACTION_END:
+ cci->fw.slot[cci->fw.curr_slot - 1] = true;
+ cci->fw.transferring = false;
+ break;
+ case CXL_FW_XFER_ACTION_INIT:
+ case CXL_FW_XFER_ACTION_CONTINUE:
+ time(&cci->fw.last_partxfer);
+ break;
+ default:
+ break;
+ }
+}
+
+/* CXL r3.1 section 8.2.9.3.3: Activate FW (Opcode 0202h) */
+static CXLRetCode cmd_firmware_update_activate(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct {
+ uint8_t action;
+ uint8_t slot;
+ } QEMU_PACKED *fw_activate = (void *)payload_in;
+ QEMU_BUILD_BUG_ON(sizeof(*fw_activate) != 0x2);
+
+ if (fw_activate->slot == 0 ||
+ fw_activate->slot == cci->fw.active_slot ||
+ fw_activate->slot > CXL_FW_SLOTS) {
+ return CXL_MBOX_FW_INVALID_SLOT;
+ }
+
+ /* ensure that an actual fw package is there */
+ if (!cci->fw.slot[fw_activate->slot - 1]) {
+ return CXL_MBOX_FW_INVALID_SLOT;
+ }
+
+ switch (fw_activate->action) {
+ case 0: /* online */
+ cci->fw.active_slot = fw_activate->slot;
+ break;
+ case 1: /* reset */
+ cci->fw.staged_slot = fw_activate->slot;
+ break;
+ default:
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ return CXL_MBOX_SUCCESS;
+}
+
/* CXL r3.1 Section 8.2.9.4.1: Get Timestamp (Opcode 0300h) */
static CXLRetCode cmd_timestamp_get(const struct cxl_cmd *cmd,
uint8_t *payload_in,
@@ -768,6 +953,388 @@ static CXLRetCode cmd_logs_get_log(const struct cxl_cmd *cmd,
return CXL_MBOX_SUCCESS;
}
+/* CXL r3.1 section 8.2.9.6: Features */
+/*
+ * Get Supported Features output payload
+ * CXL r3.1 section 8.2.9.6.1 Table 8-96
+ */
+typedef struct CXLSupportedFeatureHeader {
+ uint16_t entries;
+ uint16_t nsuppfeats_dev;
+ uint32_t reserved;
+} QEMU_PACKED CXLSupportedFeatureHeader;
+
+/*
+ * Get Supported Features Supported Feature Entry
+ * CXL r3.1 section 8.2.9.6.1 Table 8-97
+ */
+typedef struct CXLSupportedFeatureEntry {
+ QemuUUID uuid;
+ uint16_t feat_index;
+ uint16_t get_feat_size;
+ uint16_t set_feat_size;
+ uint32_t attr_flags;
+ uint8_t get_feat_version;
+ uint8_t set_feat_version;
+ uint16_t set_feat_effects;
+ uint8_t rsvd[18];
+} QEMU_PACKED CXLSupportedFeatureEntry;
+
+/*
+ * Get Supported Features Supported Feature Entry
+ * CXL rev 3.1 section 8.2.9.6.1 Table 8-97
+ */
+/* Supported Feature Entry : attribute flags */
+#define CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE BIT(0)
+#define CXL_FEAT_ENTRY_ATTR_FLAG_DEEPEST_RESET_PERSISTENCE_MASK GENMASK(3, 1)
+#define CXL_FEAT_ENTRY_ATTR_FLAG_PERSIST_ACROSS_FIRMWARE_UPDATE BIT(4)
+#define CXL_FEAT_ENTRY_ATTR_FLAG_SUPPORT_DEFAULT_SELECTION BIT(5)
+#define CXL_FEAT_ENTRY_ATTR_FLAG_SUPPORT_SAVED_SELECTION BIT(6)
+
+/* Supported Feature Entry : set feature effects */
+#define CXL_FEAT_ENTRY_SFE_CONFIG_CHANGE_COLD_RESET BIT(0)
+#define CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE BIT(1)
+#define CXL_FEAT_ENTRY_SFE_IMMEDIATE_DATA_CHANGE BIT(2)
+#define CXL_FEAT_ENTRY_SFE_IMMEDIATE_POLICY_CHANGE BIT(3)
+#define CXL_FEAT_ENTRY_SFE_IMMEDIATE_LOG_CHANGE BIT(4)
+#define CXL_FEAT_ENTRY_SFE_SECURITY_STATE_CHANGE BIT(5)
+#define CXL_FEAT_ENTRY_SFE_BACKGROUND_OPERATION BIT(6)
+#define CXL_FEAT_ENTRY_SFE_SUPPORT_SECONDARY_MAILBOX BIT(7)
+#define CXL_FEAT_ENTRY_SFE_SUPPORT_ABORT_BACKGROUND_OPERATION BIT(8)
+#define CXL_FEAT_ENTRY_SFE_CEL_VALID BIT(9)
+#define CXL_FEAT_ENTRY_SFE_CONFIG_CHANGE_CONV_RESET BIT(10)
+#define CXL_FEAT_ENTRY_SFE_CONFIG_CHANGE_CXL_RESET BIT(11)
+
+enum CXL_SUPPORTED_FEATURES_LIST {
+ CXL_FEATURE_PATROL_SCRUB = 0,
+ CXL_FEATURE_ECS,
+ CXL_FEATURE_MAX
+};
+
+/* Get Feature CXL 3.1 Spec 8.2.9.6.2 */
+/*
+ * Get Feature input payload
+ * CXL r3.1 section 8.2.9.6.2 Table 8-99
+ */
+/* Get Feature : Payload in selection */
+enum CXL_GET_FEATURE_SELECTION {
+ CXL_GET_FEATURE_SEL_CURRENT_VALUE,
+ CXL_GET_FEATURE_SEL_DEFAULT_VALUE,
+ CXL_GET_FEATURE_SEL_SAVED_VALUE,
+ CXL_GET_FEATURE_SEL_MAX
+};
+
+/* Set Feature CXL 3.1 Spec 8.2.9.6.3 */
+/*
+ * Set Feature input payload
+ * CXL r3.1 section 8.2.9.6.3 Table 8-101
+ */
+typedef struct CXLSetFeatureInHeader {
+ QemuUUID uuid;
+ uint32_t flags;
+ uint16_t offset;
+ uint8_t version;
+ uint8_t rsvd[9];
+} QEMU_PACKED QEMU_ALIGNED(16) CXLSetFeatureInHeader;
+
+/* Set Feature : Payload in flags */
+#define CXL_SET_FEATURE_FLAG_DATA_TRANSFER_MASK 0x7
+enum CXL_SET_FEATURE_FLAG_DATA_TRANSFER {
+ CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER,
+ CXL_SET_FEATURE_FLAG_INITIATE_DATA_TRANSFER,
+ CXL_SET_FEATURE_FLAG_CONTINUE_DATA_TRANSFER,
+ CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER,
+ CXL_SET_FEATURE_FLAG_ABORT_DATA_TRANSFER,
+ CXL_SET_FEATURE_FLAG_DATA_TRANSFER_MAX
+};
+#define CXL_SET_FEAT_DATA_SAVED_ACROSS_RESET BIT(3)
+
+/* CXL r3.1 section 8.2.9.9.11.1: Device Patrol Scrub Control Feature */
+static const QemuUUID patrol_scrub_uuid = {
+ .data = UUID(0x96dad7d6, 0xfde8, 0x482b, 0xa7, 0x33,
+ 0x75, 0x77, 0x4e, 0x06, 0xdb, 0x8a)
+};
+
+typedef struct CXLMemPatrolScrubSetFeature {
+ CXLSetFeatureInHeader hdr;
+ CXLMemPatrolScrubWriteAttrs feat_data;
+} QEMU_PACKED QEMU_ALIGNED(16) CXLMemPatrolScrubSetFeature;
+
+/*
+ * CXL r3.1 section 8.2.9.9.11.2:
+ * DDR5 Error Check Scrub (ECS) Control Feature
+ */
+static const QemuUUID ecs_uuid = {
+ .data = UUID(0xe5b13f22, 0x2328, 0x4a14, 0xb8, 0xba,
+ 0xb9, 0x69, 0x1e, 0x89, 0x33, 0x86)
+};
+
+typedef struct CXLMemECSSetFeature {
+ CXLSetFeatureInHeader hdr;
+ CXLMemECSWriteAttrs feat_data[];
+} QEMU_PACKED QEMU_ALIGNED(16) CXLMemECSSetFeature;
+
+/* CXL r3.1 section 8.2.9.6.1: Get Supported Features (Opcode 0500h) */
+static CXLRetCode cmd_features_get_supported(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct {
+ uint32_t count;
+ uint16_t start_index;
+ uint16_t reserved;
+ } QEMU_PACKED QEMU_ALIGNED(16) * get_feats_in = (void *)payload_in;
+
+ struct {
+ CXLSupportedFeatureHeader hdr;
+ CXLSupportedFeatureEntry feat_entries[];
+ } QEMU_PACKED QEMU_ALIGNED(16) * get_feats_out = (void *)payload_out;
+ uint16_t index, req_entries;
+ uint16_t entry;
+
+ if (!object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3)) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+ if (get_feats_in->count < sizeof(CXLSupportedFeatureHeader) ||
+ get_feats_in->start_index >= CXL_FEATURE_MAX) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ req_entries = (get_feats_in->count -
+ sizeof(CXLSupportedFeatureHeader)) /
+ sizeof(CXLSupportedFeatureEntry);
+ req_entries = MIN(req_entries,
+ (CXL_FEATURE_MAX - get_feats_in->start_index));
+
+ for (entry = 0, index = get_feats_in->start_index;
+ entry < req_entries; index++) {
+ switch (index) {
+ case CXL_FEATURE_PATROL_SCRUB:
+ /* Fill supported feature entry for device patrol scrub control */
+ get_feats_out->feat_entries[entry++] =
+ (struct CXLSupportedFeatureEntry) {
+ .uuid = patrol_scrub_uuid,
+ .feat_index = index,
+ .get_feat_size = sizeof(CXLMemPatrolScrubReadAttrs),
+ .set_feat_size = sizeof(CXLMemPatrolScrubWriteAttrs),
+ .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE,
+ .get_feat_version = CXL_MEMDEV_PS_GET_FEATURE_VERSION,
+ .set_feat_version = CXL_MEMDEV_PS_SET_FEATURE_VERSION,
+ .set_feat_effects = CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE |
+ CXL_FEAT_ENTRY_SFE_CEL_VALID,
+ };
+ break;
+ case CXL_FEATURE_ECS:
+ /* Fill supported feature entry for device DDR5 ECS control */
+ get_feats_out->feat_entries[entry++] =
+ (struct CXLSupportedFeatureEntry) {
+ .uuid = ecs_uuid,
+ .feat_index = index,
+ .get_feat_size = CXL_ECS_NUM_MEDIA_FRUS *
+ sizeof(CXLMemECSReadAttrs),
+ .set_feat_size = CXL_ECS_NUM_MEDIA_FRUS *
+ sizeof(CXLMemECSWriteAttrs),
+ .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE,
+ .get_feat_version = CXL_ECS_GET_FEATURE_VERSION,
+ .set_feat_version = CXL_ECS_SET_FEATURE_VERSION,
+ .set_feat_effects = CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE |
+ CXL_FEAT_ENTRY_SFE_CEL_VALID,
+ };
+ break;
+ default:
+ __builtin_unreachable();
+ }
+ }
+ get_feats_out->hdr.nsuppfeats_dev = CXL_FEATURE_MAX;
+ get_feats_out->hdr.entries = req_entries;
+ *len_out = sizeof(CXLSupportedFeatureHeader) +
+ req_entries * sizeof(CXLSupportedFeatureEntry);
+
+ return CXL_MBOX_SUCCESS;
+}
+
+/* CXL r3.1 section 8.2.9.6.2: Get Feature (Opcode 0501h) */
+static CXLRetCode cmd_features_get_feature(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct {
+ QemuUUID uuid;
+ uint16_t offset;
+ uint16_t count;
+ uint8_t selection;
+ } QEMU_PACKED QEMU_ALIGNED(16) * get_feature;
+ uint16_t bytes_to_copy = 0;
+ CXLType3Dev *ct3d;
+ CXLSetFeatureInfo *set_feat_info;
+
+ if (!object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3)) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ ct3d = CXL_TYPE3(cci->d);
+ get_feature = (void *)payload_in;
+
+ set_feat_info = &ct3d->set_feat_info;
+ if (qemu_uuid_is_equal(&get_feature->uuid, &set_feat_info->uuid)) {
+ return CXL_MBOX_FEATURE_TRANSFER_IN_PROGRESS;
+ }
+
+ if (get_feature->selection != CXL_GET_FEATURE_SEL_CURRENT_VALUE) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+ if (get_feature->offset + get_feature->count > cci->payload_max) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ if (qemu_uuid_is_equal(&get_feature->uuid, &patrol_scrub_uuid)) {
+ if (get_feature->offset >= sizeof(CXLMemPatrolScrubReadAttrs)) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ bytes_to_copy = sizeof(CXLMemPatrolScrubReadAttrs) -
+ get_feature->offset;
+ bytes_to_copy = MIN(bytes_to_copy, get_feature->count);
+ memcpy(payload_out,
+ (uint8_t *)&ct3d->patrol_scrub_attrs + get_feature->offset,
+ bytes_to_copy);
+ } else if (qemu_uuid_is_equal(&get_feature->uuid, &ecs_uuid)) {
+ if (get_feature->offset >= CXL_ECS_NUM_MEDIA_FRUS *
+ sizeof(CXLMemECSReadAttrs)) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ bytes_to_copy = CXL_ECS_NUM_MEDIA_FRUS *
+ sizeof(CXLMemECSReadAttrs) -
+ get_feature->offset;
+ bytes_to_copy = MIN(bytes_to_copy, get_feature->count);
+ memcpy(payload_out,
+ (uint8_t *)&ct3d->ecs_attrs + get_feature->offset,
+ bytes_to_copy);
+ } else {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ *len_out = bytes_to_copy;
+
+ return CXL_MBOX_SUCCESS;
+}
+
+/* CXL r3.1 section 8.2.9.6.3: Set Feature (Opcode 0502h) */
+static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ CXLSetFeatureInHeader *hdr = (void *)payload_in;
+ CXLMemPatrolScrubWriteAttrs *ps_write_attrs;
+ CXLMemPatrolScrubSetFeature *ps_set_feature;
+ CXLMemECSWriteAttrs *ecs_write_attrs;
+ CXLMemECSSetFeature *ecs_set_feature;
+ CXLSetFeatureInfo *set_feat_info;
+ uint16_t bytes_to_copy = 0;
+ uint8_t data_transfer_flag;
+ CXLType3Dev *ct3d;
+ uint16_t count;
+
+
+ if (!object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3)) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+ ct3d = CXL_TYPE3(cci->d);
+ set_feat_info = &ct3d->set_feat_info;
+
+ if (!qemu_uuid_is_null(&set_feat_info->uuid) &&
+ !qemu_uuid_is_equal(&hdr->uuid, &set_feat_info->uuid)) {
+ return CXL_MBOX_FEATURE_TRANSFER_IN_PROGRESS;
+ }
+ if (hdr->flags & CXL_SET_FEAT_DATA_SAVED_ACROSS_RESET) {
+ set_feat_info->data_saved_across_reset = true;
+ } else {
+ set_feat_info->data_saved_across_reset = false;
+ }
+
+ data_transfer_flag =
+ hdr->flags & CXL_SET_FEATURE_FLAG_DATA_TRANSFER_MASK;
+ if (data_transfer_flag == CXL_SET_FEATURE_FLAG_INITIATE_DATA_TRANSFER) {
+ set_feat_info->uuid = hdr->uuid;
+ set_feat_info->data_size = 0;
+ }
+ set_feat_info->data_transfer_flag = data_transfer_flag;
+ set_feat_info->data_offset = hdr->offset;
+ bytes_to_copy = len_in - sizeof(CXLSetFeatureInHeader);
+
+ if (qemu_uuid_is_equal(&hdr->uuid, &patrol_scrub_uuid)) {
+ if (hdr->version != CXL_MEMDEV_PS_SET_FEATURE_VERSION) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ ps_set_feature = (void *)payload_in;
+ ps_write_attrs = &ps_set_feature->feat_data;
+ memcpy((uint8_t *)&ct3d->patrol_scrub_wr_attrs + hdr->offset,
+ ps_write_attrs,
+ bytes_to_copy);
+ set_feat_info->data_size += bytes_to_copy;
+
+ if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER ||
+ data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) {
+ ct3d->patrol_scrub_attrs.scrub_cycle &= ~0xFF;
+ ct3d->patrol_scrub_attrs.scrub_cycle |=
+ ct3d->patrol_scrub_wr_attrs.scrub_cycle_hr & 0xFF;
+ ct3d->patrol_scrub_attrs.scrub_flags &= ~0x1;
+ ct3d->patrol_scrub_attrs.scrub_flags |=
+ ct3d->patrol_scrub_wr_attrs.scrub_flags & 0x1;
+ }
+ } else if (qemu_uuid_is_equal(&hdr->uuid,
+ &ecs_uuid)) {
+ if (hdr->version != CXL_ECS_SET_FEATURE_VERSION) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ ecs_set_feature = (void *)payload_in;
+ ecs_write_attrs = ecs_set_feature->feat_data;
+ memcpy((uint8_t *)ct3d->ecs_wr_attrs + hdr->offset,
+ ecs_write_attrs,
+ bytes_to_copy);
+ set_feat_info->data_size += bytes_to_copy;
+
+ if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER ||
+ data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) {
+ for (count = 0; count < CXL_ECS_NUM_MEDIA_FRUS; count++) {
+ ct3d->ecs_attrs[count].ecs_log_cap =
+ ct3d->ecs_wr_attrs[count].ecs_log_cap;
+ ct3d->ecs_attrs[count].ecs_config =
+ ct3d->ecs_wr_attrs[count].ecs_config & 0x1F;
+ }
+ }
+ } else {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER ||
+ data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER ||
+ data_transfer_flag == CXL_SET_FEATURE_FLAG_ABORT_DATA_TRANSFER) {
+ memset(&set_feat_info->uuid, 0, sizeof(QemuUUID));
+ if (qemu_uuid_is_equal(&hdr->uuid, &patrol_scrub_uuid)) {
+ memset(&ct3d->patrol_scrub_wr_attrs, 0, set_feat_info->data_size);
+ } else if (qemu_uuid_is_equal(&hdr->uuid, &ecs_uuid)) {
+ memset(ct3d->ecs_wr_attrs, 0, set_feat_info->data_size);
+ }
+ set_feat_info->data_transfer_flag = 0;
+ set_feat_info->data_saved_across_reset = false;
+ set_feat_info->data_offset = 0;
+ set_feat_info->data_size = 0;
+ }
+
+ return CXL_MBOX_SUCCESS;
+}
+
/* CXL r3.1 Section 8.2.9.9.1.1: Identify Memory Device (Opcode 4000h) */
static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd,
uint8_t *payload_in,
@@ -805,7 +1372,6 @@ static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd,
}
id = (void *)payload_out;
- memset(id, 0, sizeof(*id));
snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0);
@@ -953,6 +1519,7 @@ static void __do_sanitization(CXLType3Dev *ct3d)
memset(lsa, 0, memory_region_size(mr));
}
}
+ cxl_discard_all_event_records(&ct3d->cxl_dstate);
}
/*
@@ -1095,7 +1662,6 @@ static CXLRetCode cmd_media_get_poison_list(const struct cxl_cmd *cmd,
out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
- memset(out, 0, out_pl_len);
QLIST_FOREACH(ent, poison_list, node) {
uint64_t start, stop;
@@ -1117,6 +1683,10 @@ static CXLRetCode cmd_media_get_poison_list(const struct cxl_cmd *cmd,
out->flags = (1 << 1);
stq_le_p(&out->overflow_timestamp, ct3d->poison_list_overflow_ts);
}
+ if (scan_media_running(cci)) {
+ out->flags |= (1 << 2);
+ }
+
stw_le_p(&out->count, record_count);
*len_out = out_pl_len;
return CXL_MBOX_SUCCESS;
@@ -1146,6 +1716,16 @@ static CXLRetCode cmd_media_inject_poison(const struct cxl_cmd *cmd,
return CXL_MBOX_SUCCESS;
}
}
+ /*
+ * Freeze the list if there is an on-going scan media operation.
+ */
+ if (scan_media_running(cci)) {
+ /*
+ * XXX: Spec is ambiguous - is this case considered
+ * a successful return despite not adding to the list?
+ */
+ goto success;
+ }
if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
return CXL_MBOX_INJECT_POISON_LIMIT;
@@ -1161,6 +1741,7 @@ static CXLRetCode cmd_media_inject_poison(const struct cxl_cmd *cmd,
*/
QLIST_INSERT_HEAD(poison_list, p, node);
ct3d->poison_list_cnt++;
+success:
*len_out = 0;
return CXL_MBOX_SUCCESS;
@@ -1200,6 +1781,17 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
}
}
+ /*
+ * Freeze the list if there is an on-going scan media operation.
+ */
+ if (scan_media_running(cci)) {
+ /*
+ * XXX: Spec is ambiguous - is this case considered
+ * a successful return despite not removing from the list?
+ */
+ goto success;
+ }
+
QLIST_FOREACH(ent, poison_list, node) {
/*
* Test for contained in entry. Simpler than general case
@@ -1210,7 +1802,7 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
}
}
if (!ent) {
- return CXL_MBOX_SUCCESS;
+ goto success;
}
QLIST_REMOVE(ent, node);
@@ -1247,12 +1839,262 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd,
}
/* Any fragments have been added, free original entry */
g_free(ent);
+success:
*len_out = 0;
return CXL_MBOX_SUCCESS;
}
/*
+ * CXL r3.1 section 8.2.9.9.4.4: Get Scan Media Capabilities
+ */
+static CXLRetCode
+cmd_media_get_scan_media_capabilities(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct get_scan_media_capabilities_pl {
+ uint64_t pa;
+ uint64_t length;
+ } QEMU_PACKED;
+
+ struct get_scan_media_capabilities_out_pl {
+ uint32_t estimated_runtime_ms;
+ };
+
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
+ struct get_scan_media_capabilities_pl *in = (void *)payload_in;
+ struct get_scan_media_capabilities_out_pl *out = (void *)payload_out;
+ uint64_t query_start;
+ uint64_t query_length;
+
+ query_start = ldq_le_p(&in->pa);
+ /* 64 byte alignment required */
+ if (query_start & 0x3f) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ query_length = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE;
+
+ if (query_start + query_length > cxl_dstate->static_mem_size) {
+ return CXL_MBOX_INVALID_PA;
+ }
+
+ /*
+ * Just use 400 nanosecond access/read latency + 100 ns for
+ * the cost of updating the poison list. For small enough
+ * chunks return at least 1 ms.
+ */
+ stl_le_p(&out->estimated_runtime_ms,
+ MAX(1, query_length * (0.0005L / 64)));
+
+ *len_out = sizeof(*out);
+ return CXL_MBOX_SUCCESS;
+}
+
+static void __do_scan_media(CXLType3Dev *ct3d)
+{
+ CXLPoison *ent;
+ unsigned int results_cnt = 0;
+
+ QLIST_FOREACH(ent, &ct3d->scan_media_results, node) {
+ results_cnt++;
+ }
+
+ /* only scan media may clear the overflow */
+ if (ct3d->poison_list_overflowed &&
+ ct3d->poison_list_cnt == results_cnt) {
+ cxl_clear_poison_list_overflowed(ct3d);
+ }
+ /* scan media has run since last conventional reset */
+ ct3d->scan_media_hasrun = true;
+}
+
+/*
+ * CXL r3.1 section 8.2.9.9.4.5: Scan Media
+ */
+static CXLRetCode cmd_media_scan_media(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct scan_media_pl {
+ uint64_t pa;
+ uint64_t length;
+ uint8_t flags;
+ } QEMU_PACKED;
+
+ struct scan_media_pl *in = (void *)payload_in;
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate;
+ uint64_t query_start;
+ uint64_t query_length;
+ CXLPoison *ent, *next;
+
+ query_start = ldq_le_p(&in->pa);
+ /* 64 byte alignment required */
+ if (query_start & 0x3f) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ query_length = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE;
+
+ if (query_start + query_length > cxl_dstate->static_mem_size) {
+ return CXL_MBOX_INVALID_PA;
+ }
+ if (ct3d->dc.num_regions && query_start + query_length >=
+ cxl_dstate->static_mem_size + ct3d->dc.total_capacity) {
+ return CXL_MBOX_INVALID_PA;
+ }
+
+ if (in->flags == 0) { /* TODO */
+ qemu_log_mask(LOG_UNIMP,
+ "Scan Media Event Log is unsupported\n");
+ }
+
+ /* any previous results are discarded upon a new Scan Media */
+ QLIST_FOREACH_SAFE(ent, &ct3d->scan_media_results, node, next) {
+ QLIST_REMOVE(ent, node);
+ g_free(ent);
+ }
+
+ /* kill the poison list - it will be recreated */
+ if (ct3d->poison_list_overflowed) {
+ QLIST_FOREACH_SAFE(ent, &ct3d->poison_list, node, next) {
+ QLIST_REMOVE(ent, node);
+ g_free(ent);
+ ct3d->poison_list_cnt--;
+ }
+ }
+
+ /*
+ * Scan the backup list and move corresponding entries
+ * into the results list, updating the poison list
+ * when possible.
+ */
+ QLIST_FOREACH_SAFE(ent, &ct3d->poison_list_bkp, node, next) {
+ CXLPoison *res;
+
+ if (ent->start >= query_start + query_length ||
+ ent->start + ent->length <= query_start) {
+ continue;
+ }
+
+ /*
+ * If a Get Poison List cmd comes in while this
+ * scan is being done, it will see the new complete
+ * list, while setting the respective flag.
+ */
+ if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) {
+ CXLPoison *p = g_new0(CXLPoison, 1);
+
+ p->start = ent->start;
+ p->length = ent->length;
+ p->type = ent->type;
+ QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
+ ct3d->poison_list_cnt++;
+ }
+
+ res = g_new0(CXLPoison, 1);
+ res->start = ent->start;
+ res->length = ent->length;
+ res->type = ent->type;
+ QLIST_INSERT_HEAD(&ct3d->scan_media_results, res, node);
+
+ QLIST_REMOVE(ent, node);
+ g_free(ent);
+ }
+
+ cci->bg.runtime = MAX(1, query_length * (0.0005L / 64));
+ *len_out = 0;
+
+ return CXL_MBOX_BG_STARTED;
+}
+
+/*
+ * CXL r3.1 section 8.2.9.9.4.6: Get Scan Media Results
+ */
+static CXLRetCode cmd_media_get_scan_media_results(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct get_scan_media_results_out_pl {
+ uint64_t dpa_restart;
+ uint64_t length;
+ uint8_t flags;
+ uint8_t rsvd1;
+ uint16_t count;
+ uint8_t rsvd2[0xc];
+ struct {
+ uint64_t addr;
+ uint32_t length;
+ uint32_t resv;
+ } QEMU_PACKED records[];
+ } QEMU_PACKED;
+
+ struct get_scan_media_results_out_pl *out = (void *)payload_out;
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ CXLPoisonList *scan_media_results = &ct3d->scan_media_results;
+ CXLPoison *ent, *next;
+ uint16_t total_count = 0, record_count = 0, i = 0;
+ uint16_t out_pl_len;
+
+ if (!ct3d->scan_media_hasrun) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ /*
+ * Calculate limits, all entries are within the same address range of the
+ * last scan media call.
+ */
+ QLIST_FOREACH(ent, scan_media_results, node) {
+ size_t rec_size = record_count * sizeof(out->records[0]);
+
+ if (sizeof(*out) + rec_size < CXL_MAILBOX_MAX_PAYLOAD_SIZE) {
+ record_count++;
+ }
+ total_count++;
+ }
+
+ out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+ assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+ memset(out, 0, out_pl_len);
+ QLIST_FOREACH_SAFE(ent, scan_media_results, node, next) {
+ uint64_t start, stop;
+
+ if (i == record_count) {
+ break;
+ }
+
+ start = ROUND_DOWN(ent->start, 64ull);
+ stop = ROUND_DOWN(ent->start, 64ull) + ent->length;
+ stq_le_p(&out->records[i].addr, start | (ent->type & 0x7));
+ stl_le_p(&out->records[i].length, (stop - start) / CXL_CACHE_LINE_SIZE);
+ i++;
+
+ /* consume the returning entry */
+ QLIST_REMOVE(ent, node);
+ g_free(ent);
+ }
+
+ stw_le_p(&out->count, record_count);
+ if (total_count > record_count) {
+ out->flags = (1 << 0); /* More Media Error Records */
+ }
+
+ *len_out = out_pl_len;
+ return CXL_MBOX_SUCCESS;
+}
+
+/*
* CXL r3.1 section 8.2.9.9.9.1: Get Dynamic Capacity Configuration
* (Opcode: 4800h)
*/
@@ -1822,40 +2664,51 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd,
return CXL_MBOX_SUCCESS;
}
-#define IMMEDIATE_CONFIG_CHANGE (1 << 1)
-#define IMMEDIATE_DATA_CHANGE (1 << 2)
-#define IMMEDIATE_POLICY_CHANGE (1 << 3)
-#define IMMEDIATE_LOG_CHANGE (1 << 4)
-#define SECURITY_STATE_CHANGE (1 << 5)
-#define BACKGROUND_OPERATION (1 << 6)
-
static const struct cxl_cmd cxl_cmd_set[256][256] = {
[EVENTS][GET_RECORDS] = { "EVENTS_GET_RECORDS",
cmd_events_get_records, 1, 0 },
[EVENTS][CLEAR_RECORDS] = { "EVENTS_CLEAR_RECORDS",
- cmd_events_clear_records, ~0, IMMEDIATE_LOG_CHANGE },
+ cmd_events_clear_records, ~0, CXL_MBOX_IMMEDIATE_LOG_CHANGE },
[EVENTS][GET_INTERRUPT_POLICY] = { "EVENTS_GET_INTERRUPT_POLICY",
cmd_events_get_interrupt_policy, 0, 0 },
[EVENTS][SET_INTERRUPT_POLICY] = { "EVENTS_SET_INTERRUPT_POLICY",
cmd_events_set_interrupt_policy,
- ~0, IMMEDIATE_CONFIG_CHANGE },
+ ~0, CXL_MBOX_IMMEDIATE_CONFIG_CHANGE },
[FIRMWARE_UPDATE][GET_INFO] = { "FIRMWARE_UPDATE_GET_INFO",
cmd_firmware_update_get_info, 0, 0 },
+ [FIRMWARE_UPDATE][TRANSFER] = { "FIRMWARE_UPDATE_TRANSFER",
+ cmd_firmware_update_transfer, ~0, CXL_MBOX_BACKGROUND_OPERATION },
+ [FIRMWARE_UPDATE][ACTIVATE] = { "FIRMWARE_UPDATE_ACTIVATE",
+ cmd_firmware_update_activate, 2, CXL_MBOX_BACKGROUND_OPERATION },
[TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 },
[TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set,
- 8, IMMEDIATE_POLICY_CHANGE },
+ 8, CXL_MBOX_IMMEDIATE_POLICY_CHANGE },
[LOGS][GET_SUPPORTED] = { "LOGS_GET_SUPPORTED", cmd_logs_get_supported,
0, 0 },
[LOGS][GET_LOG] = { "LOGS_GET_LOG", cmd_logs_get_log, 0x18, 0 },
+ [FEATURES][GET_SUPPORTED] = { "FEATURES_GET_SUPPORTED",
+ cmd_features_get_supported, 0x8, 0 },
+ [FEATURES][GET_FEATURE] = { "FEATURES_GET_FEATURE",
+ cmd_features_get_feature, 0x15, 0 },
+ [FEATURES][SET_FEATURE] = { "FEATURES_SET_FEATURE",
+ cmd_features_set_feature,
+ ~0,
+ (CXL_MBOX_IMMEDIATE_CONFIG_CHANGE |
+ CXL_MBOX_IMMEDIATE_DATA_CHANGE |
+ CXL_MBOX_IMMEDIATE_POLICY_CHANGE |
+ CXL_MBOX_IMMEDIATE_LOG_CHANGE |
+ CXL_MBOX_SECURITY_STATE_CHANGE)},
[IDENTIFY][MEMORY_DEVICE] = { "IDENTIFY_MEMORY_DEVICE",
cmd_identify_memory_device, 0, 0 },
[CCLS][GET_PARTITION_INFO] = { "CCLS_GET_PARTITION_INFO",
cmd_ccls_get_partition_info, 0, 0 },
[CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 },
[CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa,
- ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE },
+ ~0, CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | CXL_MBOX_IMMEDIATE_DATA_CHANGE },
[SANITIZE][OVERWRITE] = { "SANITIZE_OVERWRITE", cmd_sanitize_overwrite, 0,
- IMMEDIATE_DATA_CHANGE | SECURITY_STATE_CHANGE | BACKGROUND_OPERATION },
+ (CXL_MBOX_IMMEDIATE_DATA_CHANGE |
+ CXL_MBOX_SECURITY_STATE_CHANGE |
+ CXL_MBOX_BACKGROUND_OPERATION)},
[PERSISTENT_MEM][GET_SECURITY_STATE] = { "GET_SECURITY_STATE",
cmd_get_security_state, 0, 0 },
[MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST",
@@ -1864,6 +2717,14 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
cmd_media_inject_poison, 8, 0 },
[MEDIA_AND_POISON][CLEAR_POISON] = { "MEDIA_AND_POISON_CLEAR_POISON",
cmd_media_clear_poison, 72, 0 },
+ [MEDIA_AND_POISON][GET_SCAN_MEDIA_CAPABILITIES] = {
+ "MEDIA_AND_POISON_GET_SCAN_MEDIA_CAPABILITIES",
+ cmd_media_get_scan_media_capabilities, 16, 0 },
+ [MEDIA_AND_POISON][SCAN_MEDIA] = { "MEDIA_AND_POISON_SCAN_MEDIA",
+ cmd_media_scan_media, 17, CXL_MBOX_BACKGROUND_OPERATION },
+ [MEDIA_AND_POISON][GET_SCAN_MEDIA_RESULTS] = {
+ "MEDIA_AND_POISON_GET_SCAN_MEDIA_RESULTS",
+ cmd_media_get_scan_media_results, 0, 0 },
};
static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
@@ -1874,10 +2735,10 @@ static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
8, 0 },
[DCD_CONFIG][ADD_DYN_CAP_RSP] = {
"DCD_ADD_DYNAMIC_CAPACITY_RESPONSE", cmd_dcd_add_dyn_cap_rsp,
- ~0, IMMEDIATE_DATA_CHANGE },
+ ~0, CXL_MBOX_IMMEDIATE_DATA_CHANGE },
[DCD_CONFIG][RELEASE_DYN_CAP] = {
"DCD_RELEASE_DYNAMIC_CAPACITY", cmd_dcd_release_dyn_cap,
- ~0, IMMEDIATE_DATA_CHANGE },
+ ~0, CXL_MBOX_IMMEDIATE_DATA_CHANGE },
};
static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
@@ -1885,8 +2746,8 @@ static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
[INFOSTAT][BACKGROUND_OPERATION_STATUS] = { "BACKGROUND_OPERATION_STATUS",
cmd_infostat_bg_op_sts, 0, 0 },
[TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 },
- [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, 0,
- IMMEDIATE_POLICY_CHANGE },
+ [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, 8,
+ CXL_MBOX_IMMEDIATE_POLICY_CHANGE },
[LOGS][GET_SUPPORTED] = { "LOGS_GET_SUPPORTED", cmd_logs_get_supported, 0,
0 },
[LOGS][GET_LOG] = { "LOGS_GET_LOG", cmd_logs_get_log, 0x18, 0 },
@@ -1913,6 +2774,7 @@ int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd,
int ret;
const struct cxl_cmd *cxl_cmd;
opcode_handler h;
+ CXLDeviceState *cxl_dstate;
*len_out = 0;
cxl_cmd = &cci->cxl_cmd_set[set][cmd];
@@ -1928,28 +2790,34 @@ int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd,
}
/* Only one bg command at a time */
- if ((cxl_cmd->effect & BACKGROUND_OPERATION) &&
+ if ((cxl_cmd->effect & CXL_MBOX_BACKGROUND_OPERATION) &&
cci->bg.runtime > 0) {
return CXL_MBOX_BUSY;
}
- /* forbid any selected commands while overwriting */
- if (sanitize_running(cci)) {
- if (h == cmd_events_get_records ||
- h == cmd_ccls_get_partition_info ||
- h == cmd_ccls_set_lsa ||
- h == cmd_ccls_get_lsa ||
- h == cmd_logs_get_log ||
- h == cmd_media_get_poison_list ||
- h == cmd_media_inject_poison ||
- h == cmd_media_clear_poison ||
- h == cmd_sanitize_overwrite) {
- return CXL_MBOX_MEDIA_DISABLED;
+ /* forbid any selected commands while the media is disabled */
+ if (object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3)) {
+ cxl_dstate = &CXL_TYPE3(cci->d)->cxl_dstate;
+
+ if (cxl_dev_media_disabled(cxl_dstate)) {
+ if (h == cmd_events_get_records ||
+ h == cmd_ccls_get_partition_info ||
+ h == cmd_ccls_set_lsa ||
+ h == cmd_ccls_get_lsa ||
+ h == cmd_logs_get_log ||
+ h == cmd_media_get_poison_list ||
+ h == cmd_media_inject_poison ||
+ h == cmd_media_clear_poison ||
+ h == cmd_sanitize_overwrite ||
+ h == cmd_firmware_update_transfer ||
+ h == cmd_firmware_update_activate) {
+ return CXL_MBOX_MEDIA_DISABLED;
+ }
}
}
ret = (*h)(cxl_cmd, pl_in, len_in, pl_out, len_out, cci);
- if ((cxl_cmd->effect & BACKGROUND_OPERATION) &&
+ if ((cxl_cmd->effect & CXL_MBOX_BACKGROUND_OPERATION) &&
ret == CXL_MBOX_BG_STARTED) {
*bg_started = true;
} else {
@@ -1987,6 +2855,9 @@ static void bg_timercb(void *opaque)
cci->bg.complete_pct = 100;
cci->bg.ret_code = ret;
switch (cci->bg.opcode) {
+ case 0x0201: /* fw transfer */
+ __do_firmware_xfer(cci);
+ break;
case 0x4400: /* sanitize */
{
CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
@@ -1995,8 +2866,13 @@ static void bg_timercb(void *opaque)
cxl_dev_enable_media(&ct3d->cxl_dstate);
}
break;
- case 0x4304: /* TODO: scan media */
+ case 0x4304: /* scan media */
+ {
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+
+ __do_scan_media(ct3d);
break;
+ }
default:
__builtin_unreachable();
break;
@@ -2053,6 +2929,10 @@ void cxl_init_cci(CXLCCI *cci, size_t payload_max)
cci->bg.runtime = 0;
cci->bg.timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
bg_timercb, cci);
+
+ memset(&cci->fw, 0, sizeof(cci->fw));
+ cci->fw.active_slot = 1;
+ cci->fw.slot[cci->fw.active_slot - 1] = true;
}
static void cxl_copy_cci_commands(CXLCCI *cci, const struct cxl_cmd (*cxl_cmds)[256])
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index f4e366f64f..5d4bd2b710 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1536,7 +1536,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
.fw_unplugs_cpu = pm->smi_on_cpu_unplug,
};
build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry,
- pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02");
+ pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02",
+ AML_SYSTEM_IO);
}
if (pcms->memhp_io_base && nr_mem) {
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 37c21a0aec..be0cb39b5c 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -358,7 +358,7 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
{
struct vtd_iotlb_key key;
VTDIOTLBEntry *entry;
- int level;
+ unsigned level;
for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
key.gfn = vtd_get_iotlb_gfn(addr, level);
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index f8cf99bddf..5f32c36943 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -264,10 +264,10 @@
#define VTD_FRCD_FR(val) (((val) & 0xffULL) << 32)
#define VTD_FRCD_SID_MASK 0xffffULL
#define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK)
+#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40)
+#define VTD_FRCD_PP(val) (((val) & 0x1ULL) << 31)
/* For the low 64-bit of 128-bit */
#define VTD_FRCD_FI(val) ((val) & ~0xfffULL)
-#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40)
-#define VTD_FRCD_PP(val) (((val) & 0x1) << 31)
#define VTD_FRCD_IR_IDX(val) (((val) & 0xffffULL) << 48)
/* DMA Remapping Fault Conditions */
@@ -436,7 +436,7 @@ struct VTDIOTLBPageInvInfo {
uint16_t domain_id;
uint32_t pasid;
uint64_t addr;
- uint8_t mask;
+ uint64_t mask;
};
typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 9445b07b4f..d9e69243b4 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -495,6 +495,7 @@ static void pc_i440fx_machine_9_0_options(MachineClass *m)
pc_i440fx_machine_9_1_options(m);
m->alias = NULL;
m->is_default = false;
+ m->smbios_memory_device_size = 16 * GiB;
compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len);
compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 71d3c6d122..9d108b194e 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -374,6 +374,7 @@ static void pc_q35_machine_9_0_options(MachineClass *m)
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_machine_9_1_options(m);
m->alias = NULL;
+ m->smbios_memory_device_size = 16 * GiB;
compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len);
compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len);
pcmc->isa_bios_alias = false;
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 35ac59883a..d648192ab9 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -737,6 +737,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
error_setg(errp, "volatile memdev must have backing device");
return false;
}
+ if (host_memory_backend_is_mapped(ct3d->hostvmem)) {
+ error_setg(errp, "memory backend %s can't be used multiple times.",
+ object_get_canonical_path_component(OBJECT(ct3d->hostvmem)));
+ return false;
+ }
memory_region_set_nonvolatile(vmr, false);
memory_region_set_enabled(vmr, true);
host_memory_backend_set_mapped(ct3d->hostvmem, true);
@@ -760,6 +765,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
error_setg(errp, "persistent memdev must have backing device");
return false;
}
+ if (host_memory_backend_is_mapped(ct3d->hostpmem)) {
+ error_setg(errp, "memory backend %s can't be used multiple times.",
+ object_get_canonical_path_component(OBJECT(ct3d->hostpmem)));
+ return false;
+ }
memory_region_set_nonvolatile(pmr, true);
memory_region_set_enabled(pmr, true);
host_memory_backend_set_mapped(ct3d->hostpmem, true);
@@ -790,6 +800,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
return false;
}
+ if (host_memory_backend_is_mapped(ct3d->dc.host_dc)) {
+ error_setg(errp, "memory backend %s can't be used multiple times.",
+ object_get_canonical_path_component(OBJECT(ct3d->dc.host_dc)));
+ return false;
+ }
/*
* Set DC regions as volatile for now, non-volatile support can
* be added in the future if needed.
@@ -829,6 +844,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
uint8_t *pci_conf = pci_dev->config;
unsigned short msix_num = 6;
int i, rc;
+ uint16_t count;
QTAILQ_INIT(&ct3d->error_list);
@@ -893,6 +909,28 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
}
cxl_event_init(&ct3d->cxl_dstate, 2);
+ /* Set default value for patrol scrub attributes */
+ ct3d->patrol_scrub_attrs.scrub_cycle_cap =
+ CXL_MEMDEV_PS_SCRUB_CYCLE_CHANGE_CAP_DEFAULT |
+ CXL_MEMDEV_PS_SCRUB_REALTIME_REPORT_CAP_DEFAULT;
+ ct3d->patrol_scrub_attrs.scrub_cycle =
+ CXL_MEMDEV_PS_CUR_SCRUB_CYCLE_DEFAULT |
+ (CXL_MEMDEV_PS_MIN_SCRUB_CYCLE_DEFAULT << 8);
+ ct3d->patrol_scrub_attrs.scrub_flags = CXL_MEMDEV_PS_ENABLE_DEFAULT;
+
+ /* Set default value for DDR5 ECS read attributes */
+ for (count = 0; count < CXL_ECS_NUM_MEDIA_FRUS; count++) {
+ ct3d->ecs_attrs[count].ecs_log_cap =
+ CXL_ECS_LOG_ENTRY_TYPE_DEFAULT;
+ ct3d->ecs_attrs[count].ecs_cap =
+ CXL_ECS_REALTIME_REPORT_CAP_DEFAULT;
+ ct3d->ecs_attrs[count].ecs_config =
+ CXL_ECS_THRESHOLD_COUNT_DEFAULT |
+ (CXL_ECS_MODE_DEFAULT << 3);
+ /* Reserved */
+ ct3d->ecs_attrs[count].ecs_flags = 0;
+ }
+
return;
err_release_cdat:
@@ -1127,7 +1165,7 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
return MEMTX_ERROR;
}
- if (sanitize_running(&ct3d->cci)) {
+ if (cxl_dev_media_disabled(&ct3d->cxl_dstate)) {
qemu_guest_getrandom_nofail(data, size);
return MEMTX_OK;
}
@@ -1149,7 +1187,7 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
return MEMTX_ERROR;
}
- if (sanitize_running(&ct3d->cci)) {
+ if (cxl_dev_media_disabled(&ct3d->cxl_dstate)) {
return MEMTX_OK;
}
@@ -1304,6 +1342,12 @@ void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d)
cxl_device_get_timestamp(&ct3d->cxl_dstate);
}
+void cxl_clear_poison_list_overflowed(CXLType3Dev *ct3d)
+{
+ ct3d->poison_list_overflowed = false;
+ ct3d->poison_list_overflow_ts = 0;
+}
+
void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
Error **errp)
{
@@ -1340,19 +1384,21 @@ void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
}
}
- if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) {
- cxl_set_poison_list_overflowed(ct3d);
- return;
- }
-
p = g_new0(CXLPoison, 1);
p->length = length;
p->start = start;
/* Different from injected via the mbox */
p->type = CXL_POISON_TYPE_INTERNAL;
- QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
- ct3d->poison_list_cnt++;
+ if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) {
+ QLIST_INSERT_HEAD(&ct3d->poison_list, p, node);
+ ct3d->poison_list_cnt++;
+ } else {
+ if (!ct3d->poison_list_overflowed) {
+ cxl_set_poison_list_overflowed(ct3d);
+ }
+ QLIST_INSERT_HEAD(&ct3d->poison_list_bkp, p, node);
+ }
}
/* For uncorrectable errors include support for multiple header recording */
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 18898afe81..a788e6937e 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -48,6 +48,7 @@ static const int kernel_feature_bits[] = {
VIRTIO_F_IOMMU_PLATFORM,
VIRTIO_F_RING_PACKED,
VIRTIO_F_RING_RESET,
+ VIRTIO_F_IN_ORDER,
VIRTIO_F_NOTIFICATION_DATA,
VIRTIO_NET_F_HASH_REPORT,
VHOST_INVALID_FEATURE_BIT
@@ -78,6 +79,7 @@ static const int user_feature_bits[] = {
VIRTIO_F_IOMMU_PLATFORM,
VIRTIO_F_RING_PACKED,
VIRTIO_F_RING_RESET,
+ VIRTIO_F_IN_ORDER,
VIRTIO_NET_F_RSS,
VIRTIO_NET_F_HASH_REPORT,
VIRTIO_NET_F_GUEST_USO4,
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 8d25174d25..e86ea2e7ce 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -203,6 +203,7 @@
#include "sysemu/hostmem.h"
#include "hw/pci/msix.h"
#include "hw/pci/pcie_sriov.h"
+#include "sysemu/spdm-socket.h"
#include "migration/vmstate.h"
#include "nvme.h"
@@ -8315,6 +8316,27 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
return 0;
}
+static bool pcie_doe_spdm_rsp(DOECap *doe_cap)
+{
+ void *req = pcie_doe_get_write_mbox_ptr(doe_cap);
+ uint32_t req_len = pcie_doe_get_obj_len(req) * 4;
+ void *rsp = doe_cap->read_mbox;
+ uint32_t rsp_len = SPDM_SOCKET_MAX_MESSAGE_BUFFER_SIZE;
+
+ uint32_t recvd = spdm_socket_rsp(doe_cap->spdm_socket,
+ SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE,
+ req, req_len, rsp, rsp_len);
+ doe_cap->read_mbox_len += DIV_ROUND_UP(recvd, 4);
+
+ return recvd != 0;
+}
+
+static DOEProtocol doe_spdm_prot[] = {
+ { PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_CMA, pcie_doe_spdm_rsp },
+ { PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_SECURED_CMA, pcie_doe_spdm_rsp },
+ { }
+};
+
static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
{
ERRP_GUARD();
@@ -8402,6 +8424,25 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
+ pcie_cap_deverr_init(pci_dev);
+
+ /* DOE Initialisation */
+ if (pci_dev->spdm_port) {
+ uint16_t doe_offset = n->params.sriov_max_vfs ?
+ PCI_CONFIG_SPACE_SIZE + PCI_ARI_SIZEOF
+ : PCI_CONFIG_SPACE_SIZE;
+
+ pcie_doe_init(pci_dev, &pci_dev->doe_spdm, doe_offset,
+ doe_spdm_prot, true, 0);
+
+ pci_dev->doe_spdm.spdm_socket = spdm_socket_connect(pci_dev->spdm_port,
+ errp);
+
+ if (pci_dev->doe_spdm.spdm_socket < 0) {
+ return false;
+ }
+ }
+
if (n->params.cmb_size_mb) {
nvme_init_cmb(n, pci_dev);
}
@@ -8650,6 +8691,11 @@ static void nvme_exit(PCIDevice *pci_dev)
g_free(n->cmb.buf);
}
+ if (pci_dev->doe_spdm.spdm_socket > 0) {
+ spdm_socket_close(pci_dev->doe_spdm.spdm_socket,
+ SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE);
+ }
+
if (n->pmr.dev) {
host_memory_backend_set_mapped(n->pmr.dev, false);
}
@@ -8695,6 +8741,7 @@ static Property nvme_props[] = {
DEFINE_PROP_BOOL("msix-exclusive-bar", NvmeCtrl, params.msix_exclusive_bar,
false),
DEFINE_PROP_UINT16("mqes", NvmeCtrl, params.mqes, 0x7ff),
+ DEFINE_PROP_UINT16("spdm_port", PCIDevice, spdm_port, 0),
DEFINE_PROP_END_OF_LIST(),
};
@@ -8766,11 +8813,25 @@ static void nvme_pci_write_config(PCIDevice *dev, uint32_t address,
{
uint16_t old_num_vfs = pcie_sriov_num_vfs(dev);
+ if (pcie_find_capability(dev, PCI_EXT_CAP_ID_DOE)) {
+ pcie_doe_write_config(&dev->doe_spdm, address, val, len);
+ }
pci_default_write_config(dev, address, val, len);
pcie_cap_flr_write_config(dev, address, val, len);
nvme_sriov_post_write_config(dev, old_num_vfs);
}
+static uint32_t nvme_pci_read_config(PCIDevice *dev, uint32_t address, int len)
+{
+ uint32_t val;
+ if (dev->spdm_port && pcie_find_capability(dev, PCI_EXT_CAP_ID_DOE)) {
+ if (pcie_doe_read_config(&dev->doe_spdm, address, len, &val)) {
+ return val;
+ }
+ }
+ return pci_default_read_config(dev, address, len);
+}
+
static const VMStateDescription nvme_vmstate = {
.name = "nvme",
.unmigratable = 1,
@@ -8783,6 +8844,7 @@ static void nvme_class_init(ObjectClass *oc, void *data)
pc->realize = nvme_realize;
pc->config_write = nvme_pci_write_config;
+ pc->config_read = nvme_pci_read_config;
pc->exit = nvme_exit;
pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
pc->revision = 2;
diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c
index f69413ea2c..391fabb8a8 100644
--- a/hw/pci-host/gpex-acpi.c
+++ b/hw/pci-host/gpex-acpi.c
@@ -7,7 +7,8 @@
#include "hw/pci/pcie_host.h"
#include "hw/acpi/cxl.h"
-static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq)
+static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq,
+ Aml *scope, uint8_t bus_num)
{
Aml *method, *crs;
int i, slot_no;
@@ -20,7 +21,7 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq)
Aml *pkg = aml_package(4);
aml_append(pkg, aml_int((slot_no << 16) | 0xFFFF));
aml_append(pkg, aml_int(i));
- aml_append(pkg, aml_name("GSI%d", gsi));
+ aml_append(pkg, aml_name("L%.02X%X", bus_num, gsi));
aml_append(pkg, aml_int(0));
aml_append(rt_pkg, pkg);
}
@@ -30,7 +31,7 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq)
/* Create GSI link device */
for (i = 0; i < PCI_NUM_PINS; i++) {
uint32_t irqs = irq + i;
- Aml *dev_gsi = aml_device("GSI%d", i);
+ Aml *dev_gsi = aml_device("L%.02X%X", bus_num, i);
aml_append(dev_gsi, aml_name_decl("_HID", aml_string("PNP0C0F")));
aml_append(dev_gsi, aml_name_decl("_UID", aml_int(i)));
crs = aml_resource_template();
@@ -45,7 +46,7 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq)
aml_append(dev_gsi, aml_name_decl("_CRS", crs));
method = aml_method("_SRS", 1, AML_NOTSERIALIZED);
aml_append(dev_gsi, method);
- aml_append(dev, dev_gsi);
+ aml_append(scope, dev_gsi);
}
}
@@ -174,7 +175,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node)));
}
- acpi_dsdt_add_pci_route_table(dev, cfg->irq);
+ acpi_dsdt_add_pci_route_table(dev, cfg->irq, scope, bus_num);
/*
* Resources defined for PXBs are composed of the following parts:
@@ -205,7 +206,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
aml_append(dev, aml_name_decl("_STR", aml_unicode("PCIe 0 Device")));
aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
- acpi_dsdt_add_pci_route_table(dev, cfg->irq);
+ acpi_dsdt_add_pci_route_table(dev, cfg->irq, scope, 0);
method = aml_method("_CBA", 0, AML_NOTSERIALIZED);
aml_append(method, aml_return(aml_int(cfg->ecam.base)));
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 4c7be52951..8ad5d7e2d8 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -85,6 +85,7 @@ static Property pci_props[] = {
QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
+ DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf),
DEFINE_PROP_END_OF_LIST()
};
@@ -959,13 +960,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
}
- /*
- * With SR/IOV and ARI, a device at function 0 need not be a multifunction
- * device, as it may just be a VF that ended up with function 0 in
- * the legacy PCI interpretation. Avoid failing in such cases:
- */
- if (pci_is_vf(dev) &&
- dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+ /* SR/IOV is not handled here. */
+ if (pci_is_vf(dev)) {
return;
}
@@ -998,7 +994,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
}
/* function 0 indicates single function, so function > 0 must be NULL */
for (func = 1; func < PCI_FUNC_MAX; ++func) {
- if (bus->devices[PCI_DEVFN(slot, func)]) {
+ PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)];
+ if (device && !pci_is_vf(device)) {
error_setg(errp, "PCI: %x.0 indicates single function, "
"but %x.%x is already populated.",
slot, slot, func);
@@ -1283,6 +1280,7 @@ static void pci_qdev_unrealize(DeviceState *dev)
pci_unregister_io_regions(pci_dev);
pci_del_option_rom(pci_dev);
+ pcie_sriov_unregister_device(pci_dev);
if (pc->exit) {
pc->exit(pci_dev);
@@ -1314,7 +1312,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
pcibus_t size = memory_region_size(memory);
uint8_t hdr_type;
- assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
assert(region_num >= 0);
assert(region_num < PCI_NUM_REGIONS);
assert(is_power_of_2(size));
@@ -1325,7 +1322,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2);
r = &pci_dev->io_regions[region_num];
- r->addr = PCI_BAR_UNMAPPED;
r->size = size;
r->type = type;
r->memory = memory;
@@ -1333,22 +1329,35 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
? pci_get_bus(pci_dev)->address_space_io
: pci_get_bus(pci_dev)->address_space_mem;
- wmask = ~(size - 1);
- if (region_num == PCI_ROM_SLOT) {
- /* ROM enable bit is writable */
- wmask |= PCI_ROM_ADDRESS_ENABLE;
- }
-
- addr = pci_bar(pci_dev, region_num);
- pci_set_long(pci_dev->config + addr, type);
+ if (pci_is_vf(pci_dev)) {
+ PCIDevice *pf = pci_dev->exp.sriov_vf.pf;
+ assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]);
- if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
- r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
- pci_set_quad(pci_dev->wmask + addr, wmask);
- pci_set_quad(pci_dev->cmask + addr, ~0ULL);
+ r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size);
+ if (r->addr != PCI_BAR_UNMAPPED) {
+ memory_region_add_subregion_overlap(r->address_space,
+ r->addr, r->memory, 1);
+ }
} else {
- pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
- pci_set_long(pci_dev->cmask + addr, 0xffffffff);
+ r->addr = PCI_BAR_UNMAPPED;
+
+ wmask = ~(size - 1);
+ if (region_num == PCI_ROM_SLOT) {
+ /* ROM enable bit is writable */
+ wmask |= PCI_ROM_ADDRESS_ENABLE;
+ }
+
+ addr = pci_bar(pci_dev, region_num);
+ pci_set_long(pci_dev->config + addr, type);
+
+ if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
+ r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ pci_set_quad(pci_dev->wmask + addr, wmask);
+ pci_set_quad(pci_dev->cmask + addr, ~0ULL);
+ } else {
+ pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
+ pci_set_long(pci_dev->cmask + addr, 0xffffffff);
+ }
}
}
@@ -1437,7 +1446,11 @@ static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg,
pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
uint16_t vf_stride =
pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
- uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride;
+ uint32_t vf_num = d->devfn - (pf->devfn + vf_offset);
+
+ if (vf_num) {
+ vf_num /= vf_stride;
+ }
if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
new_addr = pci_get_quad(pf->config + bar);
@@ -2105,6 +2118,11 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
}
}
+ if (!pcie_sriov_register_device(pci_dev, errp)) {
+ pci_qdev_unrealize(DEVICE(pci_dev));
+ return;
+ }
+
/*
* A PCIe Downstream Port that do not have ARI Forwarding enabled must
* associate only Device 0 with the device attached to the bus
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index 56523ab4e8..0fc9f810b9 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -20,6 +20,8 @@
#include "qapi/error.h"
#include "trace.h"
+static GHashTable *pfs;
+
static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
{
for (uint16_t i = 0; i < total_vfs; i++) {
@@ -31,17 +33,62 @@ static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
dev->exp.sriov_pf.vf = NULL;
}
-bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
- const char *vfname, uint16_t vf_dev_id,
- uint16_t init_vfs, uint16_t total_vfs,
- uint16_t vf_offset, uint16_t vf_stride,
- Error **errp)
+static void clear_ctrl_vfe(PCIDevice *dev)
+{
+ uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL;
+ pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE);
+}
+
+static void register_vfs(PCIDevice *dev)
+{
+ uint16_t num_vfs;
+ uint16_t i;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+
+ assert(sriov_cap > 0);
+ num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
+ if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
+ clear_ctrl_vfe(dev);
+ return;
+ }
+
+ trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), num_vfs);
+ for (i = 0; i < num_vfs; i++) {
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
+ }
+}
+
+static void unregister_vfs(PCIDevice *dev)
+{
+ uint16_t i;
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+
+ trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+ for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
+ }
+}
+
+static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset,
+ uint16_t vf_dev_id, uint16_t init_vfs,
+ uint16_t total_vfs, uint16_t vf_offset,
+ uint16_t vf_stride, Error **errp)
{
- BusState *bus = qdev_get_parent_bus(&dev->qdev);
- int32_t devfn = dev->devfn + vf_offset;
uint8_t *cfg = dev->config + offset;
uint8_t *wmask;
+ if (!pci_is_express(dev)) {
+ error_setg(errp, "PCI Express is required for SR-IOV PF");
+ return false;
+ }
+
+ if (pci_is_vf(dev)) {
+ error_setg(errp, "a device cannot be both an SR-IOV PF and a VF");
+ return false;
+ }
+
if (total_vfs) {
uint16_t ari_cap = pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI);
uint16_t first_vf_devfn = dev->devfn + vf_offset;
@@ -90,6 +137,28 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
+ return true;
+}
+
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+ const char *vfname, uint16_t vf_dev_id,
+ uint16_t init_vfs, uint16_t total_vfs,
+ uint16_t vf_offset, uint16_t vf_stride,
+ Error **errp)
+{
+ BusState *bus = qdev_get_parent_bus(&dev->qdev);
+ int32_t devfn = dev->devfn + vf_offset;
+
+ if (pfs && g_hash_table_contains(pfs, dev->qdev.id)) {
+ error_setg(errp, "attaching user-created SR-IOV VF unsupported");
+ return false;
+ }
+
+ if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, init_vfs,
+ total_vfs, vf_offset, vf_stride, errp)) {
+ return false;
+ }
+
dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
for (uint16_t i = 0; i < total_vfs; i++) {
@@ -119,7 +188,24 @@ void pcie_sriov_pf_exit(PCIDevice *dev)
{
uint8_t *cfg = dev->config + dev->exp.sriov_cap;
- unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
+ if (dev->exp.sriov_pf.vf_user_created) {
+ uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
+ uint16_t total_vfs = pci_get_word(dev->config + PCI_SRIOV_TOTAL_VF);
+ uint16_t vf_dev_id = pci_get_word(dev->config + PCI_SRIOV_VF_DID);
+
+ unregister_vfs(dev);
+
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ PCIDevice *vf = dev->exp.sriov_pf.vf[i];
+
+ vf->exp.sriov_vf.pf = NULL;
+
+ pci_config_set_vendor_id(vf->config, ven_id);
+ pci_config_set_device_id(vf->config, vf_dev_id);
+ }
+ } else {
+ unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
+ }
}
void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
@@ -152,74 +238,172 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
MemoryRegion *memory)
{
- PCIIORegion *r;
- PCIBus *bus = pci_get_bus(dev);
uint8_t type;
- pcibus_t size = memory_region_size(memory);
- assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */
- assert(region_num >= 0);
- assert(region_num < PCI_NUM_REGIONS);
+ assert(dev->exp.sriov_vf.pf);
type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num];
- if (!is_power_of_2(size)) {
- error_report("%s: PCI region size must be a power"
- " of two - type=0x%x, size=0x%"FMT_PCIBUS,
- __func__, type, size);
- exit(1);
- }
-
- r = &dev->io_regions[region_num];
- r->memory = memory;
- r->address_space =
- type & PCI_BASE_ADDRESS_SPACE_IO
- ? bus->address_space_io
- : bus->address_space_mem;
- r->size = size;
- r->type = type;
-
- r->addr = pci_bar_address(dev, region_num, r->type, r->size);
- if (r->addr != PCI_BAR_UNMAPPED) {
- memory_region_add_subregion_overlap(r->address_space,
- r->addr, r->memory, 1);
- }
+ return pci_register_bar(dev, region_num, type, memory);
}
-static void clear_ctrl_vfe(PCIDevice *dev)
+static gint compare_vf_devfns(gconstpointer a, gconstpointer b)
{
- uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL;
- pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE);
+ return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn;
}
-static void register_vfs(PCIDevice *dev)
+int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
+ uint16_t offset,
+ Error **errp)
{
- uint16_t num_vfs;
+ GPtrArray *pf;
+ PCIDevice **vfs;
+ BusState *bus = qdev_get_parent_bus(DEVICE(dev));
+ uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
+ uint16_t vf_dev_id;
+ uint16_t vf_offset;
+ uint16_t vf_stride;
uint16_t i;
- uint16_t sriov_cap = dev->exp.sriov_cap;
- assert(sriov_cap > 0);
- num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
- if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
- clear_ctrl_vfe(dev);
- return;
+ if (!pfs || !dev->qdev.id) {
+ return 0;
}
- trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), num_vfs);
- for (i = 0; i < num_vfs; i++) {
- pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
+ pf = g_hash_table_lookup(pfs, dev->qdev.id);
+ if (!pf) {
+ return 0;
}
+
+ if (pf->len > UINT16_MAX) {
+ error_setg(errp, "too many VFs");
+ return -1;
+ }
+
+ g_ptr_array_sort(pf, compare_vf_devfns);
+ vfs = (void *)pf->pdata;
+
+ if (vfs[0]->devfn <= dev->devfn) {
+ error_setg(errp, "a VF function number is less than the PF function number");
+ return -1;
+ }
+
+ vf_dev_id = pci_get_word(vfs[0]->config + PCI_DEVICE_ID);
+ vf_offset = vfs[0]->devfn - dev->devfn;
+ vf_stride = pf->len < 2 ? 0 : vfs[1]->devfn - vfs[0]->devfn;
+
+ for (i = 0; i < pf->len; i++) {
+ if (bus != qdev_get_parent_bus(&vfs[i]->qdev)) {
+ error_setg(errp, "SR-IOV VF parent bus mismatches with PF");
+ return -1;
+ }
+
+ if (ven_id != pci_get_word(vfs[i]->config + PCI_VENDOR_ID)) {
+ error_setg(errp, "SR-IOV VF vendor ID mismatches with PF");
+ return -1;
+ }
+
+ if (vf_dev_id != pci_get_word(vfs[i]->config + PCI_DEVICE_ID)) {
+ error_setg(errp, "inconsistent SR-IOV VF device IDs");
+ return -1;
+ }
+
+ for (size_t j = 0; j < PCI_NUM_REGIONS; j++) {
+ if (vfs[i]->io_regions[j].size != vfs[0]->io_regions[j].size ||
+ vfs[i]->io_regions[j].type != vfs[0]->io_regions[j].type) {
+ error_setg(errp, "inconsistent SR-IOV BARs");
+ return -1;
+ }
+ }
+
+ if (vfs[i]->devfn - vfs[0]->devfn != vf_stride * i) {
+ error_setg(errp, "inconsistent SR-IOV stride");
+ return -1;
+ }
+ }
+
+ if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, pf->len,
+ pf->len, vf_offset, vf_stride, errp)) {
+ return -1;
+ }
+
+ for (i = 0; i < pf->len; i++) {
+ vfs[i]->exp.sriov_vf.pf = dev;
+ vfs[i]->exp.sriov_vf.vf_number = i;
+
+ /* set vid/did according to sr/iov spec - they are not used */
+ pci_config_set_vendor_id(vfs[i]->config, 0xffff);
+ pci_config_set_device_id(vfs[i]->config, 0xffff);
+ }
+
+ dev->exp.sriov_pf.vf = vfs;
+ dev->exp.sriov_pf.vf_user_created = true;
+
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
+ PCIIORegion *region = &vfs[0]->io_regions[i];
+
+ if (region->size) {
+ pcie_sriov_pf_init_vf_bar(dev, i, region->type, region->size);
+ }
+ }
+
+ return PCI_EXT_CAP_SRIOV_SIZEOF;
}
-static void unregister_vfs(PCIDevice *dev)
+bool pcie_sriov_register_device(PCIDevice *dev, Error **errp)
{
- uint16_t i;
- uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+ if (!dev->exp.sriov_pf.vf && dev->qdev.id &&
+ pfs && g_hash_table_contains(pfs, dev->qdev.id)) {
+ error_setg(errp, "attaching user-created SR-IOV VF unsupported");
+ return false;
+ }
- trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn));
- for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
- pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
+ if (dev->sriov_pf) {
+ PCIDevice *pci_pf;
+ GPtrArray *pf;
+
+ if (!PCI_DEVICE_GET_CLASS(dev)->sriov_vf_user_creatable) {
+ error_setg(errp, "user cannot create SR-IOV VF with this device type");
+ return false;
+ }
+
+ if (!pci_is_express(dev)) {
+ error_setg(errp, "PCI Express is required for SR-IOV VF");
+ return false;
+ }
+
+ if (!pci_qdev_find_device(dev->sriov_pf, &pci_pf)) {
+ error_setg(errp, "PCI device specified as SR-IOV PF already exists");
+ return false;
+ }
+
+ if (!pfs) {
+ pfs = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
+ }
+
+ pf = g_hash_table_lookup(pfs, dev->sriov_pf);
+ if (!pf) {
+ pf = g_ptr_array_new();
+ g_hash_table_insert(pfs, g_strdup(dev->sriov_pf), pf);
+ }
+
+ g_ptr_array_add(pf, dev);
+ }
+
+ return true;
+}
+
+void pcie_sriov_unregister_device(PCIDevice *dev)
+{
+ if (dev->sriov_pf && pfs) {
+ GPtrArray *pf = g_hash_table_lookup(pfs, dev->sriov_pf);
+
+ if (pf) {
+ g_ptr_array_remove_fast(pf, dev);
+
+ if (!pf->len) {
+ g_hash_table_remove(pfs, dev->sriov_pf);
+ g_ptr_array_free(pf, FALSE);
+ }
+ }
}
}
@@ -306,7 +490,7 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize)
uint16_t pcie_sriov_vf_number(PCIDevice *dev)
{
- assert(pci_is_vf(dev));
+ assert(dev->exp.sriov_vf.pf);
return dev->exp.sriov_vf.vf_number;
}
diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
index 0925528160..36d6a3a412 100644
--- a/hw/riscv/virt-acpi-build.c
+++ b/hw/riscv/virt-acpi-build.c
@@ -141,12 +141,36 @@ static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s)
}
}
+static void acpi_dsdt_add_plic_aplic(Aml *scope, uint8_t socket_count,
+ uint64_t mmio_base, uint64_t mmio_size,
+ const char *hid)
+{
+ uint64_t plic_aplic_addr;
+ uint32_t gsi_base;
+ uint8_t socket;
+
+ for (socket = 0; socket < socket_count; socket++) {
+ plic_aplic_addr = mmio_base + mmio_size * socket;
+ gsi_base = VIRT_IRQCHIP_NUM_SOURCES * socket;
+ Aml *dev = aml_device("IC%.02X", socket);
+ aml_append(dev, aml_name_decl("_HID", aml_string("%s", hid)));
+ aml_append(dev, aml_name_decl("_UID", aml_int(socket)));
+ aml_append(dev, aml_name_decl("_GSB", aml_int(gsi_base)));
+
+ Aml *crs = aml_resource_template();
+ aml_append(crs, aml_memory32_fixed(plic_aplic_addr, mmio_size,
+ AML_READ_WRITE));
+ aml_append(dev, aml_name_decl("_CRS", crs));
+ aml_append(scope, dev);
+ }
+}
+
static void
acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap,
uint32_t uart_irq)
{
Aml *dev = aml_device("COM0");
- aml_append(dev, aml_name_decl("_HID", aml_string("PNP0501")));
+ aml_append(dev, aml_name_decl("_HID", aml_string("RSCV0003")));
aml_append(dev, aml_name_decl("_UID", aml_int(0)));
Aml *crs = aml_resource_template();
@@ -411,6 +435,14 @@ static void build_dsdt(GArray *table_data,
socket_count = riscv_socket_count(ms);
+ if (s->aia_type == VIRT_AIA_TYPE_NONE) {
+ acpi_dsdt_add_plic_aplic(scope, socket_count, memmap[VIRT_PLIC].base,
+ memmap[VIRT_PLIC].size, "RSCV0001");
+ } else {
+ acpi_dsdt_add_plic_aplic(scope, socket_count, memmap[VIRT_APLIC_S].base,
+ memmap[VIRT_APLIC_S].size, "RSCV0002");
+ }
+
acpi_dsdt_add_uart(scope, &memmap[VIRT_UART0], UART0_IRQ);
if (socket_count == 1) {
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 3d5fe0994d..49cff2a0cb 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -38,6 +38,7 @@ static const int kernel_feature_bits[] = {
VIRTIO_RING_F_EVENT_IDX,
VIRTIO_SCSI_F_HOTPLUG,
VIRTIO_F_RING_RESET,
+ VIRTIO_F_IN_ORDER,
VIRTIO_F_NOTIFICATION_DATA,
VHOST_INVALID_FEATURE_BIT
};
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index cc91ade525..55e4be5b34 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -36,6 +36,7 @@ static const int user_feature_bits[] = {
VIRTIO_RING_F_EVENT_IDX,
VIRTIO_SCSI_F_HOTPLUG,
VIRTIO_F_RING_RESET,
+ VIRTIO_F_IN_ORDER,
VIRTIO_F_NOTIFICATION_DATA,
VHOST_INVALID_FEATURE_BIT
};
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index 3b7703489d..a394514264 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -1093,6 +1093,7 @@ static bool smbios_get_tables_ep(MachineState *ms,
Error **errp)
{
unsigned i, dimm_cnt, offset;
+ MachineClass *mc = MACHINE_GET_CLASS(ms);
ERRP_GUARD();
assert(ep_type == SMBIOS_ENTRY_POINT_TYPE_32 ||
@@ -1123,12 +1124,12 @@ static bool smbios_get_tables_ep(MachineState *ms,
smbios_build_type_9_table(errp);
smbios_build_type_11_table();
-#define MAX_DIMM_SZ (16 * GiB)
-#define GET_DIMM_SZ ((i < dimm_cnt - 1) ? MAX_DIMM_SZ \
- : ((current_machine->ram_size - 1) % MAX_DIMM_SZ) + 1)
+#define GET_DIMM_SZ ((i < dimm_cnt - 1) ? mc->smbios_memory_device_size \
+ : ((current_machine->ram_size - 1) % mc->smbios_memory_device_size) + 1)
- dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, MAX_DIMM_SZ) /
- MAX_DIMM_SZ;
+ dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size,
+ mc->smbios_memory_device_size) /
+ mc->smbios_memory_device_size;
/*
* The offset determines if we need to keep additional space between
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6d15b36e0b..cfc44a4569 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -599,7 +599,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
int iommu_idx;
- trace_vfio_listener_region_add_iommu(iova, end);
+ trace_vfio_listener_region_add_iommu(section->mr->name, iova, end);
/*
* FIXME: For VFIO iommu types which have KVM acceleration to
* avoid bouncing all map/unmaps through qemu this way, this
@@ -725,6 +725,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
if (memory_region_is_iommu(section->mr)) {
VFIOGuestIOMMU *giommu;
+ trace_vfio_listener_region_del_iommu(section->mr->name);
QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) {
if (MEMORY_REGION(giommu->iommu_mr) == section->mr &&
giommu->n.start == section->offset_within_region) {
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index e16179b507..98bd4dccea 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -95,7 +95,8 @@ vfio_region_read(char *name, int index, uint64_t addr, unsigned size, uint64_t d
vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) "iommu %s @ 0x%"PRIx64" - 0x%"PRIx64
vfio_listener_region_skip(const char *name, uint64_t start, uint64_t end) "SKIPPING %s 0x%"PRIx64" - 0x%"PRIx64
vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d"
-vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64
+vfio_listener_region_add_iommu(const char* name, uint64_t start, uint64_t end) "region_add [iommu] %s 0x%"PRIx64" - 0x%"PRIx64
+vfio_listener_region_del_iommu(const char *name) "region_del [iommu] %s"
vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR
vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index b7c04f0856..04e36ae047 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -116,6 +116,7 @@ virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, u
virtio_iommu_set_config(uint8_t bypass) "bypass=0x%x"
virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
+virtio_iommu_detach_endpoint_from_domain(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
virtio_iommu_map(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start, uint32_t flags) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 " phys_start=0x%"PRIx64" flags=%d"
virtio_iommu_unmap(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64
virtio_iommu_unmap_done(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index ae48cc1c96..32ee7f496d 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -33,6 +33,7 @@ static const int user_feature_bits[] = {
VIRTIO_F_RING_PACKED,
VIRTIO_F_IOMMU_PLATFORM,
VIRTIO_F_RING_RESET,
+ VIRTIO_F_IN_ORDER,
VIRTIO_F_NOTIFICATION_DATA,
VHOST_INVALID_FEATURE_BIT
};
diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c
index 802b44a07d..da3b0e0229 100644
--- a/hw/virtio/vhost-user-vsock.c
+++ b/hw/virtio/vhost-user-vsock.c
@@ -21,6 +21,7 @@ static const int user_feature_bits[] = {
VIRTIO_RING_F_INDIRECT_DESC,
VIRTIO_RING_F_EVENT_IDX,
VIRTIO_F_NOTIFY_ON_EMPTY,
+ VIRTIO_F_IN_ORDER,
VIRTIO_F_NOTIFICATION_DATA,
VHOST_INVALID_FEATURE_BIT
};
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index bbe8aa4b99..5034768bff 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -205,6 +205,7 @@ virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto,
int queue_index;
uint32_t algo, keytype, keylen;
+ sreq->info.op_code = opcode;
algo = ldl_le_p(&sess_req->para.algo);
keytype = ldl_le_p(&sess_req->para.keytype);
keylen = ldl_le_p(&sess_req->para.keylen);
@@ -224,7 +225,6 @@ virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto,
iov_discard_front(&iov, &out_num, keylen);
}
- sreq->info.op_code = opcode;
asym_info = &sreq->info.u.asym_sess_info;
asym_info->algo = algo;
asym_info->keytype = keytype;
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 33ae61c4a6..59ef4fb217 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -308,6 +308,7 @@ static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
if (!ep->domain) {
return;
}
+ trace_virtio_iommu_detach_endpoint_from_domain(domain->id, ep->id);
g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb,
ep->iommu_mr);
QLIST_REMOVE(ep, next);
@@ -467,26 +468,6 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
return &sdev->as;
}
-static void virtio_iommu_device_clear(VirtIOIOMMU *s, PCIBus *bus, int devfn)
-{
- IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
- IOMMUDevice *sdev;
-
- if (!sbus) {
- return;
- }
-
- sdev = sbus->pbdev[devfn];
- if (!sdev) {
- return;
- }
-
- g_list_free_full(sdev->resv_regions, g_free);
- sdev->resv_regions = NULL;
- g_free(sdev);
- sbus->pbdev[devfn] = NULL;
-}
-
static gboolean hiod_equal(gconstpointer v1, gconstpointer v2)
{
const struct hiod_key *key1 = v1;
@@ -558,8 +539,6 @@ static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
{
IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
IOMMUDevice *sdev;
- GList *current_ranges;
- GList *l, *tmp, *new_ranges = NULL;
int ret = -EINVAL;
if (!sbus) {
@@ -573,35 +552,10 @@ static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
return ret;
}
- current_ranges = sdev->host_resv_ranges;
-
- g_assert(!sdev->probe_done);
-
- /* check that each new resv region is included in an existing one */
if (sdev->host_resv_ranges) {
- range_inverse_array(iova_ranges,
- &new_ranges,
- 0, UINT64_MAX);
-
- for (tmp = new_ranges; tmp; tmp = tmp->next) {
- Range *newr = (Range *)tmp->data;
- bool included = false;
-
- for (l = current_ranges; l; l = l->next) {
- Range * r = (Range *)l->data;
-
- if (range_contains_range(r, newr)) {
- included = true;
- break;
- }
- }
- if (!included) {
- goto error;
- }
- }
- /* all new reserved ranges are included in existing ones */
- ret = 0;
- goto out;
+ error_setg(errp, "%s virtio-iommu does not support aliased BDF",
+ __func__);
+ return ret;
}
range_inverse_array(iova_ranges,
@@ -610,14 +564,31 @@ static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
rebuild_resv_regions(sdev);
return 0;
-error:
- error_setg(errp, "%s Conflicting host reserved ranges set!",
- __func__);
-out:
- g_list_free_full(new_ranges, g_free);
- return ret;
}
+static void virtio_iommu_unset_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
+ int devfn)
+{
+ IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
+ IOMMUDevice *sdev;
+
+ if (!sbus) {
+ return;
+ }
+
+ sdev = sbus->pbdev[devfn];
+ if (!sdev) {
+ return;
+ }
+
+ g_list_free_full(g_steal_pointer(&sdev->host_resv_ranges), g_free);
+ g_list_free_full(sdev->resv_regions, g_free);
+ sdev->host_resv_ranges = NULL;
+ sdev->resv_regions = NULL;
+ add_prop_resv_regions(sdev);
+}
+
+
static bool check_page_size_mask(VirtIOIOMMU *viommu, uint64_t new_mask,
Error **errp)
{
@@ -726,9 +697,10 @@ virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
if (!hiod) {
return;
}
+ virtio_iommu_unset_host_iova_ranges(viommu, hiod->aliased_bus,
+ hiod->aliased_devfn);
g_hash_table_remove(viommu->host_iommu_devices, &key);
- virtio_iommu_device_clear(viommu, bus, devfn);
}
static const PCIIOMMUOps virtio_iommu_ops = {
@@ -815,6 +787,7 @@ static int virtio_iommu_detach(VirtIOIOMMU *s,
if (QLIST_EMPTY(&domain->endpoint_list)) {
g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
}
+ g_tree_remove(s->endpoints, GUINT_TO_POINTER(ep_id));
return VIRTIO_IOMMU_S_OK;
}
@@ -977,7 +950,6 @@ static int virtio_iommu_probe(VirtIOIOMMU *s,
}
buf += count;
free -= count;
- sdev->probe_done = true;
return VIRTIO_IOMMU_S_OK;
}
diff --git a/hw/virtio/virtio-net-pci.c b/hw/virtio/virtio-net-pci.c
index e03543a70a..dba4987d6e 100644
--- a/hw/virtio/virtio-net-pci.c
+++ b/hw/virtio/virtio-net-pci.c
@@ -75,6 +75,7 @@ static void virtio_net_pci_class_init(ObjectClass *klass, void *data)
k->device_id = PCI_DEVICE_ID_VIRTIO_NET;
k->revision = VIRTIO_PCI_ABI_VERSION;
k->class_id = PCI_CLASS_NETWORK_ETHERNET;
+ k->sriov_vf_user_creatable = true;
set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
device_class_set_props(dc, virtio_net_properties);
vpciklass->realize = virtio_net_pci_realize;
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 9534730bba..0c8fcc5627 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1955,6 +1955,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
uint8_t *config;
uint32_t size;
VirtIODevice *vdev = virtio_bus_get_device(bus);
+ int16_t res;
/*
* Virtio capabilities present without
@@ -2100,6 +2101,14 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
}
+
+ res = pcie_sriov_pf_init_from_user_created_vfs(&proxy->pci_dev,
+ proxy->last_pcie_cap_offset,
+ errp);
+ if (res > 0) {
+ proxy->last_pcie_cap_offset += res;
+ virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV);
+ }
}
static void virtio_pci_device_unplugged(DeviceState *d)
@@ -2187,7 +2196,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
if (pcie_port && pci_is_express(pci_dev)) {
int pos;
- uint16_t last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE;
+ proxy->last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE;
pos = pcie_endpoint_cap_init(pci_dev, 0);
assert(pos > 0);
@@ -2207,9 +2216,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);
if (proxy->flags & VIRTIO_PCI_FLAG_AER) {
- pcie_aer_init(pci_dev, PCI_ERR_VER, last_pcie_cap_offset,
+ pcie_aer_init(pci_dev, PCI_ERR_VER, proxy->last_pcie_cap_offset,
PCI_ERR_SIZEOF, NULL);
- last_pcie_cap_offset += PCI_ERR_SIZEOF;
+ proxy->last_pcie_cap_offset += PCI_ERR_SIZEOF;
}
if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) {
@@ -2234,9 +2243,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
}
if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
- pcie_ats_init(pci_dev, last_pcie_cap_offset,
+ pcie_ats_init(pci_dev, proxy->last_pcie_cap_offset,
proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED);
- last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF;
+ proxy->last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF;
}
if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
@@ -2263,6 +2272,7 @@ static void virtio_pci_exit(PCIDevice *pci_dev)
bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
!pci_bus_is_root(pci_get_bus(pci_dev));
+ pcie_sriov_pf_exit(&proxy->pci_dev);
msix_uninit_exclusive_bar(pci_dev);
if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port &&
pci_is_express(pci_dev)) {
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 583a224163..397c261c3c 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -872,6 +872,46 @@ static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
vq->used_elems[idx].ndescs = elem->ndescs;
}
+static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem,
+ unsigned int len)
+{
+ unsigned int i, steps, max_steps;
+
+ i = vq->used_idx % vq->vring.num;
+ steps = 0;
+ /*
+ * We shouldn't need to increase 'i' by more than the distance
+ * between used_idx and last_avail_idx.
+ */
+ max_steps = (vq->last_avail_idx - vq->used_idx) % vq->vring.num;
+
+ /* Search for element in vq->used_elems */
+ while (steps <= max_steps) {
+ /* Found element, set length and mark as filled */
+ if (vq->used_elems[i].index == elem->index) {
+ vq->used_elems[i].len = len;
+ vq->used_elems[i].in_order_filled = true;
+ break;
+ }
+
+ i += vq->used_elems[i].ndescs;
+ steps += vq->used_elems[i].ndescs;
+
+ if (i >= vq->vring.num) {
+ i -= vq->vring.num;
+ }
+ }
+
+ /*
+ * We should be able to find a matching VirtQueueElement in
+ * used_elems. If we don't, this is an error.
+ */
+ if (steps >= max_steps) {
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: %s cannot fill buffer id %u\n",
+ __func__, vq->vdev->name, elem->index);
+ }
+}
+
static void virtqueue_packed_fill_desc(VirtQueue *vq,
const VirtQueueElement *elem,
unsigned int idx,
@@ -922,7 +962,9 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
return;
}
- if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+ if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_IN_ORDER)) {
+ virtqueue_ordered_fill(vq, elem, len);
+ } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
virtqueue_packed_fill(vq, elem, len, idx);
} else {
virtqueue_split_fill(vq, elem, len, idx);
@@ -981,6 +1023,73 @@ static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
}
}
+static void virtqueue_ordered_flush(VirtQueue *vq)
+{
+ unsigned int i = vq->used_idx % vq->vring.num;
+ unsigned int ndescs = 0;
+ uint16_t old = vq->used_idx;
+ uint16_t new;
+ bool packed;
+ VRingUsedElem uelem;
+
+ packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
+
+ if (packed) {
+ if (unlikely(!vq->vring.desc)) {
+ return;
+ }
+ } else if (unlikely(!vq->vring.used)) {
+ return;
+ }
+
+ /* First expected in-order element isn't ready, nothing to do */
+ if (!vq->used_elems[i].in_order_filled) {
+ return;
+ }
+
+ /* Search for filled elements in-order */
+ while (vq->used_elems[i].in_order_filled) {
+ /*
+ * First entry for packed VQs is written last so the guest
+ * doesn't see invalid descriptors.
+ */
+ if (packed && i != vq->used_idx) {
+ virtqueue_packed_fill_desc(vq, &vq->used_elems[i], ndescs, false);
+ } else if (!packed) {
+ uelem.id = vq->used_elems[i].index;
+ uelem.len = vq->used_elems[i].len;
+ vring_used_write(vq, &uelem, i);
+ }
+
+ vq->used_elems[i].in_order_filled = false;
+ ndescs += vq->used_elems[i].ndescs;
+ i += vq->used_elems[i].ndescs;
+ if (i >= vq->vring.num) {
+ i -= vq->vring.num;
+ }
+ }
+
+ if (packed) {
+ virtqueue_packed_fill_desc(vq, &vq->used_elems[vq->used_idx], 0, true);
+ vq->used_idx += ndescs;
+ if (vq->used_idx >= vq->vring.num) {
+ vq->used_idx -= vq->vring.num;
+ vq->used_wrap_counter ^= 1;
+ vq->signalled_used_valid = false;
+ }
+ } else {
+ /* Make sure buffer is written before we update index. */
+ smp_wmb();
+ new = old + ndescs;
+ vring_used_idx_set(vq, new);
+ if (unlikely((int16_t)(new - vq->signalled_used) <
+ (uint16_t)(new - old))) {
+ vq->signalled_used_valid = false;
+ }
+ }
+ vq->inuse -= ndescs;
+}
+
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
if (virtio_device_disabled(vq->vdev)) {
@@ -988,7 +1097,9 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
return;
}
- if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+ if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_IN_ORDER)) {
+ virtqueue_ordered_flush(vq);
+ } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
virtqueue_packed_flush(vq, count);
} else {
virtqueue_split_flush(vq, count);
@@ -1505,7 +1616,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu
static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
{
- unsigned int i, head, max;
+ unsigned int i, head, max, idx;
VRingMemoryRegionCaches *caches;
MemoryRegionCache indirect_desc_cache;
MemoryRegionCache *desc_cache;
@@ -1629,6 +1740,13 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
elem->in_sg[i] = iov[out_num + i];
}
+ if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) {
+ idx = (vq->last_avail_idx - 1) % vq->vring.num;
+ vq->used_elems[idx].index = elem->index;
+ vq->used_elems[idx].len = elem->len;
+ vq->used_elems[idx].ndescs = elem->ndescs;
+ }
+
vq->inuse++;
trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
@@ -1762,6 +1880,13 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
elem->index = id;
elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
+
+ if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) {
+ vq->used_elems[vq->last_avail_idx].index = elem->index;
+ vq->used_elems[vq->last_avail_idx].len = elem->len;
+ vq->used_elems[vq->last_avail_idx].ndescs = elem->ndescs;
+ }
+
vq->last_avail_idx += elem->ndescs;
vq->inuse += elem->ndescs;
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 815342d043..240ee04369 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -129,6 +129,14 @@ size_t qemu_ram_pagesize_largest(void);
*/
void cpu_address_space_init(CPUState *cpu, int asidx,
const char *prefix, MemoryRegion *mr);
+/**
+ * cpu_address_space_destroy:
+ * @cpu: CPU for which address space needs to be destroyed
+ * @asidx: integer index of this address space
+ *
+ * Note that with KVM only one address space is supported.
+ */
+void cpu_address_space_destroy(CPUState *cpu, int asidx);
void cpu_physical_memory_rw(hwaddr addr, void *buf,
hwaddr len, bool is_write);
diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index 1bd2c4ec2a..d73f424f56 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -41,6 +41,12 @@ void gdb_register_coprocessor(CPUState *cpu,
const GDBFeature *feature, int g_pos);
/**
+ * gdb_unregister_coprocessor_all() - unregisters supplemental set of registers
+ * @cpu - the CPU associated with registers
+ */
+void gdb_unregister_coprocessor_all(CPUState *cpu);
+
+/**
* gdbserver_start: start the gdb server
* @port_or_device: connection spec for gdb
*
diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h
index e6e1a9ef59..32654dc274 100644
--- a/include/hw/acpi/cpu.h
+++ b/include/hw/acpi/cpu.h
@@ -19,6 +19,8 @@
#include "hw/boards.h"
#include "hw/hotplug.h"
+#define ACPI_CPU_HOTPLUG_REG_LEN 12
+
typedef struct AcpiCpuStatus {
CPUState *cpu;
uint64_t arch_id;
@@ -61,9 +63,10 @@ typedef void (*build_madt_cpu_fn)(int uid, const CPUArchIdList *apic_ids,
GArray *entry, bool force_enabled);
void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts,
- build_madt_cpu_fn build_madt_cpu, hwaddr io_base,
+ build_madt_cpu_fn build_madt_cpu, hwaddr base_addr,
const char *res_root,
- const char *event_handler_method);
+ const char *event_handler_method,
+ AmlRegionSpace rs);
void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list);
diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h
index ba84ce0214..40af3550b5 100644
--- a/include/hw/acpi/generic_event_device.h
+++ b/include/hw/acpi/generic_event_device.h
@@ -62,6 +62,7 @@
#include "hw/sysbus.h"
#include "hw/acpi/memory_hotplug.h"
#include "hw/acpi/ghes.h"
+#include "hw/acpi/cpu.h"
#include "qom/object.h"
#define ACPI_POWER_BUTTON_DEVICE "PWRB"
@@ -86,6 +87,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED)
#define GED_DEVICE "GED"
#define AML_GED_EVT_REG "EREG"
#define AML_GED_EVT_SEL "ESEL"
+#define AML_GED_EVT_CPU_SCAN_METHOD "\\_SB.GED.CSCN"
/*
* Platforms need to specify the GED event bitmap
@@ -95,6 +97,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED)
#define ACPI_GED_MEM_HOTPLUG_EVT 0x1
#define ACPI_GED_PWR_DOWN_EVT 0x2
#define ACPI_GED_NVDIMM_HOTPLUG_EVT 0x4
+#define ACPI_GED_CPU_HOTPLUG_EVT 0x8
typedef struct GEDState {
MemoryRegion evt;
@@ -106,6 +109,8 @@ struct AcpiGedState {
SysBusDevice parent_obj;
MemHotplugState memhp_state;
MemoryRegion container_memhp;
+ CPUHotplugState cpuhp_state;
+ MemoryRegion container_cpuhp;
GEDState ged_state;
uint32_t ged_event_bitmap;
qemu_irq irq;
diff --git a/include/hw/boards.h b/include/hw/boards.h
index ef6f18f2c1..48ff6d8b93 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -237,6 +237,9 @@ typedef struct {
* purposes only.
* Applies only to default memory backend, i.e., explicit memory backend
* wasn't used.
+ * @smbios_memory_device_size:
+ * Default size of memory device,
+ * SMBIOS 3.1.0 "7.18 Memory Device (Type 17)"
*/
struct MachineClass {
/*< private >*/
@@ -304,6 +307,7 @@ struct MachineClass {
const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
int64_t (*get_default_cpu_node_id)(const MachineState *ms, int idx);
ram_addr_t (*fixup_ram_size)(ram_addr_t size);
+ uint64_t smbios_memory_device_size;
};
/**
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index d946161717..1c9c775df6 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -496,6 +496,7 @@ struct CPUState {
QSIMPLEQ_HEAD(, qemu_work_item) work_list;
struct CPUAddressSpace *cpu_ases;
+ int cpu_ases_count;
int num_ases;
AddressSpace *as;
MemoryRegion *memory;
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 0a4fcb2800..fdd0f4e62b 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -181,6 +181,21 @@ typedef struct CXLCCI {
uint64_t runtime;
QEMUTimer *timer;
} bg;
+
+ /* firmware update */
+ struct {
+ uint8_t active_slot;
+ uint8_t staged_slot;
+ bool slot[4];
+ uint8_t curr_action;
+ uint8_t curr_slot;
+ /* handle partial transfers */
+ bool transferring;
+ size_t prev_offset;
+ size_t prev_len;
+ time_t last_partxfer;
+ } fw;
+
size_t payload_max;
/* Pointer to device hosting the CCI */
DeviceState *d;
@@ -397,9 +412,14 @@ static inline void __toggle_media(CXLDeviceState *cxl_dstate, int val)
#define cxl_dev_enable_media(cxlds) \
do { __toggle_media((cxlds), 0x1); } while (0)
-static inline bool sanitize_running(CXLCCI *cci)
+static inline bool cxl_dev_media_disabled(CXLDeviceState *cxl_dstate)
+{
+ uint64_t dev_status_reg = cxl_dstate->mbox_reg_state64[R_CXL_MEM_DEV_STS];
+ return FIELD_EX64(dev_status_reg, CXL_MEM_DEV_STS, MEDIA_STATUS) == 0x3;
+}
+static inline bool scan_media_running(CXLCCI *cci)
{
- return !!cci->bg.runtime && cci->bg.opcode == 0x4400;
+ return !!cci->bg.runtime && cci->bg.opcode == 0x4304;
}
typedef struct CXLError {
@@ -422,6 +442,47 @@ typedef struct CXLPoison {
typedef QLIST_HEAD(, CXLPoison) CXLPoisonList;
#define CXL_POISON_LIST_LIMIT 256
+/* CXL memory device patrol scrub control attributes */
+typedef struct CXLMemPatrolScrubReadAttrs {
+ uint8_t scrub_cycle_cap;
+ uint16_t scrub_cycle;
+ uint8_t scrub_flags;
+} QEMU_PACKED CXLMemPatrolScrubReadAttrs;
+
+typedef struct CXLMemPatrolScrubWriteAttrs {
+ uint8_t scrub_cycle_hr;
+ uint8_t scrub_flags;
+} QEMU_PACKED CXLMemPatrolScrubWriteAttrs;
+
+#define CXL_MEMDEV_PS_GET_FEATURE_VERSION 0x01
+#define CXL_MEMDEV_PS_SET_FEATURE_VERSION 0x01
+#define CXL_MEMDEV_PS_SCRUB_CYCLE_CHANGE_CAP_DEFAULT BIT(0)
+#define CXL_MEMDEV_PS_SCRUB_REALTIME_REPORT_CAP_DEFAULT BIT(1)
+#define CXL_MEMDEV_PS_CUR_SCRUB_CYCLE_DEFAULT 12
+#define CXL_MEMDEV_PS_MIN_SCRUB_CYCLE_DEFAULT 1
+#define CXL_MEMDEV_PS_ENABLE_DEFAULT 0
+
+/* CXL memory device DDR5 ECS control attributes */
+typedef struct CXLMemECSReadAttrs {
+ uint8_t ecs_log_cap;
+ uint8_t ecs_cap;
+ uint16_t ecs_config;
+ uint8_t ecs_flags;
+} QEMU_PACKED CXLMemECSReadAttrs;
+
+typedef struct CXLMemECSWriteAttrs {
+ uint8_t ecs_log_cap;
+ uint16_t ecs_config;
+} QEMU_PACKED CXLMemECSWriteAttrs;
+
+#define CXL_ECS_GET_FEATURE_VERSION 0x01
+#define CXL_ECS_SET_FEATURE_VERSION 0x01
+#define CXL_ECS_LOG_ENTRY_TYPE_DEFAULT 0x01
+#define CXL_ECS_REALTIME_REPORT_CAP_DEFAULT 1
+#define CXL_ECS_THRESHOLD_COUNT_DEFAULT 3 /* 3: 256, 4: 1024, 5: 4096 */
+#define CXL_ECS_MODE_DEFAULT 0
+#define CXL_ECS_NUM_MEDIA_FRUS 3 /* Default */
+
#define DCD_MAX_NUM_REGION 8
typedef struct CXLDCExtentRaw {
@@ -459,6 +520,14 @@ typedef struct CXLDCRegion {
unsigned long *blk_bitmap;
} CXLDCRegion;
+typedef struct CXLSetFeatureInfo {
+ QemuUUID uuid;
+ uint8_t data_transfer_flag;
+ bool data_saved_across_reset;
+ uint16_t data_offset;
+ size_t data_size;
+} CXLSetFeatureInfo;
+
struct CXLType3Dev {
/* Private */
PCIDevice parent_obj;
@@ -491,6 +560,19 @@ struct CXLType3Dev {
unsigned int poison_list_cnt;
bool poison_list_overflowed;
uint64_t poison_list_overflow_ts;
+ /* Poison Injection - backup */
+ CXLPoisonList poison_list_bkp;
+ CXLPoisonList scan_media_results;
+ bool scan_media_hasrun;
+
+ CXLSetFeatureInfo set_feat_info;
+
+ /* Patrol scrub control attributes */
+ CXLMemPatrolScrubReadAttrs patrol_scrub_attrs;
+ CXLMemPatrolScrubWriteAttrs patrol_scrub_wr_attrs;
+ /* ECS control attributes */
+ CXLMemECSReadAttrs ecs_attrs[CXL_ECS_NUM_MEDIA_FRUS];
+ CXLMemECSWriteAttrs ecs_wr_attrs[CXL_ECS_NUM_MEDIA_FRUS];
struct dynamic_capacity {
HostMemoryBackend *host_dc;
@@ -554,10 +636,12 @@ CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl,
size_t *len);
CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds,
CXLClearEventPayload *pl);
+void cxl_discard_all_event_records(CXLDeviceState *cxlds);
void cxl_event_irq_assert(CXLType3Dev *ct3d);
void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d);
+void cxl_clear_poison_list_overflowed(CXLType3Dev *ct3d);
CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len);
diff --git a/include/hw/cxl/cxl_mailbox.h b/include/hw/cxl/cxl_mailbox.h
new file mode 100644
index 0000000000..beb048052e
--- /dev/null
+++ b/include/hw/cxl/cxl_mailbox.h
@@ -0,0 +1,18 @@
+/*
+ * QEMU CXL Mailbox
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef CXL_MAILBOX_H
+#define CXL_MAILBOX_H
+
+#define CXL_MBOX_IMMEDIATE_CONFIG_CHANGE (1 << 1)
+#define CXL_MBOX_IMMEDIATE_DATA_CHANGE (1 << 2)
+#define CXL_MBOX_IMMEDIATE_POLICY_CHANGE (1 << 3)
+#define CXL_MBOX_IMMEDIATE_LOG_CHANGE (1 << 4)
+#define CXL_MBOX_SECURITY_STATE_CHANGE (1 << 5)
+#define CXL_MBOX_BACKGROUND_OPERATION (1 << 6)
+
+#endif
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index ca15132508..e7e41cb939 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -3,6 +3,7 @@
#include "hw/pci/pci.h"
#include "hw/pci/pcie.h"
+#include "hw/pci/pcie_doe.h"
#define TYPE_PCI_DEVICE "pci-device"
typedef struct PCIDeviceClass PCIDeviceClass;
@@ -37,6 +38,8 @@ struct PCIDeviceClass {
uint16_t subsystem_id; /* only for header type = 0 */
const char *romfile; /* rom bar */
+
+ bool sriov_vf_user_creatable;
};
enum PCIReqIDType {
@@ -157,9 +160,17 @@ struct PCIDevice {
MSIVectorReleaseNotifier msix_vector_release_notifier;
MSIVectorPollNotifier msix_vector_poll_notifier;
+ /* SPDM */
+ uint16_t spdm_port;
+
+ /* DOE */
+ DOECap doe_spdm;
+
/* ID of standby device in net_failover pair */
char *failover_pair_id;
uint32_t acpi_index;
+
+ char *sriov_pf;
};
static inline int pci_intx(PCIDevice *pci_dev)
@@ -192,7 +203,7 @@ static inline int pci_is_express_downstream_port(const PCIDevice *d)
static inline int pci_is_vf(const PCIDevice *d)
{
- return d->exp.sriov_vf.pf != NULL;
+ return d->sriov_pf || d->exp.sriov_vf.pf != NULL;
}
static inline uint32_t pci_config_size(const PCIDevice *d)
diff --git a/include/hw/pci/pcie_doe.h b/include/hw/pci/pcie_doe.h
index 87dc17dcef..9e1275db8a 100644
--- a/include/hw/pci/pcie_doe.h
+++ b/include/hw/pci/pcie_doe.h
@@ -46,6 +46,8 @@ REG32(PCI_DOE_CAP_STATUS, 0)
/* PCI-SIG defined Data Object Types - r6.0 Table 6-32 */
#define PCI_SIG_DOE_DISCOVERY 0x00
+#define PCI_SIG_DOE_CMA 0x01
+#define PCI_SIG_DOE_SECURED_CMA 0x02
#define PCI_DOE_DW_SIZE_MAX (1 << 18)
#define PCI_DOE_PROTOCOL_NUM_MAX 256
@@ -106,6 +108,9 @@ struct DOECap {
/* Protocols and its callback response */
DOEProtocol *protocols;
uint16_t protocol_num;
+
+ /* Used for spdm-socket */
+ int spdm_socket;
};
void pcie_doe_init(PCIDevice *pdev, DOECap *doe_cap, uint16_t offset,
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
index c5d2d318d3..f75b8f22ee 100644
--- a/include/hw/pci/pcie_sriov.h
+++ b/include/hw/pci/pcie_sriov.h
@@ -18,6 +18,7 @@
typedef struct PCIESriovPF {
uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
+ bool vf_user_created; /* If VFs are created by user */
} PCIESriovPF;
typedef struct PCIESriovVF {
@@ -40,6 +41,23 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
MemoryRegion *memory);
+/**
+ * pcie_sriov_pf_init_from_user_created_vfs() - Initialize PF with user-created
+ * VFs.
+ * @dev: A PCIe device being realized.
+ * @offset: The offset of the SR-IOV capability.
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Return: The size of added capability. 0 if the user did not create VFs.
+ * -1 if failed.
+ */
+int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
+ uint16_t offset,
+ Error **errp);
+
+bool pcie_sriov_register_device(PCIDevice *dev, Error **errp);
+void pcie_sriov_unregister_device(PCIDevice *dev);
+
/*
* Default (minimal) page size support values
* as required by the SR/IOV standard:
diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
index bdb3da72d0..7db4210b16 100644
--- a/include/hw/virtio/virtio-iommu.h
+++ b/include/hw/virtio/virtio-iommu.h
@@ -43,7 +43,6 @@ typedef struct IOMMUDevice {
MemoryRegion bypass_mr; /* The alias of shared memory MR */
GList *resv_regions;
GList *host_resv_ranges;
- bool probe_done;
} IOMMUDevice;
typedef struct IOMMUPciBus {
diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h
index 9e67ba38c7..34539f2f67 100644
--- a/include/hw/virtio/virtio-pci.h
+++ b/include/hw/virtio/virtio-pci.h
@@ -152,6 +152,7 @@ struct VirtIOPCIProxy {
uint32_t modern_io_bar_idx;
uint32_t modern_mem_bar_idx;
int config_cap;
+ uint16_t last_pcie_cap_offset;
uint32_t flags;
bool disable_modern;
bool ignore_backend_features;
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 7512afbc84..d2a1938757 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -69,6 +69,8 @@ typedef struct VirtQueueElement
unsigned int ndescs;
unsigned int out_num;
unsigned int in_num;
+ /* Element has been processed (VIRTIO_F_IN_ORDER) */
+ bool in_order_filled;
hwaddr *in_addr;
hwaddr *out_addr;
struct iovec *in_sg;
@@ -371,7 +373,9 @@ typedef struct VirtIORNGConf VirtIORNGConf;
DEFINE_PROP_BIT64("packed", _state, _field, \
VIRTIO_F_RING_PACKED, false), \
DEFINE_PROP_BIT64("queue_reset", _state, _field, \
- VIRTIO_F_RING_RESET, true)
+ VIRTIO_F_RING_RESET, true), \
+ DEFINE_PROP_BIT64("in_order", _state, _field, \
+ VIRTIO_F_IN_ORDER, false)
hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n);
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index c31d9c7356..c4a914b3d8 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -313,6 +313,31 @@ int kvm_create_device(KVMState *s, uint64_t type, bool test);
*/
bool kvm_device_supported(int vmfd, uint64_t type);
+/**
+ * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
+ * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created.
+ *
+ * @returns: 0 when success, errno (<0) when failed.
+ */
+int kvm_create_vcpu(CPUState *cpu);
+
+/**
+ * kvm_park_vcpu - Park QEMU KVM vCPU context
+ * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked.
+ *
+ * @returns: none
+ */
+void kvm_park_vcpu(CPUState *cpu);
+
+/**
+ * kvm_unpark_vcpu - unpark QEMU KVM vCPU context
+ * @s: KVM State
+ * @vcpu_id: Architecture vCPU ID of the parked vCPU
+ *
+ * @returns: KVM fd
+ */
+int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id);
+
/* Arch specific hooks */
extern const KVMCapabilityInfo kvm_arch_required_capabilities[];
diff --git a/include/sysemu/spdm-socket.h b/include/sysemu/spdm-socket.h
new file mode 100644
index 0000000000..5d8bd9aa4e
--- /dev/null
+++ b/include/sysemu/spdm-socket.h
@@ -0,0 +1,74 @@
+/*
+ * QEMU SPDM socket support
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef SPDM_REQUESTER_H
+#define SPDM_REQUESTER_H
+
+/**
+ * spdm_socket_connect: connect to an external SPDM socket
+ * @port: port to connect to
+ * @errp: error object handle
+ *
+ * This will connect to an external SPDM socket server. On error
+ * it will return -1 and errp will be set. On success this function
+ * will return the socket number.
+ */
+int spdm_socket_connect(uint16_t port, Error **errp);
+
+/**
+ * spdm_socket_rsp: send and receive a message to a SPDM server
+ * @socket: socket returned from spdm_socket_connect()
+ * @transport_type: SPDM_SOCKET_TRANSPORT_TYPE_* macro
+ * @req: request buffer
+ * @req_len: request buffer length
+ * @rsp: response buffer
+ * @rsp_len: response buffer length
+ *
+ * Send platform data to a SPDM server on socket and then receive
+ * a response.
+ */
+uint32_t spdm_socket_rsp(const int socket, uint32_t transport_type,
+ void *req, uint32_t req_len,
+ void *rsp, uint32_t rsp_len);
+
+/**
+ * spdm_socket_close: send a shutdown command to the server
+ * @socket: socket returned from spdm_socket_connect()
+ * @transport_type: SPDM_SOCKET_TRANSPORT_TYPE_* macro
+ *
+ * This will issue a shutdown command to the server.
+ */
+void spdm_socket_close(const int socket, uint32_t transport_type);
+
+#define SPDM_SOCKET_COMMAND_NORMAL 0x0001
+#define SPDM_SOCKET_COMMAND_OOB_ENCAP_KEY_UPDATE 0x8001
+#define SPDM_SOCKET_COMMAND_CONTINUE 0xFFFD
+#define SPDM_SOCKET_COMMAND_SHUTDOWN 0xFFFE
+#define SPDM_SOCKET_COMMAND_UNKOWN 0xFFFF
+#define SPDM_SOCKET_COMMAND_TEST 0xDEAD
+
+#define SPDM_SOCKET_TRANSPORT_TYPE_MCTP 0x01
+#define SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE 0x02
+
+#define SPDM_SOCKET_MAX_MESSAGE_BUFFER_SIZE 0x1200
+
+#endif
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index daa38428c5..03457ead66 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -62,6 +62,7 @@ const int vdpa_feature_bits[] = {
VIRTIO_F_RING_PACKED,
VIRTIO_F_RING_RESET,
VIRTIO_F_VERSION_1,
+ VIRTIO_F_IN_ORDER,
VIRTIO_F_NOTIFICATION_DATA,
VIRTIO_NET_F_CSUM,
VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
diff --git a/system/physmem.c b/system/physmem.c
index 9a3b3a7636..0e19186e1b 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -763,6 +763,7 @@ void cpu_address_space_init(CPUState *cpu, int asidx,
if (!cpu->cpu_ases) {
cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
+ cpu->cpu_ases_count = cpu->num_ases;
}
newas = &cpu->cpu_ases[asidx];
@@ -776,6 +777,34 @@ void cpu_address_space_init(CPUState *cpu, int asidx,
}
}
+void cpu_address_space_destroy(CPUState *cpu, int asidx)
+{
+ CPUAddressSpace *cpuas;
+
+ assert(cpu->cpu_ases);
+ assert(asidx >= 0 && asidx < cpu->num_ases);
+ /* KVM cannot currently support multiple address spaces. */
+ assert(asidx == 0 || !kvm_enabled());
+
+ cpuas = &cpu->cpu_ases[asidx];
+ if (tcg_enabled()) {
+ memory_listener_unregister(&cpuas->tcg_as_listener);
+ }
+
+ address_space_destroy(cpuas->as);
+ g_free_rcu(cpuas->as, rcu);
+
+ if (asidx == 0) {
+ /* reset the convenience alias for address space 0 */
+ cpu->as = NULL;
+ }
+
+ if (--cpu->cpu_ases_count == 0) {
+ g_free(cpu->cpu_ases);
+ cpu->cpu_ases = NULL;
+ }
+}
+
AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
{
/* Return the AddressSpace corresponding to the specified index */
diff --git a/tests/data/acpi/aarch64/virt/DSDT b/tests/data/acpi/aarch64/virt/DSDT
index c475039907..36d3e5d5a5 100644
--- a/tests/data/acpi/aarch64/virt/DSDT
+++ b/tests/data/acpi/aarch64/virt/DSDT
Binary files differ
diff --git a/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt b/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt
index aee6ba017c..e6154d0355 100644
--- a/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt
+++ b/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt
Binary files differ
diff --git a/tests/data/acpi/aarch64/virt/DSDT.memhp b/tests/data/acpi/aarch64/virt/DSDT.memhp
index bae36cdd39..33f011d6b6 100644
--- a/tests/data/acpi/aarch64/virt/DSDT.memhp
+++ b/tests/data/acpi/aarch64/virt/DSDT.memhp
Binary files differ
diff --git a/tests/data/acpi/aarch64/virt/DSDT.pxb b/tests/data/acpi/aarch64/virt/DSDT.pxb
index fbd78f44c4..c0fdc6e9c1 100644
--- a/tests/data/acpi/aarch64/virt/DSDT.pxb
+++ b/tests/data/acpi/aarch64/virt/DSDT.pxb
Binary files differ
diff --git a/tests/data/acpi/aarch64/virt/DSDT.topology b/tests/data/acpi/aarch64/virt/DSDT.topology
index 501314c91b..029d03eecc 100644
--- a/tests/data/acpi/aarch64/virt/DSDT.topology
+++ b/tests/data/acpi/aarch64/virt/DSDT.topology
Binary files differ
diff --git a/tests/data/acpi/riscv64/virt/APIC b/tests/data/acpi/riscv64/virt/APIC
new file mode 100644
index 0000000000..66a25dfd2d
--- /dev/null
+++ b/tests/data/acpi/riscv64/virt/APIC
Binary files differ
diff --git a/tests/data/acpi/riscv64/virt/DSDT b/tests/data/acpi/riscv64/virt/DSDT
new file mode 100644
index 0000000000..6a33f5647d
--- /dev/null
+++ b/tests/data/acpi/riscv64/virt/DSDT
Binary files differ
diff --git a/tests/data/acpi/riscv64/virt/FACP b/tests/data/acpi/riscv64/virt/FACP
new file mode 100644
index 0000000000..a5276b65ea
--- /dev/null
+++ b/tests/data/acpi/riscv64/virt/FACP
Binary files differ
diff --git a/tests/data/acpi/riscv64/virt/MCFG b/tests/data/acpi/riscv64/virt/MCFG
new file mode 100644
index 0000000000..37eb923a93
--- /dev/null
+++ b/tests/data/acpi/riscv64/virt/MCFG
Binary files differ
diff --git a/tests/data/acpi/riscv64/virt/RHCT b/tests/data/acpi/riscv64/virt/RHCT
new file mode 100644
index 0000000000..4f231735ab
--- /dev/null
+++ b/tests/data/acpi/riscv64/virt/RHCT
Binary files differ
diff --git a/tests/data/acpi/riscv64/virt/SPCR b/tests/data/acpi/riscv64/virt/SPCR
new file mode 100644
index 0000000000..4da9daf65f
--- /dev/null
+++ b/tests/data/acpi/riscv64/virt/SPCR
Binary files differ
diff --git a/tests/data/acpi/x86/microvm/DSDT.pcie b/tests/data/acpi/x86/microvm/DSDT.pcie
index 765f14ef3d..8eacd21d6e 100644
--- a/tests/data/acpi/x86/microvm/DSDT.pcie
+++ b/tests/data/acpi/x86/microvm/DSDT.pcie
Binary files differ
diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index f4c4704bab..36e5c0adde 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -267,15 +267,6 @@ static void dump_aml_files(test_data *data, bool rebuild)
data->arch, data->machine,
sdt->aml, ext);
- /*
- * To keep test cases not failing before the DATA files are moved to
- * ${arch}/${machine} folder, add this check as well.
- */
- if (!g_file_test(aml_file, G_FILE_TEST_EXISTS)) {
- aml_file = g_strdup_printf("%s/%s/%.4s%s", data_dir,
- data->machine, sdt->aml, ext);
- }
-
if (!g_file_test(aml_file, G_FILE_TEST_EXISTS) &&
sdt->aml_len == exp_sdt->aml_len &&
!memcmp(sdt->aml, exp_sdt->aml, sdt->aml_len)) {
@@ -412,11 +403,6 @@ static GArray *load_expected_aml(test_data *data)
try_again:
aml_file = g_strdup_printf("%s/%s/%s/%.4s%s", data_dir, data->arch,
data->machine, sdt->aml, ext);
- if (!g_file_test(aml_file, G_FILE_TEST_EXISTS)) {
- aml_file = g_strdup_printf("%s/%s/%.4s%s", data_dir, data->machine,
- sdt->aml, ext);
- }
-
if (verbosity_level >= 2) {
fprintf(stderr, "Looking for expected file '%s'\n", aml_file);
}
@@ -1977,6 +1963,28 @@ static void test_acpi_microvm_acpi_erst(void)
}
#endif /* CONFIG_POSIX */
+static void test_acpi_riscv64_virt_tcg(void)
+{
+ test_data data = {
+ .machine = "virt",
+ .arch = "riscv64",
+ .tcg_only = true,
+ .uefi_fl1 = "pc-bios/edk2-riscv-code.fd",
+ .uefi_fl2 = "pc-bios/edk2-riscv-vars.fd",
+ .cd = "tests/data/uefi-boot-images/bios-tables-test.riscv64.iso.qcow2",
+ .ram_start = 0x80000000ULL,
+ .scan_len = 128ULL * 1024 * 1024,
+ };
+
+ /*
+ * RHCT will have ISA string encoded. To reduce the effort
+ * of updating expected AML file for any new default ISA extension,
+ * use the profile rva22s64.
+ */
+ test_acpi_one("-cpu rva22s64 ", &data);
+ free_test_data(&data);
+}
+
static void test_acpi_aarch64_virt_tcg(void)
{
test_data data = {
@@ -2455,6 +2463,10 @@ int main(int argc, char *argv[])
qtest_add_func("acpi/virt/viot", test_acpi_aarch64_virt_viot);
}
}
+ } else if (strcmp(arch, "riscv64") == 0) {
+ if (has_tcg && qtest_has_device("virtio-blk-pci")) {
+ qtest_add_func("acpi/virt", test_acpi_riscv64_virt_tcg);
+ }
}
ret = g_test_run();
boot_sector_cleanup(disk);