diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2024-07-24 09:32:04 +1000 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2024-07-24 09:32:04 +1000 |
commit | 5885bcef3d760e84d17eb4113e85f2aea0bd0582 (patch) | |
tree | 34b0d77bb749f5489148cdc3257ccc19c344e065 | |
parent | 3b5efc553eb1ea8eaaa81d87b100036205a4525d (diff) | |
parent | 4f947b10d525958578002848a92eeb6152ffbf0d (diff) |
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
virtio,pci,pc: features,fixes
pci: Initial support for SPDM Responders
cxl: Add support for scan media, feature commands, device patrol scrub
control, DDR5 ECS control, firmware updates
virtio: in-order support
virtio-net: support for SR-IOV emulation (note: known issues on s390,
might get reverted if not fixed)
smbios: memory device size is now configurable per Machine
cpu: architecture agnostic code to support vCPU Hotplug
Fixes, cleanups all over the place.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
# -----BEGIN PGP SIGNATURE-----
#
# iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmae9l8PHG1zdEByZWRo
# YXQuY29tAAoJECgfDbjSjVRp8fYH/impBH9nViO/WK48io4mLSkl0EUL8Y/xrMvH
# zKFCKaXq8D96VTt1Z4EGKYgwG0voBKZaCEKYU/0ARGnSlSwxINQ8ROCnBWMfn2sx
# yQt08EXVMznNLtXjc6U5zCoCi6SaV85GH40No3MUFXBQt29ZSlFqO/fuHGZHYBwS
# wuVKvTjjNF4EsGt3rS4Qsv6BwZWMM+dE6yXpKWk68kR8IGp+6QGxkMbWt9uEX2Md
# VuemKVnFYw0XGCGy5K+ZkvoA2DGpEw0QxVSOMs8CI55Oc9SkTKz5fUSzXXGo1if+
# M1CTjOPJu6pMym6gy6XpFa8/QioDA/jE2vBQvfJ64TwhJDV159s=
# =k8e9
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 23 Jul 2024 10:16:31 AM AEST
# gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg: issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [undefined]
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [undefined]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469
* tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (61 commits)
hw/nvme: Add SPDM over DOE support
backends: Initial support for SPDM socket support
hw/pci: Add all Data Object Types defined in PCIe r6.0
tests/acpi: Add expected ACPI AML files for RISC-V
tests/qtest/bios-tables-test.c: Enable basic testing for RISC-V
tests/acpi: Add empty ACPI data files for RISC-V
tests/qtest/bios-tables-test.c: Remove the fall back path
tests/acpi: update expected DSDT blob for aarch64 and microvm
acpi/gpex: Create PCI link devices outside PCI root bridge
tests/acpi: Allow DSDT acpi table changes for aarch64
hw/riscv/virt-acpi-build.c: Update the HID of RISC-V UART
hw/riscv/virt-acpi-build.c: Add namespace devices for PLIC and APLIC
virtio-iommu: Add trace point on virtio_iommu_detach_endpoint_from_domain
hw/vfio/common: Add vfio_listener_region_del_iommu trace event
virtio-iommu: Remove the end point on detach
virtio-iommu: Free [host_]resv_ranges on unset_iommu_devices
virtio-iommu: Remove probe_done
Revert "virtio-iommu: Clear IOMMUDevice when VFIO device is unplugged"
gdbstub: Add helper function to unregister GDB register space
physmem: Add helper function to destroy CPU AddressSpace
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
81 files changed, 2486 insertions, 288 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index d5ff6c2498..6afc61979c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2009,6 +2009,7 @@ F: hw/pci-bridge/* F: qapi/pci.json F: docs/pci* F: docs/specs/*pci* +F: docs/system/sriov.rst PCIE DOE M: Huai-Cheng Kuo <hchkuo@avery-design.com.tw> @@ -2208,6 +2209,7 @@ F: docs/devel/vfio-iommufd.rst vhost M: Michael S. Tsirkin <mst@redhat.com> +R: Stefano Garzarella <sgarzare@redhat.com> S: Supported F: hw/*/*vhost* F: docs/interop/vhost-user.json @@ -3398,6 +3400,12 @@ F: tests/qtest/*tpm* F: docs/specs/tpm.rst T: git https://github.com/stefanberger/qemu-tpm.git tpm-next +SPDM +M: Alistair Francis <alistair.francis@wdc.com> +S: Maintained +F: backends/spdm-socket.c +F: include/sysemu/spdm-socket.h + Checkpatch S: Odd Fixes F: scripts/checkpatch.pl @@ -3657,6 +3665,7 @@ F: tests/uefi-test-tools/ VT-d Emulation M: Michael S. Tsirkin <mst@redhat.com> R: Jason Wang <jasowang@redhat.com> +R: Yi Liu <yi.l.liu@intel.com> S: Supported F: hw/i386/intel_iommu.c F: hw/i386/intel_iommu_internal.h diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 64bf47a033..0f110cce3e 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -340,14 +340,71 @@ err: return ret; } +void kvm_park_vcpu(CPUState *cpu) +{ + struct KVMParkedVcpu *vcpu; + + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); + + vcpu = g_malloc0(sizeof(*vcpu)); + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); + vcpu->kvm_fd = cpu->kvm_fd; + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); +} + +int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id) +{ + struct KVMParkedVcpu *cpu; + int kvm_fd = -ENOENT; + + QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) { + if (cpu->vcpu_id == vcpu_id) { + QLIST_REMOVE(cpu, node); + kvm_fd = cpu->kvm_fd; + g_free(cpu); + } + } + + trace_kvm_unpark_vcpu(vcpu_id, kvm_fd > 0 ? "unparked" : "!found parked"); + + return kvm_fd; +} + +int kvm_create_vcpu(CPUState *cpu) +{ + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); + KVMState *s = kvm_state; + int kvm_fd; + + /* check if the KVM vCPU already exist but is parked */ + kvm_fd = kvm_unpark_vcpu(s, vcpu_id); + if (kvm_fd < 0) { + /* vCPU not parked: create a new KVM vCPU */ + kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); + if (kvm_fd < 0) { + error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id); + return kvm_fd; + } + } + + cpu->kvm_fd = kvm_fd; + cpu->kvm_state = s; + cpu->vcpu_dirty = true; + cpu->dirty_pages = 0; + cpu->throttle_us_per_full = 0; + + trace_kvm_create_vcpu(cpu->cpu_index, vcpu_id, kvm_fd); + + return 0; +} + static int do_kvm_destroy_vcpu(CPUState *cpu) { KVMState *s = kvm_state; long mmap_size; - struct KVMParkedVcpu *vcpu = NULL; int ret = 0; - trace_kvm_destroy_vcpu(); + trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); ret = kvm_arch_destroy_vcpu(cpu); if (ret < 0) { @@ -373,10 +430,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) } } - vcpu = g_malloc0(sizeof(*vcpu)); - vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); - vcpu->kvm_fd = cpu->kvm_fd; - QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); + kvm_park_vcpu(cpu); err: return ret; } @@ -389,24 +443,6 @@ void kvm_destroy_vcpu(CPUState *cpu) } } -static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) -{ - struct KVMParkedVcpu *cpu; - - QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) { - if (cpu->vcpu_id == vcpu_id) { - int kvm_fd; - - QLIST_REMOVE(cpu, node); - kvm_fd = cpu->kvm_fd; - g_free(cpu); - return kvm_fd; - } - } - - return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); -} - int kvm_init_vcpu(CPUState *cpu, Error **errp) { KVMState *s = kvm_state; @@ -415,19 +451,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); - ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); + ret = kvm_create_vcpu(cpu); if (ret < 0) { - error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)", + error_setg_errno(errp, -ret, + "kvm_init_vcpu: kvm_create_vcpu failed (%lu)", kvm_arch_vcpu_id(cpu)); goto err; } - cpu->kvm_fd = ret; - cpu->kvm_state = s; - cpu->vcpu_dirty = true; - cpu->dirty_pages = 0; - cpu->throttle_us_per_full = 0; - mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); if (mmap_size < 0) { ret = mmap_size; diff --git a/accel/kvm/kvm-cpus.h b/accel/kvm/kvm-cpus.h index ca40add32c..171b22fd29 100644 --- a/accel/kvm/kvm-cpus.h +++ b/accel/kvm/kvm-cpus.h @@ -22,5 +22,4 @@ bool kvm_supports_guest_debug(void); int kvm_insert_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len); int kvm_remove_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len); void kvm_remove_all_breakpoints(CPUState *cpu); - #endif /* KVM_CPUS_H */ diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index 681ccb667d..37626c1ac5 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id, int kvm_fd) "index: %d, id: %lu, kvm fd: %d" +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" +kvm_unpark_vcpu(unsigned long arch_cpu_id, const char *msg) "id: %lu %s" kvm_irqchip_commit_routes(void) "" kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d" kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s" kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)" kvm_dirty_ring_reaper_kick(const char *reason) "%s" kvm_dirty_ring_flush(int finished) "%d" -kvm_destroy_vcpu(void) "" kvm_failed_get_vcpu_mmap_size(void) "" kvm_cpu_exec(void) "" kvm_interrupt_exit_request(void) "" diff --git a/backends/Kconfig b/backends/Kconfig index 2cb23f62fa..d3dbe19868 100644 --- a/backends/Kconfig +++ b/backends/Kconfig @@ -3,3 +3,7 @@ source tpm/Kconfig config IOMMUFD bool depends on VFIO + +config SPDM_SOCKET + bool + default y diff --git a/backends/meson.build b/backends/meson.build index 749b491f12..da714b93d1 100644 --- a/backends/meson.build +++ b/backends/meson.build @@ -33,4 +33,6 @@ endif system_ss.add(when: gio, if_true: files('dbus-vmstate.c')) system_ss.add(when: 'CONFIG_SGX', if_true: files('hostmem-epc.c')) +system_ss.add(when: 'CONFIG_SPDM_SOCKET', if_true: files('spdm-socket.c')) + subdir('tpm') diff --git a/backends/spdm-socket.c b/backends/spdm-socket.c new file mode 100644 index 0000000000..d0663d696c --- /dev/null +++ b/backends/spdm-socket.c @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * QEMU SPDM socket support + * + * This is based on: + * https://github.com/DMTF/spdm-emu/blob/07c0a838bcc1c6207c656ac75885c0603e344b6f/spdm_emu/spdm_emu_common/command.c + * but has been re-written to match QEMU style + * + * Copyright (c) 2021, DMTF. All rights reserved. + * Copyright (c) 2023. Western Digital Corporation or its affiliates. + */ + +#include "qemu/osdep.h" +#include "sysemu/spdm-socket.h" +#include "qapi/error.h" + +static bool read_bytes(const int socket, uint8_t *buffer, + size_t number_of_bytes) +{ + ssize_t number_received = 0; + ssize_t result; + + while (number_received < number_of_bytes) { + result = recv(socket, buffer + number_received, + number_of_bytes - number_received, 0); + if (result <= 0) { + return false; + } + number_received += result; + } + return true; +} + +static bool read_data32(const int socket, uint32_t *data) +{ + bool result; + + result = read_bytes(socket, (uint8_t *)data, sizeof(uint32_t)); + if (!result) { + return result; + } + *data = ntohl(*data); + return true; +} + +static bool read_multiple_bytes(const int socket, uint8_t *buffer, + uint32_t *bytes_received, + uint32_t max_buffer_length) +{ + uint32_t length; + bool result; + + result = read_data32(socket, &length); + if (!result) { + return result; + } + + if (length > max_buffer_length) { + return false; + } + + if (bytes_received) { + *bytes_received = length; + } + + if (length == 0) { + return true; + } + + return read_bytes(socket, buffer, length); +} + +static bool receive_platform_data(const int socket, + uint32_t transport_type, + uint32_t *command, + uint8_t *receive_buffer, + uint32_t *bytes_to_receive) +{ + bool result; + uint32_t response; + uint32_t bytes_received; + + result = read_data32(socket, &response); + if (!result) { + return result; + } + *command = response; + + result = read_data32(socket, &transport_type); + if (!result) { + return result; + } + + bytes_received = 0; + result = read_multiple_bytes(socket, receive_buffer, &bytes_received, + *bytes_to_receive); + if (!result) { + return result; + } + *bytes_to_receive = bytes_received; + + return result; +} + +static bool write_bytes(const int socket, const uint8_t *buffer, + uint32_t number_of_bytes) +{ + ssize_t number_sent = 0; + ssize_t result; + + while (number_sent < number_of_bytes) { + result = send(socket, buffer + number_sent, + number_of_bytes - number_sent, 0); + if (result == -1) { + return false; + } + number_sent += result; + } + return true; +} + +static bool write_data32(const int socket, uint32_t data) +{ + data = htonl(data); + return write_bytes(socket, (uint8_t *)&data, sizeof(uint32_t)); +} + +static bool write_multiple_bytes(const int socket, const uint8_t *buffer, + uint32_t bytes_to_send) +{ + bool result; + + result = write_data32(socket, bytes_to_send); + if (!result) { + return result; + } + + return write_bytes(socket, buffer, bytes_to_send); +} + +static bool send_platform_data(const int socket, + uint32_t transport_type, uint32_t command, + const uint8_t *send_buffer, size_t bytes_to_send) +{ + bool result; + + result = write_data32(socket, command); + if (!result) { + return result; + } + + result = write_data32(socket, transport_type); + if (!result) { + return result; + } + + return write_multiple_bytes(socket, send_buffer, bytes_to_send); +} + +int spdm_socket_connect(uint16_t port, Error **errp) +{ + int client_socket; + struct sockaddr_in server_addr; + + client_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (client_socket < 0) { + error_setg(errp, "cannot create socket: %s", strerror(errno)); + return -1; + } + + memset((char *)&server_addr, 0, sizeof(server_addr)); + server_addr.sin_family = AF_INET; + server_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + server_addr.sin_port = htons(port); + + + if (connect(client_socket, (struct sockaddr *)&server_addr, + sizeof(server_addr)) < 0) { + error_setg(errp, "cannot connect: %s", strerror(errno)); + close(client_socket); + return -1; + } + + return client_socket; +} + +uint32_t spdm_socket_rsp(const int socket, uint32_t transport_type, + void *req, uint32_t req_len, + void *rsp, uint32_t rsp_len) +{ + uint32_t command; + bool result; + + result = send_platform_data(socket, transport_type, + SPDM_SOCKET_COMMAND_NORMAL, + req, req_len); + if (!result) { + return 0; + } + + result = receive_platform_data(socket, transport_type, &command, + (uint8_t *)rsp, &rsp_len); + if (!result) { + return 0; + } + + assert(command != 0); + + return rsp_len; +} + +void spdm_socket_close(const int socket, uint32_t transport_type) +{ + send_platform_data(socket, transport_type, + SPDM_SOCKET_COMMAND_SHUTDOWN, NULL, 0); +} diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c index 9492146855..6cc18a1c04 100644 --- a/contrib/vhost-user-blk/vhost-user-blk.c +++ b/contrib/vhost-user-blk/vhost-user-blk.c @@ -196,7 +196,7 @@ vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt, VubDev *vdev_blk = req->vdev_blk; desc = buf; uint64_t range[2] = { le64_to_cpu(desc->sector) << 9, - le32_to_cpu(desc->num_sectors) << 9 }; + (uint64_t)le32_to_cpu(desc->num_sectors) << 9 }; if (type == VIRTIO_BLK_T_DISCARD) { if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) { g_free(buf); diff --git a/docs/specs/acpi_hw_reduced_hotplug.rst b/docs/specs/acpi_hw_reduced_hotplug.rst index 0bd3f9399f..3acd6fcd8b 100644 --- a/docs/specs/acpi_hw_reduced_hotplug.rst +++ b/docs/specs/acpi_hw_reduced_hotplug.rst @@ -64,7 +64,8 @@ GED IO interface (4 byte access) 0: Memory hotplug event 1: System power down event 2: NVDIMM hotplug event - 3-31: Reserved + 3: CPU hotplug event + 4-31: Reserved **write_access:** diff --git a/docs/specs/index.rst b/docs/specs/index.rst index 1484e3e760..e2d907959a 100644 --- a/docs/specs/index.rst +++ b/docs/specs/index.rst @@ -29,6 +29,7 @@ guest hardware that is specific to QEMU. edu ivshmem-spec pvpanic + spdm standard-vga virt-ctlr vmcoreinfo diff --git a/docs/specs/spdm.rst b/docs/specs/spdm.rst new file mode 100644 index 0000000000..f7de080ff0 --- /dev/null +++ b/docs/specs/spdm.rst @@ -0,0 +1,134 @@ +====================================================== +QEMU Security Protocols and Data Models (SPDM) Support +====================================================== + +SPDM enables authentication, attestation and key exchange to assist in +providing infrastructure security enablement. It's a standard published +by the `DMTF`_. + +QEMU supports connecting to a SPDM responder implementation. This allows an +external application to emulate the SPDM responder logic for an SPDM device. + +Setting up a SPDM server +======================== + +When using QEMU with SPDM devices QEMU will connect to a server which +implements the SPDM functionality. + +SPDM-Utils +---------- + +You can use `SPDM Utils`_ to emulate a responder. This is the simplest method. + +SPDM-Utils is a Linux applications to manage, test and develop devices +supporting DMTF Security Protocol and Data Model (SPDM). It is written in Rust +and utilises libspdm. + +To use SPDM-Utils you will need to do the following steps. Details are included +in the SPDM-Utils README. + + 1. `Build libspdm`_ + 2. `Build SPDM Utils`_ + 3. `Run it as a server`_ + +spdm-emu +-------- + +You can use `spdm emu`_ to model the +SPDM responder. + +.. code-block:: shell + + $ cd spdm-emu + $ git submodule init; git submodule update --recursive + $ mkdir build; cd build + $ cmake -DARCH=x64 -DTOOLCHAIN=GCC -DTARGET=Debug -DCRYPTO=openssl .. + $ make -j32 + $ make copy_sample_key # Build certificates, required for SPDM authentication. + +It is worth noting that the certificates should be in compliance with +PCIe r6.1 sec 6.31.3. This means you will need to add the following to +openssl.cnf + +.. code-block:: + + subjectAltName = otherName:2.23.147;UTF8:Vendor=1b36:Device=0010:CC=010802:REV=02:SSVID=1af4:SSID=1100 + 2.23.147 = ASN1:OID:2.23.147 + +and then manually regenerate some certificates with: + +.. code-block:: shell + + $ openssl req -nodes -newkey ec:param.pem -keyout end_responder.key \ + -out end_responder.req -sha384 -batch \ + -subj "/CN=DMTF libspdm ECP384 responder cert" + + $ openssl x509 -req -in end_responder.req -out end_responder.cert \ + -CA inter.cert -CAkey inter.key -sha384 -days 3650 -set_serial 3 \ + -extensions v3_end -extfile ../openssl.cnf + + $ openssl asn1parse -in end_responder.cert -out end_responder.cert.der + + $ cat ca.cert.der inter.cert.der end_responder.cert.der > bundle_responder.certchain.der + +You can use SPDM-Utils instead as it will generate the correct certificates +automatically. + +The responder can then be launched with + +.. code-block:: shell + + $ cd bin + $ ./spdm_responder_emu --trans PCI_DOE + +Connecting an SPDM NVMe device +============================== + +Once a SPDM server is running we can start QEMU and connect to the server. + +For an NVMe device first let's setup a block we can use + +.. code-block:: shell + + $ cd qemu-spdm/linux/image + $ dd if=/dev/zero of=blknvme bs=1M count=2096 # 2GB NNMe Drive + +Then you can add this to your QEMU command line: + +.. code-block:: shell + + -drive file=blknvme,if=none,id=mynvme,format=raw \ + -device nvme,drive=mynvme,serial=deadbeef,spdm_port=2323 + +At which point QEMU will try to connect to the SPDM server. + +Note that if using x64-64 you will want to use the q35 machine instead +of the default. So the entire QEMU command might look like this + +.. code-block:: shell + + qemu-system-x86_64 -M q35 \ + --kernel bzImage \ + -drive file=rootfs.ext2,if=virtio,format=raw \ + -append "root=/dev/vda console=ttyS0" \ + -net none -nographic \ + -drive file=blknvme,if=none,id=mynvme,format=raw \ + -device nvme,drive=mynvme,serial=deadbeef,spdm_port=2323 + +.. _DMTF: + https://www.dmtf.org/standards/SPDM + +.. _SPDM Utils: + https://github.com/westerndigitalcorporation/spdm-utils + +.. _spdm emu: + https://github.com/dmtf/spdm-emu + +.. _Build libspdm: + https://github.com/westerndigitalcorporation/spdm-utils?tab=readme-ov-file#build-libspdm + +.. _Build SPDM Utils: + https://github.com/westerndigitalcorporation/spdm-utils?tab=readme-ov-file#build-the-binary + +.. _Run it as a server: + https://github.com/westerndigitalcorporation/spdm-utils#qemu-spdm-device-emulation diff --git a/docs/system/index.rst b/docs/system/index.rst index c21065e519..718e9d3c56 100644 --- a/docs/system/index.rst +++ b/docs/system/index.rst @@ -39,3 +39,4 @@ or Hypervisor.Framework. multi-process confidential-guest-support vm-templating + sriov diff --git a/docs/system/sriov.rst b/docs/system/sriov.rst new file mode 100644 index 0000000000..a851a66a4b --- /dev/null +++ b/docs/system/sriov.rst @@ -0,0 +1,36 @@ +.. SPDX-License-Identifier: GPL-2.0-or-later + +Compsable SR-IOV device +======================= + +SR-IOV (Single Root I/O Virtualization) is an optional extended capability of a +PCI Express device. It allows a single physical function (PF) to appear as +multiple virtual functions (VFs) for the main purpose of eliminating software +overhead in I/O from virtual machines. + +There are devices with predefined SR-IOV configurations, but it is also possible +to compose an SR-IOV device yourself. Composing an SR-IOV device is currently +only supported by virtio-net-pci. + +Users can configure an SR-IOV-capable virtio-net device by adding +virtio-net-pci functions to a bus. Below is a command line example: + +.. code-block:: shell + + -netdev user,id=n -netdev user,id=o + -netdev user,id=p -netdev user,id=q + -device pcie-root-port,id=b + -device virtio-net-pci,bus=b,addr=0x0.0x3,netdev=q,sriov-pf=f + -device virtio-net-pci,bus=b,addr=0x0.0x2,netdev=p,sriov-pf=f + -device virtio-net-pci,bus=b,addr=0x0.0x1,netdev=o,sriov-pf=f + -device virtio-net-pci,bus=b,addr=0x0.0x0,netdev=n,id=f + +The VFs specify the paired PF with ``sriov-pf`` property. The PF must be +added after all VFs. It is the user's responsibility to ensure that VFs have +function numbers larger than one of the PF, and that the function numbers +have a consistent stride. + +You may also need to perform additional steps to activate the SR-IOV feature on +your guest. For Linux, refer to [1]_. + +.. [1] https://docs.kernel.org/PCI/pci-iov-howto.html diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c index b7be8e5a44..d08568cea0 100644 --- a/gdbstub/gdbstub.c +++ b/gdbstub/gdbstub.c @@ -618,6 +618,19 @@ void gdb_register_coprocessor(CPUState *cpu, } } +void gdb_unregister_coprocessor_all(CPUState *cpu) +{ + /* + * Safe to nuke everything. GDBRegisterState::xml is static const char so + * it won't be freed + */ + g_array_free(cpu->gdb_regs, true); + + cpu->gdb_regs = NULL; + cpu->gdb_num_regs = 0; + cpu->gdb_num_g_regs = 0; +} + static void gdb_process_breakpoint_remove_all(GDBProcess *p) { CPUState *cpu = gdb_get_first_cpu_in_process(p); diff --git a/hw/acpi/acpi-cpu-hotplug-stub.c b/hw/acpi/acpi-cpu-hotplug-stub.c index 3fc4b14c26..c6c61bb9cd 100644 --- a/hw/acpi/acpi-cpu-hotplug-stub.c +++ b/hw/acpi/acpi-cpu-hotplug-stub.c @@ -19,6 +19,12 @@ void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, return; } +void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + CPUHotplugState *state, hwaddr base_addr) +{ + return; +} + void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list) { return; diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 2d81c1e790..5cb60ca8bc 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -7,7 +7,6 @@ #include "trace.h" #include "sysemu/numa.h" -#define ACPI_CPU_HOTPLUG_REG_LEN 12 #define ACPI_CPU_SELECTOR_OFFSET_WR 0 #define ACPI_CPU_FLAGS_OFFSET_RW 4 #define ACPI_CPU_CMD_OFFSET_WR 5 @@ -339,9 +338,10 @@ const VMStateDescription vmstate_cpu_hotplug = { #define CPU_FW_EJECT_EVENT "CEJF" void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, - build_madt_cpu_fn build_madt_cpu, hwaddr io_base, + build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, const char *res_root, - const char *event_handler_method) + const char *event_handler_method, + AmlRegionSpace rs) { Aml *ifctx; Aml *field; @@ -365,14 +365,22 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_name_decl("_UID", aml_string("CPU Hotplug resources"))); aml_append(cpu_ctrl_dev, aml_mutex(CPU_LOCK, 0)); + assert((rs == AML_SYSTEM_IO) || (rs == AML_SYSTEM_MEMORY)); + crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, io_base, io_base, 1, + if (rs == AML_SYSTEM_IO) { + aml_append(crs, aml_io(AML_DECODE16, base_addr, base_addr, 1, ACPI_CPU_HOTPLUG_REG_LEN)); + } else if (rs == AML_SYSTEM_MEMORY) { + aml_append(crs, aml_memory32_fixed(base_addr, + ACPI_CPU_HOTPLUG_REG_LEN, AML_READ_WRITE)); + } + aml_append(cpu_ctrl_dev, aml_name_decl("_CRS", crs)); /* declare CPU hotplug MMIO region with related access fields */ aml_append(cpu_ctrl_dev, - aml_operation_region("PRST", AML_SYSTEM_IO, aml_int(io_base), + aml_operation_region("PRST", rs, aml_int(base_addr), ACPI_CPU_HOTPLUG_REG_LEN)); field = aml_field("PRST", AML_BYTE_ACC, AML_NOLOCK, diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index 2d6e91b124..15b4c3ebbf 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -25,6 +25,7 @@ static const uint32_t ged_supported_events[] = { ACPI_GED_MEM_HOTPLUG_EVT, ACPI_GED_PWR_DOWN_EVT, ACPI_GED_NVDIMM_HOTPLUG_EVT, + ACPI_GED_CPU_HOTPLUG_EVT, }; /* @@ -107,6 +108,9 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev, aml_append(if_ctx, aml_call0(MEMORY_DEVICES_CONTAINER "." MEMORY_SLOT_SCAN_METHOD)); break; + case ACPI_GED_CPU_HOTPLUG_EVT: + aml_append(if_ctx, aml_call0(AML_GED_EVT_CPU_SCAN_METHOD)); + break; case ACPI_GED_PWR_DOWN_EVT: aml_append(if_ctx, aml_notify(aml_name(ACPI_POWER_BUTTON_DEVICE), @@ -234,6 +238,8 @@ static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev, } else { acpi_memory_plug_cb(hotplug_dev, &s->memhp_state, dev, errp); } + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp); } else { error_setg(errp, "virt: device plug request for unsupported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -248,6 +254,8 @@ static void acpi_ged_unplug_request_cb(HotplugHandler *hotplug_dev, if ((object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) && !(object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)))) { acpi_memory_unplug_request_cb(hotplug_dev, &s->memhp_state, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp); } else { error_setg(errp, "acpi: device unplug request for unsupported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -261,6 +269,8 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev, if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { acpi_memory_unplug_cb(&s->memhp_state, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp); } else { error_setg(errp, "acpi: device unplug for unsupported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -272,6 +282,7 @@ static void acpi_ged_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) AcpiGedState *s = ACPI_GED(adev); acpi_memory_ospm_status(&s->memhp_state, list); + acpi_cpu_ospm_status(&s->cpuhp_state, list); } static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) @@ -286,6 +297,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) sel = ACPI_GED_PWR_DOWN_EVT; } else if (ev & ACPI_NVDIMM_HOTPLUG_STATUS) { sel = ACPI_GED_NVDIMM_HOTPLUG_EVT; + } else if (ev & ACPI_CPU_HOTPLUG_STATUS) { + sel = ACPI_GED_CPU_HOTPLUG_EVT; } else { /* Unknown event. Return without generating interrupt. */ warn_report("GED: Unsupported event %d. No irq injected", ev); @@ -371,6 +384,42 @@ static const VMStateDescription vmstate_acpi_ged = { } }; +static void acpi_ged_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + AcpiGedState *s = ACPI_GED(dev); + uint32_t ged_events; + int i; + + ged_events = ctpop32(s->ged_event_bitmap); + + for (i = 0; i < ARRAY_SIZE(ged_supported_events) && ged_events; i++) { + uint32_t event = s->ged_event_bitmap & ged_supported_events[i]; + + if (!event) { + continue; + } + + switch (event) { + case ACPI_GED_CPU_HOTPLUG_EVT: + /* initialize CPU Hotplug related regions */ + memory_region_init(&s->container_cpuhp, OBJECT(dev), + "cpuhp container", + ACPI_CPU_HOTPLUG_REG_LEN); + sysbus_init_mmio(sbd, &s->container_cpuhp); + cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), + &s->cpuhp_state, 0); + break; + } + ged_events--; + } + + if (ged_events) { + error_report("Unsupported events specified"); + abort(); + } +} + static void acpi_ged_initfn(Object *obj) { DeviceState *dev = DEVICE(obj); @@ -411,6 +460,7 @@ static void acpi_ged_class_init(ObjectClass *class, void *data) dc->desc = "ACPI Generic Event Device"; device_class_set_props(dc, acpi_ged_properties); dc->vmsd = &vmstate_acpi_ged; + dc->realize = acpi_ged_realize; hc->plug = acpi_ged_device_plug_cb; hc->unplug_request = acpi_ged_unplug_request_cb; diff --git a/hw/arm/virt.c b/hw/arm/virt.c index b0c68d66a3..719e83e6a1 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -3308,6 +3308,7 @@ DEFINE_VIRT_MACHINE_AS_LATEST(9, 1) static void virt_machine_9_0_options(MachineClass *mc) { virt_machine_9_1_options(mc); + mc->smbios_memory_device_size = 16 * GiB; compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len); } DEFINE_VIRT_MACHINE(9, 0) diff --git a/hw/audio/virtio-snd.c b/hw/audio/virtio-snd.c index 5993f4f040..e5196aa4bb 100644 --- a/hw/audio/virtio-snd.c +++ b/hw/audio/virtio-snd.c @@ -282,11 +282,13 @@ uint32_t virtio_snd_set_pcm_params(VirtIOSound *s, error_report("Number of channels is not supported."); return cpu_to_le32(VIRTIO_SND_S_NOT_SUPP); } - if (!(supported_formats & BIT(params->format))) { + if (BIT(params->format) > sizeof(supported_formats) || + !(supported_formats & BIT(params->format))) { error_report("Stream format is not supported."); return cpu_to_le32(VIRTIO_SND_S_NOT_SUPP); } - if (!(supported_rates & BIT(params->rate))) { + if (BIT(params->rate) > sizeof(supported_rates) || + !(supported_rates & BIT(params->rate))) { error_report("Stream rate is not supported."); return cpu_to_le32(VIRTIO_SND_S_NOT_SUPP); } @@ -1261,7 +1263,7 @@ static void virtio_snd_pcm_in_cb(void *data, int available) { VirtIOSoundPCMStream *stream = data; VirtIOSoundPCMBuffer *buffer; - size_t size; + size_t size, max_size; WITH_QEMU_LOCK_GUARD(&stream->queue_mutex) { while (!QSIMPLEQ_EMPTY(&stream->queue)) { @@ -1275,7 +1277,12 @@ static void virtio_snd_pcm_in_cb(void *data, int available) continue; } + max_size = iov_size(buffer->elem->in_sg, buffer->elem->in_num); for (;;) { + if (buffer->size >= max_size) { + return_rx_buffer(stream, buffer); + break; + } size = AUD_read(stream->voice.in, buffer->data + buffer->size, MIN(available, (stream->params.period_bytes - diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index fdbc30b9ce..5b7f46bbb0 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -51,6 +51,7 @@ static const int user_feature_bits[] = { VIRTIO_F_RING_PACKED, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c index d2e3e4570a..7982ecd39a 100644 --- a/hw/core/cpu-common.c +++ b/hw/core/cpu-common.c @@ -282,7 +282,10 @@ static void cpu_common_finalize(Object *obj) } #endif free_queued_cpu_work(cpu); - g_array_free(cpu->gdb_regs, TRUE); + /* If cleanup didn't happen in context to gdb_unregister_coprocessor_all */ + if (cpu->gdb_regs) { + g_array_free(cpu->gdb_regs, TRUE); + } qemu_lockcnt_destroy(&cpu->in_ioctl_lock); qemu_mutex_destroy(&cpu->work_mutex); qemu_cond_destroy(cpu->halt_cond); diff --git a/hw/core/machine.c b/hw/core/machine.c index 8a878f84d7..27dcda0248 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -1005,6 +1005,12 @@ static void machine_class_init(ObjectClass *oc, void *data) /* Default 128 MB as guest ram size */ mc->default_ram_size = 128 * MiB; mc->rom_file_has_mr = true; + /* + * SMBIOS 3.1.0 7.18.5 Memory Device — Extended Size + * use max possible value that could be encoded into + * 'Extended Size' field (2047Tb). + */ + mc->smbios_memory_device_size = 2047 * TiB; /* numa node memory size aligned on 8MB by default. * On Linux, each node's border has to be 8MB aligned diff --git a/hw/cxl/cxl-events.c b/hw/cxl/cxl-events.c index d397718b1b..12dee2e467 100644 --- a/hw/cxl/cxl-events.c +++ b/hw/cxl/cxl-events.c @@ -139,6 +139,19 @@ bool cxl_event_insert(CXLDeviceState *cxlds, CXLEventLogType log_type, return cxl_event_count(log) == 1; } +void cxl_discard_all_event_records(CXLDeviceState *cxlds) +{ + CXLEventLogType log_type; + CXLEventLog *log; + + for (log_type = 0; log_type < CXL_EVENT_TYPE_MAX; log_type++) { + log = &cxlds->event_logs[log_type]; + while (!cxl_event_empty(log)) { + cxl_event_delete_head(cxlds, log_type, log); + } + } +} + CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl, uint8_t log_type, int max_recs, size_t *len) diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c index c5f5fcfd64..e9f2543c43 100644 --- a/hw/cxl/cxl-host.c +++ b/hw/cxl/cxl-host.c @@ -315,7 +315,8 @@ static void machine_set_cxl(Object *obj, Visitor *v, const char *name, static void machine_get_cfmw(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { - CXLFixedMemoryWindowOptionsList **list = opaque; + CXLState *state = opaque; + CXLFixedMemoryWindowOptionsList **list = &state->cfmw_list; visit_type_CXLFixedMemoryWindowOptionsList(v, name, list, errp); } diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 74eeb6fde7..b752920ec8 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -12,6 +12,7 @@ #include "hw/pci/msix.h" #include "hw/cxl/cxl.h" #include "hw/cxl/cxl_events.h" +#include "hw/cxl/cxl_mailbox.h" #include "hw/pci/pci.h" #include "hw/pci-bridge/cxl_upstream_port.h" #include "qemu/cutils.h" @@ -62,12 +63,18 @@ enum { #define SET_INTERRUPT_POLICY 0x3 FIRMWARE_UPDATE = 0x02, #define GET_INFO 0x0 + #define TRANSFER 0x1 + #define ACTIVATE 0x2 TIMESTAMP = 0x03, #define GET 0x0 #define SET 0x1 LOGS = 0x04, #define GET_SUPPORTED 0x0 #define GET_LOG 0x1 + FEATURES = 0x05, + #define GET_SUPPORTED 0x0 + #define GET_FEATURE 0x1 + #define SET_FEATURE 0x2 IDENTIFY = 0x40, #define MEMORY_DEVICE 0x0 CCLS = 0x41, @@ -83,6 +90,9 @@ enum { #define GET_POISON_LIST 0x0 #define INJECT_POISON 0x1 #define CLEAR_POISON 0x2 + #define GET_SCAN_MEDIA_CAPABILITIES 0x3 + #define SCAN_MEDIA 0x4 + #define GET_SCAN_MEDIA_RESULTS 0x5 DCD_CONFIG = 0x48, #define GET_DC_CONFIG 0x0 #define GET_DYN_CAP_EXT_LIST 0x1 @@ -235,7 +245,6 @@ static CXLRetCode cmd_events_get_records(const struct cxl_cmd *cmd, log_type = payload_in[0]; pl = (CXLGetEventPayload *)payload_out; - memset(pl, 0, sizeof(*pl)); max_recs = (cxlds->payload_size - CXL_EVENT_PAYLOAD_HDR_SIZE) / CXL_EVENT_RECORD_SIZE; @@ -273,7 +282,6 @@ static CXLRetCode cmd_events_get_interrupt_policy(const struct cxl_cmd *cmd, CXLEventLog *log; policy = (CXLEventInterruptPolicy *)payload_out; - memset(policy, 0, sizeof(*policy)); log = &cxlds->event_logs[CXL_EVENT_TYPE_INFO]; if (log->irq_enabled) { @@ -372,7 +380,6 @@ static CXLRetCode cmd_infostat_identify(const struct cxl_cmd *cmd, QEMU_BUILD_BUG_ON(sizeof(*is_identify) != 18); is_identify = (void *)payload_out; - memset(is_identify, 0, sizeof(*is_identify)); is_identify->pcie_vid = class->vendor_id; is_identify->pcie_did = class->device_id; if (object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_USP)) { @@ -606,7 +613,6 @@ static CXLRetCode cmd_infostat_bg_op_sts(const struct cxl_cmd *cmd, QEMU_BUILD_BUG_ON(sizeof(*bg_op_status) != 8); bg_op_status = (void *)payload_out; - memset(bg_op_status, 0, sizeof(*bg_op_status)); bg_op_status->status = cci->bg.complete_pct << 1; if (cci->bg.runtime > 0) { bg_op_status->status |= 1U << 0; @@ -618,6 +624,9 @@ static CXLRetCode cmd_infostat_bg_op_sts(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +#define CXL_FW_SLOTS 2 +#define CXL_FW_SIZE 0x02000000 /* 32 mb */ + /* CXL r3.1 Section 8.2.9.3.1: Get FW Info (Opcode 0200h) */ static CXLRetCode cmd_firmware_update_get_info(const struct cxl_cmd *cmd, uint8_t *payload_in, @@ -647,17 +656,193 @@ static CXLRetCode cmd_firmware_update_get_info(const struct cxl_cmd *cmd, } fw_info = (void *)payload_out; - memset(fw_info, 0, sizeof(*fw_info)); - fw_info->slots_supported = 2; - fw_info->slot_info = BIT(0) | BIT(3); - fw_info->caps = 0; - pstrcpy(fw_info->fw_rev1, sizeof(fw_info->fw_rev1), "BWFW VERSION 0"); + fw_info->slots_supported = CXL_FW_SLOTS; + fw_info->slot_info = (cci->fw.active_slot & 0x7) | + ((cci->fw.staged_slot & 0x7) << 3); + fw_info->caps = BIT(0); /* online update supported */ + + if (cci->fw.slot[0]) { + pstrcpy(fw_info->fw_rev1, sizeof(fw_info->fw_rev1), "BWFW VERSION 0"); + } + if (cci->fw.slot[1]) { + pstrcpy(fw_info->fw_rev2, sizeof(fw_info->fw_rev2), "BWFW VERSION 1"); + } *len_out = sizeof(*fw_info); return CXL_MBOX_SUCCESS; } +/* CXL r3.1 section 8.2.9.3.2: Transfer FW (Opcode 0201h) */ +#define CXL_FW_XFER_ALIGNMENT 128 + +#define CXL_FW_XFER_ACTION_FULL 0x0 +#define CXL_FW_XFER_ACTION_INIT 0x1 +#define CXL_FW_XFER_ACTION_CONTINUE 0x2 +#define CXL_FW_XFER_ACTION_END 0x3 +#define CXL_FW_XFER_ACTION_ABORT 0x4 + +static CXLRetCode cmd_firmware_update_transfer(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint8_t action; + uint8_t slot; + uint8_t rsvd1[2]; + uint32_t offset; + uint8_t rsvd2[0x78]; + uint8_t data[]; + } QEMU_PACKED *fw_transfer = (void *)payload_in; + size_t offset, length; + + if (fw_transfer->action == CXL_FW_XFER_ACTION_ABORT) { + /* + * At this point there aren't any on-going transfers + * running in the bg - this is serialized before this + * call altogether. Just mark the state machine and + * disregard any other input. + */ + cci->fw.transferring = false; + return CXL_MBOX_SUCCESS; + } + + offset = fw_transfer->offset * CXL_FW_XFER_ALIGNMENT; + length = len - sizeof(*fw_transfer); + if (offset + length > CXL_FW_SIZE) { + return CXL_MBOX_INVALID_INPUT; + } + + if (cci->fw.transferring) { + if (fw_transfer->action == CXL_FW_XFER_ACTION_FULL || + fw_transfer->action == CXL_FW_XFER_ACTION_INIT) { + return CXL_MBOX_FW_XFER_IN_PROGRESS; + } + /* + * Abort partitioned package transfer if over 30 secs + * between parts. As opposed to the explicit ABORT action, + * semantically treat this condition as an error - as + * if a part action were passed without a previous INIT. + */ + if (difftime(time(NULL), cci->fw.last_partxfer) > 30.0) { + cci->fw.transferring = false; + return CXL_MBOX_INVALID_INPUT; + } + } else if (fw_transfer->action == CXL_FW_XFER_ACTION_CONTINUE || + fw_transfer->action == CXL_FW_XFER_ACTION_END) { + return CXL_MBOX_INVALID_INPUT; + } + + /* allow back-to-back retransmission */ + if ((offset != cci->fw.prev_offset || length != cci->fw.prev_len) && + (fw_transfer->action == CXL_FW_XFER_ACTION_CONTINUE || + fw_transfer->action == CXL_FW_XFER_ACTION_END)) { + /* verify no overlaps */ + if (offset < cci->fw.prev_offset + cci->fw.prev_len) { + return CXL_MBOX_FW_XFER_OUT_OF_ORDER; + } + } + + switch (fw_transfer->action) { + case CXL_FW_XFER_ACTION_FULL: /* ignores offset */ + case CXL_FW_XFER_ACTION_END: + if (fw_transfer->slot == 0 || + fw_transfer->slot == cci->fw.active_slot || + fw_transfer->slot > CXL_FW_SLOTS) { + return CXL_MBOX_FW_INVALID_SLOT; + } + + /* mark the slot used upon bg completion */ + break; + case CXL_FW_XFER_ACTION_INIT: + if (offset != 0) { + return CXL_MBOX_INVALID_INPUT; + } + + cci->fw.transferring = true; + cci->fw.prev_offset = offset; + cci->fw.prev_len = length; + break; + case CXL_FW_XFER_ACTION_CONTINUE: + cci->fw.prev_offset = offset; + cci->fw.prev_len = length; + break; + default: + return CXL_MBOX_INVALID_INPUT; + } + + if (fw_transfer->action == CXL_FW_XFER_ACTION_FULL) { + cci->bg.runtime = 10 * 1000UL; + } else { + cci->bg.runtime = 2 * 1000UL; + } + /* keep relevant context for bg completion */ + cci->fw.curr_action = fw_transfer->action; + cci->fw.curr_slot = fw_transfer->slot; + *len_out = 0; + + return CXL_MBOX_BG_STARTED; +} + +static void __do_firmware_xfer(CXLCCI *cci) +{ + switch (cci->fw.curr_action) { + case CXL_FW_XFER_ACTION_FULL: + case CXL_FW_XFER_ACTION_END: + cci->fw.slot[cci->fw.curr_slot - 1] = true; + cci->fw.transferring = false; + break; + case CXL_FW_XFER_ACTION_INIT: + case CXL_FW_XFER_ACTION_CONTINUE: + time(&cci->fw.last_partxfer); + break; + default: + break; + } +} + +/* CXL r3.1 section 8.2.9.3.3: Activate FW (Opcode 0202h) */ +static CXLRetCode cmd_firmware_update_activate(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint8_t action; + uint8_t slot; + } QEMU_PACKED *fw_activate = (void *)payload_in; + QEMU_BUILD_BUG_ON(sizeof(*fw_activate) != 0x2); + + if (fw_activate->slot == 0 || + fw_activate->slot == cci->fw.active_slot || + fw_activate->slot > CXL_FW_SLOTS) { + return CXL_MBOX_FW_INVALID_SLOT; + } + + /* ensure that an actual fw package is there */ + if (!cci->fw.slot[fw_activate->slot - 1]) { + return CXL_MBOX_FW_INVALID_SLOT; + } + + switch (fw_activate->action) { + case 0: /* online */ + cci->fw.active_slot = fw_activate->slot; + break; + case 1: /* reset */ + cci->fw.staged_slot = fw_activate->slot; + break; + default: + return CXL_MBOX_INVALID_INPUT; + } + + return CXL_MBOX_SUCCESS; +} + /* CXL r3.1 Section 8.2.9.4.1: Get Timestamp (Opcode 0300h) */ static CXLRetCode cmd_timestamp_get(const struct cxl_cmd *cmd, uint8_t *payload_in, @@ -768,6 +953,388 @@ static CXLRetCode cmd_logs_get_log(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* CXL r3.1 section 8.2.9.6: Features */ +/* + * Get Supported Features output payload + * CXL r3.1 section 8.2.9.6.1 Table 8-96 + */ +typedef struct CXLSupportedFeatureHeader { + uint16_t entries; + uint16_t nsuppfeats_dev; + uint32_t reserved; +} QEMU_PACKED CXLSupportedFeatureHeader; + +/* + * Get Supported Features Supported Feature Entry + * CXL r3.1 section 8.2.9.6.1 Table 8-97 + */ +typedef struct CXLSupportedFeatureEntry { + QemuUUID uuid; + uint16_t feat_index; + uint16_t get_feat_size; + uint16_t set_feat_size; + uint32_t attr_flags; + uint8_t get_feat_version; + uint8_t set_feat_version; + uint16_t set_feat_effects; + uint8_t rsvd[18]; +} QEMU_PACKED CXLSupportedFeatureEntry; + +/* + * Get Supported Features Supported Feature Entry + * CXL rev 3.1 section 8.2.9.6.1 Table 8-97 + */ +/* Supported Feature Entry : attribute flags */ +#define CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE BIT(0) +#define CXL_FEAT_ENTRY_ATTR_FLAG_DEEPEST_RESET_PERSISTENCE_MASK GENMASK(3, 1) +#define CXL_FEAT_ENTRY_ATTR_FLAG_PERSIST_ACROSS_FIRMWARE_UPDATE BIT(4) +#define CXL_FEAT_ENTRY_ATTR_FLAG_SUPPORT_DEFAULT_SELECTION BIT(5) +#define CXL_FEAT_ENTRY_ATTR_FLAG_SUPPORT_SAVED_SELECTION BIT(6) + +/* Supported Feature Entry : set feature effects */ +#define CXL_FEAT_ENTRY_SFE_CONFIG_CHANGE_COLD_RESET BIT(0) +#define CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE BIT(1) +#define CXL_FEAT_ENTRY_SFE_IMMEDIATE_DATA_CHANGE BIT(2) +#define CXL_FEAT_ENTRY_SFE_IMMEDIATE_POLICY_CHANGE BIT(3) +#define CXL_FEAT_ENTRY_SFE_IMMEDIATE_LOG_CHANGE BIT(4) +#define CXL_FEAT_ENTRY_SFE_SECURITY_STATE_CHANGE BIT(5) +#define CXL_FEAT_ENTRY_SFE_BACKGROUND_OPERATION BIT(6) +#define CXL_FEAT_ENTRY_SFE_SUPPORT_SECONDARY_MAILBOX BIT(7) +#define CXL_FEAT_ENTRY_SFE_SUPPORT_ABORT_BACKGROUND_OPERATION BIT(8) +#define CXL_FEAT_ENTRY_SFE_CEL_VALID BIT(9) +#define CXL_FEAT_ENTRY_SFE_CONFIG_CHANGE_CONV_RESET BIT(10) +#define CXL_FEAT_ENTRY_SFE_CONFIG_CHANGE_CXL_RESET BIT(11) + +enum CXL_SUPPORTED_FEATURES_LIST { + CXL_FEATURE_PATROL_SCRUB = 0, + CXL_FEATURE_ECS, + CXL_FEATURE_MAX +}; + +/* Get Feature CXL 3.1 Spec 8.2.9.6.2 */ +/* + * Get Feature input payload + * CXL r3.1 section 8.2.9.6.2 Table 8-99 + */ +/* Get Feature : Payload in selection */ +enum CXL_GET_FEATURE_SELECTION { + CXL_GET_FEATURE_SEL_CURRENT_VALUE, + CXL_GET_FEATURE_SEL_DEFAULT_VALUE, + CXL_GET_FEATURE_SEL_SAVED_VALUE, + CXL_GET_FEATURE_SEL_MAX +}; + +/* Set Feature CXL 3.1 Spec 8.2.9.6.3 */ +/* + * Set Feature input payload + * CXL r3.1 section 8.2.9.6.3 Table 8-101 + */ +typedef struct CXLSetFeatureInHeader { + QemuUUID uuid; + uint32_t flags; + uint16_t offset; + uint8_t version; + uint8_t rsvd[9]; +} QEMU_PACKED QEMU_ALIGNED(16) CXLSetFeatureInHeader; + +/* Set Feature : Payload in flags */ +#define CXL_SET_FEATURE_FLAG_DATA_TRANSFER_MASK 0x7 +enum CXL_SET_FEATURE_FLAG_DATA_TRANSFER { + CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER, + CXL_SET_FEATURE_FLAG_INITIATE_DATA_TRANSFER, + CXL_SET_FEATURE_FLAG_CONTINUE_DATA_TRANSFER, + CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER, + CXL_SET_FEATURE_FLAG_ABORT_DATA_TRANSFER, + CXL_SET_FEATURE_FLAG_DATA_TRANSFER_MAX +}; +#define CXL_SET_FEAT_DATA_SAVED_ACROSS_RESET BIT(3) + +/* CXL r3.1 section 8.2.9.9.11.1: Device Patrol Scrub Control Feature */ +static const QemuUUID patrol_scrub_uuid = { + .data = UUID(0x96dad7d6, 0xfde8, 0x482b, 0xa7, 0x33, + 0x75, 0x77, 0x4e, 0x06, 0xdb, 0x8a) +}; + +typedef struct CXLMemPatrolScrubSetFeature { + CXLSetFeatureInHeader hdr; + CXLMemPatrolScrubWriteAttrs feat_data; +} QEMU_PACKED QEMU_ALIGNED(16) CXLMemPatrolScrubSetFeature; + +/* + * CXL r3.1 section 8.2.9.9.11.2: + * DDR5 Error Check Scrub (ECS) Control Feature + */ +static const QemuUUID ecs_uuid = { + .data = UUID(0xe5b13f22, 0x2328, 0x4a14, 0xb8, 0xba, + 0xb9, 0x69, 0x1e, 0x89, 0x33, 0x86) +}; + +typedef struct CXLMemECSSetFeature { + CXLSetFeatureInHeader hdr; + CXLMemECSWriteAttrs feat_data[]; +} QEMU_PACKED QEMU_ALIGNED(16) CXLMemECSSetFeature; + +/* CXL r3.1 section 8.2.9.6.1: Get Supported Features (Opcode 0500h) */ +static CXLRetCode cmd_features_get_supported(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint32_t count; + uint16_t start_index; + uint16_t reserved; + } QEMU_PACKED QEMU_ALIGNED(16) * get_feats_in = (void *)payload_in; + + struct { + CXLSupportedFeatureHeader hdr; + CXLSupportedFeatureEntry feat_entries[]; + } QEMU_PACKED QEMU_ALIGNED(16) * get_feats_out = (void *)payload_out; + uint16_t index, req_entries; + uint16_t entry; + + if (!object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3)) { + return CXL_MBOX_UNSUPPORTED; + } + if (get_feats_in->count < sizeof(CXLSupportedFeatureHeader) || + get_feats_in->start_index >= CXL_FEATURE_MAX) { + return CXL_MBOX_INVALID_INPUT; + } + + req_entries = (get_feats_in->count - + sizeof(CXLSupportedFeatureHeader)) / + sizeof(CXLSupportedFeatureEntry); + req_entries = MIN(req_entries, + (CXL_FEATURE_MAX - get_feats_in->start_index)); + + for (entry = 0, index = get_feats_in->start_index; + entry < req_entries; index++) { + switch (index) { + case CXL_FEATURE_PATROL_SCRUB: + /* Fill supported feature entry for device patrol scrub control */ + get_feats_out->feat_entries[entry++] = + (struct CXLSupportedFeatureEntry) { + .uuid = patrol_scrub_uuid, + .feat_index = index, + .get_feat_size = sizeof(CXLMemPatrolScrubReadAttrs), + .set_feat_size = sizeof(CXLMemPatrolScrubWriteAttrs), + .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE, + .get_feat_version = CXL_MEMDEV_PS_GET_FEATURE_VERSION, + .set_feat_version = CXL_MEMDEV_PS_SET_FEATURE_VERSION, + .set_feat_effects = CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE | + CXL_FEAT_ENTRY_SFE_CEL_VALID, + }; + break; + case CXL_FEATURE_ECS: + /* Fill supported feature entry for device DDR5 ECS control */ + get_feats_out->feat_entries[entry++] = + (struct CXLSupportedFeatureEntry) { + .uuid = ecs_uuid, + .feat_index = index, + .get_feat_size = CXL_ECS_NUM_MEDIA_FRUS * + sizeof(CXLMemECSReadAttrs), + .set_feat_size = CXL_ECS_NUM_MEDIA_FRUS * + sizeof(CXLMemECSWriteAttrs), + .attr_flags = CXL_FEAT_ENTRY_ATTR_FLAG_CHANGABLE, + .get_feat_version = CXL_ECS_GET_FEATURE_VERSION, + .set_feat_version = CXL_ECS_SET_FEATURE_VERSION, + .set_feat_effects = CXL_FEAT_ENTRY_SFE_IMMEDIATE_CONFIG_CHANGE | + CXL_FEAT_ENTRY_SFE_CEL_VALID, + }; + break; + default: + __builtin_unreachable(); + } + } + get_feats_out->hdr.nsuppfeats_dev = CXL_FEATURE_MAX; + get_feats_out->hdr.entries = req_entries; + *len_out = sizeof(CXLSupportedFeatureHeader) + + req_entries * sizeof(CXLSupportedFeatureEntry); + + return CXL_MBOX_SUCCESS; +} + +/* CXL r3.1 section 8.2.9.6.2: Get Feature (Opcode 0501h) */ +static CXLRetCode cmd_features_get_feature(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + QemuUUID uuid; + uint16_t offset; + uint16_t count; + uint8_t selection; + } QEMU_PACKED QEMU_ALIGNED(16) * get_feature; + uint16_t bytes_to_copy = 0; + CXLType3Dev *ct3d; + CXLSetFeatureInfo *set_feat_info; + + if (!object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3)) { + return CXL_MBOX_UNSUPPORTED; + } + + ct3d = CXL_TYPE3(cci->d); + get_feature = (void *)payload_in; + + set_feat_info = &ct3d->set_feat_info; + if (qemu_uuid_is_equal(&get_feature->uuid, &set_feat_info->uuid)) { + return CXL_MBOX_FEATURE_TRANSFER_IN_PROGRESS; + } + + if (get_feature->selection != CXL_GET_FEATURE_SEL_CURRENT_VALUE) { + return CXL_MBOX_UNSUPPORTED; + } + if (get_feature->offset + get_feature->count > cci->payload_max) { + return CXL_MBOX_INVALID_INPUT; + } + + if (qemu_uuid_is_equal(&get_feature->uuid, &patrol_scrub_uuid)) { + if (get_feature->offset >= sizeof(CXLMemPatrolScrubReadAttrs)) { + return CXL_MBOX_INVALID_INPUT; + } + bytes_to_copy = sizeof(CXLMemPatrolScrubReadAttrs) - + get_feature->offset; + bytes_to_copy = MIN(bytes_to_copy, get_feature->count); + memcpy(payload_out, + (uint8_t *)&ct3d->patrol_scrub_attrs + get_feature->offset, + bytes_to_copy); + } else if (qemu_uuid_is_equal(&get_feature->uuid, &ecs_uuid)) { + if (get_feature->offset >= CXL_ECS_NUM_MEDIA_FRUS * + sizeof(CXLMemECSReadAttrs)) { + return CXL_MBOX_INVALID_INPUT; + } + bytes_to_copy = CXL_ECS_NUM_MEDIA_FRUS * + sizeof(CXLMemECSReadAttrs) - + get_feature->offset; + bytes_to_copy = MIN(bytes_to_copy, get_feature->count); + memcpy(payload_out, + (uint8_t *)&ct3d->ecs_attrs + get_feature->offset, + bytes_to_copy); + } else { + return CXL_MBOX_UNSUPPORTED; + } + + *len_out = bytes_to_copy; + + return CXL_MBOX_SUCCESS; +} + +/* CXL r3.1 section 8.2.9.6.3: Set Feature (Opcode 0502h) */ +static CXLRetCode cmd_features_set_feature(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + CXLSetFeatureInHeader *hdr = (void *)payload_in; + CXLMemPatrolScrubWriteAttrs *ps_write_attrs; + CXLMemPatrolScrubSetFeature *ps_set_feature; + CXLMemECSWriteAttrs *ecs_write_attrs; + CXLMemECSSetFeature *ecs_set_feature; + CXLSetFeatureInfo *set_feat_info; + uint16_t bytes_to_copy = 0; + uint8_t data_transfer_flag; + CXLType3Dev *ct3d; + uint16_t count; + + + if (!object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3)) { + return CXL_MBOX_UNSUPPORTED; + } + ct3d = CXL_TYPE3(cci->d); + set_feat_info = &ct3d->set_feat_info; + + if (!qemu_uuid_is_null(&set_feat_info->uuid) && + !qemu_uuid_is_equal(&hdr->uuid, &set_feat_info->uuid)) { + return CXL_MBOX_FEATURE_TRANSFER_IN_PROGRESS; + } + if (hdr->flags & CXL_SET_FEAT_DATA_SAVED_ACROSS_RESET) { + set_feat_info->data_saved_across_reset = true; + } else { + set_feat_info->data_saved_across_reset = false; + } + + data_transfer_flag = + hdr->flags & CXL_SET_FEATURE_FLAG_DATA_TRANSFER_MASK; + if (data_transfer_flag == CXL_SET_FEATURE_FLAG_INITIATE_DATA_TRANSFER) { + set_feat_info->uuid = hdr->uuid; + set_feat_info->data_size = 0; + } + set_feat_info->data_transfer_flag = data_transfer_flag; + set_feat_info->data_offset = hdr->offset; + bytes_to_copy = len_in - sizeof(CXLSetFeatureInHeader); + + if (qemu_uuid_is_equal(&hdr->uuid, &patrol_scrub_uuid)) { + if (hdr->version != CXL_MEMDEV_PS_SET_FEATURE_VERSION) { + return CXL_MBOX_UNSUPPORTED; + } + + ps_set_feature = (void *)payload_in; + ps_write_attrs = &ps_set_feature->feat_data; + memcpy((uint8_t *)&ct3d->patrol_scrub_wr_attrs + hdr->offset, + ps_write_attrs, + bytes_to_copy); + set_feat_info->data_size += bytes_to_copy; + + if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER || + data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) { + ct3d->patrol_scrub_attrs.scrub_cycle &= ~0xFF; + ct3d->patrol_scrub_attrs.scrub_cycle |= + ct3d->patrol_scrub_wr_attrs.scrub_cycle_hr & 0xFF; + ct3d->patrol_scrub_attrs.scrub_flags &= ~0x1; + ct3d->patrol_scrub_attrs.scrub_flags |= + ct3d->patrol_scrub_wr_attrs.scrub_flags & 0x1; + } + } else if (qemu_uuid_is_equal(&hdr->uuid, + &ecs_uuid)) { + if (hdr->version != CXL_ECS_SET_FEATURE_VERSION) { + return CXL_MBOX_UNSUPPORTED; + } + + ecs_set_feature = (void *)payload_in; + ecs_write_attrs = ecs_set_feature->feat_data; + memcpy((uint8_t *)ct3d->ecs_wr_attrs + hdr->offset, + ecs_write_attrs, + bytes_to_copy); + set_feat_info->data_size += bytes_to_copy; + + if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER || + data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER) { + for (count = 0; count < CXL_ECS_NUM_MEDIA_FRUS; count++) { + ct3d->ecs_attrs[count].ecs_log_cap = + ct3d->ecs_wr_attrs[count].ecs_log_cap; + ct3d->ecs_attrs[count].ecs_config = + ct3d->ecs_wr_attrs[count].ecs_config & 0x1F; + } + } + } else { + return CXL_MBOX_UNSUPPORTED; + } + + if (data_transfer_flag == CXL_SET_FEATURE_FLAG_FULL_DATA_TRANSFER || + data_transfer_flag == CXL_SET_FEATURE_FLAG_FINISH_DATA_TRANSFER || + data_transfer_flag == CXL_SET_FEATURE_FLAG_ABORT_DATA_TRANSFER) { + memset(&set_feat_info->uuid, 0, sizeof(QemuUUID)); + if (qemu_uuid_is_equal(&hdr->uuid, &patrol_scrub_uuid)) { + memset(&ct3d->patrol_scrub_wr_attrs, 0, set_feat_info->data_size); + } else if (qemu_uuid_is_equal(&hdr->uuid, &ecs_uuid)) { + memset(ct3d->ecs_wr_attrs, 0, set_feat_info->data_size); + } + set_feat_info->data_transfer_flag = 0; + set_feat_info->data_saved_across_reset = false; + set_feat_info->data_offset = 0; + set_feat_info->data_size = 0; + } + + return CXL_MBOX_SUCCESS; +} + /* CXL r3.1 Section 8.2.9.9.1.1: Identify Memory Device (Opcode 4000h) */ static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd, uint8_t *payload_in, @@ -805,7 +1372,6 @@ static CXLRetCode cmd_identify_memory_device(const struct cxl_cmd *cmd, } id = (void *)payload_out; - memset(id, 0, sizeof(*id)); snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0); @@ -953,6 +1519,7 @@ static void __do_sanitization(CXLType3Dev *ct3d) memset(lsa, 0, memory_region_size(mr)); } } + cxl_discard_all_event_records(&ct3d->cxl_dstate); } /* @@ -1095,7 +1662,6 @@ static CXLRetCode cmd_media_get_poison_list(const struct cxl_cmd *cmd, out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]); assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE); - memset(out, 0, out_pl_len); QLIST_FOREACH(ent, poison_list, node) { uint64_t start, stop; @@ -1117,6 +1683,10 @@ static CXLRetCode cmd_media_get_poison_list(const struct cxl_cmd *cmd, out->flags = (1 << 1); stq_le_p(&out->overflow_timestamp, ct3d->poison_list_overflow_ts); } + if (scan_media_running(cci)) { + out->flags |= (1 << 2); + } + stw_le_p(&out->count, record_count); *len_out = out_pl_len; return CXL_MBOX_SUCCESS; @@ -1146,6 +1716,16 @@ static CXLRetCode cmd_media_inject_poison(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } } + /* + * Freeze the list if there is an on-going scan media operation. + */ + if (scan_media_running(cci)) { + /* + * XXX: Spec is ambiguous - is this case considered + * a successful return despite not adding to the list? + */ + goto success; + } if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) { return CXL_MBOX_INJECT_POISON_LIMIT; @@ -1161,6 +1741,7 @@ static CXLRetCode cmd_media_inject_poison(const struct cxl_cmd *cmd, */ QLIST_INSERT_HEAD(poison_list, p, node); ct3d->poison_list_cnt++; +success: *len_out = 0; return CXL_MBOX_SUCCESS; @@ -1200,6 +1781,17 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd, } } + /* + * Freeze the list if there is an on-going scan media operation. + */ + if (scan_media_running(cci)) { + /* + * XXX: Spec is ambiguous - is this case considered + * a successful return despite not removing from the list? + */ + goto success; + } + QLIST_FOREACH(ent, poison_list, node) { /* * Test for contained in entry. Simpler than general case @@ -1210,7 +1802,7 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd, } } if (!ent) { - return CXL_MBOX_SUCCESS; + goto success; } QLIST_REMOVE(ent, node); @@ -1247,12 +1839,262 @@ static CXLRetCode cmd_media_clear_poison(const struct cxl_cmd *cmd, } /* Any fragments have been added, free original entry */ g_free(ent); +success: *len_out = 0; return CXL_MBOX_SUCCESS; } /* + * CXL r3.1 section 8.2.9.9.4.4: Get Scan Media Capabilities + */ +static CXLRetCode +cmd_media_get_scan_media_capabilities(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct get_scan_media_capabilities_pl { + uint64_t pa; + uint64_t length; + } QEMU_PACKED; + + struct get_scan_media_capabilities_out_pl { + uint32_t estimated_runtime_ms; + }; + + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate; + struct get_scan_media_capabilities_pl *in = (void *)payload_in; + struct get_scan_media_capabilities_out_pl *out = (void *)payload_out; + uint64_t query_start; + uint64_t query_length; + + query_start = ldq_le_p(&in->pa); + /* 64 byte alignment required */ + if (query_start & 0x3f) { + return CXL_MBOX_INVALID_INPUT; + } + query_length = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE; + + if (query_start + query_length > cxl_dstate->static_mem_size) { + return CXL_MBOX_INVALID_PA; + } + + /* + * Just use 400 nanosecond access/read latency + 100 ns for + * the cost of updating the poison list. For small enough + * chunks return at least 1 ms. + */ + stl_le_p(&out->estimated_runtime_ms, + MAX(1, query_length * (0.0005L / 64))); + + *len_out = sizeof(*out); + return CXL_MBOX_SUCCESS; +} + +static void __do_scan_media(CXLType3Dev *ct3d) +{ + CXLPoison *ent; + unsigned int results_cnt = 0; + + QLIST_FOREACH(ent, &ct3d->scan_media_results, node) { + results_cnt++; + } + + /* only scan media may clear the overflow */ + if (ct3d->poison_list_overflowed && + ct3d->poison_list_cnt == results_cnt) { + cxl_clear_poison_list_overflowed(ct3d); + } + /* scan media has run since last conventional reset */ + ct3d->scan_media_hasrun = true; +} + +/* + * CXL r3.1 section 8.2.9.9.4.5: Scan Media + */ +static CXLRetCode cmd_media_scan_media(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct scan_media_pl { + uint64_t pa; + uint64_t length; + uint8_t flags; + } QEMU_PACKED; + + struct scan_media_pl *in = (void *)payload_in; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLDeviceState *cxl_dstate = &ct3d->cxl_dstate; + uint64_t query_start; + uint64_t query_length; + CXLPoison *ent, *next; + + query_start = ldq_le_p(&in->pa); + /* 64 byte alignment required */ + if (query_start & 0x3f) { + return CXL_MBOX_INVALID_INPUT; + } + query_length = ldq_le_p(&in->length) * CXL_CACHE_LINE_SIZE; + + if (query_start + query_length > cxl_dstate->static_mem_size) { + return CXL_MBOX_INVALID_PA; + } + if (ct3d->dc.num_regions && query_start + query_length >= + cxl_dstate->static_mem_size + ct3d->dc.total_capacity) { + return CXL_MBOX_INVALID_PA; + } + + if (in->flags == 0) { /* TODO */ + qemu_log_mask(LOG_UNIMP, + "Scan Media Event Log is unsupported\n"); + } + + /* any previous results are discarded upon a new Scan Media */ + QLIST_FOREACH_SAFE(ent, &ct3d->scan_media_results, node, next) { + QLIST_REMOVE(ent, node); + g_free(ent); + } + + /* kill the poison list - it will be recreated */ + if (ct3d->poison_list_overflowed) { + QLIST_FOREACH_SAFE(ent, &ct3d->poison_list, node, next) { + QLIST_REMOVE(ent, node); + g_free(ent); + ct3d->poison_list_cnt--; + } + } + + /* + * Scan the backup list and move corresponding entries + * into the results list, updating the poison list + * when possible. + */ + QLIST_FOREACH_SAFE(ent, &ct3d->poison_list_bkp, node, next) { + CXLPoison *res; + + if (ent->start >= query_start + query_length || + ent->start + ent->length <= query_start) { + continue; + } + + /* + * If a Get Poison List cmd comes in while this + * scan is being done, it will see the new complete + * list, while setting the respective flag. + */ + if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) { + CXLPoison *p = g_new0(CXLPoison, 1); + + p->start = ent->start; + p->length = ent->length; + p->type = ent->type; + QLIST_INSERT_HEAD(&ct3d->poison_list, p, node); + ct3d->poison_list_cnt++; + } + + res = g_new0(CXLPoison, 1); + res->start = ent->start; + res->length = ent->length; + res->type = ent->type; + QLIST_INSERT_HEAD(&ct3d->scan_media_results, res, node); + + QLIST_REMOVE(ent, node); + g_free(ent); + } + + cci->bg.runtime = MAX(1, query_length * (0.0005L / 64)); + *len_out = 0; + + return CXL_MBOX_BG_STARTED; +} + +/* + * CXL r3.1 section 8.2.9.9.4.6: Get Scan Media Results + */ +static CXLRetCode cmd_media_get_scan_media_results(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct get_scan_media_results_out_pl { + uint64_t dpa_restart; + uint64_t length; + uint8_t flags; + uint8_t rsvd1; + uint16_t count; + uint8_t rsvd2[0xc]; + struct { + uint64_t addr; + uint32_t length; + uint32_t resv; + } QEMU_PACKED records[]; + } QEMU_PACKED; + + struct get_scan_media_results_out_pl *out = (void *)payload_out; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLPoisonList *scan_media_results = &ct3d->scan_media_results; + CXLPoison *ent, *next; + uint16_t total_count = 0, record_count = 0, i = 0; + uint16_t out_pl_len; + + if (!ct3d->scan_media_hasrun) { + return CXL_MBOX_UNSUPPORTED; + } + + /* + * Calculate limits, all entries are within the same address range of the + * last scan media call. + */ + QLIST_FOREACH(ent, scan_media_results, node) { + size_t rec_size = record_count * sizeof(out->records[0]); + + if (sizeof(*out) + rec_size < CXL_MAILBOX_MAX_PAYLOAD_SIZE) { + record_count++; + } + total_count++; + } + + out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]); + assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE); + + memset(out, 0, out_pl_len); + QLIST_FOREACH_SAFE(ent, scan_media_results, node, next) { + uint64_t start, stop; + + if (i == record_count) { + break; + } + + start = ROUND_DOWN(ent->start, 64ull); + stop = ROUND_DOWN(ent->start, 64ull) + ent->length; + stq_le_p(&out->records[i].addr, start | (ent->type & 0x7)); + stl_le_p(&out->records[i].length, (stop - start) / CXL_CACHE_LINE_SIZE); + i++; + + /* consume the returning entry */ + QLIST_REMOVE(ent, node); + g_free(ent); + } + + stw_le_p(&out->count, record_count); + if (total_count > record_count) { + out->flags = (1 << 0); /* More Media Error Records */ + } + + *len_out = out_pl_len; + return CXL_MBOX_SUCCESS; +} + +/* * CXL r3.1 section 8.2.9.9.9.1: Get Dynamic Capacity Configuration * (Opcode: 4800h) */ @@ -1822,40 +2664,51 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -#define IMMEDIATE_CONFIG_CHANGE (1 << 1) -#define IMMEDIATE_DATA_CHANGE (1 << 2) -#define IMMEDIATE_POLICY_CHANGE (1 << 3) -#define IMMEDIATE_LOG_CHANGE (1 << 4) -#define SECURITY_STATE_CHANGE (1 << 5) -#define BACKGROUND_OPERATION (1 << 6) - static const struct cxl_cmd cxl_cmd_set[256][256] = { [EVENTS][GET_RECORDS] = { "EVENTS_GET_RECORDS", cmd_events_get_records, 1, 0 }, [EVENTS][CLEAR_RECORDS] = { "EVENTS_CLEAR_RECORDS", - cmd_events_clear_records, ~0, IMMEDIATE_LOG_CHANGE }, + cmd_events_clear_records, ~0, CXL_MBOX_IMMEDIATE_LOG_CHANGE }, [EVENTS][GET_INTERRUPT_POLICY] = { "EVENTS_GET_INTERRUPT_POLICY", cmd_events_get_interrupt_policy, 0, 0 }, [EVENTS][SET_INTERRUPT_POLICY] = { "EVENTS_SET_INTERRUPT_POLICY", cmd_events_set_interrupt_policy, - ~0, IMMEDIATE_CONFIG_CHANGE }, + ~0, CXL_MBOX_IMMEDIATE_CONFIG_CHANGE }, [FIRMWARE_UPDATE][GET_INFO] = { "FIRMWARE_UPDATE_GET_INFO", cmd_firmware_update_get_info, 0, 0 }, + [FIRMWARE_UPDATE][TRANSFER] = { "FIRMWARE_UPDATE_TRANSFER", + cmd_firmware_update_transfer, ~0, CXL_MBOX_BACKGROUND_OPERATION }, + [FIRMWARE_UPDATE][ACTIVATE] = { "FIRMWARE_UPDATE_ACTIVATE", + cmd_firmware_update_activate, 2, CXL_MBOX_BACKGROUND_OPERATION }, [TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 }, [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, - 8, IMMEDIATE_POLICY_CHANGE }, + 8, CXL_MBOX_IMMEDIATE_POLICY_CHANGE }, [LOGS][GET_SUPPORTED] = { "LOGS_GET_SUPPORTED", cmd_logs_get_supported, 0, 0 }, [LOGS][GET_LOG] = { "LOGS_GET_LOG", cmd_logs_get_log, 0x18, 0 }, + [FEATURES][GET_SUPPORTED] = { "FEATURES_GET_SUPPORTED", + cmd_features_get_supported, 0x8, 0 }, + [FEATURES][GET_FEATURE] = { "FEATURES_GET_FEATURE", + cmd_features_get_feature, 0x15, 0 }, + [FEATURES][SET_FEATURE] = { "FEATURES_SET_FEATURE", + cmd_features_set_feature, + ~0, + (CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | + CXL_MBOX_IMMEDIATE_DATA_CHANGE | + CXL_MBOX_IMMEDIATE_POLICY_CHANGE | + CXL_MBOX_IMMEDIATE_LOG_CHANGE | + CXL_MBOX_SECURITY_STATE_CHANGE)}, [IDENTIFY][MEMORY_DEVICE] = { "IDENTIFY_MEMORY_DEVICE", cmd_identify_memory_device, 0, 0 }, [CCLS][GET_PARTITION_INFO] = { "CCLS_GET_PARTITION_INFO", cmd_ccls_get_partition_info, 0, 0 }, [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 }, [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa, - ~0, IMMEDIATE_CONFIG_CHANGE | IMMEDIATE_DATA_CHANGE }, + ~0, CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | CXL_MBOX_IMMEDIATE_DATA_CHANGE }, [SANITIZE][OVERWRITE] = { "SANITIZE_OVERWRITE", cmd_sanitize_overwrite, 0, - IMMEDIATE_DATA_CHANGE | SECURITY_STATE_CHANGE | BACKGROUND_OPERATION }, + (CXL_MBOX_IMMEDIATE_DATA_CHANGE | + CXL_MBOX_SECURITY_STATE_CHANGE | + CXL_MBOX_BACKGROUND_OPERATION)}, [PERSISTENT_MEM][GET_SECURITY_STATE] = { "GET_SECURITY_STATE", cmd_get_security_state, 0, 0 }, [MEDIA_AND_POISON][GET_POISON_LIST] = { "MEDIA_AND_POISON_GET_POISON_LIST", @@ -1864,6 +2717,14 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = { cmd_media_inject_poison, 8, 0 }, [MEDIA_AND_POISON][CLEAR_POISON] = { "MEDIA_AND_POISON_CLEAR_POISON", cmd_media_clear_poison, 72, 0 }, + [MEDIA_AND_POISON][GET_SCAN_MEDIA_CAPABILITIES] = { + "MEDIA_AND_POISON_GET_SCAN_MEDIA_CAPABILITIES", + cmd_media_get_scan_media_capabilities, 16, 0 }, + [MEDIA_AND_POISON][SCAN_MEDIA] = { "MEDIA_AND_POISON_SCAN_MEDIA", + cmd_media_scan_media, 17, CXL_MBOX_BACKGROUND_OPERATION }, + [MEDIA_AND_POISON][GET_SCAN_MEDIA_RESULTS] = { + "MEDIA_AND_POISON_GET_SCAN_MEDIA_RESULTS", + cmd_media_get_scan_media_results, 0, 0 }, }; static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = { @@ -1874,10 +2735,10 @@ static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = { 8, 0 }, [DCD_CONFIG][ADD_DYN_CAP_RSP] = { "DCD_ADD_DYNAMIC_CAPACITY_RESPONSE", cmd_dcd_add_dyn_cap_rsp, - ~0, IMMEDIATE_DATA_CHANGE }, + ~0, CXL_MBOX_IMMEDIATE_DATA_CHANGE }, [DCD_CONFIG][RELEASE_DYN_CAP] = { "DCD_RELEASE_DYNAMIC_CAPACITY", cmd_dcd_release_dyn_cap, - ~0, IMMEDIATE_DATA_CHANGE }, + ~0, CXL_MBOX_IMMEDIATE_DATA_CHANGE }, }; static const struct cxl_cmd cxl_cmd_set_sw[256][256] = { @@ -1885,8 +2746,8 @@ static const struct cxl_cmd cxl_cmd_set_sw[256][256] = { [INFOSTAT][BACKGROUND_OPERATION_STATUS] = { "BACKGROUND_OPERATION_STATUS", cmd_infostat_bg_op_sts, 0, 0 }, [TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 }, - [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, 0, - IMMEDIATE_POLICY_CHANGE }, + [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, 8, + CXL_MBOX_IMMEDIATE_POLICY_CHANGE }, [LOGS][GET_SUPPORTED] = { "LOGS_GET_SUPPORTED", cmd_logs_get_supported, 0, 0 }, [LOGS][GET_LOG] = { "LOGS_GET_LOG", cmd_logs_get_log, 0x18, 0 }, @@ -1913,6 +2774,7 @@ int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd, int ret; const struct cxl_cmd *cxl_cmd; opcode_handler h; + CXLDeviceState *cxl_dstate; *len_out = 0; cxl_cmd = &cci->cxl_cmd_set[set][cmd]; @@ -1928,28 +2790,34 @@ int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd, } /* Only one bg command at a time */ - if ((cxl_cmd->effect & BACKGROUND_OPERATION) && + if ((cxl_cmd->effect & CXL_MBOX_BACKGROUND_OPERATION) && cci->bg.runtime > 0) { return CXL_MBOX_BUSY; } - /* forbid any selected commands while overwriting */ - if (sanitize_running(cci)) { - if (h == cmd_events_get_records || - h == cmd_ccls_get_partition_info || - h == cmd_ccls_set_lsa || - h == cmd_ccls_get_lsa || - h == cmd_logs_get_log || - h == cmd_media_get_poison_list || - h == cmd_media_inject_poison || - h == cmd_media_clear_poison || - h == cmd_sanitize_overwrite) { - return CXL_MBOX_MEDIA_DISABLED; + /* forbid any selected commands while the media is disabled */ + if (object_dynamic_cast(OBJECT(cci->d), TYPE_CXL_TYPE3)) { + cxl_dstate = &CXL_TYPE3(cci->d)->cxl_dstate; + + if (cxl_dev_media_disabled(cxl_dstate)) { + if (h == cmd_events_get_records || + h == cmd_ccls_get_partition_info || + h == cmd_ccls_set_lsa || + h == cmd_ccls_get_lsa || + h == cmd_logs_get_log || + h == cmd_media_get_poison_list || + h == cmd_media_inject_poison || + h == cmd_media_clear_poison || + h == cmd_sanitize_overwrite || + h == cmd_firmware_update_transfer || + h == cmd_firmware_update_activate) { + return CXL_MBOX_MEDIA_DISABLED; + } } } ret = (*h)(cxl_cmd, pl_in, len_in, pl_out, len_out, cci); - if ((cxl_cmd->effect & BACKGROUND_OPERATION) && + if ((cxl_cmd->effect & CXL_MBOX_BACKGROUND_OPERATION) && ret == CXL_MBOX_BG_STARTED) { *bg_started = true; } else { @@ -1987,6 +2855,9 @@ static void bg_timercb(void *opaque) cci->bg.complete_pct = 100; cci->bg.ret_code = ret; switch (cci->bg.opcode) { + case 0x0201: /* fw transfer */ + __do_firmware_xfer(cci); + break; case 0x4400: /* sanitize */ { CXLType3Dev *ct3d = CXL_TYPE3(cci->d); @@ -1995,8 +2866,13 @@ static void bg_timercb(void *opaque) cxl_dev_enable_media(&ct3d->cxl_dstate); } break; - case 0x4304: /* TODO: scan media */ + case 0x4304: /* scan media */ + { + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + + __do_scan_media(ct3d); break; + } default: __builtin_unreachable(); break; @@ -2053,6 +2929,10 @@ void cxl_init_cci(CXLCCI *cci, size_t payload_max) cci->bg.runtime = 0; cci->bg.timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, bg_timercb, cci); + + memset(&cci->fw, 0, sizeof(cci->fw)); + cci->fw.active_slot = 1; + cci->fw.slot[cci->fw.active_slot - 1] = true; } static void cxl_copy_cci_commands(CXLCCI *cci, const struct cxl_cmd (*cxl_cmds)[256]) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index f4e366f64f..5d4bd2b710 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1536,7 +1536,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, .fw_unplugs_cpu = pm->smi_on_cpu_unplug, }; build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry, - pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02"); + pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02", + AML_SYSTEM_IO); } if (pcms->memhp_io_base && nr_mem) { diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 37c21a0aec..be0cb39b5c 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -358,7 +358,7 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id, { struct vtd_iotlb_key key; VTDIOTLBEntry *entry; - int level; + unsigned level; for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) { key.gfn = vtd_get_iotlb_gfn(addr, level); diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index f8cf99bddf..5f32c36943 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -264,10 +264,10 @@ #define VTD_FRCD_FR(val) (((val) & 0xffULL) << 32) #define VTD_FRCD_SID_MASK 0xffffULL #define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK) +#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40) +#define VTD_FRCD_PP(val) (((val) & 0x1ULL) << 31) /* For the low 64-bit of 128-bit */ #define VTD_FRCD_FI(val) ((val) & ~0xfffULL) -#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40) -#define VTD_FRCD_PP(val) (((val) & 0x1) << 31) #define VTD_FRCD_IR_IDX(val) (((val) & 0xffffULL) << 48) /* DMA Remapping Fault Conditions */ @@ -436,7 +436,7 @@ struct VTDIOTLBPageInvInfo { uint16_t domain_id; uint32_t pasid; uint64_t addr; - uint8_t mask; + uint64_t mask; }; typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 9445b07b4f..d9e69243b4 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -495,6 +495,7 @@ static void pc_i440fx_machine_9_0_options(MachineClass *m) pc_i440fx_machine_9_1_options(m); m->alias = NULL; m->is_default = false; + m->smbios_memory_device_size = 16 * GiB; compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len); compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 71d3c6d122..9d108b194e 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -374,6 +374,7 @@ static void pc_q35_machine_9_0_options(MachineClass *m) PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_machine_9_1_options(m); m->alias = NULL; + m->smbios_memory_device_size = 16 * GiB; compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len); compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len); pcmc->isa_bios_alias = false; diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 35ac59883a..d648192ab9 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -737,6 +737,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) error_setg(errp, "volatile memdev must have backing device"); return false; } + if (host_memory_backend_is_mapped(ct3d->hostvmem)) { + error_setg(errp, "memory backend %s can't be used multiple times.", + object_get_canonical_path_component(OBJECT(ct3d->hostvmem))); + return false; + } memory_region_set_nonvolatile(vmr, false); memory_region_set_enabled(vmr, true); host_memory_backend_set_mapped(ct3d->hostvmem, true); @@ -760,6 +765,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) error_setg(errp, "persistent memdev must have backing device"); return false; } + if (host_memory_backend_is_mapped(ct3d->hostpmem)) { + error_setg(errp, "memory backend %s can't be used multiple times.", + object_get_canonical_path_component(OBJECT(ct3d->hostpmem))); + return false; + } memory_region_set_nonvolatile(pmr, true); memory_region_set_enabled(pmr, true); host_memory_backend_set_mapped(ct3d->hostpmem, true); @@ -790,6 +800,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) return false; } + if (host_memory_backend_is_mapped(ct3d->dc.host_dc)) { + error_setg(errp, "memory backend %s can't be used multiple times.", + object_get_canonical_path_component(OBJECT(ct3d->dc.host_dc))); + return false; + } /* * Set DC regions as volatile for now, non-volatile support can * be added in the future if needed. @@ -829,6 +844,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) uint8_t *pci_conf = pci_dev->config; unsigned short msix_num = 6; int i, rc; + uint16_t count; QTAILQ_INIT(&ct3d->error_list); @@ -893,6 +909,28 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) } cxl_event_init(&ct3d->cxl_dstate, 2); + /* Set default value for patrol scrub attributes */ + ct3d->patrol_scrub_attrs.scrub_cycle_cap = + CXL_MEMDEV_PS_SCRUB_CYCLE_CHANGE_CAP_DEFAULT | + CXL_MEMDEV_PS_SCRUB_REALTIME_REPORT_CAP_DEFAULT; + ct3d->patrol_scrub_attrs.scrub_cycle = + CXL_MEMDEV_PS_CUR_SCRUB_CYCLE_DEFAULT | + (CXL_MEMDEV_PS_MIN_SCRUB_CYCLE_DEFAULT << 8); + ct3d->patrol_scrub_attrs.scrub_flags = CXL_MEMDEV_PS_ENABLE_DEFAULT; + + /* Set default value for DDR5 ECS read attributes */ + for (count = 0; count < CXL_ECS_NUM_MEDIA_FRUS; count++) { + ct3d->ecs_attrs[count].ecs_log_cap = + CXL_ECS_LOG_ENTRY_TYPE_DEFAULT; + ct3d->ecs_attrs[count].ecs_cap = + CXL_ECS_REALTIME_REPORT_CAP_DEFAULT; + ct3d->ecs_attrs[count].ecs_config = + CXL_ECS_THRESHOLD_COUNT_DEFAULT | + (CXL_ECS_MODE_DEFAULT << 3); + /* Reserved */ + ct3d->ecs_attrs[count].ecs_flags = 0; + } + return; err_release_cdat: @@ -1127,7 +1165,7 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, return MEMTX_ERROR; } - if (sanitize_running(&ct3d->cci)) { + if (cxl_dev_media_disabled(&ct3d->cxl_dstate)) { qemu_guest_getrandom_nofail(data, size); return MEMTX_OK; } @@ -1149,7 +1187,7 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, return MEMTX_ERROR; } - if (sanitize_running(&ct3d->cci)) { + if (cxl_dev_media_disabled(&ct3d->cxl_dstate)) { return MEMTX_OK; } @@ -1304,6 +1342,12 @@ void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d) cxl_device_get_timestamp(&ct3d->cxl_dstate); } +void cxl_clear_poison_list_overflowed(CXLType3Dev *ct3d) +{ + ct3d->poison_list_overflowed = false; + ct3d->poison_list_overflow_ts = 0; +} + void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length, Error **errp) { @@ -1340,19 +1384,21 @@ void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length, } } - if (ct3d->poison_list_cnt == CXL_POISON_LIST_LIMIT) { - cxl_set_poison_list_overflowed(ct3d); - return; - } - p = g_new0(CXLPoison, 1); p->length = length; p->start = start; /* Different from injected via the mbox */ p->type = CXL_POISON_TYPE_INTERNAL; - QLIST_INSERT_HEAD(&ct3d->poison_list, p, node); - ct3d->poison_list_cnt++; + if (ct3d->poison_list_cnt < CXL_POISON_LIST_LIMIT) { + QLIST_INSERT_HEAD(&ct3d->poison_list, p, node); + ct3d->poison_list_cnt++; + } else { + if (!ct3d->poison_list_overflowed) { + cxl_set_poison_list_overflowed(ct3d); + } + QLIST_INSERT_HEAD(&ct3d->poison_list_bkp, p, node); + } } /* For uncorrectable errors include support for multiple header recording */ diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 18898afe81..a788e6937e 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -48,6 +48,7 @@ static const int kernel_feature_bits[] = { VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_PACKED, VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, VIRTIO_F_NOTIFICATION_DATA, VIRTIO_NET_F_HASH_REPORT, VHOST_INVALID_FEATURE_BIT @@ -78,6 +79,7 @@ static const int user_feature_bits[] = { VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_PACKED, VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_GUEST_USO4, diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 8d25174d25..e86ea2e7ce 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -203,6 +203,7 @@ #include "sysemu/hostmem.h" #include "hw/pci/msix.h" #include "hw/pci/pcie_sriov.h" +#include "sysemu/spdm-socket.h" #include "migration/vmstate.h" #include "nvme.h" @@ -8315,6 +8316,27 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset) return 0; } +static bool pcie_doe_spdm_rsp(DOECap *doe_cap) +{ + void *req = pcie_doe_get_write_mbox_ptr(doe_cap); + uint32_t req_len = pcie_doe_get_obj_len(req) * 4; + void *rsp = doe_cap->read_mbox; + uint32_t rsp_len = SPDM_SOCKET_MAX_MESSAGE_BUFFER_SIZE; + + uint32_t recvd = spdm_socket_rsp(doe_cap->spdm_socket, + SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE, + req, req_len, rsp, rsp_len); + doe_cap->read_mbox_len += DIV_ROUND_UP(recvd, 4); + + return recvd != 0; +} + +static DOEProtocol doe_spdm_prot[] = { + { PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_CMA, pcie_doe_spdm_rsp }, + { PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_SECURED_CMA, pcie_doe_spdm_rsp }, + { } +}; + static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) { ERRP_GUARD(); @@ -8402,6 +8424,25 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize); + pcie_cap_deverr_init(pci_dev); + + /* DOE Initialisation */ + if (pci_dev->spdm_port) { + uint16_t doe_offset = n->params.sriov_max_vfs ? + PCI_CONFIG_SPACE_SIZE + PCI_ARI_SIZEOF + : PCI_CONFIG_SPACE_SIZE; + + pcie_doe_init(pci_dev, &pci_dev->doe_spdm, doe_offset, + doe_spdm_prot, true, 0); + + pci_dev->doe_spdm.spdm_socket = spdm_socket_connect(pci_dev->spdm_port, + errp); + + if (pci_dev->doe_spdm.spdm_socket < 0) { + return false; + } + } + if (n->params.cmb_size_mb) { nvme_init_cmb(n, pci_dev); } @@ -8650,6 +8691,11 @@ static void nvme_exit(PCIDevice *pci_dev) g_free(n->cmb.buf); } + if (pci_dev->doe_spdm.spdm_socket > 0) { + spdm_socket_close(pci_dev->doe_spdm.spdm_socket, + SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE); + } + if (n->pmr.dev) { host_memory_backend_set_mapped(n->pmr.dev, false); } @@ -8695,6 +8741,7 @@ static Property nvme_props[] = { DEFINE_PROP_BOOL("msix-exclusive-bar", NvmeCtrl, params.msix_exclusive_bar, false), DEFINE_PROP_UINT16("mqes", NvmeCtrl, params.mqes, 0x7ff), + DEFINE_PROP_UINT16("spdm_port", PCIDevice, spdm_port, 0), DEFINE_PROP_END_OF_LIST(), }; @@ -8766,11 +8813,25 @@ static void nvme_pci_write_config(PCIDevice *dev, uint32_t address, { uint16_t old_num_vfs = pcie_sriov_num_vfs(dev); + if (pcie_find_capability(dev, PCI_EXT_CAP_ID_DOE)) { + pcie_doe_write_config(&dev->doe_spdm, address, val, len); + } pci_default_write_config(dev, address, val, len); pcie_cap_flr_write_config(dev, address, val, len); nvme_sriov_post_write_config(dev, old_num_vfs); } +static uint32_t nvme_pci_read_config(PCIDevice *dev, uint32_t address, int len) +{ + uint32_t val; + if (dev->spdm_port && pcie_find_capability(dev, PCI_EXT_CAP_ID_DOE)) { + if (pcie_doe_read_config(&dev->doe_spdm, address, len, &val)) { + return val; + } + } + return pci_default_read_config(dev, address, len); +} + static const VMStateDescription nvme_vmstate = { .name = "nvme", .unmigratable = 1, @@ -8783,6 +8844,7 @@ static void nvme_class_init(ObjectClass *oc, void *data) pc->realize = nvme_realize; pc->config_write = nvme_pci_write_config; + pc->config_read = nvme_pci_read_config; pc->exit = nvme_exit; pc->class_id = PCI_CLASS_STORAGE_EXPRESS; pc->revision = 2; diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c index f69413ea2c..391fabb8a8 100644 --- a/hw/pci-host/gpex-acpi.c +++ b/hw/pci-host/gpex-acpi.c @@ -7,7 +7,8 @@ #include "hw/pci/pcie_host.h" #include "hw/acpi/cxl.h" -static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq) +static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq, + Aml *scope, uint8_t bus_num) { Aml *method, *crs; int i, slot_no; @@ -20,7 +21,7 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq) Aml *pkg = aml_package(4); aml_append(pkg, aml_int((slot_no << 16) | 0xFFFF)); aml_append(pkg, aml_int(i)); - aml_append(pkg, aml_name("GSI%d", gsi)); + aml_append(pkg, aml_name("L%.02X%X", bus_num, gsi)); aml_append(pkg, aml_int(0)); aml_append(rt_pkg, pkg); } @@ -30,7 +31,7 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq) /* Create GSI link device */ for (i = 0; i < PCI_NUM_PINS; i++) { uint32_t irqs = irq + i; - Aml *dev_gsi = aml_device("GSI%d", i); + Aml *dev_gsi = aml_device("L%.02X%X", bus_num, i); aml_append(dev_gsi, aml_name_decl("_HID", aml_string("PNP0C0F"))); aml_append(dev_gsi, aml_name_decl("_UID", aml_int(i))); crs = aml_resource_template(); @@ -45,7 +46,7 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq) aml_append(dev_gsi, aml_name_decl("_CRS", crs)); method = aml_method("_SRS", 1, AML_NOTSERIALIZED); aml_append(dev_gsi, method); - aml_append(dev, dev_gsi); + aml_append(scope, dev_gsi); } } @@ -174,7 +175,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node))); } - acpi_dsdt_add_pci_route_table(dev, cfg->irq); + acpi_dsdt_add_pci_route_table(dev, cfg->irq, scope, bus_num); /* * Resources defined for PXBs are composed of the following parts: @@ -205,7 +206,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) aml_append(dev, aml_name_decl("_STR", aml_unicode("PCIe 0 Device"))); aml_append(dev, aml_name_decl("_CCA", aml_int(1))); - acpi_dsdt_add_pci_route_table(dev, cfg->irq); + acpi_dsdt_add_pci_route_table(dev, cfg->irq, scope, 0); method = aml_method("_CBA", 0, AML_NOTSERIALIZED); aml_append(method, aml_return(aml_int(cfg->ecam.base))); diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 4c7be52951..8ad5d7e2d8 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -85,6 +85,7 @@ static Property pci_props[] = { QEMU_PCIE_ERR_UNC_MASK_BITNR, true), DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present, QEMU_PCIE_ARI_NEXTFN_1_BITNR, false), + DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf), DEFINE_PROP_END_OF_LIST() }; @@ -959,13 +960,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; } - /* - * With SR/IOV and ARI, a device at function 0 need not be a multifunction - * device, as it may just be a VF that ended up with function 0 in - * the legacy PCI interpretation. Avoid failing in such cases: - */ - if (pci_is_vf(dev) && - dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { + /* SR/IOV is not handled here. */ + if (pci_is_vf(dev)) { return; } @@ -998,7 +994,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) } /* function 0 indicates single function, so function > 0 must be NULL */ for (func = 1; func < PCI_FUNC_MAX; ++func) { - if (bus->devices[PCI_DEVFN(slot, func)]) { + PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)]; + if (device && !pci_is_vf(device)) { error_setg(errp, "PCI: %x.0 indicates single function, " "but %x.%x is already populated.", slot, slot, func); @@ -1283,6 +1280,7 @@ static void pci_qdev_unrealize(DeviceState *dev) pci_unregister_io_regions(pci_dev); pci_del_option_rom(pci_dev); + pcie_sriov_unregister_device(pci_dev); if (pc->exit) { pc->exit(pci_dev); @@ -1314,7 +1312,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, pcibus_t size = memory_region_size(memory); uint8_t hdr_type; - assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */ assert(region_num >= 0); assert(region_num < PCI_NUM_REGIONS); assert(is_power_of_2(size)); @@ -1325,7 +1322,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2); r = &pci_dev->io_regions[region_num]; - r->addr = PCI_BAR_UNMAPPED; r->size = size; r->type = type; r->memory = memory; @@ -1333,22 +1329,35 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, ? pci_get_bus(pci_dev)->address_space_io : pci_get_bus(pci_dev)->address_space_mem; - wmask = ~(size - 1); - if (region_num == PCI_ROM_SLOT) { - /* ROM enable bit is writable */ - wmask |= PCI_ROM_ADDRESS_ENABLE; - } - - addr = pci_bar(pci_dev, region_num); - pci_set_long(pci_dev->config + addr, type); + if (pci_is_vf(pci_dev)) { + PCIDevice *pf = pci_dev->exp.sriov_vf.pf; + assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]); - if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && - r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { - pci_set_quad(pci_dev->wmask + addr, wmask); - pci_set_quad(pci_dev->cmask + addr, ~0ULL); + r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size); + if (r->addr != PCI_BAR_UNMAPPED) { + memory_region_add_subregion_overlap(r->address_space, + r->addr, r->memory, 1); + } } else { - pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); - pci_set_long(pci_dev->cmask + addr, 0xffffffff); + r->addr = PCI_BAR_UNMAPPED; + + wmask = ~(size - 1); + if (region_num == PCI_ROM_SLOT) { + /* ROM enable bit is writable */ + wmask |= PCI_ROM_ADDRESS_ENABLE; + } + + addr = pci_bar(pci_dev, region_num); + pci_set_long(pci_dev->config + addr, type); + + if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && + r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + pci_set_quad(pci_dev->wmask + addr, wmask); + pci_set_quad(pci_dev->cmask + addr, ~0ULL); + } else { + pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); + pci_set_long(pci_dev->cmask + addr, 0xffffffff); + } } } @@ -1437,7 +1446,11 @@ static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg, pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET); uint16_t vf_stride = pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE); - uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride; + uint32_t vf_num = d->devfn - (pf->devfn + vf_offset); + + if (vf_num) { + vf_num /= vf_stride; + } if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) { new_addr = pci_get_quad(pf->config + bar); @@ -2105,6 +2118,11 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) } } + if (!pcie_sriov_register_device(pci_dev, errp)) { + pci_qdev_unrealize(DEVICE(pci_dev)); + return; + } + /* * A PCIe Downstream Port that do not have ARI Forwarding enabled must * associate only Device 0 with the device attached to the bus diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 56523ab4e8..0fc9f810b9 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -20,6 +20,8 @@ #include "qapi/error.h" #include "trace.h" +static GHashTable *pfs; + static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) { for (uint16_t i = 0; i < total_vfs; i++) { @@ -31,17 +33,62 @@ static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) dev->exp.sriov_pf.vf = NULL; } -bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, - const char *vfname, uint16_t vf_dev_id, - uint16_t init_vfs, uint16_t total_vfs, - uint16_t vf_offset, uint16_t vf_stride, - Error **errp) +static void clear_ctrl_vfe(PCIDevice *dev) +{ + uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL; + pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE); +} + +static void register_vfs(PCIDevice *dev) +{ + uint16_t num_vfs; + uint16_t i; + uint16_t sriov_cap = dev->exp.sriov_cap; + + assert(sriov_cap > 0); + num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); + if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { + clear_ctrl_vfe(dev); + return; + } + + trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), num_vfs); + for (i = 0; i < num_vfs; i++) { + pci_set_enabled(dev->exp.sriov_pf.vf[i], true); + } +} + +static void unregister_vfs(PCIDevice *dev) +{ + uint16_t i; + uint8_t *cfg = dev->config + dev->exp.sriov_cap; + + trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { + pci_set_enabled(dev->exp.sriov_pf.vf[i], false); + } +} + +static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset, + uint16_t vf_dev_id, uint16_t init_vfs, + uint16_t total_vfs, uint16_t vf_offset, + uint16_t vf_stride, Error **errp) { - BusState *bus = qdev_get_parent_bus(&dev->qdev); - int32_t devfn = dev->devfn + vf_offset; uint8_t *cfg = dev->config + offset; uint8_t *wmask; + if (!pci_is_express(dev)) { + error_setg(errp, "PCI Express is required for SR-IOV PF"); + return false; + } + + if (pci_is_vf(dev)) { + error_setg(errp, "a device cannot be both an SR-IOV PF and a VF"); + return false; + } + if (total_vfs) { uint16_t ari_cap = pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI); uint16_t first_vf_devfn = dev->devfn + vf_offset; @@ -90,6 +137,28 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, qdev_prop_set_bit(&dev->qdev, "multifunction", true); + return true; +} + +bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, + const char *vfname, uint16_t vf_dev_id, + uint16_t init_vfs, uint16_t total_vfs, + uint16_t vf_offset, uint16_t vf_stride, + Error **errp) +{ + BusState *bus = qdev_get_parent_bus(&dev->qdev); + int32_t devfn = dev->devfn + vf_offset; + + if (pfs && g_hash_table_contains(pfs, dev->qdev.id)) { + error_setg(errp, "attaching user-created SR-IOV VF unsupported"); + return false; + } + + if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, init_vfs, + total_vfs, vf_offset, vf_stride, errp)) { + return false; + } + dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs); for (uint16_t i = 0; i < total_vfs; i++) { @@ -119,7 +188,24 @@ void pcie_sriov_pf_exit(PCIDevice *dev) { uint8_t *cfg = dev->config + dev->exp.sriov_cap; - unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); + if (dev->exp.sriov_pf.vf_user_created) { + uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID); + uint16_t total_vfs = pci_get_word(dev->config + PCI_SRIOV_TOTAL_VF); + uint16_t vf_dev_id = pci_get_word(dev->config + PCI_SRIOV_VF_DID); + + unregister_vfs(dev); + + for (uint16_t i = 0; i < total_vfs; i++) { + PCIDevice *vf = dev->exp.sriov_pf.vf[i]; + + vf->exp.sriov_vf.pf = NULL; + + pci_config_set_vendor_id(vf->config, ven_id); + pci_config_set_device_id(vf->config, vf_dev_id); + } + } else { + unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); + } } void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, @@ -152,74 +238,172 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, MemoryRegion *memory) { - PCIIORegion *r; - PCIBus *bus = pci_get_bus(dev); uint8_t type; - pcibus_t size = memory_region_size(memory); - assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */ - assert(region_num >= 0); - assert(region_num < PCI_NUM_REGIONS); + assert(dev->exp.sriov_vf.pf); type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num]; - if (!is_power_of_2(size)) { - error_report("%s: PCI region size must be a power" - " of two - type=0x%x, size=0x%"FMT_PCIBUS, - __func__, type, size); - exit(1); - } - - r = &dev->io_regions[region_num]; - r->memory = memory; - r->address_space = - type & PCI_BASE_ADDRESS_SPACE_IO - ? bus->address_space_io - : bus->address_space_mem; - r->size = size; - r->type = type; - - r->addr = pci_bar_address(dev, region_num, r->type, r->size); - if (r->addr != PCI_BAR_UNMAPPED) { - memory_region_add_subregion_overlap(r->address_space, - r->addr, r->memory, 1); - } + return pci_register_bar(dev, region_num, type, memory); } -static void clear_ctrl_vfe(PCIDevice *dev) +static gint compare_vf_devfns(gconstpointer a, gconstpointer b) { - uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL; - pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE); + return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn; } -static void register_vfs(PCIDevice *dev) +int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev, + uint16_t offset, + Error **errp) { - uint16_t num_vfs; + GPtrArray *pf; + PCIDevice **vfs; + BusState *bus = qdev_get_parent_bus(DEVICE(dev)); + uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID); + uint16_t vf_dev_id; + uint16_t vf_offset; + uint16_t vf_stride; uint16_t i; - uint16_t sriov_cap = dev->exp.sriov_cap; - assert(sriov_cap > 0); - num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); - if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { - clear_ctrl_vfe(dev); - return; + if (!pfs || !dev->qdev.id) { + return 0; } - trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), num_vfs); - for (i = 0; i < num_vfs; i++) { - pci_set_enabled(dev->exp.sriov_pf.vf[i], true); + pf = g_hash_table_lookup(pfs, dev->qdev.id); + if (!pf) { + return 0; } + + if (pf->len > UINT16_MAX) { + error_setg(errp, "too many VFs"); + return -1; + } + + g_ptr_array_sort(pf, compare_vf_devfns); + vfs = (void *)pf->pdata; + + if (vfs[0]->devfn <= dev->devfn) { + error_setg(errp, "a VF function number is less than the PF function number"); + return -1; + } + + vf_dev_id = pci_get_word(vfs[0]->config + PCI_DEVICE_ID); + vf_offset = vfs[0]->devfn - dev->devfn; + vf_stride = pf->len < 2 ? 0 : vfs[1]->devfn - vfs[0]->devfn; + + for (i = 0; i < pf->len; i++) { + if (bus != qdev_get_parent_bus(&vfs[i]->qdev)) { + error_setg(errp, "SR-IOV VF parent bus mismatches with PF"); + return -1; + } + + if (ven_id != pci_get_word(vfs[i]->config + PCI_VENDOR_ID)) { + error_setg(errp, "SR-IOV VF vendor ID mismatches with PF"); + return -1; + } + + if (vf_dev_id != pci_get_word(vfs[i]->config + PCI_DEVICE_ID)) { + error_setg(errp, "inconsistent SR-IOV VF device IDs"); + return -1; + } + + for (size_t j = 0; j < PCI_NUM_REGIONS; j++) { + if (vfs[i]->io_regions[j].size != vfs[0]->io_regions[j].size || + vfs[i]->io_regions[j].type != vfs[0]->io_regions[j].type) { + error_setg(errp, "inconsistent SR-IOV BARs"); + return -1; + } + } + + if (vfs[i]->devfn - vfs[0]->devfn != vf_stride * i) { + error_setg(errp, "inconsistent SR-IOV stride"); + return -1; + } + } + + if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, pf->len, + pf->len, vf_offset, vf_stride, errp)) { + return -1; + } + + for (i = 0; i < pf->len; i++) { + vfs[i]->exp.sriov_vf.pf = dev; + vfs[i]->exp.sriov_vf.vf_number = i; + + /* set vid/did according to sr/iov spec - they are not used */ + pci_config_set_vendor_id(vfs[i]->config, 0xffff); + pci_config_set_device_id(vfs[i]->config, 0xffff); + } + + dev->exp.sriov_pf.vf = vfs; + dev->exp.sriov_pf.vf_user_created = true; + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + PCIIORegion *region = &vfs[0]->io_regions[i]; + + if (region->size) { + pcie_sriov_pf_init_vf_bar(dev, i, region->type, region->size); + } + } + + return PCI_EXT_CAP_SRIOV_SIZEOF; } -static void unregister_vfs(PCIDevice *dev) +bool pcie_sriov_register_device(PCIDevice *dev, Error **errp) { - uint16_t i; - uint8_t *cfg = dev->config + dev->exp.sriov_cap; + if (!dev->exp.sriov_pf.vf && dev->qdev.id && + pfs && g_hash_table_contains(pfs, dev->qdev.id)) { + error_setg(errp, "attaching user-created SR-IOV VF unsupported"); + return false; + } - trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn)); - for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { - pci_set_enabled(dev->exp.sriov_pf.vf[i], false); + if (dev->sriov_pf) { + PCIDevice *pci_pf; + GPtrArray *pf; + + if (!PCI_DEVICE_GET_CLASS(dev)->sriov_vf_user_creatable) { + error_setg(errp, "user cannot create SR-IOV VF with this device type"); + return false; + } + + if (!pci_is_express(dev)) { + error_setg(errp, "PCI Express is required for SR-IOV VF"); + return false; + } + + if (!pci_qdev_find_device(dev->sriov_pf, &pci_pf)) { + error_setg(errp, "PCI device specified as SR-IOV PF already exists"); + return false; + } + + if (!pfs) { + pfs = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); + } + + pf = g_hash_table_lookup(pfs, dev->sriov_pf); + if (!pf) { + pf = g_ptr_array_new(); + g_hash_table_insert(pfs, g_strdup(dev->sriov_pf), pf); + } + + g_ptr_array_add(pf, dev); + } + + return true; +} + +void pcie_sriov_unregister_device(PCIDevice *dev) +{ + if (dev->sriov_pf && pfs) { + GPtrArray *pf = g_hash_table_lookup(pfs, dev->sriov_pf); + + if (pf) { + g_ptr_array_remove_fast(pf, dev); + + if (!pf->len) { + g_hash_table_remove(pfs, dev->sriov_pf); + g_ptr_array_free(pf, FALSE); + } + } } } @@ -306,7 +490,7 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize) uint16_t pcie_sriov_vf_number(PCIDevice *dev) { - assert(pci_is_vf(dev)); + assert(dev->exp.sriov_vf.pf); return dev->exp.sriov_vf.vf_number; } diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c index 0925528160..36d6a3a412 100644 --- a/hw/riscv/virt-acpi-build.c +++ b/hw/riscv/virt-acpi-build.c @@ -141,12 +141,36 @@ static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s) } } +static void acpi_dsdt_add_plic_aplic(Aml *scope, uint8_t socket_count, + uint64_t mmio_base, uint64_t mmio_size, + const char *hid) +{ + uint64_t plic_aplic_addr; + uint32_t gsi_base; + uint8_t socket; + + for (socket = 0; socket < socket_count; socket++) { + plic_aplic_addr = mmio_base + mmio_size * socket; + gsi_base = VIRT_IRQCHIP_NUM_SOURCES * socket; + Aml *dev = aml_device("IC%.02X", socket); + aml_append(dev, aml_name_decl("_HID", aml_string("%s", hid))); + aml_append(dev, aml_name_decl("_UID", aml_int(socket))); + aml_append(dev, aml_name_decl("_GSB", aml_int(gsi_base))); + + Aml *crs = aml_resource_template(); + aml_append(crs, aml_memory32_fixed(plic_aplic_addr, mmio_size, + AML_READ_WRITE)); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + } +} + static void acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap, uint32_t uart_irq) { Aml *dev = aml_device("COM0"); - aml_append(dev, aml_name_decl("_HID", aml_string("PNP0501"))); + aml_append(dev, aml_name_decl("_HID", aml_string("RSCV0003"))); aml_append(dev, aml_name_decl("_UID", aml_int(0))); Aml *crs = aml_resource_template(); @@ -411,6 +435,14 @@ static void build_dsdt(GArray *table_data, socket_count = riscv_socket_count(ms); + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + acpi_dsdt_add_plic_aplic(scope, socket_count, memmap[VIRT_PLIC].base, + memmap[VIRT_PLIC].size, "RSCV0001"); + } else { + acpi_dsdt_add_plic_aplic(scope, socket_count, memmap[VIRT_APLIC_S].base, + memmap[VIRT_APLIC_S].size, "RSCV0002"); + } + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART0], UART0_IRQ); if (socket_count == 1) { diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 3d5fe0994d..49cff2a0cb 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -38,6 +38,7 @@ static const int kernel_feature_bits[] = { VIRTIO_RING_F_EVENT_IDX, VIRTIO_SCSI_F_HOTPLUG, VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index cc91ade525..55e4be5b34 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -36,6 +36,7 @@ static const int user_feature_bits[] = { VIRTIO_RING_F_EVENT_IDX, VIRTIO_SCSI_F_HOTPLUG, VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index 3b7703489d..a394514264 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -1093,6 +1093,7 @@ static bool smbios_get_tables_ep(MachineState *ms, Error **errp) { unsigned i, dimm_cnt, offset; + MachineClass *mc = MACHINE_GET_CLASS(ms); ERRP_GUARD(); assert(ep_type == SMBIOS_ENTRY_POINT_TYPE_32 || @@ -1123,12 +1124,12 @@ static bool smbios_get_tables_ep(MachineState *ms, smbios_build_type_9_table(errp); smbios_build_type_11_table(); -#define MAX_DIMM_SZ (16 * GiB) -#define GET_DIMM_SZ ((i < dimm_cnt - 1) ? MAX_DIMM_SZ \ - : ((current_machine->ram_size - 1) % MAX_DIMM_SZ) + 1) +#define GET_DIMM_SZ ((i < dimm_cnt - 1) ? mc->smbios_memory_device_size \ + : ((current_machine->ram_size - 1) % mc->smbios_memory_device_size) + 1) - dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, MAX_DIMM_SZ) / - MAX_DIMM_SZ; + dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, + mc->smbios_memory_device_size) / + mc->smbios_memory_device_size; /* * The offset determines if we need to keep additional space between diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 6d15b36e0b..cfc44a4569 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -599,7 +599,7 @@ static void vfio_listener_region_add(MemoryListener *listener, IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); int iommu_idx; - trace_vfio_listener_region_add_iommu(iova, end); + trace_vfio_listener_region_add_iommu(section->mr->name, iova, end); /* * FIXME: For VFIO iommu types which have KVM acceleration to * avoid bouncing all map/unmaps through qemu this way, this @@ -725,6 +725,7 @@ static void vfio_listener_region_del(MemoryListener *listener, if (memory_region_is_iommu(section->mr)) { VFIOGuestIOMMU *giommu; + trace_vfio_listener_region_del_iommu(section->mr->name); QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { if (MEMORY_REGION(giommu->iommu_mr) == section->mr && giommu->n.start == section->offset_within_region) { diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index e16179b507..98bd4dccea 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -95,7 +95,8 @@ vfio_region_read(char *name, int index, uint64_t addr, unsigned size, uint64_t d vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) "iommu %s @ 0x%"PRIx64" - 0x%"PRIx64 vfio_listener_region_skip(const char *name, uint64_t start, uint64_t end) "SKIPPING %s 0x%"PRIx64" - 0x%"PRIx64 vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" -vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64 +vfio_listener_region_add_iommu(const char* name, uint64_t start, uint64_t end) "region_add [iommu] %s 0x%"PRIx64" - 0x%"PRIx64 +vfio_listener_region_del_iommu(const char *name) "region_del [iommu] %s" vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index b7c04f0856..04e36ae047 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -116,6 +116,7 @@ virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, u virtio_iommu_set_config(uint8_t bypass) "bypass=0x%x" virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" +virtio_iommu_detach_endpoint_from_domain(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" virtio_iommu_map(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start, uint32_t flags) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 " phys_start=0x%"PRIx64" flags=%d" virtio_iommu_unmap(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 virtio_iommu_unmap_done(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index ae48cc1c96..32ee7f496d 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -33,6 +33,7 @@ static const int user_feature_bits[] = { VIRTIO_F_RING_PACKED, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c index 802b44a07d..da3b0e0229 100644 --- a/hw/virtio/vhost-user-vsock.c +++ b/hw/virtio/vhost-user-vsock.c @@ -21,6 +21,7 @@ static const int user_feature_bits[] = { VIRTIO_RING_F_INDIRECT_DESC, VIRTIO_RING_F_EVENT_IDX, VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_F_IN_ORDER, VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index bbe8aa4b99..5034768bff 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -205,6 +205,7 @@ virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto, int queue_index; uint32_t algo, keytype, keylen; + sreq->info.op_code = opcode; algo = ldl_le_p(&sess_req->para.algo); keytype = ldl_le_p(&sess_req->para.keytype); keylen = ldl_le_p(&sess_req->para.keylen); @@ -224,7 +225,6 @@ virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto, iov_discard_front(&iov, &out_num, keylen); } - sreq->info.op_code = opcode; asym_info = &sreq->info.u.asym_sess_info; asym_info->algo = algo; asym_info->keytype = keytype; diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 33ae61c4a6..59ef4fb217 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -308,6 +308,7 @@ static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) if (!ep->domain) { return; } + trace_virtio_iommu_detach_endpoint_from_domain(domain->id, ep->id); g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb, ep->iommu_mr); QLIST_REMOVE(ep, next); @@ -467,26 +468,6 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, return &sdev->as; } -static void virtio_iommu_device_clear(VirtIOIOMMU *s, PCIBus *bus, int devfn) -{ - IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); - IOMMUDevice *sdev; - - if (!sbus) { - return; - } - - sdev = sbus->pbdev[devfn]; - if (!sdev) { - return; - } - - g_list_free_full(sdev->resv_regions, g_free); - sdev->resv_regions = NULL; - g_free(sdev); - sbus->pbdev[devfn] = NULL; -} - static gboolean hiod_equal(gconstpointer v1, gconstpointer v2) { const struct hiod_key *key1 = v1; @@ -558,8 +539,6 @@ static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, { IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); IOMMUDevice *sdev; - GList *current_ranges; - GList *l, *tmp, *new_ranges = NULL; int ret = -EINVAL; if (!sbus) { @@ -573,35 +552,10 @@ static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, return ret; } - current_ranges = sdev->host_resv_ranges; - - g_assert(!sdev->probe_done); - - /* check that each new resv region is included in an existing one */ if (sdev->host_resv_ranges) { - range_inverse_array(iova_ranges, - &new_ranges, - 0, UINT64_MAX); - - for (tmp = new_ranges; tmp; tmp = tmp->next) { - Range *newr = (Range *)tmp->data; - bool included = false; - - for (l = current_ranges; l; l = l->next) { - Range * r = (Range *)l->data; - - if (range_contains_range(r, newr)) { - included = true; - break; - } - } - if (!included) { - goto error; - } - } - /* all new reserved ranges are included in existing ones */ - ret = 0; - goto out; + error_setg(errp, "%s virtio-iommu does not support aliased BDF", + __func__); + return ret; } range_inverse_array(iova_ranges, @@ -610,14 +564,31 @@ static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, rebuild_resv_regions(sdev); return 0; -error: - error_setg(errp, "%s Conflicting host reserved ranges set!", - __func__); -out: - g_list_free_full(new_ranges, g_free); - return ret; } +static void virtio_iommu_unset_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, + int devfn) +{ + IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); + IOMMUDevice *sdev; + + if (!sbus) { + return; + } + + sdev = sbus->pbdev[devfn]; + if (!sdev) { + return; + } + + g_list_free_full(g_steal_pointer(&sdev->host_resv_ranges), g_free); + g_list_free_full(sdev->resv_regions, g_free); + sdev->host_resv_ranges = NULL; + sdev->resv_regions = NULL; + add_prop_resv_regions(sdev); +} + + static bool check_page_size_mask(VirtIOIOMMU *viommu, uint64_t new_mask, Error **errp) { @@ -726,9 +697,10 @@ virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) if (!hiod) { return; } + virtio_iommu_unset_host_iova_ranges(viommu, hiod->aliased_bus, + hiod->aliased_devfn); g_hash_table_remove(viommu->host_iommu_devices, &key); - virtio_iommu_device_clear(viommu, bus, devfn); } static const PCIIOMMUOps virtio_iommu_ops = { @@ -815,6 +787,7 @@ static int virtio_iommu_detach(VirtIOIOMMU *s, if (QLIST_EMPTY(&domain->endpoint_list)) { g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id)); } + g_tree_remove(s->endpoints, GUINT_TO_POINTER(ep_id)); return VIRTIO_IOMMU_S_OK; } @@ -977,7 +950,6 @@ static int virtio_iommu_probe(VirtIOIOMMU *s, } buf += count; free -= count; - sdev->probe_done = true; return VIRTIO_IOMMU_S_OK; } diff --git a/hw/virtio/virtio-net-pci.c b/hw/virtio/virtio-net-pci.c index e03543a70a..dba4987d6e 100644 --- a/hw/virtio/virtio-net-pci.c +++ b/hw/virtio/virtio-net-pci.c @@ -75,6 +75,7 @@ static void virtio_net_pci_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_VIRTIO_NET; k->revision = VIRTIO_PCI_ABI_VERSION; k->class_id = PCI_CLASS_NETWORK_ETHERNET; + k->sriov_vf_user_creatable = true; set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); device_class_set_props(dc, virtio_net_properties); vpciklass->realize = virtio_net_pci_realize; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 9534730bba..0c8fcc5627 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1955,6 +1955,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) uint8_t *config; uint32_t size; VirtIODevice *vdev = virtio_bus_get_device(bus); + int16_t res; /* * Virtio capabilities present without @@ -2100,6 +2101,14 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx, PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar); } + + res = pcie_sriov_pf_init_from_user_created_vfs(&proxy->pci_dev, + proxy->last_pcie_cap_offset, + errp); + if (res > 0) { + proxy->last_pcie_cap_offset += res; + virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV); + } } static void virtio_pci_device_unplugged(DeviceState *d) @@ -2187,7 +2196,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) if (pcie_port && pci_is_express(pci_dev)) { int pos; - uint16_t last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE; + proxy->last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE; pos = pcie_endpoint_cap_init(pci_dev, 0); assert(pos > 0); @@ -2207,9 +2216,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3); if (proxy->flags & VIRTIO_PCI_FLAG_AER) { - pcie_aer_init(pci_dev, PCI_ERR_VER, last_pcie_cap_offset, + pcie_aer_init(pci_dev, PCI_ERR_VER, proxy->last_pcie_cap_offset, PCI_ERR_SIZEOF, NULL); - last_pcie_cap_offset += PCI_ERR_SIZEOF; + proxy->last_pcie_cap_offset += PCI_ERR_SIZEOF; } if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) { @@ -2234,9 +2243,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) } if (proxy->flags & VIRTIO_PCI_FLAG_ATS) { - pcie_ats_init(pci_dev, last_pcie_cap_offset, + pcie_ats_init(pci_dev, proxy->last_pcie_cap_offset, proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED); - last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF; + proxy->last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF; } if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) { @@ -2263,6 +2272,7 @@ static void virtio_pci_exit(PCIDevice *pci_dev) bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) && !pci_bus_is_root(pci_get_bus(pci_dev)); + pcie_sriov_pf_exit(&proxy->pci_dev); msix_uninit_exclusive_bar(pci_dev); if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port && pci_is_express(pci_dev)) { diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 583a224163..397c261c3c 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -872,6 +872,46 @@ static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, vq->used_elems[idx].ndescs = elem->ndescs; } +static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) +{ + unsigned int i, steps, max_steps; + + i = vq->used_idx % vq->vring.num; + steps = 0; + /* + * We shouldn't need to increase 'i' by more than the distance + * between used_idx and last_avail_idx. + */ + max_steps = (vq->last_avail_idx - vq->used_idx) % vq->vring.num; + + /* Search for element in vq->used_elems */ + while (steps <= max_steps) { + /* Found element, set length and mark as filled */ + if (vq->used_elems[i].index == elem->index) { + vq->used_elems[i].len = len; + vq->used_elems[i].in_order_filled = true; + break; + } + + i += vq->used_elems[i].ndescs; + steps += vq->used_elems[i].ndescs; + + if (i >= vq->vring.num) { + i -= vq->vring.num; + } + } + + /* + * We should be able to find a matching VirtQueueElement in + * used_elems. If we don't, this is an error. + */ + if (steps >= max_steps) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: %s cannot fill buffer id %u\n", + __func__, vq->vdev->name, elem->index); + } +} + static void virtqueue_packed_fill_desc(VirtQueue *vq, const VirtQueueElement *elem, unsigned int idx, @@ -922,7 +962,9 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, return; } - if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_IN_ORDER)) { + virtqueue_ordered_fill(vq, elem, len); + } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { virtqueue_packed_fill(vq, elem, len, idx); } else { virtqueue_split_fill(vq, elem, len, idx); @@ -981,6 +1023,73 @@ static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count) } } +static void virtqueue_ordered_flush(VirtQueue *vq) +{ + unsigned int i = vq->used_idx % vq->vring.num; + unsigned int ndescs = 0; + uint16_t old = vq->used_idx; + uint16_t new; + bool packed; + VRingUsedElem uelem; + + packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED); + + if (packed) { + if (unlikely(!vq->vring.desc)) { + return; + } + } else if (unlikely(!vq->vring.used)) { + return; + } + + /* First expected in-order element isn't ready, nothing to do */ + if (!vq->used_elems[i].in_order_filled) { + return; + } + + /* Search for filled elements in-order */ + while (vq->used_elems[i].in_order_filled) { + /* + * First entry for packed VQs is written last so the guest + * doesn't see invalid descriptors. + */ + if (packed && i != vq->used_idx) { + virtqueue_packed_fill_desc(vq, &vq->used_elems[i], ndescs, false); + } else if (!packed) { + uelem.id = vq->used_elems[i].index; + uelem.len = vq->used_elems[i].len; + vring_used_write(vq, &uelem, i); + } + + vq->used_elems[i].in_order_filled = false; + ndescs += vq->used_elems[i].ndescs; + i += vq->used_elems[i].ndescs; + if (i >= vq->vring.num) { + i -= vq->vring.num; + } + } + + if (packed) { + virtqueue_packed_fill_desc(vq, &vq->used_elems[vq->used_idx], 0, true); + vq->used_idx += ndescs; + if (vq->used_idx >= vq->vring.num) { + vq->used_idx -= vq->vring.num; + vq->used_wrap_counter ^= 1; + vq->signalled_used_valid = false; + } + } else { + /* Make sure buffer is written before we update index. */ + smp_wmb(); + new = old + ndescs; + vring_used_idx_set(vq, new); + if (unlikely((int16_t)(new - vq->signalled_used) < + (uint16_t)(new - old))) { + vq->signalled_used_valid = false; + } + } + vq->inuse -= ndescs; +} + void virtqueue_flush(VirtQueue *vq, unsigned int count) { if (virtio_device_disabled(vq->vdev)) { @@ -988,7 +1097,9 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) return; } - if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_IN_ORDER)) { + virtqueue_ordered_flush(vq); + } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { virtqueue_packed_flush(vq, count); } else { virtqueue_split_flush(vq, count); @@ -1505,7 +1616,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) { - unsigned int i, head, max; + unsigned int i, head, max, idx; VRingMemoryRegionCaches *caches; MemoryRegionCache indirect_desc_cache; MemoryRegionCache *desc_cache; @@ -1629,6 +1740,13 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) elem->in_sg[i] = iov[out_num + i]; } + if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) { + idx = (vq->last_avail_idx - 1) % vq->vring.num; + vq->used_elems[idx].index = elem->index; + vq->used_elems[idx].len = elem->len; + vq->used_elems[idx].ndescs = elem->ndescs; + } + vq->inuse++; trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); @@ -1762,6 +1880,13 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) elem->index = id; elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries; + + if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) { + vq->used_elems[vq->last_avail_idx].index = elem->index; + vq->used_elems[vq->last_avail_idx].len = elem->len; + vq->used_elems[vq->last_avail_idx].ndescs = elem->ndescs; + } + vq->last_avail_idx += elem->ndescs; vq->inuse += elem->ndescs; diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 815342d043..240ee04369 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -129,6 +129,14 @@ size_t qemu_ram_pagesize_largest(void); */ void cpu_address_space_init(CPUState *cpu, int asidx, const char *prefix, MemoryRegion *mr); +/** + * cpu_address_space_destroy: + * @cpu: CPU for which address space needs to be destroyed + * @asidx: integer index of this address space + * + * Note that with KVM only one address space is supported. + */ +void cpu_address_space_destroy(CPUState *cpu, int asidx); void cpu_physical_memory_rw(hwaddr addr, void *buf, hwaddr len, bool is_write); diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h index 1bd2c4ec2a..d73f424f56 100644 --- a/include/exec/gdbstub.h +++ b/include/exec/gdbstub.h @@ -41,6 +41,12 @@ void gdb_register_coprocessor(CPUState *cpu, const GDBFeature *feature, int g_pos); /** + * gdb_unregister_coprocessor_all() - unregisters supplemental set of registers + * @cpu - the CPU associated with registers + */ +void gdb_unregister_coprocessor_all(CPUState *cpu); + +/** * gdbserver_start: start the gdb server * @port_or_device: connection spec for gdb * diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h index e6e1a9ef59..32654dc274 100644 --- a/include/hw/acpi/cpu.h +++ b/include/hw/acpi/cpu.h @@ -19,6 +19,8 @@ #include "hw/boards.h" #include "hw/hotplug.h" +#define ACPI_CPU_HOTPLUG_REG_LEN 12 + typedef struct AcpiCpuStatus { CPUState *cpu; uint64_t arch_id; @@ -61,9 +63,10 @@ typedef void (*build_madt_cpu_fn)(int uid, const CPUArchIdList *apic_ids, GArray *entry, bool force_enabled); void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, - build_madt_cpu_fn build_madt_cpu, hwaddr io_base, + build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, const char *res_root, - const char *event_handler_method); + const char *event_handler_method, + AmlRegionSpace rs); void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list); diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h index ba84ce0214..40af3550b5 100644 --- a/include/hw/acpi/generic_event_device.h +++ b/include/hw/acpi/generic_event_device.h @@ -62,6 +62,7 @@ #include "hw/sysbus.h" #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/ghes.h" +#include "hw/acpi/cpu.h" #include "qom/object.h" #define ACPI_POWER_BUTTON_DEVICE "PWRB" @@ -86,6 +87,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED) #define GED_DEVICE "GED" #define AML_GED_EVT_REG "EREG" #define AML_GED_EVT_SEL "ESEL" +#define AML_GED_EVT_CPU_SCAN_METHOD "\\_SB.GED.CSCN" /* * Platforms need to specify the GED event bitmap @@ -95,6 +97,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED) #define ACPI_GED_MEM_HOTPLUG_EVT 0x1 #define ACPI_GED_PWR_DOWN_EVT 0x2 #define ACPI_GED_NVDIMM_HOTPLUG_EVT 0x4 +#define ACPI_GED_CPU_HOTPLUG_EVT 0x8 typedef struct GEDState { MemoryRegion evt; @@ -106,6 +109,8 @@ struct AcpiGedState { SysBusDevice parent_obj; MemHotplugState memhp_state; MemoryRegion container_memhp; + CPUHotplugState cpuhp_state; + MemoryRegion container_cpuhp; GEDState ged_state; uint32_t ged_event_bitmap; qemu_irq irq; diff --git a/include/hw/boards.h b/include/hw/boards.h index ef6f18f2c1..48ff6d8b93 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -237,6 +237,9 @@ typedef struct { * purposes only. * Applies only to default memory backend, i.e., explicit memory backend * wasn't used. + * @smbios_memory_device_size: + * Default size of memory device, + * SMBIOS 3.1.0 "7.18 Memory Device (Type 17)" */ struct MachineClass { /*< private >*/ @@ -304,6 +307,7 @@ struct MachineClass { const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine); int64_t (*get_default_cpu_node_id)(const MachineState *ms, int idx); ram_addr_t (*fixup_ram_size)(ram_addr_t size); + uint64_t smbios_memory_device_size; }; /** diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index d946161717..1c9c775df6 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -496,6 +496,7 @@ struct CPUState { QSIMPLEQ_HEAD(, qemu_work_item) work_list; struct CPUAddressSpace *cpu_ases; + int cpu_ases_count; int num_ases; AddressSpace *as; MemoryRegion *memory; diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index 0a4fcb2800..fdd0f4e62b 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -181,6 +181,21 @@ typedef struct CXLCCI { uint64_t runtime; QEMUTimer *timer; } bg; + + /* firmware update */ + struct { + uint8_t active_slot; + uint8_t staged_slot; + bool slot[4]; + uint8_t curr_action; + uint8_t curr_slot; + /* handle partial transfers */ + bool transferring; + size_t prev_offset; + size_t prev_len; + time_t last_partxfer; + } fw; + size_t payload_max; /* Pointer to device hosting the CCI */ DeviceState *d; @@ -397,9 +412,14 @@ static inline void __toggle_media(CXLDeviceState *cxl_dstate, int val) #define cxl_dev_enable_media(cxlds) \ do { __toggle_media((cxlds), 0x1); } while (0) -static inline bool sanitize_running(CXLCCI *cci) +static inline bool cxl_dev_media_disabled(CXLDeviceState *cxl_dstate) +{ + uint64_t dev_status_reg = cxl_dstate->mbox_reg_state64[R_CXL_MEM_DEV_STS]; + return FIELD_EX64(dev_status_reg, CXL_MEM_DEV_STS, MEDIA_STATUS) == 0x3; +} +static inline bool scan_media_running(CXLCCI *cci) { - return !!cci->bg.runtime && cci->bg.opcode == 0x4400; + return !!cci->bg.runtime && cci->bg.opcode == 0x4304; } typedef struct CXLError { @@ -422,6 +442,47 @@ typedef struct CXLPoison { typedef QLIST_HEAD(, CXLPoison) CXLPoisonList; #define CXL_POISON_LIST_LIMIT 256 +/* CXL memory device patrol scrub control attributes */ +typedef struct CXLMemPatrolScrubReadAttrs { + uint8_t scrub_cycle_cap; + uint16_t scrub_cycle; + uint8_t scrub_flags; +} QEMU_PACKED CXLMemPatrolScrubReadAttrs; + +typedef struct CXLMemPatrolScrubWriteAttrs { + uint8_t scrub_cycle_hr; + uint8_t scrub_flags; +} QEMU_PACKED CXLMemPatrolScrubWriteAttrs; + +#define CXL_MEMDEV_PS_GET_FEATURE_VERSION 0x01 +#define CXL_MEMDEV_PS_SET_FEATURE_VERSION 0x01 +#define CXL_MEMDEV_PS_SCRUB_CYCLE_CHANGE_CAP_DEFAULT BIT(0) +#define CXL_MEMDEV_PS_SCRUB_REALTIME_REPORT_CAP_DEFAULT BIT(1) +#define CXL_MEMDEV_PS_CUR_SCRUB_CYCLE_DEFAULT 12 +#define CXL_MEMDEV_PS_MIN_SCRUB_CYCLE_DEFAULT 1 +#define CXL_MEMDEV_PS_ENABLE_DEFAULT 0 + +/* CXL memory device DDR5 ECS control attributes */ +typedef struct CXLMemECSReadAttrs { + uint8_t ecs_log_cap; + uint8_t ecs_cap; + uint16_t ecs_config; + uint8_t ecs_flags; +} QEMU_PACKED CXLMemECSReadAttrs; + +typedef struct CXLMemECSWriteAttrs { + uint8_t ecs_log_cap; + uint16_t ecs_config; +} QEMU_PACKED CXLMemECSWriteAttrs; + +#define CXL_ECS_GET_FEATURE_VERSION 0x01 +#define CXL_ECS_SET_FEATURE_VERSION 0x01 +#define CXL_ECS_LOG_ENTRY_TYPE_DEFAULT 0x01 +#define CXL_ECS_REALTIME_REPORT_CAP_DEFAULT 1 +#define CXL_ECS_THRESHOLD_COUNT_DEFAULT 3 /* 3: 256, 4: 1024, 5: 4096 */ +#define CXL_ECS_MODE_DEFAULT 0 +#define CXL_ECS_NUM_MEDIA_FRUS 3 /* Default */ + #define DCD_MAX_NUM_REGION 8 typedef struct CXLDCExtentRaw { @@ -459,6 +520,14 @@ typedef struct CXLDCRegion { unsigned long *blk_bitmap; } CXLDCRegion; +typedef struct CXLSetFeatureInfo { + QemuUUID uuid; + uint8_t data_transfer_flag; + bool data_saved_across_reset; + uint16_t data_offset; + size_t data_size; +} CXLSetFeatureInfo; + struct CXLType3Dev { /* Private */ PCIDevice parent_obj; @@ -491,6 +560,19 @@ struct CXLType3Dev { unsigned int poison_list_cnt; bool poison_list_overflowed; uint64_t poison_list_overflow_ts; + /* Poison Injection - backup */ + CXLPoisonList poison_list_bkp; + CXLPoisonList scan_media_results; + bool scan_media_hasrun; + + CXLSetFeatureInfo set_feat_info; + + /* Patrol scrub control attributes */ + CXLMemPatrolScrubReadAttrs patrol_scrub_attrs; + CXLMemPatrolScrubWriteAttrs patrol_scrub_wr_attrs; + /* ECS control attributes */ + CXLMemECSReadAttrs ecs_attrs[CXL_ECS_NUM_MEDIA_FRUS]; + CXLMemECSWriteAttrs ecs_wr_attrs[CXL_ECS_NUM_MEDIA_FRUS]; struct dynamic_capacity { HostMemoryBackend *host_dc; @@ -554,10 +636,12 @@ CXLRetCode cxl_event_get_records(CXLDeviceState *cxlds, CXLGetEventPayload *pl, size_t *len); CXLRetCode cxl_event_clear_records(CXLDeviceState *cxlds, CXLClearEventPayload *pl); +void cxl_discard_all_event_records(CXLDeviceState *cxlds); void cxl_event_irq_assert(CXLType3Dev *ct3d); void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d); +void cxl_clear_poison_list_overflowed(CXLType3Dev *ct3d); CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len); diff --git a/include/hw/cxl/cxl_mailbox.h b/include/hw/cxl/cxl_mailbox.h new file mode 100644 index 0000000000..beb048052e --- /dev/null +++ b/include/hw/cxl/cxl_mailbox.h @@ -0,0 +1,18 @@ +/* + * QEMU CXL Mailbox + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. + */ + +#ifndef CXL_MAILBOX_H +#define CXL_MAILBOX_H + +#define CXL_MBOX_IMMEDIATE_CONFIG_CHANGE (1 << 1) +#define CXL_MBOX_IMMEDIATE_DATA_CHANGE (1 << 2) +#define CXL_MBOX_IMMEDIATE_POLICY_CHANGE (1 << 3) +#define CXL_MBOX_IMMEDIATE_LOG_CHANGE (1 << 4) +#define CXL_MBOX_SECURITY_STATE_CHANGE (1 << 5) +#define CXL_MBOX_BACKGROUND_OPERATION (1 << 6) + +#endif diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h index ca15132508..e7e41cb939 100644 --- a/include/hw/pci/pci_device.h +++ b/include/hw/pci/pci_device.h @@ -3,6 +3,7 @@ #include "hw/pci/pci.h" #include "hw/pci/pcie.h" +#include "hw/pci/pcie_doe.h" #define TYPE_PCI_DEVICE "pci-device" typedef struct PCIDeviceClass PCIDeviceClass; @@ -37,6 +38,8 @@ struct PCIDeviceClass { uint16_t subsystem_id; /* only for header type = 0 */ const char *romfile; /* rom bar */ + + bool sriov_vf_user_creatable; }; enum PCIReqIDType { @@ -157,9 +160,17 @@ struct PCIDevice { MSIVectorReleaseNotifier msix_vector_release_notifier; MSIVectorPollNotifier msix_vector_poll_notifier; + /* SPDM */ + uint16_t spdm_port; + + /* DOE */ + DOECap doe_spdm; + /* ID of standby device in net_failover pair */ char *failover_pair_id; uint32_t acpi_index; + + char *sriov_pf; }; static inline int pci_intx(PCIDevice *pci_dev) @@ -192,7 +203,7 @@ static inline int pci_is_express_downstream_port(const PCIDevice *d) static inline int pci_is_vf(const PCIDevice *d) { - return d->exp.sriov_vf.pf != NULL; + return d->sriov_pf || d->exp.sriov_vf.pf != NULL; } static inline uint32_t pci_config_size(const PCIDevice *d) diff --git a/include/hw/pci/pcie_doe.h b/include/hw/pci/pcie_doe.h index 87dc17dcef..9e1275db8a 100644 --- a/include/hw/pci/pcie_doe.h +++ b/include/hw/pci/pcie_doe.h @@ -46,6 +46,8 @@ REG32(PCI_DOE_CAP_STATUS, 0) /* PCI-SIG defined Data Object Types - r6.0 Table 6-32 */ #define PCI_SIG_DOE_DISCOVERY 0x00 +#define PCI_SIG_DOE_CMA 0x01 +#define PCI_SIG_DOE_SECURED_CMA 0x02 #define PCI_DOE_DW_SIZE_MAX (1 << 18) #define PCI_DOE_PROTOCOL_NUM_MAX 256 @@ -106,6 +108,9 @@ struct DOECap { /* Protocols and its callback response */ DOEProtocol *protocols; uint16_t protocol_num; + + /* Used for spdm-socket */ + int spdm_socket; }; void pcie_doe_init(PCIDevice *pdev, DOECap *doe_cap, uint16_t offset, diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index c5d2d318d3..f75b8f22ee 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -18,6 +18,7 @@ typedef struct PCIESriovPF { uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */ PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */ + bool vf_user_created; /* If VFs are created by user */ } PCIESriovPF; typedef struct PCIESriovVF { @@ -40,6 +41,23 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, MemoryRegion *memory); +/** + * pcie_sriov_pf_init_from_user_created_vfs() - Initialize PF with user-created + * VFs. + * @dev: A PCIe device being realized. + * @offset: The offset of the SR-IOV capability. + * @errp: pointer to Error*, to store an error if it happens. + * + * Return: The size of added capability. 0 if the user did not create VFs. + * -1 if failed. + */ +int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev, + uint16_t offset, + Error **errp); + +bool pcie_sriov_register_device(PCIDevice *dev, Error **errp); +void pcie_sriov_unregister_device(PCIDevice *dev); + /* * Default (minimal) page size support values * as required by the SR/IOV standard: diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h index bdb3da72d0..7db4210b16 100644 --- a/include/hw/virtio/virtio-iommu.h +++ b/include/hw/virtio/virtio-iommu.h @@ -43,7 +43,6 @@ typedef struct IOMMUDevice { MemoryRegion bypass_mr; /* The alias of shared memory MR */ GList *resv_regions; GList *host_resv_ranges; - bool probe_done; } IOMMUDevice; typedef struct IOMMUPciBus { diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 9e67ba38c7..34539f2f67 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -152,6 +152,7 @@ struct VirtIOPCIProxy { uint32_t modern_io_bar_idx; uint32_t modern_mem_bar_idx; int config_cap; + uint16_t last_pcie_cap_offset; uint32_t flags; bool disable_modern; bool ignore_backend_features; diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 7512afbc84..d2a1938757 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -69,6 +69,8 @@ typedef struct VirtQueueElement unsigned int ndescs; unsigned int out_num; unsigned int in_num; + /* Element has been processed (VIRTIO_F_IN_ORDER) */ + bool in_order_filled; hwaddr *in_addr; hwaddr *out_addr; struct iovec *in_sg; @@ -371,7 +373,9 @@ typedef struct VirtIORNGConf VirtIORNGConf; DEFINE_PROP_BIT64("packed", _state, _field, \ VIRTIO_F_RING_PACKED, false), \ DEFINE_PROP_BIT64("queue_reset", _state, _field, \ - VIRTIO_F_RING_RESET, true) + VIRTIO_F_RING_RESET, true), \ + DEFINE_PROP_BIT64("in_order", _state, _field, \ + VIRTIO_F_IN_ORDER, false) hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n); bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n); diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index c31d9c7356..c4a914b3d8 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -313,6 +313,31 @@ int kvm_create_device(KVMState *s, uint64_t type, bool test); */ bool kvm_device_supported(int vmfd, uint64_t type); +/** + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. + * + * @returns: 0 when success, errno (<0) when failed. + */ +int kvm_create_vcpu(CPUState *cpu); + +/** + * kvm_park_vcpu - Park QEMU KVM vCPU context + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked. + * + * @returns: none + */ +void kvm_park_vcpu(CPUState *cpu); + +/** + * kvm_unpark_vcpu - unpark QEMU KVM vCPU context + * @s: KVM State + * @vcpu_id: Architecture vCPU ID of the parked vCPU + * + * @returns: KVM fd + */ +int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id); + /* Arch specific hooks */ extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; diff --git a/include/sysemu/spdm-socket.h b/include/sysemu/spdm-socket.h new file mode 100644 index 0000000000..5d8bd9aa4e --- /dev/null +++ b/include/sysemu/spdm-socket.h @@ -0,0 +1,74 @@ +/* + * QEMU SPDM socket support + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef SPDM_REQUESTER_H +#define SPDM_REQUESTER_H + +/** + * spdm_socket_connect: connect to an external SPDM socket + * @port: port to connect to + * @errp: error object handle + * + * This will connect to an external SPDM socket server. On error + * it will return -1 and errp will be set. On success this function + * will return the socket number. + */ +int spdm_socket_connect(uint16_t port, Error **errp); + +/** + * spdm_socket_rsp: send and receive a message to a SPDM server + * @socket: socket returned from spdm_socket_connect() + * @transport_type: SPDM_SOCKET_TRANSPORT_TYPE_* macro + * @req: request buffer + * @req_len: request buffer length + * @rsp: response buffer + * @rsp_len: response buffer length + * + * Send platform data to a SPDM server on socket and then receive + * a response. + */ +uint32_t spdm_socket_rsp(const int socket, uint32_t transport_type, + void *req, uint32_t req_len, + void *rsp, uint32_t rsp_len); + +/** + * spdm_socket_close: send a shutdown command to the server + * @socket: socket returned from spdm_socket_connect() + * @transport_type: SPDM_SOCKET_TRANSPORT_TYPE_* macro + * + * This will issue a shutdown command to the server. + */ +void spdm_socket_close(const int socket, uint32_t transport_type); + +#define SPDM_SOCKET_COMMAND_NORMAL 0x0001 +#define SPDM_SOCKET_COMMAND_OOB_ENCAP_KEY_UPDATE 0x8001 +#define SPDM_SOCKET_COMMAND_CONTINUE 0xFFFD +#define SPDM_SOCKET_COMMAND_SHUTDOWN 0xFFFE +#define SPDM_SOCKET_COMMAND_UNKOWN 0xFFFF +#define SPDM_SOCKET_COMMAND_TEST 0xDEAD + +#define SPDM_SOCKET_TRANSPORT_TYPE_MCTP 0x01 +#define SPDM_SOCKET_TRANSPORT_TYPE_PCI_DOE 0x02 + +#define SPDM_SOCKET_MAX_MESSAGE_BUFFER_SIZE 0x1200 + +#endif diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index daa38428c5..03457ead66 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -62,6 +62,7 @@ const int vdpa_feature_bits[] = { VIRTIO_F_RING_PACKED, VIRTIO_F_RING_RESET, VIRTIO_F_VERSION_1, + VIRTIO_F_IN_ORDER, VIRTIO_F_NOTIFICATION_DATA, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, diff --git a/system/physmem.c b/system/physmem.c index 9a3b3a7636..0e19186e1b 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -763,6 +763,7 @@ void cpu_address_space_init(CPUState *cpu, int asidx, if (!cpu->cpu_ases) { cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases); + cpu->cpu_ases_count = cpu->num_ases; } newas = &cpu->cpu_ases[asidx]; @@ -776,6 +777,34 @@ void cpu_address_space_init(CPUState *cpu, int asidx, } } +void cpu_address_space_destroy(CPUState *cpu, int asidx) +{ + CPUAddressSpace *cpuas; + + assert(cpu->cpu_ases); + assert(asidx >= 0 && asidx < cpu->num_ases); + /* KVM cannot currently support multiple address spaces. */ + assert(asidx == 0 || !kvm_enabled()); + + cpuas = &cpu->cpu_ases[asidx]; + if (tcg_enabled()) { + memory_listener_unregister(&cpuas->tcg_as_listener); + } + + address_space_destroy(cpuas->as); + g_free_rcu(cpuas->as, rcu); + + if (asidx == 0) { + /* reset the convenience alias for address space 0 */ + cpu->as = NULL; + } + + if (--cpu->cpu_ases_count == 0) { + g_free(cpu->cpu_ases); + cpu->cpu_ases = NULL; + } +} + AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx) { /* Return the AddressSpace corresponding to the specified index */ diff --git a/tests/data/acpi/aarch64/virt/DSDT b/tests/data/acpi/aarch64/virt/DSDT Binary files differindex c475039907..36d3e5d5a5 100644 --- a/tests/data/acpi/aarch64/virt/DSDT +++ b/tests/data/acpi/aarch64/virt/DSDT diff --git a/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt b/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt Binary files differindex aee6ba017c..e6154d0355 100644 --- a/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt +++ b/tests/data/acpi/aarch64/virt/DSDT.acpihmatvirt diff --git a/tests/data/acpi/aarch64/virt/DSDT.memhp b/tests/data/acpi/aarch64/virt/DSDT.memhp Binary files differindex bae36cdd39..33f011d6b6 100644 --- a/tests/data/acpi/aarch64/virt/DSDT.memhp +++ b/tests/data/acpi/aarch64/virt/DSDT.memhp diff --git a/tests/data/acpi/aarch64/virt/DSDT.pxb b/tests/data/acpi/aarch64/virt/DSDT.pxb Binary files differindex fbd78f44c4..c0fdc6e9c1 100644 --- a/tests/data/acpi/aarch64/virt/DSDT.pxb +++ b/tests/data/acpi/aarch64/virt/DSDT.pxb diff --git a/tests/data/acpi/aarch64/virt/DSDT.topology b/tests/data/acpi/aarch64/virt/DSDT.topology Binary files differindex 501314c91b..029d03eecc 100644 --- a/tests/data/acpi/aarch64/virt/DSDT.topology +++ b/tests/data/acpi/aarch64/virt/DSDT.topology diff --git a/tests/data/acpi/riscv64/virt/APIC b/tests/data/acpi/riscv64/virt/APIC Binary files differnew file mode 100644 index 0000000000..66a25dfd2d --- /dev/null +++ b/tests/data/acpi/riscv64/virt/APIC diff --git a/tests/data/acpi/riscv64/virt/DSDT b/tests/data/acpi/riscv64/virt/DSDT Binary files differnew file mode 100644 index 0000000000..6a33f5647d --- /dev/null +++ b/tests/data/acpi/riscv64/virt/DSDT diff --git a/tests/data/acpi/riscv64/virt/FACP b/tests/data/acpi/riscv64/virt/FACP Binary files differnew file mode 100644 index 0000000000..a5276b65ea --- /dev/null +++ b/tests/data/acpi/riscv64/virt/FACP diff --git a/tests/data/acpi/riscv64/virt/MCFG b/tests/data/acpi/riscv64/virt/MCFG Binary files differnew file mode 100644 index 0000000000..37eb923a93 --- /dev/null +++ b/tests/data/acpi/riscv64/virt/MCFG diff --git a/tests/data/acpi/riscv64/virt/RHCT b/tests/data/acpi/riscv64/virt/RHCT Binary files differnew file mode 100644 index 0000000000..4f231735ab --- /dev/null +++ b/tests/data/acpi/riscv64/virt/RHCT diff --git a/tests/data/acpi/riscv64/virt/SPCR b/tests/data/acpi/riscv64/virt/SPCR Binary files differnew file mode 100644 index 0000000000..4da9daf65f --- /dev/null +++ b/tests/data/acpi/riscv64/virt/SPCR diff --git a/tests/data/acpi/x86/microvm/DSDT.pcie b/tests/data/acpi/x86/microvm/DSDT.pcie Binary files differindex 765f14ef3d..8eacd21d6e 100644 --- a/tests/data/acpi/x86/microvm/DSDT.pcie +++ b/tests/data/acpi/x86/microvm/DSDT.pcie diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index f4c4704bab..36e5c0adde 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -267,15 +267,6 @@ static void dump_aml_files(test_data *data, bool rebuild) data->arch, data->machine, sdt->aml, ext); - /* - * To keep test cases not failing before the DATA files are moved to - * ${arch}/${machine} folder, add this check as well. - */ - if (!g_file_test(aml_file, G_FILE_TEST_EXISTS)) { - aml_file = g_strdup_printf("%s/%s/%.4s%s", data_dir, - data->machine, sdt->aml, ext); - } - if (!g_file_test(aml_file, G_FILE_TEST_EXISTS) && sdt->aml_len == exp_sdt->aml_len && !memcmp(sdt->aml, exp_sdt->aml, sdt->aml_len)) { @@ -412,11 +403,6 @@ static GArray *load_expected_aml(test_data *data) try_again: aml_file = g_strdup_printf("%s/%s/%s/%.4s%s", data_dir, data->arch, data->machine, sdt->aml, ext); - if (!g_file_test(aml_file, G_FILE_TEST_EXISTS)) { - aml_file = g_strdup_printf("%s/%s/%.4s%s", data_dir, data->machine, - sdt->aml, ext); - } - if (verbosity_level >= 2) { fprintf(stderr, "Looking for expected file '%s'\n", aml_file); } @@ -1977,6 +1963,28 @@ static void test_acpi_microvm_acpi_erst(void) } #endif /* CONFIG_POSIX */ +static void test_acpi_riscv64_virt_tcg(void) +{ + test_data data = { + .machine = "virt", + .arch = "riscv64", + .tcg_only = true, + .uefi_fl1 = "pc-bios/edk2-riscv-code.fd", + .uefi_fl2 = "pc-bios/edk2-riscv-vars.fd", + .cd = "tests/data/uefi-boot-images/bios-tables-test.riscv64.iso.qcow2", + .ram_start = 0x80000000ULL, + .scan_len = 128ULL * 1024 * 1024, + }; + + /* + * RHCT will have ISA string encoded. To reduce the effort + * of updating expected AML file for any new default ISA extension, + * use the profile rva22s64. + */ + test_acpi_one("-cpu rva22s64 ", &data); + free_test_data(&data); +} + static void test_acpi_aarch64_virt_tcg(void) { test_data data = { @@ -2455,6 +2463,10 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/virt/viot", test_acpi_aarch64_virt_viot); } } + } else if (strcmp(arch, "riscv64") == 0) { + if (has_tcg && qtest_has_device("virtio-blk-pci")) { + qtest_add_func("acpi/virt", test_acpi_riscv64_virt_tcg); + } } ret = g_test_run(); boot_sector_cleanup(disk); |