author    Stefan Hajnoczi <stefanha@redhat.com>    2022-11-07 18:43:56 -0500
committer Stefan Hajnoczi <stefanha@redhat.com>    2022-11-07 18:43:56 -0500
commit    f21f1cfeb94b94ce044726856c291bed9391e3a4 (patch)
tree      051f38512751eceb808446cfb01fe22ce2f3b259 /hw
parent    524fc737431d240f9d9f10aaf381003092868bac (diff)
parent    1ef47f40dce3d5b176ddf76d57b5bfa2efb0b3c6 (diff)
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
pci,pc,virtio: features, tests, fixes, cleanups

lots of acpi rework
first version of biosbits infrastructure
ASID support in vhost-vdpa
core_count2 support in smbios
PCIe DOE emulation
virtio vq reset
HMAT support
part of infrastructure for viommu support in vhost-vdpa
VTD PASID support
fixes, tests all over the place

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# -----BEGIN PGP SIGNATURE-----
#
# iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmNpXDkPHG1zdEByZWRo
# YXQuY29tAAoJECgfDbjSjVRpD0AH/2G8ZPrgrxJC9y3uD5/5J6QRzO+TsDYbg5ut
# uBf4rKSHHzcu6zdyAfsrhbAKKzyD4HrEGNXZrBjnKM1xCiB/SGBcDIWntwrca2+s
# 5Dpbi4xvd4tg6tVD4b47XNDCcn2uUbeI0e2M5QIbtCmzdi/xKbFAfl5G8DQp431X
# Kmz79G4CdKWyjVlM0HoYmdCw/4FxkdjD02tE/Uc5YMrePNaEg5Bw4hjCHbx1b6ur
# 6gjeXAtncm9s4sO0l+sIdyiqlxiTry9FSr35WaQ0qPU+Og5zaf1EiWfdl8TRo4qU
# EAATw5A4hyw11GfOGp7oOVkTGvcNB/H7aIxD7emdWZV8+BMRPKo=
# =zTCn
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 07 Nov 2022 14:27:53 EST
# gpg:                using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg:                issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full]
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>" [full]
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469

* tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu: (83 commits)
  checkpatch: better pattern for inline comments
  hw/virtio: introduce virtio_device_should_start
  tests/acpi: update tables for new core count test
  bios-tables-test: add test for number of cores > 255
  tests/acpi: allow changes for core_count2 test
  bios-tables-test: teach test to use smbios 3.0 tables
  hw/smbios: add core_count2 to smbios table type 4
  vhost-user: Support vhost_dev_start
  vhost: Change the sequence of device start
  intel-iommu: PASID support
  intel-iommu: convert VTD_PE_GET_FPD_ERR() to be a function
  intel-iommu: drop VTDBus
  intel-iommu: don't warn guest errors when getting rid2pasid entry
  vfio: move implement of vfio_get_xlat_addr() to memory.c
  tests: virt: Update expected *.acpihmatvirt tables
  tests: acpi: aarch64/virt: add a test for hmat nodes with no initiators
  hw/arm/virt: Enable HMAT on arm virt machine
  tests: Add HMAT AArch64/virt empty table files
  tests: acpi: q35: update expected blobs *.hmat-noinitiators expected HMAT:
  tests: acpi: q35: add test for hmat nodes without initiators
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw')
-rw-r--r--  hw/acpi/aml-build-stub.c       |  10
-rw-r--r--  hw/acpi/aml-build.c            |  13
-rw-r--r--  hw/acpi/erst.c                 |   6
-rw-r--r--  hw/acpi/nvdimm.c               | 106
-rw-r--r--  hw/arm/Kconfig                 |   1
-rw-r--r--  hw/arm/virt-acpi-build.c       |  33
-rw-r--r--  hw/block/vhost-user-blk.c      |  24
-rw-r--r--  hw/core/machine.c              |   8
-rw-r--r--  hw/cxl/cxl-cdat.c              | 224
-rw-r--r--  hw/cxl/meson.build             |   1
-rw-r--r--  hw/display/acpi-vga-stub.c     |   7
-rw-r--r--  hw/display/acpi-vga.c          |  26
-rw-r--r--  hw/display/meson.build         |  17
-rw-r--r--  hw/display/vga-pci.c           |   4
-rw-r--r--  hw/display/vga_int.h           |   2
-rw-r--r--  hw/i386/acpi-build.c           | 203
-rw-r--r--  hw/i386/e820_memory_layout.c   |  20
-rw-r--r--  hw/i386/e820_memory_layout.h   |   8
-rw-r--r--  hw/i386/fw_cfg.c               |   3
-rw-r--r--  hw/i386/fw_cfg.h               |   1
-rw-r--r--  hw/i386/intel_iommu.c          | 694
-rw-r--r--  hw/i386/intel_iommu_internal.h |  16
-rw-r--r--  hw/i386/microvm.c              |   2
-rw-r--r--  hw/i386/pc.c                   |   2
-rw-r--r--  hw/i386/trace-events           |   2
-rw-r--r--  hw/isa/lpc_ich9.c              |  23
-rw-r--r--  hw/isa/piix3.c                 |  17
-rw-r--r--  hw/mem/cxl_type3.c             | 264
-rw-r--r--  hw/net/e1000e.c                |  15
-rw-r--r--  hw/net/rocker/rocker.c         |  23
-rw-r--r--  hw/net/vhost_net-stub.c        |  12
-rw-r--r--  hw/net/vhost_net.c             |  90
-rw-r--r--  hw/net/virtio-net.c            |  57
-rw-r--r--  hw/net/vmxnet3.c               |  27
-rw-r--r--  hw/nvme/ctrl.c                 |   5
-rw-r--r--  hw/pci-bridge/cxl_upstream.c   | 195
-rw-r--r--  hw/pci/meson.build             |   1
-rw-r--r--  hw/pci/msix.c                  |  24
-rw-r--r--  hw/pci/pcie_doe.c              | 367
-rw-r--r--  hw/rdma/vmw/pvrdma_main.c      |   7
-rw-r--r--  hw/remote/vfio-user-obj.c      |   9
-rw-r--r--  hw/smbios/smbios.c             |  19
-rw-r--r--  hw/smbios/smbios_build.h       |   9
-rw-r--r--  hw/vfio/common.c               |  66
-rw-r--r--  hw/virtio/vhost-user-fs.c      |   2
-rw-r--r--  hw/virtio/vhost-user-gpio.c    |   2
-rw-r--r--  hw/virtio/vhost-user-i2c.c     |   2
-rw-r--r--  hw/virtio/vhost-user-rng.c     |   2
-rw-r--r--  hw/virtio/vhost-user-vsock.c   |   2
-rw-r--r--  hw/virtio/vhost-user.c         |  79
-rw-r--r--  hw/virtio/vhost-vsock.c        |   2
-rw-r--r--  hw/virtio/vhost.c              |  16
-rw-r--r--  hw/virtio/virtio-crypto.c      | 337
-rw-r--r--  hw/virtio/virtio-iommu-pci.c   |  12
-rw-r--r--  hw/virtio/virtio-pci.c         |  83
-rw-r--r--  hw/virtio/virtio-rng-pci.c     |  14
-rw-r--r--  hw/virtio/virtio.c             |  62
57 files changed, 2434 insertions(+), 844 deletions(-)
diff --git a/hw/acpi/aml-build-stub.c b/hw/acpi/aml-build-stub.c
index 8d8ad1a314..89a8fec4af 100644
--- a/hw/acpi/aml-build-stub.c
+++ b/hw/acpi/aml-build-stub.c
@@ -26,6 +26,16 @@ void aml_append(Aml *parent_ctx, Aml *child)
{
}
+Aml *aml_return(Aml *val)
+{
+ return NULL;
+}
+
+Aml *aml_method(const char *name, int arg_count, AmlSerializeFlag sflag)
+{
+ return NULL;
+}
+
Aml *aml_resource_template(void)
{
return NULL;
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index e6bfac95c7..42feb4d4d7 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -2070,7 +2070,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
acpi_table_end(linker, &table);
}
-/* build rev1/rev3/rev5.1 FADT */
+/* build rev1/rev3/rev5.1/rev6.0 FADT */
void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f,
const char *oem_id, const char *oem_table_id)
{
@@ -2193,8 +2193,15 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f,
/* SLEEP_STATUS_REG */
build_append_gas_from_struct(tbl, &f->sleep_sts);
- /* TODO: extra fields need to be added to support revisions above rev5 */
- assert(f->rev == 5);
+ if (f->rev == 5) {
+ goto done;
+ }
+
+ /* Hypervisor Vendor Identity */
+ build_append_padded_str(tbl, "QEMU", 8, '\0');
+
+ /* TODO: extra fields need to be added to support revisions above rev6 */
+ assert(f->rev == 6);
done:
acpi_table_end(linker, &table);
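The new rev6 branch relies on build_append_padded_str() emitting a fixed-width, NUL-padded field. A minimal sketch of that padding behavior (illustrative helper, not the function's actual body):

    /* Append str, then pad with `pad` bytes up to a fixed width. */
    static void append_padded_str(GArray *tbl, const char *str,
                                  size_t width, char pad)
    {
        size_t len = strlen(str);

        assert(len <= width);
        g_array_append_vals(tbl, str, len);
        for (size_t i = len; i < width; i++) {
            g_array_append_vals(tbl, &pad, 1);
        }
    }

With this, "QEMU" fills the 8-byte Hypervisor Vendor Identity field as 'Q','E','M','U' followed by four NUL bytes.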
diff --git a/hw/acpi/erst.c b/hw/acpi/erst.c
index df856b2669..aefcc03ad6 100644
--- a/hw/acpi/erst.c
+++ b/hw/acpi/erst.c
@@ -635,7 +635,7 @@ static unsigned read_erst_record(ERSTDeviceState *s)
if (record_length < UEFI_CPER_RECORD_MIN_SIZE) {
rc = STATUS_FAILED;
}
- if ((s->record_offset + record_length) > exchange_length) {
+ if (record_length > exchange_length - s->record_offset) {
rc = STATUS_FAILED;
}
/* If all is ok, copy the record to the exchange buffer */
@@ -684,7 +684,7 @@ static unsigned write_erst_record(ERSTDeviceState *s)
if (record_length < UEFI_CPER_RECORD_MIN_SIZE) {
return STATUS_FAILED;
}
- if ((s->record_offset + record_length) > exchange_length) {
+ if (record_length > exchange_length - s->record_offset) {
return STATUS_FAILED;
}
@@ -716,7 +716,7 @@ static unsigned write_erst_record(ERSTDeviceState *s)
if (nvram) {
/* Write the record into the slot */
memcpy(nvram, exchange, record_length);
- memset(nvram + record_length, exchange_length - record_length, 0xFF);
+ memset(nvram + record_length, 0xFF, exchange_length - record_length);
/* If a new record, increment the record_count */
if (!record_found) {
uint32_t record_count;
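Both rewritten bounds checks above compute the same condition without unsigned wrap-around, and the memset() fix puts the fill value and size arguments in their correct order. A standalone sketch of why the reordered comparison is safer (hypothetical helper; assumes offset <= limit has already been validated, as it is here):

    /* "offset + length > limit" can wrap and falsely pass for a huge
     * length; comparing against the remaining space cannot. */
    static bool record_fits(uint32_t offset, uint32_t length, uint32_t limit)
    {
        assert(offset <= limit);
        return length <= limit - offset;
    }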
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 31e46df0bd..a3b25a92f3 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -922,6 +922,7 @@ void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io,
#define NVDIMM_DSM_RFIT_STATUS "RSTA"
#define NVDIMM_QEMU_RSVD_UUID "648B9CF2-CDA1-4312-8AD9-49C4AF32BD62"
+#define NVDIMM_DEVICE_DSM_UUID "4309AC30-0D11-11E4-9191-0800200C9A66"
static void nvdimm_build_common_dsm(Aml *dev,
NVDIMMState *nvdimm_state)
@@ -1029,15 +1030,14 @@ static void nvdimm_build_common_dsm(Aml *dev,
/* UUID for QEMU internal use */), expected_uuid));
aml_append(elsectx, ifctx);
elsectx2 = aml_else();
- aml_append(elsectx2, aml_store(
- aml_touuid("4309AC30-0D11-11E4-9191-0800200C9A66")
+ aml_append(elsectx2, aml_store(aml_touuid(NVDIMM_DEVICE_DSM_UUID)
/* UUID for NVDIMM Devices */, expected_uuid));
aml_append(elsectx, elsectx2);
aml_append(method, elsectx);
uuid_invalid = aml_lnot(aml_equal(uuid, expected_uuid));
- unsupport = aml_if(aml_or(unpatched, uuid_invalid, NULL));
+ unsupport = aml_if(aml_lor(unpatched, uuid_invalid));
/*
* function 0 is called to inquire what functions are supported by
@@ -1069,10 +1069,9 @@ static void nvdimm_build_common_dsm(Aml *dev,
* in the DSM Spec.
*/
pckg = aml_arg(3);
- ifctx = aml_if(aml_and(aml_equal(aml_object_type(pckg),
+ ifctx = aml_if(aml_land(aml_equal(aml_object_type(pckg),
aml_int(4 /* Package */)) /* It is a Package? */,
- aml_equal(aml_sizeof(pckg), aml_int(1)) /* 1 element? */,
- NULL));
+ aml_equal(aml_sizeof(pckg), aml_int(1)) /* 1 element? */));
pckg_index = aml_local(2);
pckg_buf = aml_local(3);
@@ -1244,6 +1243,7 @@ static void nvdimm_build_fit(Aml *dev)
static void nvdimm_build_nvdimm_devices(Aml *root_dev, uint32_t ram_slots)
{
uint32_t slot;
+ Aml *method, *pkg, *field, *com_call;
for (slot = 0; slot < ram_slots; slot++) {
uint32_t handle = nvdimm_slot_to_handle(slot);
@@ -1261,6 +1261,100 @@ static void nvdimm_build_nvdimm_devices(Aml *root_dev, uint32_t ram_slots)
*/
aml_append(nvdimm_dev, aml_name_decl("_ADR", aml_int(handle)));
+ /*
+ * ACPI v6.4: Section 6.5.10 NVDIMM Label Methods
+ */
+ /* _LSI */
+ method = aml_method("_LSI", 0, AML_SERIALIZED);
+ com_call = aml_call5(NVDIMM_COMMON_DSM,
+ aml_touuid(NVDIMM_DEVICE_DSM_UUID),
+ aml_int(1), aml_int(4), aml_int(0),
+ aml_int(handle));
+ aml_append(method, aml_store(com_call, aml_local(0)));
+
+ aml_append(method, aml_create_dword_field(aml_local(0),
+ aml_int(0), "STTS"));
+ aml_append(method, aml_create_dword_field(aml_local(0), aml_int(4),
+ "SLSA"));
+ aml_append(method, aml_create_dword_field(aml_local(0), aml_int(8),
+ "MAXT"));
+
+ pkg = aml_package(3);
+ aml_append(pkg, aml_name("STTS"));
+ aml_append(pkg, aml_name("SLSA"));
+ aml_append(pkg, aml_name("MAXT"));
+ aml_append(method, aml_store(pkg, aml_local(1)));
+ aml_append(method, aml_return(aml_local(1)));
+
+ aml_append(nvdimm_dev, method);
+
+ /* _LSR */
+ method = aml_method("_LSR", 2, AML_SERIALIZED);
+ aml_append(method, aml_name_decl("INPT", aml_buffer(8, NULL)));
+
+ aml_append(method, aml_create_dword_field(aml_name("INPT"),
+ aml_int(0), "OFST"));
+ aml_append(method, aml_create_dword_field(aml_name("INPT"),
+ aml_int(4), "LEN"));
+ aml_append(method, aml_store(aml_arg(0), aml_name("OFST")));
+ aml_append(method, aml_store(aml_arg(1), aml_name("LEN")));
+
+ pkg = aml_package(1);
+ aml_append(pkg, aml_name("INPT"));
+ aml_append(method, aml_store(pkg, aml_local(0)));
+
+ com_call = aml_call5(NVDIMM_COMMON_DSM,
+ aml_touuid(NVDIMM_DEVICE_DSM_UUID),
+ aml_int(1), aml_int(5), aml_local(0),
+ aml_int(handle));
+ aml_append(method, aml_store(com_call, aml_local(3)));
+ field = aml_create_dword_field(aml_local(3), aml_int(0), "STTS");
+ aml_append(method, field);
+ field = aml_create_field(aml_local(3), aml_int(32),
+ aml_shiftleft(aml_name("LEN"), aml_int(3)),
+ "LDAT");
+ aml_append(method, field);
+ aml_append(method, aml_name_decl("LSA", aml_buffer(0, NULL)));
+ aml_append(method, aml_to_buffer(aml_name("LDAT"), aml_name("LSA")));
+
+ pkg = aml_package(2);
+ aml_append(pkg, aml_name("STTS"));
+ aml_append(pkg, aml_name("LSA"));
+
+ aml_append(method, aml_store(pkg, aml_local(1)));
+ aml_append(method, aml_return(aml_local(1)));
+
+ aml_append(nvdimm_dev, method);
+
+ /* _LSW */
+ method = aml_method("_LSW", 3, AML_SERIALIZED);
+ aml_append(method, aml_store(aml_arg(2), aml_local(2)));
+ aml_append(method, aml_name_decl("INPT", aml_buffer(8, NULL)));
+ field = aml_create_dword_field(aml_name("INPT"),
+ aml_int(0), "OFST");
+ aml_append(method, field);
+ field = aml_create_dword_field(aml_name("INPT"),
+ aml_int(4), "TLEN");
+ aml_append(method, field);
+ aml_append(method, aml_store(aml_arg(0), aml_name("OFST")));
+ aml_append(method, aml_store(aml_arg(1), aml_name("TLEN")));
+
+ aml_append(method, aml_concatenate(aml_name("INPT"), aml_local(2),
+ aml_name("INPT")));
+ pkg = aml_package(1);
+ aml_append(pkg, aml_name("INPT"));
+ aml_append(method, aml_store(pkg, aml_local(0)));
+ com_call = aml_call5(NVDIMM_COMMON_DSM,
+ aml_touuid(NVDIMM_DEVICE_DSM_UUID),
+ aml_int(1), aml_int(6), aml_local(0),
+ aml_int(handle));
+ aml_append(method, aml_store(com_call, aml_local(3)));
+ field = aml_create_dword_field(aml_local(3), aml_int(0), "STTS");
+ aml_append(method, field);
+ aml_append(method, aml_return(aml_name("STTS")));
+
+ aml_append(nvdimm_dev, method);
+
nvdimm_build_device_dsm(nvdimm_dev, handle);
aml_append(root_dev, nvdimm_dev);
}
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 15fa79afd3..17fcde8e1c 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -30,6 +30,7 @@ config ARM_VIRT
select ACPI_VIOT
select VIRTIO_MEM_SUPPORTED
select ACPI_CXL
+ select ACPI_HMAT
config CHEETAH
bool
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 13c6e3e468..4156111d49 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -42,6 +42,7 @@
#include "hw/acpi/memory_hotplug.h"
#include "hw/acpi/generic_event_device.h"
#include "hw/acpi/tpm.h"
+#include "hw/acpi/hmat.h"
#include "hw/pci/pcie_host.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
@@ -685,7 +686,7 @@ build_dbg2(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
};
/*
- * ACPI spec, Revision 5.1 Errata A
+ * ACPI spec, Revision 6.0 Errata A
* 5.2.12 Multiple APIC Description Table (MADT)
*/
static void build_append_gicr(GArray *table_data, uint64_t base, uint32_t size)
@@ -704,7 +705,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
int i;
VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
const MemMapEntry *memmap = vms->memmap;
- AcpiTable table = { .sig = "APIC", .rev = 3, .oem_id = vms->oem_id,
+ AcpiTable table = { .sig = "APIC", .rev = 4, .oem_id = vms->oem_id,
.oem_table_id = vms->oem_table_id };
acpi_table_begin(&table, table_data);
@@ -739,7 +740,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
/* 5.2.12.14 GIC Structure */
build_append_int_noprefix(table_data, 0xB, 1); /* Type */
- build_append_int_noprefix(table_data, 76, 1); /* Length */
+ build_append_int_noprefix(table_data, 80, 1); /* Length */
build_append_int_noprefix(table_data, 0, 2); /* Reserved */
build_append_int_noprefix(table_data, i, 4); /* GIC ID */
build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */
@@ -759,6 +760,10 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
build_append_int_noprefix(table_data, 0, 8); /* GICR Base Address*/
/* MPIDR */
build_append_int_noprefix(table_data, armcpu->mp_affinity, 8);
+ /* Processor Power Efficiency Class */
+ build_append_int_noprefix(table_data, 0, 1);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 3);
}
if (vms->gic_version != VIRT_GIC_VERSION_2) {
@@ -771,12 +776,6 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
if (its_class_name() && !vmc->no_its) {
/*
- * FIXME: Structure is from Revision 6.0 where 'GIC Structure'
- * has additional fields on top of implemented 5.1 Errata A,
- * to make it consistent with v6.0 we need to bump everything
- * to v6.0
- */
- /*
* ACPI spec, Revision 6.0 Errata A
* (original 6.0 definition has invalid Length)
* 5.2.12.18 GIC ITS Structure
@@ -808,13 +807,13 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
}
/* FADT */
-static void build_fadt_rev5(GArray *table_data, BIOSLinker *linker,
+static void build_fadt_rev6(GArray *table_data, BIOSLinker *linker,
VirtMachineState *vms, unsigned dsdt_tbl_offset)
{
- /* ACPI v5.1 */
+ /* ACPI v6.0 */
AcpiFadtData fadt = {
- .rev = 5,
- .minor_ver = 1,
+ .rev = 6,
+ .minor_ver = 0,
.flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI,
.xdsdt_tbl_offset = &dsdt_tbl_offset,
};
@@ -944,7 +943,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
/* FADT MADT PPTT GTDT MCFG SPCR DBG2 pointed to by RSDT */
acpi_add_table(table_offsets, tables_blob);
- build_fadt_rev5(tables_blob, tables->linker, vms, dsdt);
+ build_fadt_rev6(tables_blob, tables->linker, vms, dsdt);
acpi_add_table(table_offsets, tables_blob);
build_madt(tables_blob, tables->linker, vms);
@@ -989,6 +988,12 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
build_slit(tables_blob, tables->linker, ms, vms->oem_id,
vms->oem_table_id);
}
+
+ if (ms->numa_state->hmat_enabled) {
+ acpi_add_table(table_offsets, tables_blob);
+ build_hmat(tables_blob, tables->linker, ms->numa_state,
+ vms->oem_id, vms->oem_table_id);
+ }
}
if (ms->nvdimms_state->is_enabled) {
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 13bf5cc47a..16ad400889 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -168,13 +168,6 @@ static int vhost_user_blk_start(VirtIODevice *vdev, Error **errp)
goto err_guest_notifiers;
}
- ret = vhost_dev_start(&s->dev, vdev);
- if (ret < 0) {
- error_setg_errno(errp, -ret, "Error starting vhost");
- goto err_guest_notifiers;
- }
- s->started_vu = true;
-
/* guest_notifier_mask/pending not used yet, so just unmask
* everything here. virtio-pci will do the right thing by
* enabling/disabling irqfd.
@@ -183,9 +176,20 @@ static int vhost_user_blk_start(VirtIODevice *vdev, Error **errp)
vhost_virtqueue_mask(&s->dev, vdev, i, false);
}
+ s->dev.vq_index_end = s->dev.nvqs;
+ ret = vhost_dev_start(&s->dev, vdev);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Error starting vhost");
+ goto err_guest_notifiers;
+ }
+ s->started_vu = true;
+
return ret;
err_guest_notifiers:
+ for (i = 0; i < s->dev.nvqs; i++) {
+ vhost_virtqueue_mask(&s->dev, vdev, i, true);
+ }
k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
err_host_notifiers:
vhost_dev_disable_notifiers(&s->dev, vdev);
@@ -222,14 +226,10 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostUserBlk *s = VHOST_USER_BLK(vdev);
- bool should_start = virtio_device_started(vdev, status);
+ bool should_start = virtio_device_should_start(vdev, status);
Error *local_err = NULL;
int ret;
- if (!vdev->vm_running) {
- should_start = false;
- }
-
if (!s->connected) {
return;
}
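virtio_device_should_start(), introduced in this merge, folds the vm_running test into the started check so each vhost-user backend no longer open-codes it. A rough sketch of the semantics relied on here (assumed, not the helper's exact body):

    /* Start only once the guest has set DRIVER_OK and the VM is running. */
    static bool should_start(VirtIODevice *vdev, uint8_t status)
    {
        return (status & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->vm_running;
    }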
diff --git a/hw/core/machine.c b/hw/core/machine.c
index aa520e74a8..8d34caa31d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -40,7 +40,9 @@
#include "hw/virtio/virtio-pci.h"
#include "qom/object_interfaces.h"
-GlobalProperty hw_compat_7_1[] = {};
+GlobalProperty hw_compat_7_1[] = {
+ { "virtio-device", "queue_reset", "false" },
+};
const size_t hw_compat_7_1_len = G_N_ELEMENTS(hw_compat_7_1);
GlobalProperty hw_compat_7_0[] = {
@@ -1176,9 +1178,7 @@ static void numa_validate_initiator(NumaState *numa_state)
for (i = 0; i < numa_state->num_nodes; i++) {
if (numa_info[i].initiator == MAX_NODES) {
- error_report("The initiator of NUMA node %d is missing, use "
- "'-numa node,initiator' option to declare it", i);
- exit(1);
+ continue;
}
if (!numa_info[numa_info[i].initiator].present) {
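The hw_compat_7_1 entry pins the 7.1 default (no virtio queue reset) on older machine types so guests keep a stable device ABI across QEMU upgrades. A sketch of how a versioned machine class typically consumes such an array (class name hypothetical, following QEMU's usual pattern):

    static void my_machine_7_1_class_init(ObjectClass *oc, void *data)
    {
        MachineClass *mc = MACHINE_CLASS(oc);

        /* Apply 7.1-era property defaults on top of the current ones. */
        compat_props_add(mc->compat_props, hw_compat_7_1, hw_compat_7_1_len);
    }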
diff --git a/hw/cxl/cxl-cdat.c b/hw/cxl/cxl-cdat.c
new file mode 100644
index 0000000000..3653aa56f0
--- /dev/null
+++ b/hw/cxl/cxl-cdat.c
@@ -0,0 +1,224 @@
+/*
+ * CXL CDAT Structure
+ *
+ * Copyright (C) 2021 Avery Design Systems, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/pci/pci.h"
+#include "hw/cxl/cxl.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+
+static void cdat_len_check(CDATSubHeader *hdr, Error **errp)
+{
+ assert(hdr->length);
+ assert(hdr->reserved == 0);
+
+ switch (hdr->type) {
+ case CDAT_TYPE_DSMAS:
+ assert(hdr->length == sizeof(CDATDsmas));
+ break;
+ case CDAT_TYPE_DSLBIS:
+ assert(hdr->length == sizeof(CDATDslbis));
+ break;
+ case CDAT_TYPE_DSMSCIS:
+ assert(hdr->length == sizeof(CDATDsmscis));
+ break;
+ case CDAT_TYPE_DSIS:
+ assert(hdr->length == sizeof(CDATDsis));
+ break;
+ case CDAT_TYPE_DSEMTS:
+ assert(hdr->length == sizeof(CDATDsemts));
+ break;
+ case CDAT_TYPE_SSLBIS:
+ assert(hdr->length >= sizeof(CDATSslbisHeader));
+ assert((hdr->length - sizeof(CDATSslbisHeader)) %
+ sizeof(CDATSslbe) == 0);
+ break;
+ default:
+ error_setg(errp, "Type %d is reserved", hdr->type);
+ }
+}
+
+static void ct3_build_cdat(CDATObject *cdat, Error **errp)
+{
+ g_autofree CDATTableHeader *cdat_header = NULL;
+ g_autofree CDATEntry *cdat_st = NULL;
+ uint8_t sum = 0;
+ int ent, i;
+
+ /* Building the default table; a build_cdat_table callback must exist */
+ assert(cdat->build_cdat_table);
+
+ cdat_header = g_malloc0(sizeof(*cdat_header));
+ if (!cdat_header) {
+ error_setg(errp, "Failed to allocate CDAT header");
+ return;
+ }
+
+ cdat->built_buf_len = cdat->build_cdat_table(&cdat->built_buf, cdat->private);
+
+ if (!cdat->built_buf_len) {
+ /* Build later as not all data available yet */
+ cdat->to_update = true;
+ return;
+ }
+ cdat->to_update = false;
+
+ cdat_st = g_malloc0(sizeof(*cdat_st) * (cdat->built_buf_len + 1));
+ if (!cdat_st) {
+ error_setg(errp, "Failed to allocate CDAT entry array");
+ return;
+ }
+
+ /* Entry 0 is reserved for the CDAT header, so structures start at 1 */
+ for (ent = 1; ent < cdat->built_buf_len + 1; ent++) {
+ CDATSubHeader *hdr = cdat->built_buf[ent - 1];
+ uint8_t *buf = (uint8_t *)cdat->built_buf[ent - 1];
+
+ cdat_st[ent].base = hdr;
+ cdat_st[ent].length = hdr->length;
+
+ cdat_header->length += hdr->length;
+ for (i = 0; i < hdr->length; i++) {
+ sum += buf[i];
+ }
+ }
+
+ /* CDAT header */
+ cdat_header->revision = CXL_CDAT_REV;
+ /* For now, no runtime updates */
+ cdat_header->sequence = 0;
+ cdat_header->length += sizeof(CDATTableHeader);
+ sum += cdat_header->revision + cdat_header->sequence +
+ cdat_header->length;
+ /* Sum of all bytes including checksum must be 0 */
+ cdat_header->checksum = ~sum + 1;
+
+ cdat_st[0].base = g_steal_pointer(&cdat_header);
+ cdat_st[0].length = sizeof(*cdat_header);
+ cdat->entry_len = 1 + cdat->built_buf_len;
+ cdat->entry = g_steal_pointer(&cdat_st);
+}
+
+static void ct3_load_cdat(CDATObject *cdat, Error **errp)
+{
+ g_autofree CDATEntry *cdat_st = NULL;
+ uint8_t sum = 0;
+ int num_ent;
+ int i = 0, ent = 1, file_size = 0;
+ CDATSubHeader *hdr;
+ FILE *fp = NULL;
+
+ /* Read CDAT file and create its cache */
+ fp = fopen(cdat->filename, "r");
+ if (!fp) {
+ error_setg(errp, "CDAT: Unable to open file");
+ return;
+ }
+
+ fseek(fp, 0, SEEK_END);
+ file_size = ftell(fp);
+ fseek(fp, 0, SEEK_SET);
+ cdat->buf = g_malloc0(file_size);
+
+ if (fread(cdat->buf, file_size, 1, fp) == 0) {
+ error_setg(errp, "CDAT: File read failed");
+ return;
+ }
+
+ fclose(fp);
+
+ if (file_size < sizeof(CDATTableHeader)) {
+ error_setg(errp, "CDAT: File too short");
+ return;
+ }
+ i = sizeof(CDATTableHeader);
+ num_ent = 1;
+ while (i < file_size) {
+ hdr = (CDATSubHeader *)(cdat->buf + i);
+ cdat_len_check(hdr, errp);
+ i += hdr->length;
+ num_ent++;
+ }
+ if (i != file_size) {
+ error_setg(errp, "CDAT: File length missmatch");
+ return;
+ }
+
+ cdat_st = g_malloc0(sizeof(*cdat_st) * num_ent);
+ if (!cdat_st) {
+ error_setg(errp, "CDAT: Failed to allocate entry array");
+ return;
+ }
+
+ /* Set CDAT header, Entry = 0 */
+ cdat_st[0].base = cdat->buf;
+ cdat_st[0].length = sizeof(CDATTableHeader);
+ i = 0;
+
+ while (i < cdat_st[0].length) {
+ sum += cdat->buf[i++];
+ }
+
+ /* Read CDAT structures */
+ while (i < file_size) {
+ hdr = (CDATSubHeader *)(cdat->buf + i);
+ cdat_len_check(hdr, errp);
+
+ cdat_st[ent].base = hdr;
+ cdat_st[ent].length = hdr->length;
+
+ while (cdat->buf + i <
+ (uint8_t *)cdat_st[ent].base + cdat_st[ent].length) {
+ assert(i < file_size);
+ sum += cdat->buf[i++];
+ }
+
+ ent++;
+ }
+
+ if (sum != 0) {
+ warn_report("CDAT: Found checksum mismatch in %s", cdat->filename);
+ }
+ cdat->entry_len = num_ent;
+ cdat->entry = g_steal_pointer(&cdat_st);
+}
+
+void cxl_doe_cdat_init(CXLComponentState *cxl_cstate, Error **errp)
+{
+ CDATObject *cdat = &cxl_cstate->cdat;
+
+ if (cdat->filename) {
+ ct3_load_cdat(cdat, errp);
+ } else {
+ ct3_build_cdat(cdat, errp);
+ }
+}
+
+void cxl_doe_cdat_update(CXLComponentState *cxl_cstate, Error **errp)
+{
+ CDATObject *cdat = &cxl_cstate->cdat;
+
+ if (cdat->to_update) {
+ ct3_build_cdat(cdat, errp);
+ }
+}
+
+void cxl_doe_cdat_release(CXLComponentState *cxl_cstate)
+{
+ CDATObject *cdat = &cxl_cstate->cdat;
+
+ free(cdat->entry);
+ if (cdat->built_buf) {
+ cdat->free_cdat_table(cdat->built_buf, cdat->built_buf_len,
+ cdat->private);
+ }
+ if (cdat->buf) {
+ free(cdat->buf);
+ }
+}
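Both the built and the loaded CDAT table use the same checksum convention: the checksum byte is chosen so that all bytes of the table, checksum included, sum to zero mod 256. A standalone sketch of that rule (illustrative helper mirroring the ~sum + 1 computation above):

    static uint8_t cdat_checksum(const uint8_t *buf, size_t len)
    {
        uint8_t sum = 0;

        for (size_t i = 0; i < len; i++) {
            sum += buf[i];
        }
        return ~sum + 1; /* two's complement: sum + checksum == 0 mod 256 */
    }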
diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build
index f117b99949..cfa95ffd40 100644
--- a/hw/cxl/meson.build
+++ b/hw/cxl/meson.build
@@ -4,6 +4,7 @@ softmmu_ss.add(when: 'CONFIG_CXL',
'cxl-device-utils.c',
'cxl-mailbox-utils.c',
'cxl-host.c',
+ 'cxl-cdat.c',
),
if_false: files(
'cxl-host-stubs.c',
diff --git a/hw/display/acpi-vga-stub.c b/hw/display/acpi-vga-stub.c
new file mode 100644
index 0000000000..a9b0ecf76d
--- /dev/null
+++ b/hw/display/acpi-vga-stub.c
@@ -0,0 +1,7 @@
+#include "qemu/osdep.h"
+#include "hw/acpi/acpi_aml_interface.h"
+#include "vga_int.h"
+
+void build_vga_aml(AcpiDevAmlIf *adev, Aml *scope)
+{
+}
diff --git a/hw/display/acpi-vga.c b/hw/display/acpi-vga.c
new file mode 100644
index 0000000000..f0e9ef1fcf
--- /dev/null
+++ b/hw/display/acpi-vga.c
@@ -0,0 +1,26 @@
+#include "qemu/osdep.h"
+#include "hw/acpi/acpi_aml_interface.h"
+#include "hw/pci/pci.h"
+#include "vga_int.h"
+
+void build_vga_aml(AcpiDevAmlIf *adev, Aml *scope)
+{
+ int s3d = 0;
+ Aml *method;
+
+ if (object_dynamic_cast(OBJECT(adev), "qxl-vga")) {
+ s3d = 3;
+ }
+
+ method = aml_method("_S1D", 0, AML_NOTSERIALIZED);
+ aml_append(method, aml_return(aml_int(0)));
+ aml_append(scope, method);
+
+ method = aml_method("_S2D", 0, AML_NOTSERIALIZED);
+ aml_append(method, aml_return(aml_int(0)));
+ aml_append(scope, method);
+
+ method = aml_method("_S3D", 0, AML_NOTSERIALIZED);
+ aml_append(method, aml_return(aml_int(s3d)));
+ aml_append(scope, method);
+}
diff --git a/hw/display/meson.build b/hw/display/meson.build
index adc53dd8b6..7a725ed80e 100644
--- a/hw/display/meson.build
+++ b/hw/display/meson.build
@@ -38,10 +38,21 @@ softmmu_ss.add(when: 'CONFIG_NEXTCUBE', if_true: files('next-fb.c'))
specific_ss.add(when: 'CONFIG_VGA', if_true: files('vga.c'))
+if (config_all_devices.has_key('CONFIG_VGA_CIRRUS') or
+ config_all_devices.has_key('CONFIG_VGA_PCI') or
+ config_all_devices.has_key('CONFIG_VMWARE_VGA') or
+ config_all_devices.has_key('CONFIG_ATI_VGA')
+ )
+ softmmu_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-vga.c'),
+ if_false: files('acpi-vga-stub.c'))
+endif
+
if config_all_devices.has_key('CONFIG_QXL')
qxl_ss = ss.source_set()
qxl_ss.add(when: 'CONFIG_QXL', if_true: [files('qxl.c', 'qxl-logger.c', 'qxl-render.c'),
pixman, spice])
+ qxl_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-vga.c'),
+ if_false: files('acpi-vga-stub.c'))
hw_display_modules += {'qxl': qxl_ss}
endif
@@ -52,6 +63,7 @@ softmmu_ss.add(when: 'CONFIG_ARTIST', if_true: files('artist.c'))
softmmu_ss.add(when: [pixman, 'CONFIG_ATI_VGA'], if_true: files('ati.c', 'ati_2d.c', 'ati_dbg.c'))
+
if config_all_devices.has_key('CONFIG_VIRTIO_GPU')
virtio_gpu_ss = ss.source_set()
virtio_gpu_ss.add(when: 'CONFIG_VIRTIO_GPU',
@@ -87,14 +99,19 @@ if config_all_devices.has_key('CONFIG_VIRTIO_VGA')
if_true: [files('virtio-vga.c'), pixman])
virtio_vga_ss.add(when: 'CONFIG_VHOST_USER_VGA',
if_true: files('vhost-user-vga.c'))
+ virtio_vga_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-vga.c'),
+ if_false: files('acpi-vga-stub.c'))
hw_display_modules += {'virtio-vga': virtio_vga_ss}
virtio_vga_gl_ss = ss.source_set()
virtio_vga_gl_ss.add(when: ['CONFIG_VIRTIO_VGA', virgl, opengl],
if_true: [files('virtio-vga-gl.c'), pixman])
+ virtio_vga_gl_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-vga.c'),
+ if_false: files('acpi-vga-stub.c'))
hw_display_modules += {'virtio-vga-gl': virtio_vga_gl_ss}
endif
specific_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_lcdc.c'))
+softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('acpi-vga-stub.c'))
modules += { 'hw-display': hw_display_modules }
diff --git a/hw/display/vga-pci.c b/hw/display/vga-pci.c
index 3e5bc259f7..9a91de7ed1 100644
--- a/hw/display/vga-pci.c
+++ b/hw/display/vga-pci.c
@@ -35,6 +35,7 @@
#include "hw/loader.h"
#include "hw/display/edid.h"
#include "qom/object.h"
+#include "hw/acpi/acpi_aml_interface.h"
enum vga_pci_flags {
PCI_VGA_FLAG_ENABLE_MMIO = 1,
@@ -354,11 +355,13 @@ static void vga_pci_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+ AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass);
k->vendor_id = PCI_VENDOR_ID_QEMU;
k->device_id = PCI_DEVICE_ID_QEMU_VGA;
dc->vmsd = &vmstate_vga_pci;
set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
+ adevc->build_dev_aml = build_vga_aml;
}
static const TypeInfo vga_pci_type_info = {
@@ -369,6 +372,7 @@ static const TypeInfo vga_pci_type_info = {
.class_init = vga_pci_class_init,
.interfaces = (InterfaceInfo[]) {
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
+ { TYPE_ACPI_DEV_AML_IF },
{ },
},
};
diff --git a/hw/display/vga_int.h b/hw/display/vga_int.h
index 305e700014..330406ad9c 100644
--- a/hw/display/vga_int.h
+++ b/hw/display/vga_int.h
@@ -30,6 +30,7 @@
#include "ui/console.h"
#include "hw/display/bochs-vbe.h"
+#include "hw/acpi/acpi_aml_interface.h"
#define ST01_V_RETRACE 0x08
#define ST01_DISP_ENABLE 0x01
@@ -195,4 +196,5 @@ void pci_std_vga_mmio_region_init(VGACommonState *s,
MemoryRegion *subs,
bool qext, bool edid);
+void build_vga_aml(AcpiDevAmlIf *adev, Aml *scope);
#endif
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 4f54b61904..d9eaa5fc4d 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -60,6 +60,7 @@
#include "hw/i386/fw_cfg.h"
#include "hw/i386/ich9.h"
#include "hw/pci/pci_bus.h"
+#include "hw/pci-host/i440fx.h"
#include "hw/pci-host/q35.h"
#include "hw/i386/x86-iommu.h"
@@ -112,7 +113,6 @@ typedef struct AcpiPmInfo {
} AcpiPmInfo;
typedef struct AcpiMiscInfo {
- bool is_piix4;
bool has_hpet;
#ifdef CONFIG_TPM
TPMVersion tpm_version;
@@ -121,13 +121,6 @@ typedef struct AcpiMiscInfo {
unsigned dsdt_size;
} AcpiMiscInfo;
-typedef struct AcpiBuildPciBusHotplugState {
- GArray *device_table;
- GArray *notify_table;
- struct AcpiBuildPciBusHotplugState *parent;
- bool pcihp_bridge_en;
-} AcpiBuildPciBusHotplugState;
-
typedef struct FwCfgTPMConfig {
uint32_t tpmppi_address;
uint8_t tpm_version;
@@ -288,17 +281,6 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm)
static void acpi_get_misc_info(AcpiMiscInfo *info)
{
- Object *piix = object_resolve_type_unambiguous(TYPE_PIIX4_PM);
- Object *lpc = object_resolve_type_unambiguous(TYPE_ICH9_LPC_DEVICE);
- assert(!!piix != !!lpc);
-
- if (piix) {
- info->is_piix4 = true;
- }
- if (lpc) {
- info->is_piix4 = false;
- }
-
info->has_hpet = hpet_find();
#ifdef CONFIG_TPM
info->tpm_version = tpm_get_version(tpm_find());
@@ -430,18 +412,11 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus,
bool hotpluggbale_slot = false;
bool bridge_in_acpi = false;
bool cold_plugged_bridge = false;
- bool is_vga = false;
if (pdev) {
pc = PCI_DEVICE_GET_CLASS(pdev);
dc = DEVICE_GET_CLASS(pdev);
- if (pc->class_id == PCI_CLASS_BRIDGE_ISA) {
- continue;
- }
-
- is_vga = pc->class_id == PCI_CLASS_DISPLAY_VGA;
-
/*
* Cold plugged bridges aren't themselves hot-pluggable.
* Hotplugged bridges *are* hot-pluggable.
@@ -455,9 +430,10 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus,
/*
* allow describing coldplugged bridges in ACPI even if they are not
* on function 0, as they are not unpluggable, for all other devices
- * generate description only for function 0 per slot
+ * generate description only for function 0 per slot, and for other
+ * functions if device on function provides its own AML
*/
- if (func && !bridge_in_acpi) {
+ if (func && !bridge_in_acpi && !get_dev_aml_func(DEVICE(pdev))) {
continue;
}
} else {
@@ -489,28 +465,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus,
aml_append(dev, aml_pci_device_dsm());
}
- if (is_vga) {
- /* add VGA specific AML methods */
- int s3d;
-
- if (object_dynamic_cast(OBJECT(pdev), "qxl-vga")) {
- s3d = 3;
- } else {
- s3d = 0;
- }
-
- method = aml_method("_S1D", 0, AML_NOTSERIALIZED);
- aml_append(method, aml_return(aml_int(0)));
- aml_append(dev, method);
-
- method = aml_method("_S2D", 0, AML_NOTSERIALIZED);
- aml_append(method, aml_return(aml_int(0)));
- aml_append(dev, method);
-
- method = aml_method("_S3D", 0, AML_NOTSERIALIZED);
- aml_append(method, aml_return(aml_int(s3d)));
- aml_append(dev, method);
- }
+ call_dev_aml_func(DEVICE(pdev), dev);
bridge_in_acpi = cold_plugged_bridge && pcihp_bridge_en;
if (bridge_in_acpi) {
@@ -1030,7 +985,6 @@ static void build_piix4_pci0_int(Aml *table)
{
Aml *dev;
Aml *crs;
- Aml *field;
Aml *method;
uint32_t irqs;
Aml *sb_scope = aml_scope("_SB");
@@ -1039,13 +993,6 @@ static void build_piix4_pci0_int(Aml *table)
aml_append(pci0_scope, build_prt(true));
aml_append(sb_scope, pci0_scope);
- field = aml_field("PCI0.ISA.P40C", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE);
- aml_append(field, aml_named_field("PRQ0", 8));
- aml_append(field, aml_named_field("PRQ1", 8));
- aml_append(field, aml_named_field("PRQ2", 8));
- aml_append(field, aml_named_field("PRQ3", 8));
- aml_append(sb_scope, field);
-
aml_append(sb_scope, build_irq_status_method());
aml_append(sb_scope, build_iqcr_method(true));
@@ -1149,7 +1096,6 @@ static Aml *build_q35_routing_table(const char *str)
static void build_q35_pci0_int(Aml *table)
{
- Aml *field;
Aml *method;
Aml *sb_scope = aml_scope("_SB");
Aml *pci0_scope = aml_scope("PCI0");
@@ -1186,18 +1132,6 @@ static void build_q35_pci0_int(Aml *table)
aml_append(pci0_scope, method);
aml_append(sb_scope, pci0_scope);
- field = aml_field("PCI0.ISA.PIRQ", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE);
- aml_append(field, aml_named_field("PRQA", 8));
- aml_append(field, aml_named_field("PRQB", 8));
- aml_append(field, aml_named_field("PRQC", 8));
- aml_append(field, aml_named_field("PRQD", 8));
- aml_append(field, aml_reserved_field(0x20));
- aml_append(field, aml_named_field("PRQE", 8));
- aml_append(field, aml_named_field("PRQF", 8));
- aml_append(field, aml_named_field("PRQG", 8));
- aml_append(field, aml_named_field("PRQH", 8));
- aml_append(sb_scope, field);
-
aml_append(sb_scope, build_irq_status_method());
aml_append(sb_scope, build_iqcr_method(false));
@@ -1262,54 +1196,6 @@ static Aml *build_q35_dram_controller(const AcpiMcfgInfo *mcfg)
return dev;
}
-static void build_q35_isa_bridge(Aml *table)
-{
- Aml *dev;
- Aml *scope;
- Object *obj;
- bool ambiguous;
-
- /*
- * temporarily fish out isa bridge, build_q35_isa_bridge() will be dropped
- * once PCI is converted to AcpiDevAmlIf and would be ble to generate
- * AML for bridge itself
- */
- obj = object_resolve_path_type("", TYPE_ICH9_LPC_DEVICE, &ambiguous);
- assert(obj && !ambiguous);
-
- scope = aml_scope("_SB.PCI0");
- dev = aml_device("ISA");
- aml_append(dev, aml_name_decl("_ADR", aml_int(0x001F0000)));
-
- call_dev_aml_func(DEVICE(obj), dev);
- aml_append(scope, dev);
- aml_append(table, scope);
-}
-
-static void build_piix4_isa_bridge(Aml *table)
-{
- Aml *dev;
- Aml *scope;
- Object *obj;
- bool ambiguous;
-
- /*
- * temporarily fish out isa bridge, build_piix4_isa_bridge() will be dropped
- * once PCI is converted to AcpiDevAmlIf and would be ble to generate
- * AML for bridge itself
- */
- obj = object_resolve_path_type("", TYPE_PIIX3_PCI_DEVICE, &ambiguous);
- assert(obj && !ambiguous);
-
- scope = aml_scope("_SB.PCI0");
- dev = aml_device("ISA");
- aml_append(dev, aml_name_decl("_ADR", aml_int(0x00010000)));
-
- call_dev_aml_func(DEVICE(obj), dev);
- aml_append(scope, dev);
- aml_append(table, scope);
-}
-
static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr)
{
Aml *scope;
@@ -1416,25 +1302,6 @@ static Aml *build_q35_osc_method(bool enable_native_pcie_hotplug)
return method;
}
-static void build_smb0(Aml *table, int devnr, int func)
-{
- Aml *scope = aml_scope("_SB.PCI0");
- Aml *dev = aml_device("SMB0");
- bool ambiguous;
- Object *obj;
- /*
- * temporarily fish out device hosting SMBUS, build_smb0 will be gone once
- * PCI enumeration will be switched to call_dev_aml_func()
- */
- obj = object_resolve_path_type("", TYPE_ICH9_SMB_DEVICE, &ambiguous);
- assert(obj && !ambiguous);
-
- aml_append(dev, aml_name_decl("_ADR", aml_int(devnr << 16 | func)));
- call_dev_aml_func(DEVICE(obj), dev);
- aml_append(scope, dev);
- aml_append(table, scope);
-}
-
static void build_acpi0017(Aml *table)
{
Aml *dev, *scope, *method;
@@ -1456,6 +1323,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
AcpiPmInfo *pm, AcpiMiscInfo *misc,
Range *pci_hole, Range *pci_hole64, MachineState *machine)
{
+ Object *i440fx = object_resolve_type_unambiguous(TYPE_I440FX_PCI_HOST_BRIDGE);
+ Object *q35 = object_resolve_type_unambiguous(TYPE_Q35_HOST_DEVICE);
CrsRangeEntry *entry;
Aml *dsdt, *sb_scope, *scope, *dev, *method, *field, *pkg, *crs;
CrsRangeSet crs_range_set;
@@ -1476,11 +1345,13 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = x86ms->oem_id,
.oem_table_id = x86ms->oem_table_id };
+ assert(!!i440fx != !!q35);
+
acpi_table_begin(&table, table_data);
dsdt = init_aml_allocator();
build_dbg_aml(dsdt);
- if (misc->is_piix4) {
+ if (i440fx) {
sb_scope = aml_scope("_SB");
dev = aml_device("PCI0");
aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03")));
@@ -1489,12 +1360,11 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(sb_scope, dev);
aml_append(dsdt, sb_scope);
- build_piix4_isa_bridge(dsdt);
if (pm->pcihp_bridge_en || pm->pcihp_root_en) {
build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
}
build_piix4_pci0_int(dsdt);
- } else {
+ } else if (q35) {
sb_scope = aml_scope("_SB");
dev = aml_device("PCI0");
aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08")));
@@ -1534,14 +1404,10 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(dsdt, sb_scope);
- build_q35_isa_bridge(dsdt);
if (pm->pcihp_bridge_en) {
build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
}
build_q35_pci0_int(dsdt);
- if (pcms->smbus) {
- build_smb0(dsdt, ICH9_SMB_DEV, ICH9_SMB_FUNC);
- }
}
if (misc->has_hpet) {
@@ -1554,6 +1420,18 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(dsdt, sb_scope);
}
+ scope = aml_scope("_GPE");
+ {
+ aml_append(scope, aml_name_decl("_HID", aml_string("ACPI0006")));
+ if (machine->nvdimms_state->is_enabled) {
+ method = aml_method("_E04", 0, AML_NOTSERIALIZED);
+ aml_append(method, aml_notify(aml_name("\\_SB.NVDR"),
+ aml_int(0x80)));
+ aml_append(scope, method);
+ }
+ }
+ aml_append(dsdt, scope);
+
if (pcmc->legacy_cpu_hotplug) {
build_legacy_cpu_hotplug_aml(dsdt, machine, pm->cpu_hp_io_base);
} else {
@@ -1572,28 +1450,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
pcms->memhp_io_base);
}
- scope = aml_scope("_GPE");
- {
- aml_append(scope, aml_name_decl("_HID", aml_string("ACPI0006")));
-
- if (pm->pcihp_bridge_en || pm->pcihp_root_en) {
- method = aml_method("_E01", 0, AML_NOTSERIALIZED);
- aml_append(method,
- aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF));
- aml_append(method, aml_call0("\\_SB.PCI0.PCNT"));
- aml_append(method, aml_release(aml_name("\\_SB.PCI0.BLCK")));
- aml_append(scope, method);
- }
-
- if (machine->nvdimms_state->is_enabled) {
- method = aml_method("_E04", 0, AML_NOTSERIALIZED);
- aml_append(method, aml_notify(aml_name("\\_SB.NVDR"),
- aml_int(0x80)));
- aml_append(scope, method);
- }
- }
- aml_append(dsdt, scope);
-
crs_range_set_init(&crs_range_set);
bus = PC_MACHINE(machine)->bus;
if (bus) {
@@ -1872,6 +1728,19 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
}
aml_append(dsdt, sb_scope);
+ if (pm->pcihp_bridge_en || pm->pcihp_root_en) {
+ scope = aml_scope("_GPE");
+ {
+ method = aml_method("_E01", 0, AML_NOTSERIALIZED);
+ aml_append(method,
+ aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF));
+ aml_append(method, aml_call0("\\_SB.PCI0.PCNT"));
+ aml_append(method, aml_release(aml_name("\\_SB.PCI0.BLCK")));
+ aml_append(scope, method);
+ }
+ aml_append(dsdt, scope);
+ }
+
/* copy AML table into ACPI tables blob and patch header there */
g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len);
acpi_table_end(linker, &table);
diff --git a/hw/i386/e820_memory_layout.c b/hw/i386/e820_memory_layout.c
index bcf9eaf837..06970ac44a 100644
--- a/hw/i386/e820_memory_layout.c
+++ b/hw/i386/e820_memory_layout.c
@@ -11,29 +11,11 @@
#include "e820_memory_layout.h"
static size_t e820_entries;
-struct e820_table e820_reserve;
struct e820_entry *e820_table;
int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
{
- int index = le32_to_cpu(e820_reserve.count);
- struct e820_entry *entry;
-
- if (type != E820_RAM) {
- /* old FW_CFG_E820_TABLE entry -- reservations only */
- if (index >= E820_NR_ENTRIES) {
- return -EBUSY;
- }
- entry = &e820_reserve.entry[index++];
-
- entry->address = cpu_to_le64(address);
- entry->length = cpu_to_le64(length);
- entry->type = cpu_to_le32(type);
-
- e820_reserve.count = cpu_to_le32(index);
- }
-
- /* new "etc/e820" file -- include ram too */
+ /* new "etc/e820" file -- include ram and reserved entries */
e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1);
e820_table[e820_entries].address = cpu_to_le64(address);
e820_table[e820_entries].length = cpu_to_le64(length);
diff --git a/hw/i386/e820_memory_layout.h b/hw/i386/e820_memory_layout.h
index 04f93780f9..7c239aa033 100644
--- a/hw/i386/e820_memory_layout.h
+++ b/hw/i386/e820_memory_layout.h
@@ -16,20 +16,12 @@
#define E820_NVS 4
#define E820_UNUSABLE 5
-#define E820_NR_ENTRIES 16
-
struct e820_entry {
uint64_t address;
uint64_t length;
uint32_t type;
} QEMU_PACKED __attribute((__aligned__(4)));
-struct e820_table {
- uint32_t count;
- struct e820_entry entry[E820_NR_ENTRIES];
-} QEMU_PACKED __attribute((__aligned__(4)));
-
-extern struct e820_table e820_reserve;
extern struct e820_entry *e820_table;
int e820_add_entry(uint64_t address, uint64_t length, uint32_t type);
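With the fixed-size e820_reserve table gone, every entry, RAM and reserved alike, flows through the single dynamically grown "etc/e820" fw_cfg file. Illustrative usage of the remaining interface (addresses and sizes are made-up values):

    e820_add_entry(0x0, 0xa0000, E820_RAM);            /* low RAM           */
    e820_add_entry(0xfeffc000, 0x4000, E820_RESERVED); /* reserved MMIO hole */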
diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
index a283785a8d..72a42f3c66 100644
--- a/hw/i386/fw_cfg.c
+++ b/hw/i386/fw_cfg.c
@@ -36,7 +36,6 @@ const char *fw_cfg_arch_key_name(uint16_t key)
{FW_CFG_ACPI_TABLES, "acpi_tables"},
{FW_CFG_SMBIOS_ENTRIES, "smbios_entries"},
{FW_CFG_IRQ0_OVERRIDE, "irq0_override"},
- {FW_CFG_E820_TABLE, "e820_table"},
{FW_CFG_HPET, "hpet"},
};
@@ -127,8 +126,6 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms,
#endif
fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1);
- fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE,
- &e820_reserve, sizeof(e820_reserve));
fw_cfg_add_file(fw_cfg, "etc/e820", e820_table,
sizeof(struct e820_entry) * e820_get_num_entries());
diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h
index 275f15c1c5..86ca7c1c0c 100644
--- a/hw/i386/fw_cfg.h
+++ b/hw/i386/fw_cfg.h
@@ -17,7 +17,6 @@
#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
#define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2)
-#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3)
#define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4)
FWCfgState *fw_cfg_arch_create(MachineState *ms,
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 6524c2ee32..a08ee85edf 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -49,17 +49,24 @@
/* pe operations */
#define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT)
#define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW))
-#define VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write) {\
- if (ret_fr) { \
- ret_fr = -ret_fr; \
- if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { \
- trace_vtd_fault_disabled(); \
- } else { \
- vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); \
- } \
- goto error; \
- } \
-}
+
+/*
+ * PCI bus number (or SID) is not reliable since the device is usually
+ * initialized before the guest can configure the PCI bridge
+ * (SECONDARY_BUS_NUMBER).
+ */
+struct vtd_as_key {
+ PCIBus *bus;
+ uint8_t devfn;
+ uint32_t pasid;
+};
+
+struct vtd_iotlb_key {
+ uint64_t gfn;
+ uint32_t pasid;
+ uint32_t level;
+ uint16_t sid;
+};
static void vtd_address_space_refresh_all(IntelIOMMUState *s);
static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
@@ -200,14 +207,46 @@ static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as)
}
/* GHashTable functions */
-static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
+static gboolean vtd_iotlb_equal(gconstpointer v1, gconstpointer v2)
{
- return *((const uint64_t *)v1) == *((const uint64_t *)v2);
+ const struct vtd_iotlb_key *key1 = v1;
+ const struct vtd_iotlb_key *key2 = v2;
+
+ return key1->sid == key2->sid &&
+ key1->pasid == key2->pasid &&
+ key1->level == key2->level &&
+ key1->gfn == key2->gfn;
+}
+
+static guint vtd_iotlb_hash(gconstpointer v)
+{
+ const struct vtd_iotlb_key *key = v;
+
+ return key->gfn | ((key->sid) << VTD_IOTLB_SID_SHIFT) |
+ (key->level) << VTD_IOTLB_LVL_SHIFT |
+ (key->pasid) << VTD_IOTLB_PASID_SHIFT;
}
-static guint vtd_uint64_hash(gconstpointer v)
+static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
+{
+ const struct vtd_as_key *key1 = v1;
+ const struct vtd_as_key *key2 = v2;
+
+ return (key1->bus == key2->bus) && (key1->devfn == key2->devfn) &&
+ (key1->pasid == key2->pasid);
+}
+
+/*
+ * Note that we use pointer to PCIBus as the key, so hashing/shifting
+ * based on the pointer value is intended. Note that we deal with
+ * collisions through vtd_as_equal().
+ */
+static guint vtd_as_hash(gconstpointer v)
{
- return (guint)*(const uint64_t *)v;
+ const struct vtd_as_key *key = v;
+ guint value = (guint)(uintptr_t)key->bus;
+
+ return (guint)(value << 8 | key->devfn);
}
static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
@@ -248,22 +287,14 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
{
VTDAddressSpace *vtd_as;
- VTDBus *vtd_bus;
- GHashTableIter bus_it;
- uint32_t devfn_it;
+ GHashTableIter as_it;
trace_vtd_context_cache_reset();
- g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
+ g_hash_table_iter_init(&as_it, s->vtd_address_spaces);
- while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
- for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
- vtd_as = vtd_bus->dev_as[devfn_it];
- if (!vtd_as) {
- continue;
- }
- vtd_as->context_cache_entry.context_cache_gen = 0;
- }
+ while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) {
+ vtd_as->context_cache_entry.context_cache_gen = 0;
}
s->context_cache_gen = 1;
}
@@ -290,13 +321,6 @@ static void vtd_reset_caches(IntelIOMMUState *s)
vtd_iommu_unlock(s);
}
-static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
- uint32_t level)
-{
- return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) |
- ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT);
-}
-
static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
{
return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
@@ -304,15 +328,17 @@ static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
/* Must be called with IOMMU lock held */
static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
- hwaddr addr)
+ uint32_t pasid, hwaddr addr)
{
+ struct vtd_iotlb_key key;
VTDIOTLBEntry *entry;
- uint64_t key;
int level;
for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
- key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level),
- source_id, level);
+ key.gfn = vtd_get_iotlb_gfn(addr, level);
+ key.level = level;
+ key.sid = source_id;
+ key.pasid = pasid;
entry = g_hash_table_lookup(s->iotlb, &key);
if (entry) {
goto out;
@@ -326,10 +352,11 @@ out:
/* Must be with IOMMU lock held */
static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
uint16_t domain_id, hwaddr addr, uint64_t slpte,
- uint8_t access_flags, uint32_t level)
+ uint8_t access_flags, uint32_t level,
+ uint32_t pasid)
{
VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
- uint64_t *key = g_malloc(sizeof(*key));
+ struct vtd_iotlb_key *key = g_malloc(sizeof(*key));
uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
@@ -343,7 +370,13 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
entry->slpte = slpte;
entry->access_flags = access_flags;
entry->mask = vtd_slpt_level_page_mask(level);
- *key = vtd_get_iotlb_key(gfn, source_id, level);
+ entry->pasid = pasid;
+
+ key->gfn = gfn;
+ key->sid = source_id;
+ key->level = level;
+ key->pasid = pasid;
+
g_hash_table_replace(s->iotlb, key, entry);
}
@@ -436,7 +469,8 @@ static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
/* Must not update F field now, should be done later */
static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
uint16_t source_id, hwaddr addr,
- VTDFaultReason fault, bool is_write)
+ VTDFaultReason fault, bool is_write,
+ bool is_pasid, uint32_t pasid)
{
uint64_t hi = 0, lo;
hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
@@ -444,7 +478,8 @@ static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
assert(index < DMAR_FRCD_REG_NR);
lo = VTD_FRCD_FI(addr);
- hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
+ hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault) |
+ VTD_FRCD_PV(pasid) | VTD_FRCD_PP(is_pasid);
if (!is_write) {
hi |= VTD_FRCD_T;
}
@@ -475,7 +510,8 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
/* Log and report an DMAR (address translation) fault to software */
static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
hwaddr addr, VTDFaultReason fault,
- bool is_write)
+ bool is_write, bool is_pasid,
+ uint32_t pasid)
{
uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
@@ -502,7 +538,8 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
return;
}
- vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);
+ vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault,
+ is_write, is_pasid, pasid);
if (fsts_reg & VTD_FSTS_PPF) {
error_report_once("There are pending faults already, "
@@ -807,13 +844,15 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s,
static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s,
VTDContextEntry *ce,
- VTDPASIDEntry *pe)
+ VTDPASIDEntry *pe,
+ uint32_t pasid)
{
- uint32_t pasid;
dma_addr_t pasid_dir_base;
int ret = 0;
- pasid = VTD_CE_GET_RID2PASID(ce);
+ if (pasid == PCI_NO_PASID) {
+ pasid = VTD_CE_GET_RID2PASID(ce);
+ }
pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
ret = vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe);
@@ -822,15 +861,17 @@ static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s,
static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
VTDContextEntry *ce,
- bool *pe_fpd_set)
+ bool *pe_fpd_set,
+ uint32_t pasid)
{
int ret;
- uint32_t pasid;
dma_addr_t pasid_dir_base;
VTDPASIDDirEntry pdire;
VTDPASIDEntry pe;
- pasid = VTD_CE_GET_RID2PASID(ce);
+ if (pasid == PCI_NO_PASID) {
+ pasid = VTD_CE_GET_RID2PASID(ce);
+ }
pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
/*
@@ -876,12 +917,13 @@ static inline uint32_t vtd_ce_get_level(VTDContextEntry *ce)
}
static uint32_t vtd_get_iova_level(IntelIOMMUState *s,
- VTDContextEntry *ce)
+ VTDContextEntry *ce,
+ uint32_t pasid)
{
VTDPASIDEntry pe;
if (s->root_scalable) {
- vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
return VTD_PE_GET_LEVEL(&pe);
}
@@ -894,12 +936,13 @@ static inline uint32_t vtd_ce_get_agaw(VTDContextEntry *ce)
}
static uint32_t vtd_get_iova_agaw(IntelIOMMUState *s,
- VTDContextEntry *ce)
+ VTDContextEntry *ce,
+ uint32_t pasid)
{
VTDPASIDEntry pe;
if (s->root_scalable) {
- vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
return 30 + ((pe.val[0] >> 2) & VTD_SM_PASID_ENTRY_AW) * 9;
}
@@ -941,31 +984,33 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu,
}
static inline uint64_t vtd_iova_limit(IntelIOMMUState *s,
- VTDContextEntry *ce, uint8_t aw)
+ VTDContextEntry *ce, uint8_t aw,
+ uint32_t pasid)
{
- uint32_t ce_agaw = vtd_get_iova_agaw(s, ce);
+ uint32_t ce_agaw = vtd_get_iova_agaw(s, ce, pasid);
return 1ULL << MIN(ce_agaw, aw);
}
/* Return true if IOVA passes range check, otherwise false. */
static inline bool vtd_iova_range_check(IntelIOMMUState *s,
uint64_t iova, VTDContextEntry *ce,
- uint8_t aw)
+ uint8_t aw, uint32_t pasid)
{
/*
* Check if @iova is above 2^X-1, where X is the minimum of MGAW
* in CAP_REG and AW in context-entry.
*/
- return !(iova & ~(vtd_iova_limit(s, ce, aw) - 1));
+ return !(iova & ~(vtd_iova_limit(s, ce, aw, pasid) - 1));
}
static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s,
- VTDContextEntry *ce)
+ VTDContextEntry *ce,
+ uint32_t pasid)
{
VTDPASIDEntry pe;
if (s->root_scalable) {
- vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
}
@@ -993,50 +1038,25 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
return slpte & rsvd_mask;
}
-/* Find the VTD address space associated with a given bus number */
-static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
-{
- VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
- GHashTableIter iter;
-
- if (vtd_bus) {
- return vtd_bus;
- }
-
- /*
- * Iterate over the registered buses to find the one which
- * currently holds this bus number and update the bus_num
- * lookup table.
- */
- g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
- while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
- if (pci_bus_num(vtd_bus->bus) == bus_num) {
- s->vtd_as_by_bus_num[bus_num] = vtd_bus;
- return vtd_bus;
- }
- }
-
- return NULL;
-}
-
/* Given the @iova, get relevant @slptep. @slpte_level will be the last level
* of the translation, can be used for deciding the size of large page.
*/
static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
uint64_t iova, bool is_write,
uint64_t *slptep, uint32_t *slpte_level,
- bool *reads, bool *writes, uint8_t aw_bits)
+ bool *reads, bool *writes, uint8_t aw_bits,
+ uint32_t pasid)
{
- dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce);
- uint32_t level = vtd_get_iova_level(s, ce);
+ dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
+ uint32_t level = vtd_get_iova_level(s, ce, pasid);
uint32_t offset;
uint64_t slpte;
uint64_t access_right_check;
uint64_t xlat, size;
- if (!vtd_iova_range_check(s, iova, ce, aw_bits)) {
- error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")",
- __func__, iova);
+ if (!vtd_iova_range_check(s, iova, ce, aw_bits, pasid)) {
+ error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ","
+ "pasid=0x%" PRIx32 ")", __func__, iova, pasid);
return -VTD_FR_ADDR_BEYOND_MGAW;
}
@@ -1049,8 +1069,9 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
if (slpte == (uint64_t)-1) {
error_report_once("%s: detected read error on DMAR slpte "
- "(iova=0x%" PRIx64 ")", __func__, iova);
- if (level == vtd_get_iova_level(s, ce)) {
+ "(iova=0x%" PRIx64 ", pasid=0x%" PRIx32 ")",
+ __func__, iova, pasid);
+ if (level == vtd_get_iova_level(s, ce, pasid)) {
/* Invalid programming of context-entry */
return -VTD_FR_CONTEXT_ENTRY_INV;
} else {
@@ -1062,15 +1083,16 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
if (!(slpte & access_right_check)) {
error_report_once("%s: detected slpte permission error "
"(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
- "slpte=0x%" PRIx64 ", write=%d)", __func__,
- iova, level, slpte, is_write);
+ "slpte=0x%" PRIx64 ", write=%d, pasid=0x%"
+ PRIx32 ")", __func__, iova, level,
+ slpte, is_write, pasid);
return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
}
if (vtd_slpte_nonzero_rsvd(slpte, level)) {
error_report_once("%s: detected splte reserve non-zero "
"iova=0x%" PRIx64 ", level=0x%" PRIx32
- "slpte=0x%" PRIx64 ")", __func__, iova,
- level, slpte);
+ "slpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")",
+ __func__, iova, level, slpte, pasid);
return -VTD_FR_PAGING_ENTRY_RSVD;
}
@@ -1098,9 +1120,10 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
error_report_once("%s: xlat address is in interrupt range "
"(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
"slpte=0x%" PRIx64 ", write=%d, "
- "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ")",
+ "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", "
+ "pasid=0x%" PRIx32 ")",
__func__, iova, level, slpte, is_write,
- xlat, size);
+ xlat, size, pasid);
return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR :
-VTD_FR_INTERRUPT_ADDR;
}
@@ -1314,18 +1337,19 @@ next:
*/
static int vtd_page_walk(IntelIOMMUState *s, VTDContextEntry *ce,
uint64_t start, uint64_t end,
- vtd_page_walk_info *info)
+ vtd_page_walk_info *info,
+ uint32_t pasid)
{
- dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce);
- uint32_t level = vtd_get_iova_level(s, ce);
+ dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
+ uint32_t level = vtd_get_iova_level(s, ce, pasid);
- if (!vtd_iova_range_check(s, start, ce, info->aw)) {
+ if (!vtd_iova_range_check(s, start, ce, info->aw, pasid)) {
return -VTD_FR_ADDR_BEYOND_MGAW;
}
- if (!vtd_iova_range_check(s, end, ce, info->aw)) {
+ if (!vtd_iova_range_check(s, end, ce, info->aw, pasid)) {
/* Fix end so that it reaches the maximum */
- end = vtd_iova_limit(s, ce, info->aw);
+ end = vtd_iova_limit(s, ce, info->aw, pasid);
}
return vtd_page_walk_level(addr, start, end, level, true, true, info);
@@ -1393,7 +1417,7 @@ static int vtd_ce_rid2pasid_check(IntelIOMMUState *s,
* has valid rid2pasid setting, which includes valid
* rid2pasid field and corresponding pasid entry setting
*/
- return vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ return vtd_ce_get_rid2pasid_entry(s, ce, &pe, PCI_NO_PASID);
}
/* Map a device to its corresponding domain (context-entry) */
@@ -1476,12 +1500,13 @@ static int vtd_sync_shadow_page_hook(IOMMUTLBEvent *event,
}
static uint16_t vtd_get_domain_id(IntelIOMMUState *s,
- VTDContextEntry *ce)
+ VTDContextEntry *ce,
+ uint32_t pasid)
{
VTDPASIDEntry pe;
if (s->root_scalable) {
- vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
return VTD_SM_PASID_ENTRY_DID(pe.val[1]);
}
@@ -1499,10 +1524,10 @@ static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as,
.notify_unmap = true,
.aw = s->aw_bits,
.as = vtd_as,
- .domain_id = vtd_get_domain_id(s, ce),
+ .domain_id = vtd_get_domain_id(s, ce, vtd_as->pasid),
};
- return vtd_page_walk(s, ce, addr, addr + size, &info);
+ return vtd_page_walk(s, ce, addr, addr + size, &info, vtd_as->pasid);
}
static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
@@ -1546,16 +1571,19 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
* 1st-level translation or 2nd-level translation, it depends
* on PGTT setting.
*/
-static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce)
+static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce,
+ uint32_t pasid)
{
VTDPASIDEntry pe;
int ret;
if (s->root_scalable) {
- ret = vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ ret = vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid);
if (ret) {
- error_report_once("%s: vtd_ce_get_rid2pasid_entry error: %"PRId32,
- __func__, ret);
+ /*
+ * This error is guest triggerable. We should assume PT
+ * is not enabled, for safety.
+ */
return false;
}
return (VTD_PE_GET_TYPE(&pe) == VTD_SM_PASID_ENTRY_PT);
@@ -1569,14 +1597,12 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as)
{
IntelIOMMUState *s;
VTDContextEntry ce;
- int ret;
assert(as);
s = as->iommu_state;
- ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus),
- as->devfn, &ce);
- if (ret) {
+ if (vtd_dev_to_context_entry(s, pci_bus_num(as->bus), as->devfn,
+ &ce)) {
/*
* Possibly failed to parse the context entry for some reason
* (e.g., during init, or any guest configuration errors on
@@ -1586,19 +1612,20 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as)
return false;
}
- return vtd_dev_pt_enabled(s, &ce);
+ return vtd_dev_pt_enabled(s, &ce, as->pasid);
}
/* Return whether the device is using IOMMU translation. */
static bool vtd_switch_address_space(VTDAddressSpace *as)
{
- bool use_iommu;
+ bool use_iommu, pt;
/* Whether we need to take the BQL on our own */
bool take_bql = !qemu_mutex_iothread_locked();
assert(as);
use_iommu = as->iommu_state->dmar_enabled && !vtd_as_pt_enabled(as);
+ pt = as->iommu_state->dmar_enabled && vtd_as_pt_enabled(as);
trace_vtd_switch_address_space(pci_bus_num(as->bus),
VTD_PCI_SLOT(as->devfn),
@@ -1618,11 +1645,53 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
if (use_iommu) {
memory_region_set_enabled(&as->nodmar, false);
memory_region_set_enabled(MEMORY_REGION(&as->iommu), true);
+ /*
+ * vt-d spec v3.4 3.14:
+ *
+ * """
+ * Requests-with-PASID with input address in range 0xFEEx_xxxx
+ * are translated normally like any other request-with-PASID
+ * through DMA-remapping hardware.
+ * """
+ *
+ * Need to disable interrupt remapping (ir) for address
+ * spaces with PASID.
+ */
+ if (as->pasid != PCI_NO_PASID) {
+ memory_region_set_enabled(&as->iommu_ir, false);
+ } else {
+ memory_region_set_enabled(&as->iommu_ir, true);
+ }
} else {
memory_region_set_enabled(MEMORY_REGION(&as->iommu), false);
memory_region_set_enabled(&as->nodmar, true);
}
+ /*
+ * vtd-spec v3.4 3.14:
+ *
+ * """
+ * Requests-with-PASID with input address in range 0xFEEx_xxxx are
+ * translated normally like any other request-with-PASID through
+ * DMA-remapping hardware. However, if such a request is processed
+ * using pass-through translation, it will be blocked as described
+ * in the paragraph below.
+ *
+ * Software must not program paging-structure entries to remap any
+ * address to the interrupt address range. Untranslated requests
+ * and translation requests that result in an address in the
+ * interrupt range will be blocked with condition code LGN.4 or
+ * SGN.8.
+ * """
+ *
+ * We enable the per-as memory region (iommu_ir_fault) for
+ * catching translations of the interrupt range through PASID + PT.
+ */
+ if (pt && as->pasid != PCI_NO_PASID) {
+ memory_region_set_enabled(&as->iommu_ir_fault, true);
+ } else {
+ memory_region_set_enabled(&as->iommu_ir_fault, false);
+ }
+
if (take_bql) {
qemu_mutex_unlock_iothread();
}
@@ -1632,24 +1701,13 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
static void vtd_switch_address_space_all(IntelIOMMUState *s)
{
+ VTDAddressSpace *vtd_as;
GHashTableIter iter;
- VTDBus *vtd_bus;
- int i;
-
- g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
- while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
- for (i = 0; i < PCI_DEVFN_MAX; i++) {
- if (!vtd_bus->dev_as[i]) {
- continue;
- }
- vtd_switch_address_space(vtd_bus->dev_as[i]);
- }
- }
-}
-static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
-{
- return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
+ g_hash_table_iter_init(&iter, s->vtd_address_spaces);
+ while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_as)) {
+ vtd_switch_address_space(vtd_as);
+ }
}
static const bool vtd_qualified_faults[] = {
@@ -1686,18 +1744,37 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
}
+static gboolean vtd_find_as_by_sid(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ struct vtd_as_key *as_key = (struct vtd_as_key *)key;
+ uint16_t target_sid = *(uint16_t *)user_data;
+ uint16_t sid = PCI_BUILD_BDF(pci_bus_num(as_key->bus), as_key->devfn);
+ return sid == target_sid;
+}
+
+static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
+{
+ uint8_t bus_num = PCI_BUS_NUM(sid);
+ VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num];
+
+ if (vtd_as &&
+ (sid == PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn))) {
+ return vtd_as;
+ }
+
+ vtd_as = g_hash_table_find(s->vtd_address_spaces, vtd_find_as_by_sid, &sid);
+ s->vtd_as_cache[bus_num] = vtd_as;
+
+ return vtd_as;
+}
+
static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
{
- VTDBus *vtd_bus;
VTDAddressSpace *vtd_as;
bool success = false;
- vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
- if (!vtd_bus) {
- goto out;
- }
-
- vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)];
+ vtd_as = vtd_get_as_by_sid(s, source_id);
if (!vtd_as) {
goto out;
}
@@ -1711,6 +1788,22 @@ out:
trace_vtd_pt_enable_fast_path(source_id, success);
}
+static void vtd_report_fault(IntelIOMMUState *s,
+ int err, bool is_fpd_set,
+ uint16_t source_id,
+ hwaddr addr,
+ bool is_write,
+ bool is_pasid,
+ uint32_t pasid)
+{
+ if (is_fpd_set && vtd_is_qualified_fault(err)) {
+ trace_vtd_fault_disabled();
+ } else {
+ vtd_report_dmar_fault(s, source_id, addr, err, is_write,
+ is_pasid, pasid);
+ }
+}
+
/* Map dev to context-entry then do a paging-structures walk to do a iommu
* translation.
*
@@ -1732,13 +1825,14 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
uint8_t bus_num = pci_bus_num(bus);
VTDContextCacheEntry *cc_entry;
uint64_t slpte, page_mask;
- uint32_t level;
- uint16_t source_id = vtd_make_source_id(bus_num, devfn);
+ uint32_t level, pasid = vtd_as->pasid;
+ uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn);
int ret_fr;
bool is_fpd_set = false;
bool reads = true;
bool writes = true;
uint8_t access_flags;
+ bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable;
VTDIOTLBEntry *iotlb_entry;
/*
@@ -1751,15 +1845,17 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
cc_entry = &vtd_as->context_cache_entry;
- /* Try to fetch slpte form IOTLB */
- iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
- if (iotlb_entry) {
- trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
- iotlb_entry->domain_id);
- slpte = iotlb_entry->slpte;
- access_flags = iotlb_entry->access_flags;
- page_mask = iotlb_entry->mask;
- goto out;
+ /* Try to fetch slpte from IOTLB; we don't need RID2PASID logic */
+ if (!rid2pasid) {
+ iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr);
+ if (iotlb_entry) {
+ trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
+ iotlb_entry->domain_id);
+ slpte = iotlb_entry->slpte;
+ access_flags = iotlb_entry->access_flags;
+ page_mask = iotlb_entry->mask;
+ goto out;
+ }
}
/* Try to fetch context-entry from cache first */
@@ -1770,16 +1866,26 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
ce = cc_entry->context_entry;
is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
if (!is_fpd_set && s->root_scalable) {
- ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set);
- VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write);
+ ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, pasid);
+ if (ret_fr) {
+ vtd_report_fault(s, -ret_fr, is_fpd_set,
+ source_id, addr, is_write,
+ false, 0);
+ goto error;
+ }
}
} else {
ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
if (!ret_fr && !is_fpd_set && s->root_scalable) {
- ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set);
+ ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, pasid);
+ }
+ if (ret_fr) {
+ vtd_report_fault(s, -ret_fr, is_fpd_set,
+ source_id, addr, is_write,
+ false, 0);
+ goto error;
}
- VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write);
/* Update context-cache */
trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo,
cc_entry->context_cache_gen,
@@ -1788,11 +1894,15 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
cc_entry->context_cache_gen = s->context_cache_gen;
}
+ if (rid2pasid) {
+ pasid = VTD_CE_GET_RID2PASID(&ce);
+ }
+
/*
* We don't need to translate for pass-through context entries.
* Also, let's ignore IOTLB caching as well for PT devices.
*/
- if (vtd_dev_pt_enabled(s, &ce)) {
+ if (vtd_dev_pt_enabled(s, &ce, pasid)) {
entry->iova = addr & VTD_PAGE_MASK_4K;
entry->translated_addr = entry->iova;
entry->addr_mask = ~VTD_PAGE_MASK_4K;
@@ -1813,14 +1923,31 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
return true;
}
+ /* Try to fetch slpte from IOTLB for RID2PASID slow path */
+ if (rid2pasid) {
+ iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr);
+ if (iotlb_entry) {
+ trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
+ iotlb_entry->domain_id);
+ slpte = iotlb_entry->slpte;
+ access_flags = iotlb_entry->access_flags;
+ page_mask = iotlb_entry->mask;
+ goto out;
+ }
+ }
+
ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &slpte, &level,
- &reads, &writes, s->aw_bits);
- VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write);
+ &reads, &writes, s->aw_bits, pasid);
+ if (ret_fr) {
+ vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,
+ addr, is_write, pasid != PCI_NO_PASID, pasid);
+ goto error;
+ }
page_mask = vtd_slpt_level_page_mask(level);
access_flags = IOMMU_ACCESS_FLAG(reads, writes);
- vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce), addr, slpte,
- access_flags, level);
+ vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce, pasid),
+ addr, slpte, access_flags, level, pasid);
out:
vtd_iommu_unlock(s);
entry->iova = addr & page_mask;
@@ -1905,11 +2032,10 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
uint16_t source_id,
uint16_t func_mask)
{
+ GHashTableIter as_it;
uint16_t mask;
- VTDBus *vtd_bus;
VTDAddressSpace *vtd_as;
uint8_t bus_n, devfn;
- uint16_t devfn_it;
trace_vtd_inv_desc_cc_devices(source_id, func_mask);
@@ -1932,32 +2058,31 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
mask = ~mask;
bus_n = VTD_SID_TO_BUS(source_id);
- vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
- if (vtd_bus) {
- devfn = VTD_SID_TO_DEVFN(source_id);
- for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
- vtd_as = vtd_bus->dev_as[devfn_it];
- if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
- trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
- VTD_PCI_FUNC(devfn_it));
- vtd_iommu_lock(s);
- vtd_as->context_cache_entry.context_cache_gen = 0;
- vtd_iommu_unlock(s);
- /*
- * Do switch address space when needed, in case if the
- * device passthrough bit is switched.
- */
- vtd_switch_address_space(vtd_as);
- /*
- * So a device is moving out of (or moving into) a
- * domain, resync the shadow page table.
- * This won't bring bad even if we have no such
- * notifier registered - the IOMMU notification
- * framework will skip MAP notifications if that
- * happened.
- */
- vtd_sync_shadow_page_table(vtd_as);
- }
+ devfn = VTD_SID_TO_DEVFN(source_id);
+
+ g_hash_table_iter_init(&as_it, s->vtd_address_spaces);
+ while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) {
+ if ((pci_bus_num(vtd_as->bus) == bus_n) &&
+ (vtd_as->devfn & mask) == (devfn & mask)) {
+ trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(vtd_as->devfn),
+ VTD_PCI_FUNC(vtd_as->devfn));
+ vtd_iommu_lock(s);
+ vtd_as->context_cache_entry.context_cache_gen = 0;
+ vtd_iommu_unlock(s);
+ /*
+ * Switch the address space when needed, in case the
+ * device passthrough bit has been switched.
+ */
+ vtd_switch_address_space(vtd_as);
+ /*
+ * A device is moving out of (or into) a domain, so
+ * resync the shadow page table.
+ * This does no harm even if we have no such notifier
+ * registered - the IOMMU notification framework will
+ * skip MAP notifications in that case.
+ */
+ vtd_sync_shadow_page_table(vtd_as);
}
}
}
@@ -2014,7 +2139,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce) &&
- domain_id == vtd_get_domain_id(s, &ce)) {
+ domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
vtd_sync_shadow_page_table(vtd_as);
}
}
@@ -2022,7 +2147,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
uint16_t domain_id, hwaddr addr,
- uint8_t am)
+ uint8_t am, uint32_t pasid)
{
VTDAddressSpace *vtd_as;
VTDContextEntry ce;
@@ -2030,9 +2155,12 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
hwaddr size = (1 << am) * VTD_PAGE_SIZE;
QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) {
+ if (pasid != PCI_NO_PASID && pasid != vtd_as->pasid) {
+ continue;
+ }
ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce);
- if (!ret && domain_id == vtd_get_domain_id(s, &ce)) {
+ if (!ret && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
if (vtd_as_has_map_notifier(vtd_as)) {
/*
* As long as we have MAP notifications registered in
@@ -2076,7 +2204,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
vtd_iommu_lock(s);
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
vtd_iommu_unlock(s);
- vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
+ vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, PCI_NO_PASID);
}
/* Flush IOTLB
@@ -2473,18 +2601,13 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
{
VTDAddressSpace *vtd_dev_as;
IOMMUTLBEvent event;
- struct VTDBus *vtd_bus;
hwaddr addr;
uint64_t sz;
uint16_t sid;
- uint8_t devfn;
bool size;
- uint8_t bus_num;
addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
- devfn = sid & 0xff;
- bus_num = sid >> 8;
size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
@@ -2495,12 +2618,11 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
return false;
}
- vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
- if (!vtd_bus) {
- goto done;
- }
-
- vtd_dev_as = vtd_bus->dev_as[devfn];
+ /*
+ * Using sid is OK since the guest should have finished the
+ * initialization of both the bus and device.
+ */
+ vtd_dev_as = vtd_get_as_by_sid(s, sid);
if (!vtd_dev_as) {
goto done;
}
@@ -3151,6 +3273,7 @@ static Property vtd_properties[] = {
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
+ DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true),
DEFINE_PROP_END_OF_LIST(),
@@ -3425,32 +3548,98 @@ static const MemoryRegionOps vtd_mem_ir_ops = {
},
};
-VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
+static void vtd_report_ir_illegal_access(VTDAddressSpace *vtd_as,
+ hwaddr addr, bool is_write)
{
- uintptr_t key = (uintptr_t)bus;
- VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
- VTDAddressSpace *vtd_dev_as;
- char name[128];
+ IntelIOMMUState *s = vtd_as->iommu_state;
+ uint8_t bus_n = pci_bus_num(vtd_as->bus);
+ uint16_t sid = PCI_BUILD_BDF(bus_n, vtd_as->devfn);
+ bool is_fpd_set = false;
+ VTDContextEntry ce;
- if (!vtd_bus) {
- uintptr_t *new_key = g_malloc(sizeof(*new_key));
- *new_key = (uintptr_t)bus;
- /* No corresponding free() */
- vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
- PCI_DEVFN_MAX);
- vtd_bus->bus = bus;
- g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
+ assert(vtd_as->pasid != PCI_NO_PASID);
+
+ /* Try our best to fetch the FPD; we can't do anything more */
+ if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
+ is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
+ if (!is_fpd_set && s->root_scalable) {
+ vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid);
+ }
}
- vtd_dev_as = vtd_bus->dev_as[devfn];
+ vtd_report_fault(s, VTD_FR_SM_INTERRUPT_ADDR,
+ is_fpd_set, sid, addr, is_write,
+ true, vtd_as->pasid);
+}
+
+static MemTxResult vtd_mem_ir_fault_read(void *opaque, hwaddr addr,
+ uint64_t *data, unsigned size,
+ MemTxAttrs attrs)
+{
+ vtd_report_ir_illegal_access(opaque, addr, false);
+ return MEMTX_ERROR;
+}
+
+static MemTxResult vtd_mem_ir_fault_write(void *opaque, hwaddr addr,
+ uint64_t value, unsigned size,
+ MemTxAttrs attrs)
+{
+ vtd_report_ir_illegal_access(opaque, addr, true);
+
+ return MEMTX_ERROR;
+}
+
+static const MemoryRegionOps vtd_mem_ir_fault_ops = {
+ .read_with_attrs = vtd_mem_ir_fault_read,
+ .write_with_attrs = vtd_mem_ir_fault_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .impl = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ },
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ },
+};
+
+VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
+ int devfn, unsigned int pasid)
+{
+ /*
+ * We can't simply use sid here since the bus number might not be
+ * initialized by the guest.
+ */
+ struct vtd_as_key key = {
+ .bus = bus,
+ .devfn = devfn,
+ .pasid = pasid,
+ };
+ VTDAddressSpace *vtd_dev_as;
+ char name[128];
+
+ vtd_dev_as = g_hash_table_lookup(s->vtd_address_spaces, &key);
if (!vtd_dev_as) {
- snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn),
- PCI_FUNC(devfn));
- vtd_bus->dev_as[devfn] = vtd_dev_as = g_new0(VTDAddressSpace, 1);
+ struct vtd_as_key *new_key = g_malloc(sizeof(*new_key));
+
+ new_key->bus = bus;
+ new_key->devfn = devfn;
+ new_key->pasid = pasid;
+
+ if (pasid == PCI_NO_PASID) {
+ snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn),
+ PCI_FUNC(devfn));
+ } else {
+ snprintf(name, sizeof(name), "vtd-%02x.%x-pasid-%x", PCI_SLOT(devfn),
+ PCI_FUNC(devfn), pasid);
+ }
+
+ vtd_dev_as = g_new0(VTDAddressSpace, 1);
vtd_dev_as->bus = bus;
vtd_dev_as->devfn = (uint8_t)devfn;
+ vtd_dev_as->pasid = pasid;
vtd_dev_as->iommu_state = s;
vtd_dev_as->context_cache_entry.context_cache_gen = 0;
vtd_dev_as->iova_tree = iova_tree_new();
@@ -3492,6 +3681,24 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
&vtd_dev_as->iommu_ir, 1);
/*
+ * This region is used for catching faults on accesses to the
+ * interrupt range via passthrough + PASID. See also
+ * vtd_switch_address_space(). We can't use an alias since we
+ * need to know the sid, which is valid for an MSI that uses
+ * bus_master_as (see msi_send_message()).
+ */
+ memory_region_init_io(&vtd_dev_as->iommu_ir_fault, OBJECT(s),
+ &vtd_mem_ir_fault_ops, vtd_dev_as, "vtd-no-ir",
+ VTD_INTERRUPT_ADDR_SIZE);
+ /*
+ * Hook to root since when PT is enabled vtd_dev_as->iommu
+ * will be disabled.
+ */
+ memory_region_add_subregion_overlap(MEMORY_REGION(&vtd_dev_as->root),
+ VTD_INTERRUPT_ADDR_FIRST,
+ &vtd_dev_as->iommu_ir_fault, 2);
+
+ /*
* Hook both the containers under the root container, we
* switch between DMAR & noDMAR by enable/disable
* corresponding sub-containers
@@ -3503,6 +3710,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
&vtd_dev_as->nodmar, 0);
vtd_switch_address_space(vtd_dev_as);
+
+ g_hash_table_insert(s->vtd_address_spaces, new_key, vtd_dev_as);
}
return vtd_dev_as;
}
@@ -3609,7 +3818,7 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
"legacy mode",
bus_n, PCI_SLOT(vtd_as->devfn),
PCI_FUNC(vtd_as->devfn),
- vtd_get_domain_id(s, &ce),
+ vtd_get_domain_id(s, &ce, vtd_as->pasid),
ce.hi, ce.lo);
if (vtd_as_has_map_notifier(vtd_as)) {
/* This is required only for MAP typed notifiers */
@@ -3619,10 +3828,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
.notify_unmap = false,
.aw = s->aw_bits,
.as = vtd_as,
- .domain_id = vtd_get_domain_id(s, &ce),
+ .domain_id = vtd_get_domain_id(s, &ce, vtd_as->pasid),
};
- vtd_page_walk(s, &ce, 0, ~0ULL, &info);
+ vtd_page_walk(s, &ce, 0, ~0ULL, &info, vtd_as->pasid);
}
} else {
trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
@@ -3722,6 +3931,10 @@ static void vtd_init(IntelIOMMUState *s)
s->ecap |= VTD_ECAP_SC;
}
+ if (s->pasid) {
+ s->ecap |= VTD_ECAP_PASID;
+ }
+
vtd_reset_caches(s);
/* Define registers with default values and bit semantics */
@@ -3795,7 +4008,7 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
- vtd_as = vtd_find_add_as(s, bus, devfn);
+ vtd_as = vtd_find_add_as(s, bus, devfn, PCI_NO_PASID);
return &vtd_as->as;
}
@@ -3838,6 +4051,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
return false;
}
+ if (s->pasid && !s->scalable_mode) {
+ error_setg(errp, "Need to set scalable mode for PASID");
+ return false;
+ }
+
return true;
}
@@ -3874,6 +4092,17 @@ static void vtd_realize(DeviceState *dev, Error **errp)
X86MachineState *x86ms = X86_MACHINE(ms);
PCIBus *bus = pcms->bus;
IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
+
+ if (s->pasid && x86_iommu->dt_supported) {
+ /*
+ * PASID-based-Device-TLB Invalidate Descriptor is not
+ * implemented and it requires support from vhost layer which
+ * needs to be implemented in the future.
+ */
+ error_setg(errp, "PASID based device IOTLB is not supported");
+ return;
+ }
if (!vtd_decide_config(s, errp)) {
return;
@@ -3881,7 +4110,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)
QLIST_INIT(&s->vtd_as_with_notifiers);
qemu_mutex_init(&s->iommu_lock);
- memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
"intel_iommu", DMAR_REG_SIZE);
@@ -3901,10 +4129,10 @@ static void vtd_realize(DeviceState *dev, Error **errp)
sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem);
/* No corresponding destroy */
- s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
+ s->iotlb = g_hash_table_new_full(vtd_iotlb_hash, vtd_iotlb_equal,
g_free, g_free);
- s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
- g_free, g_free);
+ s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
+ g_free, g_free);
vtd_init(s);
sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
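/*
 * Illustrative sketch (not part of this patch): the vtd_iotlb_hash()/
 * vtd_iotlb_equal() pair wired up above replaces the old uint64 IOTLB
 * key with a structured, PASID-aware key. A minimal self-contained
 * version of that pattern with glib could look like the following; the
 * struct layout and shift values here are assumptions for the example.
 */
#include <glib.h>
#include <stdint.h>

struct iotlb_key {
    uint64_t gfn;
    uint32_t pasid;
    uint16_t sid;
    uint8_t  level;
};

static guint iotlb_hash(gconstpointer v)
{
    const struct iotlb_key *k = v;
    /* Fold all fields into one word; overlapping bit ranges are fine
     * for a hash since exactness is restored by equal() below. */
    uint64_t h = k->gfn | ((uint64_t)k->sid << 20) |
                 ((uint64_t)k->level << 28) | ((uint64_t)k->pasid << 30);

    return (guint)((h >> 32) ^ (h & 0xffffffffU));
}

static gboolean iotlb_equal(gconstpointer v1, gconstpointer v2)
{
    const struct iotlb_key *a = v1, *b = v2;

    return a->gfn == b->gfn && a->sid == b->sid &&
           a->pasid == b->pasid && a->level == b->level;
}

/* Usage: g_hash_table_new_full(iotlb_hash, iotlb_equal, g_free, g_free) */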
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 930ce61feb..f090e61e11 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -114,8 +114,9 @@
VTD_INTERRUPT_ADDR_FIRST + 1)
/* The shift of source_id in the key of IOTLB hash table */
-#define VTD_IOTLB_SID_SHIFT 36
-#define VTD_IOTLB_LVL_SHIFT 52
+#define VTD_IOTLB_SID_SHIFT 20
+#define VTD_IOTLB_LVL_SHIFT 28
+#define VTD_IOTLB_PASID_SHIFT 30
#define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */
/* IOTLB_REG */
@@ -191,6 +192,7 @@
#define VTD_ECAP_SC (1ULL << 7)
#define VTD_ECAP_MHMV (15ULL << 20)
#define VTD_ECAP_SRS (1ULL << 31)
+#define VTD_ECAP_PASID (1ULL << 40)
#define VTD_ECAP_SMTS (1ULL << 43)
#define VTD_ECAP_SLTS (1ULL << 46)
@@ -211,6 +213,8 @@
#define VTD_CAP_DRAIN_READ (1ULL << 55)
#define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
#define VTD_CAP_CM (1ULL << 7)
+#define VTD_PASID_ID_SHIFT 20
+#define VTD_PASID_ID_MASK ((1ULL << VTD_PASID_ID_SHIFT) - 1)
/* Supported Adjusted Guest Address Widths */
#define VTD_CAP_SAGAW_SHIFT 8
@@ -262,6 +266,8 @@
#define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK)
/* For the low 64-bit of 128-bit */
#define VTD_FRCD_FI(val) ((val) & ~0xfffULL)
+#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40)
+#define VTD_FRCD_PP(val) (((val) & 0x1) << 31)
/* DMA Remapping Fault Conditions */
typedef enum VTDFaultReason {
@@ -379,6 +385,11 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL)
#define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL
#define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL
+#define VTD_INV_DESC_IOTLB_PASID_PASID (2ULL << 4)
+#define VTD_INV_DESC_IOTLB_PASID_PAGE (3ULL << 4)
+#define VTD_INV_DESC_IOTLB_PASID(val) (((val) >> 32) & VTD_PASID_ID_MASK)
+#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO 0xfff00000000001c0ULL
+#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI 0xf80ULL
/* Mask for Device IOTLB Invalidate Descriptor */
#define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL)
@@ -413,6 +424,7 @@ typedef union VTDInvDesc VTDInvDesc;
/* Information about page-selective IOTLB invalidate */
struct VTDIOTLBPageInvInfo {
uint16_t domain_id;
+ uint32_t pasid;
uint64_t addr;
uint8_t mask;
};
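/*
 * Illustrative sketch (not part of this patch): how masks in the style
 * of VTD_INV_DESC_IOTLB_PASID() above extract the 20-bit PASID from the
 * low 64 bits of a PASID-based IOTLB invalidation descriptor. The
 * standalone EX_* names below are assumptions for the example.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define EX_PASID_ID_SHIFT 20
#define EX_PASID_ID_MASK  ((1ULL << EX_PASID_ID_SHIFT) - 1)
#define EX_INV_DESC_IOTLB_PASID(val) (((val) >> 32) & EX_PASID_ID_MASK)

int main(void)
{
    uint64_t desc_lo = (uint64_t)0x42 << 32;  /* PASID 0x42 at bit 32 */

    printf("pasid=0x%" PRIx64 "\n", EX_INV_DESC_IOTLB_PASID(desc_lo));
    return 0;
}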
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index ffd1884100..170a331e3f 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -324,8 +324,6 @@ static void microvm_memory_init(MicrovmMachineState *mms)
fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, machine->smp.max_cpus);
fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)machine->ram_size);
fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1);
- fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE,
- &e820_reserve, sizeof(e820_reserve));
fw_cfg_add_file(fw_cfg, "etc/e820", e820_table,
sizeof(struct e820_entry) * e820_get_num_entries());
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index ef14da5094..546b703cb4 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms,
hwaddr cxl_size = MiB;
cxl_base = pc_get_cxl_range_start(pcms);
- e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
memory_region_add_subregion(system_memory, cxl_base, mr);
cxl_resv_end = cxl_base + cxl_size;
@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms,
memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw,
"cxl-fixed-memory-region", fw->size);
memory_region_add_subregion(system_memory, fw->base, &fw->mr);
- e820_add_entry(fw->base, fw->size, E820_RESERVED);
cxl_fmw_base += fw->size;
cxl_resv_end = cxl_fmw_base;
}
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index e49814dd64..04fd71bfc4 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -12,6 +12,8 @@ vtd_inv_desc_cc_devices(uint16_t sid, uint16_t fmask) "context invalidate device
vtd_inv_desc_iotlb_global(void) "iotlb invalidate global"
vtd_inv_desc_iotlb_domain(uint16_t domain) "iotlb invalidate whole domain 0x%"PRIx16
vtd_inv_desc_iotlb_pages(uint16_t domain, uint64_t addr, uint8_t mask) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8
+vtd_inv_desc_iotlb_pasid_pages(uint16_t domain, uint64_t addr, uint8_t mask, uint32_t pasid) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8" pasid 0x%"PRIx32
+vtd_inv_desc_iotlb_pasid(uint16_t domain, uint32_t pasid) "iotlb invalidate domain 0x%"PRIx16" pasid 0x%"PRIx32
vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write addr 0x%"PRIx64" data 0x%"PRIx32
vtd_inv_desc_wait_irq(const char *msg) "%s"
vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index 8694e58b21..0b0a83e080 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -809,6 +809,7 @@ static void ich9_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev)
static void build_ich9_isa_aml(AcpiDevAmlIf *adev, Aml *scope)
{
+ Aml *field;
BusChild *kid;
ICH9LPCState *s = ICH9_LPC_DEVICE(adev);
BusState *bus = BUS(s->isa_bus);
@@ -816,6 +817,28 @@ static void build_ich9_isa_aml(AcpiDevAmlIf *adev, Aml *scope)
/* ICH9 PCI to ISA irq remapping */
aml_append(scope, aml_operation_region("PIRQ", AML_PCI_CONFIG,
aml_int(0x60), 0x0C));
+ /* Field declarations have to happen *after* the operation region */
+ field = aml_field("PIRQ", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE);
+ aml_append(field, aml_named_field("PRQA", 8));
+ aml_append(field, aml_named_field("PRQB", 8));
+ aml_append(field, aml_named_field("PRQC", 8));
+ aml_append(field, aml_named_field("PRQD", 8));
+ aml_append(field, aml_reserved_field(0x20));
+ aml_append(field, aml_named_field("PRQE", 8));
+ aml_append(field, aml_named_field("PRQF", 8));
+ aml_append(field, aml_named_field("PRQG", 8));
+ aml_append(field, aml_named_field("PRQH", 8));
+ aml_append(scope, field);
+
+ /* hack: put fields into _SB scope for LNKx to find them */
+ aml_append(scope, aml_alias("PRQA", "\\_SB.PRQA"));
+ aml_append(scope, aml_alias("PRQB", "\\_SB.PRQB"));
+ aml_append(scope, aml_alias("PRQC", "\\_SB.PRQC"));
+ aml_append(scope, aml_alias("PRQD", "\\_SB.PRQD"));
+ aml_append(scope, aml_alias("PRQE", "\\_SB.PRQE"));
+ aml_append(scope, aml_alias("PRQF", "\\_SB.PRQF"));
+ aml_append(scope, aml_alias("PRQG", "\\_SB.PRQG"));
+ aml_append(scope, aml_alias("PRQH", "\\_SB.PRQH"));
QTAILQ_FOREACH(kid, &bus->children, sibling) {
call_dev_aml_func(DEVICE(kid->child), scope);
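/*
 * Illustrative sketch (not part of this patch): the AML builder pattern
 * used above. An OperationRegion must be appended to the scope before
 * the Field that overlays it, because the Field references the region
 * by name when the AML is loaded. Names and offsets here are
 * assumptions; assumes #include "hw/acpi/aml-build.h".
 */
static Aml *build_example_irq_scope(void)
{
    Aml *scope = aml_scope("EXMP");
    Aml *field;

    aml_append(scope, aml_operation_region("REGN", AML_PCI_CONFIG,
                                           aml_int(0x60), 0x04));
    /* Field declaration after the region it overlays */
    field = aml_field("REGN", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE);
    aml_append(field, aml_named_field("VAL0", 8));
    aml_append(scope, field);

    return scope;
}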
diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c
index 808fd4eadf..f9b4af5c05 100644
--- a/hw/isa/piix3.c
+++ b/hw/isa/piix3.c
@@ -316,12 +316,27 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp)
static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope)
{
+ Aml *field;
BusChild *kid;
BusState *bus = qdev_get_child_bus(DEVICE(adev), "isa.0");
/* PIIX PCI to ISA irq remapping */
aml_append(scope, aml_operation_region("P40C", AML_PCI_CONFIG,
- aml_int(0x60), 0x04));
+ aml_int(0x60), 0x04));
+ /* Field declarations have to happen *after* the operation region */
+ field = aml_field("P40C", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE);
+ aml_append(field, aml_named_field("PRQ0", 8));
+ aml_append(field, aml_named_field("PRQ1", 8));
+ aml_append(field, aml_named_field("PRQ2", 8));
+ aml_append(field, aml_named_field("PRQ3", 8));
+ aml_append(scope, field);
+
+ /* hack: put fields into _SB scope for LNKx to find them */
+ aml_append(scope, aml_alias("PRQ0", "\\_SB.PRQ0"));
+ aml_append(scope, aml_alias("PRQ1", "\\_SB.PRQ1"));
+ aml_append(scope, aml_alias("PRQ2", "\\_SB.PRQ2"));
+ aml_append(scope, aml_alias("PRQ3", "\\_SB.PRQ3"));
+
QTAILQ_FOREACH(kid, &bus->children, sibling) {
call_dev_aml_func(DEVICE(kid->child), scope);
}
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index a71bf1afeb..255590201a 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -12,7 +12,245 @@
#include "qemu/range.h"
#include "qemu/rcu.h"
#include "sysemu/hostmem.h"
+#include "sysemu/numa.h"
#include "hw/cxl/cxl.h"
+#include "hw/pci/msix.h"
+
+#define DWORD_BYTE 4
+
+/* Default CDAT entries for a memory region */
+enum {
+ CT3_CDAT_DSMAS,
+ CT3_CDAT_DSLBIS0,
+ CT3_CDAT_DSLBIS1,
+ CT3_CDAT_DSLBIS2,
+ CT3_CDAT_DSLBIS3,
+ CT3_CDAT_DSEMTS,
+ CT3_CDAT_NUM_ENTRIES
+};
+
+static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
+ int dsmad_handle, MemoryRegion *mr)
+{
+ g_autofree CDATDsmas *dsmas = NULL;
+ g_autofree CDATDslbis *dslbis0 = NULL;
+ g_autofree CDATDslbis *dslbis1 = NULL;
+ g_autofree CDATDslbis *dslbis2 = NULL;
+ g_autofree CDATDslbis *dslbis3 = NULL;
+ g_autofree CDATDsemts *dsemts = NULL;
+
+ dsmas = g_malloc(sizeof(*dsmas));
+ if (!dsmas) {
+ return -ENOMEM;
+ }
+ *dsmas = (CDATDsmas) {
+ .header = {
+ .type = CDAT_TYPE_DSMAS,
+ .length = sizeof(*dsmas),
+ },
+ .DSMADhandle = dsmad_handle,
+ .flags = CDAT_DSMAS_FLAG_NV,
+ .DPA_base = 0,
+ .DPA_length = int128_get64(mr->size),
+ };
+
+ /* For now, no memory side cache, plausiblish numbers */
+ dslbis0 = g_malloc(sizeof(*dslbis0));
+ if (!dslbis0) {
+ return -ENOMEM;
+ }
+ *dslbis0 = (CDATDslbis) {
+ .header = {
+ .type = CDAT_TYPE_DSLBIS,
+ .length = sizeof(*dslbis0),
+ },
+ .handle = dsmad_handle,
+ .flags = HMAT_LB_MEM_MEMORY,
+ .data_type = HMAT_LB_DATA_READ_LATENCY,
+ .entry_base_unit = 10000, /* 10ns base */
+ .entry[0] = 15, /* 150ns */
+ };
+
+ dslbis1 = g_malloc(sizeof(*dslbis1));
+ if (!dslbis1) {
+ return -ENOMEM;
+ }
+ *dslbis1 = (CDATDslbis) {
+ .header = {
+ .type = CDAT_TYPE_DSLBIS,
+ .length = sizeof(*dslbis1),
+ },
+ .handle = dsmad_handle,
+ .flags = HMAT_LB_MEM_MEMORY,
+ .data_type = HMAT_LB_DATA_WRITE_LATENCY,
+ .entry_base_unit = 10000,
+ .entry[0] = 25, /* 250ns */
+ };
+
+ dslbis2 = g_malloc(sizeof(*dslbis2));
+ if (!dslbis2) {
+ return -ENOMEM;
+ }
+ *dslbis2 = (CDATDslbis) {
+ .header = {
+ .type = CDAT_TYPE_DSLBIS,
+ .length = sizeof(*dslbis2),
+ },
+ .handle = dsmad_handle,
+ .flags = HMAT_LB_MEM_MEMORY,
+ .data_type = HMAT_LB_DATA_READ_BANDWIDTH,
+ .entry_base_unit = 1000, /* GB/s */
+ .entry[0] = 16,
+ };
+
+ dslbis3 = g_malloc(sizeof(*dslbis3));
+ if (!dslbis3) {
+ return -ENOMEM;
+ }
+ *dslbis3 = (CDATDslbis) {
+ .header = {
+ .type = CDAT_TYPE_DSLBIS,
+ .length = sizeof(*dslbis3),
+ },
+ .handle = dsmad_handle,
+ .flags = HMAT_LB_MEM_MEMORY,
+ .data_type = HMAT_LB_DATA_WRITE_BANDWIDTH,
+ .entry_base_unit = 1000, /* GB/s */
+ .entry[0] = 16,
+ };
+
+ dsemts = g_malloc(sizeof(*dsemts));
+ if (!dsemts) {
+ return -ENOMEM;
+ }
+ *dsemts = (CDATDsemts) {
+ .header = {
+ .type = CDAT_TYPE_DSEMTS,
+ .length = sizeof(*dsemts),
+ },
+ .DSMAS_handle = dsmad_handle,
+ /* Reserved - the non-volatile flag from DSMAS is what matters */
+ .EFI_memory_type_attr = 2,
+ .DPA_offset = 0,
+ .DPA_length = int128_get64(mr->size),
+ };
+
+ /* Header always at start of structure */
+ cdat_table[CT3_CDAT_DSMAS] = g_steal_pointer(&dsmas);
+ cdat_table[CT3_CDAT_DSLBIS0] = g_steal_pointer(&dslbis0);
+ cdat_table[CT3_CDAT_DSLBIS1] = g_steal_pointer(&dslbis1);
+ cdat_table[CT3_CDAT_DSLBIS2] = g_steal_pointer(&dslbis2);
+ cdat_table[CT3_CDAT_DSLBIS3] = g_steal_pointer(&dslbis3);
+ cdat_table[CT3_CDAT_DSEMTS] = g_steal_pointer(&dsemts);
+
+ return 0;
+}
+
+static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
+{
+ g_autofree CDATSubHeader **table = NULL;
+ MemoryRegion *nonvolatile_mr;
+ CXLType3Dev *ct3d = priv;
+ int dsmad_handle = 0;
+ int rc;
+
+ if (!ct3d->hostmem) {
+ return 0;
+ }
+
+ nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostmem);
+ if (!nonvolatile_mr) {
+ return -EINVAL;
+ }
+
+ table = g_malloc0(CT3_CDAT_NUM_ENTRIES * sizeof(*table));
+ if (!table) {
+ return -ENOMEM;
+ }
+
+ rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, nonvolatile_mr);
+ if (rc < 0) {
+ return rc;
+ }
+
+ *cdat_table = g_steal_pointer(&table);
+
+ return CT3_CDAT_NUM_ENTRIES;
+}
+
+static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void *priv)
+{
+ int i;
+
+ for (i = 0; i < num; i++) {
+ g_free(cdat_table[i]);
+ }
+ g_free(cdat_table);
+}
+
+static bool cxl_doe_cdat_rsp(DOECap *doe_cap)
+{
+ CDATObject *cdat = &CXL_TYPE3(doe_cap->pdev)->cxl_cstate.cdat;
+ uint16_t ent;
+ void *base;
+ uint32_t len;
+ CDATReq *req = pcie_doe_get_write_mbox_ptr(doe_cap);
+ CDATRsp rsp;
+
+ assert(cdat->entry_len);
+
+ /* Discard if request length mismatched */
+ if (pcie_doe_get_obj_len(req) <
+ DIV_ROUND_UP(sizeof(CDATReq), DWORD_BYTE)) {
+ return false;
+ }
+
+ ent = req->entry_handle;
+ base = cdat->entry[ent].base;
+ len = cdat->entry[ent].length;
+
+ rsp = (CDATRsp) {
+ .header = {
+ .vendor_id = CXL_VENDOR_ID,
+ .data_obj_type = CXL_DOE_TABLE_ACCESS,
+ .reserved = 0x0,
+ .length = DIV_ROUND_UP((sizeof(rsp) + len), DWORD_BYTE),
+ },
+ .rsp_code = CXL_DOE_TAB_RSP,
+ .table_type = CXL_DOE_TAB_TYPE_CDAT,
+ .entry_handle = (ent < cdat->entry_len - 1) ?
+ ent + 1 : CXL_DOE_TAB_ENT_MAX,
+ };
+
+ memcpy(doe_cap->read_mbox, &rsp, sizeof(rsp));
+ memcpy(doe_cap->read_mbox + DIV_ROUND_UP(sizeof(rsp), DWORD_BYTE),
+ base, len);
+
+ doe_cap->read_mbox_len += rsp.header.length;
+
+ return true;
+}
+
+static uint32_t ct3d_config_read(PCIDevice *pci_dev, uint32_t addr, int size)
+{
+ CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
+ uint32_t val;
+
+ if (pcie_doe_read_config(&ct3d->doe_cdat, addr, size, &val)) {
+ return val;
+ }
+
+ return pci_default_read_config(pci_dev, addr, size);
+}
+
+static void ct3d_config_write(PCIDevice *pci_dev, uint32_t addr, uint32_t val,
+ int size)
+{
+ CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
+
+ pcie_doe_write_config(&ct3d->doe_cdat, addr, val, size);
+ pci_default_write_config(pci_dev, addr, val, size);
+}
/*
* Null value of all Fs suggested by IEEE RA guidelines for use of
@@ -139,6 +377,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
return true;
}
+static DOEProtocol doe_cdat_prot[] = {
+ { CXL_VENDOR_ID, CXL_DOE_TABLE_ACCESS, cxl_doe_cdat_rsp },
+ { }
+};
+
static void ct3_realize(PCIDevice *pci_dev, Error **errp)
{
CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
@@ -146,6 +389,8 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
ComponentRegisters *regs = &cxl_cstate->crb;
MemoryRegion *mr = &regs->component_registers;
uint8_t *pci_conf = pci_dev->config;
+ unsigned short msix_num = 1;
+ int i;
if (!cxl_setup_memory(ct3d, errp)) {
return;
@@ -180,6 +425,20 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_TYPE_64,
&ct3d->cxl_dstate.device_registers);
+
+ /* MSI(-X) Initialization */
+ msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL);
+ for (i = 0; i < msix_num; i++) {
+ msix_vector_use(pci_dev, i);
+ }
+
+ /* DOE Initialization */
+ pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0);
+
+ cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
+ cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
+ cxl_cstate->cdat.private = ct3d;
+ cxl_doe_cdat_init(cxl_cstate, errp);
}
static void ct3_exit(PCIDevice *pci_dev)
@@ -188,6 +447,7 @@ static void ct3_exit(PCIDevice *pci_dev)
CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
ComponentRegisters *regs = &cxl_cstate->crb;
+ cxl_doe_cdat_release(cxl_cstate);
g_free(regs->special_ops);
address_space_destroy(&ct3d->hostmem_as);
}
@@ -287,6 +547,7 @@ static Property ct3_props[] = {
DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND,
HostMemoryBackend *),
DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
+ DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
DEFINE_PROP_END_OF_LIST(),
};
@@ -352,6 +613,9 @@ static void ct3_class_init(ObjectClass *oc, void *data)
pc->device_id = 0xd93; /* LVF for now */
pc->revision = 1;
+ pc->config_write = ct3d_config_write;
+ pc->config_read = ct3d_config_read;
+
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
dc->desc = "CXL PMEM Device (Type 3)";
dc->reset = ct3d_reset;
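/*
 * Illustrative sketch (not part of this patch): how a reader walks the
 * CDAT over DOE as served by cxl_doe_cdat_rsp() above. Each response
 * carries the handle of the next entry, and the walk ends when the
 * device returns the end-of-table marker. The transport helper and
 * EX_* constants below are assumptions for the example.
 */
#include <stdbool.h>
#include <stdint.h>

#define EX_TAB_ENT_MAX 0xffff          /* assumed end-of-table marker */

struct ex_cdat_rsp {
    uint16_t entry_handle;             /* next handle, or end marker */
    /* ... entry payload follows in the mailbox ... */
};

/* hypothetical transport: one DOE request/response exchange */
bool ex_doe_exchange(uint16_t handle, struct ex_cdat_rsp *rsp);

int ex_walk_cdat(void)
{
    struct ex_cdat_rsp rsp;
    uint16_t handle = 0;               /* first entry */

    while (handle != EX_TAB_ENT_MAX) {
        if (!ex_doe_exchange(handle, &rsp)) {
            return -1;                 /* device rejected the request */
        }
        /* consume the entry payload here */
        handle = rsp.entry_handle;
    }
    return 0;
}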
diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
index ac96f7665a..7523e9f5d2 100644
--- a/hw/net/e1000e.c
+++ b/hw/net/e1000e.c
@@ -276,25 +276,18 @@ e1000e_unuse_msix_vectors(E1000EState *s, int num_vectors)
}
}
-static bool
+static void
e1000e_use_msix_vectors(E1000EState *s, int num_vectors)
{
int i;
for (i = 0; i < num_vectors; i++) {
- int res = msix_vector_use(PCI_DEVICE(s), i);
- if (res < 0) {
- trace_e1000e_msix_use_vector_fail(i, res);
- e1000e_unuse_msix_vectors(s, i);
- return false;
- }
+ msix_vector_use(PCI_DEVICE(s), i);
}
- return true;
}
static void
e1000e_init_msix(E1000EState *s)
{
- PCIDevice *d = PCI_DEVICE(s);
int res = msix_init(PCI_DEVICE(s), E1000E_MSIX_VEC_NUM,
&s->msix,
E1000E_MSIX_IDX, E1000E_MSIX_TABLE,
@@ -305,9 +298,7 @@ e1000e_init_msix(E1000EState *s)
if (res < 0) {
trace_e1000e_msix_init_fail(res);
} else {
- if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) {
- msix_uninit(d, &s->msix, &s->msix);
- }
+ e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM);
}
}
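/*
 * Illustrative sketch (not part of this patch): with msix_vector_use()
 * no longer reporting failure, the per-device wrappers across e1000e,
 * rocker, vmxnet3 and nvme in this series collapse into a plain loop.
 * The range check is assumed to live as an assertion inside the helper;
 * assumes "hw/pci/msix.h".
 */
static void example_use_msix_vectors(PCIDevice *d, int num_vectors)
{
    int i;

    for (i = 0; i < num_vectors; i++) {
        msix_vector_use(d, i);  /* asserts the vector is in range */
    }
}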
diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c
index d8f3f16fe8..281d43e6cf 100644
--- a/hw/net/rocker/rocker.c
+++ b/hw/net/rocker/rocker.c
@@ -1212,24 +1212,14 @@ static void rocker_msix_vectors_unuse(Rocker *r,
}
}
-static int rocker_msix_vectors_use(Rocker *r,
- unsigned int num_vectors)
+static void rocker_msix_vectors_use(Rocker *r, unsigned int num_vectors)
{
PCIDevice *dev = PCI_DEVICE(r);
- int err;
int i;
for (i = 0; i < num_vectors; i++) {
- err = msix_vector_use(dev, i);
- if (err) {
- goto rollback;
- }
+ msix_vector_use(dev, i);
}
- return 0;
-
-rollback:
- rocker_msix_vectors_unuse(r, i);
- return err;
}
static int rocker_msix_init(Rocker *r, Error **errp)
@@ -1247,16 +1237,9 @@ static int rocker_msix_init(Rocker *r, Error **errp)
return err;
}
- err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
- if (err) {
- goto err_msix_vectors_use;
- }
+ rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
return 0;
-
-err_msix_vectors_use:
- msix_uninit(dev, &r->msix_bar, &r->msix_bar);
- return err;
}
static void rocker_msix_uninit(Rocker *r)
diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c
index 89d71cfb8e..9f7daae99c 100644
--- a/hw/net/vhost_net-stub.c
+++ b/hw/net/vhost_net-stub.c
@@ -101,3 +101,15 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
{
return 0;
}
+
+void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
+ int vq_index)
+{
+
+}
+
+int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
+ int vq_index)
+{
+ return 0;
+}
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index d28f8b974b..feda448878 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -34,6 +34,7 @@
#include "standard-headers/linux/virtio_ring.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/virtio-bus.h"
+#include "linux-headers/linux/vhost.h"
/* Features supported by host kernel. */
@@ -46,6 +47,7 @@ static const int kernel_feature_bits[] = {
VIRTIO_NET_F_MTU,
VIRTIO_F_IOMMU_PLATFORM,
VIRTIO_F_RING_PACKED,
+ VIRTIO_F_RING_RESET,
VIRTIO_NET_F_HASH_REPORT,
VHOST_INVALID_FEATURE_BIT
};
@@ -387,21 +389,20 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
} else {
peer = qemu_get_peer(ncs, n->max_queue_pairs);
}
- r = vhost_net_start_one(get_vhost_net(peer), dev);
-
- if (r < 0) {
- goto err_start;
- }
if (peer->vring_enable) {
/* restore vring enable state */
r = vhost_set_vring_enable(peer, peer->vring_enable);
if (r < 0) {
- vhost_net_stop_one(get_vhost_net(peer), dev);
goto err_start;
}
}
+
+ r = vhost_net_start_one(get_vhost_net(peer), dev);
+ if (r < 0) {
+ goto err_start;
+ }
}
return 0;
@@ -531,3 +532,80 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
}
+
+void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
+ int vq_index)
+{
+ VHostNetState *net = get_vhost_net(nc->peer);
+ const VhostOps *vhost_ops = net->dev.vhost_ops;
+ struct vhost_vring_file file = { .fd = -1 };
+ int idx;
+
+ /* should only be called after backend is connected */
+ assert(vhost_ops);
+
+ idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
+
+ if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
+ file.index = idx;
+ int r = vhost_net_set_backend(&net->dev, &file);
+ assert(r >= 0);
+ }
+
+ vhost_virtqueue_stop(&net->dev,
+ vdev,
+ net->dev.vqs + idx,
+ net->dev.vq_index + idx);
+}
+
+int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
+ int vq_index)
+{
+ VHostNetState *net = get_vhost_net(nc->peer);
+ const VhostOps *vhost_ops = net->dev.vhost_ops;
+ struct vhost_vring_file file = { };
+ int idx, r;
+
+ if (!net->dev.started) {
+ return -EBUSY;
+ }
+
+ /* should only be called after backend is connected */
+ assert(vhost_ops);
+
+ idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
+
+ r = vhost_virtqueue_start(&net->dev,
+ vdev,
+ net->dev.vqs + idx,
+ net->dev.vq_index + idx);
+ if (r < 0) {
+ goto err_start;
+ }
+
+ if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
+ file.index = idx;
+ file.fd = net->backend;
+ r = vhost_net_set_backend(&net->dev, &file);
+ if (r < 0) {
+ r = -errno;
+ goto err_start;
+ }
+ }
+
+ return 0;
+
+err_start:
+ error_report("Error when restarting the queue.");
+
+ if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
+ file.fd = VHOST_FILE_UNBIND;
+ file.index = idx;
+ int r = vhost_net_set_backend(&net->dev, &file);
+ assert(r >= 0);
+ }
+
+ vhost_dev_stop(&net->dev, vdev);
+
+ return r;
+}
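/*
 * Illustrative sketch (not part of this patch): the backend-detach
 * trick used by vhost_net_virtqueue_reset()/_restart() above. Setting
 * the backend fd to -1 (or VHOST_FILE_UNBIND) makes the kernel stop
 * polling the ring; restoring the tap fd resumes it. Error handling is
 * elided, the wrapper name is an assumption, and the types are those
 * of hw/net/vhost_net.c.
 */
static void example_quiesce_and_resume(struct vhost_net *net, int idx)
{
    struct vhost_vring_file file = { .index = idx, .fd = -1 };

    vhost_net_set_backend(&net->dev, &file);  /* kernel stops the ring */
    /* ... stop, reconfigure and restart the virtqueue ... */
    file.fd = net->backend;                   /* the real tap fd */
    vhost_net_set_backend(&net->dev, &file);  /* kernel resumes */
}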
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index b6903aea54..8b32339b76 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -124,6 +124,16 @@ static int vq2q(int queue_index)
return queue_index / 2;
}
+static void flush_or_purge_queued_packets(NetClientState *nc)
+{
+ if (!nc->peer) {
+ return;
+ }
+
+ qemu_flush_or_purge_queued_packets(nc->peer, true);
+ assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
+}
+
/* TODO
* - we could suppress RX interrupt if we were so inclined.
*/
@@ -536,6 +546,43 @@ static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
return info;
}
+static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
+{
+ VirtIONet *n = VIRTIO_NET(vdev);
+ NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
+
+ if (!nc->peer) {
+ return;
+ }
+
+ if (get_vhost_net(nc->peer) &&
+ nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
+ vhost_net_virtqueue_reset(vdev, nc, queue_index);
+ }
+
+ flush_or_purge_queued_packets(nc);
+}
+
+static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
+{
+ VirtIONet *n = VIRTIO_NET(vdev);
+ NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
+ int r;
+
+ if (!nc->peer || !vdev->vhost_started) {
+ return;
+ }
+
+ if (get_vhost_net(nc->peer) &&
+ nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
+ r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
+ if (r < 0) {
+ error_report("unable to restart vhost net virtqueue: %d, "
+ "when resetting the queue", queue_index);
+ }
+ }
+}
+
static void virtio_net_reset(VirtIODevice *vdev)
{
VirtIONet *n = VIRTIO_NET(vdev);
@@ -566,12 +613,7 @@ static void virtio_net_reset(VirtIODevice *vdev)
/* Flush any async TX */
for (i = 0; i < n->max_queue_pairs; i++) {
- NetClientState *nc = qemu_get_subqueue(n->nic, i);
-
- if (nc->peer) {
- qemu_flush_or_purge_queued_packets(nc->peer, true);
- assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
- }
+ flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
}
}
@@ -746,6 +788,7 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
}
if (!get_vhost_net(nc->peer)) {
+ virtio_add_feature(&features, VIRTIO_F_RING_RESET);
return features;
}
@@ -3822,6 +3865,8 @@ static void virtio_net_class_init(ObjectClass *klass, void *data)
vdc->set_features = virtio_net_set_features;
vdc->bad_features = virtio_net_bad_features;
vdc->reset = virtio_net_reset;
+ vdc->queue_reset = virtio_net_queue_reset;
+ vdc->queue_enable = virtio_net_queue_enable;
vdc->set_status = virtio_net_set_status;
vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 0b7acf7f89..d2ab527ef4 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -2110,20 +2110,14 @@ vmxnet3_unuse_msix_vectors(VMXNET3State *s, int num_vectors)
}
}
-static bool
+static void
vmxnet3_use_msix_vectors(VMXNET3State *s, int num_vectors)
{
PCIDevice *d = PCI_DEVICE(s);
int i;
for (i = 0; i < num_vectors; i++) {
- int res = msix_vector_use(d, i);
- if (0 > res) {
- VMW_WRPRN("Failed to use MSI-X vector %d, error %d", i, res);
- vmxnet3_unuse_msix_vectors(s, i);
- return false;
- }
+ msix_vector_use(d, i);
}
- return true;
}
static bool
@@ -2141,13 +2135,8 @@ vmxnet3_init_msix(VMXNET3State *s)
VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
s->msix_used = false;
} else {
- if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
- VMW_WRPRN("Failed to use MSI-X vectors, error %d", res);
- msix_uninit(d, &s->msix_bar, &s->msix_bar);
- s->msix_used = false;
- } else {
- s->msix_used = true;
- }
+ vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS);
+ s->msix_used = true;
}
return s->msix_used;
}
@@ -2412,19 +2401,13 @@ static const VMStateDescription vmstate_vmxnet3_rxq_descr = {
static int vmxnet3_post_load(void *opaque, int version_id)
{
VMXNET3State *s = opaque;
- PCIDevice *d = PCI_DEVICE(s);
net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
s->max_tx_frags, s->peer_has_vhdr);
net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
if (s->msix_used) {
- if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
- VMW_WRPRN("Failed to re-use MSI-X vectors");
- msix_uninit(d, &s->msix_bar, &s->msix_bar);
- s->msix_used = false;
- return -1;
- }
+ vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS);
}
if (!vmxnet3_validate_queues(s)) {
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 9a9857ccf8..ac3885ce50 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4740,11 +4740,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
uint16_t cqid, uint16_t vector, uint16_t size,
uint16_t irq_enabled)
{
- int ret;
-
if (msix_enabled(&n->parent_obj)) {
- ret = msix_vector_use(&n->parent_obj, vector);
- assert(ret == 0);
+ msix_vector_use(&n->parent_obj, vector);
}
cq->ctrl = n;
cq->cqid = cqid;
diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
index a83a3e81e4..9b8b57df9d 100644
--- a/hw/pci-bridge/cxl_upstream.c
+++ b/hw/pci-bridge/cxl_upstream.c
@@ -10,11 +10,12 @@
#include "qemu/osdep.h"
#include "qemu/log.h"
+#include "hw/qdev-properties.h"
#include "hw/pci/msi.h"
#include "hw/pci/pcie.h"
#include "hw/pci/pcie_port.h"
-#define CXL_UPSTREAM_PORT_MSI_NR_VECTOR 1
+#define CXL_UPSTREAM_PORT_MSI_NR_VECTOR 2
#define CXL_UPSTREAM_PORT_MSI_OFFSET 0x70
#define CXL_UPSTREAM_PORT_PCIE_CAP_OFFSET 0x90
@@ -28,6 +29,7 @@ typedef struct CXLUpstreamPort {
/*< public >*/
CXLComponentState cxl_cstate;
+ DOECap doe_cdat;
} CXLUpstreamPort;
CXLComponentState *cxl_usp_to_cstate(CXLUpstreamPort *usp)
@@ -60,6 +62,9 @@ static void cxl_usp_dvsec_write_config(PCIDevice *dev, uint32_t addr,
static void cxl_usp_write_config(PCIDevice *d, uint32_t address,
uint32_t val, int len)
{
+ CXLUpstreamPort *usp = CXL_USP(d);
+
+ pcie_doe_write_config(&usp->doe_cdat, address, val, len);
pci_bridge_write_config(d, address, val, len);
pcie_cap_flr_write_config(d, address, val, len);
pcie_aer_write_config(d, address, val, len);
@@ -67,6 +72,18 @@ static void cxl_usp_write_config(PCIDevice *d, uint32_t address,
cxl_usp_dvsec_write_config(d, address, val, len);
}
+static uint32_t cxl_usp_read_config(PCIDevice *d, uint32_t address, int len)
+{
+ CXLUpstreamPort *usp = CXL_USP(d);
+ uint32_t val;
+
+ if (pcie_doe_read_config(&usp->doe_cdat, address, len, &val)) {
+ return val;
+ }
+
+ return pci_default_read_config(d, address, len);
+}
+
static void latch_registers(CXLUpstreamPort *usp)
{
uint32_t *reg_state = usp->cxl_cstate.crb.cache_mem_registers;
@@ -119,6 +136,167 @@ static void build_dvsecs(CXLComponentState *cxl)
REG_LOC_DVSEC_REVID, dvsec);
}
+static bool cxl_doe_cdat_rsp(DOECap *doe_cap)
+{
+ CDATObject *cdat = &CXL_USP(doe_cap->pdev)->cxl_cstate.cdat;
+ uint16_t ent;
+ void *base;
+ uint32_t len;
+ CDATReq *req = pcie_doe_get_write_mbox_ptr(doe_cap);
+ CDATRsp rsp;
+
+ cxl_doe_cdat_update(&CXL_USP(doe_cap->pdev)->cxl_cstate, &error_fatal);
+ assert(cdat->entry_len);
+
+ /* Discard if request length mismatched */
+ if (pcie_doe_get_obj_len(req) <
+ DIV_ROUND_UP(sizeof(CDATReq), sizeof(uint32_t))) {
+ return false;
+ }
+
+ ent = req->entry_handle;
+ base = cdat->entry[ent].base;
+ len = cdat->entry[ent].length;
+
+ rsp = (CDATRsp) {
+ .header = {
+ .vendor_id = CXL_VENDOR_ID,
+ .data_obj_type = CXL_DOE_TABLE_ACCESS,
+ .reserved = 0x0,
+ .length = DIV_ROUND_UP((sizeof(rsp) + len), sizeof(uint32_t)),
+ },
+ .rsp_code = CXL_DOE_TAB_RSP,
+ .table_type = CXL_DOE_TAB_TYPE_CDAT,
+ .entry_handle = (ent < cdat->entry_len - 1) ?
+ ent + 1 : CXL_DOE_TAB_ENT_MAX,
+ };
+
+ memcpy(doe_cap->read_mbox, &rsp, sizeof(rsp));
+ memcpy(doe_cap->read_mbox + DIV_ROUND_UP(sizeof(rsp), sizeof(uint32_t)),
+ base, len);
+
+ doe_cap->read_mbox_len += rsp.header.length;
+
+ return true;
+}
+
+static DOEProtocol doe_cdat_prot[] = {
+ { CXL_VENDOR_ID, CXL_DOE_TABLE_ACCESS, cxl_doe_cdat_rsp },
+ { }
+};
+
+enum {
+ CXL_USP_CDAT_SSLBIS_LAT,
+ CXL_USP_CDAT_SSLBIS_BW,
+ CXL_USP_CDAT_NUM_ENTRIES
+};
+
+static int build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
+{
+ g_autofree CDATSslbis *sslbis_latency = NULL;
+ g_autofree CDATSslbis *sslbis_bandwidth = NULL;
+ CXLUpstreamPort *us = CXL_USP(priv);
+ PCIBus *bus = &PCI_BRIDGE(us)->sec_bus;
+ int devfn, sslbis_size, i;
+ int count = 0;
+ uint16_t port_ids[256];
+
+ for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+ PCIDevice *d = bus->devices[devfn];
+ PCIEPort *port;
+
+ if (!d || !pci_is_express(d) || !d->exp.exp_cap) {
+ continue;
+ }
+
+ /*
+ * Whilst the PCI express spec doesn't allow anything other than
+ * downstream ports on this bus, let us be a little paranoid
+ */
+ if (!object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) {
+ continue;
+ }
+
+ port = PCIE_PORT(d);
+ port_ids[count] = port->port;
+ count++;
+ }
+
+ /* May not yet have any ports - try again later */
+ if (count == 0) {
+ return 0;
+ }
+
+ sslbis_size = sizeof(CDATSslbis) + sizeof(*sslbis_latency->sslbe) * count;
+ sslbis_latency = g_malloc(sslbis_size);
+ if (!sslbis_latency) {
+ return -ENOMEM;
+ }
+ *sslbis_latency = (CDATSslbis) {
+ .sslbis_header = {
+ .header = {
+ .type = CDAT_TYPE_SSLBIS,
+ .length = sslbis_size,
+ },
+ .data_type = HMATLB_DATA_TYPE_ACCESS_LATENCY,
+ .entry_base_unit = 10000,
+ },
+ };
+
+ for (i = 0; i < count; i++) {
+ sslbis_latency->sslbe[i] = (CDATSslbe) {
+ .port_x_id = CDAT_PORT_ID_USP,
+ .port_y_id = port_ids[i],
+ .latency_bandwidth = 15, /* 150ns */
+ };
+ }
+
+ sslbis_bandwidth = g_malloc(sslbis_size);
+ if (!sslbis_bandwidth) {
+ return 0;
+ }
+ *sslbis_bandwidth = (CDATSslbis) {
+ .sslbis_header = {
+ .header = {
+ .type = CDAT_TYPE_SSLBIS,
+ .length = sslbis_size,
+ },
+ .data_type = HMATLB_DATA_TYPE_ACCESS_BANDWIDTH,
+ .entry_base_unit = 1000,
+ },
+ };
+
+ for (i = 0; i < count; i++) {
+ sslbis_bandwidth->sslbe[i] = (CDATSslbe) {
+ .port_x_id = CDAT_PORT_ID_USP,
+ .port_y_id = port_ids[i],
+ .latency_bandwidth = 16, /* 16 GB/s */
+ };
+ }
+
+ *cdat_table = g_malloc0(sizeof(*cdat_table) * CXL_USP_CDAT_NUM_ENTRIES);
+ if (!*cdat_table) {
+ return -ENOMEM;
+ }
+
+ /* Header always at start of structure */
+ (*cdat_table)[CXL_USP_CDAT_SSLBIS_LAT] = g_steal_pointer(&sslbis_latency);
+ (*cdat_table)[CXL_USP_CDAT_SSLBIS_BW] = g_steal_pointer(&sslbis_bandwidth);
+
+ return CXL_USP_CDAT_NUM_ENTRIES;
+}
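
A quick check of the encodings above, assuming the HMAT convention that
SSLBIS latency entries are expressed in picoseconds and bandwidth entries
in MB/s:

    latency   = entry_base_unit * latency_bandwidth
              = 10000 ps * 15   = 150000 ps = 150 ns
    bandwidth = entry_base_unit * latency_bandwidth
              = 1000 MB/s * 16  = 16000 MB/s = 16 GB/s
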
+
+static void free_default_cdat_table(CDATSubHeader **cdat_table, int num,
+ void *priv)
+{
+ int i;
+
+ for (i = 0; i < num; i++) {
+ g_free(cdat_table[i]);
+ }
+ g_free(cdat_table);
+}
+
static void cxl_usp_realize(PCIDevice *d, Error **errp)
{
PCIEPort *p = PCIE_PORT(d);
@@ -161,6 +339,14 @@ static void cxl_usp_realize(PCIDevice *d, Error **errp)
PCI_BASE_ADDRESS_MEM_TYPE_64,
component_bar);
+ pcie_doe_init(d, &usp->doe_cdat, cxl_cstate->dvsec_offset, doe_cdat_prot,
+ true, 1);
+
+ cxl_cstate->cdat.build_cdat_table = build_cdat_table;
+ cxl_cstate->cdat.free_cdat_table = free_default_cdat_table;
+ cxl_cstate->cdat.private = d;
+ cxl_doe_cdat_init(cxl_cstate, errp);
+
return;
err_cap:
@@ -179,6 +365,11 @@ static void cxl_usp_exitfn(PCIDevice *d)
pci_bridge_exitfn(d);
}
+static Property cxl_upstream_props[] = {
+ DEFINE_PROP_STRING("cdat", CXLUpstreamPort, cxl_cstate.cdat.filename),
+ DEFINE_PROP_END_OF_LIST()
+};
+
static void cxl_upstream_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
@@ -186,6 +377,7 @@ static void cxl_upstream_class_init(ObjectClass *oc, void *data)
k->is_bridge = true;
k->config_write = cxl_usp_write_config;
+ k->config_read = cxl_usp_read_config;
k->realize = cxl_usp_realize;
k->exit = cxl_usp_exitfn;
k->vendor_id = 0x19e5; /* Huawei */
@@ -194,6 +386,7 @@ static void cxl_upstream_class_init(ObjectClass *oc, void *data)
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
dc->desc = "CXL Switch Upstream Port";
dc->reset = cxl_usp_reset;
+ device_class_set_props(dc, cxl_upstream_props);
}
static const TypeInfo cxl_usp_info = {
diff --git a/hw/pci/meson.build b/hw/pci/meson.build
index bcc9c75919..5aff7ed1c6 100644
--- a/hw/pci/meson.build
+++ b/hw/pci/meson.build
@@ -13,6 +13,7 @@ pci_ss.add(files(
# allow plugging PCIe devices into PCI buses, include them even if
# CONFIG_PCI_EXPRESS=n.
pci_ss.add(files('pcie.c', 'pcie_aer.c'))
+pci_ss.add(files('pcie_doe.c'))
softmmu_ss.add(when: 'CONFIG_PCI_EXPRESS', if_true: files('pcie_port.c', 'pcie_host.c'))
softmmu_ss.add_all(when: 'CONFIG_PCI', if_true: pci_ss)
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 1e381a9813..9e70fcd6fa 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -136,17 +136,12 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
}
}
-void msix_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp)
+void msix_set_mask(PCIDevice *dev, int vector, bool mask)
{
- ERRP_GUARD();
unsigned offset;
bool was_masked;
- if (vector > dev->msix_entries_nr) {
- error_setg(errp, "msix: vector %d not allocated. max vector is %d",
- vector, dev->msix_entries_nr);
- return;
- }
+ assert(vector < dev->msix_entries_nr);
offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
@@ -522,7 +517,9 @@ void msix_notify(PCIDevice *dev, unsigned vector)
{
MSIMessage msg;
- if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
+ assert(vector < dev->msix_entries_nr);
+
+ if (!dev->msix_entry_used[vector]) {
return;
}
@@ -558,20 +555,17 @@ void msix_reset(PCIDevice *dev)
* don't want to follow the spec suggestion can declare all vectors as used. */
/* Mark vector as used. */
-int msix_vector_use(PCIDevice *dev, unsigned vector)
+void msix_vector_use(PCIDevice *dev, unsigned vector)
{
- if (vector >= dev->msix_entries_nr) {
- return -EINVAL;
- }
-
+ assert(vector < dev->msix_entries_nr);
dev->msix_entry_used[vector]++;
- return 0;
}
/* Mark vector as unused. */
void msix_vector_unuse(PCIDevice *dev, unsigned vector)
{
- if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
+ assert(vector < dev->msix_entries_nr);
+ if (!dev->msix_entry_used[vector]) {
return;
}
if (--dev->msix_entry_used[vector]) {
diff --git a/hw/pci/pcie_doe.c b/hw/pci/pcie_doe.c
new file mode 100644
index 0000000000..2210f86968
--- /dev/null
+++ b/hw/pci/pcie_doe.c
@@ -0,0 +1,367 @@
+/*
+ * PCIe Data Object Exchange
+ *
+ * Copyright (C) 2021 Avery Design Systems, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qemu/range.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pcie.h"
+#include "hw/pci/pcie_doe.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+
+#define DWORD_BYTE 4
+
+typedef struct DoeDiscoveryReq {
+ DOEHeader header;
+ uint8_t index;
+ uint8_t reserved[3];
+} QEMU_PACKED DoeDiscoveryReq;
+
+typedef struct DoeDiscoveryRsp {
+ DOEHeader header;
+ uint16_t vendor_id;
+ uint8_t data_obj_type;
+ uint8_t next_index;
+} QEMU_PACKED DoeDiscoveryRsp;
+
+static bool pcie_doe_discovery(DOECap *doe_cap)
+{
+ DoeDiscoveryReq *req = pcie_doe_get_write_mbox_ptr(doe_cap);
+ DoeDiscoveryRsp rsp;
+ uint8_t index = req->index;
+ DOEProtocol *prot;
+
+ /* Discard the request if it is shorter than a DoeDiscoveryReq */
+ if (pcie_doe_get_obj_len(req) <
+ DIV_ROUND_UP(sizeof(DoeDiscoveryReq), DWORD_BYTE)) {
+ return false;
+ }
+
+ rsp.header = (DOEHeader) {
+ .vendor_id = PCI_VENDOR_ID_PCI_SIG,
+ .data_obj_type = PCI_SIG_DOE_DISCOVERY,
+ .length = DIV_ROUND_UP(sizeof(DoeDiscoveryRsp), DWORD_BYTE),
+ };
+
+ /* Point to the requested protocol, index 0 must be Discovery */
+ if (index == 0) {
+ rsp.vendor_id = PCI_VENDOR_ID_PCI_SIG;
+ rsp.data_obj_type = PCI_SIG_DOE_DISCOVERY;
+ } else {
+ if (index < doe_cap->protocol_num) {
+ prot = &doe_cap->protocols[index - 1];
+ rsp.vendor_id = prot->vendor_id;
+ rsp.data_obj_type = prot->data_obj_type;
+ } else {
+ rsp.vendor_id = 0xFFFF;
+ rsp.data_obj_type = 0xFF;
+ }
+ }
+
+ if (index + 1 == doe_cap->protocol_num) {
+ rsp.next_index = 0;
+ } else {
+ rsp.next_index = index + 1;
+ }
+
+ pcie_doe_set_rsp(doe_cap, &rsp);
+
+ return true;
+}
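
For reference, the other half of this exchange: a guest driver iterates the
discovery protocol by feeding next_index back until it reads 0. The sketch
below is illustrative only; pci_cfg_read32()/pci_cfg_write32() are
hypothetical config-space accessors, 'doe' is the capability offset, and
the GO and Data Object Ready bit positions (bit 31 of control and status)
follow PCIe r6.0:

    static void doe_discover_all(uint32_t doe)
    {
        uint8_t index = 0;

        do {
            DoeDiscoveryReq req = {
                .header = {
                    .vendor_id = PCI_VENDOR_ID_PCI_SIG,
                    .data_obj_type = PCI_SIG_DOE_DISCOVERY,
                    .length = sizeof(req) / DWORD_BYTE,    /* 3 DW */
                },
                .index = index,
            };
            uint32_t dw[3];
            int i;

            memcpy(dw, &req, sizeof(req));
            for (i = 0; i < 3; i++) {
                pci_cfg_write32(doe + PCI_EXP_DOE_WR_DATA_MBOX, dw[i]);
            }
            pci_cfg_write32(doe + PCI_EXP_DOE_CTRL, 1u << 31);    /* GO */

            while (!(pci_cfg_read32(doe + PCI_EXP_DOE_STATUS) & (1u << 31))) {
                ;                          /* wait for Data Object Ready */
            }
            for (i = 0; i < 3; i++) {
                dw[i] = pci_cfg_read32(doe + PCI_EXP_DOE_RD_DATA_MBOX);
                /* any write to RD_DATA_MBOX advances the read index */
                pci_cfg_write32(doe + PCI_EXP_DOE_RD_DATA_MBOX, 0);
            }
            index = ((DoeDiscoveryRsp *)dw)->next_index;
        } while (index != 0);
    }
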
+
+static void pcie_doe_reset_mbox(DOECap *st)
+{
+ st->read_mbox_idx = 0;
+ st->read_mbox_len = 0;
+ st->write_mbox_len = 0;
+
+ memset(st->read_mbox, 0, PCI_DOE_DW_SIZE_MAX * DWORD_BYTE);
+ memset(st->write_mbox, 0, PCI_DOE_DW_SIZE_MAX * DWORD_BYTE);
+}
+
+void pcie_doe_init(PCIDevice *dev, DOECap *doe_cap, uint16_t offset,
+ DOEProtocol *protocols, bool intr, uint16_t vec)
+{
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_DOE, 0x1, offset,
+ PCI_DOE_SIZEOF);
+
+ doe_cap->pdev = dev;
+ doe_cap->offset = offset;
+
+ if (intr && (msi_present(dev) || msix_present(dev))) {
+ doe_cap->cap.intr = intr;
+ doe_cap->cap.vec = vec;
+ }
+
+ doe_cap->write_mbox = g_malloc0(PCI_DOE_DW_SIZE_MAX * DWORD_BYTE);
+ doe_cap->read_mbox = g_malloc0(PCI_DOE_DW_SIZE_MAX * DWORD_BYTE);
+
+ pcie_doe_reset_mbox(doe_cap);
+
+ doe_cap->protocols = protocols;
+ for (; protocols->vendor_id; protocols++) {
+ doe_cap->protocol_num++;
+ }
+ assert(doe_cap->protocol_num < PCI_DOE_PROTOCOL_NUM_MAX);
+
+ /* Increment to allow for the discovery protocol */
+ doe_cap->protocol_num++;
+}
+
+void pcie_doe_fini(DOECap *doe_cap)
+{
+ g_free(doe_cap->read_mbox);
+ g_free(doe_cap->write_mbox);
+ g_free(doe_cap);
+}
+
+uint32_t pcie_doe_build_protocol(DOEProtocol *p)
+{
+ return DATA_OBJ_BUILD_HEADER1(p->vendor_id, p->data_obj_type);
+}
+
+void *pcie_doe_get_write_mbox_ptr(DOECap *doe_cap)
+{
+ return doe_cap->write_mbox;
+}
+
+/*
+ * Copy the response into the read mailbox buffer.
+ * This may be called from a protocol-specific handle_request() callback
+ * when the corresponding protocol requires a DOE response.
+ */
+void pcie_doe_set_rsp(DOECap *doe_cap, void *rsp)
+{
+ uint32_t len = pcie_doe_get_obj_len(rsp);
+
+ memcpy(doe_cap->read_mbox + doe_cap->read_mbox_len, rsp, len * DWORD_BYTE);
+ doe_cap->read_mbox_len += len;
+}
+
+uint32_t pcie_doe_get_obj_len(void *obj)
+{
+ uint32_t len;
+
+ if (!obj) {
+ return 0;
+ }
+
+ /* Only lower 18 bits are valid */
+ len = DATA_OBJ_LEN_MASK(((DOEHeader *)obj)->length);
+
+ /* PCIe r6.0 Table 6.29: a value of 00000h indicates 2^18 DW */
+ return (len) ? len : PCI_DOE_DW_SIZE_MAX;
+}
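
The zero-length quirk above in one line: the Length field is only 18 bits
wide, so the maximum object size wraps to zero and must be decoded
specially, e.g.:

    DOEHeader hdr = { .length = 0 };
    assert(pcie_doe_get_obj_len(&hdr) == PCI_DOE_DW_SIZE_MAX);  /* 2^18 DW */
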
+
+static void pcie_doe_irq_assert(DOECap *doe_cap)
+{
+ PCIDevice *dev = doe_cap->pdev;
+
+ if (doe_cap->cap.intr && doe_cap->ctrl.intr) {
+ if (doe_cap->status.intr) {
+ return;
+ }
+ doe_cap->status.intr = 1;
+
+ if (msix_enabled(dev)) {
+ msix_notify(dev, doe_cap->cap.vec);
+ } else if (msi_enabled(dev)) {
+ msi_notify(dev, doe_cap->cap.vec);
+ }
+ }
+}
+
+static void pcie_doe_set_ready(DOECap *doe_cap, bool rdy)
+{
+ doe_cap->status.ready = rdy;
+
+ if (rdy) {
+ pcie_doe_irq_assert(doe_cap);
+ }
+}
+
+static void pcie_doe_set_error(DOECap *doe_cap, bool err)
+{
+ doe_cap->status.error = err;
+
+ if (err) {
+ pcie_doe_irq_assert(doe_cap);
+ }
+}
+
+/*
+ * Check incoming request in write_mbox for protocol format
+ */
+static void pcie_doe_prepare_rsp(DOECap *doe_cap)
+{
+ bool success = false;
+ int p;
+ bool (*handle_request)(DOECap *) = NULL;
+
+ if (doe_cap->status.error) {
+ return;
+ }
+
+ if (doe_cap->write_mbox[0] ==
+ DATA_OBJ_BUILD_HEADER1(PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_DISCOVERY)) {
+ handle_request = pcie_doe_discovery;
+ } else {
+ for (p = 0; p < doe_cap->protocol_num - 1; p++) {
+ if (doe_cap->write_mbox[0] ==
+ pcie_doe_build_protocol(&doe_cap->protocols[p])) {
+ handle_request = doe_cap->protocols[p].handle_request;
+ break;
+ }
+ }
+ }
+
+ /*
+ * PCIe r6 DOE 6.30.1:
+ * If the number of DW transferred does not match the
+ * indicated Length for a data object, then the
+ * data object must be silently discarded.
+ */
+ if (handle_request && (doe_cap->write_mbox_len ==
+ pcie_doe_get_obj_len(pcie_doe_get_write_mbox_ptr(doe_cap)))) {
+ success = handle_request(doe_cap);
+ }
+
+ if (success) {
+ pcie_doe_set_ready(doe_cap, 1);
+ } else {
+ pcie_doe_reset_mbox(doe_cap);
+ }
+}
+
+/*
+ * Read from DOE config space.
+ * Return false if the address is not within the DOE capability range.
+ */
+bool pcie_doe_read_config(DOECap *doe_cap, uint32_t addr, int size,
+ uint32_t *buf)
+{
+ uint32_t shift;
+ uint16_t doe_offset = doe_cap->offset;
+
+ if (!range_covers_byte(doe_offset + PCI_EXP_DOE_CAP,
+ PCI_DOE_SIZEOF - 4, addr)) {
+ return false;
+ }
+
+ addr -= doe_offset;
+ *buf = 0;
+
+ if (range_covers_byte(PCI_EXP_DOE_CAP, DWORD_BYTE, addr)) {
+ *buf = FIELD_DP32(*buf, PCI_DOE_CAP_REG, INTR_SUPP,
+ doe_cap->cap.intr);
+ *buf = FIELD_DP32(*buf, PCI_DOE_CAP_REG, DOE_INTR_MSG_NUM,
+ doe_cap->cap.vec);
+ } else if (range_covers_byte(PCI_EXP_DOE_CTRL, DWORD_BYTE, addr)) {
+ /* Must return ABORT=0 and GO=0 */
+ *buf = FIELD_DP32(*buf, PCI_DOE_CAP_CONTROL, DOE_INTR_EN,
+ doe_cap->ctrl.intr);
+ } else if (range_covers_byte(PCI_EXP_DOE_STATUS, DWORD_BYTE, addr)) {
+ *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_BUSY,
+ doe_cap->status.busy);
+ *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_INTR_STATUS,
+ doe_cap->status.intr);
+ *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_ERROR,
+ doe_cap->status.error);
+ *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DATA_OBJ_RDY,
+ doe_cap->status.ready);
+ /* Mailbox should be DW accessed */
+ } else if (addr == PCI_EXP_DOE_RD_DATA_MBOX && size == DWORD_BYTE) {
+ if (doe_cap->status.ready && !doe_cap->status.error) {
+ *buf = doe_cap->read_mbox[doe_cap->read_mbox_idx];
+ }
+ }
+
+ /* Process Alignment */
+ shift = addr % DWORD_BYTE;
+ *buf = extract32(*buf, shift * 8, size * 8);
+
+ return true;
+}
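
An example of the trailing alignment fix-up: a 1-byte read one past the
status register arrives with shift == 1, so the assembled DW is narrowed to
bits 8..15 before returning. Given an initialized DOECap 'cap':

    uint32_t v;
    pcie_doe_read_config(&cap, cap.offset + PCI_EXP_DOE_STATUS + 1, 1, &v);
    /* v now holds byte 1 of the status DW, zero-extended */
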
+
+/*
+ * Write to DOE config space.
+ * Return early if the address is not within the DOE capability range or
+ * an abort is requested.
+ */
+void pcie_doe_write_config(DOECap *doe_cap,
+ uint32_t addr, uint32_t val, int size)
+{
+ uint16_t doe_offset = doe_cap->offset;
+ uint32_t shift;
+
+ if (!range_covers_byte(doe_offset + PCI_EXP_DOE_CAP,
+ PCI_DOE_SIZEOF - 4, addr)) {
+ return;
+ }
+
+ /* Process Alignment */
+ shift = addr % DWORD_BYTE;
+ addr -= (doe_offset + shift);
+ val = deposit32(val, shift * 8, size * 8, val);
+
+ switch (addr) {
+ case PCI_EXP_DOE_CTRL:
+ if (FIELD_EX32(val, PCI_DOE_CAP_CONTROL, DOE_ABORT)) {
+ pcie_doe_set_ready(doe_cap, 0);
+ pcie_doe_set_error(doe_cap, 0);
+ pcie_doe_reset_mbox(doe_cap);
+ return;
+ }
+
+ if (FIELD_EX32(val, PCI_DOE_CAP_CONTROL, DOE_GO)) {
+ pcie_doe_prepare_rsp(doe_cap);
+ }
+
+ if (FIELD_EX32(val, PCI_DOE_CAP_CONTROL, DOE_INTR_EN)) {
+ doe_cap->ctrl.intr = 1;
+ /* Clear interrupt bit located within the first byte */
+ } else if (shift == 0) {
+ doe_cap->ctrl.intr = 0;
+ }
+ break;
+ case PCI_EXP_DOE_STATUS:
+ if (FIELD_EX32(val, PCI_DOE_CAP_STATUS, DOE_INTR_STATUS)) {
+ doe_cap->status.intr = 0;
+ }
+ break;
+ case PCI_EXP_DOE_RD_DATA_MBOX:
+ /* Mailbox should be DW accessed */
+ if (size != DWORD_BYTE) {
+ return;
+ }
+ doe_cap->read_mbox_idx++;
+ if (doe_cap->read_mbox_idx == doe_cap->read_mbox_len) {
+ pcie_doe_reset_mbox(doe_cap);
+ pcie_doe_set_ready(doe_cap, 0);
+ } else if (doe_cap->read_mbox_idx > doe_cap->read_mbox_len) {
+ /* Underflow */
+ pcie_doe_set_error(doe_cap, 1);
+ }
+ break;
+ case PCI_EXP_DOE_WR_DATA_MBOX:
+ /* Mailbox should be DW accessed */
+ if (size != DWORD_BYTE) {
+ return;
+ }
+ doe_cap->write_mbox[doe_cap->write_mbox_len] = val;
+ doe_cap->write_mbox_len++;
+ break;
+ case PCI_EXP_DOE_CAP:
+ /* fallthrough */
+ default:
+ break;
+ }
+}
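
One recovery path worth noting: per the abort handling above, a driver can
always un-wedge the mailbox with a single config write, which clears the
ready and error bits and resets both mailboxes. An illustrative one-liner,
reusing the hypothetical pci_cfg_write32() accessor and the spec's Abort
bit (bit 0 of the control register):

    pci_cfg_write32(doe + PCI_EXP_DOE_CTRL, 1u << 0);    /* DOE Abort */
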
diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
index 58db0b8e3b..4fc6712025 100644
--- a/hw/rdma/vmw/pvrdma_main.c
+++ b/hw/rdma/vmw/pvrdma_main.c
@@ -307,12 +307,7 @@ static int init_msix(PCIDevice *pdev)
}
for (i = 0; i < RDMA_MAX_INTRS; i++) {
- rc = msix_vector_use(PCI_DEVICE(dev), i);
- if (rc < 0) {
- rdma_error_report("Fail mark MSI-X vector %d", i);
- uninit_msix(pdev, i);
- return rc;
- }
+ msix_vector_use(PCI_DEVICE(dev), i);
}
return 0;
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index c6cc53acf2..4e36bb8bcf 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -602,17 +602,10 @@ static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
uint32_t count, bool mask)
{
VfuObject *o = vfu_get_private(vfu_ctx);
- Error *err = NULL;
uint32_t vector;
for (vector = start; vector < count; vector++) {
- msix_set_mask(o->pci_dev, vector, mask, &err);
- if (err) {
- VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
- error_get_pretty(err));
- error_free(err);
- err = NULL;
- }
+ msix_set_mask(o->pci_dev, vector, mask);
}
}
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index 51437ca09f..b4243de735 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -711,8 +711,14 @@ static void smbios_build_type_3_table(void)
static void smbios_build_type_4_table(MachineState *ms, unsigned instance)
{
char sock_str[128];
+ size_t tbl_len = SMBIOS_TYPE_4_LEN_V28;
- SMBIOS_BUILD_TABLE_PRE(4, T4_BASE + instance, true); /* required */
+ if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_64) {
+ tbl_len = SMBIOS_TYPE_4_LEN_V30;
+ }
+
+ SMBIOS_BUILD_TABLE_PRE_SIZE(4, T4_BASE + instance,
+ true, tbl_len); /* required */
snprintf(sock_str, sizeof(sock_str), "%s%2x", type4.sock_pfx, instance);
SMBIOS_TABLE_SET_STR(4, socket_designation_str, sock_str);
@@ -739,8 +745,15 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance)
SMBIOS_TABLE_SET_STR(4, serial_number_str, type4.serial);
SMBIOS_TABLE_SET_STR(4, asset_tag_number_str, type4.asset);
SMBIOS_TABLE_SET_STR(4, part_number_str, type4.part);
- t->core_count = t->core_enabled = ms->smp.cores;
- t->thread_count = ms->smp.threads;
+
+ t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores;
+ t->core_enabled = t->core_count;
+
+ t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores);
+
+ t->thread_count = (ms->smp.threads > 255) ? 0xFF : ms->smp.threads;
+ t->thread_count2 = cpu_to_le16(ms->smp.threads);
+
t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */
t->processor_family2 = cpu_to_le16(0x01); /* Other */
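
A worked example of the saturation logic above, assuming ms->smp.cores ==
300 and the 64-bit (SMBIOS 3.0) entry point:

    /*
     * t->core_count  -> 0xFF              (legacy 8-bit field saturates)
     * t->core_count2 -> cpu_to_le16(300)  (0x012C on the wire)
     *
     * SMBIOS 3.0 consumers are expected to fall back to core_count2
     * whenever core_count reads 0xFF; the same applies to thread_count2.
     */
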
diff --git a/hw/smbios/smbios_build.h b/hw/smbios/smbios_build.h
index 56b5a1e3f3..351660024e 100644
--- a/hw/smbios/smbios_build.h
+++ b/hw/smbios/smbios_build.h
@@ -27,6 +27,11 @@ extern unsigned smbios_table_max;
extern unsigned smbios_table_cnt;
#define SMBIOS_BUILD_TABLE_PRE(tbl_type, tbl_handle, tbl_required) \
+ SMBIOS_BUILD_TABLE_PRE_SIZE(tbl_type, tbl_handle, tbl_required, \
+ sizeof(struct smbios_type_##tbl_type))\
+
+#define SMBIOS_BUILD_TABLE_PRE_SIZE(tbl_type, tbl_handle, \
+ tbl_required, tbl_len) \
struct smbios_type_##tbl_type *t; \
size_t t_off; /* table offset into smbios_tables */ \
int str_index = 0; \
@@ -39,12 +44,12 @@ extern unsigned smbios_table_cnt;
/* use offset of table t within smbios_tables */ \
/* (pointer must be updated after each realloc) */ \
t_off = smbios_tables_len; \
- smbios_tables_len += sizeof(*t); \
+ smbios_tables_len += tbl_len; \
smbios_tables = g_realloc(smbios_tables, smbios_tables_len); \
t = (struct smbios_type_##tbl_type *)(smbios_tables + t_off); \
\
t->header.type = tbl_type; \
- t->header.length = sizeof(*t); \
+ t->header.length = tbl_len; \
t->header.handle = cpu_to_le16(tbl_handle); \
} while (0)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6b5d8c0bf6..130e5d1dc7 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -578,45 +578,11 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
ram_addr_t *ram_addr, bool *read_only)
{
- MemoryRegion *mr;
- hwaddr xlat;
- hwaddr len = iotlb->addr_mask + 1;
- bool writable = iotlb->perm & IOMMU_WO;
-
- /*
- * The IOMMU TLB entry we have just covers translation through
- * this IOMMU to its immediate target. We need to translate
- * it the rest of the way through to memory.
- */
- mr = address_space_translate(&address_space_memory,
- iotlb->translated_addr,
- &xlat, &len, writable,
- MEMTXATTRS_UNSPECIFIED);
- if (!memory_region_is_ram(mr)) {
- error_report("iommu map to non memory area %"HWADDR_PRIx"",
- xlat);
- return false;
- } else if (memory_region_has_ram_discard_manager(mr)) {
- RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
- MemoryRegionSection tmp = {
- .mr = mr,
- .offset_within_region = xlat,
- .size = int128_make64(len),
- };
-
- /*
- * Malicious VMs can map memory into the IOMMU, which is expected
- * to remain discarded. vfio will pin all pages, populating memory.
- * Disallow that. vmstate priorities make sure any RamDiscardManager
- * were already restored before IOMMUs are restored.
- */
- if (!ram_discard_manager_is_populated(rdm, &tmp)) {
- error_report("iommu map to discarded memory (e.g., unplugged via"
- " virtio-mem): %"HWADDR_PRIx"",
- iotlb->translated_addr);
- return false;
- }
+ bool ret, mr_has_discard_manager;
+ ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only,
+ &mr_has_discard_manager);
+ if (ret && mr_has_discard_manager) {
/*
* Malicious VMs might trigger discarding of IOMMU-mapped memory. The
* pages will remain pinned inside vfio until unmapped, resulting in a
@@ -635,29 +601,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
" intended via an IOMMU. It's possible to mitigate "
" by setting/adjusting RLIMIT_MEMLOCK.");
}
-
- /*
- * Translation truncates length to the IOMMU page size,
- * check that it did not truncate too much.
- */
- if (len & iotlb->addr_mask) {
- error_report("iommu has granularity incompatible with target AS");
- return false;
- }
-
- if (vaddr) {
- *vaddr = memory_region_get_ram_ptr(mr) + xlat;
- }
-
- if (ram_addr) {
- *ram_addr = memory_region_get_ram_addr(mr) + xlat;
- }
-
- if (read_only) {
- *read_only = !writable || mr->readonly;
- }
-
- return true;
+ return ret;
}
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index ad0f91c607..1c40f42045 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -123,7 +123,7 @@ static void vuf_stop(VirtIODevice *vdev)
static void vuf_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostUserFS *fs = VHOST_USER_FS(vdev);
- bool should_start = virtio_device_started(vdev, status);
+ bool should_start = virtio_device_should_start(vdev, status);
if (vhost_dev_is_started(&fs->vhost_dev) == should_start) {
return;
diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c
index 8b40fe450c..677d1c7730 100644
--- a/hw/virtio/vhost-user-gpio.c
+++ b/hw/virtio/vhost-user-gpio.c
@@ -152,7 +152,7 @@ static void vu_gpio_stop(VirtIODevice *vdev)
static void vu_gpio_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev);
- bool should_start = virtio_device_started(vdev, status);
+ bool should_start = virtio_device_should_start(vdev, status);
trace_virtio_gpio_set_status(status);
diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c
index bc58b6c0d1..864eba695e 100644
--- a/hw/virtio/vhost-user-i2c.c
+++ b/hw/virtio/vhost-user-i2c.c
@@ -93,7 +93,7 @@ static void vu_i2c_stop(VirtIODevice *vdev)
static void vu_i2c_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostUserI2C *i2c = VHOST_USER_I2C(vdev);
- bool should_start = virtio_device_started(vdev, status);
+ bool should_start = virtio_device_should_start(vdev, status);
if (vhost_dev_is_started(&i2c->vhost_dev) == should_start) {
return;
diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c
index bc1f36c5ac..8b47287875 100644
--- a/hw/virtio/vhost-user-rng.c
+++ b/hw/virtio/vhost-user-rng.c
@@ -90,7 +90,7 @@ static void vu_rng_stop(VirtIODevice *vdev)
static void vu_rng_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostUserRNG *rng = VHOST_USER_RNG(vdev);
- bool should_start = virtio_device_started(vdev, status);
+ bool should_start = virtio_device_should_start(vdev, status);
if (vhost_dev_is_started(&rng->vhost_dev) == should_start) {
return;
diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c
index 7b67e29d83..9431b9792c 100644
--- a/hw/virtio/vhost-user-vsock.c
+++ b/hw/virtio/vhost-user-vsock.c
@@ -55,7 +55,7 @@ const VhostDevConfigOps vsock_ops = {
static void vuv_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
- bool should_start = virtio_device_started(vdev, status);
+ bool should_start = virtio_device_should_start(vdev, status);
if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) {
return;
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 03415b6c95..abe23d4ebe 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -81,6 +81,7 @@ enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
/* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
+ VHOST_USER_PROTOCOL_F_STATUS = 16,
VHOST_USER_PROTOCOL_F_MAX
};
@@ -126,6 +127,8 @@ typedef enum VhostUserRequest {
VHOST_USER_GET_MAX_MEM_SLOTS = 36,
VHOST_USER_ADD_MEM_REG = 37,
VHOST_USER_REM_MEM_REG = 38,
+ VHOST_USER_SET_STATUS = 39,
+ VHOST_USER_GET_STATUS = 40,
VHOST_USER_MAX
} VhostUserRequest;
@@ -1452,6 +1455,43 @@ static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
return 0;
}
+static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
+{
+ return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
+}
+
+static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
+{
+ uint64_t value;
+ int ret;
+
+ ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
+ if (ret < 0) {
+ return ret;
+ }
+ *status = value;
+
+ return 0;
+}
+
+static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
+{
+ uint8_t s;
+ int ret;
+
+ ret = vhost_user_get_status(dev, &s);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if ((s & status) == status) {
+ return 0;
+ }
+ s |= status;
+
+ return vhost_user_set_status(dev, s);
+}
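
vhost_user_add_status() is deliberately read-modify-write: it always costs
one GET_STATUS round trip, but skips the SET_STATUS message when every
requested bit is already set. A usage sketch, with status bits as defined
by the virtio spec:

    /* first call sends GET_STATUS, then SET_STATUS */
    vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);
    /* second call sends GET_STATUS only: the bit is already set */
    vhost_user_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
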
+
static int vhost_user_set_features(struct vhost_dev *dev,
uint64_t features)
{
@@ -1460,6 +1500,7 @@ static int vhost_user_set_features(struct vhost_dev *dev,
* backend is actually logging changes
*/
bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
+ int ret;
/*
* We need to include any extra backend only feature bits that
@@ -1467,9 +1508,18 @@ static int vhost_user_set_features(struct vhost_dev *dev,
* VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
* features.
*/
- return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
+ ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
features | dev->backend_features,
log_enabled);
+
+ if (virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_STATUS)) {
+ if (!ret) {
+ return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
+ }
+ }
+
+ return ret;
}
static int vhost_user_set_protocol_features(struct vhost_dev *dev,
@@ -1543,6 +1593,11 @@ static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
n = g_ptr_array_index(u->notifiers, idx);
if (!n) {
+ /*
+ * In case notifications arrive out of order,
+ * make room for the current index.
+ */
+ g_ptr_array_remove_index(u->notifiers, idx);
n = g_new0(VhostUserHostNotifier, 1);
n->idx = idx;
g_ptr_array_insert(u->notifiers, idx, n);
@@ -2615,6 +2670,27 @@ void vhost_user_cleanup(VhostUserState *user)
user->chr = NULL;
}
+static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
+{
+ if (!virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_STATUS)) {
+ return 0;
+ }
+
+ /* Set device status only for the last queue pair */
+ if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
+ return 0;
+ }
+
+ if (started) {
+ return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
+ VIRTIO_CONFIG_S_DRIVER |
+ VIRTIO_CONFIG_S_DRIVER_OK);
+ } else {
+ return vhost_user_set_status(dev, 0);
+ }
+}
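
Worked through in terms of virtio status bits (ACKNOWLEDGE 0x01, DRIVER
0x02, DRIVER_OK 0x04, FEATURES_OK 0x08), the start path above composes with
the FEATURES_OK bit already set by vhost_user_set_features():

    /*
     * vhost_user_add_status(dev, ACKNOWLEDGE | DRIVER | DRIVER_OK):
     *   GET_STATUS returns 0x08 (FEATURES_OK)
     *   0x08 | 0x01 | 0x02 | 0x04 == 0x0f  -> SET_STATUS(0x0f)
     * Stopping instead sends SET_STATUS(0) unconditionally.
     */
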
+
const VhostOps user_ops = {
.backend_type = VHOST_BACKEND_TYPE_USER,
.vhost_backend_init = vhost_user_backend_init,
@@ -2649,4 +2725,5 @@ const VhostOps user_ops = {
.vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
.vhost_get_inflight_fd = vhost_user_get_inflight_fd,
.vhost_set_inflight_fd = vhost_user_set_inflight_fd,
+ .vhost_dev_start = vhost_user_dev_start,
};
diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c
index 7dc3c73931..aa16d584ee 100644
--- a/hw/virtio/vhost-vsock.c
+++ b/hw/virtio/vhost-vsock.c
@@ -70,7 +70,7 @@ static int vhost_vsock_set_running(VirtIODevice *vdev, int start)
static void vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status)
{
VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
- bool should_start = virtio_device_started(vdev, status);
+ bool should_start = virtio_device_should_start(vdev, status);
int ret;
if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) {
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 5185c15295..d1c4c20b8c 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1081,10 +1081,10 @@ out:
return ret;
}
-static int vhost_virtqueue_start(struct vhost_dev *dev,
- struct VirtIODevice *vdev,
- struct vhost_virtqueue *vq,
- unsigned idx)
+int vhost_virtqueue_start(struct vhost_dev *dev,
+ struct VirtIODevice *vdev,
+ struct vhost_virtqueue *vq,
+ unsigned idx)
{
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
VirtioBusState *vbus = VIRTIO_BUS(qbus);
@@ -1201,10 +1201,10 @@ fail_alloc_desc:
return r;
}
-static void vhost_virtqueue_stop(struct vhost_dev *dev,
- struct VirtIODevice *vdev,
- struct vhost_virtqueue *vq,
- unsigned idx)
+void vhost_virtqueue_stop(struct vhost_dev *dev,
+ struct VirtIODevice *vdev,
+ struct vhost_virtqueue *vq,
+ unsigned idx)
{
int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
struct vhost_vring_state state = {
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index df4bde210b..97da74e719 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -27,6 +27,39 @@
#define VIRTIO_CRYPTO_VM_VERSION 1
+typedef struct VirtIOCryptoSessionReq {
+ VirtIODevice *vdev;
+ VirtQueue *vq;
+ VirtQueueElement *elem;
+ CryptoDevBackendSessionInfo info;
+ CryptoDevCompletionFunc cb;
+} VirtIOCryptoSessionReq;
+
+static void virtio_crypto_free_create_session_req(VirtIOCryptoSessionReq *sreq)
+{
+ switch (sreq->info.op_code) {
+ case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION:
+ g_free(sreq->info.u.sym_sess_info.cipher_key);
+ g_free(sreq->info.u.sym_sess_info.auth_key);
+ break;
+
+ case VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION:
+ g_free(sreq->info.u.asym_sess_info.key);
+ break;
+
+ case VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION:
+ case VIRTIO_CRYPTO_HASH_DESTROY_SESSION:
+ case VIRTIO_CRYPTO_MAC_DESTROY_SESSION:
+ case VIRTIO_CRYPTO_AEAD_DESTROY_SESSION:
+ case VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION:
+ break;
+
+ default:
+ error_report("Unknown opcode: %u", sreq->info.op_code);
+ }
+ g_free(sreq);
+}
+
/*
* Transfer virtqueue index to crypto queue index.
* The control virtqueue is after the data virtqueues
@@ -75,27 +108,24 @@ virtio_crypto_cipher_session_helper(VirtIODevice *vdev,
return 0;
}
-static int64_t
+static int
virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto,
struct virtio_crypto_sym_create_session_req *sess_req,
uint32_t queue_id,
uint32_t opcode,
- struct iovec *iov, unsigned int out_num)
+ struct iovec *iov, unsigned int out_num,
+ VirtIOCryptoSessionReq *sreq)
{
VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto);
- CryptoDevBackendSessionInfo info;
- CryptoDevBackendSymSessionInfo *sym_info;
- int64_t session_id;
+ CryptoDevBackendSymSessionInfo *sym_info = &sreq->info.u.sym_sess_info;
int queue_index;
uint32_t op_type;
- Error *local_err = NULL;
int ret;
- memset(&info, 0, sizeof(info));
op_type = ldl_le_p(&sess_req->op_type);
- info.op_code = opcode;
+ sreq->info.op_code = opcode;
- sym_info = &info.u.sym_sess_info;
+ sym_info = &sreq->info.u.sym_sess_info;
sym_info->op_type = op_type;
if (op_type == VIRTIO_CRYPTO_SYM_OP_CIPHER) {
@@ -103,7 +133,7 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto,
&sess_req->u.cipher.para,
&iov, &out_num);
if (ret < 0) {
- goto err;
+ return ret;
}
} else if (op_type == VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) {
size_t s;
@@ -112,7 +142,7 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto,
&sess_req->u.chain.para.cipher_param,
&iov, &out_num);
if (ret < 0) {
- goto err;
+ return ret;
}
/* hash part */
sym_info->alg_chain_order = ldl_le_p(
@@ -129,8 +159,7 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto,
if (sym_info->auth_key_len > vcrypto->conf.max_auth_key_len) {
error_report("virtio-crypto length of auth key is too big: %u",
sym_info->auth_key_len);
- ret = -VIRTIO_CRYPTO_ERR;
- goto err;
+ return -VIRTIO_CRYPTO_ERR;
}
/* get auth key */
if (sym_info->auth_key_len > 0) {
@@ -140,8 +169,7 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto,
if (unlikely(s != sym_info->auth_key_len)) {
virtio_error(vdev,
"virtio-crypto authenticated key incorrect");
- ret = -EFAULT;
- goto err;
+ return -EFAULT;
}
iov_discard_front(&iov, &out_num, sym_info->auth_key_len);
}
@@ -153,49 +181,30 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto,
} else {
/* VIRTIO_CRYPTO_SYM_HASH_MODE_NESTED */
error_report("unsupported hash mode");
- ret = -VIRTIO_CRYPTO_NOTSUPP;
- goto err;
+ return -VIRTIO_CRYPTO_NOTSUPP;
}
} else {
/* VIRTIO_CRYPTO_SYM_OP_NONE */
error_report("unsupported cipher op_type: VIRTIO_CRYPTO_SYM_OP_NONE");
- ret = -VIRTIO_CRYPTO_NOTSUPP;
- goto err;
+ return -VIRTIO_CRYPTO_NOTSUPP;
}
queue_index = virtio_crypto_vq2q(queue_id);
- session_id = cryptodev_backend_create_session(
- vcrypto->cryptodev,
- &info, queue_index, &local_err);
- if (session_id >= 0) {
- ret = session_id;
- } else {
- if (local_err) {
- error_report_err(local_err);
- }
- ret = -VIRTIO_CRYPTO_ERR;
- }
-
-err:
- g_free(sym_info->cipher_key);
- g_free(sym_info->auth_key);
- return ret;
+ return cryptodev_backend_create_session(vcrypto->cryptodev, &sreq->info,
+ queue_index, sreq->cb, sreq);
}
-static int64_t
+static int
virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto,
struct virtio_crypto_akcipher_create_session_req *sess_req,
uint32_t queue_id, uint32_t opcode,
- struct iovec *iov, unsigned int out_num)
+ struct iovec *iov, unsigned int out_num,
+ VirtIOCryptoSessionReq *sreq)
{
VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto);
- CryptoDevBackendSessionInfo info = {0};
- CryptoDevBackendAsymSessionInfo *asym_info;
- int64_t session_id;
+ CryptoDevBackendAsymSessionInfo *asym_info = &sreq->info.u.asym_sess_info;
int queue_index;
uint32_t algo, keytype, keylen;
- g_autofree uint8_t *key = NULL;
- Error *local_err = NULL;
algo = ldl_le_p(&sess_req->para.algo);
keytype = ldl_le_p(&sess_req->para.keytype);
@@ -208,20 +217,19 @@ virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto,
}
if (keylen) {
- key = g_malloc(keylen);
- if (iov_to_buf(iov, out_num, 0, key, keylen) != keylen) {
+ asym_info->key = g_malloc(keylen);
+ if (iov_to_buf(iov, out_num, 0, asym_info->key, keylen) != keylen) {
virtio_error(vdev, "virtio-crypto asym key incorrect");
return -EFAULT;
}
iov_discard_front(&iov, &out_num, keylen);
}
- info.op_code = opcode;
- asym_info = &info.u.asym_sess_info;
+ sreq->info.op_code = opcode;
+ asym_info = &sreq->info.u.asym_sess_info;
asym_info->algo = algo;
asym_info->keytype = keytype;
asym_info->keylen = keylen;
- asym_info->key = key;
switch (asym_info->algo) {
case VIRTIO_CRYPTO_AKCIPHER_RSA:
asym_info->u.rsa.padding_algo =
@@ -237,45 +245,95 @@ virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto,
}
queue_index = virtio_crypto_vq2q(queue_id);
- session_id = cryptodev_backend_create_session(vcrypto->cryptodev, &info,
- queue_index, &local_err);
- if (session_id < 0) {
- if (local_err) {
- error_report_err(local_err);
- }
- return -VIRTIO_CRYPTO_ERR;
- }
-
- return session_id;
+ return cryptodev_backend_create_session(vcrypto->cryptodev, &sreq->info,
+ queue_index, sreq->cb, sreq);
}
-static uint8_t
+static int
virtio_crypto_handle_close_session(VirtIOCrypto *vcrypto,
struct virtio_crypto_destroy_session_req *close_sess_req,
- uint32_t queue_id)
+ uint32_t queue_id,
+ VirtIOCryptoSessionReq *sreq)
{
- int ret;
uint64_t session_id;
- uint32_t status;
- Error *local_err = NULL;
session_id = ldq_le_p(&close_sess_req->session_id);
DPRINTF("close session, id=%" PRIu64 "\n", session_id);
- ret = cryptodev_backend_close_session(
- vcrypto->cryptodev, session_id, queue_id, &local_err);
- if (ret == 0) {
- status = VIRTIO_CRYPTO_OK;
+ return cryptodev_backend_close_session(
+ vcrypto->cryptodev, session_id, queue_id, sreq->cb, sreq);
+}
+
+static void virtio_crypto_create_session_completion(void *opaque, int ret)
+{
+ VirtIOCryptoSessionReq *sreq = (VirtIOCryptoSessionReq *)opaque;
+ VirtQueue *vq = sreq->vq;
+ VirtQueueElement *elem = sreq->elem;
+ VirtIODevice *vdev = sreq->vdev;
+ struct virtio_crypto_session_input input;
+ struct iovec *in_iov = elem->in_sg;
+ unsigned in_num = elem->in_num;
+ size_t s;
+
+ memset(&input, 0, sizeof(input));
+ /* A serious error: the virtio crypto device needs to be reset */
+ if (ret == -EFAULT) {
+ virtqueue_detach_element(vq, elem, 0);
+ goto out;
+ } else if (ret == -VIRTIO_CRYPTO_NOTSUPP) {
+ stl_le_p(&input.status, VIRTIO_CRYPTO_NOTSUPP);
+ } else if (ret == -VIRTIO_CRYPTO_KEY_REJECTED) {
+ stl_le_p(&input.status, VIRTIO_CRYPTO_KEY_REJECTED);
+ } else if (ret != VIRTIO_CRYPTO_OK) {
+ stl_le_p(&input.status, VIRTIO_CRYPTO_ERR);
} else {
- if (local_err) {
- error_report_err(local_err);
- } else {
- error_report("destroy session failed");
- }
+ /* Set the session id */
+ stq_le_p(&input.session_id, sreq->info.session_id);
+ stl_le_p(&input.status, VIRTIO_CRYPTO_OK);
+ }
+
+ s = iov_from_buf(in_iov, in_num, 0, &input, sizeof(input));
+ if (unlikely(s != sizeof(input))) {
+ virtio_error(vdev, "virtio-crypto input incorrect");
+ virtqueue_detach_element(vq, elem, 0);
+ goto out;
+ }
+ virtqueue_push(vq, elem, sizeof(input));
+ virtio_notify(vdev, vq);
+
+out:
+ g_free(elem);
+ virtio_crypto_free_create_session_req(sreq);
+}
+
+static void virtio_crypto_destroy_session_completion(void *opaque, int ret)
+{
+ VirtIOCryptoSessionReq *sreq = (VirtIOCryptoSessionReq *)opaque;
+ VirtQueue *vq = sreq->vq;
+ VirtQueueElement *elem = sreq->elem;
+ VirtIODevice *vdev = sreq->vdev;
+ struct iovec *in_iov = elem->in_sg;
+ unsigned in_num = elem->in_num;
+ uint8_t status;
+ size_t s;
+
+ if (ret < 0) {
status = VIRTIO_CRYPTO_ERR;
+ } else {
+ status = VIRTIO_CRYPTO_OK;
+ }
+ s = iov_from_buf(in_iov, in_num, 0, &status, sizeof(status));
+ if (unlikely(s != sizeof(status))) {
+ virtio_error(vdev, "virtio-crypto status incorrect");
+ virtqueue_detach_element(vq, elem, 0);
+ goto out;
}
+ virtqueue_push(vq, elem, sizeof(status));
+ virtio_notify(vdev, vq);
- return status;
+out:
+ g_free(elem);
+ g_free(sreq);
}
static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
@@ -283,16 +341,16 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);
struct virtio_crypto_op_ctrl_req ctrl;
VirtQueueElement *elem;
- struct iovec *in_iov;
- struct iovec *out_iov;
- unsigned in_num;
+ VirtIOCryptoSessionReq *sreq;
unsigned out_num;
+ unsigned in_num;
uint32_t queue_id;
uint32_t opcode;
struct virtio_crypto_session_input input;
- int64_t session_id;
- uint8_t status;
size_t s;
+ int ret;
+ struct iovec *out_iov;
+ struct iovec *in_iov;
for (;;) {
g_autofree struct iovec *out_iov_copy = NULL;
@@ -327,44 +385,34 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
opcode = ldl_le_p(&ctrl.header.opcode);
queue_id = ldl_le_p(&ctrl.header.queue_id);
- memset(&input, 0, sizeof(input));
+ sreq = g_new0(VirtIOCryptoSessionReq, 1);
+ sreq->vdev = vdev;
+ sreq->vq = vq;
+ sreq->elem = elem;
+
switch (opcode) {
case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION:
- session_id = virtio_crypto_create_sym_session(vcrypto,
- &ctrl.u.sym_create_session,
- queue_id, opcode,
- out_iov, out_num);
- goto check_session;
+ sreq->cb = virtio_crypto_create_session_completion;
+ ret = virtio_crypto_create_sym_session(vcrypto,
+ &ctrl.u.sym_create_session,
+ queue_id, opcode,
+ out_iov, out_num,
+ sreq);
+ if (ret < 0) {
+ virtio_crypto_create_session_completion(sreq, ret);
+ }
+ break;
case VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION:
- session_id = virtio_crypto_create_asym_session(vcrypto,
+ sreq->cb = virtio_crypto_create_session_completion;
+ ret = virtio_crypto_create_asym_session(vcrypto,
&ctrl.u.akcipher_create_session,
queue_id, opcode,
- out_iov, out_num);
-
-check_session:
- /* Serious errors, need to reset virtio crypto device */
- if (session_id == -EFAULT) {
- virtqueue_detach_element(vq, elem, 0);
- break;
- } else if (session_id == -VIRTIO_CRYPTO_NOTSUPP) {
- stl_le_p(&input.status, VIRTIO_CRYPTO_NOTSUPP);
- } else if (session_id == -VIRTIO_CRYPTO_ERR) {
- stl_le_p(&input.status, VIRTIO_CRYPTO_ERR);
- } else {
- /* Set the session id */
- stq_le_p(&input.session_id, session_id);
- stl_le_p(&input.status, VIRTIO_CRYPTO_OK);
- }
-
- s = iov_from_buf(in_iov, in_num, 0, &input, sizeof(input));
- if (unlikely(s != sizeof(input))) {
- virtio_error(vdev, "virtio-crypto input incorrect");
- virtqueue_detach_element(vq, elem, 0);
- break;
+ out_iov, out_num,
+ sreq);
+ if (ret < 0) {
+ virtio_crypto_create_session_completion(sreq, ret);
}
- virtqueue_push(vq, elem, sizeof(input));
- virtio_notify(vdev, vq);
break;
case VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION:
@@ -372,37 +420,36 @@ check_session:
case VIRTIO_CRYPTO_MAC_DESTROY_SESSION:
case VIRTIO_CRYPTO_AEAD_DESTROY_SESSION:
case VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION:
- status = virtio_crypto_handle_close_session(vcrypto,
- &ctrl.u.destroy_session, queue_id);
- /* The status only occupy one byte, we can directly use it */
- s = iov_from_buf(in_iov, in_num, 0, &status, sizeof(status));
- if (unlikely(s != sizeof(status))) {
- virtio_error(vdev, "virtio-crypto status incorrect");
- virtqueue_detach_element(vq, elem, 0);
- break;
+ sreq->cb = virtio_crypto_destroy_session_completion;
+ ret = virtio_crypto_handle_close_session(vcrypto,
+ &ctrl.u.destroy_session, queue_id,
+ sreq);
+ if (ret < 0) {
+ virtio_crypto_destroy_session_completion(sreq, ret);
}
- virtqueue_push(vq, elem, sizeof(status));
- virtio_notify(vdev, vq);
break;
+
case VIRTIO_CRYPTO_HASH_CREATE_SESSION:
case VIRTIO_CRYPTO_MAC_CREATE_SESSION:
case VIRTIO_CRYPTO_AEAD_CREATE_SESSION:
default:
+ memset(&input, 0, sizeof(input));
error_report("virtio-crypto unsupported ctrl opcode: %d", opcode);
stl_le_p(&input.status, VIRTIO_CRYPTO_NOTSUPP);
s = iov_from_buf(in_iov, in_num, 0, &input, sizeof(input));
if (unlikely(s != sizeof(input))) {
virtio_error(vdev, "virtio-crypto input incorrect");
virtqueue_detach_element(vq, elem, 0);
- break;
+ } else {
+ virtqueue_push(vq, elem, sizeof(input));
+ virtio_notify(vdev, vq);
}
- virtqueue_push(vq, elem, sizeof(input));
- virtio_notify(vdev, vq);
+ g_free(sreq);
+ g_free(elem);
break;
} /* end switch case */
- g_free(elem);
} /* end for loop */
}
@@ -448,6 +495,7 @@ static void virtio_crypto_free_request(VirtIOCryptoReq *req)
}
}
+ g_free(req->in_iov);
g_free(req);
}
@@ -458,6 +506,7 @@ virtio_crypto_sym_input_data_helper(VirtIODevice *vdev,
CryptoDevBackendSymOpInfo *sym_op_info)
{
size_t s, len;
+ struct iovec *in_iov = req->in_iov;
if (status != VIRTIO_CRYPTO_OK) {
return;
@@ -465,18 +514,18 @@ virtio_crypto_sym_input_data_helper(VirtIODevice *vdev,
len = sym_op_info->src_len;
/* Save the cipher result */
- s = iov_from_buf(req->in_iov, req->in_num, 0, sym_op_info->dst, len);
+ s = iov_from_buf(in_iov, req->in_num, 0, sym_op_info->dst, len);
if (s != len) {
virtio_error(vdev, "virtio-crypto dest data incorrect");
return;
}
- iov_discard_front(&req->in_iov, &req->in_num, len);
+ iov_discard_front(&in_iov, &req->in_num, len);
if (sym_op_info->op_type ==
VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) {
/* Save the digest result */
- s = iov_from_buf(req->in_iov, req->in_num, 0,
+ s = iov_from_buf(in_iov, req->in_num, 0,
sym_op_info->digest_result,
sym_op_info->digest_result_len);
if (s != sym_op_info->digest_result_len) {
@@ -491,6 +540,7 @@ virtio_crypto_akcipher_input_data_helper(VirtIODevice *vdev,
CryptoDevBackendAsymOpInfo *asym_op_info)
{
size_t s, len;
+ struct iovec *in_iov = req->in_iov;
if (status != VIRTIO_CRYPTO_OK) {
return;
@@ -501,23 +551,24 @@ virtio_crypto_akcipher_input_data_helper(VirtIODevice *vdev,
return;
}
- s = iov_from_buf(req->in_iov, req->in_num, 0, asym_op_info->dst, len);
+ s = iov_from_buf(in_iov, req->in_num, 0, asym_op_info->dst, len);
if (s != len) {
virtio_error(vdev, "virtio-crypto asym dest data incorrect");
return;
}
- iov_discard_front(&req->in_iov, &req->in_num, len);
+ iov_discard_front(&in_iov, &req->in_num, len);
/* For akcipher, dst_len may be changed after operation */
req->in_len = sizeof(struct virtio_crypto_inhdr) + asym_op_info->dst_len;
}
-
-static void virtio_crypto_req_complete(VirtIOCryptoReq *req, uint8_t status)
+static void virtio_crypto_req_complete(void *opaque, int ret)
{
+ VirtIOCryptoReq *req = (VirtIOCryptoReq *)opaque;
VirtIOCrypto *vcrypto = req->vcrypto;
VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto);
+ uint8_t status = -ret;
if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) {
virtio_crypto_sym_input_data_helper(vdev, req, status,
@@ -529,6 +580,7 @@ static void virtio_crypto_req_complete(VirtIOCryptoReq *req, uint8_t status)
stb_p(&req->in->status, status);
virtqueue_push(req->vq, &req->elem, req->in_len);
virtio_notify(vdev, req->vq);
+ virtio_crypto_free_request(req);
}
static VirtIOCryptoReq *
@@ -773,9 +825,7 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request)
unsigned in_num;
unsigned out_num;
uint32_t opcode;
- uint8_t status = VIRTIO_CRYPTO_ERR;
CryptoDevBackendOpInfo *op_info = &request->op_info;
- Error *local_err = NULL;
if (elem->out_num < 1 || elem->in_num < 1) {
virtio_error(vdev, "virtio-crypto dataq missing headers");
@@ -815,6 +865,8 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request)
*/
request->in_num = in_num;
request->in_iov = in_iov;
+ /* in_iov_copy is now freed by virtio_crypto_free_request() */
+ in_iov_copy = NULL;
opcode = ldl_le_p(&req.header.opcode);
op_info->session_id = ldq_le_p(&req.header.session_id);
@@ -843,23 +895,15 @@ check_result:
if (ret == -EFAULT) {
return -1;
} else if (ret == -VIRTIO_CRYPTO_NOTSUPP) {
- virtio_crypto_req_complete(request, VIRTIO_CRYPTO_NOTSUPP);
- virtio_crypto_free_request(request);
+ virtio_crypto_req_complete(request, -VIRTIO_CRYPTO_NOTSUPP);
} else {
-
- /* Set request's parameter */
ret = cryptodev_backend_crypto_operation(vcrypto->cryptodev,
- request, queue_index, &local_err);
+ request, queue_index,
+ virtio_crypto_req_complete,
+ request);
if (ret < 0) {
- status = -ret;
- if (local_err) {
- error_report_err(local_err);
- }
- } else { /* ret == VIRTIO_CRYPTO_OK */
- status = ret;
+ virtio_crypto_req_complete(request, ret);
}
- virtio_crypto_req_complete(request, status);
- virtio_crypto_free_request(request);
}
break;
@@ -870,8 +914,7 @@ check_result:
default:
error_report("virtio-crypto unsupported dataq opcode: %u",
opcode);
- virtio_crypto_req_complete(request, VIRTIO_CRYPTO_NOTSUPP);
- virtio_crypto_free_request(request);
+ virtio_crypto_req_complete(request, -VIRTIO_CRYPTO_NOTSUPP);
}
return 0;
@@ -1011,7 +1054,7 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
vcrypto->vqs[i].vcrypto = vcrypto;
}
- vcrypto->ctrl_vq = virtio_add_queue(vdev, 64, virtio_crypto_handle_ctrl);
+ vcrypto->ctrl_vq = virtio_add_queue(vdev, 1024, virtio_crypto_handle_ctrl);
if (!cryptodev_backend_is_ready(vcrypto->cryptodev)) {
vcrypto->status &= ~VIRTIO_CRYPTO_S_HW_READY;
} else {
diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c
index 79ea8334f0..7ef2f9dcdb 100644
--- a/hw/virtio/virtio-iommu-pci.c
+++ b/hw/virtio/virtio-iommu-pci.c
@@ -17,6 +17,7 @@
#include "hw/qdev-properties-system.h"
#include "qapi/error.h"
#include "hw/boards.h"
+#include "hw/pci/pci_bus.h"
#include "qom/object.h"
typedef struct VirtIOIOMMUPCI VirtIOIOMMUPCI;
@@ -44,6 +45,7 @@ static Property virtio_iommu_pci_properties[] = {
static void virtio_iommu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
{
VirtIOIOMMUPCI *dev = VIRTIO_IOMMU_PCI(vpci_dev);
+ PCIBus *pbus = pci_get_bus(&vpci_dev->pci_dev);
DeviceState *vdev = DEVICE(&dev->vdev);
VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
@@ -57,11 +59,17 @@ static void virtio_iommu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
s->reserved_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_MSI) {
error_setg(errp, "reserved region %d has an invalid type", i);
error_append_hint(errp, "Valid values are 0 and 1\n");
+ return;
}
}
+ if (!pci_bus_is_root(pbus)) {
+ error_setg(errp, "virtio-iommu-pci must be plugged on the root bus");
+ return;
+ }
+
object_property_set_link(OBJECT(dev), "primary-bus",
- OBJECT(pci_get_bus(&vpci_dev->pci_dev)),
- &error_abort);
+ OBJECT(pbus), &error_abort);
+
virtio_pci_force_virtio_1(vpci_dev);
qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
}
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 34db51e241..a1c9dfa7bb 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -71,9 +71,11 @@ static void virtio_pci_notify(DeviceState *d, uint16_t vector)
{
VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);
- if (msix_enabled(&proxy->pci_dev))
- msix_notify(&proxy->pci_dev, vector);
- else {
+ if (msix_enabled(&proxy->pci_dev)) {
+ if (vector != VIRTIO_NO_VECTOR) {
+ msix_notify(&proxy->pci_dev, vector);
+ }
+ } else {
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
pci_set_irq(&proxy->pci_dev, qatomic_read(&vdev->isr) & 1);
}
@@ -175,6 +177,7 @@ static int virtio_pci_load_config(DeviceState *d, QEMUFile *f)
{
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+ uint16_t vector;
int ret;
ret = pci_device_load(&proxy->pci_dev, f);
@@ -184,12 +187,17 @@ static int virtio_pci_load_config(DeviceState *d, QEMUFile *f)
msix_unuse_all_vectors(&proxy->pci_dev);
msix_load(&proxy->pci_dev, f);
if (msix_present(&proxy->pci_dev)) {
- qemu_get_be16s(f, &vdev->config_vector);
+ qemu_get_be16s(f, &vector);
+
+ if (vector != VIRTIO_NO_VECTOR && vector >= proxy->nvectors) {
+ return -EINVAL;
+ }
} else {
- vdev->config_vector = VIRTIO_NO_VECTOR;
+ vector = VIRTIO_NO_VECTOR;
}
- if (vdev->config_vector != VIRTIO_NO_VECTOR) {
- return msix_vector_use(&proxy->pci_dev, vdev->config_vector);
+ vdev->config_vector = vector;
+ if (vector != VIRTIO_NO_VECTOR) {
+ msix_vector_use(&proxy->pci_dev, vector);
}
return 0;
}
@@ -202,12 +210,15 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f)
uint16_t vector;
if (msix_present(&proxy->pci_dev)) {
qemu_get_be16s(f, &vector);
+ if (vector != VIRTIO_NO_VECTOR && vector >= proxy->nvectors) {
+ return -EINVAL;
+ }
} else {
vector = VIRTIO_NO_VECTOR;
}
virtio_queue_set_vector(vdev, n, vector);
if (vector != VIRTIO_NO_VECTOR) {
- return msix_vector_use(&proxy->pci_dev, vector);
+ msix_vector_use(&proxy->pci_dev, vector);
}
return 0;
@@ -299,6 +310,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
VirtIOPCIProxy *proxy = opaque;
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+ uint16_t vector;
hwaddr pa;
switch (addr) {
@@ -352,18 +364,28 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
}
break;
case VIRTIO_MSI_CONFIG_VECTOR:
- msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+ if (vdev->config_vector != VIRTIO_NO_VECTOR) {
+ msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+ }
/* Make it possible for guest to discover an error took place. */
- if (msix_vector_use(&proxy->pci_dev, val) < 0)
+ if (val < proxy->nvectors) {
+ msix_vector_use(&proxy->pci_dev, val);
+ } else {
val = VIRTIO_NO_VECTOR;
+ }
vdev->config_vector = val;
break;
case VIRTIO_MSI_QUEUE_VECTOR:
- msix_vector_unuse(&proxy->pci_dev,
- virtio_queue_vector(vdev, vdev->queue_sel));
+ vector = virtio_queue_vector(vdev, vdev->queue_sel);
+ if (vector != VIRTIO_NO_VECTOR) {
+ msix_vector_unuse(&proxy->pci_dev, vector);
+ }
/* Make it possible for guest to discover an error took place. */
- if (msix_vector_use(&proxy->pci_dev, val) < 0)
+ if (val < proxy->nvectors) {
+ msix_vector_use(&proxy->pci_dev, val);
+ } else {
val = VIRTIO_NO_VECTOR;
+ }
virtio_queue_set_vector(vdev, vdev->queue_sel, val);
break;
default:
@@ -1251,6 +1273,9 @@ static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
case VIRTIO_PCI_COMMON_Q_USEDHI:
val = proxy->vqs[vdev->queue_sel].used[1];
break;
+ case VIRTIO_PCI_COMMON_Q_RESET:
+ val = proxy->vqs[vdev->queue_sel].reset;
+ break;
default:
val = 0;
}
@@ -1263,6 +1288,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
{
VirtIOPCIProxy *proxy = opaque;
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+ uint16_t vector;
if (vdev == NULL) {
return;
@@ -1284,9 +1310,13 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
}
break;
case VIRTIO_PCI_COMMON_MSIX:
- msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+ if (vdev->config_vector != VIRTIO_NO_VECTOR) {
+ msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+ }
/* Make it possible for guest to discover an error took place. */
- if (msix_vector_use(&proxy->pci_dev, val) < 0) {
+ if (val < proxy->nvectors) {
+ msix_vector_use(&proxy->pci_dev, val);
+ } else {
val = VIRTIO_NO_VECTOR;
}
vdev->config_vector = val;
@@ -1318,10 +1348,14 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
proxy->vqs[vdev->queue_sel].num);
break;
case VIRTIO_PCI_COMMON_Q_MSIX:
- msix_vector_unuse(&proxy->pci_dev,
- virtio_queue_vector(vdev, vdev->queue_sel));
+ vector = virtio_queue_vector(vdev, vdev->queue_sel);
+ if (vector != VIRTIO_NO_VECTOR) {
+ msix_vector_unuse(&proxy->pci_dev, vector);
+ }
/* Make it possible for guest to discover an error took place. */
- if (msix_vector_use(&proxy->pci_dev, val) < 0) {
+ if (val < proxy->nvectors) {
+ msix_vector_use(&proxy->pci_dev, val);
+ } else {
val = VIRTIO_NO_VECTOR;
}
virtio_queue_set_vector(vdev, vdev->queue_sel, val);
@@ -1338,6 +1372,8 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 |
proxy->vqs[vdev->queue_sel].used[0]);
proxy->vqs[vdev->queue_sel].enabled = 1;
+ proxy->vqs[vdev->queue_sel].reset = 0;
+ virtio_queue_enable(vdev, vdev->queue_sel);
} else {
virtio_error(vdev, "wrong value for queue_enable %"PRIx64, val);
}
@@ -1360,6 +1396,16 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
case VIRTIO_PCI_COMMON_Q_USEDHI:
proxy->vqs[vdev->queue_sel].used[1] = val;
break;
+ case VIRTIO_PCI_COMMON_Q_RESET:
+ if (val == 1) {
+ proxy->vqs[vdev->queue_sel].reset = 1;
+
+ virtio_queue_reset(vdev, vdev->queue_sel);
+
+ proxy->vqs[vdev->queue_sel].reset = 0;
+ proxy->vqs[vdev->queue_sel].enabled = 0;
+ }
+ break;
default:
break;
}
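
The write path above completes the virtio 1.2 per-queue reset handshake
synchronously, so a guest sees the reset bit already cleared on readback.
A driver-side sketch, with read_common_cfg()/write_common_cfg() as
hypothetical accessors into the common configuration structure:

    write_common_cfg(VIRTIO_PCI_COMMON_Q_RESET, 1);   /* request vq reset */
    while (read_common_cfg(VIRTIO_PCI_COMMON_Q_RESET) != 0) {
        ;    /* QEMU resets synchronously, so this loop exits immediately */
    }
    /* ...reprogram desc/avail/used addresses as needed... */
    write_common_cfg(VIRTIO_PCI_COMMON_Q_ENABLE, 1);  /* re-enable the vq */
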
@@ -1954,6 +2000,7 @@ static void virtio_pci_reset(DeviceState *qdev)
for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
proxy->vqs[i].enabled = 0;
+ proxy->vqs[i].reset = 0;
proxy->vqs[i].num = 0;
proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
diff --git a/hw/virtio/virtio-rng-pci.c b/hw/virtio/virtio-rng-pci.c
index 151ece6f94..6e76f8b57b 100644
--- a/hw/virtio/virtio-rng-pci.c
+++ b/hw/virtio/virtio-rng-pci.c
@@ -13,6 +13,7 @@
#include "hw/virtio/virtio-pci.h"
#include "hw/virtio/virtio-rng.h"
+#include "hw/qdev-properties.h"
#include "qapi/error.h"
#include "qemu/module.h"
#include "qom/object.h"
@@ -31,11 +32,23 @@ struct VirtIORngPCI {
VirtIORNG vdev;
};
+static Property virtio_rng_properties[] = {
+ DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+ VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
+ DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+ DEV_NVECTORS_UNSPECIFIED),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
static void virtio_rng_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
{
VirtIORngPCI *vrng = VIRTIO_RNG_PCI(vpci_dev);
DeviceState *vdev = DEVICE(&vrng->vdev);
+ if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+ vpci_dev->nvectors = 2;
+ }
+
if (!qdev_realize(vdev, BUS(&vpci_dev->bus), errp)) {
return;
}
@@ -54,6 +67,7 @@ static void virtio_rng_pci_class_init(ObjectClass *klass, void *data)
pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_RNG;
pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
pcidev_k->class_id = PCI_CLASS_OTHERS;
+ device_class_set_props(dc, virtio_rng_properties);
}
static void virtio_rng_initfn(Object *obj)
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 808446b4c9..9683b2e158 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2464,6 +2464,51 @@ static enum virtio_device_endian virtio_current_cpu_endian(void)
}
}
+static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i)
+{
+ vdev->vq[i].vring.desc = 0;
+ vdev->vq[i].vring.avail = 0;
+ vdev->vq[i].vring.used = 0;
+ vdev->vq[i].last_avail_idx = 0;
+ vdev->vq[i].shadow_avail_idx = 0;
+ vdev->vq[i].used_idx = 0;
+ vdev->vq[i].last_avail_wrap_counter = true;
+ vdev->vq[i].shadow_avail_wrap_counter = true;
+ vdev->vq[i].used_wrap_counter = true;
+ virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
+ vdev->vq[i].signalled_used = 0;
+ vdev->vq[i].signalled_used_valid = false;
+ vdev->vq[i].notification = true;
+ vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
+ vdev->vq[i].inuse = 0;
+ virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
+}
+
+void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
+{
+ VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+
+ if (k->queue_reset) {
+ k->queue_reset(vdev, queue_index);
+ }
+
+ __virtio_queue_reset(vdev, queue_index);
+}
+
+void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
+{
+ VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+ error_report("queue_enable is only suppported in devices of virtio "
+ "1.0 or later.");
+ }
+
+ if (k->queue_enable) {
+ k->queue_enable(vdev, queue_index);
+ }
+}
+
void virtio_reset(void *opaque)
{
VirtIODevice *vdev = opaque;
@@ -2495,22 +2540,7 @@ void virtio_reset(void *opaque)
virtio_notify_vector(vdev, vdev->config_vector);
for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
- vdev->vq[i].vring.desc = 0;
- vdev->vq[i].vring.avail = 0;
- vdev->vq[i].vring.used = 0;
- vdev->vq[i].last_avail_idx = 0;
- vdev->vq[i].shadow_avail_idx = 0;
- vdev->vq[i].used_idx = 0;
- vdev->vq[i].last_avail_wrap_counter = true;
- vdev->vq[i].shadow_avail_wrap_counter = true;
- vdev->vq[i].used_wrap_counter = true;
- virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
- vdev->vq[i].signalled_used = 0;
- vdev->vq[i].signalled_used_valid = false;
- vdev->vq[i].notification = true;
- vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
- vdev->vq[i].inuse = 0;
- virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
+ __virtio_queue_reset(vdev, i);
}
}