From 4ad08e8a57f0a779dbf58bcdccd8c9b6f6432512 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Wed, 31 Aug 2022 10:23:10 +0530 Subject: hw/i386/e820: remove legacy reserved entries for e820 e820 reserved entries were used before the dynamic entries with fw config files were intoduced. Please see the following change: 7d67110f2d9a6("pc: add etc/e820 fw_cfg file") Identical support was introduced into seabios as well with the following commit: ce39bd4031820 ("Add support for etc/e820 fw_cfg file") Both the above commits are now quite old. QEMU machines 1.7 and newer no longer use the reserved entries. Seabios uses fw config files and dynamic e820 entries by default and only falls back to using reserved entries when it has to work with old qemu (versions earlier than 1.7). Please see functions qemu_cfg_e820() and qemu_early_e820(). It is safe to remove legacy FW_CFG_E820_TABLE and associated code now as QEMU 7.0 has deprecated i440fx machines 1.7 and older. It would be incredibly rare to run the latest qemu version with a very old version of seabios that did not support fw config files for e820. As far as I could see, edk2/ovfm never supported reserved entries and uses fw config files from the beginning. So there should be no incompatibilities with ovfm as well. CC: Gerd Hoffmann Signed-off-by: Ani Sinha Acked-by: Gerd Hoffmann Message-Id: <20220831045311.33083-1-ani@anisinha.ca> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/e820_memory_layout.c | 20 +------------------- hw/i386/e820_memory_layout.h | 8 -------- hw/i386/fw_cfg.c | 3 --- hw/i386/fw_cfg.h | 1 - hw/i386/microvm.c | 2 -- 5 files changed, 1 insertion(+), 33 deletions(-) (limited to 'hw/i386') diff --git a/hw/i386/e820_memory_layout.c b/hw/i386/e820_memory_layout.c index bcf9eaf837..06970ac44a 100644 --- a/hw/i386/e820_memory_layout.c +++ b/hw/i386/e820_memory_layout.c @@ -11,29 +11,11 @@ #include "e820_memory_layout.h" static size_t e820_entries; -struct e820_table e820_reserve; struct e820_entry *e820_table; int e820_add_entry(uint64_t address, uint64_t length, uint32_t type) { - int index = le32_to_cpu(e820_reserve.count); - struct e820_entry *entry; - - if (type != E820_RAM) { - /* old FW_CFG_E820_TABLE entry -- reservations only */ - if (index >= E820_NR_ENTRIES) { - return -EBUSY; - } - entry = &e820_reserve.entry[index++]; - - entry->address = cpu_to_le64(address); - entry->length = cpu_to_le64(length); - entry->type = cpu_to_le32(type); - - e820_reserve.count = cpu_to_le32(index); - } - - /* new "etc/e820" file -- include ram too */ + /* new "etc/e820" file -- include ram and reserved entries */ e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1); e820_table[e820_entries].address = cpu_to_le64(address); e820_table[e820_entries].length = cpu_to_le64(length); diff --git a/hw/i386/e820_memory_layout.h b/hw/i386/e820_memory_layout.h index 04f93780f9..7c239aa033 100644 --- a/hw/i386/e820_memory_layout.h +++ b/hw/i386/e820_memory_layout.h @@ -16,20 +16,12 @@ #define E820_NVS 4 #define E820_UNUSABLE 5 -#define E820_NR_ENTRIES 16 - struct e820_entry { uint64_t address; uint64_t length; uint32_t type; } QEMU_PACKED __attribute((__aligned__(4))); -struct e820_table { - uint32_t count; - struct e820_entry entry[E820_NR_ENTRIES]; -} QEMU_PACKED __attribute((__aligned__(4))); - -extern struct e820_table e820_reserve; extern struct e820_entry *e820_table; int e820_add_entry(uint64_t address, uint64_t length, uint32_t type); diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c index a283785a8d..72a42f3c66 100644 --- a/hw/i386/fw_cfg.c +++ b/hw/i386/fw_cfg.c @@ -36,7 +36,6 @@ const char *fw_cfg_arch_key_name(uint16_t key) {FW_CFG_ACPI_TABLES, "acpi_tables"}, {FW_CFG_SMBIOS_ENTRIES, "smbios_entries"}, {FW_CFG_IRQ0_OVERRIDE, "irq0_override"}, - {FW_CFG_E820_TABLE, "e820_table"}, {FW_CFG_HPET, "hpet"}, }; @@ -127,8 +126,6 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms, #endif fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1); - fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE, - &e820_reserve, sizeof(e820_reserve)); fw_cfg_add_file(fw_cfg, "etc/e820", e820_table, sizeof(struct e820_entry) * e820_get_num_entries()); diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h index 275f15c1c5..86ca7c1c0c 100644 --- a/hw/i386/fw_cfg.h +++ b/hw/i386/fw_cfg.h @@ -17,7 +17,6 @@ #define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0) #define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1) #define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2) -#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3) #define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4) FWCfgState *fw_cfg_arch_create(MachineState *ms, diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index ffd1884100..170a331e3f 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -324,8 +324,6 @@ static void microvm_memory_init(MicrovmMachineState *mms) fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, machine->smp.max_cpus); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)machine->ram_size); fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1); - fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE, - &e820_reserve, sizeof(e820_reserve)); fw_cfg_add_file(fw_cfg, "etc/e820", e820_table, sizeof(struct e820_entry) * e820_get_num_entries()); -- cgit v1.2.3 From cfead31326409dad187024de1bf7b40d7a86737e Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 17 Oct 2022 12:21:36 +0200 Subject: acpi: pc: vga: use AcpiDevAmlIf interface to build VGA device descriptors NB: We do not expect any functional change in any ACPI tables with this change. It's only a refactoring. NB2: Some targets (or1k) do not support acpi and CONFIG_ACPI is off for them. However, modules are reused between all architectures so CONFIG_ACPI is on. For those architectures, dummy stub function definitions help to resolve symbols. This change uses more of these and so it adds a couple of dummy stub definitions so that symbols for those can be resolved. Signed-off-by: Igor Mammedov Message-Id: <20221017102146.2254096-2-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Ani Sinha CC: Bernhard Beschow Signed-off-by: Ani Sinha Message-Id: <20221107152744.868434-1-ani@anisinha.ca> --- hw/i386/acpi-build.c | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) (limited to 'hw/i386') diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 4f54b61904..26932b4e2c 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -430,7 +430,6 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, bool hotpluggbale_slot = false; bool bridge_in_acpi = false; bool cold_plugged_bridge = false; - bool is_vga = false; if (pdev) { pc = PCI_DEVICE_GET_CLASS(pdev); @@ -440,8 +439,6 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, continue; } - is_vga = pc->class_id == PCI_CLASS_DISPLAY_VGA; - /* * Cold plugged bridges aren't themselves hot-pluggable. * Hotplugged bridges *are* hot-pluggable. @@ -489,28 +486,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, aml_append(dev, aml_pci_device_dsm()); } - if (is_vga) { - /* add VGA specific AML methods */ - int s3d; - - if (object_dynamic_cast(OBJECT(pdev), "qxl-vga")) { - s3d = 3; - } else { - s3d = 0; - } - - method = aml_method("_S1D", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(0))); - aml_append(dev, method); - - method = aml_method("_S2D", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(0))); - aml_append(dev, method); - - method = aml_method("_S3D", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(s3d))); - aml_append(dev, method); - } + call_dev_aml_func(DEVICE(pdev), dev); bridge_in_acpi = cold_plugged_bridge && pcihp_bridge_en; if (bridge_in_acpi) { -- cgit v1.2.3 From 47a373faa6b272d12c8b166f7a0c45c867589005 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 17 Oct 2022 12:21:38 +0200 Subject: acpi: pc/q35: drop ad-hoc PCI-ISA bridge AML routines and let bus ennumeration generate AML PCI-ISA bridges that are built in PIIX/Q35 are building its own AML using AcpiDevAmlIf interface. Now build_append_pci_bus_devices() gained AcpiDevAmlIf interface support to get AML of devices atached to PCI slots. So drop ad-hoc build_q35_isa_bridge()/build_piix4_isa_bridge() and let PCI bus enumeration to include PCI-ISA bridge AML when it's enumerated by build_append_pci_bus_devices(). AML change is mostly contextual, which moves whole ISA hierarchy directly under PCI host bridge instead of it being described as separate \SB.PCI0.ISA block. Note: If bus/slot that hosts ISA bridge has BSEL set, it will gain new ASUN and _DMS entries (i.e. acpi-index support, but it should not cause any functional change and that is fine from PCI Firmware spec point of view), potentially it's possible to suppress that by adding a flag to PCIDevice but I don't see a reason to do that yet, I'd rather treat bridge just as any other PCI device if it's possible. Signed-off-by: Igor Mammedov Message-Id: <20221017102146.2254096-4-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 75 ---------------------------------------------------- 1 file changed, 75 deletions(-) (limited to 'hw/i386') diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 26932b4e2c..e1483bb11a 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -435,10 +435,6 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, pc = PCI_DEVICE_GET_CLASS(pdev); dc = DEVICE_GET_CLASS(pdev); - if (pc->class_id == PCI_CLASS_BRIDGE_ISA) { - continue; - } - /* * Cold plugged bridges aren't themselves hot-pluggable. * Hotplugged bridges *are* hot-pluggable. @@ -1006,7 +1002,6 @@ static void build_piix4_pci0_int(Aml *table) { Aml *dev; Aml *crs; - Aml *field; Aml *method; uint32_t irqs; Aml *sb_scope = aml_scope("_SB"); @@ -1015,13 +1010,6 @@ static void build_piix4_pci0_int(Aml *table) aml_append(pci0_scope, build_prt(true)); aml_append(sb_scope, pci0_scope); - field = aml_field("PCI0.ISA.P40C", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("PRQ0", 8)); - aml_append(field, aml_named_field("PRQ1", 8)); - aml_append(field, aml_named_field("PRQ2", 8)); - aml_append(field, aml_named_field("PRQ3", 8)); - aml_append(sb_scope, field); - aml_append(sb_scope, build_irq_status_method()); aml_append(sb_scope, build_iqcr_method(true)); @@ -1125,7 +1113,6 @@ static Aml *build_q35_routing_table(const char *str) static void build_q35_pci0_int(Aml *table) { - Aml *field; Aml *method; Aml *sb_scope = aml_scope("_SB"); Aml *pci0_scope = aml_scope("PCI0"); @@ -1162,18 +1149,6 @@ static void build_q35_pci0_int(Aml *table) aml_append(pci0_scope, method); aml_append(sb_scope, pci0_scope); - field = aml_field("PCI0.ISA.PIRQ", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("PRQA", 8)); - aml_append(field, aml_named_field("PRQB", 8)); - aml_append(field, aml_named_field("PRQC", 8)); - aml_append(field, aml_named_field("PRQD", 8)); - aml_append(field, aml_reserved_field(0x20)); - aml_append(field, aml_named_field("PRQE", 8)); - aml_append(field, aml_named_field("PRQF", 8)); - aml_append(field, aml_named_field("PRQG", 8)); - aml_append(field, aml_named_field("PRQH", 8)); - aml_append(sb_scope, field); - aml_append(sb_scope, build_irq_status_method()); aml_append(sb_scope, build_iqcr_method(false)); @@ -1238,54 +1213,6 @@ static Aml *build_q35_dram_controller(const AcpiMcfgInfo *mcfg) return dev; } -static void build_q35_isa_bridge(Aml *table) -{ - Aml *dev; - Aml *scope; - Object *obj; - bool ambiguous; - - /* - * temporarily fish out isa bridge, build_q35_isa_bridge() will be dropped - * once PCI is converted to AcpiDevAmlIf and would be ble to generate - * AML for bridge itself - */ - obj = object_resolve_path_type("", TYPE_ICH9_LPC_DEVICE, &ambiguous); - assert(obj && !ambiguous); - - scope = aml_scope("_SB.PCI0"); - dev = aml_device("ISA"); - aml_append(dev, aml_name_decl("_ADR", aml_int(0x001F0000))); - - call_dev_aml_func(DEVICE(obj), dev); - aml_append(scope, dev); - aml_append(table, scope); -} - -static void build_piix4_isa_bridge(Aml *table) -{ - Aml *dev; - Aml *scope; - Object *obj; - bool ambiguous; - - /* - * temporarily fish out isa bridge, build_piix4_isa_bridge() will be dropped - * once PCI is converted to AcpiDevAmlIf and would be ble to generate - * AML for bridge itself - */ - obj = object_resolve_path_type("", TYPE_PIIX3_PCI_DEVICE, &ambiguous); - assert(obj && !ambiguous); - - scope = aml_scope("_SB.PCI0"); - dev = aml_device("ISA"); - aml_append(dev, aml_name_decl("_ADR", aml_int(0x00010000))); - - call_dev_aml_func(DEVICE(obj), dev); - aml_append(scope, dev); - aml_append(table, scope); -} - static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr) { Aml *scope; @@ -1465,7 +1392,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(sb_scope, dev); aml_append(dsdt, sb_scope); - build_piix4_isa_bridge(dsdt); if (pm->pcihp_bridge_en || pm->pcihp_root_en) { build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); } @@ -1510,7 +1436,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dsdt, sb_scope); - build_q35_isa_bridge(dsdt); if (pm->pcihp_bridge_en) { build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); } -- cgit v1.2.3 From 6d2146147b4e21017313de0f1fc7972ce6a7a7c6 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 17 Oct 2022 12:21:42 +0200 Subject: acpi: enumerate SMB bridge automatically along with other PCI devices to make that happen (bridge sits at _ADR: 0x001F0003), relax PCI enumeration logic to include devices with *function* > 0 if device has something to say about itself (i.e. has build_dev_aml callback set). Signed-off-by: Igor Mammedov Message-Id: <20221017102146.2254096-8-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) (limited to 'hw/i386') diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index e1483bb11a..916343d8d6 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -448,9 +448,10 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, /* * allow describing coldplugged bridges in ACPI even if they are not * on function 0, as they are not unpluggable, for all other devices - * generate description only for function 0 per slot + * generate description only for function 0 per slot, and for other + * functions if device on function provides its own AML */ - if (func && !bridge_in_acpi) { + if (func && !bridge_in_acpi && !get_dev_aml_func(DEVICE(pdev))) { continue; } } else { @@ -1319,25 +1320,6 @@ static Aml *build_q35_osc_method(bool enable_native_pcie_hotplug) return method; } -static void build_smb0(Aml *table, int devnr, int func) -{ - Aml *scope = aml_scope("_SB.PCI0"); - Aml *dev = aml_device("SMB0"); - bool ambiguous; - Object *obj; - /* - * temporarily fish out device hosting SMBUS, build_smb0 will be gone once - * PCI enumeration will be switched to call_dev_aml_func() - */ - obj = object_resolve_path_type("", TYPE_ICH9_SMB_DEVICE, &ambiguous); - assert(obj && !ambiguous); - - aml_append(dev, aml_name_decl("_ADR", aml_int(devnr << 16 | func))); - call_dev_aml_func(DEVICE(obj), dev); - aml_append(scope, dev); - aml_append(table, scope); -} - static void build_acpi0017(Aml *table) { Aml *dev, *scope, *method; @@ -1440,9 +1422,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); } build_q35_pci0_int(dsdt); - if (pcms->smbus) { - build_smb0(dsdt, ICH9_SMB_DEV, ICH9_SMB_FUNC); - } } if (misc->has_hpet) { -- cgit v1.2.3 From d12dbd44e42fa0e851d62e0804f78bab5bd148d5 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 17 Oct 2022 12:21:45 +0200 Subject: acpi: pc/35: sanitize _GPE declaration order Move _GPE block declaration before it gets referenced by other hotplug handlers. While at it move PCI hotplug (_E01) handler after PCI tree description to avoid forward reference to to not yet declared methods/devices. PS: Forward 'usage' usualy is fine as long as it's hidden within method, however 'iasl' may print warnings. So be nice to iasl/guest OS and do things in proper order. PS2: Also follow up patches will move some of hotplug code from PCI tree to _E01 and that also requires PCI Device nodes build first, before Scope can reuse that from global context. Signed-off-by: Igor Mammedov Message-Id: <20221017102146.2254096-11-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) (limited to 'hw/i386') diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 916343d8d6..960305462c 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1434,6 +1434,18 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dsdt, sb_scope); } + scope = aml_scope("_GPE"); + { + aml_append(scope, aml_name_decl("_HID", aml_string("ACPI0006"))); + if (machine->nvdimms_state->is_enabled) { + method = aml_method("_E04", 0, AML_NOTSERIALIZED); + aml_append(method, aml_notify(aml_name("\\_SB.NVDR"), + aml_int(0x80))); + aml_append(scope, method); + } + } + aml_append(dsdt, scope); + if (pcmc->legacy_cpu_hotplug) { build_legacy_cpu_hotplug_aml(dsdt, machine, pm->cpu_hp_io_base); } else { @@ -1452,28 +1464,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, pcms->memhp_io_base); } - scope = aml_scope("_GPE"); - { - aml_append(scope, aml_name_decl("_HID", aml_string("ACPI0006"))); - - if (pm->pcihp_bridge_en || pm->pcihp_root_en) { - method = aml_method("_E01", 0, AML_NOTSERIALIZED); - aml_append(method, - aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF)); - aml_append(method, aml_call0("\\_SB.PCI0.PCNT")); - aml_append(method, aml_release(aml_name("\\_SB.PCI0.BLCK"))); - aml_append(scope, method); - } - - if (machine->nvdimms_state->is_enabled) { - method = aml_method("_E04", 0, AML_NOTSERIALIZED); - aml_append(method, aml_notify(aml_name("\\_SB.NVDR"), - aml_int(0x80))); - aml_append(scope, method); - } - } - aml_append(dsdt, scope); - crs_range_set_init(&crs_range_set); bus = PC_MACHINE(machine)->bus; if (bus) { @@ -1752,6 +1742,19 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } aml_append(dsdt, sb_scope); + if (pm->pcihp_bridge_en || pm->pcihp_root_en) { + scope = aml_scope("_GPE"); + { + method = aml_method("_E01", 0, AML_NOTSERIALIZED); + aml_append(method, + aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF)); + aml_append(method, aml_call0("\\_SB.PCI0.PCNT")); + aml_append(method, aml_release(aml_name("\\_SB.PCI0.BLCK"))); + aml_append(scope, method); + } + aml_append(dsdt, scope); + } + /* copy AML table into ACPI tables blob and patch header there */ g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); acpi_table_end(linker, &table); -- cgit v1.2.3 From 2486dd045794d65598fbca9fd1224c27b9732dce Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Wed, 26 Oct 2022 16:59:13 -0400 Subject: hw/i386/pc.c: CXL Fixed Memory Window should not reserve e820 in bios Early-boot e820 records will be inserted by the bios/efi/early boot software and be reported to the kernel via insert_resource. Later, when CXL drivers iterate through the regions again, they will insert another resource and make the RESERVED memory area a child. This RESERVED memory area causes the memory region to become unusable, and as a result attempting to create memory regions with `cxl create-region ...` Will fail due to the RESERVED area intersecting with the CXL window. During boot the following traceback is observed: 0xffffffff81101650 in insert_resource_expand_to_fit () 0xffffffff83d964c5 in e820__reserve_resources_late () 0xffffffff83e03210 in pcibios_resource_survey () 0xffffffff83e04f4a in pcibios_init () Which produces a call to reserve the CFMWS area: (gdb) p *new $54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, child = 0x0} Later the Kernel parses ACPI tables and reserves the exact same area as the CXL Fixed Memory Window: 0xffffffff811016a4 in insert_resource_conflict () insert_resource () 0xffffffff81a81389 in cxl_parse_cfmws () 0xffffffff818c4a81 in call_handler () acpi_parse_entries_array () (gdb) p/x *new $59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, child = 0x0} This produces the following output in /proc/iomem: 590000000-68fffffff : CXL Window 0 590000000-68fffffff : Reserved This reserved area causes `get_free_mem_region()` to fail due to a check against `__region_intersects()`. Due to this reserved area, the intersect check will only ever return REGION_INTERSECTS, which causes `cxl create-region` to always fail. Signed-off-by: Gregory Price Message-Id: <20221026205912.8579-1-gregory.price@memverge.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Jonathan Cameron --- hw/i386/pc.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'hw/i386') diff --git a/hw/i386/pc.c b/hw/i386/pc.c index ef14da5094..546b703cb4 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, hwaddr cxl_size = MiB; cxl_base = pc_get_cxl_range_start(pcms); - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); memory_region_add_subregion(system_memory, cxl_base, mr); cxl_resv_end = cxl_base + cxl_size; @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, "cxl-fixed-memory-region", fw->size); memory_region_add_subregion(system_memory, fw->base, &fw->mr); - e820_add_entry(fw->base, fw->size, E820_RESERVED); cxl_fmw_base += fw->size; cxl_resv_end = cxl_fmw_base; } -- cgit v1.2.3 From 6f56d6de99a0f7c71f4a60fff585bbc0561e43c9 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 28 Oct 2022 12:34:17 +0200 Subject: hw/i386/acpi-build: Remove unused struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ammends commit b23046abe78f48498a423b802d6d86ba0172d57f 'pc: acpi-build: simplify PCI bus tree generation'. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20221026133110.91828-2-shentey@gmail.com> Message-Id: <20221028103419.93398-2-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'hw/i386') diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 960305462c..1ebf14b899 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -121,13 +121,6 @@ typedef struct AcpiMiscInfo { unsigned dsdt_size; } AcpiMiscInfo; -typedef struct AcpiBuildPciBusHotplugState { - GArray *device_table; - GArray *notify_table; - struct AcpiBuildPciBusHotplugState *parent; - bool pcihp_bridge_en; -} AcpiBuildPciBusHotplugState; - typedef struct FwCfgTPMConfig { uint32_t tpmppi_address; uint8_t tpm_version; -- cgit v1.2.3 From bbaa5c41fad29e9f6b4f1975103d530dc1391772 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 28 Oct 2022 12:34:18 +0200 Subject: hw/i386/acpi-build: Resolve redundant attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The is_piix4 attribute is set once in one location and read once in another. Doing both in one location allows for removing the attribute altogether. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20221026133110.91828-3-shentey@gmail.com> Message-Id: <20221028103419.93398-3-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'hw/i386') diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 1ebf14b899..73d8a59737 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -112,7 +112,6 @@ typedef struct AcpiPmInfo { } AcpiPmInfo; typedef struct AcpiMiscInfo { - bool is_piix4; bool has_hpet; #ifdef CONFIG_TPM TPMVersion tpm_version; @@ -281,17 +280,6 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) static void acpi_get_misc_info(AcpiMiscInfo *info) { - Object *piix = object_resolve_type_unambiguous(TYPE_PIIX4_PM); - Object *lpc = object_resolve_type_unambiguous(TYPE_ICH9_LPC_DEVICE); - assert(!!piix != !!lpc); - - if (piix) { - info->is_piix4 = true; - } - if (lpc) { - info->is_piix4 = false; - } - info->has_hpet = hpet_find(); #ifdef CONFIG_TPM info->tpm_version = tpm_get_version(tpm_find()); @@ -1334,6 +1322,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm, AcpiMiscInfo *misc, Range *pci_hole, Range *pci_hole64, MachineState *machine) { + Object *piix = object_resolve_type_unambiguous(TYPE_PIIX4_PM); + Object *lpc = object_resolve_type_unambiguous(TYPE_ICH9_LPC_DEVICE); CrsRangeEntry *entry; Aml *dsdt, *sb_scope, *scope, *dev, *method, *field, *pkg, *crs; CrsRangeSet crs_range_set; @@ -1354,11 +1344,13 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = x86ms->oem_id, .oem_table_id = x86ms->oem_table_id }; + assert(!!piix != !!lpc); + acpi_table_begin(&table, table_data); dsdt = init_aml_allocator(); build_dbg_aml(dsdt); - if (misc->is_piix4) { + if (piix) { sb_scope = aml_scope("_SB"); dev = aml_device("PCI0"); aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); @@ -1371,7 +1363,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); } build_piix4_pci0_int(dsdt); - } else { + } else if (lpc) { sb_scope = aml_scope("_SB"); dev = aml_device("PCI0"); aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); -- cgit v1.2.3 From b496a17d45406e3ba943c8949784091cf01e4a6d Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 28 Oct 2022 12:34:19 +0200 Subject: hw/i386/acpi-build: Resolve north rather than south bridges The code currently assumes Q35 iff ICH9 and i440fx iff PIIX. Now that more AML generation has been moved into the south bridges and since the machines define themselves primarily through their north bridges, let's switch to resolving the north bridges for AML generation instead. This also allows for easier experimentation with different south bridges in the "pc" machine, e.g. with PIIX4 and VT82xx. Signed-off-by: Bernhard Beschow Message-Id: <20221028103419.93398-4-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'hw/i386') diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 73d8a59737..d9eaa5fc4d 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -60,6 +60,7 @@ #include "hw/i386/fw_cfg.h" #include "hw/i386/ich9.h" #include "hw/pci/pci_bus.h" +#include "hw/pci-host/i440fx.h" #include "hw/pci-host/q35.h" #include "hw/i386/x86-iommu.h" @@ -1322,8 +1323,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm, AcpiMiscInfo *misc, Range *pci_hole, Range *pci_hole64, MachineState *machine) { - Object *piix = object_resolve_type_unambiguous(TYPE_PIIX4_PM); - Object *lpc = object_resolve_type_unambiguous(TYPE_ICH9_LPC_DEVICE); + Object *i440fx = object_resolve_type_unambiguous(TYPE_I440FX_PCI_HOST_BRIDGE); + Object *q35 = object_resolve_type_unambiguous(TYPE_Q35_HOST_DEVICE); CrsRangeEntry *entry; Aml *dsdt, *sb_scope, *scope, *dev, *method, *field, *pkg, *crs; CrsRangeSet crs_range_set; @@ -1344,13 +1345,13 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = x86ms->oem_id, .oem_table_id = x86ms->oem_table_id }; - assert(!!piix != !!lpc); + assert(!!i440fx != !!q35); acpi_table_begin(&table, table_data); dsdt = init_aml_allocator(); build_dbg_aml(dsdt); - if (piix) { + if (i440fx) { sb_scope = aml_scope("_SB"); dev = aml_device("PCI0"); aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); @@ -1363,7 +1364,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); } build_piix4_pci0_int(dsdt); - } else if (lpc) { + } else if (q35) { sb_scope = aml_scope("_SB"); dev = aml_device("PCI0"); aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); -- cgit v1.2.3 From fb1d084b4488aef990b5bafab08ecc20197fd749 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 28 Oct 2022 14:14:33 +0800 Subject: intel-iommu: don't warn guest errors when getting rid2pasid entry We use to warn on wrong rid2pasid entry. But this error could be triggered by the guest and could happens during initialization. So let's don't warn in this case. Reviewed-by: Peter Xu Signed-off-by: Jason Wang Message-Id: <20221028061436.30093-2-jasowang@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Yi Liu --- hw/i386/intel_iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'hw/i386') diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 6524c2ee32..271de995be 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1554,8 +1554,10 @@ static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce) if (s->root_scalable) { ret = vtd_ce_get_rid2pasid_entry(s, ce, &pe); if (ret) { - error_report_once("%s: vtd_ce_get_rid2pasid_entry error: %"PRId32, - __func__, ret); + /* + * This error is guest triggerable. We should assumt PT + * not enabled for safety. + */ return false; } return (VTD_PE_GET_TYPE(&pe) == VTD_SM_PASID_ENTRY_PT); @@ -1569,14 +1571,12 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as) { IntelIOMMUState *s; VTDContextEntry ce; - int ret; assert(as); s = as->iommu_state; - ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus), - as->devfn, &ce); - if (ret) { + if (vtd_dev_to_context_entry(s, pci_bus_num(as->bus), as->devfn, + &ce)) { /* * Possibly failed to parse the context entry for some reason * (e.g., during init, or any guest configuration errors on -- cgit v1.2.3 From da8d439c8048f685e0333ae468b7520b82925e75 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 28 Oct 2022 14:14:34 +0800 Subject: intel-iommu: drop VTDBus We introduce VTDBus structure as an intermediate step for searching the address space. This works well with SID based matching/lookup. But when we want to support SID plus PASID based address space lookup, this intermediate steps turns out to be a burden. So the patch simply drops the VTDBus structure and use the PCIBus and devfn as the key for the g_hash_table(). This simplifies the codes and the future PASID extension. To prevent being slower for past vtd_find_as_from_bus_num() callers, a vtd_as cache indexed by the bus number is introduced to store the last recent search result of a vtd_as belongs to a specific bus. Reviewed-by: Peter Xu Signed-off-by: Jason Wang Message-Id: <20221028061436.30093-3-jasowang@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Yi Liu --- hw/i386/intel_iommu.c | 234 +++++++++++++++++++++++++------------------------- 1 file changed, 116 insertions(+), 118 deletions(-) (limited to 'hw/i386') diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 271de995be..3d426bb326 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -61,6 +61,16 @@ } \ } +/* + * PCI bus number (or SID) is not reliable since the device is usaully + * initalized before guest can configure the PCI bridge + * (SECONDARY_BUS_NUMBER). + */ +struct vtd_as_key { + PCIBus *bus; + uint8_t devfn; +}; + static void vtd_address_space_refresh_all(IntelIOMMUState *s); static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n); @@ -210,6 +220,27 @@ static guint vtd_uint64_hash(gconstpointer v) return (guint)*(const uint64_t *)v; } +static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2) +{ + const struct vtd_as_key *key1 = v1; + const struct vtd_as_key *key2 = v2; + + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); +} + +/* + * Note that we use pointer to PCIBus as the key, so hashing/shifting + * based on the pointer value is intended. Note that we deal with + * collisions through vtd_as_equal(). + */ +static guint vtd_as_hash(gconstpointer v) +{ + const struct vtd_as_key *key = v; + guint value = (guint)(uintptr_t)key->bus; + + return (guint)(value << 8 | key->devfn); +} + static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, gpointer user_data) { @@ -248,22 +279,14 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, static void vtd_reset_context_cache_locked(IntelIOMMUState *s) { VTDAddressSpace *vtd_as; - VTDBus *vtd_bus; - GHashTableIter bus_it; - uint32_t devfn_it; + GHashTableIter as_it; trace_vtd_context_cache_reset(); - g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr); + g_hash_table_iter_init(&as_it, s->vtd_address_spaces); - while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) { - for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) { - vtd_as = vtd_bus->dev_as[devfn_it]; - if (!vtd_as) { - continue; - } - vtd_as->context_cache_entry.context_cache_gen = 0; - } + while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) { + vtd_as->context_cache_entry.context_cache_gen = 0; } s->context_cache_gen = 1; } @@ -993,32 +1016,6 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) return slpte & rsvd_mask; } -/* Find the VTD address space associated with a given bus number */ -static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) -{ - VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num]; - GHashTableIter iter; - - if (vtd_bus) { - return vtd_bus; - } - - /* - * Iterate over the registered buses to find the one which - * currently holds this bus number and update the bus_num - * lookup table. - */ - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); - while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { - if (pci_bus_num(vtd_bus->bus) == bus_num) { - s->vtd_as_by_bus_num[bus_num] = vtd_bus; - return vtd_bus; - } - } - - return NULL; -} - /* Given the @iova, get relevant @slptep. @slpte_level will be the last level * of the translation, can be used for deciding the size of large page. */ @@ -1632,24 +1629,13 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) static void vtd_switch_address_space_all(IntelIOMMUState *s) { + VTDAddressSpace *vtd_as; GHashTableIter iter; - VTDBus *vtd_bus; - int i; - - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); - while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { - for (i = 0; i < PCI_DEVFN_MAX; i++) { - if (!vtd_bus->dev_as[i]) { - continue; - } - vtd_switch_address_space(vtd_bus->dev_as[i]); - } - } -} -static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn) -{ - return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL); + g_hash_table_iter_init(&iter, s->vtd_address_spaces); + while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_as)) { + vtd_switch_address_space(vtd_as); + } } static const bool vtd_qualified_faults[] = { @@ -1686,18 +1672,37 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST; } +static gboolean vtd_find_as_by_sid(gpointer key, gpointer value, + gpointer user_data) +{ + struct vtd_as_key *as_key = (struct vtd_as_key *)key; + uint16_t target_sid = *(uint16_t *)user_data; + uint16_t sid = PCI_BUILD_BDF(pci_bus_num(as_key->bus), as_key->devfn); + return sid == target_sid; +} + +static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid) +{ + uint8_t bus_num = PCI_BUS_NUM(sid); + VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num]; + + if (vtd_as && + (sid == PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn))) { + return vtd_as; + } + + vtd_as = g_hash_table_find(s->vtd_address_spaces, vtd_find_as_by_sid, &sid); + s->vtd_as_cache[bus_num] = vtd_as; + + return vtd_as; +} + static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id) { - VTDBus *vtd_bus; VTDAddressSpace *vtd_as; bool success = false; - vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id)); - if (!vtd_bus) { - goto out; - } - - vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)]; + vtd_as = vtd_get_as_by_sid(s, source_id); if (!vtd_as) { goto out; } @@ -1733,7 +1738,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, VTDContextCacheEntry *cc_entry; uint64_t slpte, page_mask; uint32_t level; - uint16_t source_id = vtd_make_source_id(bus_num, devfn); + uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn); int ret_fr; bool is_fpd_set = false; bool reads = true; @@ -1905,11 +1910,10 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, uint16_t source_id, uint16_t func_mask) { + GHashTableIter as_it; uint16_t mask; - VTDBus *vtd_bus; VTDAddressSpace *vtd_as; uint8_t bus_n, devfn; - uint16_t devfn_it; trace_vtd_inv_desc_cc_devices(source_id, func_mask); @@ -1932,32 +1936,31 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, mask = ~mask; bus_n = VTD_SID_TO_BUS(source_id); - vtd_bus = vtd_find_as_from_bus_num(s, bus_n); - if (vtd_bus) { - devfn = VTD_SID_TO_DEVFN(source_id); - for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) { - vtd_as = vtd_bus->dev_as[devfn_it]; - if (vtd_as && ((devfn_it & mask) == (devfn & mask))) { - trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it), - VTD_PCI_FUNC(devfn_it)); - vtd_iommu_lock(s); - vtd_as->context_cache_entry.context_cache_gen = 0; - vtd_iommu_unlock(s); - /* - * Do switch address space when needed, in case if the - * device passthrough bit is switched. - */ - vtd_switch_address_space(vtd_as); - /* - * So a device is moving out of (or moving into) a - * domain, resync the shadow page table. - * This won't bring bad even if we have no such - * notifier registered - the IOMMU notification - * framework will skip MAP notifications if that - * happened. - */ - vtd_sync_shadow_page_table(vtd_as); - } + devfn = VTD_SID_TO_DEVFN(source_id); + + g_hash_table_iter_init(&as_it, s->vtd_address_spaces); + while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) { + if ((pci_bus_num(vtd_as->bus) == bus_n) && + (vtd_as->devfn & mask) == (devfn & mask)) { + trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(vtd_as->devfn), + VTD_PCI_FUNC(vtd_as->devfn)); + vtd_iommu_lock(s); + vtd_as->context_cache_entry.context_cache_gen = 0; + vtd_iommu_unlock(s); + /* + * Do switch address space when needed, in case if the + * device passthrough bit is switched. + */ + vtd_switch_address_space(vtd_as); + /* + * So a device is moving out of (or moving into) a + * domain, resync the shadow page table. + * This won't bring bad even if we have no such + * notifier registered - the IOMMU notification + * framework will skip MAP notifications if that + * happened. + */ + vtd_sync_shadow_page_table(vtd_as); } } } @@ -2473,18 +2476,13 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, { VTDAddressSpace *vtd_dev_as; IOMMUTLBEvent event; - struct VTDBus *vtd_bus; hwaddr addr; uint64_t sz; uint16_t sid; - uint8_t devfn; bool size; - uint8_t bus_num; addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi); sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo); - devfn = sid & 0xff; - bus_num = sid >> 8; size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi); if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) || @@ -2495,12 +2493,11 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, return false; } - vtd_bus = vtd_find_as_from_bus_num(s, bus_num); - if (!vtd_bus) { - goto done; - } - - vtd_dev_as = vtd_bus->dev_as[devfn]; + /* + * Using sid is OK since the guest should have finished the + * initialization of both the bus and device. + */ + vtd_dev_as = vtd_get_as_by_sid(s, sid); if (!vtd_dev_as) { goto done; } @@ -3427,27 +3424,27 @@ static const MemoryRegionOps vtd_mem_ir_ops = { VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) { - uintptr_t key = (uintptr_t)bus; - VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key); + /* + * We can't simply use sid here since the bus number might not be + * initialized by the guest. + */ + struct vtd_as_key key = { + .bus = bus, + .devfn = devfn, + }; VTDAddressSpace *vtd_dev_as; char name[128]; - if (!vtd_bus) { - uintptr_t *new_key = g_malloc(sizeof(*new_key)); - *new_key = (uintptr_t)bus; - /* No corresponding free() */ - vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \ - PCI_DEVFN_MAX); - vtd_bus->bus = bus; - g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus); - } + vtd_dev_as = g_hash_table_lookup(s->vtd_address_spaces, &key); + if (!vtd_dev_as) { + struct vtd_as_key *new_key = g_malloc(sizeof(*new_key)); - vtd_dev_as = vtd_bus->dev_as[devfn]; + new_key->bus = bus; + new_key->devfn = devfn; - if (!vtd_dev_as) { snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn), PCI_FUNC(devfn)); - vtd_bus->dev_as[devfn] = vtd_dev_as = g_new0(VTDAddressSpace, 1); + vtd_dev_as = g_new0(VTDAddressSpace, 1); vtd_dev_as->bus = bus; vtd_dev_as->devfn = (uint8_t)devfn; @@ -3503,6 +3500,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) &vtd_dev_as->nodmar, 0); vtd_switch_address_space(vtd_dev_as); + + g_hash_table_insert(s->vtd_address_spaces, new_key, vtd_dev_as); } return vtd_dev_as; } @@ -3881,7 +3880,6 @@ static void vtd_realize(DeviceState *dev, Error **errp) QLIST_INIT(&s->vtd_as_with_notifiers); qemu_mutex_init(&s->iommu_lock); - memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num)); memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, "intel_iommu", DMAR_REG_SIZE); @@ -3903,8 +3901,8 @@ static void vtd_realize(DeviceState *dev, Error **errp) /* No corresponding destroy */ s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal, g_free, g_free); - s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal, - g_free, g_free); + s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal, + g_free, g_free); vtd_init(s); sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR); pci_setup_iommu(bus, vtd_host_dma_iommu, dev); -- cgit v1.2.3 From 940e55278611bad4e179e77952da6a36a37562b0 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 28 Oct 2022 14:14:35 +0800 Subject: intel-iommu: convert VTD_PE_GET_FPD_ERR() to be a function We used to have a macro for VTD_PE_GET_FPD_ERR() but it has an internal goto which prevents it from being reused. This patch convert that macro to a dedicated function and let the caller to decide what to do (e.g using goto or not). This makes sure it can be re-used for other function that requires fault reporting. Reviewed-by: Peter Xu Signed-off-by: Jason Wang Message-Id: <20221028061436.30093-4-jasowang@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Yi Liu --- hw/i386/intel_iommu.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) (limited to 'hw/i386') diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 3d426bb326..259f720c7c 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -49,17 +49,6 @@ /* pe operations */ #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT) #define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW)) -#define VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write) {\ - if (ret_fr) { \ - ret_fr = -ret_fr; \ - if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { \ - trace_vtd_fault_disabled(); \ - } else { \ - vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); \ - } \ - goto error; \ - } \ -} /* * PCI bus number (or SID) is not reliable since the device is usaully @@ -1716,6 +1705,19 @@ out: trace_vtd_pt_enable_fast_path(source_id, success); } +static void vtd_report_fault(IntelIOMMUState *s, + int err, bool is_fpd_set, + uint16_t source_id, + hwaddr addr, + bool is_write) +{ + if (is_fpd_set && vtd_is_qualified_fault(err)) { + trace_vtd_fault_disabled(); + } else { + vtd_report_dmar_fault(s, source_id, addr, err, is_write); + } +} + /* Map dev to context-entry then do a paging-structures walk to do a iommu * translation. * @@ -1776,7 +1778,11 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; if (!is_fpd_set && s->root_scalable) { ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set); - VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write); + if (ret_fr) { + vtd_report_fault(s, -ret_fr, is_fpd_set, + source_id, addr, is_write); + goto error; + } } } else { ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce); @@ -1784,7 +1790,11 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, if (!ret_fr && !is_fpd_set && s->root_scalable) { ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set); } - VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write); + if (ret_fr) { + vtd_report_fault(s, -ret_fr, is_fpd_set, + source_id, addr, is_write); + goto error; + } /* Update context-cache */ trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo, cc_entry->context_cache_gen, @@ -1820,7 +1830,11 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &slpte, &level, &reads, &writes, s->aw_bits); - VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write); + if (ret_fr) { + vtd_report_fault(s, -ret_fr, is_fpd_set, source_id, + addr, is_write); + goto error; + } page_mask = vtd_slpt_level_page_mask(level); access_flags = IOMMU_ACCESS_FLAG(reads, writes); -- cgit v1.2.3 From 1b2b12376c8a513a0c7b5e3b8ea702038d3d7db5 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 28 Oct 2022 14:14:36 +0800 Subject: intel-iommu: PASID support This patch introduce ECAP_PASID via "x-pasid-mode". Based on the existing support for scalable mode, we need to implement the following missing parts: 1) tag VTDAddressSpace with PASID and support IOMMU/DMA translation with PASID 2) tag IOTLB with PASID 3) PASID cache and its flush 4) PASID based IOTLB invalidation For simplicity PASID cache is not implemented so we can simply implement the PASID cache flush as a no and leave it to be implemented in the future. For PASID based IOTLB invalidation, since we haven't had L1 stage support, the PASID based IOTLB invalidation is not implemented yet. For PASID based device IOTLB invalidation, it requires the support for vhost so we forbid enabling device IOTLB when PASID is enabled now. Those work could be done in the future. Note that though PASID based IOMMU translation is ready but no device can issue PASID DMA right now. In this case, PCI_NO_PASID is used as PASID to identify the address without PASID. vtd_find_add_as() has been extended to provision address space with PASID which could be utilized by the future extension of PCI core to allow device model to use PASID based DMA translation. This feature would be useful for: 1) prototyping PASID support for devices like virtio 2) future vPASID work 3) future PRS and vSVA work Reviewed-by: Peter Xu Signed-off-by: Jason Wang Message-Id: <20221028061436.30093-5-jasowang@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 418 +++++++++++++++++++++++++++++++---------- hw/i386/intel_iommu_internal.h | 16 +- hw/i386/trace-events | 2 + 3 files changed, 333 insertions(+), 103 deletions(-) (limited to 'hw/i386') diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 259f720c7c..a08ee85edf 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -58,6 +58,14 @@ struct vtd_as_key { PCIBus *bus; uint8_t devfn; + uint32_t pasid; +}; + +struct vtd_iotlb_key { + uint64_t gfn; + uint32_t pasid; + uint32_t level; + uint16_t sid; }; static void vtd_address_space_refresh_all(IntelIOMMUState *s); @@ -199,14 +207,24 @@ static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as) } /* GHashTable functions */ -static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2) +static gboolean vtd_iotlb_equal(gconstpointer v1, gconstpointer v2) { - return *((const uint64_t *)v1) == *((const uint64_t *)v2); + const struct vtd_iotlb_key *key1 = v1; + const struct vtd_iotlb_key *key2 = v2; + + return key1->sid == key2->sid && + key1->pasid == key2->pasid && + key1->level == key2->level && + key1->gfn == key2->gfn; } -static guint vtd_uint64_hash(gconstpointer v) +static guint vtd_iotlb_hash(gconstpointer v) { - return (guint)*(const uint64_t *)v; + const struct vtd_iotlb_key *key = v; + + return key->gfn | ((key->sid) << VTD_IOTLB_SID_SHIFT) | + (key->level) << VTD_IOTLB_LVL_SHIFT | + (key->pasid) << VTD_IOTLB_PASID_SHIFT; } static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2) @@ -214,7 +232,8 @@ static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2) const struct vtd_as_key *key1 = v1; const struct vtd_as_key *key2 = v2; - return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn) && + (key1->pasid == key2->pasid); } /* @@ -302,13 +321,6 @@ static void vtd_reset_caches(IntelIOMMUState *s) vtd_iommu_unlock(s); } -static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id, - uint32_t level) -{ - return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) | - ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT); -} - static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level) { return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K; @@ -316,15 +328,17 @@ static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level) /* Must be called with IOMMU lock held */ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id, - hwaddr addr) + uint32_t pasid, hwaddr addr) { + struct vtd_iotlb_key key; VTDIOTLBEntry *entry; - uint64_t key; int level; for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) { - key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level), - source_id, level); + key.gfn = vtd_get_iotlb_gfn(addr, level); + key.level = level; + key.sid = source_id; + key.pasid = pasid; entry = g_hash_table_lookup(s->iotlb, &key); if (entry) { goto out; @@ -338,10 +352,11 @@ out: /* Must be with IOMMU lock held */ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, uint16_t domain_id, hwaddr addr, uint64_t slpte, - uint8_t access_flags, uint32_t level) + uint8_t access_flags, uint32_t level, + uint32_t pasid) { VTDIOTLBEntry *entry = g_malloc(sizeof(*entry)); - uint64_t *key = g_malloc(sizeof(*key)); + struct vtd_iotlb_key *key = g_malloc(sizeof(*key)); uint64_t gfn = vtd_get_iotlb_gfn(addr, level); trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id); @@ -355,7 +370,13 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, entry->slpte = slpte; entry->access_flags = access_flags; entry->mask = vtd_slpt_level_page_mask(level); - *key = vtd_get_iotlb_key(gfn, source_id, level); + entry->pasid = pasid; + + key->gfn = gfn; + key->sid = source_id; + key->level = level; + key->pasid = pasid; + g_hash_table_replace(s->iotlb, key, entry); } @@ -448,7 +469,8 @@ static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index) /* Must not update F field now, should be done later */ static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index, uint16_t source_id, hwaddr addr, - VTDFaultReason fault, bool is_write) + VTDFaultReason fault, bool is_write, + bool is_pasid, uint32_t pasid) { uint64_t hi = 0, lo; hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4); @@ -456,7 +478,8 @@ static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index, assert(index < DMAR_FRCD_REG_NR); lo = VTD_FRCD_FI(addr); - hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault); + hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault) | + VTD_FRCD_PV(pasid) | VTD_FRCD_PP(is_pasid); if (!is_write) { hi |= VTD_FRCD_T; } @@ -487,7 +510,8 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id) /* Log and report an DMAR (address translation) fault to software */ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, hwaddr addr, VTDFaultReason fault, - bool is_write) + bool is_write, bool is_pasid, + uint32_t pasid) { uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG); @@ -514,7 +538,8 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, return; } - vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write); + vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, + is_write, is_pasid, pasid); if (fsts_reg & VTD_FSTS_PPF) { error_report_once("There are pending faults already, " @@ -819,13 +844,15 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s, static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce, - VTDPASIDEntry *pe) + VTDPASIDEntry *pe, + uint32_t pasid) { - uint32_t pasid; dma_addr_t pasid_dir_base; int ret = 0; - pasid = VTD_CE_GET_RID2PASID(ce); + if (pasid == PCI_NO_PASID) { + pasid = VTD_CE_GET_RID2PASID(ce); + } pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce); ret = vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe); @@ -834,15 +861,17 @@ static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s, static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s, VTDContextEntry *ce, - bool *pe_fpd_set) + bool *pe_fpd_set, + uint32_t pasid) { int ret; - uint32_t pasid; dma_addr_t pasid_dir_base; VTDPASIDDirEntry pdire; VTDPASIDEntry pe; - pasid = VTD_CE_GET_RID2PASID(ce); + if (pasid == PCI_NO_PASID) { + pasid = VTD_CE_GET_RID2PASID(ce); + } pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce); /* @@ -888,12 +917,13 @@ static inline uint32_t vtd_ce_get_level(VTDContextEntry *ce) } static uint32_t vtd_get_iova_level(IntelIOMMUState *s, - VTDContextEntry *ce) + VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; if (s->root_scalable) { - vtd_ce_get_rid2pasid_entry(s, ce, &pe); + vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); return VTD_PE_GET_LEVEL(&pe); } @@ -906,12 +936,13 @@ static inline uint32_t vtd_ce_get_agaw(VTDContextEntry *ce) } static uint32_t vtd_get_iova_agaw(IntelIOMMUState *s, - VTDContextEntry *ce) + VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; if (s->root_scalable) { - vtd_ce_get_rid2pasid_entry(s, ce, &pe); + vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); return 30 + ((pe.val[0] >> 2) & VTD_SM_PASID_ENTRY_AW) * 9; } @@ -953,31 +984,33 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu, } static inline uint64_t vtd_iova_limit(IntelIOMMUState *s, - VTDContextEntry *ce, uint8_t aw) + VTDContextEntry *ce, uint8_t aw, + uint32_t pasid) { - uint32_t ce_agaw = vtd_get_iova_agaw(s, ce); + uint32_t ce_agaw = vtd_get_iova_agaw(s, ce, pasid); return 1ULL << MIN(ce_agaw, aw); } /* Return true if IOVA passes range check, otherwise false. */ static inline bool vtd_iova_range_check(IntelIOMMUState *s, uint64_t iova, VTDContextEntry *ce, - uint8_t aw) + uint8_t aw, uint32_t pasid) { /* * Check if @iova is above 2^X-1, where X is the minimum of MGAW * in CAP_REG and AW in context-entry. */ - return !(iova & ~(vtd_iova_limit(s, ce, aw) - 1)); + return !(iova & ~(vtd_iova_limit(s, ce, aw, pasid) - 1)); } static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s, - VTDContextEntry *ce) + VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; if (s->root_scalable) { - vtd_ce_get_rid2pasid_entry(s, ce, &pe); + vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; } @@ -1011,18 +1044,19 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, uint64_t iova, bool is_write, uint64_t *slptep, uint32_t *slpte_level, - bool *reads, bool *writes, uint8_t aw_bits) + bool *reads, bool *writes, uint8_t aw_bits, + uint32_t pasid) { - dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce); - uint32_t level = vtd_get_iova_level(s, ce); + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); + uint32_t level = vtd_get_iova_level(s, ce, pasid); uint32_t offset; uint64_t slpte; uint64_t access_right_check; uint64_t xlat, size; - if (!vtd_iova_range_check(s, iova, ce, aw_bits)) { - error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")", - __func__, iova); + if (!vtd_iova_range_check(s, iova, ce, aw_bits, pasid)) { + error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 "," + "pasid=0x%" PRIx32 ")", __func__, iova, pasid); return -VTD_FR_ADDR_BEYOND_MGAW; } @@ -1035,8 +1069,9 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, if (slpte == (uint64_t)-1) { error_report_once("%s: detected read error on DMAR slpte " - "(iova=0x%" PRIx64 ")", __func__, iova); - if (level == vtd_get_iova_level(s, ce)) { + "(iova=0x%" PRIx64 ", pasid=0x%" PRIx32 ")", + __func__, iova, pasid); + if (level == vtd_get_iova_level(s, ce, pasid)) { /* Invalid programming of context-entry */ return -VTD_FR_CONTEXT_ENTRY_INV; } else { @@ -1048,15 +1083,16 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, if (!(slpte & access_right_check)) { error_report_once("%s: detected slpte permission error " "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", " - "slpte=0x%" PRIx64 ", write=%d)", __func__, - iova, level, slpte, is_write); + "slpte=0x%" PRIx64 ", write=%d, pasid=0x%" + PRIx32 ")", __func__, iova, level, + slpte, is_write, pasid); return is_write ? -VTD_FR_WRITE : -VTD_FR_READ; } if (vtd_slpte_nonzero_rsvd(slpte, level)) { error_report_once("%s: detected splte reserve non-zero " "iova=0x%" PRIx64 ", level=0x%" PRIx32 - "slpte=0x%" PRIx64 ")", __func__, iova, - level, slpte); + "slpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")", + __func__, iova, level, slpte, pasid); return -VTD_FR_PAGING_ENTRY_RSVD; } @@ -1084,9 +1120,10 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, error_report_once("%s: xlat address is in interrupt range " "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", " "slpte=0x%" PRIx64 ", write=%d, " - "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ")", + "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", " + "pasid=0x%" PRIx32 ")", __func__, iova, level, slpte, is_write, - xlat, size); + xlat, size, pasid); return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR : -VTD_FR_INTERRUPT_ADDR; } @@ -1300,18 +1337,19 @@ next: */ static int vtd_page_walk(IntelIOMMUState *s, VTDContextEntry *ce, uint64_t start, uint64_t end, - vtd_page_walk_info *info) + vtd_page_walk_info *info, + uint32_t pasid) { - dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce); - uint32_t level = vtd_get_iova_level(s, ce); + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); + uint32_t level = vtd_get_iova_level(s, ce, pasid); - if (!vtd_iova_range_check(s, start, ce, info->aw)) { + if (!vtd_iova_range_check(s, start, ce, info->aw, pasid)) { return -VTD_FR_ADDR_BEYOND_MGAW; } - if (!vtd_iova_range_check(s, end, ce, info->aw)) { + if (!vtd_iova_range_check(s, end, ce, info->aw, pasid)) { /* Fix end so that it reaches the maximum */ - end = vtd_iova_limit(s, ce, info->aw); + end = vtd_iova_limit(s, ce, info->aw, pasid); } return vtd_page_walk_level(addr, start, end, level, true, true, info); @@ -1379,7 +1417,7 @@ static int vtd_ce_rid2pasid_check(IntelIOMMUState *s, * has valid rid2pasid setting, which includes valid * rid2pasid field and corresponding pasid entry setting */ - return vtd_ce_get_rid2pasid_entry(s, ce, &pe); + return vtd_ce_get_rid2pasid_entry(s, ce, &pe, PCI_NO_PASID); } /* Map a device to its corresponding domain (context-entry) */ @@ -1462,12 +1500,13 @@ static int vtd_sync_shadow_page_hook(IOMMUTLBEvent *event, } static uint16_t vtd_get_domain_id(IntelIOMMUState *s, - VTDContextEntry *ce) + VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; if (s->root_scalable) { - vtd_ce_get_rid2pasid_entry(s, ce, &pe); + vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); return VTD_SM_PASID_ENTRY_DID(pe.val[1]); } @@ -1485,10 +1524,10 @@ static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as, .notify_unmap = true, .aw = s->aw_bits, .as = vtd_as, - .domain_id = vtd_get_domain_id(s, ce), + .domain_id = vtd_get_domain_id(s, ce, vtd_as->pasid), }; - return vtd_page_walk(s, ce, addr, addr + size, &info); + return vtd_page_walk(s, ce, addr, addr + size, &info, vtd_as->pasid); } static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as) @@ -1532,13 +1571,14 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as) * 1st-level translation or 2nd-level translation, it depends * on PGTT setting. */ -static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce) +static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; int ret; if (s->root_scalable) { - ret = vtd_ce_get_rid2pasid_entry(s, ce, &pe); + ret = vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); if (ret) { /* * This error is guest triggerable. We should assumt PT @@ -1572,19 +1612,20 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as) return false; } - return vtd_dev_pt_enabled(s, &ce); + return vtd_dev_pt_enabled(s, &ce, as->pasid); } /* Return whether the device is using IOMMU translation. */ static bool vtd_switch_address_space(VTDAddressSpace *as) { - bool use_iommu; + bool use_iommu, pt; /* Whether we need to take the BQL on our own */ bool take_bql = !qemu_mutex_iothread_locked(); assert(as); use_iommu = as->iommu_state->dmar_enabled && !vtd_as_pt_enabled(as); + pt = as->iommu_state->dmar_enabled && vtd_as_pt_enabled(as); trace_vtd_switch_address_space(pci_bus_num(as->bus), VTD_PCI_SLOT(as->devfn), @@ -1604,11 +1645,53 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) if (use_iommu) { memory_region_set_enabled(&as->nodmar, false); memory_region_set_enabled(MEMORY_REGION(&as->iommu), true); + /* + * vt-d spec v3.4 3.14: + * + * """ + * Requests-with-PASID with input address in range 0xFEEx_xxxx + * are translated normally like any other request-with-PASID + * through DMA-remapping hardware. + * """ + * + * Need to disable ir for as with PASID. + */ + if (as->pasid != PCI_NO_PASID) { + memory_region_set_enabled(&as->iommu_ir, false); + } else { + memory_region_set_enabled(&as->iommu_ir, true); + } } else { memory_region_set_enabled(MEMORY_REGION(&as->iommu), false); memory_region_set_enabled(&as->nodmar, true); } + /* + * vtd-spec v3.4 3.14: + * + * """ + * Requests-with-PASID with input address in range 0xFEEx_xxxx are + * translated normally like any other request-with-PASID through + * DMA-remapping hardware. However, if such a request is processed + * using pass-through translation, it will be blocked as described + * in the paragraph below. + * + * Software must not program paging-structure entries to remap any + * address to the interrupt address range. Untranslated requests + * and translation requests that result in an address in the + * interrupt range will be blocked with condition code LGN.4 or + * SGN.8. + * """ + * + * We enable per as memory region (iommu_ir_fault) for catching + * the tranlsation for interrupt range through PASID + PT. + */ + if (pt && as->pasid != PCI_NO_PASID) { + memory_region_set_enabled(&as->iommu_ir_fault, true); + } else { + memory_region_set_enabled(&as->iommu_ir_fault, false); + } + if (take_bql) { qemu_mutex_unlock_iothread(); } @@ -1709,12 +1792,15 @@ static void vtd_report_fault(IntelIOMMUState *s, int err, bool is_fpd_set, uint16_t source_id, hwaddr addr, - bool is_write) + bool is_write, + bool is_pasid, + uint32_t pasid) { if (is_fpd_set && vtd_is_qualified_fault(err)) { trace_vtd_fault_disabled(); } else { - vtd_report_dmar_fault(s, source_id, addr, err, is_write); + vtd_report_dmar_fault(s, source_id, addr, err, is_write, + is_pasid, pasid); } } @@ -1739,13 +1825,14 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, uint8_t bus_num = pci_bus_num(bus); VTDContextCacheEntry *cc_entry; uint64_t slpte, page_mask; - uint32_t level; + uint32_t level, pasid = vtd_as->pasid; uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn); int ret_fr; bool is_fpd_set = false; bool reads = true; bool writes = true; uint8_t access_flags; + bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable; VTDIOTLBEntry *iotlb_entry; /* @@ -1758,15 +1845,17 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, cc_entry = &vtd_as->context_cache_entry; - /* Try to fetch slpte form IOTLB */ - iotlb_entry = vtd_lookup_iotlb(s, source_id, addr); - if (iotlb_entry) { - trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, - iotlb_entry->domain_id); - slpte = iotlb_entry->slpte; - access_flags = iotlb_entry->access_flags; - page_mask = iotlb_entry->mask; - goto out; + /* Try to fetch slpte form IOTLB, we don't need RID2PASID logic */ + if (!rid2pasid) { + iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr); + if (iotlb_entry) { + trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, + iotlb_entry->domain_id); + slpte = iotlb_entry->slpte; + access_flags = iotlb_entry->access_flags; + page_mask = iotlb_entry->mask; + goto out; + } } /* Try to fetch context-entry from cache first */ @@ -1777,10 +1866,11 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, ce = cc_entry->context_entry; is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; if (!is_fpd_set && s->root_scalable) { - ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set); + ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, pasid); if (ret_fr) { vtd_report_fault(s, -ret_fr, is_fpd_set, - source_id, addr, is_write); + source_id, addr, is_write, + false, 0); goto error; } } @@ -1788,11 +1878,12 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce); is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; if (!ret_fr && !is_fpd_set && s->root_scalable) { - ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set); + ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, pasid); } if (ret_fr) { vtd_report_fault(s, -ret_fr, is_fpd_set, - source_id, addr, is_write); + source_id, addr, is_write, + false, 0); goto error; } /* Update context-cache */ @@ -1803,11 +1894,15 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, cc_entry->context_cache_gen = s->context_cache_gen; } + if (rid2pasid) { + pasid = VTD_CE_GET_RID2PASID(&ce); + } + /* * We don't need to translate for pass-through context entries. * Also, let's ignore IOTLB caching as well for PT devices. */ - if (vtd_dev_pt_enabled(s, &ce)) { + if (vtd_dev_pt_enabled(s, &ce, pasid)) { entry->iova = addr & VTD_PAGE_MASK_4K; entry->translated_addr = entry->iova; entry->addr_mask = ~VTD_PAGE_MASK_4K; @@ -1828,18 +1923,31 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, return true; } + /* Try to fetch slpte form IOTLB for RID2PASID slow path */ + if (rid2pasid) { + iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr); + if (iotlb_entry) { + trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, + iotlb_entry->domain_id); + slpte = iotlb_entry->slpte; + access_flags = iotlb_entry->access_flags; + page_mask = iotlb_entry->mask; + goto out; + } + } + ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &slpte, &level, - &reads, &writes, s->aw_bits); + &reads, &writes, s->aw_bits, pasid); if (ret_fr) { vtd_report_fault(s, -ret_fr, is_fpd_set, source_id, - addr, is_write); + addr, is_write, pasid != PCI_NO_PASID, pasid); goto error; } page_mask = vtd_slpt_level_page_mask(level); access_flags = IOMMU_ACCESS_FLAG(reads, writes); - vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce), addr, slpte, - access_flags, level); + vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce, pasid), + addr, slpte, access_flags, level, pasid); out: vtd_iommu_unlock(s); entry->iova = addr & page_mask; @@ -2031,7 +2139,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) { if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn, &ce) && - domain_id == vtd_get_domain_id(s, &ce)) { + domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) { vtd_sync_shadow_page_table(vtd_as); } } @@ -2039,7 +2147,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, uint16_t domain_id, hwaddr addr, - uint8_t am) + uint8_t am, uint32_t pasid) { VTDAddressSpace *vtd_as; VTDContextEntry ce; @@ -2047,9 +2155,12 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, hwaddr size = (1 << am) * VTD_PAGE_SIZE; QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) { + if (pasid != PCI_NO_PASID && pasid != vtd_as->pasid) { + continue; + } ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn, &ce); - if (!ret && domain_id == vtd_get_domain_id(s, &ce)) { + if (!ret && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) { if (vtd_as_has_map_notifier(vtd_as)) { /* * As long as we have MAP notifications registered in @@ -2093,7 +2204,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, vtd_iommu_lock(s); g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info); vtd_iommu_unlock(s); - vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am); + vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, PCI_NO_PASID); } /* Flush IOTLB @@ -3162,6 +3273,7 @@ static Property vtd_properties[] = { DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE), DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE), DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false), + DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false), DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true), DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true), DEFINE_PROP_END_OF_LIST(), @@ -3436,7 +3548,64 @@ static const MemoryRegionOps vtd_mem_ir_ops = { }, }; -VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) +static void vtd_report_ir_illegal_access(VTDAddressSpace *vtd_as, + hwaddr addr, bool is_write) +{ + IntelIOMMUState *s = vtd_as->iommu_state; + uint8_t bus_n = pci_bus_num(vtd_as->bus); + uint16_t sid = PCI_BUILD_BDF(bus_n, vtd_as->devfn); + bool is_fpd_set = false; + VTDContextEntry ce; + + assert(vtd_as->pasid != PCI_NO_PASID); + + /* Try out best to fetch FPD, we can't do anything more */ + if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) { + is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; + if (!is_fpd_set && s->root_scalable) { + vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid); + } + } + + vtd_report_fault(s, VTD_FR_SM_INTERRUPT_ADDR, + is_fpd_set, sid, addr, is_write, + true, vtd_as->pasid); +} + +static MemTxResult vtd_mem_ir_fault_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + vtd_report_ir_illegal_access(opaque, addr, false); + + return MEMTX_ERROR; +} + +static MemTxResult vtd_mem_ir_fault_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size, + MemTxAttrs attrs) +{ + vtd_report_ir_illegal_access(opaque, addr, true); + + return MEMTX_ERROR; +} + +static const MemoryRegionOps vtd_mem_ir_fault_ops = { + .read_with_attrs = vtd_mem_ir_fault_read, + .write_with_attrs = vtd_mem_ir_fault_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + }, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + }, +}; + +VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, + int devfn, unsigned int pasid) { /* * We can't simply use sid here since the bus number might not be @@ -3445,6 +3614,7 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) struct vtd_as_key key = { .bus = bus, .devfn = devfn, + .pasid = pasid, }; VTDAddressSpace *vtd_dev_as; char name[128]; @@ -3455,13 +3625,21 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) new_key->bus = bus; new_key->devfn = devfn; + new_key->pasid = pasid; + + if (pasid == PCI_NO_PASID) { + snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn), + PCI_FUNC(devfn)); + } else { + snprintf(name, sizeof(name), "vtd-%02x.%x-pasid-%x", PCI_SLOT(devfn), + PCI_FUNC(devfn), pasid); + } - snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn), - PCI_FUNC(devfn)); vtd_dev_as = g_new0(VTDAddressSpace, 1); vtd_dev_as->bus = bus; vtd_dev_as->devfn = (uint8_t)devfn; + vtd_dev_as->pasid = pasid; vtd_dev_as->iommu_state = s; vtd_dev_as->context_cache_entry.context_cache_gen = 0; vtd_dev_as->iova_tree = iova_tree_new(); @@ -3502,6 +3680,24 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) VTD_INTERRUPT_ADDR_FIRST, &vtd_dev_as->iommu_ir, 1); + /* + * This region is used for catching fault to access interrupt + * range via passthrough + PASID. See also + * vtd_switch_address_space(). We can't use alias since we + * need to know the sid which is valid for MSI who uses + * bus_master_as (see msi_send_message()). + */ + memory_region_init_io(&vtd_dev_as->iommu_ir_fault, OBJECT(s), + &vtd_mem_ir_fault_ops, vtd_dev_as, "vtd-no-ir", + VTD_INTERRUPT_ADDR_SIZE); + /* + * Hook to root since when PT is enabled vtd_dev_as->iommu + * will be disabled. + */ + memory_region_add_subregion_overlap(MEMORY_REGION(&vtd_dev_as->root), + VTD_INTERRUPT_ADDR_FIRST, + &vtd_dev_as->iommu_ir_fault, 2); + /* * Hook both the containers under the root container, we * switch between DMAR & noDMAR by enable/disable @@ -3622,7 +3818,7 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) "legacy mode", bus_n, PCI_SLOT(vtd_as->devfn), PCI_FUNC(vtd_as->devfn), - vtd_get_domain_id(s, &ce), + vtd_get_domain_id(s, &ce, vtd_as->pasid), ce.hi, ce.lo); if (vtd_as_has_map_notifier(vtd_as)) { /* This is required only for MAP typed notifiers */ @@ -3632,10 +3828,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) .notify_unmap = false, .aw = s->aw_bits, .as = vtd_as, - .domain_id = vtd_get_domain_id(s, &ce), + .domain_id = vtd_get_domain_id(s, &ce, vtd_as->pasid), }; - vtd_page_walk(s, &ce, 0, ~0ULL, &info); + vtd_page_walk(s, &ce, 0, ~0ULL, &info, vtd_as->pasid); } } else { trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn), @@ -3735,6 +3931,10 @@ static void vtd_init(IntelIOMMUState *s) s->ecap |= VTD_ECAP_SC; } + if (s->pasid) { + s->ecap |= VTD_ECAP_PASID; + } + vtd_reset_caches(s); /* Define registers with default values and bit semantics */ @@ -3808,7 +4008,7 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) assert(0 <= devfn && devfn < PCI_DEVFN_MAX); - vtd_as = vtd_find_add_as(s, bus, devfn); + vtd_as = vtd_find_add_as(s, bus, devfn, PCI_NO_PASID); return &vtd_as->as; } @@ -3851,6 +4051,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) return false; } + if (s->pasid && !s->scalable_mode) { + error_setg(errp, "Need to set scalable mode for PASID"); + return false; + } + return true; } @@ -3887,6 +4092,17 @@ static void vtd_realize(DeviceState *dev, Error **errp) X86MachineState *x86ms = X86_MACHINE(ms); PCIBus *bus = pcms->bus; IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); + + if (s->pasid && x86_iommu->dt_supported) { + /* + * PASID-based-Device-TLB Invalidate Descriptor is not + * implemented and it requires support from vhost layer which + * needs to be implemented in the future. + */ + error_setg(errp, "PASID based device IOTLB is not supported"); + return; + } if (!vtd_decide_config(s, errp)) { return; @@ -3913,7 +4129,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem); /* No corresponding destroy */ - s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal, + s->iotlb = g_hash_table_new_full(vtd_iotlb_hash, vtd_iotlb_equal, g_free, g_free); s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal, g_free, g_free); diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 930ce61feb..f090e61e11 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -114,8 +114,9 @@ VTD_INTERRUPT_ADDR_FIRST + 1) /* The shift of source_id in the key of IOTLB hash table */ -#define VTD_IOTLB_SID_SHIFT 36 -#define VTD_IOTLB_LVL_SHIFT 52 +#define VTD_IOTLB_SID_SHIFT 20 +#define VTD_IOTLB_LVL_SHIFT 28 +#define VTD_IOTLB_PASID_SHIFT 30 #define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */ /* IOTLB_REG */ @@ -191,6 +192,7 @@ #define VTD_ECAP_SC (1ULL << 7) #define VTD_ECAP_MHMV (15ULL << 20) #define VTD_ECAP_SRS (1ULL << 31) +#define VTD_ECAP_PASID (1ULL << 40) #define VTD_ECAP_SMTS (1ULL << 43) #define VTD_ECAP_SLTS (1ULL << 46) @@ -211,6 +213,8 @@ #define VTD_CAP_DRAIN_READ (1ULL << 55) #define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE) #define VTD_CAP_CM (1ULL << 7) +#define VTD_PASID_ID_SHIFT 20 +#define VTD_PASID_ID_MASK ((1ULL << VTD_PASID_ID_SHIFT) - 1) /* Supported Adjusted Guest Address Widths */ #define VTD_CAP_SAGAW_SHIFT 8 @@ -262,6 +266,8 @@ #define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK) /* For the low 64-bit of 128-bit */ #define VTD_FRCD_FI(val) ((val) & ~0xfffULL) +#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40) +#define VTD_FRCD_PP(val) (((val) & 0x1) << 31) /* DMA Remapping Fault Conditions */ typedef enum VTDFaultReason { @@ -379,6 +385,11 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL) #define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL #define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL +#define VTD_INV_DESC_IOTLB_PASID_PASID (2ULL << 4) +#define VTD_INV_DESC_IOTLB_PASID_PAGE (3ULL << 4) +#define VTD_INV_DESC_IOTLB_PASID(val) (((val) >> 32) & VTD_PASID_ID_MASK) +#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO 0xfff00000000001c0ULL +#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI 0xf80ULL /* Mask for Device IOTLB Invalidate Descriptor */ #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL) @@ -413,6 +424,7 @@ typedef union VTDInvDesc VTDInvDesc; /* Information about page-selective IOTLB invalidate */ struct VTDIOTLBPageInvInfo { uint16_t domain_id; + uint32_t pasid; uint64_t addr; uint8_t mask; }; diff --git a/hw/i386/trace-events b/hw/i386/trace-events index e49814dd64..04fd71bfc4 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -12,6 +12,8 @@ vtd_inv_desc_cc_devices(uint16_t sid, uint16_t fmask) "context invalidate device vtd_inv_desc_iotlb_global(void) "iotlb invalidate global" vtd_inv_desc_iotlb_domain(uint16_t domain) "iotlb invalidate whole domain 0x%"PRIx16 vtd_inv_desc_iotlb_pages(uint16_t domain, uint64_t addr, uint8_t mask) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8 +vtd_inv_desc_iotlb_pasid_pages(uint16_t domain, uint64_t addr, uint8_t mask, uint32_t pasid) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8" pasid 0x%"PRIx32 +vtd_inv_desc_iotlb_pasid(uint16_t domain, uint32_t pasid) "iotlb invalidate domain 0x%"PRIx16" pasid 0x%"PRIx32 vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write addr 0x%"PRIx64" data 0x%"PRIx32 vtd_inv_desc_wait_irq(const char *msg) "%s" vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64 -- cgit v1.2.3