aboutsummaryrefslogtreecommitdiff
path: root/hw/i386/pc.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/i386/pc.c')
-rw-r--r--hw/i386/pc.c209
1 files changed, 163 insertions, 46 deletions
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index d2b5823ffb..7280c02ce3 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -814,10 +814,122 @@ void xen_load_linux(PCMachineState *pcms)
#define PC_ROM_ALIGN 0x800
#define PC_ROM_SIZE (PC_ROM_MAX - PC_ROM_MIN_VGA)
+/*
+ * Return the address just past what is placed above 4G: the value reported
+ * by sgx_epc_above_4g_end() when an SGX EPC region is configured (non-zero
+ * size), otherwise the end of the RAM mapped at above_4g_mem_start.
+ */
+static hwaddr pc_above_4g_end(PCMachineState *pcms)
+{
+    X86MachineState *x86ms = X86_MACHINE(pcms);
+
+    if (pcms->sgx_epc.size == 0) {
+        return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
+    }
+
+    return sgx_epc_above_4g_end(&pcms->sgx_epc);
+}
+
+/*
+ * Compute where the device memory region goes and how large it is.
+ *
+ * @base: set to the end of the above-4G area, rounded up to 1 GiB.
+ * @device_mem_size: set to maxram_size - ram_size, plus 1 GiB per RAM slot
+ *                   when the machine class enforces aligned DIMMs.
+ */
+static void pc_get_device_memory_range(PCMachineState *pcms,
+                                       hwaddr *base,
+                                       ram_addr_t *device_mem_size)
+{
+    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
+    MachineState *machine = MACHINE(pcms);
+    ram_addr_t region_size = machine->maxram_size - machine->ram_size;
+    hwaddr region_base = ROUND_UP(pc_above_4g_end(pcms), 1 * GiB);
+
+    if (pcmc->enforce_aligned_dimm) {
+        /* size device region assuming 1G page max alignment per slot */
+        region_size += machine->ram_slots * (1 * GiB);
+    }
+
+    *base = region_base;
+    *device_mem_size = region_size;
+}
+
+/*
+ * Return the guest physical address at which the CXL host register region
+ * starts.
+ *
+ * The region is placed right after the device memory range, but only when
+ * such a range is actually set up, i.e. under the same condition
+ * (has_reserved_memory and ram_size < maxram_size) that pc_memory_init()
+ * and pc_pci_hole64_start() use. Otherwise it directly follows whatever
+ * pc_above_4g_end() reports, without the 1 GiB round-up that the device
+ * memory range applies.
+ */
+static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
+{
+    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
+    MachineState *machine = MACHINE(pcms);
+    hwaddr cxl_base;
+    ram_addr_t size;
+
+    if (pcmc->has_reserved_memory &&
+        (machine->ram_size < machine->maxram_size)) {
+        pc_get_device_memory_range(pcms, &cxl_base, &size);
+        cxl_base += size;
+    } else {
+        cxl_base = pc_above_4g_end(pcms);
+    }
+
+    return cxl_base;
+}
+
+/*
+ * Return the address just past the CXL area: the 1 MiB that begins at
+ * pc_get_cxl_range_start() and, when fixed windows are configured, the sum
+ * of their sizes laid out from the next 256 MiB boundary.
+ */
+static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
+{
+    uint64_t end = pc_get_cxl_range_start(pcms) + MiB;
+    GList *windows = pcms->cxl_devices_state.fixed_windows;
+    GList *node;
+
+    if (!windows) {
+        return end;
+    }
+
+    end = ROUND_UP(end, 256 * MiB);
+    for (node = windows; node != NULL; node = node->next) {
+        CXLFixedWindow *fw = node->data;
+
+        end += fw->size;
+    }
+
+    return end;
+}
+
+/*
+ * Return the highest guest physical address the machine may use: the last
+ * byte of the 64-bit PCI hole (pc_pci_hole64_start() + @pci_hole64_size - 1),
+ * or the top of the CPU address space when phys-bits is 32 or less.
+ */
+static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
+{
+    X86CPU *cpu = X86_CPU(first_cpu);
+    hwaddr max_gpa;
+
+    if (cpu->phys_bits > 32) {
+        max_gpa = pc_pci_hole64_start() + pci_hole64_size - 1;
+    } else {
+        /* 32-bit systems don't have hole64 thus return max CPU address */
+        max_gpa = ((hwaddr)1 << cpu->phys_bits) - 1;
+    }
+
+    return max_gpa;
+}
+
+/*
+ * AMD systems with an IOMMU have an additional hole just below the 1TB
+ * boundary, made up of special GPAs that cannot be DMA mapped. Depending
+ * on kernel version, VFIO may or may not let you DMA map those ranges.
+ * Starting with Linux v5.4 the ranges are validated, so guests with certain
+ * memory sizes can't be created on AMD machines. Using those IOVA ranges is
+ * wrong in any case, as it can lead to IOMMU INVALID_DEVICE_REQUEST or worse.
+ * The ranges reserved for Hyper-Transport are:
+ *
+ * FD_0000_0000h - FF_FFFF_FFFFh
+ *
+ * The ranges represent the following:
+ *
+ * Base Address Top Address Use
+ *
+ * FD_0000_0000h FD_F7FF_FFFFh Reserved interrupt address space
+ * FD_F800_0000h FD_F8FF_FFFFh Interrupt/EOI IntCtl
+ * FD_F900_0000h FD_F90F_FFFFh Legacy PIC IACK
+ * FD_F910_0000h FD_F91F_FFFFh System Management
+ * FD_F920_0000h FD_FAFF_FFFFh Reserved Page Tables
+ * FD_FB00_0000h FD_FBFF_FFFFh Address Translation
+ * FD_FC00_0000h FD_FDFF_FFFFh I/O Space
+ * FD_FE00_0000h FD_FFFF_FFFFh Configuration
+ * FE_0000_0000h FE_1FFF_FFFFh Extended Configuration/Device Messages
+ * FE_2000_0000h FF_FFFF_FFFFh Reserved
+ *
+ * See AMD IOMMU spec, section 2.1.2 "IOMMU Logical Topology",
+ * Table 3: Special Address Controls (GPA) for more information.
+ *
+ * AMD_HT_START and AMD_HT_END are the first and last address of that
+ * range, AMD_ABOVE_1TB_START is the first address past it and AMD_HT_SIZE
+ * is the length of the range in bytes.
+ */
+#define AMD_HT_START 0xfd00000000UL
+#define AMD_HT_END 0xffffffffffUL
+#define AMD_ABOVE_1TB_START (AMD_HT_END + 1)
+#define AMD_HT_SIZE (AMD_ABOVE_1TB_START - AMD_HT_START)
+
void pc_memory_init(PCMachineState *pcms,
MemoryRegion *system_memory,
MemoryRegion *rom_memory,
- MemoryRegion **ram_memory)
+ MemoryRegion **ram_memory,
+ uint64_t pci_hole64_size)
{
int linux_boot, i;
MemoryRegion *option_rom_mr;
@@ -827,7 +939,9 @@ void pc_memory_init(PCMachineState *pcms,
MachineClass *mc = MACHINE_GET_CLASS(machine);
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
X86MachineState *x86ms = X86_MACHINE(pcms);
+ hwaddr maxphysaddr, maxusedaddr;
hwaddr cxl_base, cxl_resv_end = 0;
+ X86CPU *cpu = X86_CPU(first_cpu);
assert(machine->ram_size == x86ms->below_4g_mem_size +
x86ms->above_4g_mem_size);
@@ -835,6 +949,40 @@ void pc_memory_init(PCMachineState *pcms,
linux_boot = (machine->kernel_filename != NULL);
/*
+ * The HyperTransport range close to the 1T boundary is unique to AMD
+ * hosts with IOMMUs enabled. Restrict the ram-above-4g relocation
+ * to above 1T to AMD vCPUs only. @enforce_amd_1tb_hole is only false in
+ * older machine types (<= 7.0) for compatibility purposes.
+ */
+ if (IS_AMD_CPU(&cpu->env) && pcmc->enforce_amd_1tb_hole) {
+ /* Relocate RAM above 4G only if the max possible address reaches the HT range */
+ if (pc_max_used_gpa(pcms, pci_hole64_size) >= AMD_HT_START) {
+ x86ms->above_4g_mem_start = AMD_ABOVE_1TB_START;
+ }
+
+ /*
+ * Advertise the HT region if address space covers the reserved
+ * region or if we relocate.
+ */
+ if (cpu->phys_bits >= 40) {
+ e820_add_entry(AMD_HT_START, AMD_HT_SIZE, E820_RESERVED);
+ }
+ }
+
+ /*
+ * phys-bits is required to be appropriately configured
+ * to make sure max used GPA is reachable.
+ */
+ maxusedaddr = pc_max_used_gpa(pcms, pci_hole64_size);
+ maxphysaddr = ((hwaddr)1 << cpu->phys_bits) - 1;
+ if (maxphysaddr < maxusedaddr) {
+ error_report("Address space limit 0x%"PRIx64" < 0x%"PRIx64
+ " phys-bits too low (%u)",
+ maxphysaddr, maxusedaddr, cpu->phys_bits);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
* Split single memory region and use aliases to address portions of it,
* done for backwards compatibility with older qemus.
*/
@@ -850,9 +998,10 @@ void pc_memory_init(PCMachineState *pcms,
machine->ram,
x86ms->below_4g_mem_size,
x86ms->above_4g_mem_size);
- memory_region_add_subregion(system_memory, 0x100000000ULL,
+ memory_region_add_subregion(system_memory, x86ms->above_4g_mem_start,
ram_above_4g);
- e820_add_entry(0x100000000ULL, x86ms->above_4g_mem_size, E820_RAM);
+ e820_add_entry(x86ms->above_4g_mem_start, x86ms->above_4g_mem_size,
+ E820_RAM);
}
if (pcms->sgx_epc.size != 0) {
@@ -874,7 +1023,7 @@ void pc_memory_init(PCMachineState *pcms,
/* initialize device memory address space */
if (pcmc->has_reserved_memory &&
(machine->ram_size < machine->maxram_size)) {
- ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size;
+ ram_addr_t device_mem_size;
if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) {
error_report("unsupported amount of memory slots: %"PRIu64,
@@ -889,20 +1038,7 @@ void pc_memory_init(PCMachineState *pcms,
exit(EXIT_FAILURE);
}
- if (pcms->sgx_epc.size != 0) {
- machine->device_memory->base = sgx_epc_above_4g_end(&pcms->sgx_epc);
- } else {
- machine->device_memory->base =
- 0x100000000ULL + x86ms->above_4g_mem_size;
- }
-
- machine->device_memory->base =
- ROUND_UP(machine->device_memory->base, 1 * GiB);
-
- if (pcmc->enforce_aligned_dimm) {
- /* size device region assuming 1G page max alignment per slot */
- device_mem_size += (1 * GiB) * machine->ram_slots;
- }
+ pc_get_device_memory_range(pcms, &machine->device_memory->base, &device_mem_size);
if ((machine->device_memory->base + device_mem_size) <
device_mem_size) {
@@ -921,17 +1057,7 @@ void pc_memory_init(PCMachineState *pcms,
MemoryRegion *mr = &pcms->cxl_devices_state.host_mr;
hwaddr cxl_size = MiB;
- if (pcmc->has_reserved_memory && machine->device_memory->base) {
- cxl_base = machine->device_memory->base;
- if (!pcmc->broken_reserved_end) {
- cxl_base += memory_region_size(&machine->device_memory->mr);
- }
- } else if (pcms->sgx_epc.size != 0) {
- cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc);
- } else {
- cxl_base = 0x100000000ULL + x86ms->above_4g_mem_size;
- }
-
+ cxl_base = pc_get_cxl_range_start(pcms);
e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
memory_region_add_subregion(system_memory, cxl_base, mr);
@@ -1016,28 +1142,18 @@ uint64_t pc_pci_hole64_start(void)
PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
MachineState *ms = MACHINE(pcms);
- X86MachineState *x86ms = X86_MACHINE(pcms);
uint64_t hole64_start = 0;
+ ram_addr_t size = 0;
- if (pcms->cxl_devices_state.host_mr.addr) {
- hole64_start = pcms->cxl_devices_state.host_mr.addr +
- memory_region_size(&pcms->cxl_devices_state.host_mr);
- if (pcms->cxl_devices_state.fixed_windows) {
- GList *it;
- for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
- CXLFixedWindow *fw = it->data;
- hole64_start = fw->mr.addr + memory_region_size(&fw->mr);
- }
- }
- } else if (pcmc->has_reserved_memory && ms->device_memory->base) {
- hole64_start = ms->device_memory->base;
+ if (pcms->cxl_devices_state.is_enabled) {
+ hole64_start = pc_get_cxl_range_end(pcms);
+ } else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) {
+ pc_get_device_memory_range(pcms, &hole64_start, &size);
if (!pcmc->broken_reserved_end) {
- hole64_start += memory_region_size(&ms->device_memory->mr);
+ hole64_start += size;
}
- } else if (pcms->sgx_epc.size != 0) {
- hole64_start = sgx_epc_above_4g_end(&pcms->sgx_epc);
} else {
- hole64_start = 0x100000000ULL + x86ms->above_4g_mem_size;
+ hole64_start = pc_above_4g_end(pcms);
}
return ROUND_UP(hole64_start, 1 * GiB);
@@ -1787,6 +1903,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->has_reserved_memory = true;
pcmc->kvmclock_enabled = true;
pcmc->enforce_aligned_dimm = true;
+ pcmc->enforce_amd_1tb_hole = true;
/* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
* to be used at the moment, 32K should be enough for a while. */
pcmc->acpi_data_size = 0x20000 + 0x8000;