diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2022-07-26 12:57:20 -0700 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2022-07-26 12:57:20 -0700 |
commit | e5b6555fb8e8a91dd1d612e2e2d66bf5f43ad1dd (patch) | |
tree | 5328762ce117fbe6dbb2ac2a3b593c229f404326 | |
parent | f6cce6bcb2ef959cdd4da0e368f7c72045f21d6d (diff) | |
parent | 0522be9a0c0094088ccef7aab352c57f483ca250 (diff) |
Merge tag 'for_upstream' of git://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
pc,virtio: fixes
Several fixes. From now on, regression fixes only.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
# -----BEGIN PGP SIGNATURE-----
#
# iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmLgQr8PHG1zdEByZWRo
# YXQuY29tAAoJECgfDbjSjVRpGUUIAKtNhrnKopGm4LlRpx8zN3Jc1Jo0nb648gaM
# Oyi+Pl8+hpESUhaWN10XDk38/QuPQfIFeR2ZhfYjFTRlZE+n3X9LVlwL8ejjP8KH
# AcWm78Ff/SLA45aMKMmw74pvEDNsoPYTp7TrfeIej5ub8BIXr8+8pqDdIR9WwtWO
# PbhLNXkTT2yLEs6jCVT4/dyh7zivSkrY7G/RVmtUaFe3PgY8fdW2z3+Txz7UIMgw
# CQoGuAucCO5ToBbs2CbT0V5yxY6G5VO6Qd8g0PzDW4M6GsY/Xr5QCnyJe0jTW0d6
# Dcc7UZFAzGNzyQCxHCic9xwTO+ZcJPJlH5TwknunxOb9xwCx4Qs=
# =zN41
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 26 Jul 2022 12:38:39 PM PDT
# gpg: using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg: issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [undefined]
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469
* tag 'for_upstream' of git://git.kernel.org/pub/scm/virt/kvm/mst/qemu:
hw/virtio/virtio-iommu: Enforce power-of-two notify for both MAP and UNMAP
i386/pc: restrict AMD only enforcing of 1Tb hole to new machine type
i386/pc: relocate 4g start to 1T where applicable
i386/pc: bounds check phys-bits against max used GPA
i386/pc: factor out device_memory base/size to helper
i386/pc: handle unitialized mr in pc_get_cxl_range_end()
i386/pc: factor out cxl range start to helper
i386/pc: factor out cxl range end to helper
i386/pc: factor out above-4g end to an helper
i386/pc: pass pci_hole64_size to pc_memory_init()
i386/pc: create pci-host qdev prior to pc_memory_init()
hw/i386: add 4g boundary start to X86MachineState
hw/cxl: Fix size of constant in interleave granularity function.
hw/i386/pc: Always place CXL Memory Regions after device_memory
hw/machine: Clear out left over CXL related pointer from move of state handling to machines.
acpi/nvdimm: Define trace events for NVDIMM and substitute nvdimm_debug()
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- | hw/acpi/nvdimm.c | 35 | ||||
-rw-r--r-- | hw/acpi/trace-events | 13 | ||||
-rw-r--r-- | hw/i386/acpi-build.c | 2 | ||||
-rw-r--r-- | hw/i386/pc.c | 209 | ||||
-rw-r--r-- | hw/i386/pc_piix.c | 15 | ||||
-rw-r--r-- | hw/i386/pc_q35.c | 15 | ||||
-rw-r--r-- | hw/i386/sgx.c | 2 | ||||
-rw-r--r-- | hw/i386/x86.c | 1 | ||||
-rw-r--r-- | hw/pci-host/i440fx.c | 5 | ||||
-rw-r--r-- | hw/virtio/virtio-iommu.c | 47 | ||||
-rw-r--r-- | include/hw/boards.h | 1 | ||||
-rw-r--r-- | include/hw/cxl/cxl_component.h | 2 | ||||
-rw-r--r-- | include/hw/i386/pc.h | 4 | ||||
-rw-r--r-- | include/hw/i386/x86.h | 3 | ||||
-rw-r--r-- | include/hw/mem/nvdimm.h | 8 | ||||
-rw-r--r-- | include/hw/pci-host/i440fx.h | 3 |
16 files changed, 258 insertions, 107 deletions
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index 5f85b16327..31e46df0bd 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -35,6 +35,7 @@ #include "hw/nvram/fw_cfg.h" #include "hw/mem/nvdimm.h" #include "qemu/nvdimm-utils.h" +#include "trace.h" /* * define Byte Addressable Persistent Memory (PM) Region according to @@ -550,8 +551,8 @@ static void nvdimm_dsm_func_read_fit(NVDIMMState *state, NvdimmDsmIn *in, fit = fit_buf->fit; - nvdimm_debug("Read FIT: offset 0x%x FIT size 0x%x Dirty %s.\n", - read_fit->offset, fit->len, fit_buf->dirty ? "Yes" : "No"); + trace_acpi_nvdimm_read_fit(read_fit->offset, fit->len, + fit_buf->dirty ? "Yes" : "No"); if (read_fit->offset > fit->len) { func_ret_status = NVDIMM_DSM_RET_STATUS_INVALID; @@ -658,7 +659,7 @@ static void nvdimm_dsm_label_size(NVDIMMDevice *nvdimm, hwaddr dsm_mem_addr) label_size = nvdimm->label_size; mxfer = nvdimm_get_max_xfer_label_size(); - nvdimm_debug("label_size 0x%x, max_xfer 0x%x.\n", label_size, mxfer); + trace_acpi_nvdimm_label_info(label_size, mxfer); label_size_out.func_ret_status = cpu_to_le32(NVDIMM_DSM_RET_STATUS_SUCCESS); label_size_out.label_size = cpu_to_le32(label_size); @@ -674,20 +675,18 @@ static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm, uint32_t ret = NVDIMM_DSM_RET_STATUS_INVALID; if (offset + length < offset) { - nvdimm_debug("offset 0x%x + length 0x%x is overflow.\n", offset, - length); + trace_acpi_nvdimm_label_overflow(offset, length); return ret; } if (nvdimm->label_size < offset + length) { - nvdimm_debug("position 0x%x is beyond label data (len = %" PRIx64 ").\n", - offset + length, nvdimm->label_size); + trace_acpi_nvdimm_label_oversize(offset + length, nvdimm->label_size); return ret; } if (length > nvdimm_get_max_xfer_label_size()) { - nvdimm_debug("length (0x%x) is larger than max_xfer (0x%x).\n", - length, nvdimm_get_max_xfer_label_size()); + trace_acpi_nvdimm_label_xfer_exceed(length, + nvdimm_get_max_xfer_label_size()); return ret; } @@ -710,8 +709,8 @@ static void nvdimm_dsm_get_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in, get_label_data->offset = le32_to_cpu(get_label_data->offset); get_label_data->length = le32_to_cpu(get_label_data->length); - nvdimm_debug("Read Label Data: offset 0x%x length 0x%x.\n", - get_label_data->offset, get_label_data->length); + trace_acpi_nvdimm_read_label(get_label_data->offset, + get_label_data->length); status = nvdimm_rw_label_data_check(nvdimm, get_label_data->offset, get_label_data->length); @@ -749,8 +748,8 @@ static void nvdimm_dsm_set_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in, set_label_data->offset = le32_to_cpu(set_label_data->offset); set_label_data->length = le32_to_cpu(set_label_data->length); - nvdimm_debug("Write Label Data: offset 0x%x length 0x%x.\n", - set_label_data->offset, set_label_data->length); + trace_acpi_nvdimm_write_label(set_label_data->offset, + set_label_data->length); status = nvdimm_rw_label_data_check(nvdimm, set_label_data->offset, set_label_data->length); @@ -821,7 +820,7 @@ static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr) static uint64_t nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size) { - nvdimm_debug("BUG: we never read _DSM IO Port.\n"); + trace_acpi_nvdimm_read_io_port(); return 0; } @@ -832,7 +831,7 @@ nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) NvdimmDsmIn *in; hwaddr dsm_mem_addr = val; - nvdimm_debug("dsm memory address 0x%" HWADDR_PRIx ".\n", dsm_mem_addr); + trace_acpi_nvdimm_dsm_mem_addr(dsm_mem_addr); /* * The DSM memory is mapped to guest address space so an evil guest @@ -846,12 +845,10 @@ nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) in->function = le32_to_cpu(in->function); in->handle = le32_to_cpu(in->handle); - nvdimm_debug("Revision 0x%x Handler 0x%x Function 0x%x.\n", in->revision, - in->handle, in->function); + trace_acpi_nvdimm_dsm_info(in->revision, in->handle, in->function); if (in->revision != 0x1 /* Currently we only support DSM Spec Rev1. */) { - nvdimm_debug("Revision 0x%x is not supported, expect 0x%x.\n", - in->revision, 0x1); + trace_acpi_nvdimm_invalid_revision(in->revision); nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_UNSUPPORT, dsm_mem_addr); goto exit; } diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events index 2250126a22..eb60b04f9b 100644 --- a/hw/acpi/trace-events +++ b/hw/acpi/trace-events @@ -70,3 +70,16 @@ acpi_erst_reset_out(unsigned record_count) "record_count %u" acpi_erst_post_load(void *header, unsigned slot_size) "header: 0x%p slot_size %u" acpi_erst_class_init_in(void) acpi_erst_class_init_out(void) + +# nvdimm.c +acpi_nvdimm_read_fit(uint32_t offset, uint32_t len, const char *dirty) "Read FIT: offset 0x%" PRIx32 " FIT size 0x%" PRIx32 " Dirty %s" +acpi_nvdimm_label_info(uint32_t label_size, uint32_t mxfer) "label_size 0x%" PRIx32 ", max_xfer 0x%" PRIx32 +acpi_nvdimm_label_overflow(uint32_t offset, uint32_t length) "offset 0x%" PRIx32 " + length 0x%" PRIx32 " is overflow" +acpi_nvdimm_label_oversize(uint32_t pos, uint64_t size) "position 0x%" PRIx32 " is beyond label data (len = %" PRIu64 ")" +acpi_nvdimm_label_xfer_exceed(uint32_t length, uint32_t max_xfer) "length (0x%" PRIx32 ") is larger than max_xfer (0x%" PRIx32 ")" +acpi_nvdimm_read_label(uint32_t offset, uint32_t length) "Read Label Data: offset 0x%" PRIx32 " length 0x%" PRIx32 +acpi_nvdimm_write_label(uint32_t offset, uint32_t length) "Write Label Data: offset 0x%" PRIx32 " length 0x%" PRIx32 +acpi_nvdimm_read_io_port(void) "Alert: we never read _DSM IO Port" +acpi_nvdimm_dsm_mem_addr(uint64_t dsm_mem_addr) "dsm memory address 0x%" PRIx64 +acpi_nvdimm_dsm_info(uint32_t revision, uint32_t handle, uint32_t function) "Revision 0x%" PRIx32 " Handle 0x%" PRIx32 " Function 0x%" PRIx32 +acpi_nvdimm_invalid_revision(uint32_t revision) "Revision 0x%" PRIx32 " is not supported, expect 0x1" diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index cad6f5ac41..0355bd3dda 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2024,7 +2024,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) build_srat_memory(table_data, mem_base, mem_len, i - 1, MEM_AFFINITY_ENABLED); } - mem_base = 1ULL << 32; + mem_base = x86ms->above_4g_mem_start; mem_len = next_base - x86ms->below_4g_mem_size; next_base = mem_base + mem_len; } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index d2b5823ffb..7280c02ce3 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -814,10 +814,122 @@ void xen_load_linux(PCMachineState *pcms) #define PC_ROM_ALIGN 0x800 #define PC_ROM_SIZE (PC_ROM_MAX - PC_ROM_MIN_VGA) +static hwaddr pc_above_4g_end(PCMachineState *pcms) +{ + X86MachineState *x86ms = X86_MACHINE(pcms); + + if (pcms->sgx_epc.size != 0) { + return sgx_epc_above_4g_end(&pcms->sgx_epc); + } + + return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size; +} + +static void pc_get_device_memory_range(PCMachineState *pcms, + hwaddr *base, + ram_addr_t *device_mem_size) +{ + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + MachineState *machine = MACHINE(pcms); + ram_addr_t size; + hwaddr addr; + + size = machine->maxram_size - machine->ram_size; + addr = ROUND_UP(pc_above_4g_end(pcms), 1 * GiB); + + if (pcmc->enforce_aligned_dimm) { + /* size device region assuming 1G page max alignment per slot */ + size += (1 * GiB) * machine->ram_slots; + } + + *base = addr; + *device_mem_size = size; +} + +static uint64_t pc_get_cxl_range_start(PCMachineState *pcms) +{ + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + hwaddr cxl_base; + ram_addr_t size; + + if (pcmc->has_reserved_memory) { + pc_get_device_memory_range(pcms, &cxl_base, &size); + cxl_base += size; + } else { + cxl_base = pc_above_4g_end(pcms); + } + + return cxl_base; +} + +static uint64_t pc_get_cxl_range_end(PCMachineState *pcms) +{ + uint64_t start = pc_get_cxl_range_start(pcms) + MiB; + + if (pcms->cxl_devices_state.fixed_windows) { + GList *it; + + start = ROUND_UP(start, 256 * MiB); + for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) { + CXLFixedWindow *fw = it->data; + start += fw->size; + } + } + + return start; +} + +static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size) +{ + X86CPU *cpu = X86_CPU(first_cpu); + + /* 32-bit systems don't have hole64 thus return max CPU address */ + if (cpu->phys_bits <= 32) { + return ((hwaddr)1 << cpu->phys_bits) - 1; + } + + return pc_pci_hole64_start() + pci_hole64_size - 1; +} + +/* + * AMD systems with an IOMMU have an additional hole close to the + * 1Tb, which are special GPAs that cannot be DMA mapped. Depending + * on kernel version, VFIO may or may not let you DMA map those ranges. + * Starting Linux v5.4 we validate it, and can't create guests on AMD machines + * with certain memory sizes. It's also wrong to use those IOVA ranges + * in detriment of leading to IOMMU INVALID_DEVICE_REQUEST or worse. + * The ranges reserved for Hyper-Transport are: + * + * FD_0000_0000h - FF_FFFF_FFFFh + * + * The ranges represent the following: + * + * Base Address Top Address Use + * + * FD_0000_0000h FD_F7FF_FFFFh Reserved interrupt address space + * FD_F800_0000h FD_F8FF_FFFFh Interrupt/EOI IntCtl + * FD_F900_0000h FD_F90F_FFFFh Legacy PIC IACK + * FD_F910_0000h FD_F91F_FFFFh System Management + * FD_F920_0000h FD_FAFF_FFFFh Reserved Page Tables + * FD_FB00_0000h FD_FBFF_FFFFh Address Translation + * FD_FC00_0000h FD_FDFF_FFFFh I/O Space + * FD_FE00_0000h FD_FFFF_FFFFh Configuration + * FE_0000_0000h FE_1FFF_FFFFh Extended Configuration/Device Messages + * FE_2000_0000h FF_FFFF_FFFFh Reserved + * + * See AMD IOMMU spec, section 2.1.2 "IOMMU Logical Topology", + * Table 3: Special Address Controls (GPA) for more information. + */ +#define AMD_HT_START 0xfd00000000UL +#define AMD_HT_END 0xffffffffffUL +#define AMD_ABOVE_1TB_START (AMD_HT_END + 1) +#define AMD_HT_SIZE (AMD_ABOVE_1TB_START - AMD_HT_START) + void pc_memory_init(PCMachineState *pcms, MemoryRegion *system_memory, MemoryRegion *rom_memory, - MemoryRegion **ram_memory) + MemoryRegion **ram_memory, + uint64_t pci_hole64_size) { int linux_boot, i; MemoryRegion *option_rom_mr; @@ -827,7 +939,9 @@ void pc_memory_init(PCMachineState *pcms, MachineClass *mc = MACHINE_GET_CLASS(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); X86MachineState *x86ms = X86_MACHINE(pcms); + hwaddr maxphysaddr, maxusedaddr; hwaddr cxl_base, cxl_resv_end = 0; + X86CPU *cpu = X86_CPU(first_cpu); assert(machine->ram_size == x86ms->below_4g_mem_size + x86ms->above_4g_mem_size); @@ -835,6 +949,40 @@ void pc_memory_init(PCMachineState *pcms, linux_boot = (machine->kernel_filename != NULL); /* + * The HyperTransport range close to the 1T boundary is unique to AMD + * hosts with IOMMUs enabled. Restrict the ram-above-4g relocation + * to above 1T to AMD vCPUs only. @enforce_amd_1tb_hole is only false in + * older machine types (<= 7.0) for compatibility purposes. + */ + if (IS_AMD_CPU(&cpu->env) && pcmc->enforce_amd_1tb_hole) { + /* Bail out if max possible address does not cross HT range */ + if (pc_max_used_gpa(pcms, pci_hole64_size) >= AMD_HT_START) { + x86ms->above_4g_mem_start = AMD_ABOVE_1TB_START; + } + + /* + * Advertise the HT region if address space covers the reserved + * region or if we relocate. + */ + if (cpu->phys_bits >= 40) { + e820_add_entry(AMD_HT_START, AMD_HT_SIZE, E820_RESERVED); + } + } + + /* + * phys-bits is required to be appropriately configured + * to make sure max used GPA is reachable. + */ + maxusedaddr = pc_max_used_gpa(pcms, pci_hole64_size); + maxphysaddr = ((hwaddr)1 << cpu->phys_bits) - 1; + if (maxphysaddr < maxusedaddr) { + error_report("Address space limit 0x%"PRIx64" < 0x%"PRIx64 + " phys-bits too low (%u)", + maxphysaddr, maxusedaddr, cpu->phys_bits); + exit(EXIT_FAILURE); + } + + /* * Split single memory region and use aliases to address portions of it, * done for backwards compatibility with older qemus. */ @@ -850,9 +998,10 @@ void pc_memory_init(PCMachineState *pcms, machine->ram, x86ms->below_4g_mem_size, x86ms->above_4g_mem_size); - memory_region_add_subregion(system_memory, 0x100000000ULL, + memory_region_add_subregion(system_memory, x86ms->above_4g_mem_start, ram_above_4g); - e820_add_entry(0x100000000ULL, x86ms->above_4g_mem_size, E820_RAM); + e820_add_entry(x86ms->above_4g_mem_start, x86ms->above_4g_mem_size, + E820_RAM); } if (pcms->sgx_epc.size != 0) { @@ -874,7 +1023,7 @@ void pc_memory_init(PCMachineState *pcms, /* initialize device memory address space */ if (pcmc->has_reserved_memory && (machine->ram_size < machine->maxram_size)) { - ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size; + ram_addr_t device_mem_size; if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) { error_report("unsupported amount of memory slots: %"PRIu64, @@ -889,20 +1038,7 @@ void pc_memory_init(PCMachineState *pcms, exit(EXIT_FAILURE); } - if (pcms->sgx_epc.size != 0) { - machine->device_memory->base = sgx_epc_above_4g_end(&pcms->sgx_epc); - } else { - machine->device_memory->base = - 0x100000000ULL + x86ms->above_4g_mem_size; - } - - machine->device_memory->base = - ROUND_UP(machine->device_memory->base, 1 * GiB); - - if (pcmc->enforce_aligned_dimm) { - /* size device region assuming 1G page max alignment per slot */ - device_mem_size += (1 * GiB) * machine->ram_slots; - } + pc_get_device_memory_range(pcms, &machine->device_memory->base, &device_mem_size); if ((machine->device_memory->base + device_mem_size) < device_mem_size) { @@ -921,17 +1057,7 @@ void pc_memory_init(PCMachineState *pcms, MemoryRegion *mr = &pcms->cxl_devices_state.host_mr; hwaddr cxl_size = MiB; - if (pcmc->has_reserved_memory && machine->device_memory->base) { - cxl_base = machine->device_memory->base; - if (!pcmc->broken_reserved_end) { - cxl_base += memory_region_size(&machine->device_memory->mr); - } - } else if (pcms->sgx_epc.size != 0) { - cxl_base = sgx_epc_above_4g_end(&pcms->sgx_epc); - } else { - cxl_base = 0x100000000ULL + x86ms->above_4g_mem_size; - } - + cxl_base = pc_get_cxl_range_start(pcms); e820_add_entry(cxl_base, cxl_size, E820_RESERVED); memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); memory_region_add_subregion(system_memory, cxl_base, mr); @@ -1016,28 +1142,18 @@ uint64_t pc_pci_hole64_start(void) PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); MachineState *ms = MACHINE(pcms); - X86MachineState *x86ms = X86_MACHINE(pcms); uint64_t hole64_start = 0; + ram_addr_t size = 0; - if (pcms->cxl_devices_state.host_mr.addr) { - hole64_start = pcms->cxl_devices_state.host_mr.addr + - memory_region_size(&pcms->cxl_devices_state.host_mr); - if (pcms->cxl_devices_state.fixed_windows) { - GList *it; - for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) { - CXLFixedWindow *fw = it->data; - hole64_start = fw->mr.addr + memory_region_size(&fw->mr); - } - } - } else if (pcmc->has_reserved_memory && ms->device_memory->base) { - hole64_start = ms->device_memory->base; + if (pcms->cxl_devices_state.is_enabled) { + hole64_start = pc_get_cxl_range_end(pcms); + } else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) { + pc_get_device_memory_range(pcms, &hole64_start, &size); if (!pcmc->broken_reserved_end) { - hole64_start += memory_region_size(&ms->device_memory->mr); + hole64_start += size; } - } else if (pcms->sgx_epc.size != 0) { - hole64_start = sgx_epc_above_4g_end(&pcms->sgx_epc); } else { - hole64_start = 0x100000000ULL + x86ms->above_4g_mem_size; + hole64_start = pc_above_4g_end(pcms); } return ROUND_UP(hole64_start, 1 * GiB); @@ -1787,6 +1903,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->has_reserved_memory = true; pcmc->kvmclock_enabled = true; pcmc->enforce_aligned_dimm = true; + pcmc->enforce_amd_1tb_hole = true; /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported * to be used at the moment, 32K should be enough for a while. */ pcmc->acpi_data_size = 0x20000 + 0x8000; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index fbf9465318..a5c65c1c35 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -91,6 +91,8 @@ static void pc_init1(MachineState *machine, MemoryRegion *pci_memory; MemoryRegion *rom_memory; ram_addr_t lowmem; + uint64_t hole64_size; + DeviceState *i440fx_host; /* * Calculate ram split, for memory below and above 4G. It's a bit @@ -164,9 +166,15 @@ static void pc_init1(MachineState *machine, pci_memory = g_new(MemoryRegion, 1); memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); rom_memory = pci_memory; + i440fx_host = qdev_new(host_type); + hole64_size = object_property_get_uint(OBJECT(i440fx_host), + PCI_HOST_PROP_PCI_HOLE64_SIZE, + &error_abort); } else { pci_memory = NULL; rom_memory = system_memory; + i440fx_host = NULL; + hole64_size = 0; } pc_guest_info_init(pcms); @@ -183,7 +191,7 @@ static void pc_init1(MachineState *machine, /* allocate ram and load rom/bios */ if (!xen_enabled()) { pc_memory_init(pcms, system_memory, - rom_memory, &ram_memory); + rom_memory, &ram_memory, hole64_size); } else { pc_system_flash_cleanup_unused(pcms); if (machine->kernel_filename != NULL) { @@ -200,8 +208,8 @@ static void pc_init1(MachineState *machine, const char *type = xen_enabled() ? TYPE_PIIX3_XEN_DEVICE : TYPE_PIIX3_DEVICE; - pci_bus = i440fx_init(host_type, - pci_type, + pci_bus = i440fx_init(pci_type, + i440fx_host, system_memory, system_io, machine->ram_size, x86ms->below_4g_mem_size, x86ms->above_4g_mem_size, @@ -443,6 +451,7 @@ static void pc_i440fx_7_0_machine_options(MachineClass *m) m->alias = NULL; m->is_default = false; pcmc->legacy_no_rng_seed = true; + pcmc->enforce_amd_1tb_hole = false; compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len); compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 12cc76aaf8..3a35193ff7 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -138,6 +138,7 @@ static void pc_q35_init(MachineState *machine) MachineClass *mc = MACHINE_GET_CLASS(machine); bool acpi_pcihp; bool keep_pci_slot_hpc; + uint64_t pci_hole64_size = 0; /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping @@ -203,12 +204,19 @@ static void pc_q35_init(MachineState *machine) pcms->smbios_entry_point_type); } - /* allocate ram and load rom/bios */ - pc_memory_init(pcms, get_system_memory(), rom_memory, &ram_memory); - /* create pci host bus */ q35_host = Q35_HOST_DEVICE(qdev_new(TYPE_Q35_HOST_DEVICE)); + if (pcmc->pci_enabled) { + pci_hole64_size = object_property_get_uint(OBJECT(q35_host), + PCI_HOST_PROP_PCI_HOLE64_SIZE, + &error_abort); + } + + /* allocate ram and load rom/bios */ + pc_memory_init(pcms, get_system_memory(), rom_memory, &ram_memory, + pci_hole64_size); + object_property_add_child(qdev_get_machine(), "q35", OBJECT(q35_host)); object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_RAM_MEM, OBJECT(ram_memory), NULL); @@ -379,6 +387,7 @@ static void pc_q35_7_0_machine_options(MachineClass *m) pc_q35_7_1_machine_options(m); m->alias = NULL; pcmc->legacy_no_rng_seed = true; + pcmc->enforce_amd_1tb_hole = false; compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len); compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len); } diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c index a44d66ba2a..09d9c7c73d 100644 --- a/hw/i386/sgx.c +++ b/hw/i386/sgx.c @@ -295,7 +295,7 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms) return; } - sgx_epc->base = 0x100000000ULL + x86ms->above_4g_mem_size; + sgx_epc->base = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size; memory_region_init(&sgx_epc->mr, OBJECT(pcms), "sgx-epc", UINT64_MAX); memory_region_add_subregion(get_system_memory(), sgx_epc->base, diff --git a/hw/i386/x86.c b/hw/i386/x86.c index ecea25d249..050eedc0c8 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -1391,6 +1391,7 @@ static void x86_machine_initfn(Object *obj) x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); x86ms->bus_lock_ratelimit = 0; + x86ms->above_4g_mem_start = 4 * GiB; } static void x86_machine_class_init(ObjectClass *oc, void *data) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index 1c5ad5f918..d5426ef4a5 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -237,7 +237,8 @@ static void i440fx_realize(PCIDevice *dev, Error **errp) } } -PCIBus *i440fx_init(const char *host_type, const char *pci_type, +PCIBus *i440fx_init(const char *pci_type, + DeviceState *dev, MemoryRegion *address_space_mem, MemoryRegion *address_space_io, ram_addr_t ram_size, @@ -246,7 +247,6 @@ PCIBus *i440fx_init(const char *host_type, const char *pci_type, MemoryRegion *pci_address_space, MemoryRegion *ram_memory) { - DeviceState *dev; PCIBus *b; PCIDevice *d; PCIHostState *s; @@ -254,7 +254,6 @@ PCIBus *i440fx_init(const char *host_type, const char *pci_type, unsigned i; I440FXState *i440fx; - dev = qdev_new(host_type); s = PCI_HOST_BRIDGE(dev); b = pci_root_bus_new(dev, NULL, pci_address_space, address_space_io, 0, TYPE_PCI_BUS); diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 281152d338..62e07ec2e4 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -197,6 +197,32 @@ static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data) } } +static void virtio_iommu_notify_map_unmap(IOMMUMemoryRegion *mr, + IOMMUTLBEvent *event, + hwaddr virt_start, hwaddr virt_end) +{ + uint64_t delta = virt_end - virt_start; + + event->entry.iova = virt_start; + event->entry.addr_mask = delta; + + if (delta == UINT64_MAX) { + memory_region_notify_iommu(mr, 0, *event); + } + + while (virt_start != virt_end + 1) { + uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64); + + event->entry.addr_mask = mask; + event->entry.iova = virt_start; + memory_region_notify_iommu(mr, 0, *event); + virt_start += mask + 1; + if (event->entry.perm != IOMMU_NONE) { + event->entry.translated_addr += mask + 1; + } + } +} + static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start, hwaddr virt_end, hwaddr paddr, uint32_t flags) @@ -215,19 +241,16 @@ static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start, event.type = IOMMU_NOTIFIER_MAP; event.entry.target_as = &address_space_memory; - event.entry.addr_mask = virt_end - virt_start; - event.entry.iova = virt_start; event.entry.perm = perm; event.entry.translated_addr = paddr; - memory_region_notify_iommu(mr, 0, event); + virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end); } static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start, hwaddr virt_end) { IOMMUTLBEvent event; - uint64_t delta = virt_end - virt_start; if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) { return; @@ -239,22 +262,8 @@ static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start, event.entry.target_as = &address_space_memory; event.entry.perm = IOMMU_NONE; event.entry.translated_addr = 0; - event.entry.addr_mask = delta; - event.entry.iova = virt_start; - - if (delta == UINT64_MAX) { - memory_region_notify_iommu(mr, 0, event); - } - - while (virt_start != virt_end + 1) { - uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64); - - event.entry.addr_mask = mask; - event.entry.iova = virt_start; - memory_region_notify_iommu(mr, 0, event); - virt_start += mask + 1; - } + virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end); } static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value, diff --git a/include/hw/boards.h b/include/hw/boards.h index d94edcef28..7b416c9787 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -360,7 +360,6 @@ struct MachineState { CpuTopology smp; struct NVDIMMState *nvdimms_state; struct NumaState *numa_state; - CXLFixedMemoryWindowOptionsList *cfmws_list; }; #define DEFINE_MACHINE(namestr, machine_initfn) \ diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h index 70b5018156..94ec2f07d7 100644 --- a/include/hw/cxl/cxl_component.h +++ b/include/hw/cxl/cxl_component.h @@ -215,7 +215,7 @@ uint8_t cxl_interleave_granularity_enc(uint64_t gran, Error **errp); static inline hwaddr cxl_decode_ig(int ig) { - return 1 << (ig + 8); + return 1ULL << (ig + 8); } CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb); diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 2a8ffbcfa8..8435733bd6 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -118,6 +118,7 @@ struct PCMachineClass { bool has_reserved_memory; bool enforce_aligned_dimm; bool broken_reserved_end; + bool enforce_amd_1tb_hole; /* generate legacy CPU hotplug AML */ bool legacy_cpu_hotplug; @@ -162,7 +163,8 @@ void xen_load_linux(PCMachineState *pcms); void pc_memory_init(PCMachineState *pcms, MemoryRegion *system_memory, MemoryRegion *rom_memory, - MemoryRegion **ram_memory); + MemoryRegion **ram_memory, + uint64_t pci_hole64_size); uint64_t pc_pci_hole64_start(void); DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); void pc_basic_device_init(struct PCMachineState *pcms, diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index 6bdf1f6ab2..62fa5774f8 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -56,6 +56,9 @@ struct X86MachineState { /* RAM information (sizes, addresses, configuration): */ ram_addr_t below_4g_mem_size, above_4g_mem_size; + /* Start address of the initial RAM above 4G */ + uint64_t above_4g_mem_start; + /* CPU and apic information: */ bool apic_xrupt_override; unsigned pci_irq_mask; diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h index cf8f59be44..acf887c83d 100644 --- a/include/hw/mem/nvdimm.h +++ b/include/hw/mem/nvdimm.h @@ -29,14 +29,6 @@ #include "hw/acpi/aml-build.h" #include "qom/object.h" -#define NVDIMM_DEBUG 0 -#define nvdimm_debug(fmt, ...) \ - do { \ - if (NVDIMM_DEBUG) { \ - fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__); \ - } \ - } while (0) - /* * The minimum label data size is required by NVDIMM Namespace * specification, see the chapter 2 Namespaces: diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h index 52518dbf08..d02bf1ed6b 100644 --- a/include/hw/pci-host/i440fx.h +++ b/include/hw/pci-host/i440fx.h @@ -35,7 +35,8 @@ struct PCII440FXState { #define TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE "igd-passthrough-i440FX" -PCIBus *i440fx_init(const char *host_type, const char *pci_type, +PCIBus *i440fx_init(const char *pci_type, + DeviceState *dev, MemoryRegion *address_space_mem, MemoryRegion *address_space_io, ram_addr_t ram_size, |