From 587f8b333c8c66e8f3e54fe30ea8e88cc7d29d21 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Tue, 4 Jul 2023 14:08:47 +0100 Subject: target/arm: Add raw_writes ops for register whose write induce TLB maintenance Some registers whose 'cooked' writefns induce TLB maintenance do not have raw_writefn ops defined. If only the writefn ops is set (ie. no raw_writefn is provided), it is assumed the cooked also work as the raw one. For those registers it is not obvious the tlb_flush works on KVM mode so better/safer setting the raw write. Signed-off-by: Eric Auger Suggested-by: Peter Maydell Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/helper.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/target/arm/helper.c b/target/arm/helper.c index d08c058e42..a0b84efab5 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -4189,14 +4189,14 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = { .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_TTBR0_EL1, - .writefn = vmsa_ttbr_write, .resetvalue = 0, + .writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s), offsetof(CPUARMState, cp15.ttbr0_ns) } }, { .name = "TTBR1_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 1, .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_TTBR1_EL1, - .writefn = vmsa_ttbr_write, .resetvalue = 0, + .writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s), offsetof(CPUARMState, cp15.ttbr1_ns) } }, { .name = "TCR_EL1", .state = ARM_CP_STATE_AA64, @@ -4456,13 +4456,13 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = { .type = ARM_CP_64BIT | ARM_CP_ALIAS, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s), offsetof(CPUARMState, cp15.ttbr0_ns) }, - .writefn = vmsa_ttbr_write, }, + .writefn = vmsa_ttbr_write, .raw_writefn = raw_write }, { .name = "TTBR1", .cp = 15, .crm = 2, .opc1 = 1, .access = PL1_RW, .accessfn = access_tvm_trvm, .type = ARM_CP_64BIT | ARM_CP_ALIAS, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s), offsetof(CPUARMState, cp15.ttbr1_ns) }, - .writefn = vmsa_ttbr_write, }, + .writefn = vmsa_ttbr_write, .raw_writefn = raw_write }, }; static uint64_t aa64_fpcr_read(CPUARMState *env, const ARMCPRegInfo *ri) @@ -5911,7 +5911,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .type = ARM_CP_IO, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), - .writefn = hcr_write }, + .writefn = hcr_write, .raw_writefn = raw_write }, { .name = "HCR", .state = ARM_CP_STATE_AA32, .type = ARM_CP_ALIAS | ARM_CP_IO, .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, @@ -5983,6 +5983,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "TCR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 2, .access = PL2_RW, .writefn = vmsa_tcr_el12_write, + .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[2]) }, { .name = "VTCR", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2, @@ -5999,10 +6000,10 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .type = ARM_CP_64BIT | ARM_CP_ALIAS, .access = PL2_RW, .accessfn = access_el3_aa32ns, .fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2), - .writefn = vttbr_write }, + .writefn = vttbr_write, .raw_writefn = raw_write }, { .name = "VTTBR_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 0, - .access = PL2_RW, .writefn = vttbr_write, + .access = PL2_RW, .writefn = vttbr_write, .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2) }, { .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0, @@ -6014,7 +6015,8 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[2]) }, { .name = "TTBR0_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 0, - .access = PL2_RW, .resetvalue = 0, .writefn = vmsa_tcr_ttbr_el2_write, + .access = PL2_RW, .resetvalue = 0, + .writefn = vmsa_tcr_ttbr_el2_write, .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[2]) }, { .name = "HTTBR", .cp = 15, .opc1 = 4, .crm = 2, .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS, @@ -6201,12 +6203,12 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { { .name = "SCR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 0, .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.scr_el3), - .resetfn = scr_reset, .writefn = scr_write }, + .resetfn = scr_reset, .writefn = scr_write, .raw_writefn = raw_write }, { .name = "SCR", .type = ARM_CP_ALIAS | ARM_CP_NEWEL, .cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 0, .access = PL1_RW, .accessfn = access_trap_aa32s_el1, .fieldoffset = offsetoflow32(CPUARMState, cp15.scr_el3), - .writefn = scr_write }, + .writefn = scr_write, .raw_writefn = raw_write }, { .name = "SDER32_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 1, .access = PL3_RW, .resetvalue = 0, @@ -7927,6 +7929,7 @@ static const ARMCPRegInfo vhe_reginfo[] = { { .name = "TTBR1_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 1, .access = PL2_RW, .writefn = vmsa_tcr_ttbr_el2_write, + .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el[2]) }, #ifndef CONFIG_USER_ONLY { .name = "CNTHV_CVAL_EL2", .state = ARM_CP_STATE_AA64, -- cgit v1.2.3 From 62c2b8760b8ec9316ea4f5f4c2ce2fdaed1359ee Mon Sep 17 00:00:00 2001 From: Yuquan Wang Date: Tue, 4 Jul 2023 14:08:47 +0100 Subject: hw/arm/sbsa-ref: use XHCI to replace EHCI The current sbsa-ref cannot use EHCI controller which is only able to do 32-bit DMA, since sbsa-ref doesn't have RAM below 4GB. Hence, this uses XHCI to provide a usb controller with 64-bit DMA capablity instead of EHCI. We bump the platform version to 0.3 with this change. Although the hardware at the USB controller address changes, the firmware and Linux can both cope with this -- on an older non-XHCI-aware firmware/kernel setup the probe routine simply fails and the guest proceeds without any USB. (This isn't a loss of functionality, because the old USB controller never worked in the first place.) So we can call this a backwards-compatible change and only bump the minor version. Signed-off-by: Yuquan Wang Message-id: 20230621103847.447508-2-wangyuquan1236@phytium.com.cn [PMM: tweaked commit message; add line to docs about what changes in platform version 0.3] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- docs/system/arm/sbsa.rst | 5 ++++- hw/arm/Kconfig | 2 +- hw/arm/sbsa-ref.c | 23 +++++++++++++---------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/docs/system/arm/sbsa.rst b/docs/system/arm/sbsa.rst index a8e0b530a2..bca61608ff 100644 --- a/docs/system/arm/sbsa.rst +++ b/docs/system/arm/sbsa.rst @@ -19,7 +19,7 @@ The ``sbsa-ref`` board supports: - A configurable number of AArch64 CPUs - GIC version 3 - System bus AHCI controller - - System bus EHCI controller + - System bus XHCI controller - CDROM and hard disc on AHCI bus - E1000E ethernet card on PCIe bus - Bochs display adapter on PCIe bus @@ -68,3 +68,6 @@ Platform version changes: 0.2 GIC ITS information is present in devicetree. + +0.3 + The USB controller is an XHCI device, not EHCI diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index 7de17d1e8c..7e68348440 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -266,7 +266,7 @@ config SBSA_REF select PL011 # UART select PL031 # RTC select PL061 # GPIO - select USB_EHCI_SYSBUS + select USB_XHCI_SYSBUS select WDT_SBSA select BOCHS_DISPLAY diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index 82a28b2e0b..1a8519b868 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -42,6 +42,7 @@ #include "hw/pci-host/gpex.h" #include "hw/qdev-properties.h" #include "hw/usb.h" +#include "hw/usb/xhci.h" #include "hw/char/pl011.h" #include "hw/watchdog/sbsa_gwdt.h" #include "net/net.h" @@ -85,7 +86,7 @@ enum { SBSA_SECURE_UART_MM, SBSA_SECURE_MEM, SBSA_AHCI, - SBSA_EHCI, + SBSA_XHCI, }; struct SBSAMachineState { @@ -123,7 +124,7 @@ static const MemMapEntry sbsa_ref_memmap[] = { [SBSA_SMMU] = { 0x60050000, 0x00020000 }, /* Space here reserved for more SMMUs */ [SBSA_AHCI] = { 0x60100000, 0x00010000 }, - [SBSA_EHCI] = { 0x60110000, 0x00010000 }, + [SBSA_XHCI] = { 0x60110000, 0x00010000 }, /* Space here reserved for other devices */ [SBSA_PCIE_PIO] = { 0x7fff0000, 0x00010000 }, /* 32-bit address PCIE MMIO space */ @@ -143,7 +144,7 @@ static const int sbsa_ref_irqmap[] = { [SBSA_SECURE_UART] = 8, [SBSA_SECURE_UART_MM] = 9, [SBSA_AHCI] = 10, - [SBSA_EHCI] = 11, + [SBSA_XHCI] = 11, [SBSA_SMMU] = 12, /* ... to 15 */ [SBSA_GWDT_WS0] = 16, }; @@ -230,7 +231,7 @@ static void create_fdt(SBSAMachineState *sms) * fw compatibility. */ qemu_fdt_setprop_cell(fdt, "/", "machine-version-major", 0); - qemu_fdt_setprop_cell(fdt, "/", "machine-version-minor", 2); + qemu_fdt_setprop_cell(fdt, "/", "machine-version-minor", 3); if (ms->numa_state->have_numa_distance) { int size = nb_numa_nodes * nb_numa_nodes * 3 * sizeof(uint32_t); @@ -604,13 +605,15 @@ static void create_ahci(const SBSAMachineState *sms) } } -static void create_ehci(const SBSAMachineState *sms) +static void create_xhci(const SBSAMachineState *sms) { - hwaddr base = sbsa_ref_memmap[SBSA_EHCI].base; - int irq = sbsa_ref_irqmap[SBSA_EHCI]; + hwaddr base = sbsa_ref_memmap[SBSA_XHCI].base; + int irq = sbsa_ref_irqmap[SBSA_XHCI]; + DeviceState *dev = qdev_new(TYPE_XHCI_SYSBUS); - sysbus_create_simple("platform-ehci-usb", base, - qdev_get_gpio_in(sms->gic, irq)); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(sms->gic, irq)); } static void create_smmu(const SBSAMachineState *sms, PCIBus *bus) @@ -832,7 +835,7 @@ static void sbsa_ref_init(MachineState *machine) create_ahci(sms); - create_ehci(sms); + create_xhci(sms); create_pcie(sms); -- cgit v1.2.3 From 3dc2afeab2964b54848715b913b6c605f36be3e1 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 6 Jul 2023 12:38:19 +0100 Subject: tests/tcg/aarch64/sysregs.c: Use S syntax for id_aa64zfr0_el1 and id_aa64smfr0_el1 Some assemblers will complain about attempts to access id_aa64zfr0_el1 and id_aa64smfr0_el1 by name if the test binary isn't built for the right processor type: /tmp/ccASXpLo.s:782: Error: selected processor does not support system register name 'id_aa64zfr0_el1' /tmp/ccASXpLo.s:829: Error: selected processor does not support system register name 'id_aa64smfr0_el1' However, these registers are in the ID space and are guaranteed to read-as-zero on older CPUs, so the access is both safe and sensible. Switch to using the S syntax, as we already do for ID_AA64ISAR2_EL1 and ID_AA64MMFR2_EL1. This allows us to drop the HAS_ARMV9_SME check and the makefile machinery to adjust the CFLAGS for this test, so we don't rely on having a sufficiently new compiler to be able to check these registers. This means we're actually testing the SME ID register: no released GCC yet recognizes -march=armv9-a+sme, so that was always skipped. It also avoids a future problem if we try to switch the "do we have SME support in the toolchain" check from "in the compiler" to "in the assembler" (at which point we would otherwise run into the above errors). Signed-off-by: Peter Maydell --- tests/tcg/aarch64/Makefile.target | 7 +------ tests/tcg/aarch64/sysregs.c | 11 +++++++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target index cec1d4b287..ea9ceb31e6 100644 --- a/tests/tcg/aarch64/Makefile.target +++ b/tests/tcg/aarch64/Makefile.target @@ -61,15 +61,10 @@ AARCH64_TESTS += mte-1 mte-2 mte-3 mte-4 mte-5 mte-6 mte-7 mte-%: CFLAGS += -march=armv8.5-a+memtag endif -ifneq ($(CROSS_CC_HAS_SVE),) # System Registers Tests AARCH64_TESTS += sysregs -ifneq ($(CROSS_CC_HAS_ARMV9_SME),) -sysregs: CFLAGS+=-march=armv9-a+sme -DHAS_ARMV9_SME -else -sysregs: CFLAGS+=-march=armv8.1-a+sve -endif +ifneq ($(CROSS_CC_HAS_SVE),) # SVE ioctl test AARCH64_TESTS += sve-ioctls sve-ioctls: CFLAGS+=-march=armv8.1-a+sve diff --git a/tests/tcg/aarch64/sysregs.c b/tests/tcg/aarch64/sysregs.c index 46b931f781..d8eb06abcf 100644 --- a/tests/tcg/aarch64/sysregs.c +++ b/tests/tcg/aarch64/sysregs.c @@ -25,9 +25,14 @@ /* * Older assemblers don't recognize newer system register names, * but we can still access them by the Sn_n_Cn_Cn_n syntax. + * This also means we don't need to specifically request that the + * assembler enables whatever architectural features the ID registers + * syntax might be gated behind. */ #define SYS_ID_AA64ISAR2_EL1 S3_0_C0_C6_2 #define SYS_ID_AA64MMFR2_EL1 S3_0_C0_C7_2 +#define SYS_ID_AA64ZFR0_EL1 S3_0_C0_C4_4 +#define SYS_ID_AA64SMFR0_EL1 S3_0_C0_C4_5 int failed_bit_count; @@ -132,10 +137,8 @@ int main(void) /* all hidden, DebugVer fixed to 0x6 (ARMv8 debug architecture) */ get_cpu_reg_check_mask(id_aa64dfr0_el1, _m(0000,0000,0000,0006)); get_cpu_reg_check_zero(id_aa64dfr1_el1); - get_cpu_reg_check_mask(id_aa64zfr0_el1, _m(0ff0,ff0f,00ff,00ff)); -#ifdef HAS_ARMV9_SME - get_cpu_reg_check_mask(id_aa64smfr0_el1, _m(80f1,00fd,0000,0000)); -#endif + get_cpu_reg_check_mask(SYS_ID_AA64ZFR0_EL1, _m(0ff0,ff0f,00ff,00ff)); + get_cpu_reg_check_mask(SYS_ID_AA64SMFR0_EL1, _m(80f1,00fd,0000,0000)); get_cpu_reg_check_zero(id_aa64afr0_el1); get_cpu_reg_check_zero(id_aa64afr1_el1); -- cgit v1.2.3 From a9d8407016ab9d2423a00d767cad90968ba21f99 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 4 Jul 2023 14:08:47 +0100 Subject: target/arm: Avoid splitting Zregs across lines in dump Allow the line length to extend to 548 columns. While annoyingly wide, it's still less confusing than the continuations we print. Also, the default VL used by Linux (and max for A64FX) uses only 140 columns. Signed-off-by: Richard Henderson Message-id: 20230622151201.1578522-2-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/cpu.c | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index a1e77698ba..f12c714bc4 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -955,7 +955,7 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; uint32_t psr = pstate_read(env); - int i; + int i, j; int el = arm_current_el(env); const char *ns_status; bool sve; @@ -1014,7 +1014,7 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) } if (sve) { - int j, zcr_len = sve_vqm1_for_el(env, el); + int zcr_len = sve_vqm1_for_el(env, el); for (i = 0; i <= FFR_PRED_NUM; i++) { bool eol; @@ -1054,32 +1054,24 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) } } - for (i = 0; i < 32; i++) { - if (zcr_len == 0) { + if (zcr_len == 0) { + /* + * With vl=16, there are only 37 columns per register, + * so output two registers per line. + */ + for (i = 0; i < 32; i++) { qemu_fprintf(f, "Z%02d=%016" PRIx64 ":%016" PRIx64 "%s", i, env->vfp.zregs[i].d[1], env->vfp.zregs[i].d[0], i & 1 ? "\n" : " "); - } else if (zcr_len == 1) { - qemu_fprintf(f, "Z%02d=%016" PRIx64 ":%016" PRIx64 - ":%016" PRIx64 ":%016" PRIx64 "\n", - i, env->vfp.zregs[i].d[3], env->vfp.zregs[i].d[2], - env->vfp.zregs[i].d[1], env->vfp.zregs[i].d[0]); - } else { + } + } else { + for (i = 0; i < 32; i++) { + qemu_fprintf(f, "Z%02d=", i); for (j = zcr_len; j >= 0; j--) { - bool odd = (zcr_len - j) % 2 != 0; - if (j == zcr_len) { - qemu_fprintf(f, "Z%02d[%x-%x]=", i, j, j - 1); - } else if (!odd) { - if (j > 0) { - qemu_fprintf(f, " [%x-%x]=", j, j - 1); - } else { - qemu_fprintf(f, " [%x]=", j); - } - } qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%s", env->vfp.zregs[i].d[j * 2 + 1], - env->vfp.zregs[i].d[j * 2], - odd || j == 0 ? "\n" : ":"); + env->vfp.zregs[i].d[j * 2 + 0], + j ? ":" : "\n"); } } } -- cgit v1.2.3 From 270bea47a23b286c2ab3a76f720991c59680b8e6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 4 Jul 2023 14:08:48 +0100 Subject: target/arm: Dump ZA[] when active Always print each matrix row whole, one per line, so that we get the entire matrix in the proper shape. Signed-off-by: Richard Henderson Message-id: 20230622151201.1578522-3-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/cpu.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index f12c714bc4..adf84f9686 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1082,6 +1082,24 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) i, q[1], q[0], (i & 1 ? "\n" : " ")); } } + + if (cpu_isar_feature(aa64_sme, cpu) && + FIELD_EX64(env->svcr, SVCR, ZA) && + sme_exception_el(env, el) == 0) { + int zcr_len = sve_vqm1_for_el_sm(env, el, true); + int svl = (zcr_len + 1) * 16; + int svl_lg10 = svl < 100 ? 2 : 3; + + for (i = 0; i < svl; i++) { + qemu_fprintf(f, "ZA[%0*d]=", svl_lg10, i); + for (j = zcr_len; j >= 0; --j) { + qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%c", + env->zarray[i].d[2 * j + 1], + env->zarray[i].d[2 * j], + j ? ':' : '\n'); + } + } + } } #else -- cgit v1.2.3 From 1f51573f7925b80e79a29f87c7d9d6ead60960c0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 4 Jul 2023 14:08:48 +0100 Subject: target/arm: Fix SME full tile indexing For the outer product set of insns, which take an entire matrix tile as output, the argument is not a combined tile+column. Therefore using get_tile_rowcol was incorrect, as we extracted the tile number from itself. The test case relies only on assembler support for SME, since no release of GCC recognizes -march=armv9-a+sme yet. Cc: qemu-stable@nongnu.org Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1620 Signed-off-by: Richard Henderson Message-id: 20230622151201.1578522-5-richard.henderson@linaro.org Reviewed-by: Peter Maydell [PMM: dropped now-unneeded changes to sysregs CFLAGS] Signed-off-by: Peter Maydell --- target/arm/tcg/translate-sme.c | 24 ++++++++--- tests/tcg/aarch64/Makefile.target | 7 +++- tests/tcg/aarch64/sme-outprod1.c | 83 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 7 deletions(-) create mode 100644 tests/tcg/aarch64/sme-outprod1.c diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c index d0054e3f77..6038b0a06f 100644 --- a/target/arm/tcg/translate-sme.c +++ b/target/arm/tcg/translate-sme.c @@ -95,6 +95,21 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, return addr; } +/* + * Resolve tile.size[0] to a host pointer. + * Used by e.g. outer product insns where we require the entire tile. + */ +static TCGv_ptr get_tile(DisasContext *s, int esz, int tile) +{ + TCGv_ptr addr = tcg_temp_new_ptr(); + int offset; + + offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, zarray); + + tcg_gen_addi_ptr(addr, cpu_env, offset); + return addr; +} + static bool trans_ZERO(DisasContext *s, arg_ZERO *a) { if (!dc_isar_feature(aa64_sme, s)) { @@ -260,8 +275,7 @@ static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz, return true; } - /* Sum XZR+zad to find ZAd. */ - za = get_tile_rowcol(s, esz, 31, a->zad, false); + za = get_tile(s, esz, a->zad); zn = vec_full_reg_ptr(s, a->zn); pn = pred_full_reg_ptr(s, a->pn); pm = pred_full_reg_ptr(s, a->pm); @@ -286,8 +300,7 @@ static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz, return true; } - /* Sum XZR+zad to find ZAd. */ - za = get_tile_rowcol(s, esz, 31, a->zad, false); + za = get_tile(s, esz, a->zad); zn = vec_full_reg_ptr(s, a->zn); zm = vec_full_reg_ptr(s, a->zm); pn = pred_full_reg_ptr(s, a->pn); @@ -308,8 +321,7 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, return true; } - /* Sum XZR+zad to find ZAd. */ - za = get_tile_rowcol(s, esz, 31, a->zad, false); + za = get_tile(s, esz, a->zad); zn = vec_full_reg_ptr(s, a->zn); zm = vec_full_reg_ptr(s, a->zm); pn = pred_full_reg_ptr(s, a->pn); diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target index ea9ceb31e6..0606dec118 100644 --- a/tests/tcg/aarch64/Makefile.target +++ b/tests/tcg/aarch64/Makefile.target @@ -26,7 +26,7 @@ config-cc.mak: Makefile $(call cc-option,-march=armv8.5-a, CROSS_CC_HAS_ARMV8_5); \ $(call cc-option,-mbranch-protection=standard, CROSS_CC_HAS_ARMV8_BTI); \ $(call cc-option,-march=armv8.5-a+memtag, CROSS_CC_HAS_ARMV8_MTE); \ - $(call cc-option,-march=armv9-a+sme, CROSS_CC_HAS_ARMV9_SME)) 3> config-cc.mak + $(call cc-option,-Wa$(COMMA)-march=armv9-a+sme, CROSS_AS_HAS_ARMV9_SME)) 3> config-cc.mak -include config-cc.mak ifneq ($(CROSS_CC_HAS_ARMV8_2),) @@ -61,6 +61,11 @@ AARCH64_TESTS += mte-1 mte-2 mte-3 mte-4 mte-5 mte-6 mte-7 mte-%: CFLAGS += -march=armv8.5-a+memtag endif +# SME Tests +ifneq ($(CROSS_AS_HAS_ARMV9_SME),) +AARCH64_TESTS += sme-outprod1 +endif + # System Registers Tests AARCH64_TESTS += sysregs diff --git a/tests/tcg/aarch64/sme-outprod1.c b/tests/tcg/aarch64/sme-outprod1.c new file mode 100644 index 0000000000..6e5972d75e --- /dev/null +++ b/tests/tcg/aarch64/sme-outprod1.c @@ -0,0 +1,83 @@ +/* + * SME outer product, 1 x 1. + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include + +extern void foo(float *dst); + +asm( +" .arch_extension sme\n" +" .type foo, @function\n" +"foo:\n" +" stp x29, x30, [sp, -80]!\n" +" mov x29, sp\n" +" stp d8, d9, [sp, 16]\n" +" stp d10, d11, [sp, 32]\n" +" stp d12, d13, [sp, 48]\n" +" stp d14, d15, [sp, 64]\n" +" smstart\n" +" ptrue p0.s, vl4\n" +" fmov z0.s, #1.0\n" +/* + * An outer product of a vector of 1.0 by itself should be a matrix of 1.0. + * Note that we are using tile 1 here (za1.s) rather than tile 0. + */ +" zero {za}\n" +" fmopa za1.s, p0/m, p0/m, z0.s, z0.s\n" +/* + * Read the first 4x4 sub-matrix of elements from tile 1: + * Note that za1h should be interchangable here. + */ +" mov w12, #0\n" +" mova z0.s, p0/m, za1v.s[w12, #0]\n" +" mova z1.s, p0/m, za1v.s[w12, #1]\n" +" mova z2.s, p0/m, za1v.s[w12, #2]\n" +" mova z3.s, p0/m, za1v.s[w12, #3]\n" +/* + * And store them to the input pointer (dst in the C code): + */ +" st1w {z0.s}, p0, [x0]\n" +" add x0, x0, #16\n" +" st1w {z1.s}, p0, [x0]\n" +" add x0, x0, #16\n" +" st1w {z2.s}, p0, [x0]\n" +" add x0, x0, #16\n" +" st1w {z3.s}, p0, [x0]\n" +" smstop\n" +" ldp d8, d9, [sp, 16]\n" +" ldp d10, d11, [sp, 32]\n" +" ldp d12, d13, [sp, 48]\n" +" ldp d14, d15, [sp, 64]\n" +" ldp x29, x30, [sp], 80\n" +" ret\n" +" .size foo, . - foo" +); + +int main() +{ + float dst[16]; + int i, j; + + foo(dst); + + for (i = 0; i < 16; i++) { + if (dst[i] != 1.0f) { + break; + } + } + + if (i == 16) { + return 0; /* success */ + } + + /* failure */ + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { + printf("%f ", (double)dst[i * 4 + j]); + } + printf("\n"); + } + return 1; +} -- cgit v1.2.3 From 9719f125b803f4e0fda834cd74a60dfa4ca398e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20H=C3=B6gberg?= Date: Tue, 4 Jul 2023 14:08:48 +0100 Subject: target/arm: Handle IC IVAU to improve compatibility with JITs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlike architectures with precise self-modifying code semantics (e.g. x86) ARM processors do not maintain coherency for instruction execution and memory, requiring an instruction synchronization barrier on every core that will execute the new code, and on many models also the explicit use of cache management instructions. While this is required to make JITs work on actual hardware, QEMU has gotten away with not handling this since it does not emulate caches, and unconditionally invalidates code whenever the softmmu or the user-mode page protection logic detects that code has been modified. Unfortunately the latter does not work in the face of dual-mapped code (a common W^X workaround), where one page is executable and the other is writable: user-mode has no way to connect one with the other as that is only known to the kernel and the emulated application. This commit works around the issue by telling software that instruction cache invalidation is required by clearing the CPR_EL0.DIC flag (regardless of whether the emulated processor needs it), and then invalidating code in IC IVAU instructions. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1034 Co-authored-by: Richard Henderson Signed-off-by: John Högberg Reviewed-by: Richard Henderson Message-id: 168778890374.24232.3402138851538068785-1@git.sr.ht [PMM: removed unnecessary AArch64 feature check; moved "clear CTR_EL1.DIC" code up a bit so it's not in the middle of the vfp/neon related tests] Signed-off-by: Peter Maydell --- target/arm/cpu.c | 11 +++++++++++ target/arm/helper.c | 47 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index adf84f9686..822efa5b2c 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1694,6 +1694,17 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) return; } +#ifdef CONFIG_USER_ONLY + /* + * User mode relies on IC IVAU instructions to catch modification of + * dual-mapped code. + * + * Clear CTR_EL0.DIC to ensure that software that honors these flags uses + * IC IVAU even if the emulated processor does not normally require it. + */ + cpu->ctr = FIELD_DP64(cpu->ctr, CTR_EL0, DIC, 0); +#endif + if (arm_feature(env, ARM_FEATURE_AARCH64) && cpu->has_vfp != cpu->has_neon) { /* diff --git a/target/arm/helper.c b/target/arm/helper.c index a0b84efab5..8e836aaee1 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -5234,6 +5234,36 @@ static void mdcr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, } } +#ifdef CONFIG_USER_ONLY +/* + * `IC IVAU` is handled to improve compatibility with JITs that dual-map their + * code to get around W^X restrictions, where one region is writable and the + * other is executable. + * + * Since the executable region is never written to we cannot detect code + * changes when running in user mode, and rely on the emulated JIT telling us + * that the code has changed by executing this instruction. + */ +static void ic_ivau_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint64_t icache_line_mask, start_address, end_address; + const ARMCPU *cpu; + + cpu = env_archcpu(env); + + icache_line_mask = (4 << extract32(cpu->ctr, 0, 4)) - 1; + start_address = value & ~icache_line_mask; + end_address = value | icache_line_mask; + + mmap_lock(); + + tb_invalidate_phys_range(start_address, end_address); + + mmap_unlock(); +} +#endif + static const ARMCPRegInfo v8_cp_reginfo[] = { /* * Minimal set of EL0-visible registers. This will need to be expanded @@ -5273,7 +5303,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = "CURRENTEL", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 4, .crm = 2, .access = PL1_R, .type = ARM_CP_CURRENTEL }, - /* Cache ops: all NOPs since we don't emulate caches */ + /* + * Instruction cache ops. All of these except `IC IVAU` NOP because we + * don't emulate caches. + */ { .name = "IC_IALLUIS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, .access = PL1_W, .type = ARM_CP_NOP, @@ -5286,9 +5319,17 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .accessfn = access_tocu }, { .name = "IC_IVAU", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 5, .opc2 = 1, - .access = PL0_W, .type = ARM_CP_NOP, + .access = PL0_W, .fgt = FGT_ICIVAU, - .accessfn = access_tocu }, + .accessfn = access_tocu, +#ifdef CONFIG_USER_ONLY + .type = ARM_CP_NO_RAW, + .writefn = ic_ivau_write +#else + .type = ARM_CP_NOP +#endif + }, + /* Cache ops: all NOPs since we don't emulate caches */ { .name = "DC_IVAC", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1, .access = PL1_W, .accessfn = aa64_cacheop_poc_access, -- cgit v1.2.3 From b52aa865259f8e26125333a178c0a9364d8ae4da Mon Sep 17 00:00:00 2001 From: Vikram Garhwal Date: Wed, 28 Jun 2023 13:27:58 -0700 Subject: tests/qtest: xlnx-canfd-test: Fix code coverity issues Following are done to fix the coverity issues: 1. Change read_data to fix the CID 1512899: Out-of-bounds access (OVERRUN) 2. Fix match_rx_tx_data to fix CID 1512900: Logically dead code (DEADCODE) 3. Replace rand() in generate_random_data() with g_rand_int() Signed-off-by: Vikram Garhwal Message-id: 20230628202758.16398-1-vikram.garhwal@amd.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- tests/qtest/xlnx-canfd-test.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/tests/qtest/xlnx-canfd-test.c b/tests/qtest/xlnx-canfd-test.c index 76ee106d4f..78ec9ef2a7 100644 --- a/tests/qtest/xlnx-canfd-test.c +++ b/tests/qtest/xlnx-canfd-test.c @@ -170,23 +170,23 @@ static void generate_random_data(uint32_t *buf_tx, bool is_canfd_frame) /* Generate random TX data for CANFD frame. */ if (is_canfd_frame) { for (int i = 0; i < CANFD_FRAME_SIZE - 2; i++) { - buf_tx[2 + i] = rand(); + buf_tx[2 + i] = g_random_int(); } } else { /* Generate random TX data for CAN frame. */ for (int i = 0; i < CAN_FRAME_SIZE - 2; i++) { - buf_tx[2 + i] = rand(); + buf_tx[2 + i] = g_random_int(); } } } -static void read_data(QTestState *qts, uint64_t can_base_addr, uint32_t *buf_rx) +static void read_data(QTestState *qts, uint64_t can_base_addr, uint32_t *buf_rx, + uint32_t frame_size) { uint32_t int_status; uint32_t fifo_status_reg_value; /* At which RX FIFO the received data is stored. */ uint8_t store_ind = 0; - bool is_canfd_frame = false; /* Read the interrupt on CANFD rx. */ int_status = qtest_readl(qts, can_base_addr + R_ISR_OFFSET) & ISR_RXOK; @@ -207,16 +207,9 @@ static void read_data(QTestState *qts, uint64_t can_base_addr, uint32_t *buf_rx) buf_rx[0] = qtest_readl(qts, can_base_addr + R_RX0_ID_OFFSET); buf_rx[1] = qtest_readl(qts, can_base_addr + R_RX0_DLC_OFFSET); - is_canfd_frame = (buf_rx[1] >> DLC_FD_BIT_SHIFT) & 1; - - if (is_canfd_frame) { - for (int i = 0; i < CANFD_FRAME_SIZE - 2; i++) { - buf_rx[i + 2] = qtest_readl(qts, - can_base_addr + R_RX0_DATA1_OFFSET + 4 * i); - } - } else { - buf_rx[2] = qtest_readl(qts, can_base_addr + R_RX0_DATA1_OFFSET); - buf_rx[3] = qtest_readl(qts, can_base_addr + R_RX0_DATA2_OFFSET); + for (int i = 0; i < frame_size - 2; i++) { + buf_rx[i + 2] = qtest_readl(qts, + can_base_addr + R_RX0_DATA1_OFFSET + 4 * i); } /* Clear the RX interrupt. */ @@ -272,10 +265,6 @@ static void match_rx_tx_data(const uint32_t *buf_tx, const uint32_t *buf_rx, g_assert_cmpint((buf_rx[size] & DLC_FD_BIT_MASK), ==, (buf_tx[size] & DLC_FD_BIT_MASK)); } else { - if (!is_canfd_frame && size == 4) { - break; - } - g_assert_cmpint(buf_rx[size], ==, buf_tx[size]); } @@ -318,7 +307,7 @@ static void test_can_data_transfer(void) write_data(qts, CANFD0_BASE_ADDR, buf_tx, false); send_data(qts, CANFD0_BASE_ADDR); - read_data(qts, CANFD1_BASE_ADDR, buf_rx); + read_data(qts, CANFD1_BASE_ADDR, buf_rx, CAN_FRAME_SIZE); match_rx_tx_data(buf_tx, buf_rx, false); qtest_quit(qts); @@ -358,7 +347,7 @@ static void test_canfd_data_transfer(void) write_data(qts, CANFD0_BASE_ADDR, buf_tx, true); send_data(qts, CANFD0_BASE_ADDR); - read_data(qts, CANFD1_BASE_ADDR, buf_rx); + read_data(qts, CANFD1_BASE_ADDR, buf_rx, CANFD_FRAME_SIZE); match_rx_tx_data(buf_tx, buf_rx, true); qtest_quit(qts); @@ -397,7 +386,7 @@ static void test_can_loopback(void) write_data(qts, CANFD0_BASE_ADDR, buf_tx, true); send_data(qts, CANFD0_BASE_ADDR); - read_data(qts, CANFD0_BASE_ADDR, buf_rx); + read_data(qts, CANFD0_BASE_ADDR, buf_rx, CANFD_FRAME_SIZE); match_rx_tx_data(buf_tx, buf_rx, true); generate_random_data(buf_tx, true); @@ -405,7 +394,7 @@ static void test_can_loopback(void) write_data(qts, CANFD1_BASE_ADDR, buf_tx, true); send_data(qts, CANFD1_BASE_ADDR); - read_data(qts, CANFD1_BASE_ADDR, buf_rx); + read_data(qts, CANFD1_BASE_ADDR, buf_rx, CANFD_FRAME_SIZE); match_rx_tx_data(buf_tx, buf_rx, true); qtest_quit(qts); -- cgit v1.2.3 From 893ca916c0b9432d46fc16f84c8d3b605a58c843 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 28 Jun 2023 13:48:21 -0300 Subject: target/arm: gdbstub: Guard M-profile code with CONFIG_TCG This code is only relevant when TCG is present in the build. Building with --disable-tcg --enable-xen on an x86 host we get: $ ../configure --target-list=x86_64-softmmu,aarch64-softmmu --disable-tcg --enable-xen $ make -j$(nproc) ... libqemu-aarch64-softmmu.fa.p/target_arm_gdbstub.c.o: in function `m_sysreg_ptr': ../target/arm/gdbstub.c:358: undefined reference to `arm_v7m_get_sp_ptr' ../target/arm/gdbstub.c:361: undefined reference to `arm_v7m_get_sp_ptr' libqemu-aarch64-softmmu.fa.p/target_arm_gdbstub.c.o: in function `arm_gdb_get_m_systemreg': ../target/arm/gdbstub.c:405: undefined reference to `arm_v7m_mrs_control' Signed-off-by: Fabiano Rosas Message-id: 20230628164821.16771-1-farosas@suse.de Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/gdbstub.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c index 03b17c814f..f421c5d041 100644 --- a/target/arm/gdbstub.c +++ b/target/arm/gdbstub.c @@ -324,6 +324,7 @@ static int arm_gen_dynamic_sysreg_xml(CPUState *cs, int base_reg) return cpu->dyn_sysreg_xml.num; } +#ifdef CONFIG_TCG typedef enum { M_SYSREG_MSP, M_SYSREG_PSP, @@ -481,6 +482,7 @@ static int arm_gen_dynamic_m_secextreg_xml(CPUState *cs, int orig_base_reg) return cpu->dyn_m_secextreg_xml.num; } #endif +#endif /* CONFIG_TCG */ const char *arm_gdb_get_dynamic_xml(CPUState *cs, const char *xmlname) { @@ -561,6 +563,7 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) arm_gen_dynamic_sysreg_xml(cs, cs->gdb_num_regs), "system-registers.xml", 0); +#ifdef CONFIG_TCG if (arm_feature(env, ARM_FEATURE_M) && tcg_enabled()) { gdb_register_coprocessor(cs, arm_gdb_get_m_systemreg, arm_gdb_set_m_systemreg, @@ -575,4 +578,5 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) } #endif } +#endif /* CONFIG_TCG */ } -- cgit v1.2.3 From 9057e5f7c9550d1ea419ce838ae6d0df96fb062e Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Wed, 28 Jun 2023 20:09:03 +0900 Subject: hw: arm: allwinner-sramc: Set class_size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AwSRAMCClass is larger than SysBusDeviceClass so the class size must be advertised accordingly. Fixes: 05def917e1 ("hw: arm: allwinner-sramc: Add SRAM Controller support for R40") Signed-off-by: Akihiko Odaki Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20230628110905.38125-1-akihiko.odaki@daynix.com Signed-off-by: Peter Maydell --- hw/misc/allwinner-sramc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/misc/allwinner-sramc.c b/hw/misc/allwinner-sramc.c index a8b731f8f2..d76c24d081 100644 --- a/hw/misc/allwinner-sramc.c +++ b/hw/misc/allwinner-sramc.c @@ -159,6 +159,7 @@ static const TypeInfo allwinner_sramc_info = { .parent = TYPE_SYS_BUS_DEVICE, .instance_init = allwinner_sramc_init, .instance_size = sizeof(AwSRAMCState), + .class_size = sizeof(AwSRAMCClass), .class_init = allwinner_sramc_class_init, }; -- cgit v1.2.3 From ad18376b90c8101b8eed2fd571bdbb94636ccc2e Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 23 Jun 2023 16:41:35 +0100 Subject: target/xtensa: Assert that interrupt level is within bounds In handle_interrupt() we use level as an index into the interrupt_vector[] array. This is safe because we have checked it against env->config->nlevel, but Coverity can't see that (and it is only true because each CPU config sets its XCHAL_NUM_INTLEVELS to something less than MAX_NLEVELS), so it complains about a possible array overrun (CID 1507131) Add an assert() which will make Coverity happy and catch the unlikely case of a mis-set XCHAL_NUM_INTLEVELS in future. Signed-off-by: Peter Maydell Acked-by: Max Filippov Message-id: 20230623154135.1930261-1-peter.maydell@linaro.org --- target/xtensa/exc_helper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target/xtensa/exc_helper.c b/target/xtensa/exc_helper.c index d4823a65cd..43f6a862de 100644 --- a/target/xtensa/exc_helper.c +++ b/target/xtensa/exc_helper.c @@ -169,6 +169,9 @@ static void handle_interrupt(CPUXtensaState *env) CPUState *cs = env_cpu(env); if (level > 1) { + /* env->config->nlevel check should have ensured this */ + assert(level < sizeof(env->config->interrupt_vector)); + env->sregs[EPC1 + level - 1] = env->pc; env->sregs[EPS2 + level - 2] = env->sregs[PS]; env->sregs[PS] = -- cgit v1.2.3 From 7d8c283e10dd818457e7c6a0f729fb03857253ac Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 4 Jul 2023 14:06:46 +0100 Subject: target/arm: Suppress more TCG unimplemented features in ID registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already squash the ID register field for FEAT_SPE (the Statistical Profiling Extension) because TCG does not implement it and if we advertise it to the guest the guest will crash trying to look at non-existent system registers. Do the same for some other features which a real hardware Neoverse-V1 implements but which TCG doesn't: * FEAT_TRF (Self-hosted Trace Extension) * Trace Macrocell system register access * Memory mapped trace * FEAT_AMU (Activity Monitors Extension) * FEAT_MPAM (Memory Partitioning and Monitoring Extension) * FEAT_NV (Nested Virtualization) Most of these, like FEAT_SPE, are "introspection/trace" type features which QEMU is unlikely to ever implement. The odd-one-out here is FEAT_NV -- we could implement that and at some point we probably will. Signed-off-by: Peter Maydell Message-id: 20230704130647.2842917-2-peter.maydell@linaro.org Reviewed-by: Alex Bennée Reviewed-by: Richard Henderson --- target/arm/cpu.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 822efa5b2c..69e2bde3c2 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -2069,13 +2069,38 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) if (tcg_enabled()) { /* - * Don't report the Statistical Profiling Extension in the ID - * registers, because TCG doesn't implement it yet (not even a - * minimal stub version) and guests will fall over when they - * try to access the non-existent system registers for it. + * Don't report some architectural features in the ID registers + * where TCG does not yet implement it (not even a minimal + * stub version). This avoids guests falling over when they + * try to access the non-existent system registers for them. */ + /* FEAT_SPE (Statistical Profiling Extension) */ cpu->isar.id_aa64dfr0 = FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMSVER, 0); + /* FEAT_TRF (Self-hosted Trace Extension) */ + cpu->isar.id_aa64dfr0 = + FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEFILT, 0); + cpu->isar.id_dfr0 = + FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, TRACEFILT, 0); + /* Trace Macrocell system register access */ + cpu->isar.id_aa64dfr0 = + FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEVER, 0); + cpu->isar.id_dfr0 = + FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPTRC, 0); + /* Memory mapped trace */ + cpu->isar.id_dfr0 = + FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, MMAPTRC, 0); + /* FEAT_AMU (Activity Monitors Extension) */ + cpu->isar.id_aa64pfr0 = + FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, AMU, 0); + cpu->isar.id_pfr0 = + FIELD_DP32(cpu->isar.id_pfr0, ID_PFR0, AMU, 0); + /* FEAT_MPAM (Memory Partitioning and Monitoring Extension) */ + cpu->isar.id_aa64pfr0 = + FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, MPAM, 0); + /* FEAT_NV (Nested Virtualization) */ + cpu->isar.id_aa64mmfr2 = + FIELD_DP64(cpu->isar.id_aa64mmfr2, ID_AA64MMFR2, NV, 0); } /* MPU can be configured out of a PMSA CPU either by setting has-mpu -- cgit v1.2.3 From c74138c6c040b62e941326a4fbb25a93fdd35b72 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 4 Jul 2023 14:06:47 +0100 Subject: target/arm: Define neoverse-v1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have implemented support for FEAT_LSE2, we can define a CPU model for the Neoverse-V1, and enable it for the virt and sbsa-ref boards. Signed-off-by: Peter Maydell Message-id: 20230704130647.2842917-3-peter.maydell@linaro.org Reviewed-by: Alex Bennée Reviewed-by: Richard Henderson --- docs/system/arm/virt.rst | 1 + hw/arm/sbsa-ref.c | 1 + hw/arm/virt.c | 1 + target/arm/tcg/cpu64.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 131 insertions(+) diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst index 1cab33f02e..51cdac6841 100644 --- a/docs/system/arm/virt.rst +++ b/docs/system/arm/virt.rst @@ -61,6 +61,7 @@ Supported guest CPU types: - ``a64fx`` (64-bit) - ``host`` (with KVM only) - ``neoverse-n1`` (64-bit) +- ``neoverse-v1`` (64-bit) - ``max`` (same as ``host`` for KVM; best possible emulation with TCG) Note that the default is ``cortex-a15``, so for an AArch64 guest you must diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index 1a8519b868..c2e0a9fa1a 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -153,6 +153,7 @@ static const char * const valid_cpus[] = { ARM_CPU_TYPE_NAME("cortex-a57"), ARM_CPU_TYPE_NAME("cortex-a72"), ARM_CPU_TYPE_NAME("neoverse-n1"), + ARM_CPU_TYPE_NAME("neoverse-v1"), ARM_CPU_TYPE_NAME("max"), }; diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 3196db556e..796181e169 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -214,6 +214,7 @@ static const char *valid_cpus[] = { ARM_CPU_TYPE_NAME("cortex-a76"), ARM_CPU_TYPE_NAME("a64fx"), ARM_CPU_TYPE_NAME("neoverse-n1"), + ARM_CPU_TYPE_NAME("neoverse-v1"), #endif ARM_CPU_TYPE_NAME("cortex-a53"), ARM_CPU_TYPE_NAME("cortex-a57"), diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 6fec2d8a57..8019f00bc3 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -502,6 +502,31 @@ static void define_neoverse_n1_cp_reginfo(ARMCPU *cpu) define_arm_cp_regs(cpu, neoverse_n1_cp_reginfo); } +static const ARMCPRegInfo neoverse_v1_cp_reginfo[] = { + { .name = "CPUECTLR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 5, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR2_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 1, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR3_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 6, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, +}; + +static void define_neoverse_v1_cp_reginfo(ARMCPU *cpu) +{ + /* + * The Neoverse V1 has all of the Neoverse N1's IMPDEF + * registers and a few more of its own. + */ + define_arm_cp_regs(cpu, neoverse_n1_cp_reginfo); + define_arm_cp_regs(cpu, neoverse_v1_cp_reginfo); +} + static void aarch64_neoverse_n1_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); @@ -573,6 +598,108 @@ static void aarch64_neoverse_n1_initfn(Object *obj) define_neoverse_n1_cp_reginfo(cpu); } +static void aarch64_neoverse_v1_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,neoverse-v1"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + + /* Ordered by 3.2.4 AArch64 registers by functional group */ + cpu->clidr = 0x82000023; + cpu->ctr = 0xb444c004; /* With DIC and IDC set */ + cpu->dcz_blocksize = 4; + cpu->id_aa64afr0 = 0x00000000; + cpu->id_aa64afr1 = 0x00000000; + cpu->isar.id_aa64dfr0 = 0x000001f210305519ull; + cpu->isar.id_aa64dfr1 = 0x00000000; + cpu->isar.id_aa64isar0 = 0x1011111110212120ull; /* with FEAT_RNG */ + cpu->isar.id_aa64isar1 = 0x0111000001211032ull; + cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull; + cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; + cpu->isar.id_aa64mmfr2 = 0x0220011102101011ull; + cpu->isar.id_aa64pfr0 = 0x1101110120111112ull; /* GIC filled in later */ + cpu->isar.id_aa64pfr1 = 0x0000000000000020ull; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_dfr0 = 0x15011099; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00010142; + cpu->isar.id_isar5 = 0x11011121; + cpu->isar.id_isar6 = 0x01100111; + cpu->isar.id_mmfr0 = 0x10201105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02122211; + cpu->isar.id_mmfr4 = 0x01021110; + cpu->isar.id_pfr0 = 0x21110131; + cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ + cpu->isar.id_pfr2 = 0x00000011; + cpu->midr = 0x411FD402; /* r1p2 */ + cpu->revidr = 0; + + /* + * The Neoverse-V1 r1p2 TRM lists 32-bit format CCSIDR_EL1 values, + * but also says it implements CCIDX, which means they should be + * 64-bit format. So we here use values which are based on the textual + * information in chapter 2 of the TRM (and on the fact that + * sets * associativity * linesize == cachesize). + * + * The 64-bit CCSIDR_EL1 format is: + * [55:32] number of sets - 1 + * [23:3] associativity - 1 + * [2:0] log2(linesize) - 4 + * so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc + * + * L1: 4-way set associative 64-byte line size, total size 64K, + * so sets is 256. + * + * L2: 8-way set associative, 64 byte line size, either 512K or 1MB. + * We pick 1MB, so this has 2048 sets. + * + * L3: No L3 (this matches the CLIDR_EL1 value). + */ + cpu->ccsidr[0] = 0x000000ff0000001aull; /* 64KB L1 dcache */ + cpu->ccsidr[1] = 0x000000ff0000001aull; /* 64KB L1 icache */ + cpu->ccsidr[2] = 0x000007ff0000003aull; /* 1MB L2 cache */ + + /* From 3.2.115 SCTLR_EL3 */ + cpu->reset_sctlr = 0x30c50838; + + /* From 3.4.8 ICC_CTLR_EL3 and 3.4.23 ICH_VTR_EL2 */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + cpu->gic_pribits = 5; + + /* From 3.5.1 AdvSIMD AArch64 register summary */ + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x13211111; + cpu->isar.mvfr2 = 0x00000043; + + /* From 3.7.5 ID_AA64ZFR0_EL1 */ + cpu->isar.id_aa64zfr0 = 0x0000100000100000; + cpu->sve_vq.supported = (1 << 0) /* 128bit */ + | (1 << 1); /* 256bit */ + + /* From 5.5.1 AArch64 PMU register summary */ + cpu->isar.reset_pmcr_el0 = 0x41213000; + + define_neoverse_v1_cp_reginfo(cpu); + + aarch64_add_pauth_properties(obj); + aarch64_add_sve_properties(obj); +} + /* * -cpu max: a CPU with as many features enabled as our emulation supports. * The version of '-cpu max' for qemu-system-arm is defined in cpu32.c; @@ -763,6 +890,7 @@ static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, { .name = "neoverse-n1", .initfn = aarch64_neoverse_n1_initfn }, + { .name = "neoverse-v1", .initfn = aarch64_neoverse_v1_initfn }, }; static void aarch64_cpu_register_types(void) -- cgit v1.2.3 From c41077235168140cdd4a34fce9bd95c3d30efe9c Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 4 Jul 2023 16:43:32 +0100 Subject: target/arm: Avoid over-length shift in arm_cpu_sve_finalize() error case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If you build QEMU with the clang sanitizer enabled, you can see it fire when running the arm-cpu-features test: $ QTEST_QEMU_BINARY=./build/arm-clang/qemu-system-aarch64 ./build/arm-clang/tests/qtest/arm-cpu-features [...] ../../target/arm/cpu64.c:125:19: runtime error: shift exponent 64 is too large for 64-bit type 'unsigned long long' [...] This happens because the user can specify some incorrect SVE properties that result in our calculating a max_vq of 0. We catch this and error out, but before we do that we calculate vq_mask = MAKE_64BIT_MASK(0, max_vq);$ and the MAKE_64BIT_MASK() call is only valid for lengths that are greater than zero, so we hit the undefined behaviour. Change the logic so that if max_vq is 0 we specifically set vq_mask to 0 without going via MAKE_64BIT_MASK(). This lets us drop the max_vq check from the error-exit logic, because if max_vq is 0 then vq_map must now be 0. The UB only happens in the case where the user passed us an incorrect set of SVE properties, so it's not a big problem in practice. Signed-off-by: Peter Maydell Reviewed-by: Alex Bennée Reviewed-by: Richard Henderson Message-id: 20230704154332.3014896-1-peter.maydell@linaro.org --- target/arm/cpu64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 6eaf8e32cf..6012e4ef54 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -122,10 +122,10 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) vq = ctz32(tmp) + 1; max_vq = vq <= ARM_MAX_VQ ? vq - 1 : ARM_MAX_VQ; - vq_mask = MAKE_64BIT_MASK(0, max_vq); + vq_mask = max_vq > 0 ? MAKE_64BIT_MASK(0, max_vq) : 0; vq_map = vq_supported & ~vq_init & vq_mask; - if (max_vq == 0 || vq_map == 0) { + if (vq_map == 0) { error_setg(errp, "cannot disable sve%d", vq * 128); error_append_hint(errp, "Disabling sve%d results in all " "vector lengths being disabled.\n", -- cgit v1.2.3