diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2023-07-06 16:19:21 +0100 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2023-07-06 16:19:21 +0100 |
commit | 822cb97cefe2416ce61fe8007ad69904bbe24502 (patch) | |
tree | 8bcf63d80916eb7d2a3e46d82c405fb2426db952 | |
parent | 0618e72d64e434dd6f1bc38b107670474c49fb86 (diff) | |
parent | c41077235168140cdd4a34fce9bd95c3d30efe9c (diff) |
Merge tag 'pull-target-arm-20230706' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
target-arm queue:
* Add raw_writes ops for register whose write induce TLB maintenance
* hw/arm/sbsa-ref: use XHCI to replace EHCI
* Avoid splitting Zregs across lines in dump
* Dump ZA[] when active
* Fix SME full tile indexing
* Handle IC IVAU to improve compatibility with JITs
* xlnx-canfd-test: Fix code coverity issues
* gdbstub: Guard M-profile code with CONFIG_TCG
* allwinner-sramc: Set class_size
* target/xtensa: Assert that interrupt level is within bounds
* Avoid over-length shift in arm_cpu_sve_finalize() error case
* Define new 'neoverse-v1' CPU type
# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmSmwEEZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3vBcD/4vKUw6klRV7vyz/KBr2AOi
# Z1FnkLmOhwdp7CKvAVfU58TbPEJ8Fjo7OjziByV5nn/Ht9XrXcdl/E+0JamgrJ/n
# G90ZfpoY3Boan4XBukBz/KX63sT1erF4io1NxbvqLxZ2mbZWNb0D1v2qkxC5zPFE
# 97knlbSle4/VB8N6VgaPaWKVy5gmBZQwl7NUlFtB8TTZp3HPo0V77E9p1Wqpwpls
# BNbqdtgUre3dlJci2f24PmXHYraKa68qk9xGnsSae96EY2+pOHbKhoZ/Fobaor2C
# u+dfgQ3fY3aLDVKx8UESIUoqkGoVqwEbmt+pWG2rJiljLkdsI3ZsVq7p3+VGbLAN
# berL14kCC2vRQYeNUwxeh5wdNVXc58xhWI5KXQRe8hr1dKWS5LQEHWgr7g7mb0+m
# zPHqbdF4FR1DAV29vQ9WyK4zttrinFAYl+zvLyd8dX2ogoUeivR+4o3YX4hlFr4H
# vcrglZbCGqAb3oKQG3PSGliS9GYtBwodLqKEH8PfcwfOP5PIcnSVc0Kl9DSzf7um
# dAuYpaK/XW3MPx5qpWjnip4dRWUV5m/6nSCJr+fELEv3A0sGZY4pywv5NS/Yg1wE
# nXdi8D+nyx9+AAiWTcB+ePsLuDEO2gYtubfqed99TFoJbL6/b4NbH8YE6cF3N/gY
# lqFyvEIYNJZ9klf7XKnX2w==
# =/MkB
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 06 Jul 2023 02:23:13 PM BST
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [full]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full]
# gpg: aka "Peter Maydell <peter@archaic.org.uk>" [unknown]
* tag 'pull-target-arm-20230706' of https://git.linaro.org/people/pmaydell/qemu-arm:
target/arm: Avoid over-length shift in arm_cpu_sve_finalize() error case
target/arm: Define neoverse-v1
target/arm: Suppress more TCG unimplemented features in ID registers
target/xtensa: Assert that interrupt level is within bounds
hw: arm: allwinner-sramc: Set class_size
target/arm: gdbstub: Guard M-profile code with CONFIG_TCG
tests/qtest: xlnx-canfd-test: Fix code coverity issues
target/arm: Handle IC IVAU to improve compatibility with JITs
target/arm: Fix SME full tile indexing
target/arm: Dump ZA[] when active
target/arm: Avoid splitting Zregs across lines in dump
tests/tcg/aarch64/sysregs.c: Use S syntax for id_aa64zfr0_el1 and id_aa64smfr0_el1
hw/arm/sbsa-ref: use XHCI to replace EHCI
target/arm: Add raw_writes ops for register whose write induce TLB maintenance
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- | docs/system/arm/sbsa.rst | 5 | ||||
-rw-r--r-- | docs/system/arm/virt.rst | 1 | ||||
-rw-r--r-- | hw/arm/Kconfig | 2 | ||||
-rw-r--r-- | hw/arm/sbsa-ref.c | 24 | ||||
-rw-r--r-- | hw/arm/virt.c | 1 | ||||
-rw-r--r-- | hw/misc/allwinner-sramc.c | 1 | ||||
-rw-r--r-- | target/arm/cpu.c | 98 | ||||
-rw-r--r-- | target/arm/cpu64.c | 4 | ||||
-rw-r--r-- | target/arm/gdbstub.c | 4 | ||||
-rw-r--r-- | target/arm/helper.c | 70 | ||||
-rw-r--r-- | target/arm/tcg/cpu64.c | 128 | ||||
-rw-r--r-- | target/arm/tcg/translate-sme.c | 24 | ||||
-rw-r--r-- | target/xtensa/exc_helper.c | 3 | ||||
-rw-r--r-- | tests/qtest/xlnx-canfd-test.c | 33 | ||||
-rw-r--r-- | tests/tcg/aarch64/Makefile.target | 14 | ||||
-rw-r--r-- | tests/tcg/aarch64/sme-outprod1.c | 83 | ||||
-rw-r--r-- | tests/tcg/aarch64/sysregs.c | 11 |
17 files changed, 414 insertions, 92 deletions
diff --git a/docs/system/arm/sbsa.rst b/docs/system/arm/sbsa.rst index a8e0b530a2..bca61608ff 100644 --- a/docs/system/arm/sbsa.rst +++ b/docs/system/arm/sbsa.rst @@ -19,7 +19,7 @@ The ``sbsa-ref`` board supports: - A configurable number of AArch64 CPUs - GIC version 3 - System bus AHCI controller - - System bus EHCI controller + - System bus XHCI controller - CDROM and hard disc on AHCI bus - E1000E ethernet card on PCIe bus - Bochs display adapter on PCIe bus @@ -68,3 +68,6 @@ Platform version changes: 0.2 GIC ITS information is present in devicetree. + +0.3 + The USB controller is an XHCI device, not EHCI diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst index 1cab33f02e..51cdac6841 100644 --- a/docs/system/arm/virt.rst +++ b/docs/system/arm/virt.rst @@ -61,6 +61,7 @@ Supported guest CPU types: - ``a64fx`` (64-bit) - ``host`` (with KVM only) - ``neoverse-n1`` (64-bit) +- ``neoverse-v1`` (64-bit) - ``max`` (same as ``host`` for KVM; best possible emulation with TCG) Note that the default is ``cortex-a15``, so for an AArch64 guest you must diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index 7de17d1e8c..7e68348440 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -266,7 +266,7 @@ config SBSA_REF select PL011 # UART select PL031 # RTC select PL061 # GPIO - select USB_EHCI_SYSBUS + select USB_XHCI_SYSBUS select WDT_SBSA select BOCHS_DISPLAY diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index 82a28b2e0b..c2e0a9fa1a 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -42,6 +42,7 @@ #include "hw/pci-host/gpex.h" #include "hw/qdev-properties.h" #include "hw/usb.h" +#include "hw/usb/xhci.h" #include "hw/char/pl011.h" #include "hw/watchdog/sbsa_gwdt.h" #include "net/net.h" @@ -85,7 +86,7 @@ enum { SBSA_SECURE_UART_MM, SBSA_SECURE_MEM, SBSA_AHCI, - SBSA_EHCI, + SBSA_XHCI, }; struct SBSAMachineState { @@ -123,7 +124,7 @@ static const MemMapEntry sbsa_ref_memmap[] = { [SBSA_SMMU] = { 0x60050000, 0x00020000 }, /* Space here reserved for more SMMUs */ [SBSA_AHCI] = { 0x60100000, 0x00010000 }, - [SBSA_EHCI] = { 0x60110000, 0x00010000 }, + [SBSA_XHCI] = { 0x60110000, 0x00010000 }, /* Space here reserved for other devices */ [SBSA_PCIE_PIO] = { 0x7fff0000, 0x00010000 }, /* 32-bit address PCIE MMIO space */ @@ -143,7 +144,7 @@ static const int sbsa_ref_irqmap[] = { [SBSA_SECURE_UART] = 8, [SBSA_SECURE_UART_MM] = 9, [SBSA_AHCI] = 10, - [SBSA_EHCI] = 11, + [SBSA_XHCI] = 11, [SBSA_SMMU] = 12, /* ... to 15 */ [SBSA_GWDT_WS0] = 16, }; @@ -152,6 +153,7 @@ static const char * const valid_cpus[] = { ARM_CPU_TYPE_NAME("cortex-a57"), ARM_CPU_TYPE_NAME("cortex-a72"), ARM_CPU_TYPE_NAME("neoverse-n1"), + ARM_CPU_TYPE_NAME("neoverse-v1"), ARM_CPU_TYPE_NAME("max"), }; @@ -230,7 +232,7 @@ static void create_fdt(SBSAMachineState *sms) * fw compatibility. */ qemu_fdt_setprop_cell(fdt, "/", "machine-version-major", 0); - qemu_fdt_setprop_cell(fdt, "/", "machine-version-minor", 2); + qemu_fdt_setprop_cell(fdt, "/", "machine-version-minor", 3); if (ms->numa_state->have_numa_distance) { int size = nb_numa_nodes * nb_numa_nodes * 3 * sizeof(uint32_t); @@ -604,13 +606,15 @@ static void create_ahci(const SBSAMachineState *sms) } } -static void create_ehci(const SBSAMachineState *sms) +static void create_xhci(const SBSAMachineState *sms) { - hwaddr base = sbsa_ref_memmap[SBSA_EHCI].base; - int irq = sbsa_ref_irqmap[SBSA_EHCI]; + hwaddr base = sbsa_ref_memmap[SBSA_XHCI].base; + int irq = sbsa_ref_irqmap[SBSA_XHCI]; + DeviceState *dev = qdev_new(TYPE_XHCI_SYSBUS); - sysbus_create_simple("platform-ehci-usb", base, - qdev_get_gpio_in(sms->gic, irq)); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(sms->gic, irq)); } static void create_smmu(const SBSAMachineState *sms, PCIBus *bus) @@ -832,7 +836,7 @@ static void sbsa_ref_init(MachineState *machine) create_ahci(sms); - create_ehci(sms); + create_xhci(sms); create_pcie(sms); diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 3196db556e..796181e169 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -214,6 +214,7 @@ static const char *valid_cpus[] = { ARM_CPU_TYPE_NAME("cortex-a76"), ARM_CPU_TYPE_NAME("a64fx"), ARM_CPU_TYPE_NAME("neoverse-n1"), + ARM_CPU_TYPE_NAME("neoverse-v1"), #endif ARM_CPU_TYPE_NAME("cortex-a53"), ARM_CPU_TYPE_NAME("cortex-a57"), diff --git a/hw/misc/allwinner-sramc.c b/hw/misc/allwinner-sramc.c index a8b731f8f2..d76c24d081 100644 --- a/hw/misc/allwinner-sramc.c +++ b/hw/misc/allwinner-sramc.c @@ -159,6 +159,7 @@ static const TypeInfo allwinner_sramc_info = { .parent = TYPE_SYS_BUS_DEVICE, .instance_init = allwinner_sramc_init, .instance_size = sizeof(AwSRAMCState), + .class_size = sizeof(AwSRAMCClass), .class_init = allwinner_sramc_class_init, }; diff --git a/target/arm/cpu.c b/target/arm/cpu.c index a1e77698ba..69e2bde3c2 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -955,7 +955,7 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; uint32_t psr = pstate_read(env); - int i; + int i, j; int el = arm_current_el(env); const char *ns_status; bool sve; @@ -1014,7 +1014,7 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) } if (sve) { - int j, zcr_len = sve_vqm1_for_el(env, el); + int zcr_len = sve_vqm1_for_el(env, el); for (i = 0; i <= FFR_PRED_NUM; i++) { bool eol; @@ -1054,32 +1054,24 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) } } - for (i = 0; i < 32; i++) { - if (zcr_len == 0) { + if (zcr_len == 0) { + /* + * With vl=16, there are only 37 columns per register, + * so output two registers per line. + */ + for (i = 0; i < 32; i++) { qemu_fprintf(f, "Z%02d=%016" PRIx64 ":%016" PRIx64 "%s", i, env->vfp.zregs[i].d[1], env->vfp.zregs[i].d[0], i & 1 ? "\n" : " "); - } else if (zcr_len == 1) { - qemu_fprintf(f, "Z%02d=%016" PRIx64 ":%016" PRIx64 - ":%016" PRIx64 ":%016" PRIx64 "\n", - i, env->vfp.zregs[i].d[3], env->vfp.zregs[i].d[2], - env->vfp.zregs[i].d[1], env->vfp.zregs[i].d[0]); - } else { + } + } else { + for (i = 0; i < 32; i++) { + qemu_fprintf(f, "Z%02d=", i); for (j = zcr_len; j >= 0; j--) { - bool odd = (zcr_len - j) % 2 != 0; - if (j == zcr_len) { - qemu_fprintf(f, "Z%02d[%x-%x]=", i, j, j - 1); - } else if (!odd) { - if (j > 0) { - qemu_fprintf(f, " [%x-%x]=", j, j - 1); - } else { - qemu_fprintf(f, " [%x]=", j); - } - } qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%s", env->vfp.zregs[i].d[j * 2 + 1], - env->vfp.zregs[i].d[j * 2], - odd || j == 0 ? "\n" : ":"); + env->vfp.zregs[i].d[j * 2 + 0], + j ? ":" : "\n"); } } } @@ -1090,6 +1082,24 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) i, q[1], q[0], (i & 1 ? "\n" : " ")); } } + + if (cpu_isar_feature(aa64_sme, cpu) && + FIELD_EX64(env->svcr, SVCR, ZA) && + sme_exception_el(env, el) == 0) { + int zcr_len = sve_vqm1_for_el_sm(env, el, true); + int svl = (zcr_len + 1) * 16; + int svl_lg10 = svl < 100 ? 2 : 3; + + for (i = 0; i < svl; i++) { + qemu_fprintf(f, "ZA[%0*d]=", svl_lg10, i); + for (j = zcr_len; j >= 0; --j) { + qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%c", + env->zarray[i].d[2 * j + 1], + env->zarray[i].d[2 * j], + j ? ':' : '\n'); + } + } + } } #else @@ -1684,6 +1694,17 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) return; } +#ifdef CONFIG_USER_ONLY + /* + * User mode relies on IC IVAU instructions to catch modification of + * dual-mapped code. + * + * Clear CTR_EL0.DIC to ensure that software that honors these flags uses + * IC IVAU even if the emulated processor does not normally require it. + */ + cpu->ctr = FIELD_DP64(cpu->ctr, CTR_EL0, DIC, 0); +#endif + if (arm_feature(env, ARM_FEATURE_AARCH64) && cpu->has_vfp != cpu->has_neon) { /* @@ -2048,13 +2069,38 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) if (tcg_enabled()) { /* - * Don't report the Statistical Profiling Extension in the ID - * registers, because TCG doesn't implement it yet (not even a - * minimal stub version) and guests will fall over when they - * try to access the non-existent system registers for it. + * Don't report some architectural features in the ID registers + * where TCG does not yet implement it (not even a minimal + * stub version). This avoids guests falling over when they + * try to access the non-existent system registers for them. */ + /* FEAT_SPE (Statistical Profiling Extension) */ cpu->isar.id_aa64dfr0 = FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMSVER, 0); + /* FEAT_TRF (Self-hosted Trace Extension) */ + cpu->isar.id_aa64dfr0 = + FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEFILT, 0); + cpu->isar.id_dfr0 = + FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, TRACEFILT, 0); + /* Trace Macrocell system register access */ + cpu->isar.id_aa64dfr0 = + FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEVER, 0); + cpu->isar.id_dfr0 = + FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPTRC, 0); + /* Memory mapped trace */ + cpu->isar.id_dfr0 = + FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, MMAPTRC, 0); + /* FEAT_AMU (Activity Monitors Extension) */ + cpu->isar.id_aa64pfr0 = + FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, AMU, 0); + cpu->isar.id_pfr0 = + FIELD_DP32(cpu->isar.id_pfr0, ID_PFR0, AMU, 0); + /* FEAT_MPAM (Memory Partitioning and Monitoring Extension) */ + cpu->isar.id_aa64pfr0 = + FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, MPAM, 0); + /* FEAT_NV (Nested Virtualization) */ + cpu->isar.id_aa64mmfr2 = + FIELD_DP64(cpu->isar.id_aa64mmfr2, ID_AA64MMFR2, NV, 0); } /* MPU can be configured out of a PMSA CPU either by setting has-mpu diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 6eaf8e32cf..6012e4ef54 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -122,10 +122,10 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) vq = ctz32(tmp) + 1; max_vq = vq <= ARM_MAX_VQ ? vq - 1 : ARM_MAX_VQ; - vq_mask = MAKE_64BIT_MASK(0, max_vq); + vq_mask = max_vq > 0 ? MAKE_64BIT_MASK(0, max_vq) : 0; vq_map = vq_supported & ~vq_init & vq_mask; - if (max_vq == 0 || vq_map == 0) { + if (vq_map == 0) { error_setg(errp, "cannot disable sve%d", vq * 128); error_append_hint(errp, "Disabling sve%d results in all " "vector lengths being disabled.\n", diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c index 03b17c814f..f421c5d041 100644 --- a/target/arm/gdbstub.c +++ b/target/arm/gdbstub.c @@ -324,6 +324,7 @@ static int arm_gen_dynamic_sysreg_xml(CPUState *cs, int base_reg) return cpu->dyn_sysreg_xml.num; } +#ifdef CONFIG_TCG typedef enum { M_SYSREG_MSP, M_SYSREG_PSP, @@ -481,6 +482,7 @@ static int arm_gen_dynamic_m_secextreg_xml(CPUState *cs, int orig_base_reg) return cpu->dyn_m_secextreg_xml.num; } #endif +#endif /* CONFIG_TCG */ const char *arm_gdb_get_dynamic_xml(CPUState *cs, const char *xmlname) { @@ -561,6 +563,7 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) arm_gen_dynamic_sysreg_xml(cs, cs->gdb_num_regs), "system-registers.xml", 0); +#ifdef CONFIG_TCG if (arm_feature(env, ARM_FEATURE_M) && tcg_enabled()) { gdb_register_coprocessor(cs, arm_gdb_get_m_systemreg, arm_gdb_set_m_systemreg, @@ -575,4 +578,5 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) } #endif } +#endif /* CONFIG_TCG */ } diff --git a/target/arm/helper.c b/target/arm/helper.c index d08c058e42..8e836aaee1 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -4189,14 +4189,14 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = { .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_TTBR0_EL1, - .writefn = vmsa_ttbr_write, .resetvalue = 0, + .writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s), offsetof(CPUARMState, cp15.ttbr0_ns) } }, { .name = "TTBR1_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 1, .access = PL1_RW, .accessfn = access_tvm_trvm, .fgt = FGT_TTBR1_EL1, - .writefn = vmsa_ttbr_write, .resetvalue = 0, + .writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s), offsetof(CPUARMState, cp15.ttbr1_ns) } }, { .name = "TCR_EL1", .state = ARM_CP_STATE_AA64, @@ -4456,13 +4456,13 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = { .type = ARM_CP_64BIT | ARM_CP_ALIAS, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s), offsetof(CPUARMState, cp15.ttbr0_ns) }, - .writefn = vmsa_ttbr_write, }, + .writefn = vmsa_ttbr_write, .raw_writefn = raw_write }, { .name = "TTBR1", .cp = 15, .crm = 2, .opc1 = 1, .access = PL1_RW, .accessfn = access_tvm_trvm, .type = ARM_CP_64BIT | ARM_CP_ALIAS, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s), offsetof(CPUARMState, cp15.ttbr1_ns) }, - .writefn = vmsa_ttbr_write, }, + .writefn = vmsa_ttbr_write, .raw_writefn = raw_write }, }; static uint64_t aa64_fpcr_read(CPUARMState *env, const ARMCPRegInfo *ri) @@ -5234,6 +5234,36 @@ static void mdcr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, } } +#ifdef CONFIG_USER_ONLY +/* + * `IC IVAU` is handled to improve compatibility with JITs that dual-map their + * code to get around W^X restrictions, where one region is writable and the + * other is executable. + * + * Since the executable region is never written to we cannot detect code + * changes when running in user mode, and rely on the emulated JIT telling us + * that the code has changed by executing this instruction. + */ +static void ic_ivau_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint64_t icache_line_mask, start_address, end_address; + const ARMCPU *cpu; + + cpu = env_archcpu(env); + + icache_line_mask = (4 << extract32(cpu->ctr, 0, 4)) - 1; + start_address = value & ~icache_line_mask; + end_address = value | icache_line_mask; + + mmap_lock(); + + tb_invalidate_phys_range(start_address, end_address); + + mmap_unlock(); +} +#endif + static const ARMCPRegInfo v8_cp_reginfo[] = { /* * Minimal set of EL0-visible registers. This will need to be expanded @@ -5273,7 +5303,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = "CURRENTEL", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 4, .crm = 2, .access = PL1_R, .type = ARM_CP_CURRENTEL }, - /* Cache ops: all NOPs since we don't emulate caches */ + /* + * Instruction cache ops. All of these except `IC IVAU` NOP because we + * don't emulate caches. + */ { .name = "IC_IALLUIS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, .access = PL1_W, .type = ARM_CP_NOP, @@ -5286,9 +5319,17 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .accessfn = access_tocu }, { .name = "IC_IVAU", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 5, .opc2 = 1, - .access = PL0_W, .type = ARM_CP_NOP, + .access = PL0_W, .fgt = FGT_ICIVAU, - .accessfn = access_tocu }, + .accessfn = access_tocu, +#ifdef CONFIG_USER_ONLY + .type = ARM_CP_NO_RAW, + .writefn = ic_ivau_write +#else + .type = ARM_CP_NOP +#endif + }, + /* Cache ops: all NOPs since we don't emulate caches */ { .name = "DC_IVAC", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1, .access = PL1_W, .accessfn = aa64_cacheop_poc_access, @@ -5911,7 +5952,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .type = ARM_CP_IO, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), - .writefn = hcr_write }, + .writefn = hcr_write, .raw_writefn = raw_write }, { .name = "HCR", .state = ARM_CP_STATE_AA32, .type = ARM_CP_ALIAS | ARM_CP_IO, .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, @@ -5983,6 +6024,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "TCR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 2, .access = PL2_RW, .writefn = vmsa_tcr_el12_write, + .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[2]) }, { .name = "VTCR", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2, @@ -5999,10 +6041,10 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .type = ARM_CP_64BIT | ARM_CP_ALIAS, .access = PL2_RW, .accessfn = access_el3_aa32ns, .fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2), - .writefn = vttbr_write }, + .writefn = vttbr_write, .raw_writefn = raw_write }, { .name = "VTTBR_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 0, - .access = PL2_RW, .writefn = vttbr_write, + .access = PL2_RW, .writefn = vttbr_write, .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2) }, { .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0, @@ -6014,7 +6056,8 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[2]) }, { .name = "TTBR0_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 0, - .access = PL2_RW, .resetvalue = 0, .writefn = vmsa_tcr_ttbr_el2_write, + .access = PL2_RW, .resetvalue = 0, + .writefn = vmsa_tcr_ttbr_el2_write, .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[2]) }, { .name = "HTTBR", .cp = 15, .opc1 = 4, .crm = 2, .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS, @@ -6201,12 +6244,12 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { { .name = "SCR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 0, .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.scr_el3), - .resetfn = scr_reset, .writefn = scr_write }, + .resetfn = scr_reset, .writefn = scr_write, .raw_writefn = raw_write }, { .name = "SCR", .type = ARM_CP_ALIAS | ARM_CP_NEWEL, .cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 0, .access = PL1_RW, .accessfn = access_trap_aa32s_el1, .fieldoffset = offsetoflow32(CPUARMState, cp15.scr_el3), - .writefn = scr_write }, + .writefn = scr_write, .raw_writefn = raw_write }, { .name = "SDER32_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 1, .access = PL3_RW, .resetvalue = 0, @@ -7927,6 +7970,7 @@ static const ARMCPRegInfo vhe_reginfo[] = { { .name = "TTBR1_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 1, .access = PL2_RW, .writefn = vmsa_tcr_ttbr_el2_write, + .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el[2]) }, #ifndef CONFIG_USER_ONLY { .name = "CNTHV_CVAL_EL2", .state = ARM_CP_STATE_AA64, diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 6fec2d8a57..8019f00bc3 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -502,6 +502,31 @@ static void define_neoverse_n1_cp_reginfo(ARMCPU *cpu) define_arm_cp_regs(cpu, neoverse_n1_cp_reginfo); } +static const ARMCPRegInfo neoverse_v1_cp_reginfo[] = { + { .name = "CPUECTLR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 5, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR2_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 1, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR3_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 6, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, +}; + +static void define_neoverse_v1_cp_reginfo(ARMCPU *cpu) +{ + /* + * The Neoverse V1 has all of the Neoverse N1's IMPDEF + * registers and a few more of its own. + */ + define_arm_cp_regs(cpu, neoverse_n1_cp_reginfo); + define_arm_cp_regs(cpu, neoverse_v1_cp_reginfo); +} + static void aarch64_neoverse_n1_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); @@ -573,6 +598,108 @@ static void aarch64_neoverse_n1_initfn(Object *obj) define_neoverse_n1_cp_reginfo(cpu); } +static void aarch64_neoverse_v1_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,neoverse-v1"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + + /* Ordered by 3.2.4 AArch64 registers by functional group */ + cpu->clidr = 0x82000023; + cpu->ctr = 0xb444c004; /* With DIC and IDC set */ + cpu->dcz_blocksize = 4; + cpu->id_aa64afr0 = 0x00000000; + cpu->id_aa64afr1 = 0x00000000; + cpu->isar.id_aa64dfr0 = 0x000001f210305519ull; + cpu->isar.id_aa64dfr1 = 0x00000000; + cpu->isar.id_aa64isar0 = 0x1011111110212120ull; /* with FEAT_RNG */ + cpu->isar.id_aa64isar1 = 0x0111000001211032ull; + cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull; + cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; + cpu->isar.id_aa64mmfr2 = 0x0220011102101011ull; + cpu->isar.id_aa64pfr0 = 0x1101110120111112ull; /* GIC filled in later */ + cpu->isar.id_aa64pfr1 = 0x0000000000000020ull; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_dfr0 = 0x15011099; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00010142; + cpu->isar.id_isar5 = 0x11011121; + cpu->isar.id_isar6 = 0x01100111; + cpu->isar.id_mmfr0 = 0x10201105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02122211; + cpu->isar.id_mmfr4 = 0x01021110; + cpu->isar.id_pfr0 = 0x21110131; + cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ + cpu->isar.id_pfr2 = 0x00000011; + cpu->midr = 0x411FD402; /* r1p2 */ + cpu->revidr = 0; + + /* + * The Neoverse-V1 r1p2 TRM lists 32-bit format CCSIDR_EL1 values, + * but also says it implements CCIDX, which means they should be + * 64-bit format. So we here use values which are based on the textual + * information in chapter 2 of the TRM (and on the fact that + * sets * associativity * linesize == cachesize). + * + * The 64-bit CCSIDR_EL1 format is: + * [55:32] number of sets - 1 + * [23:3] associativity - 1 + * [2:0] log2(linesize) - 4 + * so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc + * + * L1: 4-way set associative 64-byte line size, total size 64K, + * so sets is 256. + * + * L2: 8-way set associative, 64 byte line size, either 512K or 1MB. + * We pick 1MB, so this has 2048 sets. + * + * L3: No L3 (this matches the CLIDR_EL1 value). + */ + cpu->ccsidr[0] = 0x000000ff0000001aull; /* 64KB L1 dcache */ + cpu->ccsidr[1] = 0x000000ff0000001aull; /* 64KB L1 icache */ + cpu->ccsidr[2] = 0x000007ff0000003aull; /* 1MB L2 cache */ + + /* From 3.2.115 SCTLR_EL3 */ + cpu->reset_sctlr = 0x30c50838; + + /* From 3.4.8 ICC_CTLR_EL3 and 3.4.23 ICH_VTR_EL2 */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + cpu->gic_pribits = 5; + + /* From 3.5.1 AdvSIMD AArch64 register summary */ + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x13211111; + cpu->isar.mvfr2 = 0x00000043; + + /* From 3.7.5 ID_AA64ZFR0_EL1 */ + cpu->isar.id_aa64zfr0 = 0x0000100000100000; + cpu->sve_vq.supported = (1 << 0) /* 128bit */ + | (1 << 1); /* 256bit */ + + /* From 5.5.1 AArch64 PMU register summary */ + cpu->isar.reset_pmcr_el0 = 0x41213000; + + define_neoverse_v1_cp_reginfo(cpu); + + aarch64_add_pauth_properties(obj); + aarch64_add_sve_properties(obj); +} + /* * -cpu max: a CPU with as many features enabled as our emulation supports. * The version of '-cpu max' for qemu-system-arm is defined in cpu32.c; @@ -763,6 +890,7 @@ static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, { .name = "neoverse-n1", .initfn = aarch64_neoverse_n1_initfn }, + { .name = "neoverse-v1", .initfn = aarch64_neoverse_v1_initfn }, }; static void aarch64_cpu_register_types(void) diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c index d0054e3f77..6038b0a06f 100644 --- a/target/arm/tcg/translate-sme.c +++ b/target/arm/tcg/translate-sme.c @@ -95,6 +95,21 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, return addr; } +/* + * Resolve tile.size[0] to a host pointer. + * Used by e.g. outer product insns where we require the entire tile. + */ +static TCGv_ptr get_tile(DisasContext *s, int esz, int tile) +{ + TCGv_ptr addr = tcg_temp_new_ptr(); + int offset; + + offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, zarray); + + tcg_gen_addi_ptr(addr, cpu_env, offset); + return addr; +} + static bool trans_ZERO(DisasContext *s, arg_ZERO *a) { if (!dc_isar_feature(aa64_sme, s)) { @@ -260,8 +275,7 @@ static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz, return true; } - /* Sum XZR+zad to find ZAd. */ - za = get_tile_rowcol(s, esz, 31, a->zad, false); + za = get_tile(s, esz, a->zad); zn = vec_full_reg_ptr(s, a->zn); pn = pred_full_reg_ptr(s, a->pn); pm = pred_full_reg_ptr(s, a->pm); @@ -286,8 +300,7 @@ static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz, return true; } - /* Sum XZR+zad to find ZAd. */ - za = get_tile_rowcol(s, esz, 31, a->zad, false); + za = get_tile(s, esz, a->zad); zn = vec_full_reg_ptr(s, a->zn); zm = vec_full_reg_ptr(s, a->zm); pn = pred_full_reg_ptr(s, a->pn); @@ -308,8 +321,7 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, return true; } - /* Sum XZR+zad to find ZAd. */ - za = get_tile_rowcol(s, esz, 31, a->zad, false); + za = get_tile(s, esz, a->zad); zn = vec_full_reg_ptr(s, a->zn); zm = vec_full_reg_ptr(s, a->zm); pn = pred_full_reg_ptr(s, a->pn); diff --git a/target/xtensa/exc_helper.c b/target/xtensa/exc_helper.c index d4823a65cd..43f6a862de 100644 --- a/target/xtensa/exc_helper.c +++ b/target/xtensa/exc_helper.c @@ -169,6 +169,9 @@ static void handle_interrupt(CPUXtensaState *env) CPUState *cs = env_cpu(env); if (level > 1) { + /* env->config->nlevel check should have ensured this */ + assert(level < sizeof(env->config->interrupt_vector)); + env->sregs[EPC1 + level - 1] = env->pc; env->sregs[EPS2 + level - 2] = env->sregs[PS]; env->sregs[PS] = diff --git a/tests/qtest/xlnx-canfd-test.c b/tests/qtest/xlnx-canfd-test.c index 76ee106d4f..78ec9ef2a7 100644 --- a/tests/qtest/xlnx-canfd-test.c +++ b/tests/qtest/xlnx-canfd-test.c @@ -170,23 +170,23 @@ static void generate_random_data(uint32_t *buf_tx, bool is_canfd_frame) /* Generate random TX data for CANFD frame. */ if (is_canfd_frame) { for (int i = 0; i < CANFD_FRAME_SIZE - 2; i++) { - buf_tx[2 + i] = rand(); + buf_tx[2 + i] = g_random_int(); } } else { /* Generate random TX data for CAN frame. */ for (int i = 0; i < CAN_FRAME_SIZE - 2; i++) { - buf_tx[2 + i] = rand(); + buf_tx[2 + i] = g_random_int(); } } } -static void read_data(QTestState *qts, uint64_t can_base_addr, uint32_t *buf_rx) +static void read_data(QTestState *qts, uint64_t can_base_addr, uint32_t *buf_rx, + uint32_t frame_size) { uint32_t int_status; uint32_t fifo_status_reg_value; /* At which RX FIFO the received data is stored. */ uint8_t store_ind = 0; - bool is_canfd_frame = false; /* Read the interrupt on CANFD rx. */ int_status = qtest_readl(qts, can_base_addr + R_ISR_OFFSET) & ISR_RXOK; @@ -207,16 +207,9 @@ static void read_data(QTestState *qts, uint64_t can_base_addr, uint32_t *buf_rx) buf_rx[0] = qtest_readl(qts, can_base_addr + R_RX0_ID_OFFSET); buf_rx[1] = qtest_readl(qts, can_base_addr + R_RX0_DLC_OFFSET); - is_canfd_frame = (buf_rx[1] >> DLC_FD_BIT_SHIFT) & 1; - - if (is_canfd_frame) { - for (int i = 0; i < CANFD_FRAME_SIZE - 2; i++) { - buf_rx[i + 2] = qtest_readl(qts, - can_base_addr + R_RX0_DATA1_OFFSET + 4 * i); - } - } else { - buf_rx[2] = qtest_readl(qts, can_base_addr + R_RX0_DATA1_OFFSET); - buf_rx[3] = qtest_readl(qts, can_base_addr + R_RX0_DATA2_OFFSET); + for (int i = 0; i < frame_size - 2; i++) { + buf_rx[i + 2] = qtest_readl(qts, + can_base_addr + R_RX0_DATA1_OFFSET + 4 * i); } /* Clear the RX interrupt. */ @@ -272,10 +265,6 @@ static void match_rx_tx_data(const uint32_t *buf_tx, const uint32_t *buf_rx, g_assert_cmpint((buf_rx[size] & DLC_FD_BIT_MASK), ==, (buf_tx[size] & DLC_FD_BIT_MASK)); } else { - if (!is_canfd_frame && size == 4) { - break; - } - g_assert_cmpint(buf_rx[size], ==, buf_tx[size]); } @@ -318,7 +307,7 @@ static void test_can_data_transfer(void) write_data(qts, CANFD0_BASE_ADDR, buf_tx, false); send_data(qts, CANFD0_BASE_ADDR); - read_data(qts, CANFD1_BASE_ADDR, buf_rx); + read_data(qts, CANFD1_BASE_ADDR, buf_rx, CAN_FRAME_SIZE); match_rx_tx_data(buf_tx, buf_rx, false); qtest_quit(qts); @@ -358,7 +347,7 @@ static void test_canfd_data_transfer(void) write_data(qts, CANFD0_BASE_ADDR, buf_tx, true); send_data(qts, CANFD0_BASE_ADDR); - read_data(qts, CANFD1_BASE_ADDR, buf_rx); + read_data(qts, CANFD1_BASE_ADDR, buf_rx, CANFD_FRAME_SIZE); match_rx_tx_data(buf_tx, buf_rx, true); qtest_quit(qts); @@ -397,7 +386,7 @@ static void test_can_loopback(void) write_data(qts, CANFD0_BASE_ADDR, buf_tx, true); send_data(qts, CANFD0_BASE_ADDR); - read_data(qts, CANFD0_BASE_ADDR, buf_rx); + read_data(qts, CANFD0_BASE_ADDR, buf_rx, CANFD_FRAME_SIZE); match_rx_tx_data(buf_tx, buf_rx, true); generate_random_data(buf_tx, true); @@ -405,7 +394,7 @@ static void test_can_loopback(void) write_data(qts, CANFD1_BASE_ADDR, buf_tx, true); send_data(qts, CANFD1_BASE_ADDR); - read_data(qts, CANFD1_BASE_ADDR, buf_rx); + read_data(qts, CANFD1_BASE_ADDR, buf_rx, CANFD_FRAME_SIZE); match_rx_tx_data(buf_tx, buf_rx, true); qtest_quit(qts); diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target index cec1d4b287..0606dec118 100644 --- a/tests/tcg/aarch64/Makefile.target +++ b/tests/tcg/aarch64/Makefile.target @@ -26,7 +26,7 @@ config-cc.mak: Makefile $(call cc-option,-march=armv8.5-a, CROSS_CC_HAS_ARMV8_5); \ $(call cc-option,-mbranch-protection=standard, CROSS_CC_HAS_ARMV8_BTI); \ $(call cc-option,-march=armv8.5-a+memtag, CROSS_CC_HAS_ARMV8_MTE); \ - $(call cc-option,-march=armv9-a+sme, CROSS_CC_HAS_ARMV9_SME)) 3> config-cc.mak + $(call cc-option,-Wa$(COMMA)-march=armv9-a+sme, CROSS_AS_HAS_ARMV9_SME)) 3> config-cc.mak -include config-cc.mak ifneq ($(CROSS_CC_HAS_ARMV8_2),) @@ -61,15 +61,15 @@ AARCH64_TESTS += mte-1 mte-2 mte-3 mte-4 mte-5 mte-6 mte-7 mte-%: CFLAGS += -march=armv8.5-a+memtag endif -ifneq ($(CROSS_CC_HAS_SVE),) +# SME Tests +ifneq ($(CROSS_AS_HAS_ARMV9_SME),) +AARCH64_TESTS += sme-outprod1 +endif + # System Registers Tests AARCH64_TESTS += sysregs -ifneq ($(CROSS_CC_HAS_ARMV9_SME),) -sysregs: CFLAGS+=-march=armv9-a+sme -DHAS_ARMV9_SME -else -sysregs: CFLAGS+=-march=armv8.1-a+sve -endif +ifneq ($(CROSS_CC_HAS_SVE),) # SVE ioctl test AARCH64_TESTS += sve-ioctls sve-ioctls: CFLAGS+=-march=armv8.1-a+sve diff --git a/tests/tcg/aarch64/sme-outprod1.c b/tests/tcg/aarch64/sme-outprod1.c new file mode 100644 index 0000000000..6e5972d75e --- /dev/null +++ b/tests/tcg/aarch64/sme-outprod1.c @@ -0,0 +1,83 @@ +/* + * SME outer product, 1 x 1. + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdio.h> + +extern void foo(float *dst); + +asm( +" .arch_extension sme\n" +" .type foo, @function\n" +"foo:\n" +" stp x29, x30, [sp, -80]!\n" +" mov x29, sp\n" +" stp d8, d9, [sp, 16]\n" +" stp d10, d11, [sp, 32]\n" +" stp d12, d13, [sp, 48]\n" +" stp d14, d15, [sp, 64]\n" +" smstart\n" +" ptrue p0.s, vl4\n" +" fmov z0.s, #1.0\n" +/* + * An outer product of a vector of 1.0 by itself should be a matrix of 1.0. + * Note that we are using tile 1 here (za1.s) rather than tile 0. + */ +" zero {za}\n" +" fmopa za1.s, p0/m, p0/m, z0.s, z0.s\n" +/* + * Read the first 4x4 sub-matrix of elements from tile 1: + * Note that za1h should be interchangable here. + */ +" mov w12, #0\n" +" mova z0.s, p0/m, za1v.s[w12, #0]\n" +" mova z1.s, p0/m, za1v.s[w12, #1]\n" +" mova z2.s, p0/m, za1v.s[w12, #2]\n" +" mova z3.s, p0/m, za1v.s[w12, #3]\n" +/* + * And store them to the input pointer (dst in the C code): + */ +" st1w {z0.s}, p0, [x0]\n" +" add x0, x0, #16\n" +" st1w {z1.s}, p0, [x0]\n" +" add x0, x0, #16\n" +" st1w {z2.s}, p0, [x0]\n" +" add x0, x0, #16\n" +" st1w {z3.s}, p0, [x0]\n" +" smstop\n" +" ldp d8, d9, [sp, 16]\n" +" ldp d10, d11, [sp, 32]\n" +" ldp d12, d13, [sp, 48]\n" +" ldp d14, d15, [sp, 64]\n" +" ldp x29, x30, [sp], 80\n" +" ret\n" +" .size foo, . - foo" +); + +int main() +{ + float dst[16]; + int i, j; + + foo(dst); + + for (i = 0; i < 16; i++) { + if (dst[i] != 1.0f) { + break; + } + } + + if (i == 16) { + return 0; /* success */ + } + + /* failure */ + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { + printf("%f ", (double)dst[i * 4 + j]); + } + printf("\n"); + } + return 1; +} diff --git a/tests/tcg/aarch64/sysregs.c b/tests/tcg/aarch64/sysregs.c index 46b931f781..d8eb06abcf 100644 --- a/tests/tcg/aarch64/sysregs.c +++ b/tests/tcg/aarch64/sysregs.c @@ -25,9 +25,14 @@ /* * Older assemblers don't recognize newer system register names, * but we can still access them by the Sn_n_Cn_Cn_n syntax. + * This also means we don't need to specifically request that the + * assembler enables whatever architectural features the ID registers + * syntax might be gated behind. */ #define SYS_ID_AA64ISAR2_EL1 S3_0_C0_C6_2 #define SYS_ID_AA64MMFR2_EL1 S3_0_C0_C7_2 +#define SYS_ID_AA64ZFR0_EL1 S3_0_C0_C4_4 +#define SYS_ID_AA64SMFR0_EL1 S3_0_C0_C4_5 int failed_bit_count; @@ -132,10 +137,8 @@ int main(void) /* all hidden, DebugVer fixed to 0x6 (ARMv8 debug architecture) */ get_cpu_reg_check_mask(id_aa64dfr0_el1, _m(0000,0000,0000,0006)); get_cpu_reg_check_zero(id_aa64dfr1_el1); - get_cpu_reg_check_mask(id_aa64zfr0_el1, _m(0ff0,ff0f,00ff,00ff)); -#ifdef HAS_ARMV9_SME - get_cpu_reg_check_mask(id_aa64smfr0_el1, _m(80f1,00fd,0000,0000)); -#endif + get_cpu_reg_check_mask(SYS_ID_AA64ZFR0_EL1, _m(0ff0,ff0f,00ff,00ff)); + get_cpu_reg_check_mask(SYS_ID_AA64SMFR0_EL1, _m(80f1,00fd,0000,0000)); get_cpu_reg_check_zero(id_aa64afr0_el1); get_cpu_reg_check_zero(id_aa64afr1_el1); |