diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2019-02-15 10:20:04 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2019-02-15 10:20:04 +0000 |
commit | 81dbcfa9e1d8bab3f7c4cc923c0b40cd666f374f (patch) | |
tree | 2bb4b8e5422dc39a98281abe7017003f5335a69c | |
parent | 0266c739abbed804deabb4ccde2aa449466ac3b4 (diff) | |
parent | 0f8b09b22234460cb5b8766a25066cf6b5f06842 (diff) |
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20190215' into staging
target-arm queue:
* gdbstub: Send a reply to the vKill packet
* Improve codegen for neon min/max and saturating arithmetic
* Fix a bug in clearing FPSCR exception status bits
* hw/arm/armsse: Fix miswiring of expansion IRQs
* hw/intc/armv7m_nvic: Allow byte accesses to SHPR1
* MAINTAINERS: Remove Peter Crosthwaite from various entries
* arm: Allow system registers for KVM guests to be changed by QEMU code
* linux-user: support HWCAP_CPUID which exposes ID registers to user code
* Fix bug in 128-bit cmpxchg for BE Arm guests
* Implement (no-op) HACR_EL2
* Fix CRn to be 14 for PMEVTYPER/PMEVCNTR
# gpg: Signature made Fri 15 Feb 2019 10:19:14 GMT
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE
* remotes/pmaydell/tags/pull-target-arm-20190215: (25 commits)
gdbstub: Send a reply to the vKill packet.
target/arm: Add missing clear_tail calls
target/arm: Use vector operations for saturation
target/arm: Split out FPSCR.QC to a vector field
target/arm: Fix set of bits kept in xregs[ARM_VFP_FPSCR]
target/arm: Split out flags setting from vfp compares
target/arm: Fix arm_cpu_dump_state vs FPSCR
target/arm: Fix vfp_gdb_get/set_reg vs FPSCR
target/arm: Remove neon min/max helpers
target/arm: Use tcg integer min/max primitives for neon
target/arm: Use vector minmax expanders for aarch32
target/arm: Use vector minmax expanders for aarch64
target/arm: Rely on optimization within tcg_gen_gvec_or
hw/arm/armsse: Fix miswiring of expansion IRQs
hw/intc/armv7m_nvic: Allow byte accesses to SHPR1
MAINTAINERS: Remove Peter Crosthwaite from various entries
arm: Allow system registers for KVM guests to be changed by QEMU code
linux-user/elfload: enable HWCAP_CPUID for AArch64
target/arm: expose remaining CPUID registers as RAZ
target/arm: expose MPIDR_EL1 to userspace
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | MAINTAINERS | 4 | ||||
-rw-r--r-- | gdbstub.c | 1 | ||||
-rw-r--r-- | hw/arm/armsse.c | 2 | ||||
-rw-r--r-- | hw/intc/armv7m_nvic.c | 4 | ||||
-rw-r--r-- | linux-user/elfload.c | 1 | ||||
-rw-r--r-- | target/arm/cpu.h | 50 | ||||
-rw-r--r-- | target/arm/helper-a64.c | 4 | ||||
-rw-r--r-- | target/arm/helper.c | 228 | ||||
-rw-r--r-- | target/arm/helper.h | 45 | ||||
-rw-r--r-- | target/arm/kvm32.c | 20 | ||||
-rw-r--r-- | target/arm/kvm64.c | 2 | ||||
-rw-r--r-- | target/arm/machine.c | 2 | ||||
-rw-r--r-- | target/arm/neon_helper.c | 14 | ||||
-rw-r--r-- | target/arm/translate-a64.c | 77 | ||||
-rw-r--r-- | target/arm/translate-sve.c | 6 | ||||
-rw-r--r-- | target/arm/translate.c | 219 | ||||
-rw-r--r-- | target/arm/translate.h | 4 | ||||
-rw-r--r-- | target/arm/vec_helper.c | 134 |
18 files changed, 622 insertions, 195 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index e170a4c733..ffb029f63a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -110,7 +110,6 @@ Guest CPU cores (TCG): ---------------------- Overall L: qemu-devel@nongnu.org -M: Peter Crosthwaite <crosthwaite.peter@gmail.com> M: Richard Henderson <rth@twiddle.net> R: Paolo Bonzini <pbonzini@redhat.com> S: Maintained @@ -1345,7 +1344,6 @@ F: tests/virtio-scsi-test.c T: git https://github.com/bonzini/qemu.git scsi-next SSI -M: Peter Crosthwaite <crosthwaite.peter@gmail.com> M: Alistair Francis <alistair@alistair23.me> S: Maintained F: hw/ssi/* @@ -1356,7 +1354,6 @@ F: tests/m25p80-test.c Xilinx SPI M: Alistair Francis <alistair@alistair23.me> -M: Peter Crosthwaite <crosthwaite.peter@gmail.com> S: Maintained F: hw/ssi/xilinx_* @@ -1766,7 +1763,6 @@ F: qom/cpu.c F: include/qom/cpu.h Device Tree -M: Peter Crosthwaite <crosthwaite.peter@gmail.com> M: Alexander Graf <agraf@suse.de> S: Maintained F: device_tree.c @@ -1361,6 +1361,7 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf) break; } else if (strncmp(p, "Kill;", 5) == 0) { /* Kill the target */ + put_packet(s, "OK"); error_report("QEMU: Terminated via GDBstub"); exit(0); } else { diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c index 5d53071a5a..9a8c49547d 100644 --- a/hw/arm/armsse.c +++ b/hw/arm/armsse.c @@ -565,7 +565,7 @@ static void armsse_realize(DeviceState *dev, Error **errp) /* Connect EXP_IRQ/EXP_CPUn_IRQ GPIOs to the NVIC's lines 32 and up */ s->exp_irqs[i] = g_new(qemu_irq, s->exp_numirq); for (j = 0; j < s->exp_numirq; j++) { - s->exp_irqs[i][j] = qdev_get_gpio_in(cpudev, i + 32); + s->exp_irqs[i][j] = qdev_get_gpio_in(cpudev, j + 32); } if (i == 0) { gpioname = g_strdup("EXP_IRQ"); diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index 790a3d9584..ab822f4251 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -1841,7 +1841,7 @@ static MemTxResult nvic_sysreg_read(void *opaque, hwaddr addr, } } break; - case 0xd18: /* System Handler Priority (SHPR1) */ + case 0xd18 ... 0xd1b: /* System Handler Priority (SHPR1) */ if (!arm_feature(&s->cpu->env, ARM_FEATURE_M_MAIN)) { val = 0; break; @@ -1956,7 +1956,7 @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr, } nvic_irq_update(s); return MEMTX_OK; - case 0xd18: /* System Handler Priority (SHPR1) */ + case 0xd18 ... 0xd1b: /* System Handler Priority (SHPR1) */ if (!arm_feature(&s->cpu->env, ARM_FEATURE_M_MAIN)) { return MEMTX_OK; } diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 775a36ccdd..3a50d587ff 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -580,6 +580,7 @@ static uint32_t get_elf_hwcap(void) hwcaps |= ARM_HWCAP_A64_FP; hwcaps |= ARM_HWCAP_A64_ASIMD; + hwcaps |= ARM_HWCAP_A64_CPUID; /* probe for the extra features */ #define GET_FEATURE_ID(feat, hwcap) \ diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 47238e4245..84ae6849c2 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -577,11 +577,13 @@ typedef struct CPUARMState { ARMPredicateReg preg_tmp; #endif - uint32_t xregs[16]; /* We store these fpcsr fields separately for convenience. */ + uint32_t qc[4] QEMU_ALIGNED(16); int vec_len; int vec_stride; + uint32_t xregs[16]; + /* Scratch space for aa32 neon expansion. */ uint32_t scratch[8]; @@ -1427,6 +1429,7 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val); #define FPCR_FZ16 (1 << 19) /* ARMv8.2+, FP16 flush-to-zero */ #define FPCR_FZ (1 << 24) /* Flush-to-zero enable bit */ #define FPCR_DN (1 << 25) /* Default NaN enable bit */ +#define FPCR_QC (1 << 27) /* Cumulative saturation bit */ static inline uint32_t vfp_get_fpsr(CPUARMState *env) { @@ -2226,6 +2229,18 @@ static inline bool cptype_valid(int cptype) #define PL0_R (0x02 | PL1_R) #define PL0_W (0x01 | PL1_W) +/* + * For user-mode some registers are accessible to EL0 via a kernel + * trap-and-emulate ABI. In this case we define the read permissions + * as actually being PL0_R. However some bits of any given register + * may still be masked. + */ +#ifdef CONFIG_USER_ONLY +#define PL0U_R PL0_R +#else +#define PL0U_R PL1_R +#endif + #define PL3_RW (PL3_R | PL3_W) #define PL2_RW (PL2_R | PL2_W) #define PL1_RW (PL1_R | PL1_W) @@ -2452,6 +2467,30 @@ static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs) } const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp); +/* + * Definition of an ARM co-processor register as viewed from + * userspace. This is used for presenting sanitised versions of + * registers to userspace when emulating the Linux AArch64 CPU + * ID/feature ABI (advertised as HWCAP_CPUID). + */ +typedef struct ARMCPRegUserSpaceInfo { + /* Name of register */ + const char *name; + + /* Is the name actually a glob pattern */ + bool is_glob; + + /* Only some bits are exported to user space */ + uint64_t exported_bits; + + /* Fixed bits are applied after the mask */ + uint64_t fixed_bits; +} ARMCPRegUserSpaceInfo; + +#define REGUSERINFO_SENTINEL { .name = NULL } + +void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods); + /* CPWriteFn that can be used to implement writes-ignored behaviour */ void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value); @@ -2499,18 +2538,25 @@ bool write_list_to_cpustate(ARMCPU *cpu); /** * write_cpustate_to_list: * @cpu: ARMCPU + * @kvm_sync: true if this is for syncing back to KVM * * For each register listed in the ARMCPU cpreg_indexes list, write * its value from the ARMCPUState structure into the cpreg_values list. * This is used to copy info from TCG's working data structures into * KVM or for outbound migration. * + * @kvm_sync is true if we are doing this in order to sync the + * register state back to KVM. In this case we will only update + * values in the list if the previous list->cpustate sync actually + * successfully wrote the CPU state. Otherwise we will keep the value + * that is in the list. + * * Returns: true if all register values were read correctly, * false if some register was unknown or could not be read. * Note that we do not stop early on failure -- we will attempt * reading all registers in the list. */ -bool write_cpustate_to_list(ARMCPU *cpu); +bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); #define ARM_CPUID_TI915T 0x54029152 #define ARM_CPUID_TI925T 0x54029252 diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 101fa6d3ea..70850e564d 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -583,8 +583,8 @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, * High and low need to be switched here because this is not actually a * 128bit store but two doublewords stored consecutively */ - Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); - Int128 newv = int128_make128(new_lo, new_hi); + Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val); + Int128 newv = int128_make128(new_hi, new_lo); Int128 oldv; uintptr_t ra = GETPC(); uint64_t o0, o1; diff --git a/target/arm/helper.c b/target/arm/helper.c index 520ceea7a4..55e9b77bb1 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -81,7 +81,7 @@ static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg) } switch (reg - nregs) { case 0: stl_p(buf, env->vfp.xregs[ARM_VFP_FPSID]); return 4; - case 1: stl_p(buf, env->vfp.xregs[ARM_VFP_FPSCR]); return 4; + case 1: stl_p(buf, vfp_get_fpscr(env)); return 4; case 2: stl_p(buf, env->vfp.xregs[ARM_VFP_FPEXC]); return 4; } return 0; @@ -107,7 +107,7 @@ static int vfp_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg) } switch (reg - nregs) { case 0: env->vfp.xregs[ARM_VFP_FPSID] = ldl_p(buf); return 4; - case 1: env->vfp.xregs[ARM_VFP_FPSCR] = ldl_p(buf); return 4; + case 1: vfp_set_fpscr(env, ldl_p(buf)); return 4; case 2: env->vfp.xregs[ARM_VFP_FPEXC] = ldl_p(buf) & (1 << 30); return 4; } return 0; @@ -264,7 +264,7 @@ static bool raw_accessors_invalid(const ARMCPRegInfo *ri) return true; } -bool write_cpustate_to_list(ARMCPU *cpu) +bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) { /* Write the coprocessor state from cpu->env to the (index,value) list. */ int i; @@ -273,6 +273,7 @@ bool write_cpustate_to_list(ARMCPU *cpu) for (i = 0; i < cpu->cpreg_array_len; i++) { uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]); const ARMCPRegInfo *ri; + uint64_t newval; ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); if (!ri) { @@ -282,7 +283,29 @@ bool write_cpustate_to_list(ARMCPU *cpu) if (ri->type & ARM_CP_NO_RAW) { continue; } - cpu->cpreg_values[i] = read_raw_cp_reg(&cpu->env, ri); + + newval = read_raw_cp_reg(&cpu->env, ri); + if (kvm_sync) { + /* + * Only sync if the previous list->cpustate sync succeeded. + * Rather than tracking the success/failure state for every + * item in the list, we just recheck "does the raw write we must + * have made in write_list_to_cpustate() read back OK" here. + */ + uint64_t oldval = cpu->cpreg_values[i]; + + if (oldval == newval) { + continue; + } + + write_raw_cp_reg(&cpu->env, ri, oldval); + if (read_raw_cp_reg(&cpu->env, ri) != oldval) { + continue; + } + + write_raw_cp_reg(&cpu->env, ri, newval); + } + cpu->cpreg_values[i] = newval; } return ok; } @@ -3657,13 +3680,6 @@ static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri) return mpidr_read_val(env); } -static const ARMCPRegInfo mpidr_cp_reginfo[] = { - { .name = "MPIDR", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5, - .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW }, - REGINFO_SENTINEL -}; - static const ARMCPRegInfo lpae_cp_reginfo[] = { /* NOP AMAIR0/1 */ { .name = "AMAIR0", .state = ARM_CP_STATE_BOTH, @@ -4434,6 +4450,9 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "HACR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 7, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0, .access = PL2_RW, @@ -4666,6 +4685,9 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), .writefn = hcr_writelow }, + { .name = "HACR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 7, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1, @@ -5855,25 +5877,25 @@ void register_cp_regs_for_features(ARMCPU *cpu) char *pmevtyper_name = g_strdup_printf("PMEVTYPER%d", i); char *pmevtyper_el0_name = g_strdup_printf("PMEVTYPER%d_EL0", i); ARMCPRegInfo pmev_regs[] = { - { .name = pmevcntr_name, .cp = 15, .crn = 15, + { .name = pmevcntr_name, .cp = 15, .crn = 14, .crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, .accessfn = pmreg_access }, { .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 15, .crm = 8 | (3 & (i >> 3)), + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)), .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access, .type = ARM_CP_IO, .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, .raw_readfn = pmevcntr_rawread, .raw_writefn = pmevcntr_rawwrite }, - { .name = pmevtyper_name, .cp = 15, .crn = 15, + { .name = pmevtyper_name, .cp = 15, .crn = 14, .crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, .accessfn = pmreg_access }, { .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 3, .crn = 15, .crm = 12 | (3 & (i >> 3)), + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)), .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access, .type = ARM_CP_IO, .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, @@ -6103,6 +6125,38 @@ void register_cp_regs_for_features(ARMCPU *cpu) .resetvalue = cpu->pmceid1 }, REGINFO_SENTINEL }; +#ifdef CONFIG_USER_ONLY + ARMCPRegUserSpaceInfo v8_user_idregs[] = { + { .name = "ID_AA64PFR0_EL1", + .exported_bits = 0x000f000f00ff0000, + .fixed_bits = 0x0000000000000011 }, + { .name = "ID_AA64PFR1_EL1", + .exported_bits = 0x00000000000000f0 }, + { .name = "ID_AA64PFR*_EL1_RESERVED", + .is_glob = true }, + { .name = "ID_AA64ZFR0_EL1" }, + { .name = "ID_AA64MMFR0_EL1", + .fixed_bits = 0x00000000ff000000 }, + { .name = "ID_AA64MMFR1_EL1" }, + { .name = "ID_AA64MMFR*_EL1_RESERVED", + .is_glob = true }, + { .name = "ID_AA64DFR0_EL1", + .fixed_bits = 0x0000000000000006 }, + { .name = "ID_AA64DFR1_EL1" }, + { .name = "ID_AA64DFR*_EL1_RESERVED", + .is_glob = true }, + { .name = "ID_AA64AFR*", + .is_glob = true }, + { .name = "ID_AA64ISAR0_EL1", + .exported_bits = 0x00fffffff0fffff0 }, + { .name = "ID_AA64ISAR1_EL1", + .exported_bits = 0x000000f0ffffffff }, + { .name = "ID_AA64ISAR*_EL1_RESERVED", + .is_glob = true }, + REGUSERINFO_SENTINEL + }; + modify_arm_cp_regs(v8_idregs, v8_user_idregs); +#endif /* RVBAR_EL1 is only implemented if EL1 is the highest EL */ if (!arm_feature(env, ARM_FEATURE_EL3) && !arm_feature(env, ARM_FEATURE_EL2)) { @@ -6379,6 +6433,15 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_W, .type = ARM_CP_NOP | ARM_CP_OVERRIDE }; +#ifdef CONFIG_USER_ONLY + ARMCPRegUserSpaceInfo id_v8_user_midr_cp_reginfo[] = { + { .name = "MIDR_EL1", + .exported_bits = 0x00000000ffffffff }, + { .name = "REVIDR_EL1" }, + REGUSERINFO_SENTINEL + }; + modify_arm_cp_regs(id_v8_midr_cp_reginfo, id_v8_user_midr_cp_reginfo); +#endif if (arm_feature(env, ARM_FEATURE_OMAPCP) || arm_feature(env, ARM_FEATURE_STRONGARM)) { ARMCPRegInfo *r; @@ -6412,6 +6475,20 @@ void register_cp_regs_for_features(ARMCPU *cpu) } if (arm_feature(env, ARM_FEATURE_MPIDR)) { + ARMCPRegInfo mpidr_cp_reginfo[] = { + { .name = "MPIDR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5, + .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW }, + REGINFO_SENTINEL + }; +#ifdef CONFIG_USER_ONLY + ARMCPRegUserSpaceInfo mpidr_user_cp_reginfo[] = { + { .name = "MPIDR_EL1", + .fixed_bits = 0x0000000080000000 }, + REGUSERINFO_SENTINEL + }; + modify_arm_cp_regs(mpidr_cp_reginfo, mpidr_user_cp_reginfo); +#endif define_arm_cp_regs(cpu, mpidr_cp_reginfo); } @@ -6851,7 +6928,11 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, if (r->state != ARM_CP_STATE_AA32) { int mask = 0; switch (r->opc1) { - case 0: case 1: case 2: + case 0: + /* min_EL EL1, but some accessible to EL0 via kernel ABI */ + mask = PL0U_R | PL1_RW; + break; + case 1: case 2: /* min_EL EL1 */ mask = PL1_RW; break; @@ -6956,6 +7037,44 @@ void define_arm_cp_regs_with_opaque(ARMCPU *cpu, } } +/* + * Modify ARMCPRegInfo for access from userspace. + * + * This is a data driven modification directed by + * ARMCPRegUserSpaceInfo. All registers become ARM_CP_CONST as + * user-space cannot alter any values and dynamic values pertaining to + * execution state are hidden from user space view anyway. + */ +void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods) +{ + const ARMCPRegUserSpaceInfo *m; + ARMCPRegInfo *r; + + for (m = mods; m->name; m++) { + GPatternSpec *pat = NULL; + if (m->is_glob) { + pat = g_pattern_spec_new(m->name); + } + for (r = regs; r->type != ARM_CP_SENTINEL; r++) { + if (pat && g_pattern_match_string(pat, r->name)) { + r->type = ARM_CP_CONST; + r->access = PL0U_R; + r->resetvalue = 0; + /* continue */ + } else if (strcmp(r->name, m->name) == 0) { + r->type = ARM_CP_CONST; + r->access = PL0U_R; + r->resetvalue &= m->exported_bits; + r->resetvalue |= m->fixed_bits; + break; + } + } + if (pat) { + g_pattern_spec_free(pat); + } + } +} + const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp) { return g_hash_table_lookup(cpregs, &encoded_cp); @@ -12585,10 +12704,9 @@ static inline int vfp_exceptbits_from_host(int host_bits) uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env) { - int i; - uint32_t fpscr; + uint32_t i, fpscr; - fpscr = (env->vfp.xregs[ARM_VFP_FPSCR] & 0xffc8ffff) + fpscr = env->vfp.xregs[ARM_VFP_FPSCR] | (env->vfp.vec_len << 16) | (env->vfp.vec_stride << 20); @@ -12597,8 +12715,11 @@ uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env) /* FZ16 does not generate an input denormal exception. */ i |= (get_float_exception_flags(&env->vfp.fp_status_f16) & ~float_flag_input_denormal); - fpscr |= vfp_exceptbits_from_host(i); + + i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3]; + fpscr |= i ? FPCR_QC : 0; + return fpscr; } @@ -12630,7 +12751,7 @@ static inline int vfp_exceptbits_to_host(int target_bits) void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) { int i; - uint32_t changed; + uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR]; /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */ if (!cpu_isar_feature(aa64_fp16, arm_env_get_cpu(env))) { @@ -12639,15 +12760,25 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val) /* * We don't implement trapped exception handling, so the - * trap enable bits are all RAZ/WI (not RES0!) + * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!) + * + * If we exclude the exception flags, IOC|DZC|OFC|UFC|IXC|IDC + * (which are stored in fp_status), and the other RES0 bits + * in between, then we clear all of the low 16 bits. */ - val &= ~(FPCR_IDE | FPCR_IXE | FPCR_UFE | FPCR_OFE | FPCR_DZE | FPCR_IOE); - - changed = env->vfp.xregs[ARM_VFP_FPSCR]; - env->vfp.xregs[ARM_VFP_FPSCR] = (val & 0xffc8ffff); + env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000; env->vfp.vec_len = (val >> 16) & 7; env->vfp.vec_stride = (val >> 20) & 3; + /* + * The bit we set within fpscr_q is arbitrary; the register as a + * whole being zero/non-zero is what counts. + */ + env->vfp.qc[0] = val & FPCR_QC; + env->vfp.qc[1] = 0; + env->vfp.qc[2] = 0; + env->vfp.qc[3] = 0; + changed ^= val; if (changed & (3 << 22)) { i = (val >> 22) & 3; @@ -12752,31 +12883,40 @@ float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env) return float64_sqrt(a, &env->vfp.fp_status); } +static void softfloat_to_vfp_compare(CPUARMState *env, int cmp) +{ + uint32_t flags; + switch (cmp) { + case float_relation_equal: + flags = 0x6; + break; + case float_relation_less: + flags = 0x8; + break; + case float_relation_greater: + flags = 0x2; + break; + case float_relation_unordered: + flags = 0x3; + break; + default: + g_assert_not_reached(); + } + env->vfp.xregs[ARM_VFP_FPSCR] = + deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags); +} + /* XXX: check quiet/signaling case */ #define DO_VFP_cmp(p, type) \ void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \ { \ - uint32_t flags; \ - switch(type ## _compare_quiet(a, b, &env->vfp.fp_status)) { \ - case 0: flags = 0x6; break; \ - case -1: flags = 0x8; break; \ - case 1: flags = 0x2; break; \ - default: case 2: flags = 0x3; break; \ - } \ - env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \ - | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \ + softfloat_to_vfp_compare(env, \ + type ## _compare_quiet(a, b, &env->vfp.fp_status)); \ } \ void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \ { \ - uint32_t flags; \ - switch(type ## _compare(a, b, &env->vfp.fp_status)) { \ - case 0: flags = 0x6; break; \ - case -1: flags = 0x8; break; \ - case 1: flags = 0x2; break; \ - default: case 2: flags = 0x3; break; \ - } \ - env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \ - | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \ + softfloat_to_vfp_compare(env, \ + type ## _compare(a, b, &env->vfp.fp_status)); \ } DO_VFP_cmp(s, float32) DO_VFP_cmp(d, float64) diff --git a/target/arm/helper.h b/target/arm/helper.h index 53a38188c6..923e8e1525 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -276,18 +276,6 @@ DEF_HELPER_2(neon_cge_s16, i32, i32, i32) DEF_HELPER_2(neon_cge_u32, i32, i32, i32) DEF_HELPER_2(neon_cge_s32, i32, i32, i32) -DEF_HELPER_2(neon_min_u8, i32, i32, i32) -DEF_HELPER_2(neon_min_s8, i32, i32, i32) -DEF_HELPER_2(neon_min_u16, i32, i32, i32) -DEF_HELPER_2(neon_min_s16, i32, i32, i32) -DEF_HELPER_2(neon_min_u32, i32, i32, i32) -DEF_HELPER_2(neon_min_s32, i32, i32, i32) -DEF_HELPER_2(neon_max_u8, i32, i32, i32) -DEF_HELPER_2(neon_max_s8, i32, i32, i32) -DEF_HELPER_2(neon_max_u16, i32, i32, i32) -DEF_HELPER_2(neon_max_s16, i32, i32, i32) -DEF_HELPER_2(neon_max_u32, i32, i32, i32) -DEF_HELPER_2(neon_max_s32, i32, i32, i32) DEF_HELPER_2(neon_pmin_u8, i32, i32, i32) DEF_HELPER_2(neon_pmin_s8, i32, i32, i32) DEF_HELPER_2(neon_pmin_u16, i32, i32, i32) @@ -653,6 +641,39 @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uqsub_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #include "helper-sve.h" diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c index bd51eb43c8..a75e04cc8f 100644 --- a/target/arm/kvm32.c +++ b/target/arm/kvm32.c @@ -387,24 +387,8 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } - /* Note that we do not call write_cpustate_to_list() - * here, so we are only writing the tuple list back to - * KVM. This is safe because nothing can change the - * CPUARMState cp15 fields (in particular gdb accesses cannot) - * and so there are no changes to sync. In fact syncing would - * be wrong at this point: for a constant register where TCG and - * KVM disagree about its value, the preceding write_list_to_cpustate() - * would not have had any effect on the CPUARMState value (since the - * register is read-only), and a write_cpustate_to_list() here would - * then try to write the TCG value back into KVM -- this would either - * fail or incorrectly change the value the guest sees. - * - * If we ever want to allow the user to modify cp15 registers via - * the gdb stub, we would need to be more clever here (for instance - * tracking the set of registers kvm_arch_get_registers() successfully - * managed to update the CPUARMState with, and only allowing those - * to be written back up into the kernel). - */ + write_cpustate_to_list(cpu, true); + if (!write_list_to_kvmstate(cpu, level)) { return EINVAL; } diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index 089af9c5f0..e3ba149248 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -838,6 +838,8 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } + write_cpustate_to_list(cpu, true); + if (!write_list_to_kvmstate(cpu, level)) { return EINVAL; } diff --git a/target/arm/machine.c b/target/arm/machine.c index b292549614..124192bfc2 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -630,7 +630,7 @@ static int cpu_pre_save(void *opaque) abort(); } } else { - if (!write_cpustate_to_list(cpu)) { + if (!write_cpustate_to_list(cpu, false)) { /* This should never fail. */ abort(); } diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c index c2c6491a83..ed1c6fc41c 100644 --- a/target/arm/neon_helper.c +++ b/target/arm/neon_helper.c @@ -15,7 +15,7 @@ #define SIGNBIT (uint32_t)0x80000000 #define SIGNBIT64 ((uint64_t)1 << 63) -#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q +#define SET_QC() env->vfp.qc[0] = 1 #define NEON_TYPE1(name, type) \ typedef struct \ @@ -581,12 +581,6 @@ NEON_VOP(cge_u32, neon_u32, 1) #undef NEON_FN #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2 -NEON_VOP(min_s8, neon_s8, 4) -NEON_VOP(min_u8, neon_u8, 4) -NEON_VOP(min_s16, neon_s16, 2) -NEON_VOP(min_u16, neon_u16, 2) -NEON_VOP(min_s32, neon_s32, 1) -NEON_VOP(min_u32, neon_u32, 1) NEON_POP(pmin_s8, neon_s8, 4) NEON_POP(pmin_u8, neon_u8, 4) NEON_POP(pmin_s16, neon_s16, 2) @@ -594,12 +588,6 @@ NEON_POP(pmin_u16, neon_u16, 2) #undef NEON_FN #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2 -NEON_VOP(max_s8, neon_s8, 4) -NEON_VOP(max_u8, neon_u8, 4) -NEON_VOP(max_s16, neon_s16, 2) -NEON_VOP(max_u16, neon_u16, 2) -NEON_VOP(max_s32, neon_s32, 1) -NEON_VOP(max_u32, neon_u32, 1) NEON_POP(pmax_s8, neon_s8, 4) NEON_POP(pmax_u8, neon_u8, 4) NEON_POP(pmax_s16, neon_s16, 2) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index e002251ac6..af8e4fd4be 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -10648,11 +10648,7 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0); return; case 2: /* ORR */ - if (rn == rm) { /* MOV */ - gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_mov, 0); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0); - } + gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0); return; case 3: /* ORN */ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0); @@ -10952,6 +10948,36 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) } switch (opcode) { + case 0x01: /* SQADD, UQADD */ + tcg_gen_gvec_4(vec_full_reg_offset(s, rd), + offsetof(CPUARMState, vfp.qc), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + is_q ? 16 : 8, vec_full_reg_size(s), + (u ? uqadd_op : sqadd_op) + size); + return; + case 0x05: /* SQSUB, UQSUB */ + tcg_gen_gvec_4(vec_full_reg_offset(s, rd), + offsetof(CPUARMState, vfp.qc), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + is_q ? 16 : 8, vec_full_reg_size(s), + (u ? uqsub_op : sqsub_op) + size); + return; + case 0x0c: /* SMAX, UMAX */ + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size); + } + return; + case 0x0d: /* SMIN, UMIN */ + if (u) { + gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size); + } else { + gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size); + } + return; case 0x10: /* ADD, SUB */ if (u) { gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size); @@ -11033,16 +11059,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genfn = fns[size][u]; break; } - case 0x1: /* SQADD, UQADD */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, - { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, - { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, - }; - genenvfn = fns[size][u]; - break; - } case 0x2: /* SRHADD, URHADD */ { static NeonGenTwoOpFn * const fns[3][2] = { @@ -11063,16 +11079,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genfn = fns[size][u]; break; } - case 0x5: /* SQSUB, UQSUB */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, - { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, - { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, - }; - genenvfn = fns[size][u]; - break; - } case 0x8: /* SSHL, USHL */ { static NeonGenTwoOpFn * const fns[3][2] = { @@ -11113,27 +11119,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genenvfn = fns[size][u]; break; } - case 0xc: /* SMAX, UMAX */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_max_s8, gen_helper_neon_max_u8 }, - { gen_helper_neon_max_s16, gen_helper_neon_max_u16 }, - { tcg_gen_smax_i32, tcg_gen_umax_i32 }, - }; - genfn = fns[size][u]; - break; - } - - case 0xd: /* SMIN, UMIN */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_min_s8, gen_helper_neon_min_u8 }, - { gen_helper_neon_min_s16, gen_helper_neon_min_u16 }, - { tcg_gen_smin_i32, tcg_gen_umin_i32 }, - }; - genfn = fns[size][u]; - break; - } case 0xe: /* SABD, UABD */ case 0xf: /* SABA, UABA */ { diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index b15b615ceb..3a2eb51566 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -280,11 +280,7 @@ static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a) static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a) { - if (a->rn == a->rm) { /* MOV */ - return do_mov_z(s, a->rd, a->rn); - } else { - return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm); - } + return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm); } static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a) diff --git a/target/arm/translate.c b/target/arm/translate.c index 66cf28c8cb..dac737f6ca 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -4760,10 +4760,10 @@ static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1) } /* 32-bit pairwise ops end up the same as the elementwise versions. */ -#define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32 -#define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32 -#define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32 -#define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32 +#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32 +#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32 +#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32 +#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32 #define GEN_NEON_INTEGER_OP_ENV(name) do { \ switch ((size << 1) | u) { \ @@ -6148,6 +6148,142 @@ const GVecGen3 cmtst_op[4] = { .vece = MO_64 }, }; +static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_add_vec(vece, x, a, b); + tcg_gen_usadd_vec(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); + tcg_temp_free_vec(x); +} + +const GVecGen4 uqadd_op[4] = { + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_b, + .opc = INDEX_op_usadd_vec, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_h, + .opc = INDEX_op_usadd_vec, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_s, + .opc = INDEX_op_usadd_vec, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_d, + .opc = INDEX_op_usadd_vec, + .write_aofs = true, + .vece = MO_64 }, +}; + +static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_add_vec(vece, x, a, b); + tcg_gen_ssadd_vec(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); + tcg_temp_free_vec(x); +} + +const GVecGen4 sqadd_op[4] = { + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_b, + .opc = INDEX_op_ssadd_vec, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_h, + .opc = INDEX_op_ssadd_vec, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_s, + .opc = INDEX_op_ssadd_vec, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_d, + .opc = INDEX_op_ssadd_vec, + .write_aofs = true, + .vece = MO_64 }, +}; + +static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_sub_vec(vece, x, a, b); + tcg_gen_ussub_vec(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); + tcg_temp_free_vec(x); +} + +const GVecGen4 uqsub_op[4] = { + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_b, + .opc = INDEX_op_ussub_vec, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_h, + .opc = INDEX_op_ussub_vec, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_s, + .opc = INDEX_op_ussub_vec, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_d, + .opc = INDEX_op_ussub_vec, + .write_aofs = true, + .vece = MO_64 }, +}; + +static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_sub_vec(vece, x, a, b); + tcg_gen_sssub_vec(vece, t, a, b); + tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); + tcg_gen_or_vec(vece, sat, sat, x); + tcg_temp_free_vec(x); +} + +const GVecGen4 sqsub_op[4] = { + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_b, + .opc = INDEX_op_sssub_vec, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_h, + .opc = INDEX_op_sssub_vec, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_s, + .opc = INDEX_op_sssub_vec, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_d, + .opc = INDEX_op_sssub_vec, + .write_aofs = true, + .vece = MO_64 }, +}; + /* Translate a NEON data processing instruction. Return nonzero if the instruction is invalid. We process data in a mixture of 32-bit and 64-bit chunks. @@ -6294,15 +6430,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); break; - case 2: - if (rn == rm) { - /* VMOV */ - tcg_gen_gvec_mov(0, rd_ofs, rn_ofs, vec_size, vec_size); - } else { - /* VORR */ - tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs, - vec_size, vec_size); - } + case 2: /* VORR */ + tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); break; case 3: /* VORN */ tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs, @@ -6337,6 +6467,18 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } return 0; + case NEON_3R_VQADD: + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, vec_size, vec_size, + (u ? uqadd_op : sqadd_op) + size); + break; + + case NEON_3R_VQSUB: + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, vec_size, vec_size, + (u ? uqsub_op : sqsub_op) + size); + break; + case NEON_3R_VMUL: /* VMUL */ if (u) { /* Polynomial case allows only P8 and is handled below. */ @@ -6374,6 +6516,25 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); return 0; + + case NEON_3R_VMAX: + if (u) { + tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } else { + tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } + return 0; + case NEON_3R_VMIN: + if (u) { + tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } else { + tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs, + vec_size, vec_size); + } + return 0; } if (size == 3) { @@ -6382,24 +6543,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) neon_load_reg64(cpu_V0, rn + pass); neon_load_reg64(cpu_V1, rm + pass); switch (op) { - case NEON_3R_VQADD: - if (u) { - gen_helper_neon_qadd_u64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - } else { - gen_helper_neon_qadd_s64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - } - break; - case NEON_3R_VQSUB: - if (u) { - gen_helper_neon_qsub_u64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - } else { - gen_helper_neon_qsub_s64(cpu_V0, cpu_env, - cpu_V0, cpu_V1); - } - break; case NEON_3R_VSHL: if (u) { gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0); @@ -6515,18 +6658,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_3R_VHADD: GEN_NEON_INTEGER_OP(hadd); break; - case NEON_3R_VQADD: - GEN_NEON_INTEGER_OP_ENV(qadd); - break; case NEON_3R_VRHADD: GEN_NEON_INTEGER_OP(rhadd); break; case NEON_3R_VHSUB: GEN_NEON_INTEGER_OP(hsub); break; - case NEON_3R_VQSUB: - GEN_NEON_INTEGER_OP_ENV(qsub); - break; case NEON_3R_VSHL: GEN_NEON_INTEGER_OP(shl); break; @@ -6539,12 +6676,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_3R_VQRSHL: GEN_NEON_INTEGER_OP_ENV(qrshl); break; - case NEON_3R_VMAX: - GEN_NEON_INTEGER_OP(max); - break; - case NEON_3R_VMIN: - GEN_NEON_INTEGER_OP(min); - break; case NEON_3R_VABD: GEN_NEON_INTEGER_OP(abd); break; @@ -13634,7 +13765,7 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, i * 2 + 1, (uint32_t)(v >> 32), i, v); } - cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]); + cpu_fprintf(f, "FPSCR: %08x\n", vfp_get_fpscr(env)); } } diff --git a/target/arm/translate.h b/target/arm/translate.h index 17748ddfb9..f25fe75685 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -214,6 +214,10 @@ extern const GVecGen2i ssra_op[4]; extern const GVecGen2i usra_op[4]; extern const GVecGen2i sri_op[4]; extern const GVecGen2i sli_op[4]; +extern const GVecGen4 uqadd_op[4]; +extern const GVecGen4 sqadd_op[4]; +extern const GVecGen4 uqsub_op[4]; +extern const GVecGen4 sqsub_op[4]; void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); /* diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 37f338732e..dfc635cf9a 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -36,7 +36,7 @@ #define H4(x) (x) #endif -#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q +#define SET_QC() env->vfp.qc[0] = 1 static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) { @@ -638,6 +638,7 @@ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ d[i] = FUNC(n[i], stat); \ } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ } DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16) @@ -688,6 +689,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \ d[i] = FUNC(n[i], m[i], stat); \ } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ } DO_3OP(gvec_fadd_h, float16_add, float16) @@ -766,3 +768,133 @@ DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4) DO_FMLA_IDX(gvec_fmla_idx_d, float64, ) #undef DO_FMLA_IDX + +#define DO_SAT(NAME, WTYPE, TYPEN, TYPEM, OP, MIN, MAX) \ +void HELPER(NAME)(void *vd, void *vq, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, oprsz = simd_oprsz(desc); \ + TYPEN *d = vd, *n = vn; TYPEM *m = vm; \ + bool q = false; \ + for (i = 0; i < oprsz / sizeof(TYPEN); i++) { \ + WTYPE dd = (WTYPE)n[i] OP m[i]; \ + if (dd < MIN) { \ + dd = MIN; \ + q = true; \ + } else if (dd > MAX) { \ + dd = MAX; \ + q = true; \ + } \ + d[i] = dd; \ + } \ + if (q) { \ + uint32_t *qc = vq; \ + qc[0] = 1; \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_SAT(gvec_uqadd_b, int, uint8_t, uint8_t, +, 0, UINT8_MAX) +DO_SAT(gvec_uqadd_h, int, uint16_t, uint16_t, +, 0, UINT16_MAX) +DO_SAT(gvec_uqadd_s, int64_t, uint32_t, uint32_t, +, 0, UINT32_MAX) + +DO_SAT(gvec_sqadd_b, int, int8_t, int8_t, +, INT8_MIN, INT8_MAX) +DO_SAT(gvec_sqadd_h, int, int16_t, int16_t, +, INT16_MIN, INT16_MAX) +DO_SAT(gvec_sqadd_s, int64_t, int32_t, int32_t, +, INT32_MIN, INT32_MAX) + +DO_SAT(gvec_uqsub_b, int, uint8_t, uint8_t, -, 0, UINT8_MAX) +DO_SAT(gvec_uqsub_h, int, uint16_t, uint16_t, -, 0, UINT16_MAX) +DO_SAT(gvec_uqsub_s, int64_t, uint32_t, uint32_t, -, 0, UINT32_MAX) + +DO_SAT(gvec_sqsub_b, int, int8_t, int8_t, -, INT8_MIN, INT8_MAX) +DO_SAT(gvec_sqsub_h, int, int16_t, int16_t, -, INT16_MIN, INT16_MAX) +DO_SAT(gvec_sqsub_s, int64_t, int32_t, int32_t, -, INT32_MIN, INT32_MAX) + +#undef DO_SAT + +void HELPER(gvec_uqadd_d)(void *vd, void *vq, void *vn, + void *vm, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + bool q = false; + + for (i = 0; i < oprsz / 8; i++) { + uint64_t nn = n[i], mm = m[i], dd = nn + mm; + if (dd < nn) { + dd = UINT64_MAX; + q = true; + } + d[i] = dd; + } + if (q) { + uint32_t *qc = vq; + qc[0] = 1; + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} + +void HELPER(gvec_uqsub_d)(void *vd, void *vq, void *vn, + void *vm, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + bool q = false; + + for (i = 0; i < oprsz / 8; i++) { + uint64_t nn = n[i], mm = m[i], dd = nn - mm; + if (nn < mm) { + dd = 0; + q = true; + } + d[i] = dd; + } + if (q) { + uint32_t *qc = vq; + qc[0] = 1; + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} + +void HELPER(gvec_sqadd_d)(void *vd, void *vq, void *vn, + void *vm, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + int64_t *d = vd, *n = vn, *m = vm; + bool q = false; + + for (i = 0; i < oprsz / 8; i++) { + int64_t nn = n[i], mm = m[i], dd = nn + mm; + if (((dd ^ nn) & ~(nn ^ mm)) & INT64_MIN) { + dd = (nn >> 63) ^ ~INT64_MIN; + q = true; + } + d[i] = dd; + } + if (q) { + uint32_t *qc = vq; + qc[0] = 1; + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} + +void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn, + void *vm, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + int64_t *d = vd, *n = vn, *m = vm; + bool q = false; + + for (i = 0; i < oprsz / 8; i++) { + int64_t nn = n[i], mm = m[i], dd = nn - mm; + if (((dd ^ nn) & (nn ^ mm)) & INT64_MIN) { + dd = (nn >> 63) ^ ~INT64_MIN; + q = true; + } + d[i] = dd; + } + if (q) { + uint32_t *qc = vq; + qc[0] = 1; + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} |