diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2020-04-30 15:45:34 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2020-04-30 15:45:34 +0100 |
commit | 126eeee6c7b516e0a348dd4d60e59dbfa4b4b513 (patch) | |
tree | 2b0239fe13bdbbf7ebf002c35668402a0498203a /target | |
parent | 16aaacb307ed607b9780c12702c44f0fe52edc7e (diff) | |
parent | 6f7b6947a6639fff15c6a0956adf0f5ec004b789 (diff) |
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200430-1' into staging
target-arm queue:
* xlnx-zdma: Fix endianness handling of descriptor loading
* nrf51: Fix last GPIO CNF address
* gicv3: Use gicr_typer in arm_gicv3_icc_reset
* msf2: Add EMAC block to SmartFusion2 SoC
* New clock modelling framework
* hw/arm: versal: Setup the ADMA with 128bit bus-width
* Cadence: gem: fix wraparound in 64bit descriptors
* cadence_gem: clear RX control descriptor
* target/arm: Vectorize integer comparison vs zero
* hw/arm/virt: dt: add kaslr-seed property
* hw/arm: xlnx-zcu102: Disable unsupported FDT firmware nodes
# gpg: Signature made Thu 30 Apr 2020 15:43:54 BST
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE
* remotes/pmaydell/tags/pull-target-arm-20200430-1: (30 commits)
hw/arm: xlnx-zcu102: Disable unsupported FDT firmware nodes
hw/arm: xlnx-zcu102: Move arm_boot_info into XlnxZCU102
device_tree: Constify compat in qemu_fdt_node_path()
device_tree: Allow name wildcards in qemu_fdt_node_path()
target/arm/cpu: Update coding style to make checkpatch.pl happy
target/arm: Make cpu_register() available for other files
target/arm: Restrict the Address Translate write operation to TCG accel
hw/arm/virt: dt: add kaslr-seed property
hw/arm/virt: dt: move creation of /secure-chosen to create_fdt()
target/arm: Vectorize integer comparison vs zero
net: cadence_gem: clear RX control descriptor
Cadence: gem: fix wraparound in 64bit descriptors
hw/arm: versal: Setup the ADMA with 128bit bus-width
qdev-monitor: print the device's clock with info qtree
hw/arm/xilinx_zynq: connect uart clocks to slcr
hw/char/cadence_uart: add clock support
hw/misc/zynq_slcr: add clock generation for uarts
docs/clocks: add device's clock documentation
qdev-clock: introduce an init array to ease the device construction
qdev: add clock input&output support to devices.
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target')
-rw-r--r-- | target/arm/cpu-qom.h | 9 | ||||
-rw-r--r-- | target/arm/cpu.c | 19 | ||||
-rw-r--r-- | target/arm/cpu64.c | 8 | ||||
-rw-r--r-- | target/arm/helper.c | 17 | ||||
-rw-r--r-- | target/arm/helper.h | 27 | ||||
-rw-r--r-- | target/arm/neon_helper.c | 24 | ||||
-rw-r--r-- | target/arm/translate-a64.c | 64 | ||||
-rw-r--r-- | target/arm/translate.c | 256 | ||||
-rw-r--r-- | target/arm/translate.h | 5 | ||||
-rw-r--r-- | target/arm/vec_helper.c | 25 |
10 files changed, 312 insertions, 142 deletions
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h index d95568bf05..56395b87f6 100644 --- a/target/arm/cpu-qom.h +++ b/target/arm/cpu-qom.h @@ -35,7 +35,14 @@ struct arm_boot_info; #define TYPE_ARM_MAX_CPU "max-" TYPE_ARM_CPU -typedef struct ARMCPUInfo ARMCPUInfo; +typedef struct ARMCPUInfo { + const char *name; + void (*initfn)(Object *obj); + void (*class_init)(ObjectClass *oc, void *data); +} ARMCPUInfo; + +void arm_cpu_register(const ARMCPUInfo *info); +void aarch64_cpu_register(const ARMCPUInfo *info); /** * ARMCPUClass: diff --git a/target/arm/cpu.c b/target/arm/cpu.c index a79f233b17..141d947775 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -582,7 +582,8 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) CPUARMState *env = &cpu->env; bool ret = false; - /* ARMv7-M interrupt masking works differently than -A or -R. + /* + * ARMv7-M interrupt masking works differently than -A or -R. * There is no FIQ/IRQ distinction. Instead of I and F bits * masking FIQ and IRQ interrupts, an exception is taken only * if it is higher priority than the current execution priority @@ -1912,7 +1913,8 @@ static void arm1026_initfn(Object *obj) static void arm1136_r2_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); - /* What qemu calls "arm1136_r2" is actually the 1136 r0p2, ie an + /* + * What qemu calls "arm1136_r2" is actually the 1136 r0p2, ie an * older core than plain "arm1136". In particular this does not * have the v6K features. * These ID register values are correct for 1136 but may be wrong @@ -2693,18 +2695,13 @@ static void arm_max_initfn(Object *obj) #endif /* !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) */ -struct ARMCPUInfo { - const char *name; - void (*initfn)(Object *obj); - void (*class_init)(ObjectClass *oc, void *data); -}; - static const ARMCPUInfo arm_cpus[] = { #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) { .name = "arm926", .initfn = arm926_initfn }, { .name = "arm946", .initfn = arm946_initfn }, { .name = "arm1026", .initfn = arm1026_initfn }, - /* What QEMU calls "arm1136-r2" is actually the 1136 r0p2, i.e. an + /* + * What QEMU calls "arm1136-r2" is actually the 1136 r0p2, i.e. an * older core than plain "arm1136". In particular this does not * have the v6K features. */ @@ -2864,7 +2861,7 @@ static void cpu_register_class_init(ObjectClass *oc, void *data) acc->info = data; } -static void cpu_register(const ARMCPUInfo *info) +void arm_cpu_register(const ARMCPUInfo *info) { TypeInfo type_info = { .parent = TYPE_ARM_CPU, @@ -2905,7 +2902,7 @@ static void arm_cpu_register_types(void) type_register_static(&idau_interface_type_info); while (info->name) { - cpu_register(info); + arm_cpu_register(info); info++; } diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 95d0c8c101..74afc28d53 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -737,12 +737,6 @@ static void aarch64_max_initfn(Object *obj) cpu_max_set_sve_max_vq, NULL, NULL, &error_fatal); } -struct ARMCPUInfo { - const char *name; - void (*initfn)(Object *obj); - void (*class_init)(ObjectClass *oc, void *data); -}; - static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, @@ -825,7 +819,7 @@ static void cpu_register_class_init(ObjectClass *oc, void *data) acc->info = data; } -static void aarch64_cpu_register(const ARMCPUInfo *info) +void aarch64_cpu_register(const ARMCPUInfo *info) { TypeInfo type_info = { .parent = TYPE_AARCH64_CPU, diff --git a/target/arm/helper.c b/target/arm/helper.c index 7e9ea5d20f..dfefb9b3d9 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -3442,6 +3442,7 @@ static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri, return CP_ACCESS_OK; } +#ifdef CONFIG_TCG static uint64_t do_ats_write(CPUARMState *env, uint64_t value, MMUAccessType access_type, ARMMMUIdx mmu_idx) { @@ -3602,9 +3603,11 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, } return par64; } +#endif /* CONFIG_TCG */ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { +#ifdef CONFIG_TCG MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD; uint64_t par64; ARMMMUIdx mmu_idx; @@ -3664,17 +3667,26 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) par64 = do_ats_write(env, value, access_type, mmu_idx); A32_BANKED_CURRENT_REG_SET(env, par, par64); +#else + /* Handled by hardware accelerator. */ + g_assert_not_reached(); +#endif /* CONFIG_TCG */ } static void ats1h_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { +#ifdef CONFIG_TCG MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD; uint64_t par64; par64 = do_ats_write(env, value, access_type, ARMMMUIdx_E2); A32_BANKED_CURRENT_REG_SET(env, par, par64); +#else + /* Handled by hardware accelerator. */ + g_assert_not_reached(); +#endif /* CONFIG_TCG */ } static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri, @@ -3689,6 +3701,7 @@ static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri, static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { +#ifdef CONFIG_TCG MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD; ARMMMUIdx mmu_idx; int secure = arm_is_secure_below_el3(env); @@ -3728,6 +3741,10 @@ static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, } env->cp15.par_el[1] = do_ats_write(env, value, access_type, mmu_idx); +#else + /* Handled by hardware accelerator. */ + g_assert_not_reached(); +#endif /* CONFIG_TCG */ } #endif diff --git a/target/arm/helper.h b/target/arm/helper.h index f37b8670a5..5817626b20 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -275,19 +275,6 @@ DEF_HELPER_2(neon_hsub_u16, i32, i32, i32) DEF_HELPER_2(neon_hsub_s32, s32, s32, s32) DEF_HELPER_2(neon_hsub_u32, i32, i32, i32) -DEF_HELPER_2(neon_cgt_u8, i32, i32, i32) -DEF_HELPER_2(neon_cgt_s8, i32, i32, i32) -DEF_HELPER_2(neon_cgt_u16, i32, i32, i32) -DEF_HELPER_2(neon_cgt_s16, i32, i32, i32) -DEF_HELPER_2(neon_cgt_u32, i32, i32, i32) -DEF_HELPER_2(neon_cgt_s32, i32, i32, i32) -DEF_HELPER_2(neon_cge_u8, i32, i32, i32) -DEF_HELPER_2(neon_cge_s8, i32, i32, i32) -DEF_HELPER_2(neon_cge_u16, i32, i32, i32) -DEF_HELPER_2(neon_cge_s16, i32, i32, i32) -DEF_HELPER_2(neon_cge_u32, i32, i32, i32) -DEF_HELPER_2(neon_cge_s32, i32, i32, i32) - DEF_HELPER_2(neon_pmin_u8, i32, i32, i32) DEF_HELPER_2(neon_pmin_s8, i32, i32, i32) DEF_HELPER_2(neon_pmin_u16, i32, i32, i32) @@ -347,9 +334,6 @@ DEF_HELPER_2(neon_mul_u16, i32, i32, i32) DEF_HELPER_2(neon_tst_u8, i32, i32, i32) DEF_HELPER_2(neon_tst_u16, i32, i32, i32) DEF_HELPER_2(neon_tst_u32, i32, i32, i32) -DEF_HELPER_2(neon_ceq_u8, i32, i32, i32) -DEF_HELPER_2(neon_ceq_u16, i32, i32, i32) -DEF_HELPER_2(neon_ceq_u32, i32, i32, i32) DEF_HELPER_1(neon_clz_u8, i32, i32) DEF_HELPER_1(neon_clz_u16, i32, i32) @@ -686,6 +670,17 @@ DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_FLAGS_3(gvec_ceq0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_ceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_clt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_clt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cle0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cgt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cge0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(gvec_cge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c index c7a8438b42..448be93fa1 100644 --- a/target/arm/neon_helper.c +++ b/target/arm/neon_helper.c @@ -562,24 +562,6 @@ uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) return dest; } -#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0 -NEON_VOP(cgt_s8, neon_s8, 4) -NEON_VOP(cgt_u8, neon_u8, 4) -NEON_VOP(cgt_s16, neon_s16, 2) -NEON_VOP(cgt_u16, neon_u16, 2) -NEON_VOP(cgt_s32, neon_s32, 1) -NEON_VOP(cgt_u32, neon_u32, 1) -#undef NEON_FN - -#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0 -NEON_VOP(cge_s8, neon_s8, 4) -NEON_VOP(cge_u8, neon_u8, 4) -NEON_VOP(cge_s16, neon_s16, 2) -NEON_VOP(cge_u16, neon_u16, 2) -NEON_VOP(cge_s32, neon_s32, 1) -NEON_VOP(cge_u32, neon_u32, 1) -#undef NEON_FN - #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2 NEON_POP(pmin_s8, neon_s8, 4) NEON_POP(pmin_u8, neon_u8, 4) @@ -1135,12 +1117,6 @@ NEON_VOP(tst_u16, neon_u16, 2) NEON_VOP(tst_u32, neon_u32, 1) #undef NEON_FN -#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0 -NEON_VOP(ceq_u8, neon_u8, 4) -NEON_VOP(ceq_u16, neon_u16, 2) -NEON_VOP(ceq_u32, neon_u32, 1) -#undef NEON_FN - /* Count Leading Sign/Zero Bits. */ static inline int do_clz8(uint8_t x) { diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 7580e46367..efb1c4adc4 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -594,6 +594,14 @@ static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, is_q ? 16 : 8, vec_full_reg_size(s)); } +/* Expand a 2-operand AdvSIMD vector operation using an op descriptor. */ +static void gen_gvec_op2(DisasContext *s, bool is_q, int rd, + int rn, const GVecGen2 *gvec_op) +{ + tcg_gen_gvec_2(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), + is_q ? 16 : 8, vec_full_reg_size(s), gvec_op); +} + /* Expand a 2-operand + immediate AdvSIMD vector operation using * an op descriptor. */ @@ -12366,6 +12374,15 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) return; } break; + case 0x8: /* CMGT, CMGE */ + gen_gvec_op2(s, is_q, rd, rn, u ? &cge0_op[size] : &cgt0_op[size]); + return; + case 0x9: /* CMEQ, CMLE */ + gen_gvec_op2(s, is_q, rd, rn, u ? &cle0_op[size] : &ceq0_op[size]); + return; + case 0xa: /* CMLT */ + gen_gvec_op2(s, is_q, rd, rn, &clt0_op[size]); + return; case 0xb: if (u) { /* ABS, NEG */ gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); @@ -12403,29 +12420,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) for (pass = 0; pass < (is_q ? 4 : 2); pass++) { TCGv_i32 tcg_op = tcg_temp_new_i32(); TCGv_i32 tcg_res = tcg_temp_new_i32(); - TCGCond cond; read_vec_element_i32(s, tcg_op, rn, pass, MO_32); if (size == 2) { /* Special cases for 32 bit elements */ switch (opcode) { - case 0xa: /* CMLT */ - /* 32 bit integer comparison against zero, result is - * test ? (2^32 - 1) : 0. We implement via setcond(test) - * and inverting. - */ - cond = TCG_COND_LT; - do_cmop: - tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0); - tcg_gen_neg_i32(tcg_res, tcg_res); - break; - case 0x8: /* CMGT, CMGE */ - cond = u ? TCG_COND_GE : TCG_COND_GT; - goto do_cmop; - case 0x9: /* CMEQ, CMLE */ - cond = u ? TCG_COND_LE : TCG_COND_EQ; - goto do_cmop; case 0x4: /* CLS */ if (u) { tcg_gen_clzi_i32(tcg_res, tcg_op, 32); @@ -12522,36 +12522,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) genfn(tcg_res, cpu_env, tcg_op); break; } - case 0x8: /* CMGT, CMGE */ - case 0x9: /* CMEQ, CMLE */ - case 0xa: /* CMLT */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 }, - { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 }, - { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 }, - }; - NeonGenTwoOpFn *genfn; - int comp; - bool reverse; - TCGv_i32 tcg_zero = tcg_const_i32(0); - - /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */ - comp = (opcode - 0x8) * 2 + u; - /* ...but LE, LT are implemented as reverse GE, GT */ - reverse = (comp > 2); - if (reverse) { - comp = 4 - comp; - } - genfn = fns[comp][size]; - if (reverse) { - genfn(tcg_res, tcg_zero, tcg_op); - } else { - genfn(tcg_res, tcg_op, tcg_zero); - } - tcg_temp_free_i32(tcg_zero); - break; - } case 0x4: /* CLS, CLZ */ if (u) { if (size == 0) { diff --git a/target/arm/translate.c b/target/arm/translate.c index 9f9f4e19e0..d4ad2028f1 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -3917,6 +3917,205 @@ static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn, return 1; } +static void gen_ceq0_i32(TCGv_i32 d, TCGv_i32 a) +{ + tcg_gen_setcondi_i32(TCG_COND_EQ, d, a, 0); + tcg_gen_neg_i32(d, d); +} + +static void gen_ceq0_i64(TCGv_i64 d, TCGv_i64 a) +{ + tcg_gen_setcondi_i64(TCG_COND_EQ, d, a, 0); + tcg_gen_neg_i64(d, d); +} + +static void gen_ceq0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) +{ + TCGv_vec zero = tcg_const_zeros_vec_matching(d); + tcg_gen_cmp_vec(TCG_COND_EQ, vece, d, a, zero); + tcg_temp_free_vec(zero); +} + +static const TCGOpcode vecop_list_cmp[] = { + INDEX_op_cmp_vec, 0 +}; + +const GVecGen2 ceq0_op[4] = { + { .fno = gen_helper_gvec_ceq0_b, + .fniv = gen_ceq0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_8 }, + { .fno = gen_helper_gvec_ceq0_h, + .fniv = gen_ceq0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_16 }, + { .fni4 = gen_ceq0_i32, + .fniv = gen_ceq0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_32 }, + { .fni8 = gen_ceq0_i64, + .fniv = gen_ceq0_vec, + .opt_opc = vecop_list_cmp, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, +}; + +static void gen_cle0_i32(TCGv_i32 d, TCGv_i32 a) +{ + tcg_gen_setcondi_i32(TCG_COND_LE, d, a, 0); + tcg_gen_neg_i32(d, d); +} + +static void gen_cle0_i64(TCGv_i64 d, TCGv_i64 a) +{ + tcg_gen_setcondi_i64(TCG_COND_LE, d, a, 0); + tcg_gen_neg_i64(d, d); +} + +static void gen_cle0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) +{ + TCGv_vec zero = tcg_const_zeros_vec_matching(d); + tcg_gen_cmp_vec(TCG_COND_LE, vece, d, a, zero); + tcg_temp_free_vec(zero); +} + +const GVecGen2 cle0_op[4] = { + { .fno = gen_helper_gvec_cle0_b, + .fniv = gen_cle0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_8 }, + { .fno = gen_helper_gvec_cle0_h, + .fniv = gen_cle0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_16 }, + { .fni4 = gen_cle0_i32, + .fniv = gen_cle0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_32 }, + { .fni8 = gen_cle0_i64, + .fniv = gen_cle0_vec, + .opt_opc = vecop_list_cmp, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, +}; + +static void gen_cge0_i32(TCGv_i32 d, TCGv_i32 a) +{ + tcg_gen_setcondi_i32(TCG_COND_GE, d, a, 0); + tcg_gen_neg_i32(d, d); +} + +static void gen_cge0_i64(TCGv_i64 d, TCGv_i64 a) +{ + tcg_gen_setcondi_i64(TCG_COND_GE, d, a, 0); + tcg_gen_neg_i64(d, d); +} + +static void gen_cge0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) +{ + TCGv_vec zero = tcg_const_zeros_vec_matching(d); + tcg_gen_cmp_vec(TCG_COND_GE, vece, d, a, zero); + tcg_temp_free_vec(zero); +} + +const GVecGen2 cge0_op[4] = { + { .fno = gen_helper_gvec_cge0_b, + .fniv = gen_cge0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_8 }, + { .fno = gen_helper_gvec_cge0_h, + .fniv = gen_cge0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_16 }, + { .fni4 = gen_cge0_i32, + .fniv = gen_cge0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_32 }, + { .fni8 = gen_cge0_i64, + .fniv = gen_cge0_vec, + .opt_opc = vecop_list_cmp, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, +}; + +static void gen_clt0_i32(TCGv_i32 d, TCGv_i32 a) +{ + tcg_gen_setcondi_i32(TCG_COND_LT, d, a, 0); + tcg_gen_neg_i32(d, d); +} + +static void gen_clt0_i64(TCGv_i64 d, TCGv_i64 a) +{ + tcg_gen_setcondi_i64(TCG_COND_LT, d, a, 0); + tcg_gen_neg_i64(d, d); +} + +static void gen_clt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) +{ + TCGv_vec zero = tcg_const_zeros_vec_matching(d); + tcg_gen_cmp_vec(TCG_COND_LT, vece, d, a, zero); + tcg_temp_free_vec(zero); +} + +const GVecGen2 clt0_op[4] = { + { .fno = gen_helper_gvec_clt0_b, + .fniv = gen_clt0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_8 }, + { .fno = gen_helper_gvec_clt0_h, + .fniv = gen_clt0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_16 }, + { .fni4 = gen_clt0_i32, + .fniv = gen_clt0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_32 }, + { .fni8 = gen_clt0_i64, + .fniv = gen_clt0_vec, + .opt_opc = vecop_list_cmp, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, +}; + +static void gen_cgt0_i32(TCGv_i32 d, TCGv_i32 a) +{ + tcg_gen_setcondi_i32(TCG_COND_GT, d, a, 0); + tcg_gen_neg_i32(d, d); +} + +static void gen_cgt0_i64(TCGv_i64 d, TCGv_i64 a) +{ + tcg_gen_setcondi_i64(TCG_COND_GT, d, a, 0); + tcg_gen_neg_i64(d, d); +} + +static void gen_cgt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) +{ + TCGv_vec zero = tcg_const_zeros_vec_matching(d); + tcg_gen_cmp_vec(TCG_COND_GT, vece, d, a, zero); + tcg_temp_free_vec(zero); +} + +const GVecGen2 cgt0_op[4] = { + { .fno = gen_helper_gvec_cgt0_b, + .fniv = gen_cgt0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_8 }, + { .fno = gen_helper_gvec_cgt0_h, + .fniv = gen_cgt0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_16 }, + { .fni4 = gen_cgt0_i32, + .fniv = gen_cgt0_vec, + .opt_opc = vecop_list_cmp, + .vece = MO_32 }, + { .fni8 = gen_cgt0_i64, + .fniv = gen_cgt0_vec, + .opt_opc = vecop_list_cmp, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .vece = MO_64 }, +}; + static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) { tcg_gen_vec_sar8i_i64(a, a, shift); @@ -6481,6 +6680,27 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size); break; + case NEON_2RM_VCEQ0: + tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size, + vec_size, &ceq0_op[size]); + break; + case NEON_2RM_VCGT0: + tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size, + vec_size, &cgt0_op[size]); + break; + case NEON_2RM_VCLE0: + tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size, + vec_size, &cle0_op[size]); + break; + case NEON_2RM_VCGE0: + tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size, + vec_size, &cge0_op[size]); + break; + case NEON_2RM_VCLT0: + tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size, + vec_size, &clt0_op[size]); + break; + default: elementwise: for (pass = 0; pass < (q ? 4 : 2); pass++) { @@ -6543,42 +6763,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) default: abort(); } break; - case NEON_2RM_VCGT0: case NEON_2RM_VCLE0: - tmp2 = tcg_const_i32(0); - switch(size) { - case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tmp2); - if (op == NEON_2RM_VCLE0) { - tcg_gen_not_i32(tmp, tmp); - } - break; - case NEON_2RM_VCGE0: case NEON_2RM_VCLT0: - tmp2 = tcg_const_i32(0); - switch(size) { - case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tmp2); - if (op == NEON_2RM_VCLT0) { - tcg_gen_not_i32(tmp, tmp); - } - break; - case NEON_2RM_VCEQ0: - tmp2 = tcg_const_i32(0); - switch(size) { - case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break; - case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break; - case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break; - default: abort(); - } - tcg_temp_free_i32(tmp2); - break; case NEON_2RM_VCGT0_F: { TCGv_ptr fpstatus = get_fpstatus_ptr(1); diff --git a/target/arm/translate.h b/target/arm/translate.h index d9ea0c99cc..98b319f3f6 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -275,6 +275,11 @@ static inline void gen_swstep_exception(DisasContext *s, int isv, int ex) uint64_t vfp_expand_imm(int size, uint8_t imm8); /* Vector operations shared between ARM and AArch64. */ +extern const GVecGen2 ceq0_op[4]; +extern const GVecGen2 clt0_op[4]; +extern const GVecGen2 cgt0_op[4]; +extern const GVecGen2 cle0_op[4]; +extern const GVecGen2 cge0_op[4]; extern const GVecGen3 mla_op[4]; extern const GVecGen3 mls_op[4]; extern const GVecGen3 cmtst_op[4]; diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 8017bd88c4..3d534188a8 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -1257,3 +1257,28 @@ void HELPER(sve2_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc) } } #endif + +#define DO_CMP0(NAME, TYPE, OP) \ +void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \ + TYPE nn = *(TYPE *)(vn + i); \ + *(TYPE *)(vd + i) = -(nn OP 0); \ + } \ + clear_tail(vd, opr_sz, simd_maxsz(desc)); \ +} + +DO_CMP0(gvec_ceq0_b, int8_t, ==) +DO_CMP0(gvec_clt0_b, int8_t, <) +DO_CMP0(gvec_cle0_b, int8_t, <=) +DO_CMP0(gvec_cgt0_b, int8_t, >) +DO_CMP0(gvec_cge0_b, int8_t, >=) + +DO_CMP0(gvec_ceq0_h, int16_t, ==) +DO_CMP0(gvec_clt0_h, int16_t, <) +DO_CMP0(gvec_cle0_h, int16_t, <=) +DO_CMP0(gvec_cgt0_h, int16_t, >) +DO_CMP0(gvec_cge0_h, int16_t, >=) + +#undef DO_CMP0 |