Diffstat (limited to 'target')
-rw-r--r--   target/arm/cpu.h           |   1
-rw-r--r--   target/arm/cpu64.c         |   1
-rw-r--r--   target/arm/helper-a64.c    |  43
-rw-r--r--   target/arm/helper-a64.h    |   2
-rw-r--r--   target/arm/helper.c        |  53
-rw-r--r--   target/arm/helper.h        |   4
-rw-r--r--   target/arm/translate-a64.c | 482
-rw-r--r--   target/riscv/translate.c   |  66
-rw-r--r--   target/xtensa/translate.c  |  50
9 files changed, 514 insertions(+), 188 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 44e6b77151..3b086be570 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1449,6 +1449,7 @@ enum arm_features {
     ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */
     ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */
     ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
+    ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */
     ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */
     ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */
     ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions.  */
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 991d764674..c50dcd4077 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -248,6 +248,7 @@ static void aarch64_max_initfn(Object *obj)
         set_feature(&cpu->env, ARM_FEATURE_V8_SM4);
         set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
         set_feature(&cpu->env, ARM_FEATURE_CRC);
+        set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS);
         set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
         set_feature(&cpu->env, ARM_FEATURE_V8_FP16);
         set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index afb25ad20c..549ed3513e 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -636,6 +636,49 @@ uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
     return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
 }
 
+/* Writes back the old data into Rs.  */
+void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
+                              uint64_t new_lo, uint64_t new_hi)
+{
+    uintptr_t ra = GETPC();
+#ifndef CONFIG_ATOMIC128
+    cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+#else
+    Int128 oldv, cmpv, newv;
+
+    cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
+    newv = int128_make128(new_lo, new_hi);
+
+    int mem_idx = cpu_mmu_index(env, false);
+    TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
+    oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
+
+    env->xregs[rs] = int128_getlo(oldv);
+    env->xregs[rs + 1] = int128_gethi(oldv);
+#endif
+}
+
+void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
+                              uint64_t new_hi, uint64_t new_lo)
+{
+    uintptr_t ra = GETPC();
+#ifndef CONFIG_ATOMIC128
+    cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+#else
+    Int128 oldv, cmpv, newv;
+
+    cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
+    newv = int128_make128(new_lo, new_hi);
+
+    int mem_idx = cpu_mmu_index(env, false);
+    TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
+    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+
+    env->xregs[rs + 1] = int128_getlo(oldv);
+    env->xregs[rs] = int128_gethi(oldv);
+#endif
+}
+
 /*
  * AdvSIMD half-precision
  */
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index ef4ddfe9d8..b8028ac98c 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -51,6 +51,8 @@ DEF_HELPER_FLAGS_4(paired_cmpxchg64_le_parallel, TCG_CALL_NO_WG,
 DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG,
                    i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(paired_cmpxchg64_be_parallel, TCG_CALL_NO_WG,
                    i64, env, i64, i64, i64)
+DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64)
+DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64)
 DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
 DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
 DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
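[Aside, not part of the patch: under CONFIG_ATOMIC128 the two casp helpers above boil down to one 16-byte compare-and-swap that returns the old memory contents, which are then written back into Rs/Rs+1. Below is a standalone sketch of that data path using the GCC/Clang __atomic builtin on unsigned __int128, which is roughly what helper_atomic_cmpxchgo_*_mmu wraps for the guest access; make128() and casp() are hypothetical names, and building it may need -mcx16 or libatomic depending on the host.]

#include <stdint.h>
#include <stdio.h>

typedef unsigned __int128 u128;

static u128 make128(uint64_t lo, uint64_t hi)
{
    return ((u128)hi << 64) | lo;
}

/* 16-byte CAS; returns the old memory contents, like oldv above. */
static u128 casp(u128 *mem, u128 cmp, u128 new)
{
    __atomic_compare_exchange_n(mem, &cmp, new, 0,
                                __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    return cmp;  /* on failure the builtin stores the old value here */
}

int main(void)
{
    _Alignas(16) u128 mem = make128(1, 2);
    u128 old = casp(&mem, make128(1, 2), make128(3, 4));

    /* The compare matched: memory now holds {3,4}; {1,2} goes to Rs. */
    printf("old lo=%llu hi=%llu\n", (unsigned long long)old,
           (unsigned long long)(old >> 64));
    return 0;
}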
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 0fef5d4d06..817f9d81a0 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11420,11 +11420,60 @@ VFP_CONV_FIX_A64(sq, s, 32, 64, int64)
 VFP_CONV_FIX(uh, s, 32, 32, uint16)
 VFP_CONV_FIX(ul, s, 32, 32, uint32)
 VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
-VFP_CONV_FIX_A64(sl, h, 16, 32, int32)
-VFP_CONV_FIX_A64(ul, h, 16, 32, uint32)
+
 #undef VFP_CONV_FIX
 #undef VFP_CONV_FIX_FLOAT
 #undef VFP_CONV_FLOAT_FIX_ROUND
+#undef VFP_CONV_FIX_A64
+
+/* Conversion to/from f16 can overflow to infinity before/after scaling.
+ * Therefore we convert to f64 (which does not round), scale,
+ * and then convert f64 to f16 (which may round).
+ */
+
+static float16 do_postscale_fp16(float64 f, int shift, float_status *fpst)
+{
+    return float64_to_float16(float64_scalbn(f, -shift, fpst), true, fpst);
+}
+
+float16 HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
+{
+    return do_postscale_fp16(int32_to_float64(x, fpst), shift, fpst);
+}
+
+float16 HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
+{
+    return do_postscale_fp16(uint32_to_float64(x, fpst), shift, fpst);
+}
+
+static float64 do_prescale_fp16(float16 f, int shift, float_status *fpst)
+{
+    if (unlikely(float16_is_any_nan(f))) {
+        float_raise(float_flag_invalid, fpst);
+        return 0;
+    } else {
+        int old_exc_flags = get_float_exception_flags(fpst);
+        float64 ret;
+
+        ret = float16_to_float64(f, true, fpst);
+        ret = float64_scalbn(ret, shift, fpst);
+        old_exc_flags |= get_float_exception_flags(fpst)
+                         & float_flag_input_denormal;
+        set_float_exception_flags(old_exc_flags, fpst);
+
+        return ret;
+    }
+}
+
+uint32_t HELPER(vfp_toshh)(float16 x, uint32_t shift, void *fpst)
+{
+    return float64_to_int16(do_prescale_fp16(x, shift, fpst), fpst);
+}
+
+uint32_t HELPER(vfp_touhh)(float16 x, uint32_t shift, void *fpst)
+{
+    return float64_to_uint16(do_prescale_fp16(x, shift, fpst), fpst);
+}
 
 /* Set the current fp rounding mode and return the old one.
  * The argument is a softfloat float_round_ value.
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 34e8cc8904..1969b37f2d 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -149,8 +149,8 @@ DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, ptr)
 DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, ptr)
 DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, ptr)
 DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, ptr)
-DEF_HELPER_3(vfp_toulh, i32, f16, i32, ptr)
-DEF_HELPER_3(vfp_toslh, i32, f16, i32, ptr)
+DEF_HELPER_3(vfp_touhh, i32, f16, i32, ptr)
+DEF_HELPER_3(vfp_toshh, i32, f16, i32, ptr)
 DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr)
 DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr)
 DEF_HELPER_3(vfp_tosqs, i64, f32, i32, ptr)
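[Aside: the f64 detour above exists because float16 has very little range (largest finite value 65504), so a direct int-to-f16 conversion can overflow to infinity before the scaling is applied, and scaling in f16 would also round twice. A minimal sketch of the post-scale direction, assuming a compiler with _Float16 support (recent GCC/Clang); sltoh() is a hypothetical stand-in for the vfp_sltoh helper.]

#include <math.h>
#include <stdint.h>
#include <stdio.h>

static _Float16 sltoh(int32_t x, int shift)
{
    /* int32 -> f64 is exact and scalbn on f64 is exact here, so only
     * the final f64 -> f16 step rounds, as in do_postscale_fp16(). */
    return (_Float16)scalbn((double)x, -shift);
}

int main(void)
{
    /* 1 << 20 is not representable in f16: converting first would give
     * +inf and no scaling could recover.  Scaling in f64 first yields
     * exactly 4096, which f16 does represent. */
    printf("%f\n", (double)sltoh(1 << 20, 8));
    return 0;
}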
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 1e7c150514..4d1b220cc6 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -84,6 +84,7 @@ typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
 typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
 typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
 typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
 
 /* Note that the gvec expanders operate on offsets + sizes.  */
 typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
@@ -2113,6 +2114,103 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
 }
 
+static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
+                                 int rn, int size)
+{
+    TCGv_i64 tcg_rs = cpu_reg(s, rs);
+    TCGv_i64 tcg_rt = cpu_reg(s, rt);
+    int memidx = get_mem_index(s);
+    TCGv_i64 addr = cpu_reg_sp(s, rn);
+
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+    tcg_gen_atomic_cmpxchg_i64(tcg_rs, addr, tcg_rs, tcg_rt, memidx,
+                               size | MO_ALIGN | s->be_data);
+}
+
+static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
+                                      int rn, int size)
+{
+    TCGv_i64 s1 = cpu_reg(s, rs);
+    TCGv_i64 s2 = cpu_reg(s, rs + 1);
+    TCGv_i64 t1 = cpu_reg(s, rt);
+    TCGv_i64 t2 = cpu_reg(s, rt + 1);
+    TCGv_i64 addr = cpu_reg_sp(s, rn);
+    int memidx = get_mem_index(s);
+
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+
+    if (size == 2) {
+        TCGv_i64 cmp = tcg_temp_new_i64();
+        TCGv_i64 val = tcg_temp_new_i64();
+
+        if (s->be_data == MO_LE) {
+            tcg_gen_concat32_i64(val, t1, t2);
+            tcg_gen_concat32_i64(cmp, s1, s2);
+        } else {
+            tcg_gen_concat32_i64(val, t2, t1);
+            tcg_gen_concat32_i64(cmp, s2, s1);
+        }
+
+        tcg_gen_atomic_cmpxchg_i64(cmp, addr, cmp, val, memidx,
+                                   MO_64 | MO_ALIGN | s->be_data);
+        tcg_temp_free_i64(val);
+
+        if (s->be_data == MO_LE) {
+            tcg_gen_extr32_i64(s1, s2, cmp);
+        } else {
+            tcg_gen_extr32_i64(s2, s1, cmp);
+        }
+        tcg_temp_free_i64(cmp);
+    } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+        TCGv_i32 tcg_rs = tcg_const_i32(rs);
+
+        if (s->be_data == MO_LE) {
+            gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+        } else {
+            gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+        }
+        tcg_temp_free_i32(tcg_rs);
+    } else {
+        TCGv_i64 d1 = tcg_temp_new_i64();
+        TCGv_i64 d2 = tcg_temp_new_i64();
+        TCGv_i64 a2 = tcg_temp_new_i64();
+        TCGv_i64 c1 = tcg_temp_new_i64();
+        TCGv_i64 c2 = tcg_temp_new_i64();
+        TCGv_i64 zero = tcg_const_i64(0);
+
+        /* Load the two words, in memory order.  */
+        tcg_gen_qemu_ld_i64(d1, addr, memidx,
+                            MO_64 | MO_ALIGN_16 | s->be_data);
+        tcg_gen_addi_i64(a2, addr, 8);
+        tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
+
+        /* Compare the two words, also in memory order.  */
+        tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
+        tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
+        tcg_gen_and_i64(c2, c2, c1);
+
+        /* If compare equal, write back new data, else write back old data.  */
+        tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
+        tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
+        tcg_gen_qemu_st_i64(c1, addr, memidx, MO_64 | s->be_data);
+        tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
+        tcg_temp_free_i64(a2);
+        tcg_temp_free_i64(c1);
+        tcg_temp_free_i64(c2);
+        tcg_temp_free_i64(zero);
+
+        /* Write back the data from memory to Rs.  */
+        tcg_gen_mov_i64(s1, d1);
+        tcg_gen_mov_i64(s2, d2);
+        tcg_temp_free_i64(d1);
+        tcg_temp_free_i64(d2);
+    }
+}
+
 /* Update the Sixty-Four bit (SF) registersize. This logic is derived
  * from the ARMv8 specs for LDR (Shared decode for all encodings).
  */
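[Aside: the serial else-branch above is easier to audit as plain C. Hypothetical casp_serial(), modelling the guest register pairs as plain integers; note the branch-free select, which mirrors the movcond sequence by writing back either the new or the old data and always returning the old data in Rs.]

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct { uint64_t w[2]; } Pair;

static void casp_serial(uint64_t mem[2], Pair *rs, const Pair *rt)
{
    uint64_t d1 = mem[0], d2 = mem[1];              /* load, memory order */
    bool eq = (d1 == rs->w[0]) && (d2 == rs->w[1]); /* compare both words */

    mem[0] = eq ? rt->w[0] : d1;                    /* store new or old */
    mem[1] = eq ? rt->w[1] : d2;
    rs->w[0] = d1;                                  /* Rs gets old data */
    rs->w[1] = d2;
}

int main(void)
{
    uint64_t mem[2] = {1, 2};
    Pair rs = {{1, 2}}, rt = {{3, 4}};

    casp_serial(mem, &rs, &rt);
    assert(mem[0] == 3 && mem[1] == 4);  /* compare matched, swapped */
    assert(rs.w[0] == 1 && rs.w[1] == 2);
    return 0;
}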
@@ -2147,62 +2245,114 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
     int rt = extract32(insn, 0, 5);
     int rn = extract32(insn, 5, 5);
     int rt2 = extract32(insn, 10, 5);
-    int is_lasr = extract32(insn, 15, 1);
     int rs = extract32(insn, 16, 5);
-    int is_pair = extract32(insn, 21, 1);
-    int is_store = !extract32(insn, 22, 1);
-    int is_excl = !extract32(insn, 23, 1);
+    int is_lasr = extract32(insn, 15, 1);
+    int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
     int size = extract32(insn, 30, 2);
     TCGv_i64 tcg_addr;
 
-    if ((!is_excl && !is_pair && !is_lasr) ||
-        (!is_excl && is_pair) ||
-        (is_pair && size < 2)) {
-        unallocated_encoding(s);
+    switch (o2_L_o1_o0) {
+    case 0x0: /* STXR */
+    case 0x1: /* STLXR */
+        if (rn == 31) {
+            gen_check_sp_alignment(s);
+        }
+        if (is_lasr) {
+            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+        }
+        tcg_addr = read_cpu_reg_sp(s, rn, 1);
+        gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, false);
         return;
-    }
 
-    if (rn == 31) {
-        gen_check_sp_alignment(s);
-    }
-    tcg_addr = read_cpu_reg_sp(s, rn, 1);
+    case 0x4: /* LDXR */
+    case 0x5: /* LDAXR */
+        if (rn == 31) {
+            gen_check_sp_alignment(s);
+        }
+        tcg_addr = read_cpu_reg_sp(s, rn, 1);
+        s->is_ldex = true;
+        gen_load_exclusive(s, rt, rt2, tcg_addr, size, false);
+        if (is_lasr) {
+            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+        }
+        return;
 
-    /* Note that since TCG is single threaded load-acquire/store-release
-     * semantics require no extra if (is_lasr) { ... } handling.
-     */
+    case 0x9: /* STLR */
+        /* Generate ISS for non-exclusive accesses including LASR.  */
+        if (rn == 31) {
+            gen_check_sp_alignment(s);
+        }
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+        tcg_addr = read_cpu_reg_sp(s, rn, 1);
+        do_gpr_st(s, cpu_reg(s, rt), tcg_addr, size, true, rt,
+                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
+        return;
 
-    if (is_excl) {
-        if (!is_store) {
-            s->is_ldex = true;
-            gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
-            if (is_lasr) {
-                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+    case 0xd: /* LDAR */
+        /* Generate ISS for non-exclusive accesses including LASR.  */
+        if (rn == 31) {
+            gen_check_sp_alignment(s);
+        }
+        tcg_addr = read_cpu_reg_sp(s, rn, 1);
+        do_gpr_ld(s, cpu_reg(s, rt), tcg_addr, size, false, false, true, rt,
+                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+        return;
+
+    case 0x2: case 0x3: /* CASP / STXP */
+        if (size & 2) { /* STXP / STLXP */
+            if (rn == 31) {
+                gen_check_sp_alignment(s);
             }
-        } else {
             if (is_lasr) {
                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
             }
-            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
+            tcg_addr = read_cpu_reg_sp(s, rn, 1);
+            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true);
+            return;
         }
-    } else {
-        TCGv_i64 tcg_rt = cpu_reg(s, rt);
-        bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
+        if (rt2 == 31
+            && ((rt | rs) & 1) == 0
+            && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+            /* CASP / CASPL */
+            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
+            return;
+        }
+        break;
 
-        /* Generate ISS for non-exclusive accesses including LASR.
-         */
-        if (is_store) {
-            if (is_lasr) {
-                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+    case 0x6: case 0x7: /* CASPA / LDXP */
+        if (size & 2) { /* LDXP / LDAXP */
+            if (rn == 31) {
+                gen_check_sp_alignment(s);
             }
-            do_gpr_st(s, tcg_rt, tcg_addr, size,
-                      true, rt, iss_sf, is_lasr);
-        } else {
-            do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
-                      true, rt, iss_sf, is_lasr);
+            tcg_addr = read_cpu_reg_sp(s, rn, 1);
+            s->is_ldex = true;
+            gen_load_exclusive(s, rt, rt2, tcg_addr, size, true);
             if (is_lasr) {
                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
             }
+            return;
+        }
+        if (rt2 == 31
+            && ((rt | rs) & 1) == 0
+            && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+            /* CASPA / CASPAL */
+            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
+            return;
         }
+        break;
+
+    case 0xa: /* CAS */
+    case 0xb: /* CASL */
+    case 0xe: /* CASA */
+    case 0xf: /* CASAL */
+        if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+            gen_compare_and_swap(s, rs, rt, rn, size);
+            return;
+        }
+        break;
     }
+    unallocated_encoding(s);
 }
 
 /*
@@ -2715,6 +2865,88 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
     }
 }
 
+/* Atomic memory operations
+ *
+ *  31  30      27  26    24    22  21   16   15    12    10    5     0
+ * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
+ * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt |
+ * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
+ *
+ * Rt: the result register
+ * Rn: base address or SP
+ * Rs: the source register for the operation
+ * V: vector flag (always 0 as of v8.3)
+ * A: acquire flag
+ * R: release flag
+ */
+static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
+                              int size, int rt, bool is_vector)
+{
+    int rs = extract32(insn, 16, 5);
+    int rn = extract32(insn, 5, 5);
+    int o3_opc = extract32(insn, 12, 4);
+    int feature = ARM_FEATURE_V8_ATOMICS;
+    TCGv_i64 tcg_rn, tcg_rs;
+    AtomicThreeOpFn *fn;
+
+    if (is_vector) {
+        unallocated_encoding(s);
+        return;
+    }
+    switch (o3_opc) {
+    case 000: /* LDADD */
+        fn = tcg_gen_atomic_fetch_add_i64;
+        break;
+    case 001: /* LDCLR */
+        fn = tcg_gen_atomic_fetch_and_i64;
+        break;
+    case 002: /* LDEOR */
+        fn = tcg_gen_atomic_fetch_xor_i64;
+        break;
+    case 003: /* LDSET */
+        fn = tcg_gen_atomic_fetch_or_i64;
+        break;
+    case 004: /* LDSMAX */
+        fn = tcg_gen_atomic_fetch_smax_i64;
+        break;
+    case 005: /* LDSMIN */
+        fn = tcg_gen_atomic_fetch_smin_i64;
+        break;
+    case 006: /* LDUMAX */
+        fn = tcg_gen_atomic_fetch_umax_i64;
+        break;
+    case 007: /* LDUMIN */
+        fn = tcg_gen_atomic_fetch_umin_i64;
+        break;
+    case 010: /* SWP */
+        fn = tcg_gen_atomic_xchg_i64;
+        break;
+    default:
+        unallocated_encoding(s);
+        return;
+    }
+    if (!arm_dc_feature(s, feature)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+    tcg_rn = cpu_reg_sp(s, rn);
+    tcg_rs = read_cpu_reg(s, rs, true);
+
+    if (o3_opc == 1) { /* LDCLR */
+        tcg_gen_not_i64(tcg_rs, tcg_rs);
+    }
+
+    /* The tcg atomic primitives are all full barriers.  Therefore we
+     * can ignore the Acquire and Release bits of this instruction.
+     */
+    fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
+       s->be_data | size | MO_ALIGN);
+}
+
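[Aside: LDCLR is the one entry in the table that needs massaging, since the architecture defines it as "clear the bits that are set in Rs" while TCG only offers fetch-and; complementing Rs first reconciles the two, as this C11 model shows. ldclr64() is a hypothetical name.]

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

static uint64_t ldclr64(_Atomic uint64_t *mem, uint64_t rs)
{
    /* atomic AND with ~Rs; the old memory value is returned into Rt */
    return atomic_fetch_and(mem, ~rs);
}

int main(void)
{
    _Atomic uint64_t m = 0xff00;
    uint64_t old = ldclr64(&m, 0x0f00);

    assert(old == 0xff00 && m == 0xf000);
    return 0;
}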
 /* Load/store register (all forms) */
 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
 {
@@ -2725,23 +2957,28 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn)
     switch (extract32(insn, 24, 2)) {
     case 0:
-        if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
-            disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
-        } else {
+        if (extract32(insn, 21, 1) == 0) {
             /* Load/store register (unscaled immediate)
              * Load/store immediate pre/post-indexed
              * Load/store register unprivileged
              */
             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
+            return;
+        }
+        switch (extract32(insn, 10, 2)) {
+        case 0:
+            disas_ldst_atomic(s, insn, size, rt, is_vector);
+            return;
+        case 2:
+            disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
+            return;
         }
         break;
     case 1:
         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
-        break;
-    default:
-        unallocated_encoding(s);
-        break;
+        return;
     }
+    unallocated_encoding(s);
 }
 
 /* AdvSIMD load/store multiple structures
@@ -5444,31 +5681,24 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
 
     if (itof) {
         TCGv_i64 tcg_rn = cpu_reg(s, rn);
+        TCGv_i64 tmp;
 
         switch (type) {
         case 0:
-        {
             /* 32 bit */
-            TCGv_i64 tmp = tcg_temp_new_i64();
+            tmp = tcg_temp_new_i64();
             tcg_gen_ext32u_i64(tmp, tcg_rn);
-            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
-            tcg_gen_movi_i64(tmp, 0);
-            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
+            write_fp_dreg(s, rd, tmp);
             tcg_temp_free_i64(tmp);
             break;
-        }
         case 1:
-        {
             /* 64 bit */
-            TCGv_i64 tmp = tcg_const_i64(0);
-            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
-            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
-            tcg_temp_free_i64(tmp);
+            write_fp_dreg(s, rd, tcg_rn);
             break;
-        }
         case 2:
             /* 64 bit to top half. */
             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
+            clear_vec_high(s, true, rd);
             break;
         }
     } else {
@@ -6021,15 +6251,18 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
             tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
             break;
         case 0x0a: /* SMAXV / UMAXV */
-            tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
-                                tcg_res,
-                                tcg_res, tcg_elt, tcg_res, tcg_elt);
+            if (is_u) {
+                tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
+            } else {
+                tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
+            }
             break;
         case 0x1a: /* SMINV / UMINV */
-            tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
-                                tcg_res,
-                                tcg_res, tcg_elt, tcg_res, tcg_elt);
-            break;
+            if (is_u) {
+                tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
+            } else {
+                tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
+            }
+            break;
         default:
             g_assert_not_reached();
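[Aside: SMAXV/UMAXV and SMINV/UMINV above trade a conditional-move idiom for the dedicated max/min expanders. A scalar model of the reduction the translator unrolls across lanes; smaxv() is a hypothetical name.]

#include <assert.h>
#include <stdint.h>

static int64_t smaxv(const int64_t *lanes, int n)
{
    int64_t res = lanes[0];

    for (int i = 1; i < n; i++) {
        res = res > lanes[i] ? res : lanes[i];  /* tcg_gen_smax_i64 */
    }
    return res;
}

int main(void)
{
    const int64_t v[4] = {3, -7, 42, 9};

    assert(smaxv(v, 4) == 42);
    return 0;
}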
@@ -7165,13 +7398,26 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
                                          int immh, int immb, int opcode,
                                          int rn, int rd)
 {
-    bool is_double = extract32(immh, 3, 1);
-    int size = is_double ? MO_64 : MO_32;
-    int elements;
+    int size, elements, fracbits;
     int immhb = immh << 3 | immb;
-    int fracbits = (is_double ? 128 : 64) - immhb;
 
-    if (!extract32(immh, 2, 2)) {
+    if (immh & 8) {
+        size = MO_64;
+        if (!is_scalar && !is_q) {
+            unallocated_encoding(s);
+            return;
+        }
+    } else if (immh & 4) {
+        size = MO_32;
+    } else if (immh & 2) {
+        size = MO_16;
+        if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            unallocated_encoding(s);
+            return;
+        }
+    } else {
+        /* immh == 0 would be a failure of the decode logic */
+        g_assert(immh == 1);
         unallocated_encoding(s);
         return;
     }
@@ -7179,20 +7425,14 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
     if (is_scalar) {
         elements = 1;
     } else {
-        elements = is_double ? 2 : is_q ? 4 : 2;
-        if (is_double && !is_q) {
-            unallocated_encoding(s);
-            return;
-        }
+        elements = (8 << is_q) >> size;
     }
+    fracbits = (16 << size) - immhb;
 
     if (!fp_access_check(s)) {
         return;
     }
 
-    /* immh == 0 would be a failure of the decode logic */
-    g_assert(immh);
-
     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
 }
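[Aside: with all three element sizes handled uniformly, the fractional-bit count falls out of the element size as fracbits = (16 << size) - immh:immb, where size is the MemOp code (MO_16 = 1, MO_32 = 2, MO_64 = 3). A quick check of that decode arithmetic; fracbits() is a hypothetical helper.]

#include <assert.h>

static int fracbits(int size, int immh, int immb)
{
    return (16 << size) - ((immh << 3) | immb);
}

int main(void)
{
    assert(fracbits(3, 0xf, 0) == 8);   /* 64-bit element: 128 - 120 */
    assert(fracbits(2, 0x4, 0) == 32);  /* 32-bit element:  64 -  32 */
    assert(fracbits(1, 0x2, 0) == 16);  /* 16-bit element:  32 -  16 */
    return 0;
}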
@@ -7201,19 +7441,28 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
                                          bool is_q, bool is_u,
                                          int immh, int immb, int rn, int rd)
 {
-    bool is_double = extract32(immh, 3, 1);
     int immhb = immh << 3 | immb;
-    int fracbits = (is_double ? 128 : 64) - immhb;
-    int pass;
+    int pass, size, fracbits;
     TCGv_ptr tcg_fpstatus;
     TCGv_i32 tcg_rmode, tcg_shift;
 
-    if (!extract32(immh, 2, 2)) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    if (!is_scalar && !is_q && is_double) {
+    if (immh & 0x8) {
+        size = MO_64;
+        if (!is_scalar && !is_q) {
+            unallocated_encoding(s);
+            return;
+        }
+    } else if (immh & 0x4) {
+        size = MO_32;
+    } else if (immh & 0x2) {
+        size = MO_16;
+        if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+            unallocated_encoding(s);
+            return;
+        }
+    } else {
+        /* Should have split out AdvSIMD modified immediate earlier.  */
+        assert(immh == 1);
         unallocated_encoding(s);
         return;
     }
@@ -7225,11 +7474,12 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
     assert(!(is_scalar && is_q));
 
     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
-    tcg_fpstatus = get_fpstatus_ptr(false);
+    tcg_fpstatus = get_fpstatus_ptr(size == MO_16);
     gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
+    fracbits = (16 << size) - immhb;
     tcg_shift = tcg_const_i32(fracbits);
 
-    if (is_double) {
+    if (size == MO_64) {
         int maxpass = is_scalar ? 1 : 2;
 
         for (pass = 0; pass < maxpass; pass++) {
@@ -7246,20 +7496,37 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
         }
         clear_vec_high(s, is_q, rd);
     } else {
-        int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
-        for (pass = 0; pass < maxpass; pass++) {
-            TCGv_i32 tcg_op = tcg_temp_new_i32();
+        void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
+        int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
 
-            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+        switch (size) {
+        case MO_16:
             if (is_u) {
-                gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
+                fn = gen_helper_vfp_touhh;
             } else {
-                gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
+                fn = gen_helper_vfp_toshh;
             }
+            break;
+        case MO_32:
+            if (is_u) {
+                fn = gen_helper_vfp_touls;
+            } else {
+                fn = gen_helper_vfp_tosls;
+            }
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        for (pass = 0; pass < maxpass; pass++) {
+            TCGv_i32 tcg_op = tcg_temp_new_i32();
+
+            read_vec_element_i32(s, tcg_op, rn, pass, size);
+            fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
             if (is_scalar) {
                 write_fp_sreg(s, rd, tcg_op);
             } else {
-                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
+                write_vec_element_i32(s, tcg_op, rd, pass, size);
             }
             tcg_temp_free_i32(tcg_op);
         }
@@ -9927,27 +10194,6 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
     }
 }
 
-/* Helper functions for 32 bit comparisons */
-static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
-    tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
-}
-
-static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
-    tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
-}
-
-static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
-    tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
-}
-
-static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
-    tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
-}
-
 /* Pairwise op subgroup of C3.6.16.
  *
  * This is called directly or via the handle_3same_float for float pairwise
@@ -10047,7 +10293,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
         static NeonGenTwoOpFn * const fns[3][2] = {
             { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
             { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
-            { gen_max_s32, gen_max_u32 },
+            { tcg_gen_smax_i32, tcg_gen_umax_i32 },
         };
         genfn = fns[size][u];
         break;
@@ -10057,7 +10303,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
         static NeonGenTwoOpFn * const fns[3][2] = {
             { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
             { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
-            { gen_min_s32, gen_min_u32 },
+            { tcg_gen_smin_i32, tcg_gen_umin_i32 },
        };
         genfn = fns[size][u];
         break;
@@ -10512,7 +10758,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
         static NeonGenTwoOpFn * const fns[3][2] = {
             { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
             { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
-            { gen_max_s32, gen_max_u32 },
+            { tcg_gen_smax_i32, tcg_gen_umax_i32 },
         };
         genfn = fns[size][u];
         break;
@@ -10523,7 +10769,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
         static NeonGenTwoOpFn * const fns[3][2] = {
             { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
             { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
-            { gen_min_s32, gen_min_u32 },
+            { tcg_gen_smin_i32, tcg_gen_umin_i32 },
         };
         genfn = fns[size][u];
         break;
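[Aside: the deleted gen_max_s32()/gen_min_u32() wrappers emitted movcond_i32(GE/GEU/LE/LEU, res, op1, op2, op1, op2), which is precisely what the new smax/umax/smin/umin expanders compute, so the function tables can reference tcg_gen_smax_i32 and friends directly. A scalar equivalence check:]

#include <assert.h>
#include <stdint.h>

static int32_t movcond_ge(int32_t op1, int32_t op2)  /* old gen_max_s32 */
{
    return op1 >= op2 ? op1 : op2;
}

static int32_t smax32(int32_t a, int32_t b)          /* new expander */
{
    return a > b ? a : b;
}

int main(void)
{
    for (int32_t a = -2; a <= 2; a++) {
        for (int32_t b = -2; b <= 2; b++) {
            assert(movcond_ge(a, b) == smax32(a, b));
        }
    }
    return 0;
}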
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 1788668c6f..ee2bbc55b0 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -716,7 +716,6 @@ static void gen_atomic(DisasContext *ctx, uint32_t opc,
     TCGv src1, src2, dat;
     TCGLabel *l1, *l2;
     TCGMemOp mop;
-    TCGCond cond;
     bool aq, rl;
 
     /* Extract the size of the atomic operation.  */
@@ -814,60 +813,29 @@ static void gen_atomic(DisasContext *ctx, uint32_t opc,
         tcg_gen_atomic_fetch_or_tl(src2, src1, src2, ctx->mem_idx, mop);
         gen_set_gpr(rd, src2);
         break;
     case OPC_RISC_AMOMIN:
-        cond = TCG_COND_LT;
-        goto do_minmax;
+        gen_get_gpr(src1, rs1);
+        gen_get_gpr(src2, rs2);
+        tcg_gen_atomic_fetch_smin_tl(src2, src1, src2, ctx->mem_idx, mop);
+        gen_set_gpr(rd, src2);
+        break;
     case OPC_RISC_AMOMAX:
-        cond = TCG_COND_GT;
-        goto do_minmax;
+        gen_get_gpr(src1, rs1);
+        gen_get_gpr(src2, rs2);
+        tcg_gen_atomic_fetch_smax_tl(src2, src1, src2, ctx->mem_idx, mop);
+        gen_set_gpr(rd, src2);
+        break;
     case OPC_RISC_AMOMINU:
-        cond = TCG_COND_LTU;
-        goto do_minmax;
+        gen_get_gpr(src1, rs1);
+        gen_get_gpr(src2, rs2);
+        tcg_gen_atomic_fetch_umin_tl(src2, src1, src2, ctx->mem_idx, mop);
+        gen_set_gpr(rd, src2);
+        break;
     case OPC_RISC_AMOMAXU:
-        cond = TCG_COND_GTU;
-        goto do_minmax;
-
-    do_minmax:
-        /* Handle the RL barrier.  The AQ barrier is handled along the
-           parallel path by the SC atomic cmpxchg.  On the serial path,
-           of course, barriers do not matter.  */
-        if (rl) {
-            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
-        }
-        if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-            l1 = gen_new_label();
-            gen_set_label(l1);
-        } else {
-            l1 = NULL;
-        }
-
         gen_get_gpr(src1, rs1);
         gen_get_gpr(src2, rs2);
-        if ((mop & MO_SSIZE) == MO_SL) {
-            /* Sign-extend the register comparison input.  */
-            tcg_gen_ext32s_tl(src2, src2);
-        }
-        dat = tcg_temp_local_new();
-        tcg_gen_qemu_ld_tl(dat, src1, ctx->mem_idx, mop);
-        tcg_gen_movcond_tl(cond, src2, dat, src2, dat, src2);
-
-        if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
-            /* Parallel context.  Make this operation atomic by verifying
-               that the memory didn't change while we computed the result.  */
-            tcg_gen_atomic_cmpxchg_tl(src2, src1, dat, src2, ctx->mem_idx, mop);
-
-            /* If the cmpxchg failed, retry.  */
-            /* ??? There is an assumption here that this will eventually
-               succeed, such that we don't live-lock.  This is not unlike
-               a similar loop that the compiler would generate for e.g.
-               __atomic_fetch_and_xor, so don't worry about it.  */
-            tcg_gen_brcond_tl(TCG_COND_NE, dat, src2, l1);
-        } else {
-            /* Serial context.  Directly store the result.  */
-            tcg_gen_qemu_st_tl(src2, src1, ctx->mem_idx, mop);
-        }
-        gen_set_gpr(rd, dat);
-        tcg_temp_free(dat);
+        tcg_gen_atomic_fetch_umax_tl(src2, src1, src2, ctx->mem_idx, mop);
+        gen_set_gpr(rd, src2);
         break;
 
     default:
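[Aside: the retry loop this riscv change deletes existed only because TCG previously had no fetch-min/max primitives; each AMO now maps onto a single primitive that atomically returns the old memory value into rd. A plain, deliberately non-atomic C model of AMOMIN.D's value semantics (amomin_d() is hypothetical; the TCG primitive performs the same read-modify-write atomically):]

#include <assert.h>
#include <stdint.h>

static int64_t amomin_d(int64_t *mem, int64_t src)
{
    int64_t old = *mem;

    *mem = old < src ? old : src;  /* signed min of memory and rs2 */
    return old;                    /* old value -> rd */
}

int main(void)
{
    int64_t m = 5;

    assert(amomin_d(&m, 3) == 5 && m == 3);
    return 0;
}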
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index aad496347d..ae0feb0254 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -1526,10 +1526,8 @@ static void translate_clamps(DisasContext *dc, const uint32_t arg[],
         TCGv_i32 tmp1 = tcg_const_i32(-1u << arg[2]);
         TCGv_i32 tmp2 = tcg_const_i32((1 << arg[2]) - 1);
 
-        tcg_gen_movcond_i32(TCG_COND_GT, tmp1,
-                            cpu_R[arg[1]], tmp1, cpu_R[arg[1]], tmp1);
-        tcg_gen_movcond_i32(TCG_COND_LT, cpu_R[arg[0]],
-                            tmp1, tmp2, tmp1, tmp2);
+        tcg_gen_smax_i32(tmp1, tmp1, cpu_R[arg[1]]);
+        tcg_gen_smin_i32(cpu_R[arg[0]], tmp1, tmp2);
         tcg_temp_free(tmp1);
         tcg_temp_free(tmp2);
     }
@@ -1854,13 +1852,35 @@ static void translate_memw(DisasContext *dc, const uint32_t arg[],
     tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
 }
 
-static void translate_minmax(DisasContext *dc, const uint32_t arg[],
-                             const uint32_t par[])
+static void translate_smin(DisasContext *dc, const uint32_t arg[],
+                           const uint32_t par[])
 {
     if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
-        tcg_gen_movcond_i32(par[0], cpu_R[arg[0]],
-                            cpu_R[arg[1]], cpu_R[arg[2]],
-                            cpu_R[arg[1]], cpu_R[arg[2]]);
+        tcg_gen_smin_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
+    }
+}
+
+static void translate_umin(DisasContext *dc, const uint32_t arg[],
+                           const uint32_t par[])
+{
+    if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
+        tcg_gen_umin_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
+    }
+}
+
+static void translate_smax(DisasContext *dc, const uint32_t arg[],
+                           const uint32_t par[])
+{
+    if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
+        tcg_gen_smax_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
+    }
+}
+
+static void translate_umax(DisasContext *dc, const uint32_t arg[],
+                           const uint32_t par[])
+{
+    if (gen_window_check3(dc, arg[0], arg[1], arg[2])) {
+        tcg_gen_umax_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]);
     }
 }
 
@@ -2983,23 +3003,19 @@ static const XtensaOpcodeOps core_ops[] = {
         .par = (const uint32_t[]){TCG_COND_NE},
     }, {
         .name = "max",
-        .translate = translate_minmax,
-        .par = (const uint32_t[]){TCG_COND_GE},
+        .translate = translate_smax,
     }, {
         .name = "maxu",
-        .translate = translate_minmax,
-        .par = (const uint32_t[]){TCG_COND_GEU},
+        .translate = translate_umax,
     }, {
         .name = "memw",
         .translate = translate_memw,
     }, {
         .name = "min",
-        .translate = translate_minmax,
-        .par = (const uint32_t[]){TCG_COND_LT},
+        .translate = translate_smin,
    }, {
         .name = "minu",
-        .translate = translate_minmax,
-        .par = (const uint32_t[]){TCG_COND_LTU},
+        .translate = translate_umin,
     }, {
         .name = "mov",
         .translate = translate_mov,
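[Aside: the xtensa CLAMPS rewrite is the same min/max story: clamp x to [-2^n, 2^n - 1] with one signed max against the lower bound and one signed min against the upper, instead of two movconds. Scalar sketch; clamps() is a hypothetical name.]

#include <assert.h>
#include <stdint.h>

static int32_t clamps(int32_t x, int n)
{
    int32_t lo = -(1 << n);     /* tcg_const_i32(-1u << arg[2]) */
    int32_t hi = (1 << n) - 1;  /* tcg_const_i32((1 << arg[2]) - 1) */

    x = x > lo ? x : lo;        /* tcg_gen_smax_i32 */
    return x < hi ? x : hi;     /* tcg_gen_smin_i32 */
}

int main(void)
{
    assert(clamps(1000, 7) == 127);
    assert(clamps(-1000, 7) == -128);
    assert(clamps(5, 7) == 5);
    return 0;
}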