From ecb8ab8d71aab770555a6972428b711400a27248 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 10 May 2018 18:10:57 +0100 Subject: target/arm: Use new min/max expanders The generic expanders replace nearly identical code in the translator. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20180508151437.4232-3-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 46 ++++++++++++++-------------------------------- 1 file changed, 14 insertions(+), 32 deletions(-) (limited to 'target/arm/translate-a64.c') diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 6d49f30b4a..60d104cc8a 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -6021,15 +6021,18 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt); break; case 0x0a: /* SMAXV / UMAXV */ - tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE, - tcg_res, - tcg_res, tcg_elt, tcg_res, tcg_elt); + if (is_u) { + tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt); + } else { + tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt); + } break; case 0x1a: /* SMINV / UMINV */ - tcg_gen_movcond_i64(is_u ? 
TCG_COND_LEU : TCG_COND_LE, - tcg_res, - tcg_res, tcg_elt, tcg_res, tcg_elt); - break; + if (is_u) { + tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt); + } else { + tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt); + } break; default: g_assert_not_reached(); @@ -9927,27 +9930,6 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) } } -/* Helper functions for 32 bit comparisons */ -static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) -{ - tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2); -} - -static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) -{ - tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2); -} - -static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) -{ - tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2); -} - -static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) -{ - tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2); -} - /* Pairwise op subgroup of C3.6.16. * * This is called directly or via the handle_3same_float for float pairwise @@ -10047,7 +10029,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, static NeonGenTwoOpFn * const fns[3][2] = { { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 }, { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 }, - { gen_max_s32, gen_max_u32 }, + { tcg_gen_smax_i32, tcg_gen_umax_i32 }, }; genfn = fns[size][u]; break; @@ -10057,7 +10039,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, static NeonGenTwoOpFn * const fns[3][2] = { { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 }, { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 }, - { gen_min_s32, gen_min_u32 }, + { tcg_gen_smin_i32, tcg_gen_umin_i32 }, }; genfn = fns[size][u]; break; @@ -10512,7 +10494,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) static NeonGenTwoOpFn * const fns[3][2] = { { gen_helper_neon_max_s8, gen_helper_neon_max_u8 }, { gen_helper_neon_max_s16, 
gen_helper_neon_max_u16 }, - { gen_max_s32, gen_max_u32 }, + { tcg_gen_smax_i32, tcg_gen_umax_i32 }, }; genfn = fns[size][u]; break; @@ -10523,7 +10505,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) static NeonGenTwoOpFn * const fns[3][2] = { { gen_helper_neon_min_s8, gen_helper_neon_min_u8 }, { gen_helper_neon_min_s16, gen_helper_neon_min_u16 }, - { gen_min_s32, gen_min_u32 }, + { tcg_gen_smin_i32, tcg_gen_umin_i32 }, }; genfn = fns[size][u]; break; -- cgit v1.2.3 From 68412d2ecedbab5a43b0d346cddb27e00d724aff Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 10 May 2018 18:10:57 +0100 Subject: target/arm: Introduce ARM_FEATURE_V8_ATOMICS and initial decode The insns in the ARMv8.1-Atomics are added to the existing load/store exclusive and load/store reg opcode spaces. Rearrange the top-level decoders for these to accommodate. The Atomics insns themselves still generate Unallocated. Signed-off-by: Richard Henderson Message-id: 20180508151437.4232-8-richard.henderson@linaro.org [PMM: Drop the ARM_FEATURE_V8_1 feature flag] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 176 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 133 insertions(+), 43 deletions(-) (limited to 'target/arm/translate-a64.c') diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 60d104cc8a..bb8a176f9a 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -2147,62 +2147,98 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) int rt = extract32(insn, 0, 5); int rn = extract32(insn, 5, 5); int rt2 = extract32(insn, 10, 5); - int is_lasr = extract32(insn, 15, 1); int rs = extract32(insn, 16, 5); - int is_pair = extract32(insn, 21, 1); - int is_store = !extract32(insn, 22, 1); - int is_excl = !extract32(insn, 23, 1); + int is_lasr = extract32(insn, 15, 1); + int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr; int size = extract32(insn, 30, 2); TCGv_i64 
tcg_addr; - if ((!is_excl && !is_pair && !is_lasr) || - (!is_excl && is_pair) || - (is_pair && size < 2)) { - unallocated_encoding(s); + switch (o2_L_o1_o0) { + case 0x0: /* STXR */ + case 0x1: /* STLXR */ + if (rn == 31) { + gen_check_sp_alignment(s); + } + if (is_lasr) { + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); + } + tcg_addr = read_cpu_reg_sp(s, rn, 1); + gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, false); return; - } - if (rn == 31) { - gen_check_sp_alignment(s); - } - tcg_addr = read_cpu_reg_sp(s, rn, 1); + case 0x4: /* LDXR */ + case 0x5: /* LDAXR */ + if (rn == 31) { + gen_check_sp_alignment(s); + } + tcg_addr = read_cpu_reg_sp(s, rn, 1); + s->is_ldex = true; + gen_load_exclusive(s, rt, rt2, tcg_addr, size, false); + if (is_lasr) { + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); + } + return; - /* Note that since TCG is single threaded load-acquire/store-release - * semantics require no extra if (is_lasr) { ... } handling. - */ + case 0x9: /* STLR */ + /* Generate ISS for non-exclusive accesses including LASR. */ + if (rn == 31) { + gen_check_sp_alignment(s); + } + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); + tcg_addr = read_cpu_reg_sp(s, rn, 1); + do_gpr_st(s, cpu_reg(s, rt), tcg_addr, size, true, rt, + disas_ldst_compute_iss_sf(size, false, 0), is_lasr); + return; - if (is_excl) { - if (!is_store) { - s->is_ldex = true; - gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair); - if (is_lasr) { - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); + case 0xd: /* LDAR */ + /* Generate ISS for non-exclusive accesses including LASR. 
*/ + if (rn == 31) { + gen_check_sp_alignment(s); + } + tcg_addr = read_cpu_reg_sp(s, rn, 1); + do_gpr_ld(s, cpu_reg(s, rt), tcg_addr, size, false, false, true, rt, + disas_ldst_compute_iss_sf(size, false, 0), is_lasr); + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); + return; + + case 0x2: case 0x3: /* CASP / STXP */ + if (size & 2) { /* STXP / STLXP */ + if (rn == 31) { + gen_check_sp_alignment(s); } - } else { if (is_lasr) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); } - gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair); + tcg_addr = read_cpu_reg_sp(s, rn, 1); + gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true); + return; } - } else { - TCGv_i64 tcg_rt = cpu_reg(s, rt); - bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0); + /* CASP / CASPL */ + break; - /* Generate ISS for non-exclusive accesses including LASR. */ - if (is_store) { - if (is_lasr) { - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); + case 0x6: case 0x7: /* CASP / LDXP */ + if (size & 2) { /* LDXP / LDAXP */ + if (rn == 31) { + gen_check_sp_alignment(s); } - do_gpr_st(s, tcg_rt, tcg_addr, size, - true, rt, iss_sf, is_lasr); - } else { - do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false, - true, rt, iss_sf, is_lasr); + tcg_addr = read_cpu_reg_sp(s, rn, 1); + s->is_ldex = true; + gen_load_exclusive(s, rt, rt2, tcg_addr, size, true); if (is_lasr) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); } + return; } + /* CASPA / CASPAL */ + break; + + case 0xa: /* CAS */ + case 0xb: /* CASL */ + case 0xe: /* CASA */ + case 0xf: /* CASAL */ + break; } + unallocated_encoding(s); } /* @@ -2715,6 +2751,55 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, } } +/* Atomic memory operations + * + * 31 30 27 26 24 22 21 16 15 12 10 5 0 + * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+ + * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt | + * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+ + * + * Rt: the result register + * Rn: 
base address or SP + * Rs: the source register for the operation + * V: vector flag (always 0 as of v8.3) + * A: acquire flag + * R: release flag + */ +static void disas_ldst_atomic(DisasContext *s, uint32_t insn, + int size, int rt, bool is_vector) +{ + int rs = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int o3_opc = extract32(insn, 12, 4); + int feature = ARM_FEATURE_V8_ATOMICS; + + if (is_vector) { + unallocated_encoding(s); + return; + } + switch (o3_opc) { + case 000: /* LDADD */ + case 001: /* LDCLR */ + case 002: /* LDEOR */ + case 003: /* LDSET */ + case 004: /* LDSMAX */ + case 005: /* LDSMIN */ + case 006: /* LDUMAX */ + case 007: /* LDUMIN */ + case 010: /* SWP */ + default: + unallocated_encoding(s); + return; + } + if (!arm_dc_feature(s, feature)) { + unallocated_encoding(s); + return; + } + + (void)rs; + (void)rn; +} + /* Load/store register (all forms) */ static void disas_ldst_reg(DisasContext *s, uint32_t insn) { @@ -2725,23 +2810,28 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn) switch (extract32(insn, 24, 2)) { case 0: - if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) { - disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector); - } else { + if (extract32(insn, 21, 1) == 0) { /* Load/store register (unscaled immediate) * Load/store immediate pre/post-indexed * Load/store register unprivileged */ disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector); + return; + } + switch (extract32(insn, 10, 2)) { + case 0: + disas_ldst_atomic(s, insn, size, rt, is_vector); + return; + case 2: + disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector); + return; } break; case 1: disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector); - break; - default: - unallocated_encoding(s); - break; + return; } + unallocated_encoding(s); } /* AdvSIMD load/store multiple structures -- cgit v1.2.3 From 74608ea45434c9b07055b21885e093528c5ed98c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 10 May 2018 
18:10:57 +0100 Subject: target/arm: Fill in disas_ldst_atomic This implements all of the v8.1-Atomics instructions except for compare-and-swap, which is decoded elsewhere. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20180508151437.4232-9-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) (limited to 'target/arm/translate-a64.c') diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index bb8a176f9a..86989fda6c 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -84,6 +84,7 @@ typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64); typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr); typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32); typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr); +typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp); /* Note that the gvec expanders operate on offsets + sizes. 
*/ typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t); @@ -2772,6 +2773,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, int rn = extract32(insn, 5, 5); int o3_opc = extract32(insn, 12, 4); int feature = ARM_FEATURE_V8_ATOMICS; + TCGv_i64 tcg_rn, tcg_rs; + AtomicThreeOpFn *fn; if (is_vector) { unallocated_encoding(s); @@ -2779,14 +2782,32 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, } switch (o3_opc) { case 000: /* LDADD */ + fn = tcg_gen_atomic_fetch_add_i64; + break; case 001: /* LDCLR */ + fn = tcg_gen_atomic_fetch_and_i64; + break; case 002: /* LDEOR */ + fn = tcg_gen_atomic_fetch_xor_i64; + break; case 003: /* LDSET */ + fn = tcg_gen_atomic_fetch_or_i64; + break; case 004: /* LDSMAX */ + fn = tcg_gen_atomic_fetch_smax_i64; + break; case 005: /* LDSMIN */ + fn = tcg_gen_atomic_fetch_smin_i64; + break; case 006: /* LDUMAX */ + fn = tcg_gen_atomic_fetch_umax_i64; + break; case 007: /* LDUMIN */ + fn = tcg_gen_atomic_fetch_umin_i64; + break; case 010: /* SWP */ + fn = tcg_gen_atomic_xchg_i64; + break; default: unallocated_encoding(s); return; @@ -2796,8 +2817,21 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, return; } - (void)rs; - (void)rn; + if (rn == 31) { + gen_check_sp_alignment(s); + } + tcg_rn = cpu_reg_sp(s, rn); + tcg_rs = read_cpu_reg(s, rs, true); + + if (o3_opc == 1) { /* LDCLR */ + tcg_gen_not_i64(tcg_rs, tcg_rs); + } + + /* The tcg atomic primitives are all full barriers. Therefore we + * can ignore the Acquire and Release bits of this instruction. 
+ */ + fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s), + s->be_data | size | MO_ALIGN); } /* Load/store register (all forms) */ -- cgit v1.2.3 From 44ac14b06fa33f60982923b6b8a3bf8dd2fea61d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 10 May 2018 18:10:57 +0100 Subject: target/arm: Implement CAS and CASP Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20180508151437.4232-10-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 119 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 116 insertions(+), 3 deletions(-) (limited to 'target/arm/translate-a64.c') diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 86989fda6c..fa60cf908f 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -2114,6 +2114,103 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, tcg_gen_movi_i64(cpu_exclusive_addr, -1); } +static void gen_compare_and_swap(DisasContext *s, int rs, int rt, + int rn, int size) +{ + TCGv_i64 tcg_rs = cpu_reg(s, rs); + TCGv_i64 tcg_rt = cpu_reg(s, rt); + int memidx = get_mem_index(s); + TCGv_i64 addr = cpu_reg_sp(s, rn); + + if (rn == 31) { + gen_check_sp_alignment(s); + } + tcg_gen_atomic_cmpxchg_i64(tcg_rs, addr, tcg_rs, tcg_rt, memidx, + size | MO_ALIGN | s->be_data); +} + +static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, + int rn, int size) +{ + TCGv_i64 s1 = cpu_reg(s, rs); + TCGv_i64 s2 = cpu_reg(s, rs + 1); + TCGv_i64 t1 = cpu_reg(s, rt); + TCGv_i64 t2 = cpu_reg(s, rt + 1); + TCGv_i64 addr = cpu_reg_sp(s, rn); + int memidx = get_mem_index(s); + + if (rn == 31) { + gen_check_sp_alignment(s); + } + + if (size == 2) { + TCGv_i64 cmp = tcg_temp_new_i64(); + TCGv_i64 val = tcg_temp_new_i64(); + + if (s->be_data == MO_LE) { + tcg_gen_concat32_i64(val, t1, t2); + tcg_gen_concat32_i64(cmp, s1, s2); + } else { + tcg_gen_concat32_i64(val, t2, t1); + tcg_gen_concat32_i64(cmp, s2, s1); + } 
+ + tcg_gen_atomic_cmpxchg_i64(cmp, addr, cmp, val, memidx, + MO_64 | MO_ALIGN | s->be_data); + tcg_temp_free_i64(val); + + if (s->be_data == MO_LE) { + tcg_gen_extr32_i64(s1, s2, cmp); + } else { + tcg_gen_extr32_i64(s2, s1, cmp); + } + tcg_temp_free_i64(cmp); + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + TCGv_i32 tcg_rs = tcg_const_i32(rs); + + if (s->be_data == MO_LE) { + gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2); + } else { + gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2); + } + tcg_temp_free_i32(tcg_rs); + } else { + TCGv_i64 d1 = tcg_temp_new_i64(); + TCGv_i64 d2 = tcg_temp_new_i64(); + TCGv_i64 a2 = tcg_temp_new_i64(); + TCGv_i64 c1 = tcg_temp_new_i64(); + TCGv_i64 c2 = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_const_i64(0); + + /* Load the two words, in memory order. */ + tcg_gen_qemu_ld_i64(d1, addr, memidx, + MO_64 | MO_ALIGN_16 | s->be_data); + tcg_gen_addi_i64(a2, addr, 8); + tcg_gen_qemu_ld_i64(d2, addr, memidx, MO_64 | s->be_data); + + /* Compare the two words, also in memory order. */ + tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1); + tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2); + tcg_gen_and_i64(c2, c2, c1); + + /* If compare equal, write back new data, else write back old data. */ + tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1); + tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2); + tcg_gen_qemu_st_i64(c1, addr, memidx, MO_64 | s->be_data); + tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data); + tcg_temp_free_i64(a2); + tcg_temp_free_i64(c1); + tcg_temp_free_i64(c2); + tcg_temp_free_i64(zero); + + /* Write back the data from memory to Rs. */ + tcg_gen_mov_i64(s1, d1); + tcg_gen_mov_i64(s2, d2); + tcg_temp_free_i64(d1); + tcg_temp_free_i64(d2); + } +} + /* Update the Sixty-Four bit (SF) registersize. This logic is derived * from the ARMv8 specs for LDR (Shared decode for all encodings). 
*/ @@ -2214,10 +2311,16 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true); return; } - /* CASP / CASPL */ + if (rt2 == 31 + && ((rt | rs) & 1) == 0 + && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) { + /* CASP / CASPL */ + gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); + return; + } break; - case 0x6: case 0x7: /* CASP / LDXP */ + case 0x6: case 0x7: /* CASPA / LDXP */ if (size & 2) { /* LDXP / LDAXP */ if (rn == 31) { gen_check_sp_alignment(s); @@ -2230,13 +2333,23 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) } return; } - /* CASPA / CASPAL */ + if (rt2 == 31 + && ((rt | rs) & 1) == 0 + && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) { + /* CASPA / CASPAL */ + gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); + return; + } break; case 0xa: /* CAS */ case 0xb: /* CASL */ case 0xe: /* CASA */ case 0xf: /* CASAL */ + if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) { + gen_compare_and_swap(s, rs, rt, rn, size); + return; + } break; } unallocated_encoding(s); -- cgit v1.2.3 From a6117fae4576edfe7a5a5b802a742c33112c0993 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 10 May 2018 18:10:57 +0100 Subject: target/arm: Implement vector shifted SCVTF/UCVTF for fp16 While we have some of the scalar paths for *CVTF for fp16, we failed to decode the fp16 version of these instructions. 
Cc: qemu-stable@nongnu.org Signed-off-by: Richard Henderson Message-id: 20180502221552.3873-2-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'target/arm/translate-a64.c') diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index fa60cf908f..f4e2afa72c 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -7405,13 +7405,26 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, int immh, int immb, int opcode, int rn, int rd) { - bool is_double = extract32(immh, 3, 1); - int size = is_double ? MO_64 : MO_32; - int elements; + int size, elements, fracbits; int immhb = immh << 3 | immb; - int fracbits = (is_double ? 128 : 64) - immhb; - if (!extract32(immh, 2, 2)) { + if (immh & 8) { + size = MO_64; + if (!is_scalar && !is_q) { + unallocated_encoding(s); + return; + } + } else if (immh & 4) { + size = MO_32; + } else if (immh & 2) { + size = MO_16; + if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + unallocated_encoding(s); + return; + } + } else { + /* immh == 0 would be a failure of the decode logic */ + g_assert(immh == 1); unallocated_encoding(s); return; } @@ -7419,20 +7432,14 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, if (is_scalar) { elements = 1; } else { - elements = is_double ? 2 : is_q ? 
4 : 2; - if (is_double && !is_q) { - unallocated_encoding(s); - return; - } + elements = (8 << is_q) >> size; } + fracbits = (16 << size) - immhb; if (!fp_access_check(s)) { return; } - /* immh == 0 would be a failure of the decode logic */ - g_assert(immh); - handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size); } -- cgit v1.2.3 From d0ba8e74acd299b092786ffc30b306638d395a9e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 10 May 2018 18:10:57 +0100 Subject: target/arm: Implement vector shifted FCVT for fp16 While we have some of the scalar paths for FCVT for fp16, we failed to decode the fp16 version of these instructions. Cc: qemu-stable@nongnu.org Signed-off-by: Richard Henderson Message-id: 20180502221552.3873-3-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 63 +++++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 18 deletions(-) (limited to 'target/arm/translate-a64.c') diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index f4e2afa72c..317f2773b4 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -7448,19 +7448,28 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, bool is_q, bool is_u, int immh, int immb, int rn, int rd) { - bool is_double = extract32(immh, 3, 1); int immhb = immh << 3 | immb; - int fracbits = (is_double ? 
128 : 64) - immhb; - int pass; + int pass, size, fracbits; TCGv_ptr tcg_fpstatus; TCGv_i32 tcg_rmode, tcg_shift; - if (!extract32(immh, 2, 2)) { - unallocated_encoding(s); - return; - } - - if (!is_scalar && !is_q && is_double) { + if (immh & 0x8) { + size = MO_64; + if (!is_scalar && !is_q) { + unallocated_encoding(s); + return; + } + } else if (immh & 0x4) { + size = MO_32; + } else if (immh & 0x2) { + size = MO_16; + if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + unallocated_encoding(s); + return; + } + } else { + /* Should have split out AdvSIMD modified immediate earlier. */ + assert(immh == 1); unallocated_encoding(s); return; } @@ -7472,11 +7481,12 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, assert(!(is_scalar && is_q)); tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO)); - tcg_fpstatus = get_fpstatus_ptr(false); + tcg_fpstatus = get_fpstatus_ptr(size == MO_16); gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); + fracbits = (16 << size) - immhb; tcg_shift = tcg_const_i32(fracbits); - if (is_double) { + if (size == MO_64) { int maxpass = is_scalar ? 1 : 2; for (pass = 0; pass < maxpass; pass++) { @@ -7493,20 +7503,37 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, } clear_vec_high(s, is_q, rd); } else { - int maxpass = is_scalar ? 1 : is_q ? 4 : 2; - for (pass = 0; pass < maxpass; pass++) { - TCGv_i32 tcg_op = tcg_temp_new_i32(); + void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); + int maxpass = is_scalar ? 
1 : ((8 << is_q) >> size); - read_vec_element_i32(s, tcg_op, rn, pass, MO_32); + switch (size) { + case MO_16: + if (is_u) { + fn = gen_helper_vfp_toulh; + } else { + fn = gen_helper_vfp_toslh; + } + break; + case MO_32: if (is_u) { - gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + fn = gen_helper_vfp_touls; } else { - gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + fn = gen_helper_vfp_tosls; } + break; + default: + g_assert_not_reached(); + } + + for (pass = 0; pass < maxpass; pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op, rn, pass, size); + fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); if (is_scalar) { write_fp_sreg(s, rd, tcg_op); } else { - write_vec_element_i32(s, tcg_op, rd, pass, MO_32); + write_vec_element_i32(s, tcg_op, rd, pass, size); } tcg_temp_free_i32(tcg_op); } -- cgit v1.2.3 From 88808a022c06f98d81cd3f2d105a5734c5614839 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 10 May 2018 18:10:58 +0100 Subject: target/arm: Fix float16 to/from int16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The instruction "ucvtf v0.4h, v0.4h, #2", with input 0x8000u, overflows the intermediate float16 to infinity before we have a chance to scale the output. Use float64 as the intermediate type so that no input argument (uint32_t in this case) can overflow or round before scaling. Given the declared argument, the signed int32_t function has the same problem. When converting from float16 to integer, using u/int32_t instead of u/int16_t means that the bounding is incorrect. 
Cc: qemu-stable@nongnu.org Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson Message-id: 20180502221552.3873-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'target/arm/translate-a64.c') diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 317f2773b4..b302171545 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -7509,9 +7509,9 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, switch (size) { case MO_16: if (is_u) { - fn = gen_helper_vfp_toulh; + fn = gen_helper_vfp_touhh; } else { - fn = gen_helper_vfp_toslh; + fn = gen_helper_vfp_toshh; } break; case MO_32: -- cgit v1.2.3 From 9a9f1f59521f46e8ff4527d9a2b52f83577e2aa3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 10 May 2018 18:10:58 +0100 Subject: target/arm: Clear SVE high bits for FMOV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use write_fp_dreg and clear_vec_high to zero the bits that need zeroing for these cases. 
Cc: qemu-stable@nongnu.org Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson Message-id: 20180502221552.3873-5-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/translate-a64.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'target/arm/translate-a64.c') diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index b302171545..b0471c842e 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -5681,31 +5681,24 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) if (itof) { TCGv_i64 tcg_rn = cpu_reg(s, rn); + TCGv_i64 tmp; switch (type) { case 0: - { /* 32 bit */ - TCGv_i64 tmp = tcg_temp_new_i64(); + tmp = tcg_temp_new_i64(); tcg_gen_ext32u_i64(tmp, tcg_rn); - tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64)); - tcg_gen_movi_i64(tmp, 0); - tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd)); + write_fp_dreg(s, rd, tmp); tcg_temp_free_i64(tmp); break; - } case 1: - { /* 64 bit */ - TCGv_i64 tmp = tcg_const_i64(0); - tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64)); - tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd)); - tcg_temp_free_i64(tmp); + write_fp_dreg(s, rd, tcg_rn); break; - } case 2: /* 64 bit to top half. */ tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd)); + clear_vec_high(s, true, rd); break; } } else { -- cgit v1.2.3