diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2020-06-16 10:32:28 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2020-06-16 10:32:28 +0100 |
commit | 77e576a9281825fc170f3b3af83f47e110549b5c (patch) | |
tree | 2112e00240a510fbed48faffe754efcb1e498e13 /target/arm/translate.c | |
parent | aa318f5b9b4ab3b6744b5305dd8ae9b96676f20e (diff) |
target/arm: Convert Neon 2-reg-scalar long multiplies to decodetree
Convert the Neon 2-reg-scalar long multiplies to decodetree.
These are the last instructions in the group.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'target/arm/translate.c')
-rw-r--r-- | target/arm/translate.c | 182 |
1 files changed, 6 insertions, 176 deletions
diff --git a/target/arm/translate.c b/target/arm/translate.c index f0db029f66..4d39bbf035 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -377,43 +377,6 @@ static void gen_revsh(TCGv_i32 dest, TCGv_i32 var) tcg_gen_ext16s_i32(dest, var); } -/* 32x32->64 multiply. Marks inputs as dead. */ -static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 lo = tcg_temp_new_i32(); - TCGv_i32 hi = tcg_temp_new_i32(); - TCGv_i64 ret; - - tcg_gen_mulu2_i32(lo, hi, a, b); - tcg_temp_free_i32(a); - tcg_temp_free_i32(b); - - ret = tcg_temp_new_i64(); - tcg_gen_concat_i32_i64(ret, lo, hi); - tcg_temp_free_i32(lo); - tcg_temp_free_i32(hi); - - return ret; -} - -static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 lo = tcg_temp_new_i32(); - TCGv_i32 hi = tcg_temp_new_i32(); - TCGv_i64 ret; - - tcg_gen_muls2_i32(lo, hi, a, b); - tcg_temp_free_i32(a); - tcg_temp_free_i32(b); - - ret = tcg_temp_new_i64(); - tcg_gen_concat_i32_i64(ret, lo, hi); - tcg_temp_free_i32(lo); - tcg_temp_free_i32(hi); - - return ret; -} - /* Swap low and high halfwords. */ static void gen_swap_half(TCGv_i32 var) { @@ -3164,58 +3127,6 @@ static inline void gen_neon_addl(int size) } } -static inline void gen_neon_negl(TCGv_i64 var, int size) -{ - switch (size) { - case 0: gen_helper_neon_negl_u16(var, var); break; - case 1: gen_helper_neon_negl_u32(var, var); break; - case 2: - tcg_gen_neg_i64(var, var); - break; - default: abort(); - } -} - -static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size) -{ - switch (size) { - case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break; - case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break; - default: abort(); - } -} - -static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b, - int size, int u) -{ - TCGv_i64 tmp; - - switch ((size << 1) | u) { - case 0: gen_helper_neon_mull_s8(dest, a, b); break; - case 1: gen_helper_neon_mull_u8(dest, a, b); break; - case 2: gen_helper_neon_mull_s16(dest, a, b); break; - case 3: gen_helper_neon_mull_u16(dest, a, b); break; - case 4: - tmp = gen_muls_i64_i32(a, b); - tcg_gen_mov_i64(dest, tmp); - tcg_temp_free_i64(tmp); - break; - case 5: - tmp = gen_mulu_i64_i32(a, b); - tcg_gen_mov_i64(dest, tmp); - tcg_temp_free_i64(tmp); - break; - default: abort(); - } - - /* gen_helper_neon_mull_[su]{8|16} do not free their parameters. - Don't forget to clean them now. */ - if (size < 2) { - tcg_temp_free_i32(a); - tcg_temp_free_i32(b); - } -} - static void gen_neon_narrow_op(int op, int u, int size, TCGv_i32 dest, TCGv_i64 src) { @@ -5120,7 +5031,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) int u; int vec_size; uint32_t imm; - TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5; + TCGv_i32 tmp, tmp2, tmp3, tmp5; TCGv_ptr ptr1; TCGv_i64 tmp64; @@ -5158,92 +5069,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } else { /* (insn & 0x00800010 == 0x00800000) */ if (size != 3) { - op = (insn >> 8) & 0xf; - if ((insn & (1 << 6)) == 0) { - /* Three registers of different lengths: handled by decodetree */ - return 1; - } else { - /* Two registers and a scalar. NB that for ops of this form - * the ARM ARM labels bit 24 as Q, but it is in our variable - * 'u', not 'q'. - */ - if (size == 0) { - return 1; - } - switch (op) { - case 0: /* Integer VMLA scalar */ - case 4: /* Integer VMLS scalar */ - case 8: /* Integer VMUL scalar */ - case 1: /* Float VMLA scalar */ - case 5: /* Floating point VMLS scalar */ - case 9: /* Floating point VMUL scalar */ - case 12: /* VQDMULH scalar */ - case 13: /* VQRDMULH scalar */ - case 14: /* VQRDMLAH scalar */ - case 15: /* VQRDMLSH scalar */ - return 1; /* handled by decodetree */ - - case 3: /* VQDMLAL scalar */ - case 7: /* VQDMLSL scalar */ - case 11: /* VQDMULL scalar */ - if (u == 1) { - return 1; - } - /* fall through */ - case 2: /* VMLAL sclar */ - case 6: /* VMLSL scalar */ - case 10: /* VMULL scalar */ - if (rd & 1) { - return 1; - } - tmp2 = neon_get_scalar(size, rm); - /* We need a copy of tmp2 because gen_neon_mull - * deletes it during pass 0. */ - tmp4 = tcg_temp_new_i32(); - tcg_gen_mov_i32(tmp4, tmp2); - tmp3 = neon_load_reg(rn, 1); - - for (pass = 0; pass < 2; pass++) { - if (pass == 0) { - tmp = neon_load_reg(rn, 0); - } else { - tmp = tmp3; - tmp2 = tmp4; - } - gen_neon_mull(cpu_V0, tmp, tmp2, size, u); - if (op != 11) { - neon_load_reg64(cpu_V1, rd + pass); - } - switch (op) { - case 6: - gen_neon_negl(cpu_V0, size); - /* Fall through */ - case 2: - gen_neon_addl(size); - break; - case 3: case 7: - gen_neon_addl_saturate(cpu_V0, cpu_V0, size); - if (op == 7) { - gen_neon_negl(cpu_V0, size); - } - gen_neon_addl_saturate(cpu_V0, cpu_V1, size); - break; - case 10: - /* no-op */ - break; - case 11: - gen_neon_addl_saturate(cpu_V0, cpu_V0, size); - break; - default: - abort(); - } - neon_store_reg64(cpu_V0, rd + pass); - } - break; - default: - g_assert_not_reached(); - } - } + /* + * Three registers of different lengths, or two registers and + * a scalar: handled by decodetree + */ + return 1; } else { /* size == 3 */ if (!u) { /* Extract. */ |