diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2024-05-28 13:30:13 -0700 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2024-05-30 15:24:38 +0100 |
commit | 76f4a8aecae7f287d6e7f3b763ae88382c3a5457 (patch) | |
tree | a12f9fc1acf4b07db71013b09f9078f84fdb8918 | |
parent | b1d592e7b0a7301eae8e3baa99744ac35db3cd2a (diff) |
target/arm: Improve vector UQADD, UQSUB, SQADD, SQSUB
No need for a full comparison; xor produces non-zero bits
for QC just fine.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240528203044.612851-3-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | target/arm/tcg/gengvec.c | 32 |
1 files changed, 16 insertions, 16 deletions
diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c index 22c9d17dce..bfe6885a01 100644 --- a/target/arm/tcg/gengvec.c +++ b/target/arm/tcg/gengvec.c @@ -1217,21 +1217,21 @@ void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, +static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, TCGv_vec a, TCGv_vec b) { TCGv_vec x = tcg_temp_new_vec_matching(t); tcg_gen_add_vec(vece, x, a, b); tcg_gen_usadd_vec(vece, t, a, b); - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); - tcg_gen_or_vec(vece, sat, sat, x); + tcg_gen_xor_vec(vece, x, x, t); + tcg_gen_or_vec(vece, qc, qc, x); } void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { static const TCGOpcode vecop_list[] = { - INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 + INDEX_op_usadd_vec, INDEX_op_add_vec, 0 }; static const GVecGen4 ops[4] = { { .fniv = gen_uqadd_vec, @@ -1259,21 +1259,21 @@ void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, +static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, TCGv_vec a, TCGv_vec b) { TCGv_vec x = tcg_temp_new_vec_matching(t); tcg_gen_add_vec(vece, x, a, b); tcg_gen_ssadd_vec(vece, t, a, b); - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); - tcg_gen_or_vec(vece, sat, sat, x); + tcg_gen_xor_vec(vece, x, x, t); + tcg_gen_or_vec(vece, qc, qc, x); } void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { static const TCGOpcode vecop_list[] = { - INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 + INDEX_op_ssadd_vec, INDEX_op_add_vec, 0 }; static const GVecGen4 ops[4] = { { .fniv = gen_sqadd_vec, @@ -1301,21 +1301,21 @@ void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, +static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, TCGv_vec a, TCGv_vec b) { TCGv_vec x = tcg_temp_new_vec_matching(t); tcg_gen_sub_vec(vece, x, a, b); tcg_gen_ussub_vec(vece, t, a, b); - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); - tcg_gen_or_vec(vece, sat, sat, x); + tcg_gen_xor_vec(vece, x, x, t); + tcg_gen_or_vec(vece, qc, qc, x); } void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { static const TCGOpcode vecop_list[] = { - INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 + INDEX_op_ussub_vec, INDEX_op_sub_vec, 0 }; static const GVecGen4 ops[4] = { { .fniv = gen_uqsub_vec, @@ -1343,21 +1343,21 @@ void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } -static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, +static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, TCGv_vec a, TCGv_vec b) { TCGv_vec x = tcg_temp_new_vec_matching(t); tcg_gen_sub_vec(vece, x, a, b); tcg_gen_sssub_vec(vece, t, a, b); - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); - tcg_gen_or_vec(vece, sat, sat, x); + tcg_gen_xor_vec(vece, x, x, t); + tcg_gen_or_vec(vece, qc, qc, x); } void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) { static const TCGOpcode vecop_list[] = { - INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 + INDEX_op_sssub_vec, INDEX_op_sub_vec, 0 }; static const GVecGen4 ops[4] = { { .fniv = gen_sqsub_vec, |