aboutsummaryrefslogtreecommitdiff
path: root/tcg/tcg-op-gvec.c
diff options
context:
space:
mode:
author: Richard Henderson <richard.henderson@linaro.org> 2023-08-30 20:09:03 -0700
committer: Richard Henderson <richard.henderson@linaro.org> 2023-09-16 14:57:15 +0000
commit: 9622c697d1bb2addec4added61e3a0f6ad5e8bfb (patch)
tree: aed14848ed68a0b39f060279d292bf3b6cd5109a /tcg/tcg-op-gvec.c
parent: 58f8961285a7dbb1f6b9152b2ff5a11bd1813f04 (diff)
tcg: Add gvec compare with immediate and scalar operand
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Tested-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Song Gao <gaosong@loongson.cn>
Message-Id: <20230831030904.1194667-2-richard.henderson@linaro.org>
Diffstat (limited to 'tcg/tcg-op-gvec.c')
-rw-r--r-- tcg/tcg-op-gvec.c 149
1 files changed, 149 insertions, 0 deletions
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index e260a07c61..41b1ae18e4 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -3846,6 +3846,155 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
}
}
+static void expand_cmps_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t tysz, TCGType type,
+ TCGCond cond, TCGv_vec c)
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t1, cpu_env, aofs + i);
+ tcg_gen_cmp_vec(cond, vece, t0, t1, c);
+ tcg_gen_st_vec(t0, cpu_env, dofs + i);
+ }
+}
+
+void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
+ uint32_t aofs, TCGv_i64 c,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode cmp_list[] = { INDEX_op_cmp_vec, 0 };
+ static gen_helper_gvec_2i * const eq_fn[4] = {
+ gen_helper_gvec_eqs8, gen_helper_gvec_eqs16,
+ gen_helper_gvec_eqs32, gen_helper_gvec_eqs64
+ };
+ static gen_helper_gvec_2i * const lt_fn[4] = {
+ gen_helper_gvec_lts8, gen_helper_gvec_lts16,
+ gen_helper_gvec_lts32, gen_helper_gvec_lts64
+ };
+ static gen_helper_gvec_2i * const le_fn[4] = {
+ gen_helper_gvec_les8, gen_helper_gvec_les16,
+ gen_helper_gvec_les32, gen_helper_gvec_les64
+ };
+ static gen_helper_gvec_2i * const ltu_fn[4] = {
+ gen_helper_gvec_ltus8, gen_helper_gvec_ltus16,
+ gen_helper_gvec_ltus32, gen_helper_gvec_ltus64
+ };
+ static gen_helper_gvec_2i * const leu_fn[4] = {
+ gen_helper_gvec_leus8, gen_helper_gvec_leus16,
+ gen_helper_gvec_leus32, gen_helper_gvec_leus64
+ };
+ static gen_helper_gvec_2i * const * const fns[16] = {
+ [TCG_COND_EQ] = eq_fn,
+ [TCG_COND_LT] = lt_fn,
+ [TCG_COND_LE] = le_fn,
+ [TCG_COND_LTU] = ltu_fn,
+ [TCG_COND_LEU] = leu_fn,
+ };
+
+ TCGType type;
+
+ check_size_align(oprsz, maxsz, dofs | aofs);
+ check_overlap_2(dofs, aofs, maxsz);
+
+ if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
+ do_dup(MO_8, dofs, oprsz, maxsz,
+ NULL, NULL, -(cond == TCG_COND_ALWAYS));
+ return;
+ }
+
+ /*
+ * Implement inline with a vector type, if possible.
+ * Prefer integer when 64-bit host and 64-bit comparison.
+ */
+ type = choose_vector_type(cmp_list, vece, oprsz,
+ TCG_TARGET_REG_BITS == 64 && vece == MO_64);
+ if (type != 0) {
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(cmp_list);
+ TCGv_vec t_vec = tcg_temp_new_vec(type);
+ uint32_t some;
+
+ tcg_gen_dup_i64_vec(vece, t_vec, c);
+ switch (type) {
+ case TCG_TYPE_V256:
+ some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_cmps_vec(vece, dofs, aofs, some, 32,
+ TCG_TYPE_V256, cond, t_vec);
+ aofs += some;
+ dofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ /* fallthru */
+
+ case TCG_TYPE_V128:
+ some = QEMU_ALIGN_DOWN(oprsz, 16);
+ expand_cmps_vec(vece, dofs, aofs, some, 16,
+ TCG_TYPE_V128, cond, t_vec);
+ break;
+
+ case TCG_TYPE_V64:
+ some = QEMU_ALIGN_DOWN(oprsz, 8);
+ expand_cmps_vec(vece, dofs, aofs, some, 8,
+ TCG_TYPE_V64, cond, t_vec);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_vec(t_vec);
+ tcg_swap_vecop_list(hold_list);
+ } else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+ tcg_gen_negsetcond_i64(cond, t0, t0, c);
+ tcg_gen_st_i64(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t0);
+ } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
+ uint32_t i;
+
+ tcg_gen_extrl_i64_i32(t1, c);
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+ tcg_gen_negsetcond_i32(cond, t0, t0, t1);
+ tcg_gen_st_i32(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ } else {
+ gen_helper_gvec_2i * const *fn = fns[cond];
+ bool inv = false;
+
+ if (fn == NULL) {
+ cond = tcg_invert_cond(cond);
+ fn = fns[cond];
+ assert(fn != NULL);
+ inv = true;
+ }
+ tcg_gen_gvec_2i_ool(dofs, aofs, c, oprsz, maxsz, inv, fn[vece]);
+ return;
+ }
+
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
+ uint32_t aofs, int64_t c,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_constant_i64(c);
+ tcg_gen_gvec_cmps(cond, vece, dofs, aofs, tmp, oprsz, maxsz);
+}
+
static void tcg_gen_bitsel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
{
TCGv_i64 t = tcg_temp_ebb_new_i64();