diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2021-12-18 11:25:58 -0800 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2022-03-04 08:50:41 -1000 |
commit | 1d442e427a66ed90ce3efb0048ceec0f9192cb02 (patch) | |
tree | 40900368697713bf2abb0cf2283be3ae1a939765 /tcg/i386 | |
parent | a2a19b429b0daaf560e03aaa4851feb9f1fc5fac (diff) |
tcg/i386: Expand scalar rotate with avx512 insns
Expand 32-bit and 64-bit scalar rotate with VPRO[LR]V;
expand 16-bit scalar rotate with VPSHLDV.
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'tcg/i386')
-rw-r--r-- | tcg/i386/tcg-target.c.inc | 49 |
1 files changed, 29 insertions, 20 deletions
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 1fbb4b0593..edf0d066e7 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3602,26 +3602,6 @@ static void expand_vec_rotli(TCGType type, unsigned vece, tcg_temp_free_vec(t); } -static void expand_vec_rotls(TCGType type, unsigned vece, - TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh) -{ - TCGv_i32 rsh; - TCGv_vec t; - - tcg_debug_assert(vece != MO_8); - - t = tcg_temp_new_vec(type); - rsh = tcg_temp_new_i32(); - - tcg_gen_neg_i32(rsh, lsh); - tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1); - tcg_gen_shls_vec(vece, t, v1, lsh); - tcg_gen_shrs_vec(vece, v0, v1, rsh); - tcg_gen_or_vec(vece, v0, v0, t); - tcg_temp_free_vec(t); - tcg_temp_free_i32(rsh); -} - static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0, TCGv_vec v1, TCGv_vec sh, bool right) { @@ -3648,6 +3628,35 @@ static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0, tcg_temp_free_vec(t); } +static void expand_vec_rotls(TCGType type, unsigned vece, + TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh) +{ + TCGv_vec t = tcg_temp_new_vec(type); + + tcg_debug_assert(vece != MO_8); + + if (vece >= MO_32 ? have_avx512vl : have_avx512vbmi2) { + tcg_gen_dup_i32_vec(vece, t, lsh); + if (vece >= MO_32) { + tcg_gen_rotlv_vec(vece, v0, v1, t); + } else { + expand_vec_rotv(type, vece, v0, v1, t, false); + } + } else { + TCGv_i32 rsh = tcg_temp_new_i32(); + + tcg_gen_neg_i32(rsh, lsh); + tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1); + tcg_gen_shls_vec(vece, t, v1, lsh); + tcg_gen_shrs_vec(vece, v0, v1, rsh); + tcg_gen_or_vec(vece, v0, v0, t); + + tcg_temp_free_i32(rsh); + } + + tcg_temp_free_vec(t); +} + static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, TCGv_vec v1, TCGv_vec v2) { |