From c2262707034c2b596db41fbc682150948e939772 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 28 Jun 2021 14:58:26 +0100 Subject: target/arm: Implement MVE VSHLL Implement the MVE VHLL (vector shift left long) insn. This has two encodings: the T1 encoding is the usual shift-by-immediate format, and the T2 encoding is a special case where the shift count is always equal to the element size. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20210628135835.6690-10-peter.maydell@linaro.org --- target/arm/helper-mve.h | 9 ++++++++ target/arm/mve.decode | 53 ++++++++++++++++++++++++++++++++++++++++++---- target/arm/mve_helper.c | 32 ++++++++++++++++++++++++++++ target/arm/translate-mve.c | 15 +++++++++++++ 4 files changed, 105 insertions(+), 4 deletions(-) diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h index 288a8faf4e..8af0e7fd8c 100644 --- a/target/arm/helper-mve.h +++ b/target/arm/helper-mve.h @@ -387,3 +387,12 @@ DEF_HELPER_FLAGS_4(mve_vrshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) DEF_HELPER_FLAGS_4(mve_vrshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) DEF_HELPER_FLAGS_4(mve_vrshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) DEF_HELPER_FLAGS_4(mve_vrshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(mve_vshllbsb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vshllbsh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vshllbub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vshllbuh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vshlltsb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vshlltsh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vshlltub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vshlltuh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) diff --git a/target/arm/mve.decode b/target/arm/mve.decode index 8be04589a6..6e6032b25a 100644 --- a/target/arm/mve.decode +++ b/target/arm/mve.decode @@ -64,6 +64,14 @@ @2_shl_h .... .... .. 01 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1 @2_shl_w .... .... .. 1 shift:5 .... .... .... .... &2shift qd=%qd qm=%qm size=2 +@2_shll_b .... .... ... 01 shift:3 .... .... .... .... &2shift qd=%qd qm=%qm size=0 +@2_shll_h .... .... ... 1 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1 +# VSHLL encoding T2 where shift == esize +@2_shll_esize_b .... .... .... 00 .. .... .... .... .... &2shift \ + qd=%qd qm=%qm size=0 shift=8 +@2_shll_esize_h .... .... .... 01 .. .... .... .... .... &2shift \ + qd=%qd qm=%qm size=1 shift=16 + # Right shifts are encoded as N - shift, where N is the element size in bits. %rshift_i5 16:5 !function=rsub_32 %rshift_i4 16:4 !function=rsub_16 @@ -122,11 +130,35 @@ VADD 1110 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op VSUB 1111 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op -VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op -VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op +# The VSHLL T2 encoding is not a @2op pattern, but is here because it +# overlaps what would be size=0b11 VMULH/VRMULH +{ + VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b + VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h + + VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op +} + +{ + VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b + VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h -VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op -VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op + VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op +} + +{ + VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b + VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h + + VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op +} + +{ + VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b + VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h + + VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op +} VMAX_S 111 0 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op VMAX_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op @@ -326,3 +358,16 @@ VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w + +# VSHLL T1 encoding; the T2 VSHLL encoding is elsewhere in this file +VSHLL_BS 111 0 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_b +VSHLL_BS 111 0 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_h + +VSHLL_BU 111 1 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_b +VSHLL_BU 111 1 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_h + +VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b +VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h + +VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b +VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c index ac720c9ee0..8798e77cba 100644 --- a/target/arm/mve_helper.c +++ b/target/arm/mve_helper.c @@ -1250,3 +1250,35 @@ DO_2SHIFT_SAT_S(vqshli_s, DO_SQSHL_OP) DO_2SHIFT_SAT_S(vqshlui_s, DO_SUQSHL_OP) DO_2SHIFT_U(vrshli_u, DO_VRSHLU) DO_2SHIFT_S(vrshli_s, DO_VRSHLS) + +/* + * Long shifts taking half-sized inputs from top or bottom of the input + * vector and producing a double-width result. ESIZE, TYPE are for + * the input, and LESIZE, LTYPE for the output. + * Unlike the normal shift helpers, we do not handle negative shift counts, + * because the long shift is strictly left-only. + */ +#define DO_VSHLL(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE) \ + void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \ + void *vm, uint32_t shift) \ + { \ + LTYPE *d = vd; \ + TYPE *m = vm; \ + uint16_t mask = mve_element_mask(env); \ + unsigned le; \ + assert(shift <= 16); \ + for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \ + LTYPE r = (LTYPE)m[H##ESIZE(le * 2 + TOP)] << shift; \ + mergemask(&d[H##LESIZE(le)], r, mask); \ + } \ + mve_advance_vpt(env); \ + } + +#define DO_VSHLL_ALL(OP, TOP) \ + DO_VSHLL(OP##sb, TOP, 1, int8_t, 2, int16_t) \ + DO_VSHLL(OP##ub, TOP, 1, uint8_t, 2, uint16_t) \ + DO_VSHLL(OP##sh, TOP, 2, int16_t, 4, int32_t) \ + DO_VSHLL(OP##uh, TOP, 2, uint16_t, 4, uint32_t) \ + +DO_VSHLL_ALL(vshllb, false) +DO_VSHLL_ALL(vshllt, true) diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c index 4030ee07f0..044462c375 100644 --- a/target/arm/translate-mve.c +++ b/target/arm/translate-mve.c @@ -893,3 +893,18 @@ DO_2SHIFT(VSHRI_S, vshli_s, true) DO_2SHIFT(VSHRI_U, vshli_u, true) DO_2SHIFT(VRSHRI_S, vrshli_s, true) DO_2SHIFT(VRSHRI_U, vrshli_u, true) + +#define DO_VSHLL(INSN, FN) \ + static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ + { \ + static MVEGenTwoOpShiftFn * const fns[] = { \ + gen_helper_mve_##FN##b, \ + gen_helper_mve_##FN##h, \ + }; \ + return do_2shift(s, a, fns[a->size], false); \ + } + +DO_VSHLL(VSHLL_BS, vshllbs) +DO_VSHLL(VSHLL_BU, vshllbu) +DO_VSHLL(VSHLL_TS, vshllts) +DO_VSHLL(VSHLL_TU, vshlltu) -- cgit v1.2.3