diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2021-06-28 14:58:29 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2021-07-02 11:48:37 +0100 |
commit | d6f9e011e8643fb00303e3fec24dd1e424f3f5b3 (patch) | |
tree | ed281b18cd89a52db8bbb01dcbe2e3449cf25e10 /target | |
parent | 162e2655000689e44ac4c8e9e8dc413821e0adda (diff) |
target/arm: Implement MVE saturating narrowing shifts
Implement the MVE saturating shift-right-and-narrow insns
VQSHRN, VQSHRUN, VQRSHRN and VQRSHRUN.
do_srshr() is borrowed from sve_helper.c.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210628135835.6690-13-peter.maydell@linaro.org
Diffstat (limited to 'target')
-rw-r--r-- | target/arm/helper-mve.h | 30 | ||||
-rw-r--r-- | target/arm/mve.decode | 28 | ||||
-rw-r--r-- | target/arm/mve_helper.c | 104 | ||||
-rw-r--r-- | target/arm/translate-mve.c | 12 |
4 files changed, 174 insertions, 0 deletions
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h index 323ac07fa3..96b4c0dfd3 100644 --- a/target/arm/helper-mve.h +++ b/target/arm/helper-mve.h @@ -414,3 +414,33 @@ DEF_HELPER_FLAGS_4(mve_vrshrnbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) DEF_HELPER_FLAGS_4(mve_vrshrnbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) DEF_HELPER_FLAGS_4(mve_vrshrntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) DEF_HELPER_FLAGS_4(mve_vrshrnth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(mve_vqshrnb_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqshrnb_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqshrnt_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqshrnt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(mve_vqshrnb_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqshrnb_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqshrnt_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqshrnt_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(mve_vqshrunbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqshrunbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqshruntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(mve_vqrshrnb_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqrshrnb_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqrshrnt_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqrshrnt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(mve_vqrshrnb_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqrshrnb_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqrshrnt_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqrshrnt_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(mve_vqrshrunbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqrshrunbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqrshruntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(mve_vqrshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) diff --git a/target/arm/mve.decode b/target/arm/mve.decode index e2c177f56a..1d11387bc0 100644 --- a/target/arm/mve.decode +++ b/target/arm/mve.decode @@ -391,3 +391,31 @@ VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_b VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_h VRSHRNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_b VRSHRNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_h + +VQSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_b +VQSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_h +VQSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_b +VQSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_h +VQSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_b +VQSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_h +VQSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_b +VQSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_h + +VQSHRUNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b +VQSHRUNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h +VQSHRUNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b +VQSHRUNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h + +VQRSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_b +VQRSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_h +VQRSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_b +VQRSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_h +VQRSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_b +VQRSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_h +VQRSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_b +VQRSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_h + +VQRSHRUNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b +VQRSHRUNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h +VQRSHRUNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b +VQRSHRUNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c index a97942208b..3e736e8909 100644 --- a/target/arm/mve_helper.c +++ b/target/arm/mve_helper.c @@ -1362,5 +1362,109 @@ static inline uint64_t do_urshr(uint64_t x, unsigned sh) } } +static inline int64_t do_srshr(int64_t x, unsigned sh) +{ + if (likely(sh < 64)) { + return (x >> sh) + ((x >> (sh - 1)) & 1); + } else { + /* Rounding the sign bit always produces 0. */ + return 0; + } +} + DO_VSHRN_ALL(vshrn, DO_SHR) DO_VSHRN_ALL(vrshrn, do_urshr) + +static inline int32_t do_sat_bhs(int64_t val, int64_t min, int64_t max, + bool *satp) +{ + if (val > max) { + *satp = true; + return max; + } else if (val < min) { + *satp = true; + return min; + } else { + return val; + } +} + +/* Saturating narrowing right shifts */ +#define DO_VSHRN_SAT(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN) \ + void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \ + void *vm, uint32_t shift) \ + { \ + LTYPE *m = vm; \ + TYPE *d = vd; \ + uint16_t mask = mve_element_mask(env); \ + bool qc = false; \ + unsigned le; \ + for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \ + bool sat = false; \ + TYPE r = FN(m[H##LESIZE(le)], shift, &sat); \ + mergemask(&d[H##ESIZE(le * 2 + TOP)], r, mask); \ + qc |= sat && (mask & 1 << (TOP * ESIZE)); \ + } \ + if (qc) { \ + env->vfp.qc[0] = qc; \ + } \ + mve_advance_vpt(env); \ + } + +#define DO_VSHRN_SAT_UB(BOP, TOP, FN) \ + DO_VSHRN_SAT(BOP, false, 1, uint8_t, 2, uint16_t, FN) \ + DO_VSHRN_SAT(TOP, true, 1, uint8_t, 2, uint16_t, FN) + +#define DO_VSHRN_SAT_UH(BOP, TOP, FN) \ + DO_VSHRN_SAT(BOP, false, 2, uint16_t, 4, uint32_t, FN) \ + DO_VSHRN_SAT(TOP, true, 2, uint16_t, 4, uint32_t, FN) + +#define DO_VSHRN_SAT_SB(BOP, TOP, FN) \ + DO_VSHRN_SAT(BOP, false, 1, int8_t, 2, int16_t, FN) \ + DO_VSHRN_SAT(TOP, true, 1, int8_t, 2, int16_t, FN) + +#define DO_VSHRN_SAT_SH(BOP, TOP, FN) \ + DO_VSHRN_SAT(BOP, false, 2, int16_t, 4, int32_t, FN) \ + DO_VSHRN_SAT(TOP, true, 2, int16_t, 4, int32_t, FN) + +#define DO_SHRN_SB(N, M, SATP) \ + do_sat_bhs((int64_t)(N) >> (M), INT8_MIN, INT8_MAX, SATP) +#define DO_SHRN_UB(N, M, SATP) \ + do_sat_bhs((uint64_t)(N) >> (M), 0, UINT8_MAX, SATP) +#define DO_SHRUN_B(N, M, SATP) \ + do_sat_bhs((int64_t)(N) >> (M), 0, UINT8_MAX, SATP) + +#define DO_SHRN_SH(N, M, SATP) \ + do_sat_bhs((int64_t)(N) >> (M), INT16_MIN, INT16_MAX, SATP) +#define DO_SHRN_UH(N, M, SATP) \ + do_sat_bhs((uint64_t)(N) >> (M), 0, UINT16_MAX, SATP) +#define DO_SHRUN_H(N, M, SATP) \ + do_sat_bhs((int64_t)(N) >> (M), 0, UINT16_MAX, SATP) + +#define DO_RSHRN_SB(N, M, SATP) \ + do_sat_bhs(do_srshr(N, M), INT8_MIN, INT8_MAX, SATP) +#define DO_RSHRN_UB(N, M, SATP) \ + do_sat_bhs(do_urshr(N, M), 0, UINT8_MAX, SATP) +#define DO_RSHRUN_B(N, M, SATP) \ + do_sat_bhs(do_srshr(N, M), 0, UINT8_MAX, SATP) + +#define DO_RSHRN_SH(N, M, SATP) \ + do_sat_bhs(do_srshr(N, M), INT16_MIN, INT16_MAX, SATP) +#define DO_RSHRN_UH(N, M, SATP) \ + do_sat_bhs(do_urshr(N, M), 0, UINT16_MAX, SATP) +#define DO_RSHRUN_H(N, M, SATP) \ + do_sat_bhs(do_srshr(N, M), 0, UINT16_MAX, SATP) + +DO_VSHRN_SAT_SB(vqshrnb_sb, vqshrnt_sb, DO_SHRN_SB) +DO_VSHRN_SAT_SH(vqshrnb_sh, vqshrnt_sh, DO_SHRN_SH) +DO_VSHRN_SAT_UB(vqshrnb_ub, vqshrnt_ub, DO_SHRN_UB) +DO_VSHRN_SAT_UH(vqshrnb_uh, vqshrnt_uh, DO_SHRN_UH) +DO_VSHRN_SAT_SB(vqshrunbb, vqshruntb, DO_SHRUN_B) +DO_VSHRN_SAT_SH(vqshrunbh, vqshrunth, DO_SHRUN_H) + +DO_VSHRN_SAT_SB(vqrshrnb_sb, vqrshrnt_sb, DO_RSHRN_SB) +DO_VSHRN_SAT_SH(vqrshrnb_sh, vqrshrnt_sh, DO_RSHRN_SH) +DO_VSHRN_SAT_UB(vqrshrnb_ub, vqrshrnt_ub, DO_RSHRN_UB) +DO_VSHRN_SAT_UH(vqrshrnb_uh, vqrshrnt_uh, DO_RSHRN_UH) +DO_VSHRN_SAT_SB(vqrshrunbb, vqrshruntb, DO_RSHRUN_B) +DO_VSHRN_SAT_SH(vqrshrunbh, vqrshrunth, DO_RSHRUN_H) diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c index f1a8f21b77..eef4f1f6ce 100644 --- a/target/arm/translate-mve.c +++ b/target/arm/translate-mve.c @@ -926,3 +926,15 @@ DO_2SHIFT_N(VSHRNB, vshrnb) DO_2SHIFT_N(VSHRNT, vshrnt) DO_2SHIFT_N(VRSHRNB, vrshrnb) DO_2SHIFT_N(VRSHRNT, vrshrnt) +DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s) +DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s) +DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u) +DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u) +DO_2SHIFT_N(VQSHRUNB, vqshrunb) +DO_2SHIFT_N(VQSHRUNT, vqshrunt) +DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s) +DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s) +DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u) +DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u) +DO_2SHIFT_N(VQRSHRUNB, vqrshrunb) +DO_2SHIFT_N(VQRSHRUNT, vqrshrunt) |