diff options
Diffstat (limited to 'target/arm/mve_helper.c')
-rw-r--r-- | target/arm/mve_helper.c | 104 |
1 files changed, 104 insertions, 0 deletions
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c index a97942208b..3e736e8909 100644 --- a/target/arm/mve_helper.c +++ b/target/arm/mve_helper.c @@ -1362,5 +1362,109 @@ static inline uint64_t do_urshr(uint64_t x, unsigned sh) } } +static inline int64_t do_srshr(int64_t x, unsigned sh) +{ + if (likely(sh < 64)) { + return (x >> sh) + ((x >> (sh - 1)) & 1); + } else { + /* Rounding the sign bit always produces 0. */ + return 0; + } +} + DO_VSHRN_ALL(vshrn, DO_SHR) DO_VSHRN_ALL(vrshrn, do_urshr) + +static inline int32_t do_sat_bhs(int64_t val, int64_t min, int64_t max, + bool *satp) +{ + if (val > max) { + *satp = true; + return max; + } else if (val < min) { + *satp = true; + return min; + } else { + return val; + } +} + +/* Saturating narrowing right shifts */ +#define DO_VSHRN_SAT(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN) \ + void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \ + void *vm, uint32_t shift) \ + { \ + LTYPE *m = vm; \ + TYPE *d = vd; \ + uint16_t mask = mve_element_mask(env); \ + bool qc = false; \ + unsigned le; \ + for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \ + bool sat = false; \ + TYPE r = FN(m[H##LESIZE(le)], shift, &sat); \ + mergemask(&d[H##ESIZE(le * 2 + TOP)], r, mask); \ + qc |= sat && (mask & 1 << (TOP * ESIZE)); \ + } \ + if (qc) { \ + env->vfp.qc[0] = qc; \ + } \ + mve_advance_vpt(env); \ + } + +#define DO_VSHRN_SAT_UB(BOP, TOP, FN) \ + DO_VSHRN_SAT(BOP, false, 1, uint8_t, 2, uint16_t, FN) \ + DO_VSHRN_SAT(TOP, true, 1, uint8_t, 2, uint16_t, FN) + +#define DO_VSHRN_SAT_UH(BOP, TOP, FN) \ + DO_VSHRN_SAT(BOP, false, 2, uint16_t, 4, uint32_t, FN) \ + DO_VSHRN_SAT(TOP, true, 2, uint16_t, 4, uint32_t, FN) + +#define DO_VSHRN_SAT_SB(BOP, TOP, FN) \ + DO_VSHRN_SAT(BOP, false, 1, int8_t, 2, int16_t, FN) \ + DO_VSHRN_SAT(TOP, true, 1, int8_t, 2, int16_t, FN) + +#define DO_VSHRN_SAT_SH(BOP, TOP, FN) \ + DO_VSHRN_SAT(BOP, false, 2, int16_t, 4, int32_t, FN) \ + DO_VSHRN_SAT(TOP, true, 2, int16_t, 4, int32_t, FN) + +#define DO_SHRN_SB(N, M, SATP) \ + do_sat_bhs((int64_t)(N) >> (M), INT8_MIN, INT8_MAX, SATP) +#define DO_SHRN_UB(N, M, SATP) \ + do_sat_bhs((uint64_t)(N) >> (M), 0, UINT8_MAX, SATP) +#define DO_SHRUN_B(N, M, SATP) \ + do_sat_bhs((int64_t)(N) >> (M), 0, UINT8_MAX, SATP) + +#define DO_SHRN_SH(N, M, SATP) \ + do_sat_bhs((int64_t)(N) >> (M), INT16_MIN, INT16_MAX, SATP) +#define DO_SHRN_UH(N, M, SATP) \ + do_sat_bhs((uint64_t)(N) >> (M), 0, UINT16_MAX, SATP) +#define DO_SHRUN_H(N, M, SATP) \ + do_sat_bhs((int64_t)(N) >> (M), 0, UINT16_MAX, SATP) + +#define DO_RSHRN_SB(N, M, SATP) \ + do_sat_bhs(do_srshr(N, M), INT8_MIN, INT8_MAX, SATP) +#define DO_RSHRN_UB(N, M, SATP) \ + do_sat_bhs(do_urshr(N, M), 0, UINT8_MAX, SATP) +#define DO_RSHRUN_B(N, M, SATP) \ + do_sat_bhs(do_srshr(N, M), 0, UINT8_MAX, SATP) + +#define DO_RSHRN_SH(N, M, SATP) \ + do_sat_bhs(do_srshr(N, M), INT16_MIN, INT16_MAX, SATP) +#define DO_RSHRN_UH(N, M, SATP) \ + do_sat_bhs(do_urshr(N, M), 0, UINT16_MAX, SATP) +#define DO_RSHRUN_H(N, M, SATP) \ + do_sat_bhs(do_srshr(N, M), 0, UINT16_MAX, SATP) + +DO_VSHRN_SAT_SB(vqshrnb_sb, vqshrnt_sb, DO_SHRN_SB) +DO_VSHRN_SAT_SH(vqshrnb_sh, vqshrnt_sh, DO_SHRN_SH) +DO_VSHRN_SAT_UB(vqshrnb_ub, vqshrnt_ub, DO_SHRN_UB) +DO_VSHRN_SAT_UH(vqshrnb_uh, vqshrnt_uh, DO_SHRN_UH) +DO_VSHRN_SAT_SB(vqshrunbb, vqshruntb, DO_SHRUN_B) +DO_VSHRN_SAT_SH(vqshrunbh, vqshrunth, DO_SHRUN_H) + +DO_VSHRN_SAT_SB(vqrshrnb_sb, vqrshrnt_sb, DO_RSHRN_SB) +DO_VSHRN_SAT_SH(vqrshrnb_sh, vqrshrnt_sh, DO_RSHRN_SH) +DO_VSHRN_SAT_UB(vqrshrnb_ub, vqrshrnt_ub, DO_RSHRN_UB) +DO_VSHRN_SAT_UH(vqrshrnb_uh, vqrshrnt_uh, DO_RSHRN_UH) +DO_VSHRN_SAT_SB(vqrshrunbb, vqrshruntb, DO_RSHRUN_B) +DO_VSHRN_SAT_SH(vqrshrunbh, vqrshrunth, DO_RSHRUN_H) |