diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2021-08-13 17:11:54 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2021-08-25 10:48:49 +0100 |
commit | f0ffff5163cb503de236fc766121601592f08744 (patch) | |
tree | 80b59e16a5e42dd873029fd82853ed1b5469f50b /target/arm/mve_helper.c | |
parent | 640cdf20a25d0021f4e93b6207b648a973df320b (diff) |
target/arm: Implement MVE VMLADAV and VMLSLDAV
Implement the MVE VMLADAV and VMLSLDAV insns. Like the VMLALDAV and
VMLSLDAV insns already implemented, these accumulate multiplied
vector elements; but they accumulate a 32-bit result rather than a
64-bit one.
Note that these encodings overlap with what would be RdaHi=0b111 for
VMLALDAV, VMLSLDAV, VRMLALDAVH and VRMLSLDAVH.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'target/arm/mve_helper.c')
-rw-r--r-- | target/arm/mve_helper.c | 41 |
1 files changed, 41 insertions, 0 deletions
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c index 72c30f360a..ea206c932b 100644 --- a/target/arm/mve_helper.c +++ b/target/arm/mve_helper.c @@ -1190,6 +1190,47 @@ DO_LDAV(vmlsldavsw, 4, int32_t, false, +=, -=) DO_LDAV(vmlsldavxsw, 4, int32_t, true, +=, -=) /* + * Multiply add dual accumulate ops + */ +#define DO_DAV(OP, ESIZE, TYPE, XCHG, EVENACC, ODDACC) \ + uint32_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vn, \ + void *vm, uint32_t a) \ + { \ + uint16_t mask = mve_element_mask(env); \ + unsigned e; \ + TYPE *n = vn, *m = vm; \ + for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ + if (mask & 1) { \ + if (e & 1) { \ + a ODDACC \ + n[H##ESIZE(e - 1 * XCHG)] * m[H##ESIZE(e)]; \ + } else { \ + a EVENACC \ + n[H##ESIZE(e + 1 * XCHG)] * m[H##ESIZE(e)]; \ + } \ + } \ + } \ + mve_advance_vpt(env); \ + return a; \ + } + +#define DO_DAV_S(INSN, XCHG, EVENACC, ODDACC) \ + DO_DAV(INSN##b, 1, int8_t, XCHG, EVENACC, ODDACC) \ + DO_DAV(INSN##h, 2, int16_t, XCHG, EVENACC, ODDACC) \ + DO_DAV(INSN##w, 4, int32_t, XCHG, EVENACC, ODDACC) + +#define DO_DAV_U(INSN, XCHG, EVENACC, ODDACC) \ + DO_DAV(INSN##b, 1, uint8_t, XCHG, EVENACC, ODDACC) \ + DO_DAV(INSN##h, 2, uint16_t, XCHG, EVENACC, ODDACC) \ + DO_DAV(INSN##w, 4, uint32_t, XCHG, EVENACC, ODDACC) + +DO_DAV_S(vmladavs, false, +=, +=) +DO_DAV_U(vmladavu, false, +=, +=) +DO_DAV_S(vmlsdav, false, +=, -=) +DO_DAV_S(vmladavsx, true, +=, +=) +DO_DAV_S(vmlsdavx, true, +=, -=) + +/* * Rounding multiply add long dual accumulate high. In the pseudocode * this is implemented with a 72-bit internal accumulator value of which * the top 64 bits are returned. We optimize this to avoid having to |