target/arm: Implement MVE VMLADAV and VMLSLDAV

Implement the MVE VMLADAV and VMLSLDAV insns. Like the VMLALDAV and VMLSLDAV insns already implemented, these accumulate multiplied vector elements; but they accumulate a 32-bit result rather than a 64-bit one. Note that these encodings overlap with what would be RdaHi=0b111 for VMLALDAV, VMLSLDAV, VRMLALDAVH and VRMLSLDAVH. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
author: Peter Maydell <peter.maydell@linaro.org> 2021-08-13 17:11:54 +0100
committer: Peter Maydell <peter.maydell@linaro.org> 2021-08-25 10:48:49 +0100
commit: f0ffff5163cb503de236fc766121601592f08744 (patch)
tree: 80b59e16a5e42dd873029fd82853ed1b5469f50b /target/arm/mve_helper.c
parent: 640cdf20a25d0021f4e93b6207b648a973df320b (diff)
1 files changed, 41 insertions, 0 deletions
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index 72c30f360a..ea206c932b 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -1190,6 +1190,47 @@ DO_LDAV(vmlsldavsw, 4, int32_t, false, +=, -=)
 DO_LDAV(vmlsldavxsw, 4, int32_t, true, +=, -=)
 
 /*
+ * Multiply add dual accumulate ops
+ */
+#define DO_DAV(OP, ESIZE, TYPE, XCHG, EVENACC, ODDACC) \
+    uint32_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vn,         \
+                                    void *vm, uint32_t a)               \
+    {                                                                   \
+        uint16_t mask = mve_element_mask(env);                          \
+        unsigned e;                                                     \
+        TYPE *n = vn, *m = vm;                                          \
+        for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) {              \
+            if (mask & 1) {                                             \
+                if (e & 1) {                                            \
+                    a ODDACC                                            \
+                        n[H##ESIZE(e - 1 * XCHG)] * m[H##ESIZE(e)];     \
+                } else {                                                \
+                    a EVENACC                                           \
+                        n[H##ESIZE(e + 1 * XCHG)] * m[H##ESIZE(e)];     \
+                }                                                       \
+            }                                                           \
+        }                                                               \
+        mve_advance_vpt(env);                                           \
+        return a;                                                       \
+    }
+
+#define DO_DAV_S(INSN, XCHG, EVENACC, ODDACC)           \
+    DO_DAV(INSN##b, 1, int8_t, XCHG, EVENACC, ODDACC)   \
+    DO_DAV(INSN##h, 2, int16_t, XCHG, EVENACC, ODDACC)  \
+    DO_DAV(INSN##w, 4, int32_t, XCHG, EVENACC, ODDACC)
+
+#define DO_DAV_U(INSN, XCHG, EVENACC, ODDACC)           \
+    DO_DAV(INSN##b, 1, uint8_t, XCHG, EVENACC, ODDACC)  \
+    DO_DAV(INSN##h, 2, uint16_t, XCHG, EVENACC, ODDACC) \
+    DO_DAV(INSN##w, 4, uint32_t, XCHG, EVENACC, ODDACC)
+
+DO_DAV_S(vmladavs, false, +=, +=)
+DO_DAV_U(vmladavu, false, +=, +=)
+DO_DAV_S(vmlsdav, false, +=, -=)
+DO_DAV_S(vmladavsx, true, +=, +=)
+DO_DAV_S(vmlsdavx, true, +=, -=)
+
+/*
  * Rounding multiply add long dual accumulate high. In the pseudocode
  * this is implemented with a 72-bit internal accumulator value of which
  * the top 64 bits are returned. We optimize this to avoid having to
author	Peter Maydell <peter.maydell@linaro.org>	2021-08-13 17:11:54 +0100
committer	Peter Maydell <peter.maydell@linaro.org>	2021-08-25 10:48:49 +0100
commit	f0ffff5163cb503de236fc766121601592f08744 (patch)
tree	80b59e16a5e42dd873029fd82853ed1b5469f50b /target/arm/mve_helper.c
parent	640cdf20a25d0021f4e93b6207b648a973df320b (diff)