aboutsummaryrefslogtreecommitdiff
path: root/target/arm/vec_helper.c
diff options
context:
space:
mode:
Diffstat (limited to 'target/arm/vec_helper.c')
-rw-r--r--target/arm/vec_helper.c60
1 files changed, 60 insertions, 0 deletions
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 79d2624f7b..8017bd88c4 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -1197,3 +1197,63 @@ void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc)
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
+
+/*
+ * 8x8->16 polynomial multiply.
+ *
+ * The byte inputs are expanded to (or extracted from) half-words.
+ * Note that neon and sve2 get the inputs from different positions.
+ * This allows 4 bytes to be processed in parallel with uint64_t.
+ */
+
+static uint64_t expand_byte_to_half(uint64_t x)
+{
+ return (x & 0x000000ff)
+ | ((x & 0x0000ff00) << 8)
+ | ((x & 0x00ff0000) << 16)
+ | ((x & 0xff000000) << 24);
+}
+
+static uint64_t pmull_h(uint64_t op1, uint64_t op2)
+{
+ uint64_t result = 0;
+ int i;
+
+ for (i = 0; i < 8; ++i) {
+ uint64_t mask = (op1 & 0x0001000100010001ull) * 0xffff;
+ result ^= op2 & mask;
+ op1 >>= 1;
+ op2 <<= 1;
+ }
+ return result;
+}
+
+void HELPER(neon_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ int hi = simd_data(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint64_t nn = n[hi], mm = m[hi];
+
+ d[0] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm));
+ nn >>= 32;
+ mm >>= 32;
+ d[1] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm));
+
+ clear_tail(d, 16, simd_maxsz(desc));
+}
+
+#ifdef TARGET_AARCH64
+void HELPER(sve2_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ int shift = simd_data(desc) * 8;
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ uint64_t nn = (n[i] >> shift) & 0x00ff00ff00ff00ffull;
+ uint64_t mm = (m[i] >> shift) & 0x00ff00ff00ff00ffull;
+
+ d[i] = pmull_h(nn, mm);
+ }
+}
+#endif