diff options
Diffstat (limited to 'target/arm/vec_helper.c')
-rw-r--r-- | target/arm/vec_helper.c | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index 79d2624f7b..8017bd88c4 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -1197,3 +1197,63 @@ void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc) } clear_tail(d, opr_sz, simd_maxsz(desc)); } + +/* + * 8x8->16 polynomial multiply. + * + * The byte inputs are expanded to (or extracted from) half-words. + * Note that neon and sve2 get the inputs from different positions. + * This allows 4 bytes to be processed in parallel with uint64_t. + */ + +static uint64_t expand_byte_to_half(uint64_t x) +{ + return (x & 0x000000ff) + | ((x & 0x0000ff00) << 8) + | ((x & 0x00ff0000) << 16) + | ((x & 0xff000000) << 24); +} + +static uint64_t pmull_h(uint64_t op1, uint64_t op2) +{ + uint64_t result = 0; + int i; + + for (i = 0; i < 8; ++i) { + uint64_t mask = (op1 & 0x0001000100010001ull) * 0xffff; + result ^= op2 & mask; + op1 >>= 1; + op2 <<= 1; + } + return result; +} + +void HELPER(neon_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + int hi = simd_data(desc); + uint64_t *d = vd, *n = vn, *m = vm; + uint64_t nn = n[hi], mm = m[hi]; + + d[0] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm)); + nn >>= 32; + mm >>= 32; + d[1] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm)); + + clear_tail(d, 16, simd_maxsz(desc)); +} + +#ifdef TARGET_AARCH64 +void HELPER(sve2_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + int shift = simd_data(desc) * 8; + intptr_t i, opr_sz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; ++i) { + uint64_t nn = (n[i] >> shift) & 0x00ff00ff00ff00ffull; + uint64_t mm = (m[i] >> shift) & 0x00ff00ff00ff00ffull; + + d[i] = pmull_h(nn, mm); + } +} +#endif |