diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2018-06-29 15:11:13 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-06-29 15:11:13 +0100 |
commit | d730ecaae77ac696515207a5ef99509240fc792b (patch) | |
tree | ee70d918556a22a107f146ae972cae1f59c08272 /target/arm/vec_helper.c | |
parent | 18fc24057815bf3d956cfab892a2bc2344bd1dcb (diff) |
target/arm: Implement SVE dot product (vectors)
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180627043328.11531-33-richard.henderson@linaro.org
[PMM: moved 'ra=%reg_movprfx' here from following patch]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target/arm/vec_helper.c')
-rw-r--r-- | target/arm/vec_helper.c | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index db5aeb9f24..c16a30c3b5 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -194,6 +194,73 @@ void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm, clear_tail(d, opr_sz, simd_maxsz(desc)); } +/* Integer 8 and 16-bit dot-product. + * + * Note that for the loops herein, host endianness does not matter + * with respect to the ordering of data within the 64-bit lanes. + * All elements are treated equally, no matter where they are. + */ + +void HELPER(gvec_sdot_b)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint32_t *d = vd; + int8_t *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] += n[i * 4 + 0] * m[i * 4 + 0] + + n[i * 4 + 1] * m[i * 4 + 1] + + n[i * 4 + 2] * m[i * 4 + 2] + + n[i * 4 + 3] * m[i * 4 + 3]; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(gvec_udot_b)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint32_t *d = vd; + uint8_t *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] += n[i * 4 + 0] * m[i * 4 + 0] + + n[i * 4 + 1] * m[i * 4 + 1] + + n[i * 4 + 2] * m[i * 4 + 2] + + n[i * 4 + 3] * m[i * 4 + 3]; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(gvec_sdot_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint64_t *d = vd; + int16_t *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; ++i) { + d[i] += (int64_t)n[i * 4 + 0] * m[i * 4 + 0] + + (int64_t)n[i * 4 + 1] * m[i * 4 + 1] + + (int64_t)n[i * 4 + 2] * m[i * 4 + 2] + + (int64_t)n[i * 4 + 3] * m[i * 4 + 3]; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(gvec_udot_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint64_t *d = vd; + uint16_t *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; ++i) { + d[i] += (uint64_t)n[i * 4 + 0] * m[i * 4 + 0] + + (uint64_t)n[i * 4 + 1] * m[i * 4 + 1] + + (uint64_t)n[i * 4 + 2] * m[i * 4 + 2] + + (uint64_t)n[i * 4 + 3] * m[i * 4 + 3]; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm, void *vfpst, uint32_t desc) { |