diff options
Diffstat (limited to 'target/arm/sve_helper.c')
-rw-r--r-- | target/arm/sve_helper.c | 74 |
1 files changed, 74 insertions, 0 deletions
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index 5b6292929e..fa96e28639 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -7241,3 +7241,77 @@ void HELPER(sve2_xar_s)(void *vd, void *vn, void *vm, uint32_t desc) d[i] = ror32(n[i] ^ m[i], shr); } } + +void HELPER(fmmla_s)(void *vd, void *vn, void *vm, void *va, + void *status, uint32_t desc) +{ + intptr_t s, opr_sz = simd_oprsz(desc) / (sizeof(float32) * 4); + + for (s = 0; s < opr_sz; ++s) { + float32 *n = vn + s * sizeof(float32) * 4; + float32 *m = vm + s * sizeof(float32) * 4; + float32 *a = va + s * sizeof(float32) * 4; + float32 *d = vd + s * sizeof(float32) * 4; + float32 n00 = n[H4(0)], n01 = n[H4(1)]; + float32 n10 = n[H4(2)], n11 = n[H4(3)]; + float32 m00 = m[H4(0)], m01 = m[H4(1)]; + float32 m10 = m[H4(2)], m11 = m[H4(3)]; + float32 p0, p1; + + /* i = 0, j = 0 */ + p0 = float32_mul(n00, m00, status); + p1 = float32_mul(n01, m01, status); + d[H4(0)] = float32_add(a[H4(0)], float32_add(p0, p1, status), status); + + /* i = 0, j = 1 */ + p0 = float32_mul(n00, m10, status); + p1 = float32_mul(n01, m11, status); + d[H4(1)] = float32_add(a[H4(1)], float32_add(p0, p1, status), status); + + /* i = 1, j = 0 */ + p0 = float32_mul(n10, m00, status); + p1 = float32_mul(n11, m01, status); + d[H4(2)] = float32_add(a[H4(2)], float32_add(p0, p1, status), status); + + /* i = 1, j = 1 */ + p0 = float32_mul(n10, m10, status); + p1 = float32_mul(n11, m11, status); + d[H4(3)] = float32_add(a[H4(3)], float32_add(p0, p1, status), status); + } +} + +void HELPER(fmmla_d)(void *vd, void *vn, void *vm, void *va, + void *status, uint32_t desc) +{ + intptr_t s, opr_sz = simd_oprsz(desc) / (sizeof(float64) * 4); + + for (s = 0; s < opr_sz; ++s) { + float64 *n = vn + s * sizeof(float64) * 4; + float64 *m = vm + s * sizeof(float64) * 4; + float64 *a = va + s * sizeof(float64) * 4; + float64 *d = vd + s * sizeof(float64) * 4; + float64 n00 = n[0], n01 = n[1], n10 = n[2], n11 = n[3]; + float64 m00 = m[0], m01 = m[1], m10 = m[2], m11 = m[3]; + float64 p0, p1; + + /* i = 0, j = 0 */ + p0 = float64_mul(n00, m00, status); + p1 = float64_mul(n01, m01, status); + d[0] = float64_add(a[0], float64_add(p0, p1, status), status); + + /* i = 0, j = 1 */ + p0 = float64_mul(n00, m10, status); + p1 = float64_mul(n01, m11, status); + d[1] = float64_add(a[1], float64_add(p0, p1, status), status); + + /* i = 1, j = 0 */ + p0 = float64_mul(n10, m00, status); + p1 = float64_mul(n11, m01, status); + d[2] = float64_add(a[2], float64_add(p0, p1, status), status); + + /* i = 1, j = 1 */ + p0 = float64_mul(n10, m10, status); + p1 = float64_mul(n11, m11, status); + d[3] = float64_add(a[3], float64_add(p0, p1, status), status); + } +} |