From 5ee5c14cacda27e904cd6b0d9e7ffe1acff42838 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 13 Apr 2019 20:42:37 -1000 Subject: tcg: Add gvec expanders for variable shift MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gvec expanders perform a modulo on the shift count. If the target requires alternate behaviour, then it cannot use the generic gvec expanders anyway, and will have to have its own custom code. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/tcg-runtime-gvec.c | 144 +++++++++++++++++++++++++++++++++++++++++++ accel/tcg/tcg-runtime.h | 15 +++++ 2 files changed, 159 insertions(+) (limited to 'accel') diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c index e2c6f24262..2152fb6903 100644 --- a/accel/tcg/tcg-runtime-gvec.c +++ b/accel/tcg/tcg-runtime-gvec.c @@ -725,6 +725,150 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) clear_high(d, oprsz, desc); } +void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + uint8_t sh = *(uint8_t *)(b + i) & 7; + *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + uint8_t sh = *(uint16_t *)(b + i) & 15; + *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + uint8_t sh = *(uint32_t *)(b + i) & 31; + *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint8_t sh = *(uint64_t *)(b + i) & 63; + *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint8_t)) { + uint8_t sh = *(uint8_t *)(b + i) & 7; + *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint16_t)) { + uint8_t sh = *(uint16_t *)(b + i) & 15; + *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint32_t)) { + uint8_t sh = *(uint32_t *)(b + i) & 31; + *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { + uint8_t sh = *(uint64_t *)(b + i) & 63; + *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(vec8)) { + uint8_t sh = *(uint8_t *)(b + i) & 7; + *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(int16_t)) { + uint8_t sh = *(uint16_t *)(b + i) & 15; + *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(vec32)) { + uint8_t sh = *(uint32_t *)(b + i) & 31; + *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + +void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) +{ + intptr_t oprsz = simd_oprsz(desc); + intptr_t i; + + for (i = 0; i < oprsz; i += sizeof(vec64)) { + uint8_t sh = *(uint64_t *)(b + i) & 63; + *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh; + } + clear_high(d, oprsz, desc); +} + /* If vectors are enabled, the compiler fills in -1 for true. Otherwise, we must take care of this by hand. */ #ifdef CONFIG_VECTOR16 diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h index dfe325625c..ed3ce5fd91 100644 --- a/accel/tcg/tcg-runtime.h +++ b/accel/tcg/tcg-runtime.h @@ -254,6 +254,21 @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_shl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_shr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_shr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_shr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_shr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_sar8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) -- cgit v1.2.3