aboutsummaryrefslogtreecommitdiff
path: root/accel
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2019-05-14 10:08:47 +0100
committerPeter Maydell <peter.maydell@linaro.org>2019-05-14 10:08:47 +0100
commite329ad2ab72c43b56df88b34954c2c7d839bb373 (patch)
tree69180a9f32bc7cfa7b9645e7d18ba3ff32a37975 /accel
parente24f44dbeab8e54c72bdaedbd35453fb2a6c38da (diff)
parenta7b6d286cfb5205b9f5330aefc5727269b3d810f (diff)
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20190513' into staging
Improve code generation for vector duplication. Add vector expansions for shifts by non-constant scalar. Add vector expansions for shifts by vector. Add integer and vector expansions for absolute value. Several patches in preparation for Altivec. Bug fix for tcg/aarch64 vs min/max. # gpg: Signature made Tue 14 May 2019 00:58:02 BST # gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F # gpg: issuer "richard.henderson@linaro.org" # gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full] # Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F * remotes/rth/tags/pull-tcg-20190513: (31 commits) tcg/aarch64: Do not advertise minmax for MO_64 target/xtensa: Use tcg_gen_abs_i32 target/tricore: Use tcg_gen_abs_tl target/s390x: Use tcg_gen_abs_i64 target/ppc: Use tcg_gen_abs_tl target/ppc: Use tcg_gen_abs_i32 target/cris: Use tcg_gen_abs_tl target/arm: Use tcg_gen_abs_i64 and tcg_gen_gvec_abs tcg/aarch64: Support vector absolute value tcg/i386: Support vector absolute value tcg: Add support for vector absolute value tcg: Add support for integer absolute value tcg/i386: Support vector scalar shift opcodes tcg: Add gvec expanders for vector shift by scalar tcg/aarch64: Support vector variable shift opcodes tcg/i386: Support vector variable shift opcodes tcg: Add gvec expanders for variable shift tcg: Add INDEX_op_dupm_vec tcg/aarch64: Implement tcg_out_dupm_vec tcg/i386: Implement tcg_out_dupm_vec ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'accel')
-rw-r--r--accel/tcg/tcg-runtime-gvec.c192
-rw-r--r--accel/tcg/tcg-runtime.h20
2 files changed, 212 insertions, 0 deletions
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index e2c6f24262..0f09e0ef38 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -398,6 +398,54 @@ void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
clear_high(d, oprsz, desc);
}
+void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int8_t)) {
+ int8_t aa = *(int8_t *)(a + i);
+ *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int16_t)) {
+ int16_t aa = *(int16_t *)(a + i);
+ *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int32_t)) {
+ int32_t aa = *(int32_t *)(a + i);
+ *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int64_t)) {
+ int64_t aa = *(int64_t *)(a + i);
+ *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
+ }
+ clear_high(d, oprsz, desc);
+}
+
void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
@@ -725,6 +773,150 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
clear_high(d, oprsz, desc);
}
+void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+ uint8_t sh = *(uint8_t *)(b + i) & 7;
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+ uint8_t sh = *(uint16_t *)(b + i) & 15;
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+ uint8_t sh = *(uint32_t *)(b + i) & 31;
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ uint8_t sh = *(uint64_t *)(b + i) & 63;
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+ uint8_t sh = *(uint8_t *)(b + i) & 7;
+ *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+ uint8_t sh = *(uint16_t *)(b + i) & 15;
+ *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+ uint8_t sh = *(uint32_t *)(b + i) & 31;
+ *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ uint8_t sh = *(uint64_t *)(b + i) & 63;
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec8)) {
+ uint8_t sh = *(uint8_t *)(b + i) & 7;
+ *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(int16_t)) {
+ uint8_t sh = *(uint16_t *)(b + i) & 15;
+ *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec32)) {
+ uint8_t sh = *(uint32_t *)(b + i) & 31;
+ *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(vec64)) {
+ uint8_t sh = *(uint64_t *)(b + i) & 63;
+ *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
+ }
+ clear_high(d, oprsz, desc);
+}
+
/* If vectors are enabled, the compiler fills in -1 for true.
Otherwise, we must take care of this by hand. */
#ifdef CONFIG_VECTOR16
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index dfe325625c..6d73dc2d65 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -225,6 +225,11 @@ DEF_HELPER_FLAGS_3(gvec_neg16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_neg32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_neg64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_abs8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_abs16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_abs32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_abs64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_3(gvec_not, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_and, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_or, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -254,6 +259,21 @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_shl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_shr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_shr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_shr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_shr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sar8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)