aboutsummaryrefslogtreecommitdiff
path: root/target/ppc/int_helper.c
diff options
context:
space:
mode:
authorLucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>2022-05-24 11:05:31 -0300
committerDaniel Henrique Barboza <danielhb413@gmail.com>2022-05-26 17:11:33 -0300
commit345531533f26df49e74f16dafc88408408173ece (patch)
tree8586d5eeba5982cb2ab0b283b360d671c0740669 /target/ppc/int_helper.c
parenta702c5339eda791b969ed531ce99456df7ca8451 (diff)
target/ppc: Implemented xvi*ger* instructions
Implement the following PowerISA v3.1 instructions: xvi4ger8: VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) xvi4ger8pp: VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) Positive multiply, Positive accumulate xvi8ger4: VSX Vector 4-bit Signed Integer GER (rank-8 update) xvi8ger4pp: VSX Vector 4-bit Signed Integer GER (rank-8 update) Positive multiply, Positive accumulate xvi8ger4spp: VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with Saturate Positive multiply, Positive accumulate xvi16ger2: VSX Vector 16-bit Signed Integer GER (rank-2 update) xvi16ger2pp: VSX Vector 16-bit Signed Integer GER (rank-2 update) Positive multiply, Positive accumulate xvi16ger2s: VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation xvi16ger2spp: VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation Positive multiply, Positive accumulate Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-Id: <20220524140537.27451-3-lucas.araujo@eldorado.org.br> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Diffstat (limited to 'target/ppc/int_helper.c')
-rw-r--r--target/ppc/int_helper.c130
1 files changed, 130 insertions, 0 deletions
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index b9dd15d607..105b626d1b 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -782,6 +782,136 @@ VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT
+typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);
+
+static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
+{
+ int64_t psum = 0;
+ for (int i = 0; i < 8; i++, mask >>= 1) {
+ if (mask & 1) {
+ psum += sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
+ }
+ }
+ return psum;
+}
+
+static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
+{
+ int64_t psum = 0;
+ for (int i = 0; i < 4; i++, mask >>= 1) {
+ if (mask & 1) {
+ psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8);
+ }
+ }
+ return psum;
+}
+
+static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
+{
+ int64_t psum = 0;
+ for (int i = 0; i < 2; i++, mask >>= 1) {
+ if (mask & 1) {
+ psum += sextract32(a, 16 * i, 16) * sextract32(b, 16 * i, 16);
+ }
+ }
+ return psum;
+}
+
+static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at,
+ uint32_t mask, bool sat, bool acc, do_ger ger)
+{
+ uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
+ xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
+ ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
+ uint8_t xmsk_bit, ymsk_bit;
+ int64_t psum;
+ int i, j;
+ for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
+ for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
+ if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
+ psum = ger(a->VsrW(i), b->VsrW(j), pmsk);
+ if (acc) {
+ psum += at[i].VsrSW(j);
+ }
+ if (sat && psum > INT32_MAX) {
+ set_vscr_sat(env);
+ at[i].VsrSW(j) = INT32_MAX;
+ } else if (sat && psum < INT32_MIN) {
+ set_vscr_sat(env);
+ at[i].VsrSW(j) = INT32_MIN;
+ } else {
+ at[i].VsrSW(j) = (int32_t) psum;
+ }
+ } else {
+ at[i].VsrSW(j) = 0;
+ }
+ }
+ }
+}
+
+QEMU_FLATTEN
+void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, false, false, ger_rank8);
+}
+
+QEMU_FLATTEN
+void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, false, true, ger_rank8);
+}
+
+QEMU_FLATTEN
+void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, false, false, ger_rank4);
+}
+
+QEMU_FLATTEN
+void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, false, true, ger_rank4);
+}
+
+QEMU_FLATTEN
+void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, true, true, ger_rank4);
+}
+
+QEMU_FLATTEN
+void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, false, false, ger_rank2);
+}
+
+QEMU_FLATTEN
+void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, true, false, ger_rank2);
+}
+
+QEMU_FLATTEN
+void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, false, true, ger_rank2);
+}
+
+QEMU_FLATTEN
+void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, true, true, ger_rank2);
+}
+
target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
target_ulong count = 0;