Diffstat (limited to 'target/ppc/translate/vmx-impl.inc.c')
-rw-r--r--  target/ppc/translate/vmx-impl.inc.c | 355
1 file changed, 318 insertions(+), 37 deletions(-)
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index 663275b729..0d71c10428 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -142,38 +142,6 @@ GEN_VR_STVE(bx, 0x07, 0x04, 1);
GEN_VR_STVE(hx, 0x07, 0x05, 2);
GEN_VR_STVE(wx, 0x07, 0x06, 4);
-static void gen_lvsl(DisasContext *ctx)
-{
- TCGv_ptr rd;
- TCGv EA;
- if (unlikely(!ctx->altivec_enabled)) {
- gen_exception(ctx, POWERPC_EXCP_VPU);
- return;
- }
- EA = tcg_temp_new();
- gen_addr_reg_index(ctx, EA);
- rd = gen_avr_ptr(rD(ctx->opcode));
- gen_helper_lvsl(rd, EA);
- tcg_temp_free(EA);
- tcg_temp_free_ptr(rd);
-}
-
-static void gen_lvsr(DisasContext *ctx)
-{
- TCGv_ptr rd;
- TCGv EA;
- if (unlikely(!ctx->altivec_enabled)) {
- gen_exception(ctx, POWERPC_EXCP_VPU);
- return;
- }
- EA = tcg_temp_new();
- gen_addr_reg_index(ctx, EA);
- rd = gen_avr_ptr(rD(ctx->opcode));
- gen_helper_lvsr(rd, EA);
- tcg_temp_free(EA);
- tcg_temp_free_ptr(rd);
-}
-
static void gen_mfvscr(DisasContext *ctx)
{
TCGv_i32 t;
@@ -316,6 +284,16 @@ static void glue(gen_, name)(DisasContext *ctx) \
tcg_temp_free_ptr(rd); \
}
+#define GEN_VXFORM_TRANS(name, opc2, opc3) \
+static void glue(gen_, name)(DisasContext *ctx) \
+{ \
+ if (unlikely(!ctx->altivec_enabled)) { \
+ gen_exception(ctx, POWERPC_EXCP_VPU); \
+ return; \
+ } \
+ trans_##name(ctx); \
+}
+
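As a concrete illustration, GEN_VXFORM_TRANS(vsl, 2, 7) below expands to the handler sketched here (the opc2/opc3 arguments are not used by the macro body itself; they only feed the opcode tables):

    static void gen_vsl(DisasContext *ctx)
    {
        if (unlikely(!ctx->altivec_enabled)) {
            gen_exception(ctx, POWERPC_EXCP_VPU);
            return;
        }
        trans_vsl(ctx);
    }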
#define GEN_VXFORM_ENV(name, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) \
{ \
@@ -515,6 +493,307 @@ static void gen_vmrgow(DisasContext *ctx)
tcg_temp_free_i64(avr);
}
+/*
+ * lvsl VRT,RA,RB - Load Vector for Shift Left
+ *
+ * Let the EA be the sum (rA|0)+(rB). Let sh = EA[28-31], i.e. the low
+ * four bits of EA.
+ * Let X be the 32-byte value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F.
+ * Bytes sh:sh+15 of X are placed into vD.
+ */
+static void trans_lvsl(DisasContext *ctx)
+{
+ int VT = rD(ctx->opcode);
+ TCGv_i64 result = tcg_temp_new_i64();
+ TCGv_i64 sh = tcg_temp_new_i64();
+ TCGv EA = tcg_temp_new();
+
+    /* Get sh (defined above) by ANDing EA with 0xf. */
+ gen_addr_reg_index(ctx, EA);
+ tcg_gen_extu_tl_i64(sh, EA);
+ tcg_gen_andi_i64(sh, sh, 0xfULL);
+
+    /*
+     * Create bytes sh:sh+7 of X (defined above) and place them in the
+     * higher doubleword of vD.
+     */
+    tcg_gen_muli_i64(sh, sh, 0x0101010101010101ULL);
+    tcg_gen_addi_i64(result, sh, 0x0001020304050607ULL);
+ set_avr64(VT, result, true);
+ /*
+     * Create bytes sh+8:sh+15 of X (defined above) and place them in the
+     * lower doubleword of vD.
+ */
+ tcg_gen_addi_i64(result, sh, 0x08090a0b0c0d0e0fULL);
+ set_avr64(VT, result, false);
+
+ tcg_temp_free_i64(result);
+ tcg_temp_free_i64(sh);
+ tcg_temp_free(EA);
+}
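Why the multiply works: multiplying sh by 0x0101010101010101 splats sh into every byte lane, so a single 64-bit add yields eight consecutive byte indices at once; no lane can carry into its neighbour because sh+15 is at most 30. A standalone host-side sketch (not part of the patch; the helper name is hypothetical) that checks the arithmetic against the architectural definition:

    #include <assert.h>
    #include <stdint.h>

    /* Mirrors the two-doubleword lvsl computation above. */
    static void lvsl_result(uint64_t ea, uint64_t *hi, uint64_t *lo)
    {
        uint64_t sh = (ea & 0xf) * 0x0101010101010101ULL; /* splat sh */
        *hi = sh + 0x0001020304050607ULL; /* bytes sh .. sh+7 */
        *lo = sh + 0x08090a0b0c0d0e0fULL; /* bytes sh+8 .. sh+15 */
    }

    int main(void)
    {
        for (uint64_t ea = 0; ea < 16; ea++) {
            uint64_t hi, lo;
            lvsl_result(ea, &hi, &lo);
            for (int i = 0; i < 8; i++) {
                /* Byte i (from the most significant end) must be sh + i. */
                assert(((hi >> (56 - 8 * i)) & 0xff) == ea + i);
                assert(((lo >> (56 - 8 * i)) & 0xff) == ea + 8 + i);
            }
        }
        return 0;
    }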
+
+/*
+ * lvsr VRT,RA,RB - Load Vector for Shift Right
+ *
+ * Let the EA be the sum (rA|0)+(rB). Let sh = EA[28-31], i.e. the low
+ * four bits of EA.
+ * Let X be the 32-byte value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F.
+ * Bytes (16-sh):(31-sh) of X are placed into vD.
+ */
+static void trans_lvsr(DisasContext *ctx)
+{
+ int VT = rD(ctx->opcode);
+ TCGv_i64 result = tcg_temp_new_i64();
+ TCGv_i64 sh = tcg_temp_new_i64();
+ TCGv EA = tcg_temp_new();
+
+    /* Get sh (defined above) by ANDing EA with 0xf. */
+ gen_addr_reg_index(ctx, EA);
+ tcg_gen_extu_tl_i64(sh, EA);
+ tcg_gen_andi_i64(sh, sh, 0xfULL);
+
+ /*
+     * Create bytes (16-sh):(23-sh) of X (defined above) and place them in
+     * the higher doubleword of vD.
+ */
+ tcg_gen_muli_i64(sh, sh, 0x0101010101010101ULL);
+ tcg_gen_subfi_i64(result, 0x1011121314151617ULL, sh);
+ set_avr64(VT, result, true);
+ /*
+     * Create bytes (24-sh):(31-sh) of X (defined above) and place them in
+     * the lower doubleword of vD.
+ */
+ tcg_gen_subfi_i64(result, 0x18191a1b1c1d1e1fULL, sh);
+ set_avr64(VT, result, false);
+
+ tcg_temp_free_i64(result);
+ tcg_temp_free_i64(sh);
+ tcg_temp_free(EA);
+}
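lvsr uses the same splat trick with a subtraction; no byte lane can borrow from its neighbour because every base byte (0x10..0x1f) is at least 16 while sh is at most 15. A quick standalone check of the boundary cases (host-side sketch, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* sh == 0 selects bytes 16..31 of X; sh == 15 selects bytes 1..16. */
        uint64_t splat15 = 0xfULL * 0x0101010101010101ULL;
        assert(0x1011121314151617ULL - 0 == 0x1011121314151617ULL);
        assert(0x1011121314151617ULL - splat15 == 0x0102030405060708ULL);
        assert(0x18191a1b1c1d1e1fULL - splat15 == 0x090a0b0c0d0e0f10ULL);
        return 0;
    }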
+
+/*
+ * vsl VRT,VRA,VRB - Vector Shift Left
+ *
+ * Shift the 128-bit value in vA left by the amount specified in bits
+ * 125-127 of vB. The lowest 3 bits of each byte element of register vB
+ * must be identical, otherwise the result is undefined.
+ */
+static void trans_vsl(DisasContext *ctx)
+{
+ int VT = rD(ctx->opcode);
+ int VA = rA(ctx->opcode);
+ int VB = rB(ctx->opcode);
+ TCGv_i64 avrA = tcg_temp_new_i64();
+ TCGv_i64 avrB = tcg_temp_new_i64();
+ TCGv_i64 sh = tcg_temp_new_i64();
+ TCGv_i64 shifted = tcg_temp_new_i64();
+ TCGv_i64 tmp = tcg_temp_new_i64();
+
+ /* Place bits 125-127 of vB in sh. */
+ get_avr64(avrB, VB, false);
+ tcg_gen_andi_i64(sh, avrB, 0x07ULL);
+
+    /*
+     * Save the highest sh bits of the lower doubleword element of vA in
+     * shifted and shift the lower doubleword. The carry is computed as
+     * (avrA >> 32) >> (32 - sh) rather than avrA >> (64 - sh), so the
+     * shift count stays below 64 even when sh is 0 (a 64-bit shift count
+     * gives an unspecified result in TCG).
+     */
+    get_avr64(avrA, VA, false);
+    tcg_gen_subfi_i64(tmp, 32, sh);
+    tcg_gen_shri_i64(shifted, avrA, 32);
+    tcg_gen_shr_i64(shifted, shifted, tmp);
+    tcg_gen_shl_i64(avrA, avrA, sh);
+    set_avr64(VT, avrA, false);
+
+ /*
+     * Shift the higher doubleword element of vA and fill its lowest sh
+     * bits (zeroed by the shift) with the carried bits saved in shifted.
+ */
+ get_avr64(avrA, VA, true);
+ tcg_gen_shl_i64(avrA, avrA, sh);
+ tcg_gen_or_i64(avrA, avrA, shifted);
+ set_avr64(VT, avrA, true);
+
+ tcg_temp_free_i64(avrA);
+ tcg_temp_free_i64(avrB);
+ tcg_temp_free_i64(sh);
+ tcg_temp_free_i64(shifted);
+ tcg_temp_free_i64(tmp);
+}
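The split into two 64-bit shifts plus a carry can be sanity-checked in plain C. The standalone sketch below (not part of the patch) mirrors the TCG sequence above and compares it against a 128-bit oracle; unsigned __int128 is a GCC/Clang extension:

    #include <assert.h>
    #include <stdint.h>

    /* 128-bit left shift by sh (0..7), done on two 64-bit halves. */
    static void vsl128(uint64_t *hi, uint64_t *lo, unsigned sh)
    {
        /* (lo >> 32) >> (32 - sh) equals lo >> (64 - sh) without ever
         * using a shift count of 64. */
        uint64_t carry = (*lo >> 32) >> (32 - sh);
        *hi = (*hi << sh) | carry;
        *lo <<= sh;
    }

    int main(void)
    {
        uint64_t hi = 0x0123456789abcdefULL, lo = 0xfedcba9876543210ULL;
        for (unsigned sh = 0; sh < 8; sh++) {
            uint64_t h = hi, l = lo;
            vsl128(&h, &l, sh);
            unsigned __int128 v = (((unsigned __int128)hi << 64) | lo) << sh;
            assert(h == (uint64_t)(v >> 64) && l == (uint64_t)v);
        }
        return 0;
    }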
+
+/*
+ * vsr VRT,VRA,VRB - Vector Shift Right
+ *
+ * Shift the 128-bit value in vA right by the amount specified in bits
+ * 125-127 of vB. The lowest 3 bits of each byte element of register vB
+ * must be identical, otherwise the result is undefined.
+ */
+static void trans_vsr(DisasContext *ctx)
+{
+ int VT = rD(ctx->opcode);
+ int VA = rA(ctx->opcode);
+ int VB = rB(ctx->opcode);
+ TCGv_i64 avrA = tcg_temp_new_i64();
+ TCGv_i64 avrB = tcg_temp_new_i64();
+ TCGv_i64 sh = tcg_temp_new_i64();
+ TCGv_i64 shifted = tcg_temp_new_i64();
+ TCGv_i64 tmp = tcg_temp_new_i64();
+
+ /* Place bits 125-127 of vB in sh. */
+ get_avr64(avrB, VB, false);
+ tcg_gen_andi_i64(sh, avrB, 0x07ULL);
+
+    /*
+     * Save the lowest sh bits of the higher doubleword element of vA in
+     * shifted and shift the higher doubleword. The carry is computed as
+     * (avrA << 32) << (32 - sh) rather than avrA << (64 - sh), so the
+     * shift count stays below 64 even when sh is 0 (a 64-bit shift count
+     * gives an unspecified result in TCG).
+     */
+    get_avr64(avrA, VA, true);
+    tcg_gen_subfi_i64(tmp, 32, sh);
+    tcg_gen_shli_i64(shifted, avrA, 32);
+    tcg_gen_shl_i64(shifted, shifted, tmp);
+    tcg_gen_shr_i64(avrA, avrA, sh);
+    set_avr64(VT, avrA, true);
+ /*
+     * Shift the lower doubleword element of vA and fill its highest sh
+     * bits (zeroed by the shift) with the carried bits saved in shifted.
+ */
+ get_avr64(avrA, VA, false);
+ tcg_gen_shr_i64(avrA, avrA, sh);
+ tcg_gen_or_i64(avrA, avrA, shifted);
+ set_avr64(VT, avrA, false);
+
+ tcg_temp_free_i64(avrA);
+ tcg_temp_free_i64(avrB);
+ tcg_temp_free_i64(sh);
+ tcg_temp_free_i64(shifted);
+ tcg_temp_free_i64(tmp);
+}
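And the mirror-image check for the right shift (same assumptions as the vsl sketch above):

    #include <assert.h>
    #include <stdint.h>

    /* 128-bit right shift by sh (0..7), done on two 64-bit halves. */
    static void vsr128(uint64_t *hi, uint64_t *lo, unsigned sh)
    {
        /* (hi << 32) << (32 - sh) equals hi << (64 - sh) without ever
         * using a shift count of 64. */
        uint64_t carry = (*hi << 32) << (32 - sh);
        *lo = (*lo >> sh) | carry;
        *hi >>= sh;
    }

    int main(void)
    {
        uint64_t hi = 0x0123456789abcdefULL, lo = 0xfedcba9876543210ULL;
        for (unsigned sh = 0; sh < 8; sh++) {
            uint64_t h = hi, l = lo;
            vsr128(&h, &l, sh);
            unsigned __int128 v = (((unsigned __int128)hi << 64) | lo) >> sh;
            assert(h == (uint64_t)(v >> 64) && l == (uint64_t)v);
        }
        return 0;
    }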
+
+/*
+ * vgbbd VRT,VRB - Vector Gather Bits by Bytes by Doubleword
+ *
+ * Every i-th bit (i in range 1 to 8) of each byte of a doubleword element
+ * of the source register is concatenated and placed into the i-th byte of
+ * the corresponding doubleword element of the destination register.
+ *
+ * The following solution is applied to both doubleword elements of the
+ * source register in parallel, in order to reduce the number of emitted
+ * instructions (which is why arrays are used):
+ * First, both doubleword elements of source register vB are placed in the
+ * corresponding elements of array avr. The bits are gathered in 2x8
+ * iterations (two for loops). In the first iteration, bit 1 of byte 1,
+ * bit 2 of byte 2, ..., bit 8 of byte 8 are already in their final spots,
+ * so avr[i], i={0,1}, can be ANDed with tcg_mask directly. In every
+ * following iteration, avr[i] and tcg_mask have to be shifted right by 7
+ * and 8 places respectively, which brings bit 1 of byte 2, bit 2 of byte
+ * 3, ..., bit 7 of byte 8 into their final spots, so the shifted avr
+ * values (saved in tmp) can be ANDed with the new value of tcg_mask, and
+ * so on. After the first 8 iterations (the first loop), all the first
+ * bits are in their final places, all the second bits except one are in
+ * their places, ..., and only a single eighth bit (the one from the
+ * eighth byte) is in its place. The second loop performs the symmetric
+ * operations to bring the remaining bits into their final spots. The
+ * results for the first and second doubleword elements are accumulated in
+ * result[0] and result[1] respectively, and are finally stored in the
+ * corresponding doubleword elements of destination register vD.
+ */
+static void trans_vgbbd(DisasContext *ctx)
+{
+ int VT = rD(ctx->opcode);
+ int VB = rB(ctx->opcode);
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ uint64_t mask = 0x8040201008040201ULL;
+ int i, j;
+
+ TCGv_i64 result[2];
+ result[0] = tcg_temp_new_i64();
+ result[1] = tcg_temp_new_i64();
+ TCGv_i64 avr[2];
+ avr[0] = tcg_temp_new_i64();
+ avr[1] = tcg_temp_new_i64();
+ TCGv_i64 tcg_mask = tcg_temp_new_i64();
+
+ tcg_gen_movi_i64(tcg_mask, mask);
+ for (j = 0; j < 2; j++) {
+ get_avr64(avr[j], VB, j);
+ tcg_gen_and_i64(result[j], avr[j], tcg_mask);
+ }
+ for (i = 1; i < 8; i++) {
+ tcg_gen_movi_i64(tcg_mask, mask >> (i * 8));
+ for (j = 0; j < 2; j++) {
+ tcg_gen_shri_i64(tmp, avr[j], i * 7);
+ tcg_gen_and_i64(tmp, tmp, tcg_mask);
+ tcg_gen_or_i64(result[j], result[j], tmp);
+ }
+ }
+ for (i = 1; i < 8; i++) {
+ tcg_gen_movi_i64(tcg_mask, mask << (i * 8));
+ for (j = 0; j < 2; j++) {
+ tcg_gen_shli_i64(tmp, avr[j], i * 7);
+ tcg_gen_and_i64(tmp, tmp, tcg_mask);
+ tcg_gen_or_i64(result[j], result[j], tmp);
+ }
+ }
+ for (j = 0; j < 2; j++) {
+ set_avr64(VT, result[j], j);
+ }
+
+ tcg_temp_free_i64(tmp);
+ tcg_temp_free_i64(tcg_mask);
+ tcg_temp_free_i64(result[0]);
+ tcg_temp_free_i64(result[1]);
+ tcg_temp_free_i64(avr[0]);
+ tcg_temp_free_i64(avr[1]);
+}
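The mask/shift gather is the classic 8x8 bit-matrix transpose. A standalone C sketch of the same scheme for a single doubleword, checked against a naive per-bit oracle (not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    /* Mask/shift gather for one doubleword, mirroring the loops above. */
    static uint64_t gbbd(uint64_t x)
    {
        const uint64_t mask = 0x8040201008040201ULL;
        uint64_t r = x & mask;
        for (int i = 1; i < 8; i++) {
            r |= (x >> (i * 7)) & (mask >> (i * 8));
            r |= (x << (i * 7)) & (mask << (i * 8));
        }
        return r;
    }

    /* Naive oracle: bit j of byte k moves to bit k of byte j. */
    static uint64_t gbbd_ref(uint64_t x)
    {
        uint64_t r = 0;
        for (int k = 0; k < 8; k++) {
            for (int j = 0; j < 8; j++) {
                r |= ((x >> (8 * k + j)) & 1) << (8 * j + k);
            }
        }
        return r;
    }

    int main(void)
    {
        uint64_t vals[] = { 0, ~0ULL, 0x8040201008040201ULL,
                            0x0123456789abcdefULL, 0xdeadbeefcafebabeULL };
        for (unsigned i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
            assert(gbbd(vals[i]) == gbbd_ref(vals[i]));
        }
        return 0;
    }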
+
+/*
+ * vclzw VRT,VRB - Vector Count Leading Zeros Word
+ *
+ * Count the number of leading zero bits in each word element of the
+ * source register and place the result in the corresponding word element
+ * of the destination register.
+ */
+static void trans_vclzw(DisasContext *ctx)
+{
+ int VT = rD(ctx->opcode);
+ int VB = rB(ctx->opcode);
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ int i;
+
+    /*
+     * Perform the count for every word element using tcg_gen_clzi_i32.
+     * Each word is loaded and stored at the same byte offset, so the
+     * host-dependent element order within the register does not matter.
+     */
+ for (i = 0; i < 4; i++) {
+ tcg_gen_ld_i32(tmp, cpu_env,
+ offsetof(CPUPPCState, vsr[32 + VB].u64[0]) + i * 4);
+ tcg_gen_clzi_i32(tmp, tmp, 32);
+ tcg_gen_st_i32(tmp, cpu_env,
+ offsetof(CPUPPCState, vsr[32 + VT].u64[0]) + i * 4);
+ }
+
+ tcg_temp_free_i32(tmp);
+}
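The third argument of tcg_gen_clzi_i32 supplies the result for a zero input, here 32 as the architecture requires. A host-side analogue, assuming GCC/Clang builtins:

    #include <assert.h>
    #include <stdint.h>

    /* C analogue of tcg_gen_clzi_i32(t, t, 32). */
    static uint32_t clz32(uint32_t x)
    {
        return x ? (uint32_t)__builtin_clz(x) : 32; /* clz(0) is undefined */
    }

    int main(void)
    {
        assert(clz32(0) == 32);
        assert(clz32(1) == 31);
        assert(clz32(0x80000000u) == 0);
        return 0;
    }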
+
+/*
+ * vclzd VRT,VRB - Vector Count Leading Zeros Doubleword
+ *
+ * Count the number of leading zero bits in each doubleword element of the
+ * source register and place the result in the corresponding doubleword
+ * element of the destination register.
+ */
+static void trans_vclzd(DisasContext *ctx)
+{
+ int VT = rD(ctx->opcode);
+ int VB = rB(ctx->opcode);
+ TCGv_i64 avr = tcg_temp_new_i64();
+
+ /* high doubleword */
+ get_avr64(avr, VB, true);
+ tcg_gen_clzi_i64(avr, avr, 64);
+ set_avr64(VT, avr, true);
+
+ /* low doubleword */
+ get_avr64(avr, VB, false);
+ tcg_gen_clzi_i64(avr, avr, 64);
+ set_avr64(VT, avr, false);
+
+ tcg_temp_free_i64(avr);
+}
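vclzd relies on the same convention for tcg_gen_clzi_i64, returning 64 for a zero doubleword (same builtin assumptions as above):

    #include <assert.h>
    #include <stdint.h>

    /* C analogue of tcg_gen_clzi_i64(t, t, 64). */
    static uint64_t clz64(uint64_t x)
    {
        return x ? (uint64_t)__builtin_clzll(x) : 64;
    }

    int main(void)
    {
        assert(clz64(0) == 64);
        assert(clz64(1) == 63);
        assert(clz64(1ULL << 63) == 0);
        return 0;
    }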
+
GEN_VXFORM(vmuloub, 4, 0);
GEN_VXFORM(vmulouh, 4, 1);
GEN_VXFORM(vmulouw, 4, 2);
@@ -627,11 +906,11 @@ GEN_VXFORM(vrld, 2, 3);
GEN_VXFORM(vrldmi, 2, 3);
GEN_VXFORM_DUAL(vrld, PPC_NONE, PPC2_ALTIVEC_207, \
vrldmi, PPC_NONE, PPC2_ISA300)
-GEN_VXFORM(vsl, 2, 7);
+GEN_VXFORM_TRANS(vsl, 2, 7);
GEN_VXFORM(vrldnm, 2, 7);
GEN_VXFORM_DUAL(vsl, PPC_ALTIVEC, PPC_NONE, \
vrldnm, PPC_NONE, PPC2_ISA300)
-GEN_VXFORM(vsr, 2, 11);
+GEN_VXFORM_TRANS(vsr, 2, 11);
GEN_VXFORM_ENV(vpkuhum, 7, 0);
GEN_VXFORM_ENV(vpkuwum, 7, 1);
GEN_VXFORM_ENV(vpkudum, 7, 17);
@@ -662,6 +941,8 @@ GEN_VXFORM_DUAL(vmrgow, PPC_NONE, PPC2_ALTIVEC_207,
GEN_VXFORM_HETRO(vextubrx, 6, 28)
GEN_VXFORM_HETRO(vextuhrx, 6, 29)
GEN_VXFORM_HETRO(vextuwrx, 6, 30)
+GEN_VXFORM_TRANS(lvsl, 6, 31)
+GEN_VXFORM_TRANS(lvsr, 6, 32)
GEN_VXFORM_DUAL(vmrgew, PPC_NONE, PPC2_ALTIVEC_207, \
vextuwrx, PPC_NONE, PPC2_ISA300)
@@ -1028,8 +1309,8 @@ GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23)
GEN_VXFORM_NOA(vclzb, 1, 28)
GEN_VXFORM_NOA(vclzh, 1, 29)
-GEN_VXFORM_NOA(vclzw, 1, 30)
-GEN_VXFORM_NOA(vclzd, 1, 31)
+GEN_VXFORM_TRANS(vclzw, 1, 30)
+GEN_VXFORM_TRANS(vclzd, 1, 31)
GEN_VXFORM_NOA_2(vnegw, 1, 24, 6)
GEN_VXFORM_NOA_2(vnegd, 1, 24, 7)
GEN_VXFORM_NOA_2(vextsb2w, 1, 24, 16)
@@ -1057,7 +1338,7 @@ GEN_VXFORM_DUAL(vclzd, PPC_NONE, PPC2_ALTIVEC_207, \
vpopcntd, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM(vbpermd, 6, 23);
GEN_VXFORM(vbpermq, 6, 21);
-GEN_VXFORM_NOA(vgbbd, 6, 20);
+GEN_VXFORM_TRANS(vgbbd, 6, 20);
GEN_VXFORM(vpmsumb, 4, 16)
GEN_VXFORM(vpmsumh, 4, 17)
GEN_VXFORM(vpmsumw, 4, 18)