diff options
author | Tom Musta <tommusta@gmail.com> | 2014-02-12 15:23:15 -0600 |
---|---|---|
committer | Alexander Graf <agraf@suse.de> | 2014-03-05 03:06:59 +0100 |
commit | b8476fc7c6e205f0dc9fff3cfa199eee8af0fa27 (patch) | |
tree | 30b9cc2b867fc64d3a288450e6cd44c41e75d1c2 | |
parent | f1064f612c9783136f2c59b94a4a8da70d3a09e3 (diff) |
target-ppc: Altivec 2.07: Vector Polynomial Multiply Sum
This patch adds the Vector Polynomial Multiply Sum instructions
introduced in Power ISA Version 2.07:
- Vector Polynomial Multiply Sum Byte (vpmsumb)
- Vector Polynomial Multiply Sum Halfword (vpmsumh)
- Vector Polynomial Multiply Sum Word (vpmsumw)
- Vector Polynomial Multiply Sum Doubleword (vpmsumd)
Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
-rw-r--r-- | target-ppc/helper.h | 4 | ||||
-rw-r--r-- | target-ppc/int_helper.c | 70 | ||||
-rw-r--r-- | target-ppc/translate.c | 8 |
3 files changed, 82 insertions, 0 deletions
diff --git a/target-ppc/helper.h b/target-ppc/helper.h index aca712fa48..ca9eba58fa 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -311,6 +311,10 @@ DEF_HELPER_2(vpopcntw, void, avr, avr) DEF_HELPER_2(vpopcntd, void, avr, avr) DEF_HELPER_3(vbpermq, void, avr, avr, avr) DEF_HELPER_2(vgbbd, void, avr, avr) +DEF_HELPER_3(vpmsumb, void, avr, avr, avr) +DEF_HELPER_3(vpmsumh, void, avr, avr, avr) +DEF_HELPER_3(vpmsumw, void, avr, avr, avr) +DEF_HELPER_3(vpmsumd, void, avr, avr, avr) DEF_HELPER_2(xsadddp, void, env, i32) DEF_HELPER_2(xssubdp, void, env, i32) diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index cd0b88a8bc..4e8e507819 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1351,6 +1351,76 @@ void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b) r->u64[1] = t[1]; } +#define PMSUM(name, srcfld, trgfld, trgtyp) \ +void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ +{ \ + int i, j; \ + trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \ + \ + VECTOR_FOR_INORDER_I(i, srcfld) { \ + prod[i] = 0; \ + for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ + if (a->srcfld[i] & (1ull<<j)) { \ + prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ + } \ + } \ + } \ + \ + VECTOR_FOR_INORDER_I(i, trgfld) { \ + r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \ + } \ +} + +PMSUM(vpmsumb, u8, u16, uint16_t) +PMSUM(vpmsumh, u16, u32, uint32_t) +PMSUM(vpmsumw, u32, u64, uint64_t) + +void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + +#ifdef CONFIG_INT128 + int i, j; + __uint128_t prod[2]; + + VECTOR_FOR_INORDER_I(i, u64) { + prod[i] = 0; + for (j = 0; j < 64; j++) { + if (a->u64[i] & (1ull<<j)) { + prod[i] ^= (((__uint128_t)b->u64[i]) << j); + } + } + } + + r->u128 = prod[0] ^ prod[1]; + +#else + int i, j; + ppc_avr_t prod[2]; + + VECTOR_FOR_INORDER_I(i, u64) { + prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0; + for (j = 0; j < 64; j++) { + if (a->u64[i] & (1ull<<j)) { + ppc_avr_t bshift; + if (j == 0) { + bshift.u64[HI_IDX] = 0; + 
bshift.u64[LO_IDX] = b->u64[i]; + } else { + bshift.u64[HI_IDX] = b->u64[i] >> (64-j); + bshift.u64[LO_IDX] = b->u64[i] << j; + } + prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX]; + prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX]; + } + } + } + + r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX]; + r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX]; +#endif +} + + #if defined(HOST_WORDS_BIGENDIAN) #define PKBIG 1 #else diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 75e7f95e7c..706bd5da92 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -7372,6 +7372,10 @@ GEN_VXFORM_DUAL(vclzd, PPC_NONE, PPC2_ALTIVEC_207, \ vpopcntd, PPC_NONE, PPC2_ALTIVEC_207) GEN_VXFORM(vbpermq, 6, 21); GEN_VXFORM_NOA(vgbbd, 6, 20); +GEN_VXFORM(vpmsumb, 4, 16) +GEN_VXFORM(vpmsumh, 4, 17) +GEN_VXFORM(vpmsumw, 4, 18) +GEN_VXFORM(vpmsumd, 4, 19) /*** VSX extension ***/ @@ -10623,6 +10627,10 @@ GEN_VXFORM_DUAL(vclzd, vpopcntd, 1, 31, PPC_NONE, PPC2_ALTIVEC_207), GEN_VXFORM_207(vbpermq, 6, 21), GEN_VXFORM_207(vgbbd, 6, 20), +GEN_VXFORM_207(vpmsumb, 4, 16), +GEN_VXFORM_207(vpmsumh, 4, 17), +GEN_VXFORM_207(vpmsumw, 4, 18), +GEN_VXFORM_207(vpmsumd, 4, 19), GEN_HANDLER_E(lxsdx, 0x1F, 0x0C, 0x12, 0, PPC_NONE, PPC2_VSX), GEN_HANDLER_E(lxsiwax, 0x1F, 0x0C, 0x02, 0, PPC_NONE, PPC2_VSX207), |