From 86ba37edcb6556b124840f04d180a34ffcc71086 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Tue, 7 Jan 2014 10:05:49 -0600 Subject: target-ppc: Add ISA2.06 bpermd Instruction This patch adds the Bit Permute Doubleword (bpermd) instruction, which was introduced in Power ISA 2.06 as part of the base 64-bit architecture. Signed-off-by: Tom Musta Reviewed-by: Richard Henderson Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index e50bdd20ec..0e7afb3dea 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -53,6 +53,26 @@ target_ulong helper_cntlzd(target_ulong t) } #endif +#if defined(TARGET_PPC64) + +uint64_t helper_bpermd(uint64_t rs, uint64_t rb) +{ + int i; + uint64_t ra = 0; + + for (i = 0; i < 8; i++) { + int index = (rs >> (i*8)) & 0xFF; + if (index < 64) { + if (rb & (1ull << (63-index))) { + ra |= 1 << i; + } + } + } + return ra; +} + +#endif + target_ulong helper_cmpb(target_ulong rs, target_ulong rb) { target_ulong mask = 0xff; -- cgit v1.2.3 From 98d1eb2748e84f9e3118d1bd2495f0cc917ac18d Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Tue, 7 Jan 2014 10:05:51 -0600 Subject: target-ppc: Add ISA2.06 divdeu[o] Instructions This patch adds the Divide Doubleword Extended Unsigned instructions. This instruction requires dividing a 128-bit value by a 64 bit value. Since 128 bit integer division is not supported in TCG, a helper is used. An architecture independent 128-bit division routine is added to host-utils. Signed-off-by: Tom Musta Reviewed-by: Richard Henderson [agraf: use ||] Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 0e7afb3dea..6f3d8fd805 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -41,6 +41,33 @@ uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2) } #endif +#if defined(TARGET_PPC64) + +uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) +{ + uint64_t rt = 0; + int overflow = 0; + + overflow = divu128(&rt, &ra, rb); + + if (unlikely(overflow)) { + rt = 0; /* Undefined */ + } + + if (oe) { + if (unlikely(overflow)) { + env->so = env->ov = 1; + } else { + env->ov = 0; + } + } + + return rt; +} + +#endif + + target_ulong helper_cntlzw(target_ulong t) { return clz32(t); -- cgit v1.2.3 From e44259b6d4f4de69a868510a198b2696f24118a1 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Tue, 7 Jan 2014 10:05:52 -0600 Subject: target-ppc: Add ISA2.06 divde[o] Instructions This patch adds the Divide Doubleword Extended instructions. The implementation builds on the unsigned helper provided in the previous patch. Signed-off-by: Tom Musta Reviewed-by: Richard Henderson Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 6f3d8fd805..920dba7891 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -65,6 +65,29 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) return rt; } +uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) +{ + int64_t rt = 0; + int64_t ra = (int64_t)rau; + int64_t rb = (int64_t)rbu; + int overflow = divs128(&rt, &ra, rb); + + if (unlikely(overflow)) { + rt = 0; /* Undefined */ + } + + if (oe) { + + if (unlikely(overflow)) { + env->so = env->ov = 1; + } else { + env->ov = 0; + } + } + + return rt; +} + #endif -- cgit v1.2.3 From 6a4fda3358ca5a21e17d553074f74d512745c4f6 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Tue, 7 Jan 2014 10:05:53 -0600 Subject: target-ppc: Add ISA 2.06 divweu[o] Instructions This patch addes the Unsigned Divide Word Extended instructions which were introduced in Power ISA 2.06B. Signed-off-by: Tom Musta Reviewed-by: Richard Henderson Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 920dba7891..45586be27c 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -41,6 +41,37 @@ uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2) } #endif +target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, + uint32_t oe) +{ + uint64_t rt = 0; + int overflow = 0; + + uint64_t dividend = (uint64_t)ra << 32; + uint64_t divisor = (uint32_t)rb; + + if (unlikely(divisor == 0)) { + overflow = 1; + } else { + rt = dividend / divisor; + overflow = rt > UINT32_MAX; + } + + if (unlikely(overflow)) { + rt = 0; /* Undefined */ + } + + if (oe) { + if (unlikely(overflow)) { + env->so = env->ov = 1; + } else { + env->ov = 0; + } + } + + return (target_ulong)rt; +} + #if defined(TARGET_PPC64) uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) -- cgit v1.2.3 From a98eb9e99df4c11966fcd56cbcb6f06e6f413a89 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Tue, 7 Jan 2014 10:05:54 -0600 Subject: target-ppc: Add ISA 2.06 divwe[o] Instructions This patch addes the signed Divide Word Extended instructions which were introduced in Power ISA 2.06B. Signed-off-by: Tom Musta Reviewed-by: Richard Henderson Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 45586be27c..71db3fb076 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -72,6 +72,38 @@ target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, return (target_ulong)rt; } +target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, + uint32_t oe) +{ + int64_t rt = 0; + int overflow = 0; + + int64_t dividend = (int64_t)ra << 32; + int64_t divisor = (int64_t)((int32_t)rb); + + if (unlikely((divisor == 0) || + ((divisor == -1ull) && (dividend == INT64_MIN)))) { + overflow = 1; + } else { + rt = dividend / divisor; + overflow = rt != (int32_t)rt; + } + + if (unlikely(overflow)) { + rt = 0; /* Undefined */ + } + + if (oe) { + if (unlikely(overflow)) { + env->so = env->ov = 1; + } else { + env->ov = 0; + } + } + + return (target_ulong)rt; +} + #if defined(TARGET_PPC64) uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) -- cgit v1.2.3 From 56eabc750862b985a6ddfc3905b534576eeee33e Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:22:59 -0600 Subject: target-ppc: Altivec 2.07: Add/Subtract Unsigned Doubleword Modulo This patch adds two Altivec unsigned doublword modulo instructions that are introduced in Power ISA Version V2.07: - vaddudm : Vector Add Unsigned Doubleword Modulo - vsubudm : Vector Subtrace Unsigned Doubleword Modulo Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 1 + 1 file changed, 1 insertion(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 71db3fb076..3e36c0aa3c 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -504,6 +504,7 @@ void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) VARITH(ubm, u8) VARITH(uhm, u16) VARITH(uwm, u32) +VARITH(udm, u64) #undef VARITH_DO #undef VARITH -- cgit v1.2.3 From aa9e930c8870d06a20b356785d3ec7d9a942a29f Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:00 -0600 Subject: target-ppc: Altivec 2.07: Change VMUL_DO to Support 64-bit Integers This VMUL_DO macro provides support for the various vmule* and vmulo* instructions. These instructions multiply vector elements, producing products that are one size larger; e.g. vmuleub multiplies unsigned 8-bit elements and produces a 16 bit unsigned element. The existing macro works correctly for the existing instructions (8-bit, and 16-bit source elements) but does not work correctly for 32-bit source elements. This patch adds an explicit cast to the multiplicands, forcing them to be of the target element type. This is required for the forthcoming patches that add the vmul[eo][us]w instructions. Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 3e36c0aa3c..20d34e6231 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -983,28 +983,30 @@ void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, } } -#define VMUL_DO(name, mul_element, prod_element, evenp) \ +#define VMUL_DO(name, mul_element, prod_element, cast, evenp) \ void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ int i; \ \ VECTOR_FOR_INORDER_I(i, prod_element) { \ if (evenp) { \ - r->prod_element[i] = a->mul_element[i * 2 + HI_IDX] * \ - b->mul_element[i * 2 + HI_IDX]; \ + r->prod_element[i] = \ + (cast)a->mul_element[i * 2 + HI_IDX] * \ + (cast)b->mul_element[i * 2 + HI_IDX]; \ } else { \ - r->prod_element[i] = a->mul_element[i * 2 + LO_IDX] * \ - b->mul_element[i * 2 + LO_IDX]; \ + r->prod_element[i] = \ + (cast)a->mul_element[i * 2 + LO_IDX] * \ + (cast)b->mul_element[i * 2 + LO_IDX]; \ } \ } \ } -#define VMUL(suffix, mul_element, prod_element) \ - VMUL_DO(mule##suffix, mul_element, prod_element, 1) \ - VMUL_DO(mulo##suffix, mul_element, prod_element, 0) -VMUL(sb, s8, s16) -VMUL(sh, s16, s32) -VMUL(ub, u8, u16) -VMUL(uh, u16, u32) +#define VMUL(suffix, mul_element, prod_element, cast) \ + VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \ + VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0) +VMUL(sb, s8, s16, int16_t) +VMUL(sh, s16, s32, int32_t) +VMUL(ub, u8, u16, uint16_t) +VMUL(uh, u16, u32, uint32_t) #undef VMUL_DO #undef VMUL -- cgit v1.2.3 From 63be09365a9a4658060e85ce013cc0beaeac25e5 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:01 -0600 Subject: target-ppc: Altivec 2.07: Multiply Even/Odd Word Instructions This patch adds the Multilpy Even/Odd Word instructions that are introduced in Power ISA Version 2.07: - Vector Multiply Even Unsigned Word (vmuleuw) - Vector Multiply Even Signed Word (vmulesw) - Vector Multiply Odd Unsigned Word (vmulouw) - Vector Multiply Odd Signed Word (vmulosw) Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 20d34e6231..09590c71a2 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1005,8 +1005,10 @@ void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0) VMUL(sb, s8, s16, int16_t) VMUL(sh, s16, s32, int32_t) +VMUL(sw, s32, s64, int64_t) VMUL(ub, u8, u16, uint16_t) VMUL(uh, u16, u32, uint32_t) +VMUL(uw, u32, u64, uint64_t) #undef VMUL_DO #undef VMUL -- cgit v1.2.3 From 953f0f5842a8515fd85ae28ebcdc219f8e7b76fe Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:02 -0600 Subject: target-ppc: Altivec 2.07: vmuluw Instruction This patch adds the Vector Multiply Unsigned Word Modulo (vmuluwm) instruction. The existing VARITH_DO macro is re-used to (trivially) instantiate the helper code. Since bits 21-31 of any vmuluwm instruction is 137, the instruction is coded as a dual to vmulouw (bits 21-31 = 136). Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 1 + 1 file changed, 1 insertion(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 09590c71a2..7a50f4a2cf 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -505,6 +505,7 @@ VARITH(ubm, u8) VARITH(uhm, u16) VARITH(uwm, u32) VARITH(udm, u64) +VARITH_DO(muluwm, *, u32) #undef VARITH_DO #undef VARITH -- cgit v1.2.3 From f293f04ab5301f688ce7c9fe3006a787611c2485 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:03 -0600 Subject: target-ppc: Altivec 2.07: Add Vector Count Leading Zeroes This patch adds the Vector Count Leading Zeroes instructions introduced in Power ISA Version 2.07 - vclzb, vclzh, vclzw and vclzd. Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 7a50f4a2cf..7fca9f093b 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1524,6 +1524,35 @@ VUPK(lsh, s32, s16, UPKLO) #undef UPKHI #undef UPKLO +#define VGENERIC_DO(name, element) \ + void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ + { \ + int i; \ + \ + VECTOR_FOR_INORDER_I(i, element) { \ + r->element[i] = name(b->element[i]); \ + } \ + } + +#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) +#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) +#define clzw(v) clz32((v)) +#define clzd(v) clz64((v)) + +VGENERIC_DO(clzb, u8) +VGENERIC_DO(clzh, u16) +VGENERIC_DO(clzw, u32) +VGENERIC_DO(clzd, u64) + +#undef clzb +#undef clzh +#undef clzw +#undef clzd + + +#undef VGENERIC_DO + + #undef VECTOR_FOR_INORDER_I #undef HI_IDX #undef LO_IDX -- cgit v1.2.3 From e13500b3c36533e22842bac28f40b2c86ded8c0c Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:04 -0600 Subject: target-ppc: Altivec 2.07: Vector Population Count Instructions This patch adds the Vector Population Count instructions introduced in Power ISA Version 2.07: vpopcntb, vpopcnth, vpopcntw and vpopcntd. Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 7fca9f093b..3b67ae363f 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1549,6 +1549,20 @@ VGENERIC_DO(clzd, u64) #undef clzw #undef clzd +#define popcntb(v) ctpop8(v) +#define popcnth(v) ctpop16(v) +#define popcntw(v) ctpop32(v) +#define popcntd(v) ctpop64(v) + +VGENERIC_DO(popcntb, u8) +VGENERIC_DO(popcnth, u16) +VGENERIC_DO(popcntw, u32) +VGENERIC_DO(popcntd, u64) + +#undef popcntb +#undef popcnth +#undef popcntw +#undef popcntd #undef VGENERIC_DO -- cgit v1.2.3 From 8203e31b547d38a8ec0ce04fe19e6cefe75b4391 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:05 -0600 Subject: target-ppc: Altivec 2.07: Vector Min/Max Doubleword Instructions This patch adds the Vector Minimum and Maximum Doubleword instructions that are introduced in Power ISA Version 2.07. Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 3b67ae363f..fc2bff1f0f 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -824,9 +824,11 @@ void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, VMINMAX(sb, s8) VMINMAX(sh, s16) VMINMAX(sw, s32) +VMINMAX(sd, s64) VMINMAX(ub, u8) VMINMAX(uh, u16) VMINMAX(uw, u32) +VMINMAX(ud, u64) #undef VMINMAX_DO #undef VMINMAX -- cgit v1.2.3 From 024215b24210e7c7cbe129279ea94031dd55c791 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:06 -0600 Subject: target-ppc: Altivec 2.07: Pack Doubleword Instructions This patch adds the Vector Pack Doubleword instructions introduced in Power ISA Version 2.07: - Vector Pack Signed Doubleword Signed Saturate (vpksdss) - Vector Pack Signed Doubleword Unsigned Saturate (vpksdus) - Vector Pack Unsigned Doubleword Unsigned Modulo (vpkudum) - Vector Pack Unsigned Doubleword Unsigned Saturate (vpkudus) Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index fc2bff1f0f..534efb59af 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1089,10 +1089,14 @@ VPK(shss, s16, s8, cvtshsb, 1) VPK(shus, s16, u8, cvtshub, 1) VPK(swss, s32, s16, cvtswsh, 1) VPK(swus, s32, u16, cvtswuh, 1) +VPK(sdss, s64, s32, cvtsdsw, 1) +VPK(sdus, s64, u32, cvtsduw, 1) VPK(uhus, u16, u8, cvtuhub, 1) VPK(uwus, u32, u16, cvtuwuh, 1) +VPK(udus, u64, u32, cvtuduw, 1) VPK(uhum, u16, u8, I, 0) VPK(uwum, u32, u16, I, 0) +VPK(udum, u64, u32, I, 0) #undef I #undef VPK #undef PKBIG -- cgit v1.2.3 From 4430e076632bc904e041914e410c51d2a9a43a99 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:07 -0600 Subject: target-ppc: Altivec 2.07: Unpack Signed Word Instructions This patch adds the Unpack Signed Word instructions introduced in Power ISA Version 2.07: - Vector Unpack High Signed Word (vupkusw) - Vector Unpack Low Signed Word (vupklsw) Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 534efb59af..56e8d9a3ed 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1524,8 +1524,10 @@ VUPKPX(hpx, UPKHI) } VUPK(hsb, s16, s8, UPKHI) VUPK(hsh, s32, s16, UPKHI) +VUPK(hsw, s64, s32, UPKHI) VUPK(lsb, s16, s8, UPKLO) VUPK(lsh, s32, s16, UPKLO) +VUPK(lsw, s64, s32, UPKLO) #undef VUPK #undef UPKHI #undef UPKLO -- cgit v1.2.3 From 818692ff95f143e640b44726da59646ea7fbcc23 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:09 -0600 Subject: target-ppc: Altivec 2.07: Change Bit Masks to Support 64-bit Rotates and Shifts Existing code in the VROTATE, VSL and VSR macros for the Altivec rotate and shift helpers uses a formula to compute a bit mask used to extract the rotate/shift amount from the VRB register. What is desired is: mask = (1 << (3 + log2(sizeof(element)))) - 1 but what is implemented is: mask = (1 << (3 + (sizeof(element)/2))) - 1 This produces correct answers when "element" is uint8_t, uint16_t or uint_32t. But it breaks down when element is uint64_t. This patch corrects the situation. Since the mask is known at compile time, the macros are changed to simply accept the mask as an argument. Subsequent patches in this series will add double-word variants of rotates and shifts and thus take advantage of this fix. Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 56e8d9a3ed..59b5a1fc87 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1128,23 +1128,20 @@ VRFI(p, float_round_up) VRFI(z, float_round_to_zero) #undef VRFI -#define VROTATE(suffix, element) \ +#define VROTATE(suffix, element, mask) \ void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ int i; \ \ for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - unsigned int mask = ((1 << \ - (3 + (sizeof(a->element[0]) >> 1))) \ - - 1); \ unsigned int shift = b->element[i] & mask; \ r->element[i] = (a->element[i] << shift) | \ (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ } \ } -VROTATE(b, u8) -VROTATE(h, u16) -VROTATE(w, u32) +VROTATE(b, u8, 0x7) +VROTATE(h, u16, 0xF) +VROTATE(w, u32, 0x1F) #undef VROTATE void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) @@ -1225,23 +1222,20 @@ VSHIFT(r, RIGHT) #undef LEFT #undef RIGHT -#define VSL(suffix, element) \ +#define VSL(suffix, element, mask) \ void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ int i; \ \ for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - unsigned int mask = ((1 << \ - (3 + (sizeof(a->element[0]) >> 1))) \ - - 1); \ unsigned int shift = b->element[i] & mask; \ \ r->element[i] = a->element[i] << shift; \ } \ } -VSL(b, u8) -VSL(h, u16) -VSL(w, u32) +VSL(b, u8, 0x7) +VSL(h, u16, 0x0F) +VSL(w, u32, 0x1F) #undef VSL void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) @@ -1325,26 +1319,22 @@ VSPLTI(h, s16, int16_t) VSPLTI(w, s32, int32_t) #undef VSPLTI -#define VSR(suffix, element) \ +#define VSR(suffix, element, mask) \ void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ int i; \ \ for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - unsigned int mask = ((1 << \ - (3 + (sizeof(a->element[0]) >> 1))) \ - - 1); \ unsigned int shift = b->element[i] & mask; \ - \ r->element[i] = a->element[i] >> shift; \ } \ } -VSR(ab, s8) -VSR(ah, s16) -VSR(aw, s32) -VSR(b, u8) -VSR(h, u16) -VSR(w, u32) +VSR(ab, s8, 0x7) +VSR(ah, s16, 0xF) +VSR(aw, s32, 0x1F) +VSR(b, u8, 0x7) +VSR(h, u16, 0xF) +VSR(w, u32, 0x1F) #undef VSR void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) -- cgit v1.2.3 From 2fdf78e649b81a14e2c65770fdb0ac3e656a35c5 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:10 -0600 Subject: target-ppc: Altivec 2.07: Vector Doubleword Rotate and Shift Instructions This patch adds the vector doublword rotate and shift instructions introduced in Power ISA Version 2.07: - Vector Rotate Left Doubleword instruction (vrld) - Vector Shift Left Doubleword (vsld) - Vector Shift Right Doubleword (vsrd) - Vector Shift Right Algegbraic Doubleword (vsrad) Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 59b5a1fc87..b4a7298523 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1142,6 +1142,7 @@ VRFI(z, float_round_to_zero) VROTATE(b, u8, 0x7) VROTATE(h, u16, 0xF) VROTATE(w, u32, 0x1F) +VROTATE(d, u64, 0x3F) #undef VROTATE void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) @@ -1236,6 +1237,7 @@ VSHIFT(r, RIGHT) VSL(b, u8, 0x7) VSL(h, u16, 0x0F) VSL(w, u32, 0x1F) +VSL(d, u64, 0x3F) #undef VSL void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) @@ -1332,9 +1334,11 @@ VSPLTI(w, s32, int32_t) VSR(ab, s8, 0x7) VSR(ah, s16, 0xF) VSR(aw, s32, 0x1F) +VSR(ad, s64, 0x3F) VSR(b, u8, 0x7) VSR(h, u16, 0xF) VSR(w, u32, 0x1F) +VSR(d, u64, 0x3F) #undef VSR void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) -- cgit v1.2.3 From b41da4ebb2658c4abaaab01e64b9d0bb67dba003 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:11 -0600 Subject: target-ppc: Altivec 2.07: Quadword Addition and Subtracation This patch adds the Vector Quadword Addition and Subtraction instructions introduced in Power ISA Version 2.07: - Vector Add Unsigned Quadword Modulo (vadduqm) - Vector Add & Write Carry Unsigned Quadword (vaddcuq) - Vector Add Extended Unsigned Quadword (vaddeuqm) - Vector Add Extended & Write Carry Unsigned Quadword (vaddecuq) - Vector Subtract Unsigned Quadword Modulo (vsubuqm) - Vector Subtract & Write Carry Unsigned Quadword (vsubcuq) - Vector Subtract Extended Unsigned Quadword (vsubeuqm) - Vector Subtract Extended & Write Carry Unsigned Quadword (vsubecuq) Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 185 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index b4a7298523..72fb13ccfb 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1568,6 +1568,191 @@ VGENERIC_DO(popcntd, u64) #undef VGENERIC_DO +#if defined(HOST_WORDS_BIGENDIAN) +#define QW_ONE { .u64 = { 0, 1 } } +#else +#define QW_ONE { .u64 = { 1, 0 } } +#endif + +#ifndef CONFIG_INT128 + +static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) +{ + t->u64[0] = ~a.u64[0]; + t->u64[1] = ~a.u64[1]; +} + +static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) +{ + if (a.u64[HI_IDX] < b.u64[HI_IDX]) { + return -1; + } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) { + return 1; + } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) { + return -1; + } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) { + return 1; + } else { + return 0; + } +} + +static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) +{ + t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX]; + t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] + + (~a.u64[LO_IDX] < b.u64[LO_IDX]); +} + +static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) +{ + ppc_avr_t not_a; + t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX]; + t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] + + (~a.u64[LO_IDX] < b.u64[LO_IDX]); + avr_qw_not(¬_a, a); + return avr_qw_cmpu(not_a, b) < 0; +} + +#endif + +void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ +#ifdef CONFIG_INT128 + r->u128 = a->u128 + b->u128; +#else + avr_qw_add(r, *a, *b); +#endif +} + +void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ +#ifdef CONFIG_INT128 + r->u128 = a->u128 + b->u128 + (c->u128 & 1); +#else + + if (c->u64[LO_IDX] & 1) { + ppc_avr_t tmp; + + tmp.u64[HI_IDX] = 0; + tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1; + avr_qw_add(&tmp, *a, tmp); + avr_qw_add(r, tmp, *b); + } else { + avr_qw_add(r, *a, *b); + } +#endif +} + +void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ +#ifdef CONFIG_INT128 + r->u128 = (~a->u128 < b->u128); +#else + ppc_avr_t not_a; + + avr_qw_not(¬_a, *a); + + r->u64[HI_IDX] = 0; + r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0); +#endif +} + +void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ +#ifdef CONFIG_INT128 + int carry_out = (~a->u128 < b->u128); + if (!carry_out && (c->u128 & 1)) { + carry_out = ((a->u128 + b->u128 + 1) == 0) && + ((a->u128 != 0) || (b->u128 != 0)); + } + r->u128 = carry_out; +#else + + int carry_in = c->u64[LO_IDX] & 1; + int carry_out = 0; + ppc_avr_t tmp; + + carry_out = avr_qw_addc(&tmp, *a, *b); + + if (!carry_out && carry_in) { + ppc_avr_t one = QW_ONE; + carry_out = avr_qw_addc(&tmp, tmp, one); + } + r->u64[HI_IDX] = 0; + r->u64[LO_IDX] = carry_out; +#endif +} + +void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ +#ifdef CONFIG_INT128 + r->u128 = a->u128 - b->u128; +#else + ppc_avr_t tmp; + ppc_avr_t one = QW_ONE; + + avr_qw_not(&tmp, *b); + avr_qw_add(&tmp, *a, tmp); + avr_qw_add(r, tmp, one); +#endif +} + +void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ +#ifdef CONFIG_INT128 + r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); +#else + ppc_avr_t tmp, sum; + + avr_qw_not(&tmp, *b); + avr_qw_add(&sum, *a, tmp); + + tmp.u64[HI_IDX] = 0; + tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1; + avr_qw_add(r, sum, tmp); +#endif +} + +void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ +#ifdef CONFIG_INT128 + r->u128 = (~a->u128 < ~b->u128) || + (a->u128 + ~b->u128 == (__uint128_t)-1); +#else + int carry = (avr_qw_cmpu(*a, *b) > 0); + if (!carry) { + ppc_avr_t tmp; + avr_qw_not(&tmp, *b); + avr_qw_add(&tmp, *a, tmp); + carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull)); + } + r->u64[HI_IDX] = 0; + r->u64[LO_IDX] = carry; +#endif +} + +void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ +#ifdef CONFIG_INT128 + r->u128 = + (~a->u128 < ~b->u128) || + ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); +#else + int carry_in = c->u64[LO_IDX] & 1; + int carry_out = (avr_qw_cmpu(*a, *b) > 0); + if (!carry_out && carry_in) { + ppc_avr_t tmp; + avr_qw_not(&tmp, *b); + avr_qw_add(&tmp, *a, tmp); + carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull)); + } + + r->u64[HI_IDX] = 0; + r->u64[LO_IDX] = carry_out; +#endif +} + #undef VECTOR_FOR_INORDER_I #undef HI_IDX -- cgit v1.2.3 From 4d82038e4198cdb8aacdf1d605c69cef29748761 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:12 -0600 Subject: target-ppc: Altivec 2.07: vbpermq Instruction This patch adds the Vector Bit Permute Quadword (vbpermq) instruction introduced in Power ISA Version 2.07. Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 72fb13ccfb..5885b7e5c1 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1038,6 +1038,37 @@ void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, *r = result; } +#if defined(HOST_WORDS_BIGENDIAN) +#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) +#define VBPERMQ_DW(index) (((index) & 0x40) != 0) +#else +#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)]) +#define VBPERMQ_DW(index) (((index) & 0x40) == 0) +#endif + +void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i; + uint64_t perm = 0; + + VECTOR_FOR_INORDER_I(i, u8) { + int index = VBPERMQ_INDEX(b, i); + + if (index < 128) { + uint64_t mask = (1ull << (63-(index & 0x3F))); + if (a->u64[VBPERMQ_DW(index)] & mask) { + perm |= (0x8000 >> i); + } + } + } + + r->u64[HI_IDX] = perm; + r->u64[LO_IDX] = 0; +} + +#undef VBPERMQ_INDEX +#undef VBPERMQ_DW + #if defined(HOST_WORDS_BIGENDIAN) #define PKBIG 1 #else -- cgit v1.2.3 From 6f3dab41fb9ecf1caf9779644e4267af6570dd71 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:13 -0600 Subject: target-ppc: Altivec 2.07: Doubleword Compares This patch adds the Vector Compare Doubleword instructions introduced by Power ISA Version 2.07: - Vector Compare Equal to Unsigned Doubleword (vcmpequd) - Vector Compare Greater Than Signed Doubleword (vcmpgtsd) - Vector Compare Greater Than Unsigned Doubleword (vcmpgtud) These instructions are encoded with bit 31 set to 1 and so are duals with vcmpeqfp, vcmpgtfp and vcmpbfp respectively. The helper macro for integer compares is enhanced to account for 64-bit operands. Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 5885b7e5c1..27a34c06ff 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -626,15 +626,18 @@ VCF(sx, int32_to_float32, s32) void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ ppc_avr_t *a, ppc_avr_t *b) \ { \ - uint32_t ones = (uint32_t)-1; \ - uint32_t all = ones; \ - uint32_t none = 0; \ + uint64_t ones = (uint64_t)-1; \ + uint64_t all = ones; \ + uint64_t none = 0; \ int i; \ \ for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - uint32_t result = (a->element[i] compare b->element[i] ? \ + uint64_t result = (a->element[i] compare b->element[i] ? \ ones : 0x0); \ switch (sizeof(a->element[0])) { \ + case 8: \ + r->u64[i] = result; \ + break; \ case 4: \ r->u32[i] = result; \ break; \ @@ -658,12 +661,15 @@ VCF(sx, int32_to_float32, s32) VCMP(equb, ==, u8) VCMP(equh, ==, u16) VCMP(equw, ==, u32) +VCMP(equd, ==, u64) VCMP(gtub, >, u8) VCMP(gtuh, >, u16) VCMP(gtuw, >, u32) +VCMP(gtud, >, u64) VCMP(gtsb, >, s8) VCMP(gtsh, >, s16) VCMP(gtsw, >, s32) +VCMP(gtsd, >, s64) #undef VCMP_DO #undef VCMP -- cgit v1.2.3 From f1064f612c9783136f2c59b94a4a8da70d3a09e3 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:14 -0600 Subject: target-ppc: Altivec 2.07: Vector Gather Bits by Bytes This patch adds the Vector Gather Bits by Bytes Doubleword (vgbbd) instruction which is introduced in Power ISA Version 2.07. Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 276 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 276 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 27a34c06ff..cd0b88a8bc 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1075,6 +1075,282 @@ void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) #undef VBPERMQ_INDEX #undef VBPERMQ_DW +uint64_t VGBBD_MASKS[256] = { + 0x0000000000000000ull, /* 00 */ + 0x0000000000000080ull, /* 01 */ + 0x0000000000008000ull, /* 02 */ + 0x0000000000008080ull, /* 03 */ + 0x0000000000800000ull, /* 04 */ + 0x0000000000800080ull, /* 05 */ + 0x0000000000808000ull, /* 06 */ + 0x0000000000808080ull, /* 07 */ + 0x0000000080000000ull, /* 08 */ + 0x0000000080000080ull, /* 09 */ + 0x0000000080008000ull, /* 0A */ + 0x0000000080008080ull, /* 0B */ + 0x0000000080800000ull, /* 0C */ + 0x0000000080800080ull, /* 0D */ + 0x0000000080808000ull, /* 0E */ + 0x0000000080808080ull, /* 0F */ + 0x0000008000000000ull, /* 10 */ + 0x0000008000000080ull, /* 11 */ + 0x0000008000008000ull, /* 12 */ + 0x0000008000008080ull, /* 13 */ + 0x0000008000800000ull, /* 14 */ + 0x0000008000800080ull, /* 15 */ + 0x0000008000808000ull, /* 16 */ + 0x0000008000808080ull, /* 17 */ + 0x0000008080000000ull, /* 18 */ + 0x0000008080000080ull, /* 19 */ + 0x0000008080008000ull, /* 1A */ + 0x0000008080008080ull, /* 1B */ + 0x0000008080800000ull, /* 1C */ + 0x0000008080800080ull, /* 1D */ + 0x0000008080808000ull, /* 1E */ + 0x0000008080808080ull, /* 1F */ + 0x0000800000000000ull, /* 20 */ + 0x0000800000000080ull, /* 21 */ + 0x0000800000008000ull, /* 22 */ + 0x0000800000008080ull, /* 23 */ + 0x0000800000800000ull, /* 24 */ + 0x0000800000800080ull, /* 25 */ + 0x0000800000808000ull, /* 26 */ + 0x0000800000808080ull, /* 27 */ + 0x0000800080000000ull, /* 28 */ + 0x0000800080000080ull, /* 29 */ + 0x0000800080008000ull, /* 2A */ + 0x0000800080008080ull, /* 2B */ + 0x0000800080800000ull, /* 2C */ + 0x0000800080800080ull, /* 2D */ + 0x0000800080808000ull, /* 2E */ + 0x0000800080808080ull, /* 2F */ + 0x0000808000000000ull, /* 30 */ + 0x0000808000000080ull, /* 31 */ + 0x0000808000008000ull, /* 32 */ + 0x0000808000008080ull, /* 33 */ + 0x0000808000800000ull, /* 34 */ + 0x0000808000800080ull, /* 35 */ + 0x0000808000808000ull, /* 36 */ + 0x0000808000808080ull, /* 37 */ + 0x0000808080000000ull, /* 38 */ + 0x0000808080000080ull, /* 39 */ + 0x0000808080008000ull, /* 3A */ + 0x0000808080008080ull, /* 3B */ + 0x0000808080800000ull, /* 3C */ + 0x0000808080800080ull, /* 3D */ + 0x0000808080808000ull, /* 3E */ + 0x0000808080808080ull, /* 3F */ + 0x0080000000000000ull, /* 40 */ + 0x0080000000000080ull, /* 41 */ + 0x0080000000008000ull, /* 42 */ + 0x0080000000008080ull, /* 43 */ + 0x0080000000800000ull, /* 44 */ + 0x0080000000800080ull, /* 45 */ + 0x0080000000808000ull, /* 46 */ + 0x0080000000808080ull, /* 47 */ + 0x0080000080000000ull, /* 48 */ + 0x0080000080000080ull, /* 49 */ + 0x0080000080008000ull, /* 4A */ + 0x0080000080008080ull, /* 4B */ + 0x0080000080800000ull, /* 4C */ + 0x0080000080800080ull, /* 4D */ + 0x0080000080808000ull, /* 4E */ + 0x0080000080808080ull, /* 4F */ + 0x0080008000000000ull, /* 50 */ + 0x0080008000000080ull, /* 51 */ + 0x0080008000008000ull, /* 52 */ + 0x0080008000008080ull, /* 53 */ + 0x0080008000800000ull, /* 54 */ + 0x0080008000800080ull, /* 55 */ + 0x0080008000808000ull, /* 56 */ + 0x0080008000808080ull, /* 57 */ + 0x0080008080000000ull, /* 58 */ + 0x0080008080000080ull, /* 59 */ + 0x0080008080008000ull, /* 5A */ + 0x0080008080008080ull, /* 5B */ + 0x0080008080800000ull, /* 5C */ + 0x0080008080800080ull, /* 5D */ + 0x0080008080808000ull, /* 5E */ + 0x0080008080808080ull, /* 5F */ + 0x0080800000000000ull, /* 60 */ + 0x0080800000000080ull, /* 61 */ + 0x0080800000008000ull, /* 62 */ + 0x0080800000008080ull, /* 63 */ + 0x0080800000800000ull, /* 64 */ + 0x0080800000800080ull, /* 65 */ + 0x0080800000808000ull, /* 66 */ + 0x0080800000808080ull, /* 67 */ + 0x0080800080000000ull, /* 68 */ + 0x0080800080000080ull, /* 69 */ + 0x0080800080008000ull, /* 6A */ + 0x0080800080008080ull, /* 6B */ + 0x0080800080800000ull, /* 6C */ + 0x0080800080800080ull, /* 6D */ + 0x0080800080808000ull, /* 6E */ + 0x0080800080808080ull, /* 6F */ + 0x0080808000000000ull, /* 70 */ + 0x0080808000000080ull, /* 71 */ + 0x0080808000008000ull, /* 72 */ + 0x0080808000008080ull, /* 73 */ + 0x0080808000800000ull, /* 74 */ + 0x0080808000800080ull, /* 75 */ + 0x0080808000808000ull, /* 76 */ + 0x0080808000808080ull, /* 77 */ + 0x0080808080000000ull, /* 78 */ + 0x0080808080000080ull, /* 79 */ + 0x0080808080008000ull, /* 7A */ + 0x0080808080008080ull, /* 7B */ + 0x0080808080800000ull, /* 7C */ + 0x0080808080800080ull, /* 7D */ + 0x0080808080808000ull, /* 7E */ + 0x0080808080808080ull, /* 7F */ + 0x8000000000000000ull, /* 80 */ + 0x8000000000000080ull, /* 81 */ + 0x8000000000008000ull, /* 82 */ + 0x8000000000008080ull, /* 83 */ + 0x8000000000800000ull, /* 84 */ + 0x8000000000800080ull, /* 85 */ + 0x8000000000808000ull, /* 86 */ + 0x8000000000808080ull, /* 87 */ + 0x8000000080000000ull, /* 88 */ + 0x8000000080000080ull, /* 89 */ + 0x8000000080008000ull, /* 8A */ + 0x8000000080008080ull, /* 8B */ + 0x8000000080800000ull, /* 8C */ + 0x8000000080800080ull, /* 8D */ + 0x8000000080808000ull, /* 8E */ + 0x8000000080808080ull, /* 8F */ + 0x8000008000000000ull, /* 90 */ + 0x8000008000000080ull, /* 91 */ + 0x8000008000008000ull, /* 92 */ + 0x8000008000008080ull, /* 93 */ + 0x8000008000800000ull, /* 94 */ + 0x8000008000800080ull, /* 95 */ + 0x8000008000808000ull, /* 96 */ + 0x8000008000808080ull, /* 97 */ + 0x8000008080000000ull, /* 98 */ + 0x8000008080000080ull, /* 99 */ + 0x8000008080008000ull, /* 9A */ + 0x8000008080008080ull, /* 9B */ + 0x8000008080800000ull, /* 9C */ + 0x8000008080800080ull, /* 9D */ + 0x8000008080808000ull, /* 9E */ + 0x8000008080808080ull, /* 9F */ + 0x8000800000000000ull, /* A0 */ + 0x8000800000000080ull, /* A1 */ + 0x8000800000008000ull, /* A2 */ + 0x8000800000008080ull, /* A3 */ + 0x8000800000800000ull, /* A4 */ + 0x8000800000800080ull, /* A5 */ + 0x8000800000808000ull, /* A6 */ + 0x8000800000808080ull, /* A7 */ + 0x8000800080000000ull, /* A8 */ + 0x8000800080000080ull, /* A9 */ + 0x8000800080008000ull, /* AA */ + 0x8000800080008080ull, /* AB */ + 0x8000800080800000ull, /* AC */ + 0x8000800080800080ull, /* AD */ + 0x8000800080808000ull, /* AE */ + 0x8000800080808080ull, /* AF */ + 0x8000808000000000ull, /* B0 */ + 0x8000808000000080ull, /* B1 */ + 0x8000808000008000ull, /* B2 */ + 0x8000808000008080ull, /* B3 */ + 0x8000808000800000ull, /* B4 */ + 0x8000808000800080ull, /* B5 */ + 0x8000808000808000ull, /* B6 */ + 0x8000808000808080ull, /* B7 */ + 0x8000808080000000ull, /* B8 */ + 0x8000808080000080ull, /* B9 */ + 0x8000808080008000ull, /* BA */ + 0x8000808080008080ull, /* BB */ + 0x8000808080800000ull, /* BC */ + 0x8000808080800080ull, /* BD */ + 0x8000808080808000ull, /* BE */ + 0x8000808080808080ull, /* BF */ + 0x8080000000000000ull, /* C0 */ + 0x8080000000000080ull, /* C1 */ + 0x8080000000008000ull, /* C2 */ + 0x8080000000008080ull, /* C3 */ + 0x8080000000800000ull, /* C4 */ + 0x8080000000800080ull, /* C5 */ + 0x8080000000808000ull, /* C6 */ + 0x8080000000808080ull, /* C7 */ + 0x8080000080000000ull, /* C8 */ + 0x8080000080000080ull, /* C9 */ + 0x8080000080008000ull, /* CA */ + 0x8080000080008080ull, /* CB */ + 0x8080000080800000ull, /* CC */ + 0x8080000080800080ull, /* CD */ + 0x8080000080808000ull, /* CE */ + 0x8080000080808080ull, /* CF */ + 0x8080008000000000ull, /* D0 */ + 0x8080008000000080ull, /* D1 */ + 0x8080008000008000ull, /* D2 */ + 0x8080008000008080ull, /* D3 */ + 0x8080008000800000ull, /* D4 */ + 0x8080008000800080ull, /* D5 */ + 0x8080008000808000ull, /* D6 */ + 0x8080008000808080ull, /* D7 */ + 0x8080008080000000ull, /* D8 */ + 0x8080008080000080ull, /* D9 */ + 0x8080008080008000ull, /* DA */ + 0x8080008080008080ull, /* DB */ + 0x8080008080800000ull, /* DC */ + 0x8080008080800080ull, /* DD */ + 0x8080008080808000ull, /* DE */ + 0x8080008080808080ull, /* DF */ + 0x8080800000000000ull, /* E0 */ + 0x8080800000000080ull, /* E1 */ + 0x8080800000008000ull, /* E2 */ + 0x8080800000008080ull, /* E3 */ + 0x8080800000800000ull, /* E4 */ + 0x8080800000800080ull, /* E5 */ + 0x8080800000808000ull, /* E6 */ + 0x8080800000808080ull, /* E7 */ + 0x8080800080000000ull, /* E8 */ + 0x8080800080000080ull, /* E9 */ + 0x8080800080008000ull, /* EA */ + 0x8080800080008080ull, /* EB */ + 0x8080800080800000ull, /* EC */ + 0x8080800080800080ull, /* ED */ + 0x8080800080808000ull, /* EE */ + 0x8080800080808080ull, /* EF */ + 0x8080808000000000ull, /* F0 */ + 0x8080808000000080ull, /* F1 */ + 0x8080808000008000ull, /* F2 */ + 0x8080808000008080ull, /* F3 */ + 0x8080808000800000ull, /* F4 */ + 0x8080808000800080ull, /* F5 */ + 0x8080808000808000ull, /* F6 */ + 0x8080808000808080ull, /* F7 */ + 0x8080808080000000ull, /* F8 */ + 0x8080808080000080ull, /* F9 */ + 0x8080808080008000ull, /* FA */ + 0x8080808080008080ull, /* FB */ + 0x8080808080800000ull, /* FC */ + 0x8080808080800080ull, /* FD */ + 0x8080808080808000ull, /* FE */ + 0x8080808080808080ull, /* FF */ +}; + +void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b) +{ + int i; + uint64_t t[2] = { 0, 0 }; + + VECTOR_FOR_INORDER_I(i, u8) { +#if defined(HOST_WORDS_BIGENDIAN) + t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7); +#else + t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7)); +#endif + } + + r->u64[0] = t[0]; + r->u64[1] = t[1]; +} + #if defined(HOST_WORDS_BIGENDIAN) #define PKBIG 1 #else -- cgit v1.2.3 From b8476fc7c6e205f0dc9fff3cfa199eee8af0fa27 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:15 -0600 Subject: target-ppc: Altivec 2.07: Vector Polynomial Multiply Sum This patch adds the Vectory Polynomial Multiply Sum instructions introduced in Power ISA Version 2.07: - Vectory Polynomial Multiply Sum Byte (vpmsumb) - Vectory Polynomial Multiply Sum Halfword (vpmsumh) - Vectory Polynomial Multiply Sum Word (vpmsumw) - Vectory Polynomial Multiply Sum Doubleword (vpmsumd) Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index cd0b88a8bc..4e8e507819 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1351,6 +1351,76 @@ void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b) r->u64[1] = t[1]; } +#define PMSUM(name, srcfld, trgfld, trgtyp) \ +void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ +{ \ + int i, j; \ + trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \ + \ + VECTOR_FOR_INORDER_I(i, srcfld) { \ + prod[i] = 0; \ + for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ + if (a->srcfld[i] & (1ull<srcfld[i] << j); \ + } \ + } \ + } \ + \ + VECTOR_FOR_INORDER_I(i, trgfld) { \ + r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \ + } \ +} + +PMSUM(vpmsumb, u8, u16, uint16_t) +PMSUM(vpmsumh, u16, u32, uint32_t) +PMSUM(vpmsumw, u32, u64, uint64_t) + +void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + +#ifdef CONFIG_INT128 + int i, j; + __uint128_t prod[2]; + + VECTOR_FOR_INORDER_I(i, u64) { + prod[i] = 0; + for (j = 0; j < 64; j++) { + if (a->u64[i] & (1ull<u64[i]) << j); + } + } + } + + r->u128 = prod[0] ^ prod[1]; + +#else + int i, j; + ppc_avr_t prod[2]; + + VECTOR_FOR_INORDER_I(i, u64) { + prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0; + for (j = 0; j < 64; j++) { + if (a->u64[i] & (1ull<u64[i]; + } else { + bshift.u64[HI_IDX] = b->u64[i] >> (64-j); + bshift.u64[LO_IDX] = b->u64[i] << j; + } + prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX]; + prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX]; + } + } + } + + r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX]; + r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX]; +#endif +} + + #if defined(HOST_WORDS_BIGENDIAN) #define PKBIG 1 #else -- cgit v1.2.3 From e8f7b27b9942d02ece7df34ae2b2a09cb9da7196 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:16 -0600 Subject: target-ppc: Altivec 2.07: Binary Coded Decimal Instructions This patch add the Binary Coded Decimal instructions bcdadd. and bcdsub. Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 201 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 4e8e507819..ce7c6a0ea0 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -2136,6 +2136,207 @@ void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) #endif } +#define BCD_PLUS_PREF_1 0xC +#define BCD_PLUS_PREF_2 0xF +#define BCD_PLUS_ALT_1 0xA +#define BCD_NEG_PREF 0xD +#define BCD_NEG_ALT 0xB +#define BCD_PLUS_ALT_2 0xE + +#if defined(HOST_WORDS_BIGENDIAN) +#define BCD_DIG_BYTE(n) (15 - (n/2)) +#else +#define BCD_DIG_BYTE(n) (n/2) +#endif + +static int bcd_get_sgn(ppc_avr_t *bcd) +{ + switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) { + case BCD_PLUS_PREF_1: + case BCD_PLUS_PREF_2: + case BCD_PLUS_ALT_1: + case BCD_PLUS_ALT_2: + { + return 1; + } + + case BCD_NEG_PREF: + case BCD_NEG_ALT: + { + return -1; + } + + default: + { + return 0; + } + } +} + +static int bcd_preferred_sgn(int sgn, int ps) +{ + if (sgn >= 0) { + return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2; + } else { + return BCD_NEG_PREF; + } +} + +static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid) +{ + uint8_t result; + if (n & 1) { + result = bcd->u8[BCD_DIG_BYTE(n)] >> 4; + } else { + result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF; + } + + if (unlikely(result > 9)) { + *invalid = true; + } + return result; +} + +static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) +{ + if (n & 1) { + bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F; + bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4); + } else { + bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0; + bcd->u8[BCD_DIG_BYTE(n)] |= digit; + } +} + +static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) +{ + int i; + int invalid = 0; + for (i = 31; i > 0; i--) { + uint8_t dig_a = bcd_get_digit(a, i, &invalid); + uint8_t dig_b = bcd_get_digit(b, i, &invalid); + if (unlikely(invalid)) { + return 0; /* doesnt matter */ + } else if (dig_a > dig_b) { + return 1; + } else if (dig_a < dig_b) { + return -1; + } + } + + return 0; +} + +static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, + int *overflow) +{ + int carry = 0; + int i; + int is_zero = 1; + for (i = 1; i <= 31; i++) { + uint8_t digit = bcd_get_digit(a, i, invalid) + + bcd_get_digit(b, i, invalid) + carry; + is_zero &= (digit == 0); + if (digit > 9) { + carry = 1; + digit -= 10; + } else { + carry = 0; + } + + bcd_put_digit(t, digit, i); + + if (unlikely(*invalid)) { + return -1; + } + } + + *overflow = carry; + return is_zero; +} + +static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid, + int *overflow) +{ + int carry = 0; + int i; + int is_zero = 1; + for (i = 1; i <= 31; i++) { + uint8_t digit = bcd_get_digit(a, i, invalid) - + bcd_get_digit(b, i, invalid) + carry; + is_zero &= (digit == 0); + if (digit & 0x80) { + carry = -1; + digit += 10; + } else { + carry = 0; + } + + bcd_put_digit(t, digit, i); + + if (unlikely(*invalid)) { + return -1; + } + } + + *overflow = carry; + return is_zero; +} + +uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) +{ + + int sgna = bcd_get_sgn(a); + int sgnb = bcd_get_sgn(b); + int invalid = (sgna == 0) || (sgnb == 0); + int overflow = 0; + int zero = 0; + uint32_t cr = 0; + ppc_avr_t result = { .u64 = { 0, 0 } }; + + if (!invalid) { + if (sgna == sgnb) { + result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); + zero = bcd_add_mag(&result, a, b, &invalid, &overflow); + cr = (sgna > 0) ? 4 : 8; + } else if (bcd_cmp_mag(a, b) > 0) { + result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps); + zero = bcd_sub_mag(&result, a, b, &invalid, &overflow); + cr = (sgna > 0) ? 4 : 8; + } else { + result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps); + zero = bcd_sub_mag(&result, b, a, &invalid, &overflow); + cr = (sgnb > 0) ? 4 : 8; + } + } + + if (unlikely(invalid)) { + result.u64[HI_IDX] = result.u64[LO_IDX] = -1; + cr = 1; + } else if (overflow) { + cr |= 1; + } else if (zero) { + cr = 2; + } + + *r = result; + + return cr; +} + +uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) +{ + ppc_avr_t bcopy = *b; + int sgnb = bcd_get_sgn(b); + if (sgnb < 0) { + bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0); + } else if (sgnb > 0) { + bcd_put_digit(&bcopy, BCD_NEG_PREF, 0); + } + /* else invalid ... defer to bcdadd code for proper handling */ + + return helper_bcdadd(r, a, &bcopy, ps); +} #undef VECTOR_FOR_INORDER_I #undef HI_IDX -- cgit v1.2.3 From 557d52fa697c938aeff2784b79df55952c3bfcc1 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:17 -0600 Subject: target-ppc: Altivec 2.07: AES Instructions This patch adds the Vector AES instructions introduced in Power ISA Version 2.07: - Vector AES Cipher (vcipher) - Vector AES Cipher Last (vcipherlast) - Vector AES Inverse Cipher (vncipher) - Vector AES Inverse Cipher Last (vncipherlast) - Vector AES SubBytes (vsbox) Note that the implementation of vncipher deviates from the RTL in ISA V2.07. However it does match the verbal description in the third paragraph. The RTL will be fixed in ISA V2.07B. The implementation here has been tested against actual P8 hardware. Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 280 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 280 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index ce7c6a0ea0..cd04e8ab71 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -2338,6 +2338,286 @@ uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) return helper_bcdadd(r, a, &bcopy, ps); } +static uint8_t SBOX[256] = { +0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, +0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, +0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, +0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, +0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, +0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, +0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, +0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, +0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, +0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, +0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, +0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, +0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, +0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, +0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, +0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, +0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, +0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, +0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, +0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, +0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, +0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, +0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, +0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, +0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, +0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, +0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, +0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, +0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, +0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, +0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, +0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16, +}; + +static void SubBytes(ppc_avr_t *r, ppc_avr_t *a) +{ + int i; + VECTOR_FOR_INORDER_I(i, u8) { + r->u8[i] = SBOX[a->u8[i]]; + } +} + +static uint8_t InvSBOX[256] = { +0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, +0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB, +0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, +0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, +0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, +0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E, +0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, +0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25, +0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, +0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, +0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, +0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84, +0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, +0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06, +0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, +0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, +0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, +0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73, +0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, +0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E, +0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, +0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, +0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, +0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4, +0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, +0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F, +0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, +0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, +0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, +0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61, +0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, +0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D, +}; + +static void InvSubBytes(ppc_avr_t *r, ppc_avr_t *a) +{ + int i; + VECTOR_FOR_INORDER_I(i, u8) { + r->u8[i] = InvSBOX[a->u8[i]]; + } +} + +static uint8_t ROTL8(uint8_t x, int n) +{ + return (x << n) | (x >> (8-n)); +} + +static inline int BIT8(uint8_t x, int n) +{ + return (x & (0x80 >> n)) != 0; +} + +static uint8_t GFx02(uint8_t x) +{ + return ROTL8(x, 1) ^ (BIT8(x, 0) ? 0x1A : 0); +} + +static uint8_t GFx03(uint8_t x) +{ + return x ^ ROTL8(x, 1) ^ (BIT8(x, 0) ? 0x1A : 0); +} + +static uint8_t GFx09(uint8_t x) +{ + uint8_t term2 = ROTL8(x, 3); + uint8_t term3 = (BIT8(x, 0) ? 0x68 : 0) | (BIT8(x, 1) ? 0x14 : 0) | + (BIT8(x, 2) ? 0x02 : 0); + uint8_t term4 = (BIT8(x, 1) ? 0x20 : 0) | (BIT8(x, 2) ? 0x18 : 0); + return x ^ term2 ^ term3 ^ term4; +} + +static uint8_t GFx0B(uint8_t x) +{ + uint8_t term2 = ROTL8(x, 1); + uint8_t term3 = (x << 3) | (BIT8(x, 0) ? 0x06 : 0) | + (BIT8(x, 2) ? 0x01 : 0); + uint8_t term4 = (BIT8(x, 0) ? 0x70 : 0) | (BIT8(x, 1) ? 0x06 : 0) | + (BIT8(x, 2) ? 0x08 : 0); + uint8_t term5 = (BIT8(x, 1) ? 0x30 : 0) | (BIT8(x, 2) ? 0x02 : 0); + uint8_t term6 = BIT8(x, 2) ? 0x10 : 0; + return x ^ term2 ^ term3 ^ term4 ^ term5 ^ term6; +} + +static uint8_t GFx0D(uint8_t x) +{ + uint8_t term2 = ROTL8(x, 2); + uint8_t term3 = (x << 3) | (BIT8(x, 1) ? 0x04 : 0) | + (BIT8(x, 2) ? 0x03 : 0); + uint8_t term4 = (BIT8(x, 0) ? 0x58 : 0) | (BIT8(x, 1) ? 0x20 : 0); + uint8_t term5 = (BIT8(x, 1) ? 0x08 : 0) | (BIT8(x, 2) ? 0x10 : 0); + uint8_t term6 = BIT8(x, 2) ? 0x08 : 0; + return x ^ term2 ^ term3 ^ term4 ^ term5 ^ term6; +} + +static uint8_t GFx0E(uint8_t x) +{ + uint8_t term1 = ROTL8(x, 1); + uint8_t term2 = (x << 2) | (BIT8(x, 2) ? 0x02 : 0) | + (BIT8(x, 1) ? 0x01 : 0); + uint8_t term3 = (x << 3) | (BIT8(x, 1) ? 0x04 : 0) | + (BIT8(x, 2) ? 0x01 : 0); + uint8_t term4 = (BIT8(x, 0) ? 0x40 : 0) | (BIT8(x, 1) ? 0x28 : 0) | + (BIT8(x, 2) ? 0x10 : 0); + uint8_t term5 = (BIT8(x, 2) ? 0x08 : 0); + return term1 ^ term2 ^ term3 ^ term4 ^ term5; +} + +#if defined(HOST_WORDS_BIGENDIAN) +#define MCB(x, i, b) ((x)->u8[(i)*4 + (b)]) +#else +#define MCB(x, i, b) ((x)->u8[15 - ((i)*4 + (b))]) +#endif + +static void MixColumns(ppc_avr_t *r, ppc_avr_t *x) +{ + int i; + for (i = 0; i < 4; i++) { + MCB(r, i, 0) = GFx02(MCB(x, i, 0)) ^ GFx03(MCB(x, i, 1)) ^ + MCB(x, i, 2) ^ MCB(x, i, 3); + MCB(r, i, 1) = MCB(x, i, 0) ^ GFx02(MCB(x, i, 1)) ^ + GFx03(MCB(x, i, 2)) ^ MCB(x, i, 3); + MCB(r, i, 2) = MCB(x, i, 0) ^ MCB(x, i, 1) ^ + GFx02(MCB(x, i, 2)) ^ GFx03(MCB(x, i, 3)); + MCB(r, i, 3) = GFx03(MCB(x, i, 0)) ^ MCB(x, i, 1) ^ + MCB(x, i, 2) ^ GFx02(MCB(x, i, 3)); + } +} + +static void InvMixColumns(ppc_avr_t *r, ppc_avr_t *x) +{ + int i; + for (i = 0; i < 4; i++) { + MCB(r, i, 0) = GFx0E(MCB(x, i, 0)) ^ GFx0B(MCB(x, i, 1)) ^ + GFx0D(MCB(x, i, 2)) ^ GFx09(MCB(x, i, 3)); + MCB(r, i, 1) = GFx09(MCB(x, i, 0)) ^ GFx0E(MCB(x, i, 1)) ^ + GFx0B(MCB(x, i, 2)) ^ GFx0D(MCB(x, i, 3)); + MCB(r, i, 2) = GFx0D(MCB(x, i, 0)) ^ GFx09(MCB(x, i, 1)) ^ + GFx0E(MCB(x, i, 2)) ^ GFx0B(MCB(x, i, 3)); + MCB(r, i, 3) = GFx0B(MCB(x, i, 0)) ^ GFx0D(MCB(x, i, 1)) ^ + GFx09(MCB(x, i, 2)) ^ GFx0E(MCB(x, i, 3)); + } +} + +static void ShiftRows(ppc_avr_t *r, ppc_avr_t *x) +{ + MCB(r, 0, 0) = MCB(x, 0, 0); + MCB(r, 1, 0) = MCB(x, 1, 0); + MCB(r, 2, 0) = MCB(x, 2, 0); + MCB(r, 3, 0) = MCB(x, 3, 0); + + MCB(r, 0, 1) = MCB(x, 1, 1); + MCB(r, 1, 1) = MCB(x, 2, 1); + MCB(r, 2, 1) = MCB(x, 3, 1); + MCB(r, 3, 1) = MCB(x, 0, 1); + + MCB(r, 0, 2) = MCB(x, 2, 2); + MCB(r, 1, 2) = MCB(x, 3, 2); + MCB(r, 2, 2) = MCB(x, 0, 2); + MCB(r, 3, 2) = MCB(x, 1, 2); + + MCB(r, 0, 3) = MCB(x, 3, 3); + MCB(r, 1, 3) = MCB(x, 0, 3); + MCB(r, 2, 3) = MCB(x, 1, 3); + MCB(r, 3, 3) = MCB(x, 2, 3); +} + +static void InvShiftRows(ppc_avr_t *r, ppc_avr_t *x) +{ + MCB(r, 0, 0) = MCB(x, 0, 0); + MCB(r, 1, 0) = MCB(x, 1, 0); + MCB(r, 2, 0) = MCB(x, 2, 0); + MCB(r, 3, 0) = MCB(x, 3, 0); + + MCB(r, 0, 1) = MCB(x, 3, 1); + MCB(r, 1, 1) = MCB(x, 0, 1); + MCB(r, 2, 1) = MCB(x, 1, 1); + MCB(r, 3, 1) = MCB(x, 2, 1); + + MCB(r, 0, 2) = MCB(x, 2, 2); + MCB(r, 1, 2) = MCB(x, 3, 2); + MCB(r, 2, 2) = MCB(x, 0, 2); + MCB(r, 3, 2) = MCB(x, 1, 2); + + MCB(r, 0, 3) = MCB(x, 1, 3); + MCB(r, 1, 3) = MCB(x, 2, 3); + MCB(r, 2, 3) = MCB(x, 3, 3); + MCB(r, 3, 3) = MCB(x, 0, 3); +} + +#undef MCB + +void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a) +{ + SubBytes(r, a); +} + +void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + ppc_avr_t vtemp1, vtemp2, vtemp3; + SubBytes(&vtemp1, a); + ShiftRows(&vtemp2, &vtemp1); + MixColumns(&vtemp3, &vtemp2); + r->u64[0] = vtemp3.u64[0] ^ b->u64[0]; + r->u64[1] = vtemp3.u64[1] ^ b->u64[1]; +} + +void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + ppc_avr_t vtemp1, vtemp2; + SubBytes(&vtemp1, a); + ShiftRows(&vtemp2, &vtemp1); + r->u64[0] = vtemp2.u64[0] ^ b->u64[0]; + r->u64[1] = vtemp2.u64[1] ^ b->u64[1]; +} + +void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + /* This differs from what is written in ISA V2.07. The RTL is */ + /* incorrect and will be fixed in V2.07B. */ + ppc_avr_t vtemp1, vtemp2, vtemp3; + InvShiftRows(&vtemp1, a); + InvSubBytes(&vtemp2, &vtemp1); + vtemp3.u64[0] = vtemp2.u64[0] ^ b->u64[0]; + vtemp3.u64[1] = vtemp2.u64[1] ^ b->u64[1]; + InvMixColumns(r, &vtemp3); +} + +void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + ppc_avr_t vtemp1, vtemp2; + InvShiftRows(&vtemp1, a); + InvSubBytes(&vtemp2, &vtemp1); + r->u64[0] = vtemp2.u64[0] ^ b->u64[0]; + r->u64[1] = vtemp2.u64[1] ^ b->u64[1]; +} + #undef VECTOR_FOR_INORDER_I #undef HI_IDX #undef LO_IDX -- cgit v1.2.3 From 57354f8f12d04efc3c38126b967fc178b56885f5 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:18 -0600 Subject: target-ppc: Altivec 2.07: Vector SHA Sigma Instructions This patch adds the Vector SHA Sigma instructions introduced in Power ISA Version 2.07: - Vector SHA-512 Sigma Doubleword (vshasigmad) - Vector SHA-256 Sigma Word (vshasigmaw) Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index cd04e8ab71..e6a7ad025c 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -2618,6 +2618,88 @@ void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) r->u64[1] = vtemp2.u64[1] ^ b->u64[1]; } +#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n))) +#if defined(HOST_WORDS_BIGENDIAN) +#define EL_IDX(i) (i) +#else +#define EL_IDX(i) (3 - (i)) +#endif + +void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) +{ + int st = (st_six & 0x10) != 0; + int six = st_six & 0xF; + int i; + + VECTOR_FOR_INORDER_I(i, u32) { + if (st == 0) { + if ((six & (0x8 >> i)) == 0) { + r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^ + ROTRu32(a->u32[EL_IDX(i)], 18) ^ + (a->u32[EL_IDX(i)] >> 3); + } else { /* six.bit[i] == 1 */ + r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^ + ROTRu32(a->u32[EL_IDX(i)], 19) ^ + (a->u32[EL_IDX(i)] >> 10); + } + } else { /* st == 1 */ + if ((six & (0x8 >> i)) == 0) { + r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^ + ROTRu32(a->u32[EL_IDX(i)], 13) ^ + ROTRu32(a->u32[EL_IDX(i)], 22); + } else { /* six.bit[i] == 1 */ + r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^ + ROTRu32(a->u32[EL_IDX(i)], 11) ^ + ROTRu32(a->u32[EL_IDX(i)], 25); + } + } + } +} + +#undef ROTRu32 +#undef EL_IDX + +#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n))) +#if defined(HOST_WORDS_BIGENDIAN) +#define EL_IDX(i) (i) +#else +#define EL_IDX(i) (1 - (i)) +#endif + +void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) +{ + int st = (st_six & 0x10) != 0; + int six = st_six & 0xF; + int i; + + VECTOR_FOR_INORDER_I(i, u64) { + if (st == 0) { + if ((six & (0x8 >> (2*i))) == 0) { + r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^ + ROTRu64(a->u64[EL_IDX(i)], 8) ^ + (a->u64[EL_IDX(i)] >> 7); + } else { /* six.bit[2*i] == 1 */ + r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^ + ROTRu64(a->u64[EL_IDX(i)], 61) ^ + (a->u64[EL_IDX(i)] >> 6); + } + } else { /* st == 1 */ + if ((six & (0x8 >> (2*i))) == 0) { + r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^ + ROTRu64(a->u64[EL_IDX(i)], 34) ^ + ROTRu64(a->u64[EL_IDX(i)], 39); + } else { /* six.bit[2*i] == 1 */ + r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^ + ROTRu64(a->u64[EL_IDX(i)], 18) ^ + ROTRu64(a->u64[EL_IDX(i)], 41); + } + } + } +} + +#undef ROTRu64 +#undef EL_IDX + #undef VECTOR_FOR_INORDER_I #undef HI_IDX #undef LO_IDX -- cgit v1.2.3 From ac174549b730531a4d13c8281e2247e66ba0f46d Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:19 -0600 Subject: target-ppc: Altivec 2.07: Vector Permute and Exclusive OR This patch adds the Vector Permuate and Exclusive OR (vpermxor) instruction introduced in Power ISA Version 2.07. Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/int_helper.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'target-ppc/int_helper.c') diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index e6a7ad025c..63dde94b04 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -2700,6 +2700,20 @@ void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six) #undef ROTRu64 #undef EL_IDX +void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int i; + VECTOR_FOR_INORDER_I(i, u8) { + int indexA = c->u8[i] >> 4; + int indexB = c->u8[i] & 0xF; +#if defined(HOST_WORDS_BIGENDIAN) + r->u8[i] = a->u8[indexA] ^ b->u8[indexB]; +#else + r->u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB]; +#endif + } +} + #undef VECTOR_FOR_INORDER_I #undef HI_IDX #undef LO_IDX -- cgit v1.2.3