diff options
Diffstat (limited to 'target-arm/helper-a64.c')
-rw-r--r-- | target-arm/helper-a64.c | 61 |
1 files changed, 61 insertions, 0 deletions
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c index 8f53223cee..c31c45e729 100644 --- a/target-arm/helper-a64.c +++ b/target-arm/helper-a64.c @@ -293,3 +293,64 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp) } return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst); } + +/* Pairwise long add: add pairs of adjacent elements into + * double-width elements in the result (eg _s8 is an 8x8->16 op) + */ +uint64_t HELPER(neon_addlp_s8)(uint64_t a) +{ + uint64_t nsignmask = 0x0080008000800080ULL; + uint64_t wsignmask = 0x8000800080008000ULL; + uint64_t elementmask = 0x00ff00ff00ff00ffULL; + uint64_t tmp1, tmp2; + uint64_t res, signres; + + /* Extract odd elements, sign extend each to a 16 bit field */ + tmp1 = a & elementmask; + tmp1 ^= nsignmask; + tmp1 |= wsignmask; + tmp1 = (tmp1 - nsignmask) ^ wsignmask; + /* Ditto for the even elements */ + tmp2 = (a >> 8) & elementmask; + tmp2 ^= nsignmask; + tmp2 |= wsignmask; + tmp2 = (tmp2 - nsignmask) ^ wsignmask; + + /* calculate the result by summing bits 0..14, 16..22, etc, + * and then adjusting the sign bits 15, 23, etc manually. + * This ensures the addition can't overflow the 16 bit field. + */ + signres = (tmp1 ^ tmp2) & wsignmask; + res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask); + res ^= signres; + + return res; +} + +uint64_t HELPER(neon_addlp_u8)(uint64_t a) +{ + uint64_t tmp; + + tmp = a & 0x00ff00ff00ff00ffULL; + tmp += (a >> 8) & 0x00ff00ff00ff00ffULL; + return tmp; +} + +uint64_t HELPER(neon_addlp_s16)(uint64_t a) +{ + int32_t reslo, reshi; + + reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16); + reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48); + + return (uint32_t)reslo | (((uint64_t)reshi) << 32); +} + +uint64_t HELPER(neon_addlp_u16)(uint64_t a) +{ + uint64_t tmp; + + tmp = a & 0x0000ffff0000ffffULL; + tmp += (a >> 16) & 0x0000ffff0000ffffULL; + return tmp; +} |