Diffstat (limited to 'include/fpu/softfloat-macros.h')
-rw-r--r-- | include/fpu/softfloat-macros.h | 215 |
1 file changed, 83 insertions, 132 deletions
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
index a35ec2893a..ec4e27a595 100644
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -83,6 +83,43 @@ this code that are retained.
 #define FPU_SOFTFLOAT_MACROS_H
 
 #include "fpu/softfloat-types.h"
+#include "qemu/host-utils.h"
+
+/**
+ * shl_double: double-word merging left shift
+ * @l: left or most-significant word
+ * @r: right or least-significant word
+ * @c: shift count
+ *
+ * Shift @l left by @c bits, shifting in bits from @r.
+ */
+static inline uint64_t shl_double(uint64_t l, uint64_t r, int c)
+{
+#if defined(__x86_64__)
+    asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c));
+    return l;
+#else
+    return c ? (l << c) | (r >> (64 - c)) : l;
+#endif
+}
+
+/**
+ * shr_double: double-word merging right shift
+ * @l: left or most-significant word
+ * @r: right or least-significant word
+ * @c: shift count
+ *
+ * Shift @r right by @c bits, shifting in bits from @l.
+ */
+static inline uint64_t shr_double(uint64_t l, uint64_t r, int c)
+{
+#if defined(__x86_64__)
+    asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c));
+    return r;
+#else
+    return c ? (r >> c) | (l << (64 - c)) : r;
+#endif
+}
 
 /*----------------------------------------------------------------------------
 | Shifts `a' right by the number of bits given in `count'. If any nonzero
@@ -403,16 +440,12 @@ static inline void
 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- add128(
-     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
+                          uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
-    uint64_t z1;
-
-    z1 = a1 + b1;
-    *z1Ptr = z1;
-    *z0Ptr = a0 + b0 + ( z1 < a1 );
-
+    bool c = 0;
+    *z1Ptr = uadd64_carry(a1, b1, &c);
+    *z0Ptr = uadd64_carry(a0, b0, &c);
 }
 
 /*----------------------------------------------------------------------------
@@ -423,34 +456,14 @@ static inline void
 | `z1Ptr', and `z2Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- add192(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t a2,
-     uint64_t b0,
-     uint64_t b1,
-     uint64_t b2,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr
- )
+static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2,
+                          uint64_t b0, uint64_t b1, uint64_t b2,
+                          uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
 {
-    uint64_t z0, z1, z2;
-    int8_t carry0, carry1;
-
-    z2 = a2 + b2;
-    carry1 = ( z2 < a2 );
-    z1 = a1 + b1;
-    carry0 = ( z1 < a1 );
-    z0 = a0 + b0;
-    z1 += carry1;
-    z0 += ( z1 < carry1 );
-    z0 += carry0;
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
+    bool c = 0;
+    *z2Ptr = uadd64_carry(a2, b2, &c);
+    *z1Ptr = uadd64_carry(a1, b1, &c);
+    *z0Ptr = uadd64_carry(a0, b0, &c);
 }
 
 /*----------------------------------------------------------------------------
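The converted add128()/add192() above chain the carry through a bool, one word at a time from least to most significant, with uadd64_carry() from "qemu/host-utils.h" doing the add-with-carry step. A minimal standalone sketch of that carry-chain pattern follows; add64_carry() here is a portable stand-in written for illustration, not the QEMU helper itself.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for uadd64_carry(): return the low 64 bits of x + y + *pcarry
 * and write the carry-out back for the next, more significant word. */
static inline uint64_t add64_carry(uint64_t x, uint64_t y, bool *pcarry)
{
    uint64_t z = x + y;
    bool c = z < x;                   /* carry out of x + y */
    z += *pcarry;
    c |= z < (uint64_t)*pcarry;       /* adding the old carry may also wrap */
    *pcarry = c;
    return z;
}

int main(void)
{
    /* 128-bit add, least significant word first, as in add128(). */
    uint64_t a0 = 1, a1 = UINT64_MAX;          /* a = 2^65 - 1 */
    uint64_t b0 = 0, b1 = 1;                   /* b = 1       */
    bool c = 0;
    uint64_t z1 = add64_carry(a1, b1, &c);     /* low word, carry out in c */
    uint64_t z0 = add64_carry(a0, b0, &c);     /* high word absorbs carry  */
    printf("%016" PRIx64 "%016" PRIx64 "\n", z0, z1);   /* a + b = 2^65 */
    return 0;
}

Keeping the carry in a bool is what lets a host compiler with an add-with-carry builtin turn each pair of calls into two machine additions, rather than re-deriving the carry from comparisons as the removed code did.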
@@ -461,14 +474,12 @@ static inline void
 | `z1Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- sub128(
-     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
+                          uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
-
-    *z1Ptr = a1 - b1;
-    *z0Ptr = a0 - b0 - ( a1 < b1 );
-
+    bool c = 0;
+    *z1Ptr = usub64_borrow(a1, b1, &c);
+    *z0Ptr = usub64_borrow(a0, b0, &c);
 }
 
 /*----------------------------------------------------------------------------
@@ -479,34 +490,14 @@ static inline void
 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- sub192(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t a2,
-     uint64_t b0,
-     uint64_t b1,
-     uint64_t b2,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr
- )
+static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2,
+                          uint64_t b0, uint64_t b1, uint64_t b2,
+                          uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
 {
-    uint64_t z0, z1, z2;
-    int8_t borrow0, borrow1;
-
-    z2 = a2 - b2;
-    borrow1 = ( a2 < b2 );
-    z1 = a1 - b1;
-    borrow0 = ( a1 < b1 );
-    z0 = a0 - b0;
-    z0 -= ( z1 < borrow1 );
-    z1 -= borrow1;
-    z0 -= borrow0;
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
+    bool c = 0;
+    *z2Ptr = usub64_borrow(a2, b2, &c);
+    *z1Ptr = usub64_borrow(a1, b1, &c);
+    *z0Ptr = usub64_borrow(a0, b0, &c);
 }
 
 /*----------------------------------------------------------------------------
@@ -515,27 +506,10 @@ static inline void
 | `z0Ptr' and `z1Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
+static inline void
+mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr)
 {
-    uint32_t aHigh, aLow, bHigh, bLow;
-    uint64_t z0, zMiddleA, zMiddleB, z1;
-
-    aLow = a;
-    aHigh = a>>32;
-    bLow = b;
-    bHigh = b>>32;
-    z1 = ( (uint64_t) aLow ) * bLow;
-    zMiddleA = ( (uint64_t) aLow ) * bHigh;
-    zMiddleB = ( (uint64_t) aHigh ) * bLow;
-    z0 = ( (uint64_t) aHigh ) * bHigh;
-    zMiddleA += zMiddleB;
-    z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
-    zMiddleA <<= 32;
-    z1 += zMiddleA;
-    z0 += ( z1 < zMiddleA );
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
+    mulu64(z1Ptr, z0Ptr, a, b);
 }
 
 /*----------------------------------------------------------------------------
@@ -546,24 +520,14 @@ static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t
 *----------------------------------------------------------------------------*/
 
 static inline void
- mul128By64To192(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t b,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr
- )
+mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b,
+                uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
 {
-    uint64_t z0, z1, z2, more1;
-
-    mul64To128( a1, b, &z1, &z2 );
-    mul64To128( a0, b, &z0, &more1 );
-    add128( z0, more1, 0, z1, &z0, &z1 );
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-
+    uint64_t z0, z1, m1;
+
+    mul64To128(a1, b, &m1, z2Ptr);
+    mul64To128(a0, b, &z0, &z1);
+    add128(z0, z1, 0, m1, z0Ptr, z1Ptr);
 }
 
 /*----------------------------------------------------------------------------
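mul64To128() above now defers entirely to mulu64() from host-utils, and mul128By64To192() builds on it plus add128(). A rough standalone equivalent of the 64 x 64 -> 128-bit step, assuming a compiler with the unsigned __int128 extension (GCC/Clang); mul64_full() is an illustrative name, not the QEMU API:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Full 64 x 64 -> 128 bit multiply, low and high words out,
 * mirroring what mul64To128()/mulu64() provide. */
static inline void mul64_full(uint64_t *plow, uint64_t *phigh,
                              uint64_t a, uint64_t b)
{
    unsigned __int128 p = (unsigned __int128)a * b;
    *plow  = (uint64_t)p;
    *phigh = (uint64_t)(p >> 64);
}

int main(void)
{
    uint64_t hi, lo;
    mul64_full(&lo, &hi, UINT64_MAX, UINT64_MAX);
    /* (2^64 - 1)^2 = 0xfffffffffffffffe0000000000000001 */
    printf("%016" PRIx64 "%016" PRIx64 "\n", hi, lo);
    return 0;
}

On a 64-bit host this compiles to a single widening multiply, which is what the removed 32-bit schoolbook decomposition in the old mul64To128() was emulating by hand.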
@@ -573,34 +537,21 @@ static inline void
 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 *----------------------------------------------------------------------------*/
 
-static inline void
- mul128To256(
-     uint64_t a0,
-     uint64_t a1,
-     uint64_t b0,
-     uint64_t b1,
-     uint64_t *z0Ptr,
-     uint64_t *z1Ptr,
-     uint64_t *z2Ptr,
-     uint64_t *z3Ptr
- )
+static inline void mul128To256(uint64_t a0, uint64_t a1,
+                               uint64_t b0, uint64_t b1,
+                               uint64_t *z0Ptr, uint64_t *z1Ptr,
+                               uint64_t *z2Ptr, uint64_t *z3Ptr)
 {
-    uint64_t z0, z1, z2, z3;
-    uint64_t more1, more2;
-
-    mul64To128( a1, b1, &z2, &z3 );
-    mul64To128( a1, b0, &z1, &more2 );
-    add128( z1, more2, 0, z2, &z1, &z2 );
-    mul64To128( a0, b0, &z0, &more1 );
-    add128( z0, more1, 0, z1, &z0, &z1 );
-    mul64To128( a0, b1, &more1, &more2 );
-    add128( more1, more2, 0, z2, &more1, &z2 );
-    add128( z0, z1, 0, more1, &z0, &z1 );
-    *z3Ptr = z3;
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
+    uint64_t z0, z1, z2;
+    uint64_t m0, m1, m2, n1, n2;
+
+    mul64To128(a1, b0, &m1, &m2);
+    mul64To128(a0, b1, &n1, &n2);
+    mul64To128(a1, b1, &z2, z3Ptr);
+    mul64To128(a0, b0, &z0, &z1);
+    add192( 0, m1, m2, 0, n1, n2, &m0, &m1, &m2);
+    add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr);
 }
 
 /*----------------------------------------------------------------------------
 |
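The reworked mul128To256() arranges the four 64 x 64 partial products so that the two cross terms (a1*b0 and a0*b1) are summed first with one add192(), then folded into the a0*b0 / a1*b1 columns with a second add192(), replacing the old chain of four add128() calls. A self-contained sketch of that schoolbook decomposition, cross-checked against unsigned __int128 for a case that fits in 128 bits; the names mul128_to_256 and u128 are illustrative only:

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef unsigned __int128 u128;

/* (a0:a1) * (b0:b1) = a1*b1 + ((a1*b0 + a0*b1) << 64) + ((a0*b0) << 128),
 * carries folded with __int128 for brevity; z[0] is the most significant word. */
static void mul128_to_256(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
                          uint64_t z[4])
{
    u128 lo  = (u128)a1 * b1;                                   /* bits   0..127 */
    u128 m1  = (u128)a1 * b0;                                   /* bits  64..191 */
    u128 m2  = (u128)a0 * b1;                                   /* bits  64..191 */
    u128 hi  = (u128)a0 * b0;                                   /* bits 128..255 */
    u128 mid = (u128)(uint64_t)m1 + (uint64_t)m2 + (uint64_t)(lo >> 64);

    z[3] = (uint64_t)lo;
    z[2] = (uint64_t)mid;
    hi += (m1 >> 64) + (m2 >> 64) + (mid >> 64);                /* cannot overflow */
    z[1] = (uint64_t)hi;
    z[0] = (uint64_t)(hi >> 64);
}

int main(void)
{
    uint64_t z[4];

    /* With zero high words the product fits in 128 bits and can be
     * cross-checked against a plain __int128 multiply. */
    uint64_t a1 = 0xdeadbeefcafebabeULL, b1 = 0x0123456789abcdefULL;
    mul128_to_256(0, a1, 0, b1, z);
    u128 ref = (u128)a1 * b1;
    assert(z[0] == 0 && z[1] == 0);
    assert(z[2] == (uint64_t)(ref >> 64) && z[3] == (uint64_t)ref);
    printf("ok\n");
    return 0;
}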