aboutsummaryrefslogtreecommitdiff
path: root/fpu
diff options
context:
space:
mode:
Diffstat (limited to 'fpu')
-rw-r--r--fpu/softfloat-parts.c.inc64
-rw-r--r--fpu/softfloat.c365
2 files changed, 145 insertions, 284 deletions
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index b2c4624d8c..a897a5a743 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -751,3 +751,67 @@ static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
g_assert_not_reached();
}
}
+
+/*
+ * Returns the result of converting the floating-point value `a' to
+ * the two's complement integer format. The conversion is performed
+ * according to the IEC/IEEE Standard for Binary Floating-Point
+ * Arithmetic---which means in particular that the conversion is
+ * rounded according to the current rounding mode. If `a' is a NaN,
+ * the largest positive integer is returned. Otherwise, if the
+ * conversion overflows, the largest integer with the same sign as `a'
+ * is returned.
+*/
+static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
+ int scale, int64_t min, int64_t max,
+ float_status *s)
+{
+ int flags = 0;
+ uint64_t r;
+
+ switch (p->cls) {
+ case float_class_snan:
+ case float_class_qnan:
+ flags = float_flag_invalid;
+ r = max;
+ break;
+
+ case float_class_inf:
+ flags = float_flag_invalid;
+ r = p->sign ? min : max;
+ break;
+
+ case float_class_zero:
+ return 0;
+
+ case float_class_normal:
+ /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
+ if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
+ flags = float_flag_inexact;
+ }
+
+ if (p->exp <= DECOMPOSED_BINARY_POINT) {
+ r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
+ } else {
+ r = UINT64_MAX;
+ }
+ if (p->sign) {
+ if (r <= -(uint64_t)min) {
+ r = -r;
+ } else {
+ flags = float_flag_invalid;
+ r = min;
+ }
+ } else if (r > max) {
+ flags = float_flag_invalid;
+ r = max;
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ float_raise(flags, s);
+ return r;
+}
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 5647a05d5d..0dc2203477 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -829,6 +829,16 @@ static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
#define parts_round_to_int(A, R, C, S, F) \
PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
+static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
+ int scale, int64_t min, int64_t max,
+ float_status *s);
+static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
+ int scale, int64_t min, int64_t max,
+ float_status *s);
+
+#define parts_float_to_sint(P, R, Z, MN, MX, S) \
+ PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
+
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
@@ -2352,69 +2362,8 @@ float128 float128_round_to_int(float128 a, float_status *s)
}
/*
- * Returns the result of converting the floating-point value `a' to
- * the two's complement integer format. The conversion is performed
- * according to the IEC/IEEE Standard for Binary Floating-Point
- * Arithmetic---which means in particular that the conversion is
- * rounded according to the current rounding mode. If `a' is a NaN,
- * the largest positive integer is returned. Otherwise, if the
- * conversion overflows, the largest integer with the same sign as `a'
- * is returned.
-*/
-
-static int64_t round_to_int_and_pack(FloatParts64 p, FloatRoundMode rmode,
- int scale, int64_t min, int64_t max,
- float_status *s)
-{
- int flags = 0;
- uint64_t r;
-
- switch (p.cls) {
- case float_class_snan:
- case float_class_qnan:
- flags = float_flag_invalid;
- r = max;
- break;
-
- case float_class_inf:
- flags = float_flag_invalid;
- r = p.sign ? min : max;
- break;
-
- case float_class_zero:
- return 0;
-
- case float_class_normal:
- /* TODO: 62 = N - 2, frac_size for rounding */
- if (parts_round_to_int_normal(&p, rmode, scale, 62)) {
- flags = float_flag_inexact;
- }
-
- if (p.exp <= DECOMPOSED_BINARY_POINT) {
- r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
- } else {
- r = UINT64_MAX;
- }
- if (p.sign) {
- if (r <= -(uint64_t)min) {
- r = -r;
- } else {
- flags = float_flag_invalid;
- r = min;
- }
- } else if (r > max) {
- flags = float_flag_invalid;
- r = max;
- }
- break;
-
- default:
- g_assert_not_reached();
- }
-
- float_raise(flags, s);
- return r;
-}
+ * Floating-point to signed integer conversions
+ */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
@@ -2422,7 +2371,7 @@ int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float16_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}
int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
@@ -2431,7 +2380,7 @@ int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float16_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}
int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
@@ -2440,7 +2389,7 @@ int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float16_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}
int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
@@ -2449,7 +2398,7 @@ int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float16_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
@@ -2458,7 +2407,7 @@ int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float32_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}
int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
@@ -2467,7 +2416,7 @@ int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float32_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}
int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
@@ -2476,7 +2425,7 @@ int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float32_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
@@ -2485,7 +2434,7 @@ int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float64_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}
int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
@@ -2494,7 +2443,7 @@ int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float64_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}
int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
@@ -2503,7 +2452,52 @@ int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
FloatParts64 p;
float64_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
+ return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
+}
+
+int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
+ float_status *s)
+{
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
+}
+
+int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
+ float_status *s)
+{
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
+}
+
+int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
+ float_status *s)
+{
+ FloatParts64 p;
+
+ bfloat16_unpack_canonical(&p, a, s);
+ return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
+}
+
+static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
+ int scale, float_status *s)
+{
+ FloatParts128 p;
+
+ float128_unpack_canonical(&p, a, s);
+ return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
+}
+
+static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
+ int scale, float_status *s)
+{
+ FloatParts128 p;
+
+ float128_unpack_canonical(&p, a, s);
+ return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
int8_t float16_to_int8(float16 a, float_status *s)
@@ -2556,6 +2550,16 @@ int64_t float64_to_int64(float64 a, float_status *s)
return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
+int32_t float128_to_int32(float128 a, float_status *s)
+{
+ return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
+int64_t float128_to_int64(float128 a, float_status *s)
+{
+ return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
@@ -2601,36 +2605,14 @@ int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
-/*
- * Returns the result of converting the floating-point value `a' to
- * the two's complement integer format.
- */
-
-int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
- float_status *s)
+int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
- FloatParts64 p;
-
- bfloat16_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
+ return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}
-int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
- float_status *s)
+int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
- FloatParts64 p;
-
- bfloat16_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
-}
-
-int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
- float_status *s)
-{
- FloatParts64 p;
-
- bfloat16_unpack_canonical(&p, a, s);
- return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
+ return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
@@ -6555,191 +6537,6 @@ floatx80 floatx80_sqrt(floatx80 a, float_status *status)
}
/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the 32-bit two's complement integer format. The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic---which means in particular that the conversion is rounded
-| according to the current rounding mode. If `a' is a NaN, the largest
-| positive integer is returned. Otherwise, if the conversion overflows, the
-| largest integer with the same sign as `a' is returned.
-*----------------------------------------------------------------------------*/
-
-int32_t float128_to_int32(float128 a, float_status *status)
-{
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig0, aSig1;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
- if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
- aSig0 |= ( aSig1 != 0 );
- shiftCount = 0x4028 - aExp;
- if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
- return roundAndPackInt32(aSign, aSig0, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the 32-bit two's complement integer format. The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic, except that the conversion is always rounded toward zero. If
-| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
-| conversion overflows, the largest integer with the same sign as `a' is
-| returned.
-*----------------------------------------------------------------------------*/
-
-int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
-{
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig0, aSig1, savedASig;
- int32_t z;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- aSig0 |= ( aSig1 != 0 );
- if ( 0x401E < aExp ) {
- if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
- goto invalid;
- }
- else if ( aExp < 0x3FFF ) {
- if (aExp || aSig0) {
- float_raise(float_flag_inexact, status);
- }
- return 0;
- }
- aSig0 |= UINT64_C(0x0001000000000000);
- shiftCount = 0x402F - aExp;
- savedASig = aSig0;
- aSig0 >>= shiftCount;
- z = aSig0;
- if ( aSign ) z = - z;
- if ( ( z < 0 ) ^ aSign ) {
- invalid:
- float_raise(float_flag_invalid, status);
- return aSign ? INT32_MIN : INT32_MAX;
- }
- if ( ( aSig0<<shiftCount ) != savedASig ) {
- float_raise(float_flag_inexact, status);
- }
- return z;
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the 64-bit two's complement integer format. The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic---which means in particular that the conversion is rounded
-| according to the current rounding mode. If `a' is a NaN, the largest
-| positive integer is returned. Otherwise, if the conversion overflows, the
-| largest integer with the same sign as `a' is returned.
-*----------------------------------------------------------------------------*/
-
-int64_t float128_to_int64(float128 a, float_status *status)
-{
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig0, aSig1;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
- shiftCount = 0x402F - aExp;
- if ( shiftCount <= 0 ) {
- if ( 0x403E < aExp ) {
- float_raise(float_flag_invalid, status);
- if ( ! aSign
- || ( ( aExp == 0x7FFF )
- && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
- )
- ) {
- return INT64_MAX;
- }
- return INT64_MIN;
- }
- shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
- }
- else {
- shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
- }
- return roundAndPackInt64(aSign, aSig0, aSig1, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the 64-bit two's complement integer format. The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic, except that the conversion is always rounded toward zero.
-| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
-| the conversion overflows, the largest integer with the same sign as `a' is
-| returned.
-*----------------------------------------------------------------------------*/
-
-int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
-{
- bool aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig0, aSig1;
- int64_t z;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
- shiftCount = aExp - 0x402F;
- if ( 0 < shiftCount ) {
- if ( 0x403E <= aExp ) {
- aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
- if ( ( a.high == UINT64_C(0xC03E000000000000) )
- && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
- if (aSig1) {
- float_raise(float_flag_inexact, status);
- }
- }
- else {
- float_raise(float_flag_invalid, status);
- if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
- return INT64_MAX;
- }
- }
- return INT64_MIN;
- }
- z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
- if ( (uint64_t) ( aSig1<<shiftCount ) ) {
- float_raise(float_flag_inexact, status);
- }
- }
- else {
- if ( aExp < 0x3FFF ) {
- if ( aExp | aSig0 | aSig1 ) {
- float_raise(float_flag_inexact, status);
- }
- return 0;
- }
- z = aSig0>>( - shiftCount );
- if ( aSig1
- || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
- float_raise(float_flag_inexact, status);
- }
- }
- if ( aSign ) z = - z;
- return z;
-
-}
-
-/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point value
| `a' to the 64-bit unsigned integer format. The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point