diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2020-11-13 18:17:39 -0800 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2021-05-16 07:13:51 -0500 |
commit | 9882ccaff93b5f4c8fdc775074dd92f1a9a17b61 (patch) | |
tree | 2badc628e3f02163feb7a861a2f414cf5b70e989 /fpu/softfloat.c | |
parent | c3f1875ea3004fa6ffa7788804635919d3f0f828 (diff) |
softfloat: Convert float-to-float conversions with float128
Introduce parts_float_to_float_widen and parts_float_to_float_narrow.
Use them for float128_to_float{32,64} and float{32,64}_to_float128.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'fpu/softfloat.c')
-rw-r--r-- | fpu/softfloat.c | 203 |
1 files changed, 69 insertions, 134 deletions
diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 80025539ef..d056b5730b 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -2092,6 +2092,35 @@ static void parts128_float_to_float(FloatParts128 *a, float_status *s) #define parts_float_to_float(P, S) \ PARTS_GENERIC_64_128(float_to_float, P)(P, S) +static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, + float_status *s) +{ + a->cls = b->cls; + a->sign = b->sign; + a->exp = b->exp; + + if (a->cls == float_class_normal) { + frac_truncjam(a, b); + } else if (is_nan(a->cls)) { + /* Discard the low bits of the NaN. */ + a->frac = b->frac_hi; + parts_return_nan(a, s); + } +} + +static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b, + float_status *s) +{ + a->cls = b->cls; + a->sign = b->sign; + a->exp = b->exp; + frac_widen(a, b); + + if (is_nan(a->cls)) { + parts_return_nan(a, s); + } +} + float32 float16_to_float32(float16 a, bool ieee, float_status *s) { const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; @@ -2215,6 +2244,46 @@ bfloat16 float64_to_bfloat16(float64 a, float_status *s) return bfloat16_round_pack_canonical(&p, s); } +float32 float128_to_float32(float128 a, float_status *s) +{ + FloatParts64 p64; + FloatParts128 p128; + + float128_unpack_canonical(&p128, a, s); + parts_float_to_float_narrow(&p64, &p128, s); + return float32_round_pack_canonical(&p64, s); +} + +float64 float128_to_float64(float128 a, float_status *s) +{ + FloatParts64 p64; + FloatParts128 p128; + + float128_unpack_canonical(&p128, a, s); + parts_float_to_float_narrow(&p64, &p128, s); + return float64_round_pack_canonical(&p64, s); +} + +float128 float32_to_float128(float32 a, float_status *s) +{ + FloatParts64 p64; + FloatParts128 p128; + + float32_unpack_canonical(&p64, a, s); + parts_float_to_float_widen(&p128, &p64, s); + return float128_round_pack_canonical(&p128, s); +} + +float128 float64_to_float128(float64 a, float_status *s) +{ + FloatParts64 p64; + FloatParts128 p128; + + float64_unpack_canonical(&p64, a, s); + parts_float_to_float_widen(&p128, &p64, s); + return float128_round_pack_canonical(&p128, s); +} + /* * Rounds the floating-point value `a' to an integer, and returns the * result as a floating-point value. The operation is performed @@ -5176,38 +5245,6 @@ floatx80 float32_to_floatx80(float32 a, float_status *status) } /*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the double-precision floating-point format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 float32_to_float128(float32 a, float_status *status) -{ - bool aSign; - int aExp; - uint32_t aSig; - - a = float32_squash_input_denormal(a, status); - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if (aSig) { - return commonNaNToFloat128(float32ToCommonNaN(a, status), status); - } - return packFloat128( aSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - --aExp; - } - return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 ); - -} - -/*---------------------------------------------------------------------------- | Returns the remainder of the single-precision floating-point value `a' | with respect to the corresponding value `b'. The operation is performed | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. @@ -5481,40 +5518,6 @@ floatx80 float64_to_floatx80(float64 a, float_status *status) } /*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the quadruple-precision floating-point format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 float64_to_float128(float64 a, float_status *status) -{ - bool aSign; - int aExp; - uint64_t aSig, zSig0, zSig1; - - a = float64_squash_input_denormal(a, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if (aSig) { - return commonNaNToFloat128(float64ToCommonNaN(a, status), status); - } - return packFloat128( aSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - --aExp; - } - shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); - return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); - -} - - -/*---------------------------------------------------------------------------- | Returns the remainder of the double-precision floating-point value `a' | with respect to the corresponding value `b'. The operation is performed | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. @@ -6917,74 +6920,6 @@ uint32_t float128_to_uint32(float128 a, float_status *status) /*---------------------------------------------------------------------------- | Returns the result of converting the quadruple-precision floating-point -| value `a' to the single-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float128_to_float32(float128 a, float_status *status) -{ - bool aSign; - int32_t aExp; - uint64_t aSig0, aSig1; - uint32_t zSig; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloat32(float128ToCommonNaN(a, status), status); - } - return packFloat32( aSign, 0xFF, 0 ); - } - aSig0 |= ( aSig1 != 0 ); - shift64RightJamming( aSig0, 18, &aSig0 ); - zSig = aSig0; - if ( aExp || zSig ) { - zSig |= 0x40000000; - aExp -= 0x3F81; - } - return roundAndPackFloat32(aSign, aExp, zSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float128_to_float64(float128 a, float_status *status) -{ - bool aSign; - int32_t aExp; - uint64_t aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloat64(float128ToCommonNaN(a, status), status); - } - return packFloat64( aSign, 0x7FF, 0 ); - } - shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); - aSig0 |= ( aSig1 != 0 ); - if ( aExp || aSig0 ) { - aSig0 |= UINT64_C(0x4000000000000000); - aExp -= 0x3C01; - } - return roundAndPackFloat64(aSign, aExp, aSig0, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point | value `a' to the extended double-precision floating-point format. The | conversion is performed according to the IEC/IEEE Standard for Binary | Floating-Point Arithmetic. |