aboutsummaryrefslogtreecommitdiff
path: root/fpu
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2020-11-22 10:42:22 -0800
committerRichard Henderson <richard.henderson@linaro.org>2021-06-03 14:09:02 -0700
commit2fa3546c8f55c4548240489518784b1da4f182b5 (patch)
treebcc5f7ab7624f4b9850ef008b79834c86894b8f2 /fpu
parent572c4d862ff2b5f1525044639aa60ec5854c813d (diff)
softfloat: Move floatN_log2 to softfloat-parts.c.inc
Rename to parts$N_log2. Though this is partly a ruse, since I do not believe the code will succeed for float128 without work. Which is ok for now, because we do not need this for more than float32 and float64. Since berkeley-testfloat-3 doesn't support log2, compare float64_log2 vs the system log2. Fix the errors for inputs near 1.0: test: 3ff00000000000b0 +0x1.00000000000b0p+0 sf: 3d2fa00000000000 +0x1.fa00000000000p-45 libm: 3d2fbd422b1bd36f +0x1.fbd422b1bd36fp-45 Error in fraction: 32170028290927 ulp test: 3feec24f6770b100 +0x1.ec24f6770b100p-1 sf: bfad3740d13c9ec0 -0x1.d3740d13c9ec0p-5 libm: bfad3740d13c9e98 -0x1.d3740d13c9e98p-5 Error in fraction: 40 ulp Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'fpu')
-rw-r--r--fpu/softfloat-parts.c.inc125
-rw-r--r--fpu/softfloat.c126
2 files changed, 152 insertions, 99 deletions
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index efb81bbebe..d1bd5c6edf 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -1331,3 +1331,128 @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
g_assert_not_reached();
}
}
+
+/*
+ * Return log2(A)
+ */
+static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
+{
+ uint64_t a0, a1, r, t, ign;
+ FloatPartsN f;
+ int i, n, a_exp, f_exp;
+
+ if (unlikely(a->cls != float_class_normal)) {
+ switch (a->cls) {
+ case float_class_snan:
+ case float_class_qnan:
+ parts_return_nan(a, s);
+ return;
+ case float_class_zero:
+ /* log2(0) = -inf */
+ a->cls = float_class_inf;
+ a->sign = 1;
+ return;
+ case float_class_inf:
+ if (unlikely(a->sign)) {
+ goto d_nan;
+ }
+ return;
+ default:
+ break;
+ }
+ g_assert_not_reached();
+ }
+ if (unlikely(a->sign)) {
+ goto d_nan;
+ }
+
+ /* TODO: This algorithm looses bits too quickly for float128. */
+ g_assert(N == 64);
+
+ a_exp = a->exp;
+ f_exp = -1;
+
+ r = 0;
+ t = DECOMPOSED_IMPLICIT_BIT;
+ a0 = a->frac_hi;
+ a1 = 0;
+
+ n = fmt->frac_size + 2;
+ if (unlikely(a_exp == -1)) {
+ /*
+ * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
+ * When the value is very close to 1.0, there are lots of 1's in
+ * the msb parts of the fraction. At the end, when we subtract
+ * this value from -1.0, we can see a catastrophic loss of precision,
+ * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
+ * bits of y in the final result. To minimize this, compute as many
+ * digits as we can.
+ * ??? This case needs another algorithm to avoid this.
+ */
+ n = fmt->frac_size * 2 + 2;
+ /* Don't compute a value overlapping the sticky bit */
+ n = MIN(n, 62);
+ }
+
+ for (i = 0; i < n; i++) {
+ if (a1) {
+ mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
+ } else if (a0 & 0xffffffffull) {
+ mul64To128(a0, a0, &a0, &a1);
+ } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
+ a0 >>= 32;
+ a0 *= a0;
+ } else {
+ goto exact;
+ }
+
+ if (a0 & DECOMPOSED_IMPLICIT_BIT) {
+ if (unlikely(a_exp == 0 && r == 0)) {
+ /*
+ * When a_exp == 0, we're computing the log2 of a value
+ * [1.0,2.0). When the value is very close to 1.0, there
+ * are lots of 0's in the msb parts of the fraction.
+ * We need to compute more digits to produce a correct
+ * result -- restart at the top of the fraction.
+ * ??? This is likely to lose precision quickly, as for
+ * float128; we may need another method.
+ */
+ f_exp -= i;
+ t = r = DECOMPOSED_IMPLICIT_BIT;
+ i = 0;
+ } else {
+ r |= t;
+ }
+ } else {
+ add128(a0, a1, a0, a1, &a0, &a1);
+ }
+ t >>= 1;
+ }
+
+ /* Set sticky for inexact. */
+ r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);
+
+ exact:
+ parts_sint_to_float(a, a_exp, 0, s);
+ if (r == 0) {
+ return;
+ }
+
+ memset(&f, 0, sizeof(f));
+ f.cls = float_class_normal;
+ f.frac_hi = r;
+ f.exp = f_exp - frac_normalize(&f);
+
+ if (a_exp < 0) {
+ parts_sub_normal(a, &f);
+ } else if (a_exp > 0) {
+ parts_add_normal(a, &f);
+ } else {
+ *a = f;
+ }
+ return;
+
+ d_nan:
+ float_raise(float_flag_invalid, s);
+ parts_default_nan(a, s);
+}
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 27306d6a93..c0fe191f4d 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -927,6 +927,12 @@ static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
#define parts_scalbn(A, N, S) \
PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
+static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
+static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
+
+#define parts_log2(A, S, F) \
+ PARTS_GENERIC_64_128(log2, A)(A, S, F)
+
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
@@ -4060,6 +4066,27 @@ floatx80 floatx80_sqrt(floatx80 a, float_status *s)
return floatx80_round_pack_canonical(&p, s);
}
+/*
+ * log2
+ */
+float32 float32_log2(float32 a, float_status *status)
+{
+ FloatParts64 p;
+
+ float32_unpack_canonical(&p, a, status);
+ parts_log2(&p, status, &float32_params);
+ return float32_round_pack_canonical(&p, status);
+}
+
+float64 float64_log2(float64 a, float_status *status)
+{
+ FloatParts64 p;
+
+ float64_unpack_canonical(&p, a, status);
+ parts_log2(&p, status, &float64_params);
+ return float64_round_pack_canonical(&p, status);
+}
+
/*----------------------------------------------------------------------------
| The pattern for a default generated NaN.
*----------------------------------------------------------------------------*/
@@ -5247,56 +5274,6 @@ float32 float32_exp2(float32 a, float_status *status)
}
/*----------------------------------------------------------------------------
-| Returns the binary log of the single-precision floating-point value `a'.
-| The operation is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-float32 float32_log2(float32 a, float_status *status)
-{
- bool aSign, zSign;
- int aExp;
- uint32_t aSig, zSig, i;
-
- a = float32_squash_input_denormal(a, status);
- aSig = extractFloat32Frac( a );
- aExp = extractFloat32Exp( a );
- aSign = extractFloat32Sign( a );
-
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
- normalizeFloat32Subnormal( aSig, &aExp, &aSig );
- }
- if ( aSign ) {
- float_raise(float_flag_invalid, status);
- return float32_default_nan(status);
- }
- if ( aExp == 0xFF ) {
- if (aSig) {
- return propagateFloat32NaN(a, float32_zero, status);
- }
- return a;
- }
-
- aExp -= 0x7F;
- aSig |= 0x00800000;
- zSign = aExp < 0;
- zSig = aExp << 23;
-
- for (i = 1 << 22; i > 0; i >>= 1) {
- aSig = ( (uint64_t)aSig * aSig ) >> 23;
- if ( aSig & 0x01000000 ) {
- aSig >>= 1;
- zSig |= i;
- }
- }
-
- if ( zSign )
- zSig = -zSig;
-
- return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
-}
-
-/*----------------------------------------------------------------------------
| Returns the remainder of the double-precision floating-point value `a'
| with respect to the corresponding value `b'. The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
@@ -5385,55 +5362,6 @@ float64 float64_rem(float64 a, float64 b, float_status *status)
}
/*----------------------------------------------------------------------------
-| Returns the binary log of the double-precision floating-point value `a'.
-| The operation is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-float64 float64_log2(float64 a, float_status *status)
-{
- bool aSign, zSign;
- int aExp;
- uint64_t aSig, aSig0, aSig1, zSig, i;
- a = float64_squash_input_denormal(a, status);
-
- aSig = extractFloat64Frac( a );
- aExp = extractFloat64Exp( a );
- aSign = extractFloat64Sign( a );
-
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
- normalizeFloat64Subnormal( aSig, &aExp, &aSig );
- }
- if ( aSign ) {
- float_raise(float_flag_invalid, status);
- return float64_default_nan(status);
- }
- if ( aExp == 0x7FF ) {
- if (aSig) {
- return propagateFloat64NaN(a, float64_zero, status);
- }
- return a;
- }
-
- aExp -= 0x3FF;
- aSig |= UINT64_C(0x0010000000000000);
- zSign = aExp < 0;
- zSig = (uint64_t)aExp << 52;
- for (i = 1LL << 51; i > 0; i >>= 1) {
- mul64To128( aSig, aSig, &aSig0, &aSig1 );
- aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
- if ( aSig & UINT64_C(0x0020000000000000) ) {
- aSig >>= 1;
- zSig |= i;
- }
- }
-
- if ( zSign )
- zSig = -zSig;
- return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
-}
-
-/*----------------------------------------------------------------------------
| Rounds the extended double-precision floating-point value `a'
| to the precision provided by floatx80_rounding_precision and returns the
| result as an extended double-precision floating-point value.