From mboxrd@z Thu Jan 1 00:00:00 1970 X-Msuck: nntp://news.gmane.org/gmane.linux.lib.musl.general/3872 Path: news.gmane.org!not-for-mail From: Szabolcs Nagy Newsgroups: gmane.linux.lib.musl.general Subject: Re: Conformance issues to address after 0.9.12 release Date: Mon, 12 Aug 2013 21:27:04 +0200 Message-ID: <20130812192704.GE5368@port70.net> References: <20130729063456.GA31564@brightrain.aerifal.cx> <20130729160046.GC25714@port70.net> <20130729210448.GG4284@brightrain.aerifal.cx> <20130730022621.GF25714@port70.net> Reply-To: musl@lists.openwall.com NNTP-Posting-Host: plane.gmane.org Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="AqsLC8rIMeq19msA" X-Trace: ger.gmane.org 1376335638 26599 80.91.229.3 (12 Aug 2013 19:27:18 GMT) X-Complaints-To: usenet@ger.gmane.org NNTP-Posting-Date: Mon, 12 Aug 2013 19:27:18 +0000 (UTC) To: musl@lists.openwall.com Original-X-From: musl-return-3876-gllmg-musl=m.gmane.org@lists.openwall.com Mon Aug 12 21:27:21 2013 Return-path: Envelope-to: gllmg-musl@plane.gmane.org Original-Received: from mother.openwall.net ([195.42.179.200]) by plane.gmane.org with smtp (Exim 4.69) (envelope-from ) id 1V8xlw-0002ag-Fg for gllmg-musl@plane.gmane.org; Mon, 12 Aug 2013 21:27:20 +0200 Original-Received: (qmail 15423 invoked by uid 550); 12 Aug 2013 19:27:18 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: Original-Received: (qmail 15409 invoked from network); 12 Aug 2013 19:27:18 -0000 Content-Disposition: inline In-Reply-To: <20130730022621.GF25714@port70.net> User-Agent: Mutt/1.5.21 (2010-09-15) Xref: news.gmane.org gmane.linux.lib.musl.general:3872 Archived-At: --AqsLC8rIMeq19msA Content-Type: text/plain; charset=us-ascii Content-Disposition: inline * Szabolcs Nagy [2013-07-30 04:26:21 +0200]: > * Rich Felker [2013-07-29 17:04:48 -0400]: > > On Mon, Jul 29, 2013 at 06:00:46PM +0200, Szabolcs Nagy wrote: > > > * Rich Felker [2013-07-29 02:34:56 -0400]: > > > > - i387 math asm does not truncate excess precision. Whether or not > > > > this omission is conforming in terms of the return value, it results > > > > in lost underflow exceptions, as demonstrated by nsz's math tests. > > > > > > the underflow problem is not i387 or excess precision related: > > > many (odd) math functions are almost the identity function > > > around x==0 (sin,asin,tan,atan,sinh,atanh,..) > > > > In the case of asin, etc it _is_ the excess precision. > > ah yes functions with x87 asm are different > > but the c code for asin has the issues i described > so it should fail to raise underflow on arm as well as discussed on irc.. iso c with annex f allows inexact to be omitted or raised spuriously, underflow may be raised spuriously but cannot be omitted, the other three flags must be raised correctly (except when explicitly specified otherwise) fdlibm was written so that all flags are respected, but there are many bugs so i think the reasonable behaviour is: - raise all flags correctly for correctly rounded functions (sqrt, fma, nextafter, fmax, rounding functions,..), otherwise - raise overflow, divbyzero, invalid flags correctly - don't care about inexact - raise underflow correctly when *possible * the requirement that underflow cannot be omitted is hard to fulfill when FLT_EVAL_METHOD!=0, for details see https://groups.google.com/forum/#!topic/comp.std.c/oosKDrY28E8 attached fenv flag fixes for some math functions (havent written commit messages yet and some other changes may be mixed into the diff) for i386 asm i check underflow first and only do the magic (store an underflowing float into memory) when it's not set (since the store is significantly slower than the flag check) however i'm not sure about the gain: if you have subnormal values around everything will be slow, avoiding an extra rounding probably does not buy much in the c version the fix can be ugly if FLT_EVAL_METHOD!=0 is supported (i use volatile hacks through FORCE_EVAL) fma, pow, atan2 are known to be broken (fma is fixed for i386 but not fmal and fmaf) --AqsLC8rIMeq19msA Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="math.diff" diff --git a/src/math/asin.c b/src/math/asin.c index 3e8f99e..c926b18 100644 --- a/src/math/asin.c +++ b/src/math/asin.c @@ -82,11 +82,9 @@ double asin(double x) } /* |x| < 0.5 */ if (ix < 0x3fe00000) { - if (ix < 0x3e500000) { - /* |x|<0x1p-26, return x with inexact if x!=0*/ - FORCE_EVAL(x + 0x1p120f); + /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */ + if (ix < 0x3e500000 && ix >= 0x00100000) return x; - } return x + x*R(x*x); } /* 1 > |x| >= 0.5 */ diff --git a/src/math/asinf.c b/src/math/asinf.c index 51fe6c6..bcd304a 100644 --- a/src/math/asinf.c +++ b/src/math/asinf.c @@ -46,10 +46,9 @@ float asinf(float x) return 0/(x-x); /* asin(|x|>1) is NaN */ } if (ix < 0x3f000000) { /* |x| < 0.5 */ - if (ix < 0x39800000) { /* |x| < 2**-12 */ - FORCE_EVAL(x + 0x1p120f); - return x; /* return x with inexact if x!=0 */ - } + /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */ + if (ix < 0x39800000 && ix >= 0x00800000) + return x; return x + x*R(x*x); } /* 1 > |x| >= 0.5 */ diff --git a/src/math/atan.c b/src/math/atan.c index 5a1d33e..63b5ad0 100644 --- a/src/math/atan.c +++ b/src/math/atan.c @@ -77,8 +77,9 @@ double atan(double x) } if (ix < 0x3fdc0000) { /* |x| < 0.4375 */ if (ix < 0x3e400000) { /* |x| < 2^-27 */ - /* raise inexact if x!=0 */ - FORCE_EVAL(x + 0x1p120f); + if (ix < 0x00100000) + /* raise underflow for subnormal x */ + FORCE_EVAL(x*x); return x; } id = -1; diff --git a/src/math/atanf.c b/src/math/atanf.c index ac8bfd0..178341b 100644 --- a/src/math/atanf.c +++ b/src/math/atanf.c @@ -55,8 +55,9 @@ float atanf(float x) } if (ix < 0x3ee00000) { /* |x| < 0.4375 */ if (ix < 0x39800000) { /* |x| < 2**-12 */ - /* raise inexact if x!=0 */ - FORCE_EVAL(x + 0x1p120f); + if (ix < 0x00800000) + /* raise underflow for subnormal x */ + FORCE_EVAL(x*x); return x; } id = -1; diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s index 932c754..7e49409 100644 --- a/src/math/i386/asin.s +++ b/src/math/i386/asin.s @@ -2,7 +2,20 @@ .type asinf,@function asinf: flds 4(%esp) - jmp 1f + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + push %eax + fld %st(0) + fmul %st(1) + fstps (%esp) + pop %eax +2: ret .global asinl .type asinl,@function @@ -14,6 +27,18 @@ asinl: .type asin,@function asin: fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + push %eax + fsts (%esp) + pop %eax +2: ret 1: fld %st(0) fld1 fsub %st(0),%st(1) diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s index 7e28b39..b46f5a3 100644 --- a/src/math/i386/atan.s +++ b/src/math/i386/atan.s @@ -2,6 +2,18 @@ .type atan,@function atan: fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jb 1f fld1 fpatan ret + # subnormal x, return x with underflow +1: fnstsw %ax + and $16,%ax + jnz 2f + push %eax + fsts (%esp) + pop %eax +2: ret diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s index 3cd4023..67cbf7c 100644 --- a/src/math/i386/atanf.s +++ b/src/math/i386/atanf.s @@ -2,6 +2,20 @@ .type atanf,@function atanf: flds 4(%esp) + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jb 1f fld1 fpatan ret + # subnormal x, return x with underflow +1: fnstsw %ax + and $16,%ax + jnz 2f + push %eax + fld %st(0) + fmul %st(1) + fstps (%esp) + pop %eax +2: ret diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s index e3b42af..8cf45c4 100644 --- a/src/math/i386/exp.s +++ b/src/math/i386/exp.s @@ -2,7 +2,20 @@ .type expm1f,@function expm1f: flds 4(%esp) - jmp 1f + mov 4(%esp),%eax + add %eax,%eax + cmp $0x01000000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + push %eax + fld %st(0) + fmul %st(1) + fstps (%esp) + pop %eax +2: ret .global expm1l .type expm1l,@function @@ -14,10 +27,34 @@ expm1l: .type expm1,@function expm1: fldl 4(%esp) + mov 8(%esp),%eax + add %eax,%eax + cmp $0x00200000,%eax + jae 1f + # subnormal x, return x with underflow + fnstsw %ax + and $16,%ax + jnz 2f + push %eax + fsts (%esp) + pop %eax +2: ret 1: fldl2e fmulp + mov $0xc2820000,%eax + push %eax + flds (%esp) + pop %eax + fucomp %st(1) + fnstsw %ax + sahf fld1 - fld %st(1) + jb 1f + # x*log2e < -65, return -1 without underflow + fstp %st(1) + fchs + ret +1: fld %st(1) fabs fucom %st(1) fnstsw %ax diff --git a/src/math/i386/expl.s b/src/math/i386/expl.s index 8ceb40d..61ef1dd 100644 --- a/src/math/i386/expl.s +++ b/src/math/i386/expl.s @@ -8,34 +8,27 @@ expl: fldt 4(%esp) - # special cases: 2*x is +-inf, nan or |x| < 0x1p-32 - # check (exponent|0x8000)+2 < 0xbfff+2-32 - movw 12(%esp), %ax - movw %ax, %dx - orw $0x8000, %dx - addw $2, %dx - cmpw $0xbfff-30, %dx - jnb 3f - cmpw $1, %dx - jbe 1f - # if |x|<0x1p-32 return 1+x + # interesting case: 0x1p-32 <= |x| < 16384 + # check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13] + mov 12(%esp), %ax + or $0x8000, %ax + sub $0xbfdf, %ax + cmp $45, %ax + jbe 2f + test %ax, %ax fld1 - jmp 2f -1: testw %ax, %ax - jns 1f - # if 2*x == -inf,-nan return -0/x - fldz - fchs - fdivp + js 1f + # if |x|>=0x1p14 or nan return 2^trunc(x) + fscale + fstp %st(1) ret - # if 2*x == inf,nan return 2*x -1: fld %st(0) -2: faddp + # if |x|<0x1p-32 return 1+x +1: faddp ret - # should be 0x1.71547652b82fe178p0 == 0x3fff b8aa3b29 5c17f0bc + # should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc # it will be wrong on non-nearest rounding mode -3: fldl2e +2: fldl2e subl $44, %esp # hi = log2e_hi*x # 2^hi = exp2l(hi) diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s index 9971e53..3203f06 100644 --- a/src/math/i386/log1p.s +++ b/src/math/i386/log1p.s @@ -7,9 +7,20 @@ log1p: fldl 4(%esp) cmp $0x3fd28f00,%eax ja 1f + cmp $0x00100000,%eax + jb 2f fyl2xp1 ret 1: fld1 faddp fyl2x ret + # subnormal x, return x with underflow +2: fnstsw %ax + and $16,%ax + jnz 1f + push %eax + fsts (%esp) + fstp %st(1) + pop %eax +1: ret diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s index 2680a8a..ada6109 100644 --- a/src/math/i386/log1pf.s +++ b/src/math/i386/log1pf.s @@ -7,9 +7,21 @@ log1pf: flds 4(%esp) cmp $0x3e940000,%eax ja 1f + cmp $0x00800000,%eax + jb 2f fyl2xp1 ret 1: fld1 faddp fyl2x ret + # subnormal x, return x with underflow +2: fnstsw %ax + and $16,%ax + jnz 1f + push %eax + fxch + fmul %st(1) + fstps (%esp) + pop %eax +1: ret diff --git a/src/math/log1p.c b/src/math/log1p.c index 6c67249..0cb71c6 100644 --- a/src/math/log1p.c +++ b/src/math/log1p.c @@ -104,9 +104,12 @@ double log1p(double x) return (x-x)/(x-x); /* log1p(x<-1)=NaN */ } if (ax < 0x3e200000) { /* |x| < 2**-29 */ - /* raise inexact */ - if (two54 + x > 0.0 && ax < 0x3c900000) /* |x| < 2**-54 */ + /* if 0x1p-1022 <= |x| < 0x1p-54, avoid raising underflow */ + if (ax < 0x3c900000 && ax >= 0x00100000) return x; +#if FLT_EVAL_METHOD != 0 + FORCE_EVAL(x*x); +#endif return x - x*x*0.5; } if (hx > 0 || hx <= (int32_t)0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ diff --git a/src/math/log1pf.c b/src/math/log1pf.c index 39832d2..c38e0bc 100644 --- a/src/math/log1pf.c +++ b/src/math/log1pf.c @@ -43,9 +43,12 @@ float log1pf(float x) return (x-x)/(x-x); /* log1p(x<-1)=NaN */ } if (ax < 0x38000000) { /* |x| < 2**-15 */ - /* raise inexact */ - if (two25 + x > 0.0f && ax < 0x33800000) /* |x| < 2**-24 */ + /* if 0x1p-126 <= |x| < 0x1p-24, avoid raising underflow */ + if (ax < 0x33800000 && ax >= 0x00800000) return x; +#if FLT_EVAL_METHOD != 0 + FORCE_EVAL(x*x); +#endif return x - x*x*0.5f; } if (hx > 0 || hx <= (int32_t)0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ diff --git a/src/math/pow.c b/src/math/pow.c index f257814..ac3abc0 100644 --- a/src/math/pow.c +++ b/src/math/pow.c @@ -143,7 +143,7 @@ double pow(double x, double y) return 1.0; else if (ix >= 0x3ff00000) /* (|x|>1)**+-inf = inf,0 */ return hy >= 0 ? y : 0.0; - else /* (|x|<1)**+-inf = 0,inf */ + else if ((ix|lx) != 0) /* (|x|<1)**+-inf = 0,inf if x!=0 */ return hy >= 0 ? 0.0 : -y; } if (iy == 0x3ff00000) /* y is +-1 */ diff --git a/src/math/powf.c b/src/math/powf.c index 427c896..59baf6f 100644 --- a/src/math/powf.c +++ b/src/math/powf.c @@ -90,7 +90,7 @@ float powf(float x, float y) return 1.0f; else if (ix > 0x3f800000) /* (|x|>1)**+-inf = inf,0 */ return hy >= 0 ? y : 0.0f; - else /* (|x|<1)**+-inf = 0,inf */ + else if (ix != 0) /* (|x|<1)**+-inf = 0,inf if x!=0 */ return hy >= 0 ? 0.0f: -y; } if (iy == 0x3f800000) /* y is +-1 */ diff --git a/src/math/scalbn.c b/src/math/scalbn.c index 003141e..1fec432 100644 --- a/src/math/scalbn.c +++ b/src/math/scalbn.c @@ -10,10 +10,8 @@ double scalbn(double x, int n) if (n > 1023) { x *= 0x1p1023; n -= 1023; - if (n > 1023) { - STRICT_ASSIGN(double, x, x * 0x1p1023); - return x; - } + if (n > 1023) + n = 1023; } } else if (n < -1022) { x *= 0x1p-1022; @@ -21,10 +19,8 @@ double scalbn(double x, int n) if (n < -1022) { x *= 0x1p-1022; n += 1022; - if (n < -1022) { - STRICT_ASSIGN(double, x, x * 0x1p-1022); - return x; - } + if (n < -1022) + n = -1022; } } INSERT_WORDS(scale, (uint32_t)(0x3ff+n)<<20, 0); diff --git a/src/math/scalbnf.c b/src/math/scalbnf.c index f94b5d5..c0eeaf8 100644 --- a/src/math/scalbnf.c +++ b/src/math/scalbnf.c @@ -10,10 +10,8 @@ float scalbnf(float x, int n) if (n > 127) { x *= 0x1p127f; n -= 127; - if (n > 127) { - STRICT_ASSIGN(float, x, x * 0x1p127f); - return x; - } + if (n > 127) + n = 127; } } else if (n < -126) { x *= 0x1p-126f; @@ -21,10 +19,8 @@ float scalbnf(float x, int n) if (n < -126) { x *= 0x1p-126f; n += 126; - if (n < -126) { - STRICT_ASSIGN(float, x, x * 0x1p-126f); - return x; - } + if (n < -126) + n = -126; } } SET_FLOAT_WORD(scale, (uint32_t)(0x7f+n)<<23); diff --git a/src/math/scalbnl.c b/src/math/scalbnl.c index c605b8d..7ad7688 100644 --- a/src/math/scalbnl.c +++ b/src/math/scalbnl.c @@ -17,7 +17,7 @@ long double scalbnl(long double x, int n) x *= 0x1p16383L; n -= 16383; if (n > 16383) - return x * 0x1p16383L; + n = 16383; } } else if (n < -16382) { x *= 0x1p-16382L; @@ -26,7 +26,7 @@ long double scalbnl(long double x, int n) x *= 0x1p-16382L; n += 16382; if (n < -16382) - return x * 0x1p-16382L; + n = -16382; } } scale.e = 1.0; diff --git a/src/math/sinh.c b/src/math/sinh.c index 47e36bf..00022c4 100644 --- a/src/math/sinh.c +++ b/src/math/sinh.c @@ -23,8 +23,8 @@ double sinh(double x) t = expm1(absx); if (w < 0x3ff00000) { if (w < 0x3ff00000 - (26<<20)) - /* note: inexact is raised by expm1 */ - /* note: this branch avoids underflow */ + /* note: inexact and underflow are raised by expm1 */ + /* note: this branch avoids spurious underflow */ return x; return h*(2*t - t*t/(t+1)); } diff --git a/src/math/tanh.c b/src/math/tanh.c index 0e766c5..65393c6 100644 --- a/src/math/tanh.c +++ b/src/math/tanh.c @@ -9,7 +9,7 @@ double tanh(double x) union {double f; uint64_t i;} u = {.f = x}; uint32_t w; int sign; - double t; + double_t t; /* x = |x| */ sign = u.i >> 63; @@ -22,8 +22,7 @@ double tanh(double x) if (w > 0x40340000) { /* |x| > 20 or nan */ /* note: this branch avoids raising overflow */ - /* raise inexact if x!=+-inf and handle nan */ - t = 1 + 0/(x + 0x1p-120f); + t = 1 - 0/x; } else { t = expm1(2*x); t = 1 - 2/(t+2); @@ -32,10 +31,15 @@ double tanh(double x) /* |x| > log(5/3)/2 ~= 0.2554 */ t = expm1(2*x); t = t/(t+2); - } else { - /* |x| is small, up to 2ulp error in [0.1,0.2554] */ + } else if (w >= 0x00100000) { + /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */ t = expm1(-2*x); t = -t/(t+2); + } else { + /* |x| is subnormal */ + /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */ + FORCE_EVAL(x*x); + t = x; } return sign ? -t : t; } diff --git a/src/math/tanhf.c b/src/math/tanhf.c index 8099ec3..10636fb 100644 --- a/src/math/tanhf.c +++ b/src/math/tanhf.c @@ -17,7 +17,7 @@ float tanhf(float x) /* |x| > log(3)/2 ~= 0.5493 or nan */ if (w > 0x41200000) { /* |x| > 10 */ - t = 1 + 0/(x + 0x1p-120f); + t = 1 + 0/x; } else { t = expm1f(2*x); t = 1 - 2/(t+2); @@ -26,10 +26,14 @@ float tanhf(float x) /* |x| > log(5/3)/2 ~= 0.2554 */ t = expm1f(2*x); t = t/(t+2); - } else { - /* |x| is small */ + } else if (w >= 0x00800000) { + /* |x| >= 0x1p-126 */ t = expm1f(-2*x); t = -t/(t+2); + } else { + /* |x| is subnormal */ + FORCE_EVAL(x*x); + t = x; } return sign ? -t : t; } diff --git a/src/math/tgamma.c b/src/math/tgamma.c index 691e86a..852dcf7 100644 --- a/src/math/tgamma.c +++ b/src/math/tgamma.c @@ -137,6 +137,7 @@ double tgamma(double x) /* x =< -184: tgamma(x)=+-0 with underflow */ if (absx >= 184) { if (x < 0) { + FORCE_EVAL(0x1p-1022/x); if (floor(x) * 0.5 == floor(x * 0.5)) return 0; return -0.0; diff --git a/src/math/x86_64/expl.s b/src/math/x86_64/expl.s index 6d5c1ce..107f3f5 100644 --- a/src/math/x86_64/expl.s +++ b/src/math/x86_64/expl.s @@ -8,32 +8,25 @@ expl: fldt 8(%rsp) - # special cases: 2*x is +-inf, nan or |x| < 0x1p-32 - # check (exponent|0x8000)+2 < 0xbfff+2-32 - movw 16(%rsp), %ax - movw %ax, %dx - orw $0x8000, %dx - addw $2, %dx - cmpw $0xbfff-30, %dx - jnb 3f - cmpw $1, %dx - jbe 1f - # if |x|<0x1p-32 return 1+x + # interesting case: 0x1p-32 <= |x| < 16384 + # check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13] + mov 16(%rsp), %ax + or $0x8000, %ax + sub $0xbfdf, %ax + cmp $45, %ax + jbe 2f + test %ax, %ax fld1 - jmp 2f -1: testw %ax, %ax - jns 1f - # if 2*x == -inf,-nan return -0/x - fldz - fchs - fdivp + js 1f + # if |x|>=0x1p14 or nan return 2^trunc(x) + fscale + fstp %st(1) ret - # if 2*x == inf,nan return 2*x -1: fld %st(0) -2: faddp + # if |x|<0x1p-32 return 1+x +1: faddp ret - # should be 0x1.71547652b82fe178p0 == 0x3fff b8aa3b29 5c17f0bc + # should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc # it will be wrong on non-nearest rounding mode 3: fldl2e subq $48, %rsp --AqsLC8rIMeq19msA--