From mboxrd@z Thu Jan  1 00:00:00 1970
X-Msuck: nntp://news.gmane.org/gmane.linux.lib.musl.general/3872
Path: news.gmane.org!not-for-mail
From: Szabolcs Nagy <nsz@port70.net>
Newsgroups: gmane.linux.lib.musl.general
Subject: Re: Conformance issues to address after 0.9.12 release
Date: Mon, 12 Aug 2013 21:27:04 +0200
Message-ID: <20130812192704.GE5368@port70.net>
References: <20130729063456.GA31564@brightrain.aerifal.cx>
 <20130729160046.GC25714@port70.net>
 <20130729210448.GG4284@brightrain.aerifal.cx>
 <20130730022621.GF25714@port70.net>
Reply-To: musl@lists.openwall.com
NNTP-Posting-Host: plane.gmane.org
Mime-Version: 1.0
Content-Type: multipart/mixed; boundary="AqsLC8rIMeq19msA"
X-Trace: ger.gmane.org 1376335638 26599 80.91.229.3 (12 Aug 2013 19:27:18 GMT)
X-Complaints-To: usenet@ger.gmane.org
NNTP-Posting-Date: Mon, 12 Aug 2013 19:27:18 +0000 (UTC)
To: musl@lists.openwall.com
Original-X-From: musl-return-3876-gllmg-musl=m.gmane.org@lists.openwall.com Mon Aug 12 21:27:21 2013
Return-path: <musl-return-3876-gllmg-musl=m.gmane.org@lists.openwall.com>
Envelope-to: gllmg-musl@plane.gmane.org
Original-Received: from mother.openwall.net ([195.42.179.200])
	by plane.gmane.org with smtp (Exim 4.69)
	(envelope-from <musl-return-3876-gllmg-musl=m.gmane.org@lists.openwall.com>)
	id 1V8xlw-0002ag-Fg
	for gllmg-musl@plane.gmane.org; Mon, 12 Aug 2013 21:27:20 +0200
Original-Received: (qmail 15423 invoked by uid 550); 12 Aug 2013 19:27:18 -0000
Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm
Precedence: bulk
List-Post: <mailto:musl@lists.openwall.com>
List-Help: <mailto:musl-help@lists.openwall.com>
List-Unsubscribe: <mailto:musl-unsubscribe@lists.openwall.com>
List-Subscribe: <mailto:musl-subscribe@lists.openwall.com>
Original-Received: (qmail 15409 invoked from network); 12 Aug 2013 19:27:18 -0000
Content-Disposition: inline
In-Reply-To: <20130730022621.GF25714@port70.net>
User-Agent: Mutt/1.5.21 (2010-09-15)
Xref: news.gmane.org gmane.linux.lib.musl.general:3872
Archived-At: <http://permalink.gmane.org/gmane.linux.lib.musl.general/3872>


--AqsLC8rIMeq19msA
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

* Szabolcs Nagy <nsz@port70.net> [2013-07-30 04:26:21 +0200]:
> * Rich Felker <dalias@aerifal.cx> [2013-07-29 17:04:48 -0400]:
> > On Mon, Jul 29, 2013 at 06:00:46PM +0200, Szabolcs Nagy wrote:
> > > * Rich Felker <dalias@aerifal.cx> [2013-07-29 02:34:56 -0400]:
> > > > - i387 math asm does not truncate excess precision. Whether or not
> > > >   this omission is conforming in terms of the return value, it results
> > > >   in lost underflow exceptions, as demonstrated by nsz's math tests.
> > > 
> > > the underflow problem is not i387 or excess precision related:
> > > many (odd) math functions are almost the identity function
> > > around x==0 (sin,asin,tan,atan,sinh,atanh,..)
> > 
> > In the case of asin, etc it _is_ the excess precision.
> 
> ah yes functions with x87 asm are different
> 
> but the c code for asin has the issues i described
> so it should fail to raise underflow on arm as well

as discussed on irc..

iso c with annex f allows inexact to be omitted or raised
spuriously, underflow may be raised spuriously but cannot
be omitted, the other three flags must be raised correctly
(except when explicitly specified otherwise)

fdlibm was written so that all flags are respected, but
there are many bugs so i think the reasonable behaviour is:

- raise all flags correctly for correctly rounded functions
(sqrt, fma, nextafter, fmax, rounding functions,..),
otherwise
- raise overflow, divbyzero, invalid flags correctly
- don't care about inexact
- raise underflow correctly when *possible

* the requirement that underflow cannot be omitted is
hard to fulfill when FLT_EVAL_METHOD!=0, for details see
https://groups.google.com/forum/#!topic/comp.std.c/oosKDrY28E8

attached fenv flag fixes for some math functions
(havent written commit messages yet and some other changes
may be mixed into the diff)

for i386 asm i check underflow first and only do the magic
(store an underflowing float into memory) when it's not set
(since the store is significantly slower than the flag check)
however i'm not sure about the gain: if you have subnormal
values around everything will be slow, avoiding an extra
rounding probably does not buy much

in the c version the fix can be ugly if FLT_EVAL_METHOD!=0
is supported (i use volatile hacks through FORCE_EVAL)
fma, pow, atan2 are known to be broken
(fma is fixed for i386 but not fmal and fmaf)

--AqsLC8rIMeq19msA
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="math.diff"

diff --git a/src/math/asin.c b/src/math/asin.c
index 3e8f99e..c926b18 100644
--- a/src/math/asin.c
+++ b/src/math/asin.c
@@ -82,11 +82,9 @@ double asin(double x)
 	}
 	/* |x| < 0.5 */
 	if (ix < 0x3fe00000) {
-		if (ix < 0x3e500000) {
-			/* |x|<0x1p-26, return x with inexact if x!=0*/
-			FORCE_EVAL(x + 0x1p120f);
+		/* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
+		if (ix < 0x3e500000 && ix >= 0x00100000)
 			return x;
-		}
 		return x + x*R(x*x);
 	}
 	/* 1 > |x| >= 0.5 */
diff --git a/src/math/asinf.c b/src/math/asinf.c
index 51fe6c6..bcd304a 100644
--- a/src/math/asinf.c
+++ b/src/math/asinf.c
@@ -46,10 +46,9 @@ float asinf(float x)
 		return 0/(x-x);  /* asin(|x|>1) is NaN */
 	}
 	if (ix < 0x3f000000) {  /* |x| < 0.5 */
-		if (ix < 0x39800000) {  /* |x| < 2**-12 */
-			FORCE_EVAL(x + 0x1p120f);
-			return x; /* return x with inexact if x!=0 */
-		}
+		/* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
+		if (ix < 0x39800000 && ix >= 0x00800000)
+			return x;
 		return x + x*R(x*x);
 	}
 	/* 1 > |x| >= 0.5 */
diff --git a/src/math/atan.c b/src/math/atan.c
index 5a1d33e..63b5ad0 100644
--- a/src/math/atan.c
+++ b/src/math/atan.c
@@ -77,8 +77,9 @@ double atan(double x)
 	}
 	if (ix < 0x3fdc0000) {    /* |x| < 0.4375 */
 		if (ix < 0x3e400000) {  /* |x| < 2^-27 */
-			/* raise inexact if x!=0 */
-			FORCE_EVAL(x + 0x1p120f);
+			if (ix < 0x00100000)
+				/* raise underflow for subnormal x */
+				FORCE_EVAL(x*x);
 			return x;
 		}
 		id = -1;
diff --git a/src/math/atanf.c b/src/math/atanf.c
index ac8bfd0..178341b 100644
--- a/src/math/atanf.c
+++ b/src/math/atanf.c
@@ -55,8 +55,9 @@ float atanf(float x)
 	}
 	if (ix < 0x3ee00000) {   /* |x| < 0.4375 */
 		if (ix < 0x39800000) {  /* |x| < 2**-12 */
-			/* raise inexact if x!=0 */
-			FORCE_EVAL(x + 0x1p120f);
+			if (ix < 0x00800000)
+				/* raise underflow for subnormal x */
+				FORCE_EVAL(x*x);
 			return x;
 		}
 		id = -1;
diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s
index 932c754..7e49409 100644
--- a/src/math/i386/asin.s
+++ b/src/math/i386/asin.s
@@ -2,7 +2,20 @@
 .type asinf,@function
 asinf:
 	flds 4(%esp)
-	jmp 1f
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	push %eax
+	fld %st(0)
+	fmul %st(1)
+	fstps (%esp)
+	pop %eax
+2:	ret
 
 .global asinl
 .type asinl,@function
@@ -14,6 +27,18 @@ asinl:
 .type asin,@function
 asin:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	push %eax
+	fsts (%esp)
+	pop %eax
+2:	ret
 1:	fld %st(0)
 	fld1
 	fsub %st(0),%st(1)
diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s
index 7e28b39..b46f5a3 100644
--- a/src/math/i386/atan.s
+++ b/src/math/i386/atan.s
@@ -2,6 +2,18 @@
 .type atan,@function
 atan:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jb 1f
 	fld1
 	fpatan
 	ret
+		# subnormal x, return x with underflow
+1:	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	push %eax
+	fsts (%esp)
+	pop %eax
+2:	ret
diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s
index 3cd4023..67cbf7c 100644
--- a/src/math/i386/atanf.s
+++ b/src/math/i386/atanf.s
@@ -2,6 +2,20 @@
 .type atanf,@function
 atanf:
 	flds 4(%esp)
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jb 1f
 	fld1
 	fpatan
 	ret
+		# subnormal x, return x with underflow
+1:	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	push %eax
+	fld %st(0)
+	fmul %st(1)
+	fstps (%esp)
+	pop %eax
+2:	ret
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s
index e3b42af..8cf45c4 100644
--- a/src/math/i386/exp.s
+++ b/src/math/i386/exp.s
@@ -2,7 +2,20 @@
 .type expm1f,@function
 expm1f:
 	flds 4(%esp)
-	jmp 1f
+	mov 4(%esp),%eax
+	add %eax,%eax
+	cmp $0x01000000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	push %eax
+	fld %st(0)
+	fmul %st(1)
+	fstps (%esp)
+	pop %eax
+2:	ret
 
 .global expm1l
 .type expm1l,@function
@@ -14,10 +27,34 @@ expm1l:
 .type expm1,@function
 expm1:
 	fldl 4(%esp)
+	mov 8(%esp),%eax
+	add %eax,%eax
+	cmp $0x00200000,%eax
+	jae 1f
+		# subnormal x, return x with underflow
+	fnstsw %ax
+	and $16,%ax
+	jnz 2f
+	push %eax
+	fsts (%esp)
+	pop %eax
+2:	ret
 1:	fldl2e
 	fmulp
+	mov $0xc2820000,%eax
+	push %eax
+	flds (%esp)
+	pop %eax
+	fucomp %st(1)
+	fnstsw %ax
+	sahf
 	fld1
-	fld %st(1)
+	jb 1f
+		# x*log2e < -65, return -1 without underflow
+	fstp %st(1)
+	fchs
+	ret
+1:	fld %st(1)
 	fabs
 	fucom %st(1)
 	fnstsw %ax
diff --git a/src/math/i386/expl.s b/src/math/i386/expl.s
index 8ceb40d..61ef1dd 100644
--- a/src/math/i386/expl.s
+++ b/src/math/i386/expl.s
@@ -8,34 +8,27 @@
 expl:
 	fldt 4(%esp)
 
-		# special cases: 2*x is +-inf, nan or |x| < 0x1p-32
-		# check (exponent|0x8000)+2 < 0xbfff+2-32
-	movw 12(%esp), %ax
-	movw %ax, %dx
-	orw $0x8000, %dx
-	addw $2, %dx
-	cmpw $0xbfff-30, %dx
-	jnb 3f
-	cmpw $1, %dx
-	jbe 1f
-		# if |x|<0x1p-32 return 1+x
+		# interesting case: 0x1p-32 <= |x| < 16384
+		# check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13]
+	mov 12(%esp), %ax
+	or $0x8000, %ax
+	sub $0xbfdf, %ax
+	cmp $45, %ax
+	jbe 2f
+	test %ax, %ax
 	fld1
-	jmp 2f
-1:	testw %ax, %ax
-	jns 1f
-		# if 2*x == -inf,-nan return -0/x
-	fldz
-	fchs
-	fdivp
+	js 1f
+		# if |x|>=0x1p14 or nan return 2^trunc(x)
+	fscale
+	fstp %st(1)
 	ret
-		# if 2*x == inf,nan return 2*x
-1:	fld %st(0)
-2:	faddp
+		# if |x|<0x1p-32 return 1+x
+1:	faddp
 	ret
 
-		# should be 0x1.71547652b82fe178p0 == 0x3fff b8aa3b29 5c17f0bc
+		# should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc
 		# it will be wrong on non-nearest rounding mode
-3:	fldl2e
+2:	fldl2e
 	subl $44, %esp
 		# hi = log2e_hi*x
 		# 2^hi = exp2l(hi)
diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s
index 9971e53..3203f06 100644
--- a/src/math/i386/log1p.s
+++ b/src/math/i386/log1p.s
@@ -7,9 +7,20 @@ log1p:
 	fldl 4(%esp)
 	cmp $0x3fd28f00,%eax
 	ja 1f
+	cmp $0x00100000,%eax
+	jb 2f
 	fyl2xp1
 	ret
 1:	fld1
 	faddp
 	fyl2x
 	ret
+		# subnormal x, return x with underflow
+2:	fnstsw %ax
+	and $16,%ax
+	jnz 1f
+	push %eax
+	fsts (%esp)
+	fstp %st(1)
+	pop %eax
+1:	ret
diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s
index 2680a8a..ada6109 100644
--- a/src/math/i386/log1pf.s
+++ b/src/math/i386/log1pf.s
@@ -7,9 +7,21 @@ log1pf:
 	flds 4(%esp)
 	cmp $0x3e940000,%eax
 	ja 1f
+	cmp $0x00800000,%eax
+	jb 2f
 	fyl2xp1
 	ret
 1:	fld1
 	faddp
 	fyl2x
 	ret
+		# subnormal x, return x with underflow
+2:	fnstsw %ax
+	and $16,%ax
+	jnz 1f
+	push %eax
+	fxch
+	fmul %st(1)
+	fstps (%esp)
+	pop %eax
+1:	ret
diff --git a/src/math/log1p.c b/src/math/log1p.c
index 6c67249..0cb71c6 100644
--- a/src/math/log1p.c
+++ b/src/math/log1p.c
@@ -104,9 +104,12 @@ double log1p(double x)
 			return (x-x)/(x-x);         /* log1p(x<-1)=NaN */
 		}
 		if (ax < 0x3e200000) {   /* |x| < 2**-29 */
-			/* raise inexact */
-			if (two54 + x > 0.0 && ax < 0x3c900000)  /* |x| < 2**-54 */
+			/* if 0x1p-1022 <= |x| < 0x1p-54, avoid raising underflow */
+			if (ax < 0x3c900000 && ax >= 0x00100000)
 				return x;
+#if FLT_EVAL_METHOD != 0
+			FORCE_EVAL(x*x);
+#endif
 			return x - x*x*0.5;
 		}
 		if (hx > 0 || hx <= (int32_t)0xbfd2bec4) {  /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
diff --git a/src/math/log1pf.c b/src/math/log1pf.c
index 39832d2..c38e0bc 100644
--- a/src/math/log1pf.c
+++ b/src/math/log1pf.c
@@ -43,9 +43,12 @@ float log1pf(float x)
 			return (x-x)/(x-x);         /* log1p(x<-1)=NaN */
 		}
 		if (ax < 0x38000000) {   /* |x| < 2**-15 */
-			/* raise inexact */
-			if (two25 + x > 0.0f && ax < 0x33800000)  /* |x| < 2**-24 */
+			/* if 0x1p-126 <= |x| < 0x1p-24, avoid raising underflow */
+			if (ax < 0x33800000 && ax >= 0x00800000)
 				return x;
+#if FLT_EVAL_METHOD != 0
+			FORCE_EVAL(x*x);
+#endif
 			return x - x*x*0.5f;
 		}
 		if (hx > 0 || hx <= (int32_t)0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
diff --git a/src/math/pow.c b/src/math/pow.c
index f257814..ac3abc0 100644
--- a/src/math/pow.c
+++ b/src/math/pow.c
@@ -143,7 +143,7 @@ double pow(double x, double y)
 				return 1.0;
 			else if (ix >= 0x3ff00000) /* (|x|>1)**+-inf = inf,0 */
 				return hy >= 0 ? y : 0.0;
-			else                       /* (|x|<1)**+-inf = 0,inf */
+			else if ((ix|lx) != 0)     /* (|x|<1)**+-inf = 0,inf if x!=0 */
 				return hy >= 0 ? 0.0 : -y;
 		}
 		if (iy == 0x3ff00000)    /* y is +-1 */
diff --git a/src/math/powf.c b/src/math/powf.c
index 427c896..59baf6f 100644
--- a/src/math/powf.c
+++ b/src/math/powf.c
@@ -90,7 +90,7 @@ float powf(float x, float y)
 			return 1.0f;
 		else if (ix > 0x3f800000)  /* (|x|>1)**+-inf = inf,0 */
 			return hy >= 0 ? y : 0.0f;
-		else                       /* (|x|<1)**+-inf = 0,inf */
+		else if (ix != 0)          /* (|x|<1)**+-inf = 0,inf if x!=0 */
 			return hy >= 0 ? 0.0f: -y;
 	}
 	if (iy == 0x3f800000)    /* y is +-1 */
diff --git a/src/math/scalbn.c b/src/math/scalbn.c
index 003141e..1fec432 100644
--- a/src/math/scalbn.c
+++ b/src/math/scalbn.c
@@ -10,10 +10,8 @@ double scalbn(double x, int n)
 		if (n > 1023) {
 			x *= 0x1p1023;
 			n -= 1023;
-			if (n > 1023) {
-				STRICT_ASSIGN(double, x, x * 0x1p1023);
-				return x;
-			}
+			if (n > 1023)
+				n = 1023;
 		}
 	} else if (n < -1022) {
 		x *= 0x1p-1022;
@@ -21,10 +19,8 @@ double scalbn(double x, int n)
 		if (n < -1022) {
 			x *= 0x1p-1022;
 			n += 1022;
-			if (n < -1022) {
-				STRICT_ASSIGN(double, x, x * 0x1p-1022);
-				return x;
-			}
+			if (n < -1022)
+				n = -1022;
 		}
 	}
 	INSERT_WORDS(scale, (uint32_t)(0x3ff+n)<<20, 0);
diff --git a/src/math/scalbnf.c b/src/math/scalbnf.c
index f94b5d5..c0eeaf8 100644
--- a/src/math/scalbnf.c
+++ b/src/math/scalbnf.c
@@ -10,10 +10,8 @@ float scalbnf(float x, int n)
 		if (n > 127) {
 			x *= 0x1p127f;
 			n -= 127;
-			if (n > 127) {
-				STRICT_ASSIGN(float, x, x * 0x1p127f);
-				return x;
-			}
+			if (n > 127)
+				n = 127;
 		}
 	} else if (n < -126) {
 		x *= 0x1p-126f;
@@ -21,10 +19,8 @@ float scalbnf(float x, int n)
 		if (n < -126) {
 			x *= 0x1p-126f;
 			n += 126;
-			if (n < -126) {
-				STRICT_ASSIGN(float, x, x * 0x1p-126f);
-				return x;
-			}
+			if (n < -126)
+				n = -126;
 		}
 	}
 	SET_FLOAT_WORD(scale, (uint32_t)(0x7f+n)<<23);
diff --git a/src/math/scalbnl.c b/src/math/scalbnl.c
index c605b8d..7ad7688 100644
--- a/src/math/scalbnl.c
+++ b/src/math/scalbnl.c
@@ -17,7 +17,7 @@ long double scalbnl(long double x, int n)
 			x *= 0x1p16383L;
 			n -= 16383;
 			if (n > 16383)
-				return x * 0x1p16383L;
+				n = 16383;
 		}
 	} else if (n < -16382) {
 		x *= 0x1p-16382L;
@@ -26,7 +26,7 @@ long double scalbnl(long double x, int n)
 			x *= 0x1p-16382L;
 			n += 16382;
 			if (n < -16382)
-				return x * 0x1p-16382L;
+				n = -16382;
 		}
 	}
 	scale.e = 1.0;
diff --git a/src/math/sinh.c b/src/math/sinh.c
index 47e36bf..00022c4 100644
--- a/src/math/sinh.c
+++ b/src/math/sinh.c
@@ -23,8 +23,8 @@ double sinh(double x)
 		t = expm1(absx);
 		if (w < 0x3ff00000) {
 			if (w < 0x3ff00000 - (26<<20))
-				/* note: inexact is raised by expm1 */
-				/* note: this branch avoids underflow */
+				/* note: inexact and underflow are raised by expm1 */
+				/* note: this branch avoids spurious underflow */
 				return x;
 			return h*(2*t - t*t/(t+1));
 		}
diff --git a/src/math/tanh.c b/src/math/tanh.c
index 0e766c5..65393c6 100644
--- a/src/math/tanh.c
+++ b/src/math/tanh.c
@@ -9,7 +9,7 @@ double tanh(double x)
 	union {double f; uint64_t i;} u = {.f = x};
 	uint32_t w;
 	int sign;
-	double t;
+	double_t t;
 
 	/* x = |x| */
 	sign = u.i >> 63;
@@ -22,8 +22,7 @@ double tanh(double x)
 		if (w > 0x40340000) {
 			/* |x| > 20 or nan */
 			/* note: this branch avoids raising overflow */
-			/* raise inexact if x!=+-inf and handle nan */
-			t = 1 + 0/(x + 0x1p-120f);
+			t = 1 - 0/x;
 		} else {
 			t = expm1(2*x);
 			t = 1 - 2/(t+2);
@@ -32,10 +31,15 @@ double tanh(double x)
 		/* |x| > log(5/3)/2 ~= 0.2554 */
 		t = expm1(2*x);
 		t = t/(t+2);
-	} else {
-		/* |x| is small, up to 2ulp error in [0.1,0.2554] */
+	} else if (w >= 0x00100000) {
+		/* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */
 		t = expm1(-2*x);
 		t = -t/(t+2);
+	} else {
+		/* |x| is subnormal */
+		/* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */
+		FORCE_EVAL(x*x);
+		t = x;
 	}
 	return sign ? -t : t;
 }
diff --git a/src/math/tanhf.c b/src/math/tanhf.c
index 8099ec3..10636fb 100644
--- a/src/math/tanhf.c
+++ b/src/math/tanhf.c
@@ -17,7 +17,7 @@ float tanhf(float x)
 		/* |x| > log(3)/2 ~= 0.5493 or nan */
 		if (w > 0x41200000) {
 			/* |x| > 10 */
-			t = 1 + 0/(x + 0x1p-120f);
+			t = 1 + 0/x;
 		} else {
 			t = expm1f(2*x);
 			t = 1 - 2/(t+2);
@@ -26,10 +26,14 @@ float tanhf(float x)
 		/* |x| > log(5/3)/2 ~= 0.2554 */
 		t = expm1f(2*x);
 		t = t/(t+2);
-	} else {
-		/* |x| is small */
+	} else if (w >= 0x00800000) {
+		/* |x| >= 0x1p-126 */
 		t = expm1f(-2*x);
 		t = -t/(t+2);
+	} else {
+		/* |x| is subnormal */
+		FORCE_EVAL(x*x);
+		t = x;
 	}
 	return sign ? -t : t;
 }
diff --git a/src/math/tgamma.c b/src/math/tgamma.c
index 691e86a..852dcf7 100644
--- a/src/math/tgamma.c
+++ b/src/math/tgamma.c
@@ -137,6 +137,7 @@ double tgamma(double x)
 	/* x =< -184: tgamma(x)=+-0 with underflow */
 	if (absx >= 184) {
 		if (x < 0) {
+			FORCE_EVAL(0x1p-1022/x);
 			if (floor(x) * 0.5 == floor(x * 0.5))
 				return 0;
 			return -0.0;
diff --git a/src/math/x86_64/expl.s b/src/math/x86_64/expl.s
index 6d5c1ce..107f3f5 100644
--- a/src/math/x86_64/expl.s
+++ b/src/math/x86_64/expl.s
@@ -8,32 +8,25 @@
 expl:
 	fldt 8(%rsp)
 
-		# special cases: 2*x is +-inf, nan or |x| < 0x1p-32
-		# check (exponent|0x8000)+2 < 0xbfff+2-32
-	movw 16(%rsp), %ax
-	movw %ax, %dx
-	orw $0x8000, %dx
-	addw $2, %dx
-	cmpw $0xbfff-30, %dx
-	jnb 3f
-	cmpw $1, %dx
-	jbe 1f
-		# if |x|<0x1p-32 return 1+x
+		# interesting case: 0x1p-32 <= |x| < 16384
+		# check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13]
+	mov 16(%rsp), %ax
+	or $0x8000, %ax
+	sub $0xbfdf, %ax
+	cmp $45, %ax
+	jbe 2f
+	test %ax, %ax
 	fld1
-	jmp 2f
-1:	testw %ax, %ax
-	jns 1f
-		# if 2*x == -inf,-nan return -0/x
-	fldz
-	fchs
-	fdivp
+	js 1f
+		# if |x|>=0x1p14 or nan return 2^trunc(x)
+	fscale
+	fstp %st(1)
 	ret
-		# if 2*x == inf,nan return 2*x
-1:	fld %st(0)
-2:	faddp
+		# if |x|<0x1p-32 return 1+x
+1:	faddp
 	ret
 
-		# should be 0x1.71547652b82fe178p0 == 0x3fff b8aa3b29 5c17f0bc
+		# should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc
 		# it will be wrong on non-nearest rounding mode
 3:	fldl2e
 	subq $48, %rsp

--AqsLC8rIMeq19msA--