float scanner status, upcoming release

mailing list of musl libc
 help / color / mirror / code / Atom feed

* float scanner status, upcoming release
@ 2012-04-09 19:17 Rich Felker
  2012-04-09 19:21 ` Rich Felker
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Rich Felker @ 2012-04-09 19:17 UTC (permalink / raw)
  To: musl

Hi all,

My recent work on musl has mainly been writing the new floating point
scanning code for strtod, scanf, etc. The existing code in musl is
essentially just a placeholder; its results were seriously inaccurate
in all but the most trivial usage cases. Unfortunately, getting
accurate results, especially if you want them correctly rounded for
the current rounding direction, destination floating point precision,
and denormal corner-cases, is highly non-trivial and involves
high-precision arithmetic.

At this point, the new implementation is essentially done, but lacking
integration with libc; it's just a standalone program for reading
decimal floating point strings and converting them. Integration will
require a bit more work getting the function interface suitable for
use by both *scanf() and strto*()/wcsto*(), along with general
cleanups and implementing the now-missing (but MUCH easier) hex float
support.

I'll keep working on it as time allows, and hope to have it committed
within the next few days. At that point I'm hoping to release 0.8.8,
so please report any other pending issues that should be fixed before
release.

Rich

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: float scanner status, upcoming release
  2012-04-09 19:17 float scanner status, upcoming release Rich Felker
@ 2012-04-09 19:21 ` Rich Felker
  2012-04-10  0:09   ` Pascal Cuoq
  2012-04-10  0:48 ` Rich Felker
  2012-04-10 15:24 ` Rich Felker
  2 siblings, 1 reply; 8+ messages in thread
From: Rich Felker @ 2012-04-09 19:21 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 166 bytes --]

And here's the current code (standalone test program) if anybody wants
to play with it or point out how much single-letter variable names
suck or whatnot.. :-)

Rich

[-- Attachment #2: myfloat.c --]
[-- Type: text/plain, Size: 4753 bytes --]

#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <fenv.h>
#include <float.h>


#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024

#define LD_B1B_DIG 2
#define LD_B1B_MAX 9007199, 254740991, -1, -1
#define KMAX 128

#else /* LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 */

#define LD_B1B_DIG 3
#define LD_B1B_MAX 18, 446744073, 709551615, -1
#define KMAX 2048

#endif

#define MASK (KMAX-1)

int dohex(FILE *f, long double *y, int sign)
{
	return -1;
}

int parsefloat(FILE *f, long double *yout)
{
	uint32_t x[KMAX];
	static const uint32_t th[] = { LD_B1B_MAX };
	int c;
	int i, j, k, a, z;
	int sign=1;
	int rp=-1, dc=0;
	int e10;
	int e2;
	long double y;
	long double frac=0;
	long double bias=0;
	int bits=64;
	int emin=-16445;

	j=0;
	k=0;

	c = getc(f);
	if (c=='+' || c=='-') {
		sign -= 2*(c=='-');
		c = getc(f);
	}

	if (c=='0') {
		j = 1;
		c = getc(f);
		if ((c|32) == 'x') {
			return dohex(f, yout, sign);
		}
	}

	/* Don't let leading zeros consume buffer space */
	for (; c=='0'; c=getc(f));

	for (; c-'0'<10U || c=='.'; c=getc(f)) {
		if (c == '.') {
			if (rp!=-1) goto mismatch;
			rp = dc;
		} else if (k < KMAX) {
			dc++;
			x[k] = x[k]*10 + c-'0';
			if (++j==9) {
				k++;
				j=0;
			}
		} else {
			dc++;
			x[KMAX-1] |= c-'0';
		}
	}
	if (rp==-1) rp=dc;
	if (k<KMAX && j) {
		for (; j<9; j++) x[k]*=10;
		k++;
		j=0;
	}

	if ((c|32)=='e') {
		c = getc(f);
		ungetc(c, f);
		if (c!='+' && c!='-' && c-'0'>=10U) {
			goto mismatch;
		}
		fscanf(f, "%d", &e10);
		c = getc(f);
		rp += e10;
		/* FIXME: overflow */
	}

	a = 0;
	z = k;
	e2 = 0;

	while (rp < 18+9*LD_B1B_DIG) {
		uint32_t carry = 0;
		e2 -= 29;
		for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
			uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
			if (tmp > 1000000000) {
				carry = tmp / 1000000000;
				x[k] = tmp % 1000000000;
			} else {
				carry = 0;
				x[k] = tmp;
			}
			if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
			if (k==a) break;
		}
		if (carry) {
			rp += 9;
			if (a == z) {
				z = (z-1 & MASK);
				x[z-1 & MASK] |= x[z];
			}
			a = (a-1 & MASK);
			x[a] = carry;
		}
	}

	if (rp % 9) {
		static const int p10s[] = {
			100000000, 10000000, 1000000, 100000,
			10000, 1000, 100, 10
		};
		int rpm9 = rp % 9;
		int p10 = p10s[rpm9-1];
		uint32_t carry = 0;
		for (k=a; k!=z; k=(k+1 & MASK)) {
			uint32_t tmp = x[k] % p10;
			x[k] = x[k]/p10 + carry;
			carry = 1000000000/p10 * tmp;
			if (k==a && !x[k]) {
				a = (a+1 & MASK);
				rp -= 9;
			}
		}
		if (carry) {
			if ((z+1 & MASK) != a)
				z = (z+1 & MASK);
			x[z-1 & MASK] = carry;
		}
		rp += 9-rpm9;
	}

	for (;;) {
		uint32_t carry = 0;
		int sh = 1;
		for (i=0; i<LD_B1B_DIG; i++) {
			k = (a+i & MASK);
			if (k == z || x[k] < th[i]) {
				i=LD_B1B_DIG;
				break;
			}
			if (x[a+i & MASK] > th[i]) break;
		}
		if (i==LD_B1B_DIG && rp==9*LD_B1B_DIG) break;
		/* FIXME: find a way to compute optimal sh */
		if (rp > 9+9*LD_B1B_DIG) sh = 9;
		e2 += sh;
		for (k=a; k!=z; k=(k+1 & MASK)) {
			uint32_t tmp = x[k] & (1<<sh)-1;
			x[k] = (x[k]>>sh) + carry;
			carry = (1000000000>>sh) * tmp;
			if (k==a && !x[k]) {
				a = (a+1 & MASK);
				rp -= 9;
			}
		}
		if (carry) {
			if ((z+1 & MASK) != a)
				z = (z+1 & MASK);
			x[z-1 & MASK] = carry;
		}
	}

	for (i=a; i!=z; i=(i+1 & MASK))
		printf("%.9u ", x[i]);

	printf("[%d]\n%*s\n", e2, rp+(rp/9)+1, "^");

	for (y=i=0; i<LD_B1B_DIG && (a+i & MASK)!=z; i++)
		y = 1000000000.0L * y + x[a+i & MASK];

	y *= sign;

//	bits=2;

	if (bits > LDBL_MANT_DIG+e2-emin) {
		bits = LDBL_MANT_DIG+e2-emin;
		if (bits<0) bits=0;
	}

	if (bits < LDBL_MANT_DIG) {
		bias = copysignl(scalbn(1, 2*LDBL_MANT_DIG-bits-1), y);
		frac = fmodl(y, scalbn(1, LDBL_MANT_DIG-bits));
		y -= frac;
		y += bias;
	}

	if ((a+i & MASK) != z) {
	printf("old frac = %La\n", frac);
		uint32_t t = x[a+i & MASK];
		if (t < 500000000 && (t || (a+i+1 & MASK) != z))
			frac += 0.25*sign;
		else if (t > 500000000)
			frac += 0.75*sign;
		else if (t == 500000000) {
			if ((a+i+1 & MASK) == z)
				frac += 0.5*sign;
			else
				frac += 0.75*sign;
		}
	}

	printf("y = %.21Lg\n", scalbnl(y-bias,e2));
	printf("frac = %.21Lg\n", scalbnl(frac,e2));
	printf("y = %La\n", y);
	printf("frac = %La\n", frac);
	printf("bias = %La\n", bias);

	y += frac;
	y -= bias;

	y = scalbnl(y, e2);

	printf("y = %La\n", y);
	printf("y ~= %.17Lg\n", y);

	printf("%.21Lg [%.21Lg, %.21Lg]\n", y,
		nextafterl(y,-INFINITY), nextafterl(y, INFINITY));
#if 0
	printf("%.18g [%.18g, %.18g]\n", (double)y,
		nextafter(y,-INFINITY), nextafter(y, INFINITY));
	printf("%.9g [%.9g, %.9g]\n", (float)y,
		nextafterf(y,-INFINITY), nextafterf(y, INFINITY));
#endif

	return 0;
mismatch:
	return -1;
}

int main()
{
	long double y;
	//fesetround(FE_DOWNWARD);
	parsefloat(stdin, &y);
}

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: float scanner status, upcoming release
  2012-04-09 19:21 ` Rich Felker
@ 2012-04-10  0:09   ` Pascal Cuoq
  2012-04-10  0:36     ` Rich Felker
  0 siblings, 1 reply; 8+ messages in thread
From: Pascal Cuoq @ 2012-04-10  0:09 UTC (permalink / raw)
  To: musl

On Mon, Apr 9, 2012 at 9:21 PM, Rich Felker <dalias@aerifal.cx> wrote:
> And here's the current code (standalone test program) if anybody wants
> to play with it or point out how much single-letter variable names
> suck or whatnot.. :-)

Line 72:

			x[k] = x[k]*10 + c-'0';

I don't understand why this read access to x[k] is initialized.
If I change the declaration of local array x[] as:

  uint32_t x[KMAX] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};

then:

$ ./a.out
1.123
...
y = 11.123

Initializing x[] with arbitrary values shouldn't change anything
if it was not used uninitialized, right?

Even if I am getting something wrong here, you can count on me
to test the heck out of your function. I initially subscribed to
the musl mailing because I had been looking for a function like
this (although I let the musl source code distract me then).

Pascal

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: float scanner status, upcoming release
  2012-04-10  0:09   ` Pascal Cuoq
@ 2012-04-10  0:36     ` Rich Felker
  0 siblings, 0 replies; 8+ messages in thread
From: Rich Felker @ 2012-04-10  0:36 UTC (permalink / raw)
  To: musl

On Tue, Apr 10, 2012 at 02:09:23AM +0200, Pascal Cuoq wrote:
> On Mon, Apr 9, 2012 at 9:21 PM, Rich Felker <dalias@aerifal.cx> wrote:
> > And here's the current code (standalone test program) if anybody wants
> > to play with it or point out how much single-letter variable names
> > suck or whatnot.. :-)
> 
> Line 72:
> 
> 			x[k] = x[k]*10 + c-'0';
> 
> I don't understand why this read access to x[k] is initialized.
> If I change the declaration of local array x[] as:

You're completely right and I was just getting lucky (and being too
lazy to add the right warning options to my command line). x[0] should
be set to 0 before the loop, and when k advances, the new x[k] should
be set to 0. (Or we could pre-fill the whole array, but that's a huge
cache-thrashing waste if the number to be read is small.)

I'll send a new version of the code to the list shortly; it's very
close to what I intend to commit for use with strtod, etc.

Note that nsz has found another problem where extremely small values
(smallest subnormal range) don't seem to be getting rounded correctly,
so this and other issues need to be fixed before it goes into major
production use. Still, I think the overall concept is sound...

Rich

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: float scanner status, upcoming release
  2012-04-09 19:17 float scanner status, upcoming release Rich Felker
  2012-04-09 19:21 ` Rich Felker
@ 2012-04-10  0:48 ` Rich Felker
  2012-04-10 12:15   ` Szabolcs Nagy
  2012-04-10 15:24 ` Rich Felker
  2 siblings, 1 reply; 8+ messages in thread
From: Rich Felker @ 2012-04-10  0:48 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 170 bytes --]

Revised version of the code, in preparation to integrate with musl.
Still needs hex float support, and I want to eliminate the fscanf
dependency. Comments welcome.

Rich

[-- Attachment #2: floatscan.c --]
[-- Type: text/plain, Size: 5691 bytes --]

#define _XOPEN_SOURCE 700
#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <fenv.h>
#include <float.h>


#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024

#define LD_B1B_DIG 2
#define LD_B1B_MAX 9007199, 254740991
#define KMAX 128

#else /* LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 */

#define LD_B1B_DIG 3
#define LD_B1B_MAX 18, 446744073, 709551615
#define KMAX 2048

#endif

#define MASK (KMAX-1)


#if 0
static inline fast_unget(int c, FILE *f)
{
	if (f->rpos) f->rpos--;
}
#undef ungetc
#define ungetc fast_unget
#endif
#undef getc
#define getc getc_unlocked


static long double decfloat(FILE *f, int c, int prec, int sign, int pok, off_t *pcnt)
{
	uint32_t x[KMAX];
	static const uint32_t th[] = { LD_B1B_MAX };
	int i, j, k, a, z;
	int rp=-1, dc=0;
	int e10=0;
	int e2;
	long double y;
	long double frac=0;
	long double bias=0;
	int bits=64;
	int emin;

	switch (prec) {
	case 0:
		bits = 24;
		emin = -149;
		break;
	case 1:
		bits = 53;
		emin = -1074;
		break;
	case 2:
		bits = LDBL_MANT_DIG;
		emin = -16445;
		break;
	default:
		return 0;
	}

	j=0;
	k=0;

	if (c<0) *pcnt += (c=getc(f))>=0;

	/* Don't let leading zeros consume buffer space */
	for (; c=='0'; *pcnt += (c=getc(f))>=0);

	x[0] = 0;
	for (; c-'0'<10U || c=='.'; *pcnt += (c=getc(f))>=0) {
		if (c == '.') {
			if (rp!=-1) break;
			rp = dc;
		} else if (k < KMAX) {
			dc++;
			if (j) x[k] = x[k]*10 + c-'0';
			else x[k] = c-'0';
			if (++j==9) {
				k++;
				j=0;
			}
		} else {
			dc++;
			x[KMAX-1] |= c-'0';
		}
	}
	if (rp==-1) rp=dc;

	if ((c|32)=='e') {
		c = getc(f);
		ungetc(c, f);
		if (c!='+' && c!='-' && c-'0'>=10U) {
			if (!pok) {
				*pcnt = 0;
				return 0;
			} else {
				--*pcnt;
			}
		} else {
			int tmp = 0;
			fscanf(f, "%d%n", &e10, &tmp);
			*pcnt += tmp;
			rp += e10;
			/* FIXME: overflow */
		}
	} else if (c>=0) {
		ungetc(c, f);
		--*pcnt;
	}

	if (rp==dc && (!k || (k==1 && !j)) && (bits>30 || x[0]>>bits==0)) {
		y = sign * (long double)x[0];
		return y;
	}

	if (k<KMAX && j) {
		for (; j<9; j++) x[k]*=10;
		k++;
		j=0;
	}

	a = 0;
	z = k;
	e2 = 0;

	while (rp < 18+9*LD_B1B_DIG) {
		uint32_t carry = 0;
		e2 -= 29;
		for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
			uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
			if (tmp > 1000000000) {
				carry = tmp / 1000000000;
				x[k] = tmp % 1000000000;
			} else {
				carry = 0;
				x[k] = tmp;
			}
			if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
			if (k==a) break;
		}
		if (carry) {
			rp += 9;
			if (a == z) {
				z = (z-1 & MASK);
				x[z-1 & MASK] |= x[z];
			}
			a = (a-1 & MASK);
			x[a] = carry;
		}
	}

	if (rp % 9) {
		static const int p10s[] = {
			100000000, 10000000, 1000000, 100000,
			10000, 1000, 100, 10
		};
		int rpm9 = rp % 9;
		int p10 = p10s[rpm9-1];
		uint32_t carry = 0;
		for (k=a; k!=z; k=(k+1 & MASK)) {
			uint32_t tmp = x[k] % p10;
			x[k] = x[k]/p10 + carry;
			carry = 1000000000/p10 * tmp;
			if (k==a && !x[k]) {
				a = (a+1 & MASK);
				rp -= 9;
			}
		}
		if (carry) {
			if ((z+1 & MASK) != a) {
				x[z] = carry;
				z = (z+1 & MASK);
			} else x[z-1 & MASK] |= 1;
		}
		rp += 9-rpm9;
	}

	for (;;) {
		uint32_t carry = 0;
		int sh = 1;
		for (i=0; i<LD_B1B_DIG; i++) {
			k = (a+i & MASK);
			if (k == z || x[k] < th[i]) {
				i=LD_B1B_DIG;
				break;
			}
			if (x[a+i & MASK] > th[i]) break;
		}
		if (i==LD_B1B_DIG && rp==9*LD_B1B_DIG) break;
		/* FIXME: find a way to compute optimal sh */
		if (rp > 9+9*LD_B1B_DIG) sh = 9;
		e2 += sh;
		for (k=a; k!=z; k=(k+1 & MASK)) {
			uint32_t tmp = x[k] & (1<<sh)-1;
			x[k] = (x[k]>>sh) + carry;
			carry = (1000000000>>sh) * tmp;
			if (k==a && !x[k]) {
				a = (a+1 & MASK);
				rp -= 9;
			}
		}
		if (carry) {
			if ((z+1 & MASK) != a) {
				x[z] = carry;
				z = (z+1 & MASK);
			} else x[z-1 & MASK] |= 1;
		}
	}

	for (y=i=0; i<LD_B1B_DIG && (a+i & MASK)!=z; i++)
		y = 1000000000.0L * y + x[a+i & MASK];

	y *= sign;

	if (bits > LDBL_MANT_DIG+e2-emin) {
		bits = LDBL_MANT_DIG+e2-emin;
		if (bits<0) bits=0;
	}

	if (bits < LDBL_MANT_DIG) {
		bias = copysignl(scalbn(1, 2*LDBL_MANT_DIG-bits-1), y);
		frac = fmodl(y, scalbn(1, LDBL_MANT_DIG-bits));
		y -= frac;
		y += bias;
	}

	if ((a+i & MASK) != z) {
		uint32_t t = x[a+i & MASK];
		if (t < 500000000 && (t || (a+i+1 & MASK) != z))
			frac += 0.25*sign;
		else if (t > 500000000)
			frac += 0.75*sign;
		else if (t == 500000000) {
			if ((a+i+1 & MASK) == z)
				frac += 0.5*sign;
			else
				frac += 0.75*sign;
		}
	}

	y += frac;
	y -= bias;

	y = scalbnl(y, e2);

	return y;
}

static long double hexfloat(FILE *f, int c, int prec, int sign, int pok, off_t *pcnt)
{
	return 0;
}

long double __floatscan(FILE *f, int c, int prec, int pok, off_t *pcnt)
{
	int sign = 1;
	int i;

	if (c<0) *pcnt += (c=getc(f))>=0;

	if (c=='+' || c=='-') {
		sign -= 2*(c=='-');
		*pcnt += (c=getc(f))>=0;
	}

	for (i=0; i<8 && (c|32)=="infinity"[i]; i++)
		if (i<7) c = getc(f);
	if (i==3 || i==8 || (i>3 && pok)) {
		if (i==3 && c>=0) ungetc(c, f);
		if (i==8) *pcnt += 7;
		else *pcnt += 2;
		return sign * INFINITY;
	}
	if (!i) for (i=0; i<3 && (c|32)=="nan"[i]; i++)
		if (i<3) c = getc(f);
	if (i==3) {
		*pcnt += 2;
		return sign>0 ? NAN : -NAN;
	}

	if (i || c-'0'>10U) {
		ungetc(c, f);
		*pcnt = 0;
		return -1;
	}

	if (c=='0') {
		*pcnt += (c=getc(f))>=0;
		if ((c|32) == 'x')
			return hexfloat(f, -1, prec, sign, pok, pcnt);
	}

	return decfloat(f, c, prec, sign, pok, pcnt);
}

#include <string.h>

int main(int argc, char **argv)
{
	FILE *f = 0;
	if (argc>1) f = fmemopen(argv[1], strlen(argv[1]), "r");
	if (!f) f = stdin;

	off_t pos = 0;
	long double y = __floatscan(f, -1, 0, 1, &pos);
	printf("%.21Lg (read %lld)\n", y, (long long)pos);
	return 0;
}

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: float scanner status, upcoming release
  2012-04-10  0:48 ` Rich Felker
@ 2012-04-10 12:15   ` Szabolcs Nagy
  2012-04-10 13:02     ` Rich Felker
  0 siblings, 1 reply; 8+ messages in thread
From: Szabolcs Nagy @ 2012-04-10 12:15 UTC (permalink / raw)
  To: musl

* Rich Felker <dalias@aerifal.cx> [2012-04-09 20:48:19 -0400]:
> Revised version of the code, in preparation to integrate with musl.
> Still needs hex float support, and I want to eliminate the fscanf
> dependency. Comments welcome.
> 

this one fails if the number starts with a .
./floatscan .3

large exponent is slow:
./floatscan 1e+1000000
./floatscan 1e-1000000

subnormal tests still fail

here are some test cases where many digits are needed for correct rounding:

long double (ld80, min subnormal is 2^-16445, min normal is 2^-16382):

// 2^-16445 * 0.5 - eps (should be rounded to 0)
.1822599765941237301264202966809709908199525407846781671860490243514185844316698e-4950
// 2^-16445 * 0.5 + eps (should be rounded to 0x1p-16445L)
.1822599765941237301264202966809709908199525407846781671860490243514185844316699e-4950
// 2^-16445 * 1.5 - eps (should be rounded to 0x1p-16445L)
.5467799297823711903792608900429129724598576223540345015581470730542557532950096e-4950
// 2^-16445 * 1.5 + eps (should be rounded to 0x1p-16444L)
.5467799297823711903792608900429129724598576223540345015581470730542557532950097e-4950
// 2^-16382 + 2^-16446 - eps (should be rounded to 0x1p-16382L)
.3362103143112093506444937793915876332724499641527442230928779770593420866576777e-4931
// 2^-16382 + 2^-16446 + eps (should be rounded to 0x1.0000000000000002p-16382)
.3362103143112093506444937793915876332724499641527442230928779770593420866576778e-4931

double (min subnormal is 2^-1074, min normal is 2^-1022):

// 2^-1074 * 0.5 - eps
.2470328229206232720882843964341106861825299013071623822127928412503377536351043e-323
// 2^-1074 * 0.5 + epsA
.2470328229206232720882843964341106861825299013071623822127928412503377536351044e-323
// 2^-1074 * 1.5 - eps
.7410984687618698162648531893023320585475897039214871466383785237510132609053131e-323
// 2^-1074 * 1.5 + eps
.7410984687618698162648531893023320585475897039214871466383785237510132609053132e-323
// 2^-1022 + 2^-1075 - eps
.2225073858507201630123055637955676152503612414573018013083228724049586647606759e-307
// 2^-1022 + 2^-1075 + eps
.2225073858507201630123055637955676152503612414573018013083228724049586647606760e-307

float (min subnormal is 2^-149, min normal is 2^-126):
// 2^-149 * 0.5 - eps
.7006492321624085354618647916449580656401309709382578858785341419448955413429303e-45
// 2^-149 * 0.5 + eps
.7006492321624085354618647916449580656401309709382578858785341419448955413429304e-45
// 2^-149 * 0.5 - eps
.2101947696487225606385594374934874196920392912814773657635602425834686624028790e-44
// 2^-149 * 0.5 + eps
.2101947696487225606385594374934874196920392912814773657635602425834686624028791e-44
// 2^-126 + 2^-150 - eps
.1175494420887210724209590083408724842314472120785184615334540294131831453944281e-37
// 2^-126 + 2^-150 + eps
.1175494420887210724209590083408724842314472120785184615334540294131831453944282e-37


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: float scanner status, upcoming release
  2012-04-10 12:15   ` Szabolcs Nagy
@ 2012-04-10 13:02     ` Rich Felker
  0 siblings, 0 replies; 8+ messages in thread
From: Rich Felker @ 2012-04-10 13:02 UTC (permalink / raw)
  To: musl

On Tue, Apr 10, 2012 at 02:15:15PM +0200, Szabolcs Nagy wrote:
> * Rich Felker <dalias@aerifal.cx> [2012-04-09 20:48:19 -0400]:
> > Revised version of the code, in preparation to integrate with musl.
> > Still needs hex float support, and I want to eliminate the fscanf
> > dependency. Comments welcome.
> > 
> 
> this one fails if the number starts with a .
> ../floatscan .3

This is a bug in the wrapper code before it gets to decfloat() and
it's easily fixed.

> large exponent is slow:
> ../floatscan 1e+1000000
> ../floatscan 1e-1000000

I added code that collapses large and tiny exponents to infinity and
zero, but the max LD exponent is still rather slow...

> subnormal tests still fail
> 
> here are some test cases where many digits are needed for correct rounding:

Indeed. Thanks for the tests. I'll see what I can make of it.

Rich


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: float scanner status, upcoming release
  2012-04-09 19:17 float scanner status, upcoming release Rich Felker
  2012-04-09 19:21 ` Rich Felker
  2012-04-10  0:48 ` Rich Felker
@ 2012-04-10 15:24 ` Rich Felker
  2 siblings, 0 replies; 8+ messages in thread
From: Rich Felker @ 2012-04-10 15:24 UTC (permalink / raw)
  To: musl

[-- Attachment #1: Type: text/plain, Size: 299 bytes --]

One more version, with hex float support and double-rounding issues
for denormals fixed. Sending this for the record as hopefully the last
pre-integrated version of the code. Now I'm going to work on
integrating it into musl so I can run further tests (cluts, etc.)
against strtod, scanf, ...

Rich

[-- Attachment #2: floatscan.c --]
[-- Type: text/plain, Size: 8611 bytes --]

#define _XOPEN_SOURCE 700
#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <fenv.h>
#include <float.h>
#include <limits.h>

#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024

#define LD_B1B_DIG 2
#define LD_B1B_MAX 9007199, 254740991
#define KMAX 128

#else /* LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 */

#define LD_B1B_DIG 3
#define LD_B1B_MAX 18, 446744073, 709551615
#define KMAX 2048

#endif

#define MASK (KMAX-1)


#if 0
static inline fast_unget(int c, FILE *f)
{
	if (f->rpos) f->rpos--;
}
#undef ungetc
#define ungetc fast_unget
#endif
#undef getc
#define getc getc_unlocked


static long long scanexp(FILE *f, off_t *pcnt)
{
	int c;
	int x;
	long long y;
	int neg = 0;
	
	*pcnt += (c=getc(f))>=0;
	if (c=='+' || c=='-') {
		neg = (c=='-');
		*pcnt += (c=getc(f))>=0;
		if (c-'0'>=10U) {
			if (c>=0) {
				ungetc(c, f);
				--*pcnt;
			}
			return LLONG_MIN;
		}
	}
	for (x=0; c-'0'<10U && x<INT_MAX/10; *pcnt += (c=getc(f))>=0)
		x = 10*x + c-'0';
	for (y=x; c-'0'<10U && x<LLONG_MAX/10; *pcnt += (c=getc(f))>=0)
		y = 10*y + c-'0';
	for (; c-'0'<10U; *pcnt += (c=getc(f))>=0);
	if (c>=0) {
		ungetc(c, f);
		--*pcnt;
	}
	return neg ? -y : y;
}


static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int pok, off_t *pcnt)
{
	uint32_t x[KMAX];
	static const uint32_t th[] = { LD_B1B_MAX };
	int i, j, k, a, z;
	long long lrp=-1, dc=0;
	int gotdig = 0;
	int rp;
	int e10=0;
	int e2;
	long double y;
	long double frac=0;
	long double bias=0;

	j=0;
	k=0;

	if (c<0) *pcnt += (c=getc(f))>=0;

	/* Don't let leading zeros consume buffer space */
	for (; c=='0'; *pcnt += (c=getc(f))>=0) gotdig=1;

	x[0] = 0;
	for (; c-'0'<10U || c=='.'; *pcnt += (c=getc(f))>=0) {
		if (c == '.') {
			if (lrp!=-1) break;
			lrp = dc;
		} else if (k < KMAX) {
			dc++;
			if (j) x[k] = x[k]*10 + c-'0';
			else x[k] = c-'0';
			if (++j==9) {
				k++;
				j=0;
			}
			gotdig=1;
		} else {
			dc++;
			x[KMAX-1] |= c-'0';
		}
	}
	if (lrp==-1) lrp=dc;

	if (gotdig && (c|32)=='e') {
		e10 = scanexp(f, pcnt);
		if (e10 == LLONG_MIN) {
			if (!pok) {
				*pcnt = 0;
				return 0;
			}
			e10 = 0;
		}
		lrp += e10;
	} else if (c>=0) {
		ungetc(c, f);
		--*pcnt;
	}
	if (!gotdig) {
		*pcnt = 0;
		return 0;
	}

	if (lrp==dc && (!k || (k==1 && !j)) && (bits>30 || x[0]>>bits==0))
		return sign * (long double)x[0];
	if (lrp > -emin/2)
		return sign * LDBL_MAX * LDBL_MAX;
	if (lrp < emin-2*LDBL_MANT_DIG)
		return sign * LDBL_MIN * LDBL_MIN;

	if (k<KMAX && j) {
		for (; j<9; j++) x[k]*=10;
		k++;
		j=0;
	}

	a = 0;
	z = k;
	e2 = 0;
	rp = lrp;

	while (rp < 18+9*LD_B1B_DIG) {
		uint32_t carry = 0;
		e2 -= 29;
		for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
			uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
			if (tmp > 1000000000) {
				carry = tmp / 1000000000;
				x[k] = tmp % 1000000000;
			} else {
				carry = 0;
				x[k] = tmp;
			}
			if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
			if (k==a) break;
		}
		if (carry) {
			rp += 9;
			if (a == z) {
				z = (z-1 & MASK);
				x[z-1 & MASK] |= x[z];
			}
			a = (a-1 & MASK);
			x[a] = carry;
		}
	}

	if (rp % 9) {
		static const int p10s[] = {
			100000000, 10000000, 1000000, 100000,
			10000, 1000, 100, 10
		};
		int rpm9 = rp % 9;
		int p10 = p10s[rpm9-1];
		uint32_t carry = 0;
		for (k=a; k!=z; k=(k+1 & MASK)) {
			uint32_t tmp = x[k] % p10;
			x[k] = x[k]/p10 + carry;
			carry = 1000000000/p10 * tmp;
			if (k==a && !x[k]) {
				a = (a+1 & MASK);
				rp -= 9;
			}
		}
		if (carry) {
			if ((z+1 & MASK) != a) {
				x[z] = carry;
				z = (z+1 & MASK);
			} else x[z-1 & MASK] |= 1;
		}
		rp += 9-rpm9;
	}

	for (;;) {
		uint32_t carry = 0;
		int sh = 1;
		for (i=0; i<LD_B1B_DIG; i++) {
			k = (a+i & MASK);
			if (k == z || x[k] < th[i]) {
				i=LD_B1B_DIG;
				break;
			}
			if (x[a+i & MASK] > th[i]) break;
		}
		if (i==LD_B1B_DIG && rp==9*LD_B1B_DIG) break;
		/* FIXME: find a way to compute optimal sh */
		if (rp > 9+9*LD_B1B_DIG) sh = 9;
		e2 += sh;
		for (k=a; k!=z; k=(k+1 & MASK)) {
			uint32_t tmp = x[k] & (1<<sh)-1;
			x[k] = (x[k]>>sh) + carry;
			carry = (1000000000>>sh) * tmp;
			if (k==a && !x[k]) {
				a = (a+1 & MASK);
				rp -= 9;
			}
		}
		if (carry) {
			if ((z+1 & MASK) != a) {
				x[z] = carry;
				z = (z+1 & MASK);
			} else x[z-1 & MASK] |= 1;
		}
	}

	for (y=i=0; i<LD_B1B_DIG && (a+i & MASK)!=z; i++)
		y = 1000000000.0L * y + x[a+i & MASK];

	y *= sign;

	if (bits > LDBL_MANT_DIG+e2-emin) {
		bits = LDBL_MANT_DIG+e2-emin;
		if (bits<0) bits=0;
	}

	if (bits < LDBL_MANT_DIG) {
		bias = copysignl(scalbn(1, 2*LDBL_MANT_DIG-bits-1), y);
		frac = fmodl(y, scalbn(1, LDBL_MANT_DIG-bits));
		y -= frac;
		y += bias;
	}

	if ((a+i & MASK) != z) {
		uint32_t t = x[a+i & MASK];
		if (t < 500000000 && (t || (a+i+1 & MASK) != z))
			frac += 0.25*sign;
		else if (t > 500000000)
			frac += 0.75*sign;
		else if (t == 500000000) {
			if ((a+i+1 & MASK) == z)
				frac += 0.5*sign;
			else
				frac += 0.75*sign;
		}
		if (LDBL_MANT_DIG-bits >= 2 && !fmodl(frac, 1))
			frac++;
	}

	y += frac;
	y -= bias;

	y = scalbnl(y, e2);

	return y;
}

static long double hexfloat(FILE *f, int c, int bits, int emin, int sign, int pok, off_t *pcnt)
{
	uint32_t x = 0;
	long double y = 0;
	long double scale = 1;
	long double bias = 0;
	int gottail = 0, gotrad = 0, gotdig = 0;
	long long rp = 0;
	long long dc = 0;
	long long e2 = 0;
	int d;

	if (c<0) *pcnt += (c=getc(f))>=0;

	/* Skip leading zeros */
	for (; c=='0'; *pcnt += (c=getc(f))>=0) gotdig = 1;

	if (c=='.') {
		gotrad = 1;
		*pcnt += (c=getc(f))>=0;
		/* Count zeros after the radix point before significand */
		for (rp=0; c=='0'; *pcnt += (c=getc(f))>=0, rp--) gotdig = 1;
	}

	for (; c-'0'<10U || (c|32)-'a'<6U || c=='.'; *pcnt += (c=getc(f))>=0) {
		if (c=='.') {
			if (gotrad) break;
			rp = dc;
			gotrad = 1;
		} else {
			gotdig = 1;
			if (c > '9') d = (c|32)+10-'a';
			else d = c-'0';
			if (dc<8) {
				x = x*16 + d;
			} else if (dc < LDBL_MANT_DIG/4+1) {
				y += d*(scale/=16);
			} else if (d && !gottail) {
				y += 0.5*scale;
				gottail = 1;
			}
			dc++;
		}
	}
	if (!gotdig) {
		if (c>=0) {
			ungetc(c, f);
			--*pcnt;
		}
		if (pok) *pcnt -= 1+gotrad; /* uncount the rp, x of 0x */
		else *pcnt = 0;
		return 0;
	}
	if (!gotrad) rp = dc;
	while (dc<8) x *= 16, dc++;
	if ((c|32)=='p') {
		e2 = scanexp(f, pcnt);
		if (e2 == LLONG_MIN) {
			if (!pok) {
				*pcnt = 0;
				return 0;
			}
			e2 = 0;
		}
	}
	e2 += 4*rp - 32;

	if (!x) return sign * 0.0;
	if (e2 > -emin) return sign * LDBL_MAX * LDBL_MAX;
	if (e2 < emin-2*LDBL_MANT_DIG) return sign * LDBL_MIN * LDBL_MIN;

	while (x < 0x80000000) {
		if (y>=0.5) {
			x += x + 1;
			y += y - 1;
		} else {
			x += x;
			y += y;
		}
		e2--;
	}

	if (bits > 32+e2-emin) {
		bits = 32+e2-emin;
		if (bits<0) bits=0;
	}

	if (bits < LDBL_MANT_DIG)
		bias = copysignl(scalbn(1, 32+LDBL_MANT_DIG-bits-1), sign);

	if (bits<32 && y && !(x&1)) x++, y=0;

	y = bias + sign*(long double)x + sign*y;
	y -= bias;

	return scalbnl(y, e2);
}

long double __floatscan(FILE *f, int c, int prec, int pok, off_t *pcnt)
{
	int sign = 1;
	int i;
	int bits;
	int emin;

	switch (prec) {
	case 0:
		bits = 24;
		emin = -149;
		break;
	case 1:
		bits = 53;
		emin = -1074;
		break;
	case 2:
		bits = LDBL_MANT_DIG;
		emin = -16445;
		break;
	default:
		return 0;
	}

	if (c<0) *pcnt += (c=getc(f))>=0;

	if (c=='+' || c=='-') {
		sign -= 2*(c=='-');
		*pcnt += (c=getc(f))>=0;
	}

	for (i=0; i<8 && (c|32)=="infinity"[i]; i++)
		if (i<7) c = getc(f);
	if (i==3 || i==8 || (i>3 && pok)) {
		if (i==3 && c>=0) ungetc(c, f);
		if (i==8) *pcnt += 7;
		else *pcnt += 2;
		return sign * INFINITY;
	}
	if (!i) for (i=0; i<3 && (c|32)=="nan"[i]; i++)
		if (i<3) c = getc(f);
	if (i==3) {
		*pcnt += 2;
		return sign>0 ? NAN : -NAN;
	}

	if (i) {
		if (c>=0) ungetc(c, f);
		*pcnt = 0;
		return 0;
	}

	if (c=='0') {
		*pcnt += (c=getc(f))>=0;
		if ((c|32) == 'x')
			return hexfloat(f, -1, bits, emin, sign, pok, pcnt);
		if (c>=0) {
			ungetc(c, f);
			--*pcnt;
		}
		c = '0';
	}

	return decfloat(f, c, bits, emin, sign, pok, pcnt);
}

#if 0
long double strtold(const char *s, char **p)
{
	FILE f = { .buf = s, .rpos = s, .rend = (void *)-1, .lock = -1 };
	off_t cnt;
	long double y = __floatscan(f, -1, 2, 1, &pos);
	if (p) *p = (char *)s + cnt;
	return y;
}
#endif

#include <string.h>

int main(int argc, char **argv)
{
	FILE *f = 0;
	if (argc>1) f = fmemopen(argv[1], strlen(argv[1]), "r");
	if (!f) f = stdin;

	off_t pos = 0;
	long double y = __floatscan(f, -1, 2, 1, &pos);
	printf("%La (read %lld)\n", y, (long long)pos);
	printf("%.21Lg (read %lld)\n", y, (long long)pos);
	return 0;
}

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2012-04-10 15:24 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-04-09 19:17 float scanner status, upcoming release Rich Felker
2012-04-09 19:21 ` Rich Felker
2012-04-10  0:09   ` Pascal Cuoq
2012-04-10  0:36     ` Rich Felker
2012-04-10  0:48 ` Rich Felker
2012-04-10 12:15   ` Szabolcs Nagy
2012-04-10 13:02     ` Rich Felker
2012-04-10 15:24 ` Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).