Re: float scanner status, upcoming release

mailing list of musl libc
 help / color / mirror / code / Atom feed

From: Rich Felker <dalias@aerifal.cx>
To: musl@lists.openwall.com
Subject: Re: float scanner status, upcoming release
Date: Tue, 10 Apr 2012 11:24:42 -0400	[thread overview]
Message-ID: <20120410152442.GD7281@brightrain.aerifal.cx> (raw)
In-Reply-To: <20120409191712.GA5269@brightrain.aerifal.cx>

[-- Attachment #1: Type: text/plain, Size: 299 bytes --]

One more version, with hex float support and double-rounding issues
for denormals fixed. Sending this for the record as hopefully the last
pre-integrated version of the code. Now I'm going to work on
integrating it into musl so I can run further tests (cluts, etc.)
against strtod, scanf, ...

Rich

[-- Attachment #2: floatscan.c --]
[-- Type: text/plain, Size: 8611 bytes --]

#define _XOPEN_SOURCE 700
#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <fenv.h>
#include <float.h>
#include <limits.h>

#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024

#define LD_B1B_DIG 2
#define LD_B1B_MAX 9007199, 254740991
#define KMAX 128

#else /* LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 */

#define LD_B1B_DIG 3
#define LD_B1B_MAX 18, 446744073, 709551615
#define KMAX 2048

#endif

#define MASK (KMAX-1)


#if 0
static inline fast_unget(int c, FILE *f)
{
	if (f->rpos) f->rpos--;
}
#undef ungetc
#define ungetc fast_unget
#endif
#undef getc
#define getc getc_unlocked


static long long scanexp(FILE *f, off_t *pcnt)
{
	int c;
	int x;
	long long y;
	int neg = 0;
	
	*pcnt += (c=getc(f))>=0;
	if (c=='+' || c=='-') {
		neg = (c=='-');
		*pcnt += (c=getc(f))>=0;
		if (c-'0'>=10U) {
			if (c>=0) {
				ungetc(c, f);
				--*pcnt;
			}
			return LLONG_MIN;
		}
	}
	for (x=0; c-'0'<10U && x<INT_MAX/10; *pcnt += (c=getc(f))>=0)
		x = 10*x + c-'0';
	for (y=x; c-'0'<10U && x<LLONG_MAX/10; *pcnt += (c=getc(f))>=0)
		y = 10*y + c-'0';
	for (; c-'0'<10U; *pcnt += (c=getc(f))>=0);
	if (c>=0) {
		ungetc(c, f);
		--*pcnt;
	}
	return neg ? -y : y;
}


static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int pok, off_t *pcnt)
{
	uint32_t x[KMAX];
	static const uint32_t th[] = { LD_B1B_MAX };
	int i, j, k, a, z;
	long long lrp=-1, dc=0;
	int gotdig = 0;
	int rp;
	int e10=0;
	int e2;
	long double y;
	long double frac=0;
	long double bias=0;

	j=0;
	k=0;

	if (c<0) *pcnt += (c=getc(f))>=0;

	/* Don't let leading zeros consume buffer space */
	for (; c=='0'; *pcnt += (c=getc(f))>=0) gotdig=1;

	x[0] = 0;
	for (; c-'0'<10U || c=='.'; *pcnt += (c=getc(f))>=0) {
		if (c == '.') {
			if (lrp!=-1) break;
			lrp = dc;
		} else if (k < KMAX) {
			dc++;
			if (j) x[k] = x[k]*10 + c-'0';
			else x[k] = c-'0';
			if (++j==9) {
				k++;
				j=0;
			}
			gotdig=1;
		} else {
			dc++;
			x[KMAX-1] |= c-'0';
		}
	}
	if (lrp==-1) lrp=dc;

	if (gotdig && (c|32)=='e') {
		e10 = scanexp(f, pcnt);
		if (e10 == LLONG_MIN) {
			if (!pok) {
				*pcnt = 0;
				return 0;
			}
			e10 = 0;
		}
		lrp += e10;
	} else if (c>=0) {
		ungetc(c, f);
		--*pcnt;
	}
	if (!gotdig) {
		*pcnt = 0;
		return 0;
	}

	if (lrp==dc && (!k || (k==1 && !j)) && (bits>30 || x[0]>>bits==0))
		return sign * (long double)x[0];
	if (lrp > -emin/2)
		return sign * LDBL_MAX * LDBL_MAX;
	if (lrp < emin-2*LDBL_MANT_DIG)
		return sign * LDBL_MIN * LDBL_MIN;

	if (k<KMAX && j) {
		for (; j<9; j++) x[k]*=10;
		k++;
		j=0;
	}

	a = 0;
	z = k;
	e2 = 0;
	rp = lrp;

	while (rp < 18+9*LD_B1B_DIG) {
		uint32_t carry = 0;
		e2 -= 29;
		for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
			uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
			if (tmp > 1000000000) {
				carry = tmp / 1000000000;
				x[k] = tmp % 1000000000;
			} else {
				carry = 0;
				x[k] = tmp;
			}
			if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
			if (k==a) break;
		}
		if (carry) {
			rp += 9;
			if (a == z) {
				z = (z-1 & MASK);
				x[z-1 & MASK] |= x[z];
			}
			a = (a-1 & MASK);
			x[a] = carry;
		}
	}

	if (rp % 9) {
		static const int p10s[] = {
			100000000, 10000000, 1000000, 100000,
			10000, 1000, 100, 10
		};
		int rpm9 = rp % 9;
		int p10 = p10s[rpm9-1];
		uint32_t carry = 0;
		for (k=a; k!=z; k=(k+1 & MASK)) {
			uint32_t tmp = x[k] % p10;
			x[k] = x[k]/p10 + carry;
			carry = 1000000000/p10 * tmp;
			if (k==a && !x[k]) {
				a = (a+1 & MASK);
				rp -= 9;
			}
		}
		if (carry) {
			if ((z+1 & MASK) != a) {
				x[z] = carry;
				z = (z+1 & MASK);
			} else x[z-1 & MASK] |= 1;
		}
		rp += 9-rpm9;
	}

	for (;;) {
		uint32_t carry = 0;
		int sh = 1;
		for (i=0; i<LD_B1B_DIG; i++) {
			k = (a+i & MASK);
			if (k == z || x[k] < th[i]) {
				i=LD_B1B_DIG;
				break;
			}
			if (x[a+i & MASK] > th[i]) break;
		}
		if (i==LD_B1B_DIG && rp==9*LD_B1B_DIG) break;
		/* FIXME: find a way to compute optimal sh */
		if (rp > 9+9*LD_B1B_DIG) sh = 9;
		e2 += sh;
		for (k=a; k!=z; k=(k+1 & MASK)) {
			uint32_t tmp = x[k] & (1<<sh)-1;
			x[k] = (x[k]>>sh) + carry;
			carry = (1000000000>>sh) * tmp;
			if (k==a && !x[k]) {
				a = (a+1 & MASK);
				rp -= 9;
			}
		}
		if (carry) {
			if ((z+1 & MASK) != a) {
				x[z] = carry;
				z = (z+1 & MASK);
			} else x[z-1 & MASK] |= 1;
		}
	}

	for (y=i=0; i<LD_B1B_DIG && (a+i & MASK)!=z; i++)
		y = 1000000000.0L * y + x[a+i & MASK];

	y *= sign;

	if (bits > LDBL_MANT_DIG+e2-emin) {
		bits = LDBL_MANT_DIG+e2-emin;
		if (bits<0) bits=0;
	}

	if (bits < LDBL_MANT_DIG) {
		bias = copysignl(scalbn(1, 2*LDBL_MANT_DIG-bits-1), y);
		frac = fmodl(y, scalbn(1, LDBL_MANT_DIG-bits));
		y -= frac;
		y += bias;
	}

	if ((a+i & MASK) != z) {
		uint32_t t = x[a+i & MASK];
		if (t < 500000000 && (t || (a+i+1 & MASK) != z))
			frac += 0.25*sign;
		else if (t > 500000000)
			frac += 0.75*sign;
		else if (t == 500000000) {
			if ((a+i+1 & MASK) == z)
				frac += 0.5*sign;
			else
				frac += 0.75*sign;
		}
		if (LDBL_MANT_DIG-bits >= 2 && !fmodl(frac, 1))
			frac++;
	}

	y += frac;
	y -= bias;

	y = scalbnl(y, e2);

	return y;
}

static long double hexfloat(FILE *f, int c, int bits, int emin, int sign, int pok, off_t *pcnt)
{
	uint32_t x = 0;
	long double y = 0;
	long double scale = 1;
	long double bias = 0;
	int gottail = 0, gotrad = 0, gotdig = 0;
	long long rp = 0;
	long long dc = 0;
	long long e2 = 0;
	int d;

	if (c<0) *pcnt += (c=getc(f))>=0;

	/* Skip leading zeros */
	for (; c=='0'; *pcnt += (c=getc(f))>=0) gotdig = 1;

	if (c=='.') {
		gotrad = 1;
		*pcnt += (c=getc(f))>=0;
		/* Count zeros after the radix point before significand */
		for (rp=0; c=='0'; *pcnt += (c=getc(f))>=0, rp--) gotdig = 1;
	}

	for (; c-'0'<10U || (c|32)-'a'<6U || c=='.'; *pcnt += (c=getc(f))>=0) {
		if (c=='.') {
			if (gotrad) break;
			rp = dc;
			gotrad = 1;
		} else {
			gotdig = 1;
			if (c > '9') d = (c|32)+10-'a';
			else d = c-'0';
			if (dc<8) {
				x = x*16 + d;
			} else if (dc < LDBL_MANT_DIG/4+1) {
				y += d*(scale/=16);
			} else if (d && !gottail) {
				y += 0.5*scale;
				gottail = 1;
			}
			dc++;
		}
	}
	if (!gotdig) {
		if (c>=0) {
			ungetc(c, f);
			--*pcnt;
		}
		if (pok) *pcnt -= 1+gotrad; /* uncount the rp, x of 0x */
		else *pcnt = 0;
		return 0;
	}
	if (!gotrad) rp = dc;
	while (dc<8) x *= 16, dc++;
	if ((c|32)=='p') {
		e2 = scanexp(f, pcnt);
		if (e2 == LLONG_MIN) {
			if (!pok) {
				*pcnt = 0;
				return 0;
			}
			e2 = 0;
		}
	}
	e2 += 4*rp - 32;

	if (!x) return sign * 0.0;
	if (e2 > -emin) return sign * LDBL_MAX * LDBL_MAX;
	if (e2 < emin-2*LDBL_MANT_DIG) return sign * LDBL_MIN * LDBL_MIN;

	while (x < 0x80000000) {
		if (y>=0.5) {
			x += x + 1;
			y += y - 1;
		} else {
			x += x;
			y += y;
		}
		e2--;
	}

	if (bits > 32+e2-emin) {
		bits = 32+e2-emin;
		if (bits<0) bits=0;
	}

	if (bits < LDBL_MANT_DIG)
		bias = copysignl(scalbn(1, 32+LDBL_MANT_DIG-bits-1), sign);

	if (bits<32 && y && !(x&1)) x++, y=0;

	y = bias + sign*(long double)x + sign*y;
	y -= bias;

	return scalbnl(y, e2);
}

long double __floatscan(FILE *f, int c, int prec, int pok, off_t *pcnt)
{
	int sign = 1;
	int i;
	int bits;
	int emin;

	switch (prec) {
	case 0:
		bits = 24;
		emin = -149;
		break;
	case 1:
		bits = 53;
		emin = -1074;
		break;
	case 2:
		bits = LDBL_MANT_DIG;
		emin = -16445;
		break;
	default:
		return 0;
	}

	if (c<0) *pcnt += (c=getc(f))>=0;

	if (c=='+' || c=='-') {
		sign -= 2*(c=='-');
		*pcnt += (c=getc(f))>=0;
	}

	for (i=0; i<8 && (c|32)=="infinity"[i]; i++)
		if (i<7) c = getc(f);
	if (i==3 || i==8 || (i>3 && pok)) {
		if (i==3 && c>=0) ungetc(c, f);
		if (i==8) *pcnt += 7;
		else *pcnt += 2;
		return sign * INFINITY;
	}
	if (!i) for (i=0; i<3 && (c|32)=="nan"[i]; i++)
		if (i<3) c = getc(f);
	if (i==3) {
		*pcnt += 2;
		return sign>0 ? NAN : -NAN;
	}

	if (i) {
		if (c>=0) ungetc(c, f);
		*pcnt = 0;
		return 0;
	}

	if (c=='0') {
		*pcnt += (c=getc(f))>=0;
		if ((c|32) == 'x')
			return hexfloat(f, -1, bits, emin, sign, pok, pcnt);
		if (c>=0) {
			ungetc(c, f);
			--*pcnt;
		}
		c = '0';
	}

	return decfloat(f, c, bits, emin, sign, pok, pcnt);
}

#if 0
long double strtold(const char *s, char **p)
{
	FILE f = { .buf = s, .rpos = s, .rend = (void *)-1, .lock = -1 };
	off_t cnt;
	long double y = __floatscan(f, -1, 2, 1, &pos);
	if (p) *p = (char *)s + cnt;
	return y;
}
#endif

#include <string.h>

int main(int argc, char **argv)
{
	FILE *f = 0;
	if (argc>1) f = fmemopen(argv[1], strlen(argv[1]), "r");
	if (!f) f = stdin;

	off_t pos = 0;
	long double y = __floatscan(f, -1, 2, 1, &pos);
	printf("%La (read %lld)\n", y, (long long)pos);
	printf("%.21Lg (read %lld)\n", y, (long long)pos);
	return 0;
}

     prev parent reply	other threads:[~2012-04-10 15:24 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-04-09 19:17 Rich Felker
2012-04-09 19:21 ` Rich Felker
2012-04-10  0:09   ` Pascal Cuoq
2012-04-10  0:36     ` Rich Felker
2012-04-10  0:48 ` Rich Felker
2012-04-10 12:15   ` Szabolcs Nagy
2012-04-10 13:02     ` Rich Felker
2012-04-10 15:24 ` Rich Felker [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120410152442.GD7281@brightrain.aerifal.cx \
    --to=dalias@aerifal.cx \
    --cc=musl@lists.openwall.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).