zsh-workers
 help / color / mirror / code / Atom feed
* PATCH: (provisional) underscores in constants in numeric evaluation
@ 2012-08-30 14:11 Peter Stephenson
  2012-08-30 14:18 ` Jérémie Roquet
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Peter Stephenson @ 2012-08-30 14:11 UTC (permalink / raw)
  To: Zsh Hackers' List

Another of those occasions when "wouldn't it be helpful if..." toppled
over the edge into frustration.

Some scripting languages allow dummy "_"s in numeric constants so you
can count the decimal places.  This is very convenient if you're doing
arithmetic with large integers.

It seems to me (watch this assumption closely) that if we restrict this
to handling constants in arithmetic evaluation, where we already know we
require a numeric constant and we already know the syntax needs to be
that of an arithmetic expression, all substitutions having been done
by this point, we can get away with doing this without a new option.  It
would not be safe to modify zstrtol() to do this more widely.

A quick poll of Perl and Ruby suggests they both allow underscores in
floating point constants too.  Handling those is the messiest (and least
efficient) part of this patch --- the string is copied and the
underscores removed before strtod() is called --- but it should be safe.
I don't fancy replacing strtod(); floating point numbers need careful
handling.

I won't be committing this any time particularly soon.

Index: Src/math.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/math.c,v
retrieving revision 1.41
diff -p -u -r1.41 math.c
--- Src/math.c	19 Jun 2011 16:26:11 -0000	1.41
+++ Src/math.c	30 Aug 2012 13:55:36 -0000
@@ -452,7 +452,7 @@ lexconstant(void)
 	nptr++;
 	if (*nptr == 'x' || *nptr == 'X') {
 	    /* Let zstrtol parse number with base */
-	    yyval.u.l = zstrtol(ptr, &ptr, 0);
+	    yyval.u.l = zstrtol_underscore(ptr, &ptr, 0, 1);
 	    /* Should we set lastbase here? */
 	    lastbase = 16;
 	    return NUM;
@@ -466,13 +466,13 @@ lexconstant(void)
 	     * it can't be a base indication (always decimal)
 	     * or a floating point number.
 	     */
-	    for (ptr2 = nptr; idigit(*ptr2); ptr2++)
+	    for (ptr2 = nptr; idigit(*ptr2) || *ptr2 == '_'; ptr2++)
 		;
 
 	    if (ptr2 > nptr && *ptr2 != '.' && *ptr2 != 'e' &&
 		*ptr2 != 'E' && *ptr2 != '#')
 	    {
-		yyval.u.l = zstrtol(ptr, &ptr, 0);
+		yyval.u.l = zstrtol_underscore(ptr, &ptr, 0, 1);
 		lastbase = 8;
 		return NUM;
 	    }
@@ -481,17 +481,43 @@ lexconstant(void)
     }
     else
     {
-	while (idigit(*nptr))
+	while (idigit(*nptr) || *nptr == '_')
 	    nptr++;
     }
 
     if (*nptr == '.' || *nptr == 'e' || *nptr == 'E') {
+	char *ptr2;
 	/* it's a float */
 	yyval.type = MN_FLOAT;
 #ifdef USE_LOCALE
 	prev_locale = dupstring(setlocale(LC_NUMERIC, NULL));
 	setlocale(LC_NUMERIC, "POSIX");
 #endif
+	if (*nptr == '.') {
+	    nptr++;
+	    while (idigit(*nptr) || *nptr == '_')
+		nptr++;
+	}
+	if (*nptr == 'e' || *nptr == 'E') {
+	    nptr++;
+	    if (*nptr == '+' || *nptr == '-')
+		nptr++;
+	    while (idigit(*nptr) || *nptr == '_')
+		nptr++;
+	}
+	for (ptr2 = ptr; ptr2 < nptr; ptr2++) {
+	    if (*ptr2 == '_') {
+		int len = nptr - ptr;
+		ptr = strdup(ptr);
+		for (ptr2 = ptr; len; len--) {
+		    if (*ptr2 == '_')
+			chuck(ptr2);
+		    else
+			ptr2++;
+		}
+		break;
+	    }
+	}
 	yyval.u.d = strtod(ptr, &nptr);
 #ifdef USE_LOCALE
 	if (prev_locale) setlocale(LC_NUMERIC, prev_locale);
@@ -503,11 +529,12 @@ lexconstant(void)
 	ptr = nptr;
     } else {
 	/* it's an integer */
-	yyval.u.l = zstrtol(ptr, &ptr, 10);
+	yyval.u.l = zstrtol_underscore(ptr, &ptr, 10, 1);
 
 	if (*ptr == '#') {
 	    ptr++;
-	    yyval.u.l = zstrtol(ptr, &ptr, lastbase = yyval.u.l);
+	    lastbase = yyval.u.l;
+	    yyval.u.l = zstrtol_underscore(ptr, &ptr, lastbase, 1);
 	}
     }
     return NUM;
Index: Src/utils.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/utils.c,v
retrieving revision 1.270
diff -p -u -r1.270 utils.c
--- Src/utils.c	27 Jun 2012 07:10:29 -0000	1.270
+++ Src/utils.c	30 Aug 2012 13:55:36 -0000
@@ -2030,13 +2030,20 @@ skipparens(char inpar, char outpar, char
    return level;
 }
 
+/**/
+mod_export zlong
+zstrtol(const char *s, char **t, int base)
+{
+    return zstrtol_underscore(s, t, base, 0);
+}
+
 /* Convert string to zlong (see zsh.h).  This function (without the z) *
  * is contained in the ANSI standard C library, but a lot of them seem *
  * to be broken.                                                       */
 
 /**/
 mod_export zlong
-zstrtol(const char *s, char **t, int base)
+zstrtol_underscore(const char *s, char **t, int base, int underscore)
 {
     const char *inp, *trunc = NULL;
     zulong calc = 0, newcalc = 0;
@@ -2062,22 +2069,24 @@ zstrtol(const char *s, char **t, int bas
     if (base < 2 || base > 36) {
 	zerr("invalid base (must be 2 to 36 inclusive): %d", base);
 	return (zlong)0;
-    } else if (base <= 10)
-	for (; *s >= '0' && *s < ('0' + base); s++) {
-	    if (trunc)
+    } else if (base <= 10) {
+	for (; (*s >= '0' && *s < ('0' + base)) ||
+		 (underscore && *s == '_'); s++) {
+	    if (trunc || *s == '_')
 		continue;
 	    newcalc = calc * base + *s - '0';
 	    if (newcalc < calc)
 	    {
-	      trunc = s;
-	      continue;
+		trunc = s;
+		continue;
 	    }
 	    calc = newcalc;
 	}
-    else
+    } else {
 	for (; idigit(*s) || (*s >= 'a' && *s < ('a' + base - 10))
-	     || (*s >= 'A' && *s < ('A' + base - 10)); s++) {
-	    if (trunc)
+	     || (*s >= 'A' && *s < ('A' + base - 10))
+	     || (underscore && *s == '_'); s++) {
+	    if (trunc || *s == '_')
 		continue;
 	    newcalc = calc*base + (idigit(*s) ? (*s - '0') : (*s & 0x1f) + 9);
 	    if (newcalc < calc)
@@ -2087,6 +2096,7 @@ zstrtol(const char *s, char **t, int bas
 	    }
 	    calc = newcalc;
 	}
+    }
 
     /*
      * Special case: check for a number that was just too long for

-- 
Peter Stephenson <pws@csr.com>            Software Engineer
Tel: +44 (0)1223 692070                   Cambridge Silicon Radio Limited
Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, UK


Member of the CSR plc group of companies. CSR plc registered in England and Wales, registered number 4187346, registered office Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, United Kingdom
More information can be found at www.csr.com. Follow CSR on Twitter at http://twitter.com/CSR_PLC and read our blog at www.csr.com/blog


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: PATCH: (provisional) underscores in constants in numeric evaluation
  2012-08-30 14:11 PATCH: (provisional) underscores in constants in numeric evaluation Peter Stephenson
@ 2012-08-30 14:18 ` Jérémie Roquet
  2012-09-03 11:40 ` Peter Stephenson
  2012-09-11 16:08 ` Peter Stephenson
  2 siblings, 0 replies; 4+ messages in thread
From: Jérémie Roquet @ 2012-08-30 14:18 UTC (permalink / raw)
  To: Zsh Hackers' List

Hi,

2012/8/30 Peter Stephenson <Peter.Stephenson@csr.com>:
> Some scripting languages allow dummy "_"s in numeric constants so you
> can count the decimal places.  This is very convenient if you're doing
> arithmetic with large integers.

Cannot say if the patch is safe or not, but I'd really enjoy this
feature (which is also available in D, btw).

Thanks!

-- 
Jérémie


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: PATCH: (provisional) underscores in constants in numeric evaluation
  2012-08-30 14:11 PATCH: (provisional) underscores in constants in numeric evaluation Peter Stephenson
  2012-08-30 14:18 ` Jérémie Roquet
@ 2012-09-03 11:40 ` Peter Stephenson
  2012-09-11 16:08 ` Peter Stephenson
  2 siblings, 0 replies; 4+ messages in thread
From: Peter Stephenson @ 2012-09-03 11:40 UTC (permalink / raw)
  To: Zsh Hackers' List

Here are some documentation and tests.

Index: Doc/Zsh/arith.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/arith.yo,v
retrieving revision 1.14
diff -p -u -r1.14 arith.yo
--- Doc/Zsh/arith.yo	19 Jun 2008 12:54:37 -0000	1.14
+++ Doc/Zsh/arith.yo	3 Sep 2012 11:34:20 -0000
@@ -48,6 +48,12 @@ The var(base)tt(#) may also be omitted, 
 base 10 is used.  For backwards compatibility the form
 `tt([)var(base)tt(])var(n)' is also accepted.
 
+An integer expression or a base given in the form
+`var(base)tt(#)var(n)' may contain underscores (`tt(_)') after the
+leading digit for visual guidance; these are ignored in computation.
+Examples are tt(1_000_000) or tt(0xffff_ffff) which are equivalent to
+tt(1000000) and tt(0xffffffff) respectively.
+
 It is also possible to specify a base to be used for output in the form
 `tt([#)var(base)tt(])', for example `tt([#16])'.  This is used when
 outputting arithmetical substitutions or when assigning to scalar
@@ -87,7 +93,9 @@ output is valid syntax for input.  If th
 Floating point constants are recognized by the presence of a decimal point
 or an exponent.  The decimal point may be the first character of the
 constant, but the exponent character tt(e) or tt(E) may not, as it will be
-taken for a parameter name.
+taken for a parameter name.  All numeric parts (before and after the
+decimal point and in the exponent) may contain underscores after the
+leading digit for visual guidance; these are ignored in computation.
 
 cindex(arithmetic operators)
 cindex(operators, arithmetic)
Index: Test/C01arith.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/C01arith.ztst,v
retrieving revision 1.18
diff -p -u -r1.18 C01arith.ztst
--- Test/C01arith.ztst	20 Jan 2010 17:18:30 -0000	1.18
+++ Test/C01arith.ztst	3 Sep 2012 11:34:20 -0000
@@ -210,3 +210,26 @@
   print $x
 0:double increment for repeated expression
 >2
+
+  # Floating point.  Default precision should take care of rounding errors.
+  print $(( 1_0.000_000e0_1 ))
+  # Integer.
+  print $(( 0x_ff_ff_ ))
+  # _ are parts of variable names that don't start with a digit
+  __myvar__=42
+  print $(( __myvar__ + $__myvar__ ))
+  # _ is not part of a variable name that does start with a digit
+  # (such names are substituted before math eval)
+  set -- 6
+  print $(( $1_000_000 ))
+  # Underscores in expressions with no whitespace
+  print $(( 3_000_+4_000_/2 ))
+  # Underscores may appear in the base descriptor, for what it's worth...
+  print $(( 1_6_#f_f_ ))
+0:underscores in math constants
+>100.
+>65535
+>84
+>6000000
+>5000
+>255

-- 
Peter Stephenson <pws@csr.com>            Software Engineer
Tel: +44 (0)1223 692070                   Cambridge Silicon Radio Limited
Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, UK


Member of the CSR plc group of companies. CSR plc registered in England and Wales, registered number 4187346, registered office Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, United Kingdom
More information can be found at www.csr.com. Follow CSR on Twitter at http://twitter.com/CSR_PLC and read our blog at www.csr.com/blog


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: PATCH: (provisional) underscores in constants in numeric evaluation
  2012-08-30 14:11 PATCH: (provisional) underscores in constants in numeric evaluation Peter Stephenson
  2012-08-30 14:18 ` Jérémie Roquet
  2012-09-03 11:40 ` Peter Stephenson
@ 2012-09-11 16:08 ` Peter Stephenson
  2 siblings, 0 replies; 4+ messages in thread
From: Peter Stephenson @ 2012-09-11 16:08 UTC (permalink / raw)
  To: Zsh Hackers' List

On Thu, 30 Aug 2012 15:11:24 +0100
Peter Stephenson <Peter.Stephenson@csr.com> wrote:
> Some scripting languages allow dummy "_"s in numeric constants so you
> can count the decimal places.  This is very convenient if you're doing
> arithmetic with large integers.

I presume most people are back from holiday and have had the chance to
see this, so I've committed these two changes.

pws


Member of the CSR plc group of companies. CSR plc registered in England and Wales, registered number 4187346, registered office Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, United Kingdom
More information can be found at www.csr.com. Follow CSR on Twitter at http://twitter.com/CSR_PLC and read our blog at www.csr.com/blog


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2012-09-11 16:55 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-08-30 14:11 PATCH: (provisional) underscores in constants in numeric evaluation Peter Stephenson
2012-08-30 14:18 ` Jérémie Roquet
2012-09-03 11:40 ` Peter Stephenson
2012-09-11 16:08 ` Peter Stephenson

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).