From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 10186 invoked by alias); 30 Aug 2012 14:11:50 -0000 Mailing-List: contact zsh-workers-help@zsh.org; run by ezmlm Precedence: bulk X-No-Archive: yes List-Id: Zsh Workers List List-Post: List-Help: X-Seq: 30647 Received: (qmail 10640 invoked from network); 30 Aug 2012 14:11:38 -0000 X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on f.primenet.com.au X-Spam-Level: X-Spam-Status: No, score=-2.6 required=5.0 tests=BAYES_00,RCVD_IN_DNSWL_LOW, SPF_HELO_PASS autolearn=ham version=3.3.2 Received-SPF: none (ns1.primenet.com.au: domain at csr.com does not designate permitted sender hosts) Date: Thu, 30 Aug 2012 15:11:24 +0100 From: Peter Stephenson To: "Zsh Hackers' List" Subject: PATCH: (provisional) underscores in constants in numeric evaluation Message-ID: <20120830151124.47ed724f@pwslap01u.europe.root.pri> Organization: Cambridge Silicon Radio X-Mailer: Claws Mail 3.7.9 (GTK+ 2.22.0; i386-redhat-linux-gnu) MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit X-Originating-IP: [10.101.10.18] X-Scanned-By: MailControl 8316.0 (www.mailcontrol.com) on 10.71.0.142 Another of those occasions when "wouldn't it be helpful if..." toppled over the edge into frustration. Some scripting languages allow dummy "_"s in numeric constants so you can count the decimal places. This is very convenient if you're doing arithmetic with large integers. It seems to me (watch this assumption closely) that if we restrict this to handling constants in arithmetic evaluation, where we already know we require a numeric constant and we already know the syntax needs to be that of an arithmetic expression, all substitutions having been done by this point, we can get away with doing this without a new option. It would not be safe to modify zstrtol() to do this more widely. A quick poll of Perl and Ruby suggests they both allow underscores in decimal constants. That's the messiest (and least efficient) part of this patch --- but it should be safe. I don't fancy replacing strtod(); floating point numbers need careful handling. I won't be committing this any time particularly soon. Index: Src/math.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/math.c,v retrieving revision 1.41 diff -p -u -r1.41 math.c --- Src/math.c 19 Jun 2011 16:26:11 -0000 1.41 +++ Src/math.c 30 Aug 2012 13:55:36 -0000 @@ -452,7 +452,7 @@ lexconstant(void) nptr++; if (*nptr == 'x' || *nptr == 'X') { /* Let zstrtol parse number with base */ - yyval.u.l = zstrtol(ptr, &ptr, 0); + yyval.u.l = zstrtol_underscore(ptr, &ptr, 0, 1); /* Should we set lastbase here? */ lastbase = 16; return NUM; @@ -466,13 +466,13 @@ lexconstant(void) * it can't be a base indication (always decimal) * or a floating point number. */ - for (ptr2 = nptr; idigit(*ptr2); ptr2++) + for (ptr2 = nptr; idigit(*ptr2) || *ptr2 == '_'; ptr2++) ; if (ptr2 > nptr && *ptr2 != '.' && *ptr2 != 'e' && *ptr2 != 'E' && *ptr2 != '#') { - yyval.u.l = zstrtol(ptr, &ptr, 0); + yyval.u.l = zstrtol_underscore(ptr, &ptr, 0, 1); lastbase = 8; return NUM; } @@ -481,17 +481,43 @@ lexconstant(void) } else { - while (idigit(*nptr)) + while (idigit(*nptr) || *nptr == '_') nptr++; } if (*nptr == '.' || *nptr == 'e' || *nptr == 'E') { + char *ptr2; /* it's a float */ yyval.type = MN_FLOAT; #ifdef USE_LOCALE prev_locale = dupstring(setlocale(LC_NUMERIC, NULL)); setlocale(LC_NUMERIC, "POSIX"); #endif + if (*nptr == '.') { + nptr++; + while (idigit(*nptr) || *nptr == '_') + nptr++; + } + if (*nptr == 'e' || *nptr == 'E') { + nptr++; + if (*nptr == '+' || *nptr == '-') + nptr++; + while (idigit(*nptr) || *nptr == '_') + nptr++; + } + for (ptr2 = ptr; ptr2 < nptr; ptr2++) { + if (*ptr2 == '_') { + int len = nptr - ptr; + ptr = strdup(ptr); + for (ptr2 = ptr; len; len--) { + if (*ptr2 == '_') + chuck(ptr2); + else + ptr2++; + } + break; + } + } yyval.u.d = strtod(ptr, &nptr); #ifdef USE_LOCALE if (prev_locale) setlocale(LC_NUMERIC, prev_locale); @@ -503,11 +529,12 @@ lexconstant(void) ptr = nptr; } else { /* it's an integer */ - yyval.u.l = zstrtol(ptr, &ptr, 10); + yyval.u.l = zstrtol_underscore(ptr, &ptr, 10, 1); if (*ptr == '#') { ptr++; - yyval.u.l = zstrtol(ptr, &ptr, lastbase = yyval.u.l); + lastbase = yyval.u.l; + yyval.u.l = zstrtol_underscore(ptr, &ptr, lastbase, 1); } } return NUM; Index: Src/utils.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/utils.c,v retrieving revision 1.270 diff -p -u -r1.270 utils.c --- Src/utils.c 27 Jun 2012 07:10:29 -0000 1.270 +++ Src/utils.c 30 Aug 2012 13:55:36 -0000 @@ -2030,13 +2030,20 @@ skipparens(char inpar, char outpar, char return level; } +/**/ +mod_export zlong +zstrtol(const char *s, char **t, int base) +{ + return zstrtol_underscore(s, t, base, 0); +} + /* Convert string to zlong (see zsh.h). This function (without the z) * * is contained in the ANSI standard C library, but a lot of them seem * * to be broken. */ /**/ mod_export zlong -zstrtol(const char *s, char **t, int base) +zstrtol_underscore(const char *s, char **t, int base, int underscore) { const char *inp, *trunc = NULL; zulong calc = 0, newcalc = 0; @@ -2062,22 +2069,24 @@ zstrtol(const char *s, char **t, int bas if (base < 2 || base > 36) { zerr("invalid base (must be 2 to 36 inclusive): %d", base); return (zlong)0; - } else if (base <= 10) - for (; *s >= '0' && *s < ('0' + base); s++) { - if (trunc) + } else if (base <= 10) { + for (; (*s >= '0' && *s < ('0' + base)) || + (underscore && *s == '_'); s++) { + if (trunc || *s == '_') continue; newcalc = calc * base + *s - '0'; if (newcalc < calc) { - trunc = s; - continue; + trunc = s; + continue; } calc = newcalc; } - else + } else { for (; idigit(*s) || (*s >= 'a' && *s < ('a' + base - 10)) - || (*s >= 'A' && *s < ('A' + base - 10)); s++) { - if (trunc) + || (*s >= 'A' && *s < ('A' + base - 10)) + || (underscore && *s == '_'); s++) { + if (trunc || *s == '_') continue; newcalc = calc*base + (idigit(*s) ? (*s - '0') : (*s & 0x1f) + 9); if (newcalc < calc) @@ -2087,6 +2096,7 @@ zstrtol(const char *s, char **t, int bas } calc = newcalc; } + } /* * Special case: check for a number that was just too long for -- Peter Stephenson Software Engineer Tel: +44 (0)1223 692070 Cambridge Silicon Radio Limited Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, UK Member of the CSR plc group of companies. CSR plc registered in England and Wales, registered number 4187346, registered office Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, United Kingdom More information can be found at www.csr.com. Follow CSR on Twitter at http://twitter.com/CSR_PLC and read our blog at www.csr.com/blog