From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 12303 invoked by alias); 19 Feb 2015 10:13:34 -0000 Mailing-List: contact zsh-workers-help@zsh.org; run by ezmlm Precedence: bulk X-No-Archive: yes List-Id: Zsh Workers List List-Post: List-Help: X-Seq: 34570 Received: (qmail 10691 invoked from network); 19 Feb 2015 10:13:22 -0000 X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on f.primenet.com.au X-Spam-Level: X-Spam-Status: No, score=-6.9 required=5.0 tests=BAYES_00,RCVD_IN_DNSWL_HI, SPF_HELO_PASS,T_HDRS_LCASE,T_MANY_HDRS_LCASE autolearn=ham version=3.3.2 X-AuditID: cbfec7f5-b7fc86d0000066b7-ea-54e5b6aa1332 Date: Thu, 19 Feb 2015 10:13:15 +0000 From: Peter Stephenson To: Zsh Hackers' List Subject: PATCH: parse from even deeper in hell Message-id: <20150219101315.477f7f95@pwslap01u.europe.root.pri> Organization: Samsung Cambridge Solution Centre X-Mailer: Claws Mail 3.7.9 (GTK+ 2.22.0; i386-redhat-linux-gnu) MIME-version: 1.0 Content-type: text/plain; charset=US-ASCII Content-transfer-encoding: 7bit X-Brightmail-Tracker: H4sIAAAAAAAAA+NgFuplluLIzCtJLcpLzFFi42I5/e/4Fd1V256GGNy4JWFxsPkhkwOjx6qD H5gCGKO4bFJSczLLUov07RK4Ms7u+c9WcMG/onFXC1sD4xHrLkZODgkBE4mdy6+zQthiEhfu rWfrYuTiEBJYyihx+vkbKGcJk8TkKZuYIZxtjBJHfq4Ga2ERUJWYtHYFC4jNJmAoMXXTbMYu Rg4OEQFtifaPYiBhYQE9ia6Tv8DKeQXsJV7s/w9Wzi+gL3H17ycmiM32EjOvnGGEqBGU+DH5 HlgNs4CWxOZtTawQtrzE5jVvmUFsIQF1iRt3d7NPYBSYhaRlFpKWWUhaFjAyr2IUTS1NLihO Ss810itOzC0uzUvXS87P3cQICcKvOxiXHrM6xCjAwajEw6vQ8SREiDWxrLgy9xCjBAezkgiv bs3TECHelMTKqtSi/Pii0pzU4kOMTBycUg2MroevxodzHCvP7qlkqnX7drb9+c1XNxdorN+u VG2usFI956ZqszrnsqBDxb9FJdpC5uQEqTMJqR/M2X0oce3Njb+ixRKXWHBXH7nEeIA3iXNN U+tJFVnTids5FiwLll0VtTtqEt+q1QqzS1Z01PkbVbx/PHvunRKFyxLtxVPO9UyMvp4kOf+y EktxRqKhFnNRcSIAs9S35yACAAA= And Tomlinson looked down and down, and saw beneath his feet The frontlet of a tortured star milk-white in Hell-Mouth heat. % print $((echo one); (echo two)) zsh: bad math expression: operator expected at `one); (ech...' 
At the point this goes wrong, we've actually already established that this is a command substitution, not a math expression. However, we're now in the substitution code, and it doesn't have any marker to indicate that this has happened. Instead, it just looks to see if there are two parentheses at the end, which there are. Note that it's not a fix to count active parentheses in the middle at that point: those aren't tokenized because we're parsing this as a string for later expansion. So the ones at the end are the first that skipparens() picks up. In any case, re-counting when we've already established what's supposed to happen is a pretty kludgy fix. The fix here is to use distinct tokens for the opening and closing parentheses of a math substitution. We then just look for the matching close marker when we find the open marker. We can't have nested math expansions, so I think this ought to be robust. I've incremented the version as this changes the way strings are tokenized. The new tests might more logically belong with the command substitution tests rather than the arithmetic ones, but I've left them where they are in order to keep the tests for what is / isn't arithmetic in one place for easy comparison. pws diff --git a/Config/version.mk b/Config/version.mk index eb51638..a8eafa5 100644 --- a/Config/version.mk +++ b/Config/version.mk @@ -27,5 +27,5 @@ # This must also serve as a shell script, so do not add spaces around the # `=' signs. -VERSION=5.0.7-dev-0 -VERSION_DATE='October 8, 2014' +VERSION=5.0.7-dev-1 +VERSION_DATE='February 19, 2014' diff --git a/Src/lex.c b/Src/lex.c index 0068485..307b6e9 100644 --- a/Src/lex.c +++ b/Src/lex.c @@ -35,7 +35,7 @@ /* tokens */ /**/ -mod_export char ztokens[] = "#$^*()$=|{}[]`<>>?~`,'\"\\\\"; +mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,'\"\\\\"; /* parts of the current token */ @@ -473,8 +473,14 @@ add(int c) } \ } +enum { + CMD_OR_MATH_CMD, + CMD_OR_MATH_MATH, + CMD_OR_MATH_ERR +}; + /* - * Return 1 for math, 0 for a command, 2 for an error. If it couldn't be + * Return one of the above. 
If it couldn't be * parsed as math, but there was no gross error, it's a command. */ @@ -496,13 +502,13 @@ cmd_or_math(int cs_type) /* Successfully parsed, see if it was math */ c = hgetc(); if (c == ')') - return 1; /* yes */ + return CMD_OR_MATH_MATH; /* yes */ hungetc(c); lexstop = 0; c = ')'; } else if (lexstop) { /* we haven't got anything to unget */ - return 2; + return CMD_OR_MATH_ERR; } /* else unsuccessful: unget the whole thing */ hungetc(c); @@ -513,15 +519,15 @@ cmd_or_math(int cs_type) ztokens[*lexbuf.ptr - Pound] : *lexbuf.ptr); } if (errflag) - return 2; + return CMD_OR_MATH_ERR; hungetc('('); - return errflag ? 2 : 0; + return errflag ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD; } /* * Parse either a $(( ... )) or a $(...) - * Return 0 on success, 1 on failure. + * Return the same as cmd_or_math(). */ static int cmd_or_math_sub(void) @@ -529,21 +535,23 @@ cmd_or_math_sub(void) int c = hgetc(), ret; if (c == '(') { + int lexpos = (int)(lexbuf.ptr - tokstr); add(Inpar); add('('); - if ((ret = cmd_or_math(CS_MATHSUBST)) == 1) { + if ((ret = cmd_or_math(CS_MATHSUBST)) == CMD_OR_MATH_MATH) { + tokstr[lexpos] = Inparmath; add(')'); - return 0; + return CMD_OR_MATH_MATH; } - if (ret == 2) - return 1; + if (ret == CMD_OR_MATH_ERR) + return CMD_OR_MATH_ERR; lexbuf.ptr -= 2; lexbuf.len -= 2; } else { hungetc(c); lexstop = 0; } - return skipcomm(); + return skipcomm() ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD; } /* Check whether we're looking at valid numeric globbing syntax * @@ -764,10 +772,10 @@ gettok(void) lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE); switch (cmd_or_math(CS_MATH)) { - case 1: + case CMD_OR_MATH_MATH: return DINPAR; - case 0: + case CMD_OR_MATH_CMD: /* * Not math, so we don't return the contents * as a string in this case. 
@@ -987,12 +995,19 @@ gettokstr(int c, int sub) c = Outbrack; } else if (e == '(') { add(String); - c = cmd_or_math_sub(); - if (c) { + switch (cmd_or_math_sub()) { + case CMD_OR_MATH_CMD: + c = Outpar; + break; + + case CMD_OR_MATH_MATH: + c = Outparmath; + break; + + default: peek = LEXERR; goto brk; } - c = Outpar; } else { if (e == '{') { add(c); @@ -1400,8 +1415,19 @@ dquote_parse(char endchar, int sub) c = hgetc(); if (c == '(') { add(Qstring); - err = cmd_or_math_sub(); - c = Outpar; + switch (cmd_or_math_sub()) { + case CMD_OR_MATH_CMD: + c = Outpar; + break; + + case CMD_OR_MATH_MATH: + c = Outparmath; + break; + + default: + err = 1; + break; + } } else if (c == '[') { add(String); add(Inbrack); diff --git a/Src/subst.c b/Src/subst.c index a2bb648..056b12b 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -195,7 +195,7 @@ stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub) while (!errflag && (c = *str)) { if ((qt = c == Qstring) || c == String) { - if ((c = str[1]) == Inpar) { + if ((c = str[1]) == Inpar || c == Inparmath) { if (!qt) list->list.flags |= LF_ARRAY; str++; @@ -258,6 +258,22 @@ stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub) skipparens(Inpar, Outpar, &str); #endif str--; + } else if (c == Inparmath) { + /* Math substitution of the form $((...)) */ + str[-1] = '\0'; + while (*str != Outparmath && *str) + str++; + if (*str != Outparmath) { + zerr("Failed to find end of math substitution"); + return NULL; + } + str[-1] = '\0'; + if (isset(EXECOPT)) + str = arithsubst(str2 + 2, &str3, str+1); + else + strncpy(str3, str2, 1); + setdata(node, (void *) str3); + continue; } else { endchar = c; *str = '\0'; @@ -266,16 +282,6 @@ stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub) DPUTS(!*str, "BUG: parse error in command substitution"); } *str++ = '\0'; - if (endchar == Outpar && str2[1] == '(' && str[-2] == ')') { - /* Math substitution of the form $((...)) */ - str[-2] = '\0'; - if (isset(EXECOPT)) - 
str = arithsubst(str2 + 2, &str3, str); - else - strncpy(str3, str2, 1); - setdata(node, (void *) str3); - continue; - } /* It is a command substitution, which will be parsed again * * by the lexer, so we untokenize it first, but we cannot use * diff --git a/Src/zsh.h b/Src/zsh.h index dd946d2..9a97263 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -163,40 +163,42 @@ struct mathfunc { #define Hat ((char) 0x86) #define Star ((char) 0x87) #define Inpar ((char) 0x88) -#define Outpar ((char) 0x89) -#define Qstring ((char) 0x8a) -#define Equals ((char) 0x8b) -#define Bar ((char) 0x8c) -#define Inbrace ((char) 0x8d) -#define Outbrace ((char) 0x8e) -#define Inbrack ((char) 0x8f) -#define Outbrack ((char) 0x90) -#define Tick ((char) 0x91) -#define Inang ((char) 0x92) -#define Outang ((char) 0x93) -#define OutangProc ((char) 0x94) -#define Quest ((char) 0x95) -#define Tilde ((char) 0x96) -#define Qtick ((char) 0x97) -#define Comma ((char) 0x98) +#define Inparmath ((char) 0x89) +#define Outpar ((char) 0x8a) +#define Outparmath ((char) 0x8b) +#define Qstring ((char) 0x8c) +#define Equals ((char) 0x8d) +#define Bar ((char) 0x8e) +#define Inbrace ((char) 0x8f) +#define Outbrace ((char) 0x90) +#define Inbrack ((char) 0x91) +#define Outbrack ((char) 0x92) +#define Tick ((char) 0x93) +#define Inang ((char) 0x94) +#define Outang ((char) 0x95) +#define OutangProc ((char) 0x96) +#define Quest ((char) 0x97) +#define Tilde ((char) 0x98) +#define Qtick ((char) 0x99) +#define Comma ((char) 0x9a) /* * Null arguments: placeholders for single and double quotes * and backslashes. */ -#define Snull ((char) 0x99) -#define Dnull ((char) 0x9a) -#define Bnull ((char) 0x9b) +#define Snull ((char) 0x9b) +#define Dnull ((char) 0x9c) +#define Bnull ((char) 0x9d) /* * Backslash which will be returned to "\" instead of being stripped * when we turn the string into a printable format. 
*/ -#define Bnullkeep ((char) 0x9c) +#define Bnullkeep ((char) 0x9e) /* * Null argument that does not correspond to any character. * This should be last as it does not appear in ztokens and * is used to initialise the IMETA type in inittyptab(). */ -#define Nularg ((char) 0x9d) +#define Nularg ((char) 0x9f) /* * Take care to update the use of IMETA appropriately when adding diff --git a/Test/C01arith.ztst b/Test/C01arith.ztst index 09c0822..67d78ee 100644 --- a/Test/C01arith.ztst +++ b/Test/C01arith.ztst @@ -353,3 +353,26 @@ ' 0:Non-arithmetic subst with command subsitution parse from hell >yes, this one after case in subshell + + print "a$((echo one subst) + (echo two subst))b" +0:Another tricky case that is actually a command substitution +>aone subst +>two substb + + print "x$((echo one frob); (echo two frob))y" +0:Same on a single line +>xone frob +>two froby + + # This case actually only works by accident: if it wasn't for the + # unbalanced parenthesis this would be a valid math substitution. + # Hence it's definitely not recommended code. However, it does give + # the algorithm an extra check. + print $((case foo in + foo) + print Worked OK + ;; + esac)) +0:Would-be math expansion with extra parenthesis making it a cmd subst +>Worked OK