zsh-workers
 help / color / mirror / code / Atom feed
* PATCH: fix command substitution parsing
@ 2015-01-07 16:48 Peter Stephenson
  2015-01-09 21:26 ` Peter Stephenson
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Peter Stephenson @ 2015-01-07 16:48 UTC (permalink / raw)
  To: Zsh Hackers' List

According to the test suite enhanced with a few appropriate tests, this
fixes the problem where syntactically significant but irrelevant
unmatched closing parentheses caused command substitution to abort
early.

I don't believe that either, so we'll need to tease out whatever
oddities remain.  I think it's basically good enough to push, and the
problems will only emerge when I do, but I'll use it myself for a day or
so first.

I did this by allowing lexsave() and lexrestore() to save and restore
different layers separately.  This is made a bit hairy by the fact that
zsh doesn't really have layers.  However, it looks like I've basically
got away with saving and restoring the parser and lexer while keeping
history and input continuous.

Otherwise, it's following the method previously suggested: parse the
code once in situ, but throw that away when it gets turned into a string
and simply reparse it as an argument to the command substitution when
the time comes.  So this isn't mega-efficient, but I don't think it's a
problem and don't see a better way without quite serious structural
changes that are definitely not worthwhile overall.

I'm not 100% convinced by the ingetc() -> zshlex_raw_add() hack, but it
seems to work.

I'll probably follow up on those changes and turn lexsave() and
lexrestore() into context save and restore dispatching to different
modules, which is sort of starting to look like a respectable
implementation.  Also, that stuff in skipcomm() is calling out for
structs (and it's partly there to work around the effect of save and
restore, so that may want more detailed control).

I haven't tried using ZCONTEXT_LEX and ZCONTEXT_PARSE separately, don't
know if that's really a flier, and have no use case for that anyway.
But they are logically separate... well... different modules.

Note that one thing is logically in the wrong place (should be the
parser) but practically in the right one (actually in the history),
namely the command stack.  Associating it with the history means that
you get the following effect interactively with appropriate prompt
settings:

  % echo $(if true; then
  cmdsubst then> case foo in
  cmdsubst then case> foo)
  cmdsubst then case> echo Hi from the parser
  cmdsubst then case> ;;
  cmdsubst then case> esac
  cmdsubst then> fi
  cmdsubst> )
  Hi from the parser

for which I'm claiming a small but unspecified number of extra brownie
points.

This single patch has got some experiments squashed out but I may
auction my git repository for charity...

diff --git a/Src/init.c b/Src/init.c
index 3059087..080fc85 100644
--- a/Src/init.c
+++ b/Src/init.c
@@ -142,7 +142,8 @@ loop(int toplevel, int justonce)
 	use_exit_printed = 0;
 	intr();			/* interrupts on            */
 	lexinit();              /* initialize lexical state */
-	if (!(prog = parse_event())) {	/* if we couldn't parse a list */
+	if (!(prog = parse_event(ENDINPUT))) {
+	    /* if we couldn't parse a list */
 	    hend(NULL);
 	    if ((tok == ENDINPUT && !errflag) ||
 		(tok == LEXERR && (!isset(SHINSTDIN) || !toplevel)) ||
diff --git a/Src/input.c b/Src/input.c
index 9552331..04dda5a 100644
--- a/Src/input.c
+++ b/Src/input.c
@@ -179,12 +179,12 @@ shingetline(void)
 /* Get the next character from the input.
  * Will call inputline() to get a new line where necessary.
  */
-  
+
 /**/
 int
 ingetc(void)
 {
-    int lastc;
+    int lastc = ' ';
 
     if (lexstop)
 	return ' ';
@@ -196,7 +196,7 @@ ingetc(void)
 		continue;
 	    if (((inbufflags & INP_LINENO) || !strin) && lastc == '\n')
 		lineno++;
-	    return lastc;
+	    break;
 	}
 
 	/*
@@ -208,7 +208,7 @@ ingetc(void)
 	 */
 	if (!inbufct && (strin || errflag)) {
 	    lexstop = 1;
-	    return ' ';
+	    break;
 	}
 	/* If the next element down the input stack is a continuation of
 	 * this, use it.
@@ -219,8 +219,10 @@ ingetc(void)
 	}
 	/* As a last resort, get some more input */
 	if (inputline())
-	    return ' ';
+	    break;
     }
+    zshlex_raw_add(lastc);
+    return lastc;
 }
 
 /* Read a line from the current command stream and store it as input */
@@ -426,6 +428,7 @@ inungetc(int c)
 	    inbufleft = 0;
 	    inbuf = inbufptr = "";
 	}
+	zshlex_raw_back();
     }
 }
 
diff --git a/Src/lex.c b/Src/lex.c
index 4addf80..d440f3d 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -148,6 +148,16 @@ mod_export int parend;
 /**/
 mod_export int nocomments;
 
+/* add raw input characters while parsing command substitution */
+
+/**/
+static int lex_add_raw;
+
+/* variables associated with the above */
+
+static char *tokstr_raw, *bptr_raw;
+static int len_raw, bsiz_raw;
+
 /* text of punctuation tokens */
 
 /**/
@@ -216,6 +226,11 @@ struct lexstack {
     char *bptr;
     int bsiz;
     int len;
+    int lex_add_raw;
+    char *tokstr_raw;
+    char *bptr_raw;
+    int bsiz_raw;
+    int len_raw;
     short *chwords;
     int chwordlen;
     int chwordpos;
@@ -241,89 +256,121 @@ struct lexstack {
 
 static struct lexstack *lstack = NULL;
 
-/* save the lexical state */
+/* save the context or parts thereof */
 
 /* is this a hack or what? */
 
 /**/
 mod_export void
-lexsave(void)
+lexsave_partial(int parts)
 {
     struct lexstack *ls;
 
     ls = (struct lexstack *)malloc(sizeof(struct lexstack));
 
-    ls->incmdpos = incmdpos;
-    ls->incond = incond;
-    ls->incasepat = incasepat;
-    ls->dbparens = dbparens;
-    ls->isfirstln = isfirstln;
-    ls->isfirstch = isfirstch;
-    ls->histactive = histactive;
-    ls->histdone = histdone;
-    ls->lexflags = lexflags;
-    ls->stophist = stophist;
-    stophist = 0;
-    if (!lstack) {
-	/* top level, make this version visible to ZLE */
-	zle_chline = chline;
-	/* ensure line stored is NULL-terminated */
-	if (hptr)
-	    *hptr = '\0';
+    if (parts & ZCONTEXT_LEX) {
+	ls->incmdpos = incmdpos;
+	ls->incond = incond;
+	ls->incasepat = incasepat;
+	ls->dbparens = dbparens;
+	ls->isfirstln = isfirstln;
+	ls->isfirstch = isfirstch;
+	ls->lexflags = lexflags;
+
+	ls->tok = tok;
+	ls->isnewlin = isnewlin;
+	ls->tokstr = tokstr;
+	ls->zshlextext = zshlextext;
+	ls->bptr = bptr;
+	ls->bsiz = bsiz;
+	ls->len = len;
+	ls->lex_add_raw = lex_add_raw;
+	ls->tokstr_raw = tokstr_raw;
+	ls->bptr_raw = bptr_raw;
+	ls->bsiz_raw = bsiz_raw;
+	ls->len_raw = len_raw;
+	ls->lexstop = lexstop;
+	ls->toklineno = toklineno;
+
+	tokstr = zshlextext = bptr = NULL;
+	bsiz = 256;
+	tokstr_raw = bptr_raw = NULL;
+	bsiz_raw = len_raw = lex_add_raw = 0;
+
+	inredir = 0;
+    }
+    if (parts & ZCONTEXT_HIST) {
+	if (!lstack) {
+	    /* top level, make this version visible to ZLE */
+	    zle_chline = chline;
+	    /* ensure line stored is NULL-terminated */
+	    if (hptr)
+		*hptr = '\0';
+	}
+	ls->histactive = histactive;
+	ls->histdone = histdone;
+	ls->stophist = stophist;
+	ls->hline = chline;
+	ls->hptr = hptr;
+	ls->chwords = chwords;
+	ls->chwordlen = chwordlen;
+	ls->chwordpos = chwordpos;
+	ls->hwgetword = hwgetword;
+	ls->hgetc = hgetc;
+	ls->hungetc = hungetc;
+	ls->hwaddc = hwaddc;
+	ls->hwbegin = hwbegin;
+	ls->hwend = hwend;
+	ls->addtoline = addtoline;
+	ls->hlinesz = hlinesz;
+	/*
+	 * We save and restore the command stack with history
+	 * as it's visible to the user interactively, so if
+	 * we're preserving history state we'll continue to
+	 * show the current set of commands from input.
+	 */
+	ls->cstack = cmdstack;
+	ls->csp = cmdsp;
+
+	stophist = 0;
+	chline = NULL;
+	hptr = NULL;
+	histactive = 0;
+	cmdstack = (unsigned char *)zalloc(CMDSTACKSZ);
+	cmdsp = 0;
+    }
+    if (parts & ZCONTEXT_PARSE) {
+	ls->hdocs = hdocs;
+	ls->eclen = eclen;
+	ls->ecused = ecused;
+	ls->ecnpats = ecnpats;
+	ls->ecbuf = ecbuf;
+	ls->ecstrs = ecstrs;
+	ls->ecsoffs = ecsoffs;
+	ls->ecssub = ecssub;
+	ls->ecnfunc = ecnfunc;
+	ecbuf = NULL;
+	hdocs = NULL;
     }
-    ls->hline = chline;
-    chline = NULL;
-    ls->hptr = hptr;
-    hptr = NULL;
-    ls->hlinesz = hlinesz;
-    ls->cstack = cmdstack;
-    ls->csp = cmdsp;
-    cmdstack = (unsigned char *)zalloc(CMDSTACKSZ);
-    ls->tok = tok;
-    ls->isnewlin = isnewlin;
-    ls->tokstr = tokstr;
-    ls->zshlextext = zshlextext;
-    ls->bptr = bptr;
-    tokstr = zshlextext = bptr = NULL;
-    ls->bsiz = bsiz;
-    bsiz = 256;
-    ls->len = len;
-    ls->chwords = chwords;
-    ls->chwordlen = chwordlen;
-    ls->chwordpos = chwordpos;
-    ls->hwgetword = hwgetword;
-    ls->lexstop = lexstop;
-    ls->hdocs = hdocs;
-    ls->hgetc = hgetc;
-    ls->hungetc = hungetc;
-    ls->hwaddc = hwaddc;
-    ls->hwbegin = hwbegin;
-    ls->hwend = hwend;
-    ls->addtoline = addtoline;
-    ls->eclen = eclen;
-    ls->ecused = ecused;
-    ls->ecnpats = ecnpats;
-    ls->ecbuf = ecbuf;
-    ls->ecstrs = ecstrs;
-    ls->ecsoffs = ecsoffs;
-    ls->ecssub = ecssub;
-    ls->ecnfunc = ecnfunc;
-    ls->toklineno = toklineno;
-    cmdsp = 0;
-    inredir = 0;
-    hdocs = NULL;
-    histactive = 0;
-    ecbuf = NULL;
 
     ls->next = lstack;
     lstack = ls;
 }
 
-/* restore lexical state */
+/* save context in full */
 
 /**/
 mod_export void
-lexrestore(void)
+lexsave(void)
+{
+    lexsave_partial(ZCONTEXT_HIST|ZCONTEXT_LEX|ZCONTEXT_PARSE);
+}
+
+/* restore context or part thereof */
+
+/**/
+mod_export void
+lexrestore_partial(int parts)
 {
     struct lexstack *ln = lstack;
 
@@ -332,65 +379,89 @@ lexrestore(void)
     queue_signals();
     lstack = lstack->next;
 
-    if (!lstack) {
-	/* Back to top level: don't need special ZLE value */
-	DPUTS(ln->hline != zle_chline, "BUG: Ouch, wrong chline for ZLE");
-	zle_chline = NULL;
+    if (parts & ZCONTEXT_LEX) {
+	incmdpos = ln->incmdpos;
+	incond = ln->incond;
+	incasepat = ln->incasepat;
+	dbparens = ln->dbparens;
+	isfirstln = ln->isfirstln;
+	isfirstch = ln->isfirstch;
+	lexflags = ln->lexflags;
+	tok = ln->tok;
+	isnewlin = ln->isnewlin;
+	tokstr = ln->tokstr;
+	zshlextext = ln->zshlextext;
+	bptr = ln->bptr;
+	bsiz = ln->bsiz;
+	len = ln->len;
+	lex_add_raw = ln->lex_add_raw;
+	tokstr_raw = ln->tokstr_raw;
+	bptr_raw = ln->bptr_raw;
+	bsiz_raw = ln->bsiz_raw;
+	len_raw = ln->len_raw;
+	lexstop = ln->lexstop;
+	toklineno = ln->toklineno;
+    }
+
+    if (parts & ZCONTEXT_HIST) {
+	if (!lstack) {
+	    /* Back to top level: don't need special ZLE value */
+	    DPUTS(ln->hline != zle_chline, "BUG: Ouch, wrong chline for ZLE");
+	    zle_chline = NULL;
+	}
+	histactive = ln->histactive;
+	histdone = ln->histdone;
+	stophist = ln->stophist;
+	chline = ln->hline;
+	hptr = ln->hptr;
+	chwords = ln->chwords;
+	chwordlen = ln->chwordlen;
+	chwordpos = ln->chwordpos;
+	hwgetword = ln->hwgetword;
+	hgetc = ln->hgetc;
+	hungetc = ln->hungetc;
+	hwaddc = ln->hwaddc;
+	hwbegin = ln->hwbegin;
+	hwend = ln->hwend;
+	addtoline = ln->addtoline;
+	hlinesz = ln->hlinesz;
+	if (cmdstack)
+	    zfree(cmdstack, CMDSTACKSZ);
+	cmdstack = ln->cstack;
+	cmdsp = ln->csp;
+    }
+
+    if (parts & ZCONTEXT_PARSE) {
+	if (ecbuf)
+	    zfree(ecbuf, eclen);
+
+	hdocs = ln->hdocs;
+	eclen = ln->eclen;
+	ecused = ln->ecused;
+	ecnpats = ln->ecnpats;
+	ecbuf = ln->ecbuf;
+	ecstrs = ln->ecstrs;
+	ecsoffs = ln->ecsoffs;
+	ecssub = ln->ecssub;
+	ecnfunc = ln->ecnfunc;
+
+	errflag &= ~ERRFLAG_ERROR;
     }
 
-    incmdpos = ln->incmdpos;
-    incond = ln->incond;
-    incasepat = ln->incasepat;
-    dbparens = ln->dbparens;
-    isfirstln = ln->isfirstln;
-    isfirstch = ln->isfirstch;
-    histactive = ln->histactive;
-    histdone = ln->histdone;
-    lexflags = ln->lexflags;
-    stophist = ln->stophist;
-    chline = ln->hline;
-    hptr = ln->hptr;
-    if (cmdstack)
-	zfree(cmdstack, CMDSTACKSZ);
-    cmdstack = ln->cstack;
-    cmdsp = ln->csp;
-    tok = ln->tok;
-    isnewlin = ln->isnewlin;
-    tokstr = ln->tokstr;
-    zshlextext = ln->zshlextext;
-    bptr = ln->bptr;
-    bsiz = ln->bsiz;
-    len = ln->len;
-    chwords = ln->chwords;
-    chwordlen = ln->chwordlen;
-    chwordpos = ln->chwordpos;
-    hwgetword = ln->hwgetword;
-    lexstop = ln->lexstop;
-    hdocs = ln->hdocs;
-    hgetc = ln->hgetc;
-    hungetc = ln->hungetc;
-    hwaddc = ln->hwaddc;
-    hwbegin = ln->hwbegin;
-    hwend = ln->hwend;
-    addtoline = ln->addtoline;
-    if (ecbuf)
-	zfree(ecbuf, eclen);
-    eclen = ln->eclen;
-    ecused = ln->ecused;
-    ecnpats = ln->ecnpats;
-    ecbuf = ln->ecbuf;
-    ecstrs = ln->ecstrs;
-    ecsoffs = ln->ecsoffs;
-    ecssub = ln->ecssub;
-    ecnfunc = ln->ecnfunc;
-    hlinesz = ln->hlinesz;
-    toklineno = ln->toklineno;
-    errflag &= ~ERRFLAG_ERROR;
     free(ln);
 
     unqueue_signals();
 }
 
+/* complete restore context */
+
+/**/
+mod_export void
+lexrestore(void)
+{
+    lexrestore_partial(ZCONTEXT_HIST|ZCONTEXT_LEX|ZCONTEXT_PARSE);
+}
+
 /**/
 void
 zshlex(void)
@@ -1905,80 +1976,151 @@ exalias(void)
     return 0;
 }
 
-/* skip (...) */
+/**/
+void
+zshlex_raw_add(int c)
+{
+    if (!lex_add_raw)
+	return;
+
+    *bptr_raw++ = c;
+    if (bsiz_raw == ++len_raw) {
+	int newbsiz = bsiz_raw * 2;
+
+	tokstr_raw = (char *)hrealloc(tokstr_raw, bsiz_raw, newbsiz);
+	bptr_raw = tokstr_raw + len_raw;
+	memset(bptr_raw, 0, newbsiz - bsiz_raw);
+	bsiz_raw = newbsiz;
+    }
+}
+
+/**/
+void
+zshlex_raw_back(void)
+{
+    if (!lex_add_raw)
+	return;
+    bptr_raw--;
+    len_raw--;
+}
+
+/*
+ * Skip (...) for command-style substitutions: $(...), <(...), >(...)
+ *
+ * In order to ensure we don't stop at closing parentheses with
+ * some other syntactic significance, we'll parse the input until
+ * we find an unmatched closing parenthesis.  However, we'll throw
+ * away the result of the parsing and just keep the string we've built
+ * up on the way.
+ */
 
 /**/
 static int
 skipcomm(void)
 {
-    int pct = 1, c, start = 1;
+    char *new_tokstr, *new_bptr = bptr_raw;
+    int new_len, new_bsiz, new_lexstop, new_lex_add_raw;
 
     cmdpush(CS_CMDSUBST);
     SETPARBEGIN
-    c = Inpar;
-    do {
-	int iswhite;
-	add(c);
-	c = hgetc();
-	if (itok(c) || lexstop)
-	    break;
-	iswhite = inblank(c);
-	switch (c) {
-	case '(':
-	    pct++;
-	    break;
-	case ')':
-	    pct--;
-	    break;
-	case '\\':
-	    add(c);
-	    c = hgetc();
-	    break;
-	case '\'': {
-	    int strquote = bptr[-1] == '$';
-	    add(c);
-	    STOPHIST
-	    while ((c = hgetc()) != '\'' && !lexstop) {
-		if (c == '\\' && strquote) {
-		    add(c);
-		    c = hgetc();
-		}
-		add(c);
-	    }
-	    ALLOWHIST
-	    break;
-	}
-	case '\"':
-	    add(c);
-	    while ((c = hgetc()) != '\"' && !lexstop)
-		if (c == '\\') {
-		    add(c);
-		    add(hgetc());
-		} else
-		    add(c);
-	    break;
-	case '`':
-	    add(c);
-	    while ((c = hgetc()) != '`' && !lexstop)
-		if (c == '\\')
-		    add(c), add(hgetc());
-		else
-		    add(c);
-	    break;
-	case '#':
-	    if (start) {
-		add(c);
-		while ((c = hgetc()) != '\n' && !lexstop)
-		    add(c);
-		iswhite = 1;
-	    }
-	    break;
+    add(Inpar);
+
+    new_lex_add_raw = lex_add_raw + 1;
+    if (!lex_add_raw) {
+	/*
+	 * We'll combine the string so far with the input
+	 * read in for the command substitution.  To do this
+	 * we'll just propagate the current tokstr etc. as the
+	 * variables used for adding raw input, and
+	 * ensure we swap those for the real tokstr etc. at the end.
+	 *
+	 * However, we need to save and restore the rest of the
+	 * lexical and parse state as we're effectively parsing
+	 * an internal string.  Because we're still parsing it from
+	 * the original input source (we have to --- we don't know
+	 * when to stop inputting it otherwise and can't rely on
+	 * the input being recoverable until we've read it) we need
+	 * to keep the same history context.
+	 */
+	new_tokstr = tokstr;
+	new_bptr = bptr;
+	new_len = len;
+	new_bsiz = bsiz;
+
+	lexsave_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
+    } else {
+	/*
+	 * Set up for nested command substitution, however
+	 * we don't actually need the string until we get
+	 * back to the top level and recover the lot.
+	 * The $() body just appears empty.
+	 *
+	 * We do need to propagate the raw variables which would
+	 * otherwise be cleared, though.
+	 */
+	new_tokstr = tokstr_raw;
+	new_bptr = bptr_raw;
+	new_len = len_raw;
+	new_bsiz = bsiz_raw;
+
+	lexsave_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
+    }
+    tokstr_raw = new_tokstr;
+    bsiz_raw = new_bsiz;
+    len_raw = new_len;
+    bptr_raw = new_bptr;
+    lex_add_raw = new_lex_add_raw;
+
+    if (!parse_event(OUTPAR) || tok != OUTPAR)
+	lexstop = 1;
+     /* Outpar lexical token gets added in caller if present */
+
+    /*
+     * We're going to keep the full raw input string
+     * as the current token string after popping the stack.
+     */
+    new_tokstr = tokstr_raw;
+    new_bptr = bptr_raw;
+    new_len = len_raw;
+    new_bsiz = bsiz_raw;
+    /*
+     * We're also going to propagate the lexical state:
+     * if we couldn't parse the command substitution we
+     * can't continue.
+     */
+    new_lexstop = lexstop;
+
+    lexrestore_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
+
+    if (lex_add_raw) {
+	/*
+	 * Keep going, so retain the raw variables.
+	 */
+	tokstr_raw = new_tokstr;
+	bptr_raw = new_bptr;
+	len_raw = new_len;
+	bsiz_raw = new_bsiz;
+    } else {
+	if (!new_lexstop) {
+	    /* Ignore the ')' added on input */
+	    new_len--;
+	    *--new_bptr = '\0';
 	}
-	start = iswhite;
+
+	/*
+	 * Convince the rest of lex.c we were examining a string
+	 * all along.
+	 */
+	tokstr = new_tokstr;
+	bptr = new_bptr;
+	len = new_len;
+	bsiz = new_bsiz;
+	lexstop = new_lexstop;
     }
-    while (pct);
+
     if (!lexstop)
 	SETPAREND
     cmdpop();
+
     return lexstop;
 }
diff --git a/Src/parse.c b/Src/parse.c
index c1709e0..fa37ca3 100644
--- a/Src/parse.c
+++ b/Src/parse.c
@@ -361,7 +361,8 @@ ecstrcode(char *s)
 
 /* Initialise wordcode buffer. */
 
-static void
+/**/
+void
 init_parse(void)
 {
     if (ecbuf) zfree(ecbuf, eclen);
@@ -443,11 +444,15 @@ clear_hdocs()
  * event	: ENDINPUT
  *			| SEPER
  *			| sublist [ SEPER | AMPER | AMPERBANG ]
+ *
+ * endtok other than ENDINPUT indicates our event is part of a
+ * command-style substitution terminated by that token, usually a
+ * closing parenthesis.  In other cases endtok is ENDINPUT.
  */
 
 /**/
 Eprog
-parse_event(void)
+parse_event(int endtok)
 {
     tok = ENDINPUT;
     incmdpos = 1;
@@ -455,36 +460,42 @@ parse_event(void)
     zshlex();
     init_parse();
 
-    if (!par_event()) {
+    if (!par_event(endtok)) {
         clear_hdocs();
         return NULL;
     }
+    if (endtok != ENDINPUT) {
+	/* don't need to build an eprog for this */
+	return &dummy_eprog;
+    }
     return bld_eprog(1);
 }
 
 /**/
-static int
-par_event(void)
+int
+par_event(int endtok)
 {
     int r = 0, p, c = 0;
 
     while (tok == SEPER) {
-	if (isnewlin > 0)
+	if (isnewlin > 0 && endtok == ENDINPUT)
 	    return 0;
 	zshlex();
     }
     if (tok == ENDINPUT)
 	return 0;
+    if (tok == endtok)
+	return 0;
 
     p = ecadd(0);
 
     if (par_sublist(&c)) {
-	if (tok == ENDINPUT) {
+	if (tok == ENDINPUT || tok == endtok) {
 	    set_list_code(p, Z_SYNC, c);
 	    r = 1;
 	} else if (tok == SEPER) {
 	    set_list_code(p, Z_SYNC, c);
-	    if (isnewlin <= 0)
+	    if (isnewlin <= 0 || endtok != ENDINPUT)
 		zshlex();
 	    r = 1;
 	} else if (tok == AMPER) {
@@ -513,7 +524,7 @@ par_event(void)
     } else {
 	int oec = ecused;
 
-	if (!par_event()) {
+	if (!par_event(endtok)) {
 	    ecused = oec;
 	    ecbuf[p] |= wc_bdata(Z_END);
 	}
diff --git a/Src/zsh.h b/Src/zsh.h
index b366e0f..475b782 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -421,6 +421,15 @@ enum {
 #define META_HEAPDUP	6
 #define META_HREALLOC	7
 
+/* Context to save and restore (bit fields) */
+enum {
+    /* History mechanism */
+    ZCONTEXT_HIST       = (1<<0),
+    /* Lexical analyser */
+    ZCONTEXT_LEX        = (1<<1),
+    /* Parser */
+    ZCONTEXT_PARSE      = (1<<2)
+};
 
 /**************************/
 /* Abstract types for zsh */
diff --git a/Test/D08cmdsubst.ztst b/Test/D08cmdsubst.ztst
index 5661b0a..a4c69a0 100644
--- a/Test/D08cmdsubst.ztst
+++ b/Test/D08cmdsubst.ztst
@@ -106,3 +106,45 @@
 >34
 >"
 >" OK
+
+ echo $(case foo in
+ foo)
+ echo This test worked.
+ ;;
+ bar)
+ echo This test failed in a rather bizarre way.
+ ;;
+ *)
+ echo This test failed.
+ ;;
+ esac)
+0:Parsing of command substitution with unmatched parentheses: case, basic
+>This test worked.
+
+ echo "$(case bar in
+ foo)
+ echo This test spoobed.
+ ;;
+ bar)
+ echo This test plurbled.
+ ;;
+ *)
+ echo This test bzonked.
+ ;;
+ esac)"
+0:Parsing of command substitution with unmatched parentheses: case with quotes
+>This test plurbled.
+
+ echo before $(
+ echo start; echo unpretentious |
+ while read line; do
+   case $line in
+   u*)
+   print Word began with u
+   print and ended with a crunch
+   ;;
+   esac
+ done | sed -e 's/Word/Universe/'; echo end
+ ) after
+0:Parsing of command substitution with ummatched parentheses: with frills
+>before start Universe began with u and ended with a crunch end after


-- 
Peter Stephenson <p.stephenson@samsung.com>  Principal Software Engineer
Tel: +44 (0)1223 434724                Samsung Cambridge Solution Centre
St John's House, St John's Innovation Park, Cowley Road,
Cambridge, CB4 0DS, UK


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: PATCH: fix command substitution parsing
  2015-01-07 16:48 PATCH: fix command substitution parsing Peter Stephenson
@ 2015-01-09 21:26 ` Peter Stephenson
  2015-01-10  1:28   ` Bart Schaefer
  2015-01-10 19:56 ` Mikael Magnusson
  2015-01-11 19:02 ` Peter Stephenson
  2 siblings, 1 reply; 6+ messages in thread
From: Peter Stephenson @ 2015-01-09 21:26 UTC (permalink / raw)
  To: Zsh Hackers' List

On Wed, 07 Jan 2015 16:48:36 +0000
Peter Stephenson <p.stephenson@samsung.com> wrote:
> I did this by allowing lexsave() and lexrestore() to save and restore
> different layers separately.  This is made a bit hairy by the fact that
> zsh doesn't really have layers.  However, it looks like I've basically
> got away with saving and restoring the parser and lexer while keeping
> history and input continuous.
>...
> I'll probably follow up on those changes and turn lexsave() and
> lexrestore() into context save and restore dispatching to different
> modules, which is sort of starting to look like a respectable
> implementation.

I'm a bit sorry I thought of this; I can't see it leading to any good.
Still...

This creates a separate module context.c with zcontext_save() and
zcontext_restore() and makes each of hist.c, lex.c and parse.c
responsible for its own variables, initialisation as well as save and
restore.  The problem is that the notion of "its own variables" isn't
very well defined, given the not very well defined interface between the
modules.  I've gone with the approach that a variable belongs to the
module where it is defined.

This exposed various duplications in initialising some of the key
variables, in particular incmdpos, which I've tried to tame.  The
function init_parse_status(), although a fairly insignificant function,
illustrates the problem:  the variables "belong" to parse.c so are
initialised there but also need initialising for lexical analysis not
tied to syntactic parsing.  So there's some duplication, but it is at
least in one well-defined place with a comment saying so.

"inredir" was set to 0 on saving the context.  I strongly suspect that
what was really needed was setting to 0 with the other similar variables
when initialising the context nested (in C lexical scope) inside the one
being saved, so I've done that.  However, this raises questions of
whether other variables being reset on saving the context should also be
handled like that.

Also, a couple of variables including inredir weren't being saved and
restored at all, which isn't safe for asynchronous use by traps, so I've
saved them.  I haven't checked generally for this; this was just the key
set defined at the top of parse.c.

Some variables can possibly now become statics, although not many ---
the parse.c ones were the ones used for signalling with lex.c, so remain
global, and the lex.c ones could already be static if appropriate
because the save and restore were done there.  To be checked.

Although this tidies up save and restore, it would be more pukka to pass
down variables on the stack.  I don't think that's really feasible
without switching to an object orientated language.  Only Mikael's
theoretical infinite number of novices with typewriters can do that...

And so on.

And this doesn't even fix anything ever reported.

pws


From 6822e89da93051d07222af52ba5253d1854b2b27 Mon Sep 17 00:00:00 2001
From: Peter Stephenson <p.w.stephenson@ntlworld.com>
Date: Thu, 8 Jan 2015 21:39:26 +0000
Subject: [PATCH] Rearrange context saving.

Variables are now associated with the module that declares them, being
initialised and saved/restored there.  However, as many variables are
used for communication between modules, many of them are set in multiple
places, so the assignment is ambiguous.
---
 Src/Zle/compcore.c    |   4 +-
 Src/Zle/compctl.c     |   8 +-
 Src/Zle/textobjects.c |   4 +-
 Src/Zle/zle_tricky.c  |  24 ++--
 Src/builtin.c         |   8 +-
 Src/context.c         | 116 ++++++++++++++++++
 Src/exec.c            |   8 +-
 Src/hist.c            |  88 +++++++++++++-
 Src/init.c            |   4 +-
 Src/lex.c             | 321 ++++++++++----------------------------------------
 Src/parse.c           |  83 ++++++++++++-
 Src/signals.c         |   4 +-
 Src/zsh.h             |  65 ++++++++++
 Src/zsh.mdd           |   3 +-
 14 files changed, 441 insertions(+), 299 deletions(-)
 create mode 100644 Src/context.c

diff --git a/Src/Zle/compcore.c b/Src/Zle/compcore.c
index f505605..000f9da 100644
--- a/Src/Zle/compcore.c
+++ b/Src/Zle/compcore.c
@@ -1524,7 +1524,7 @@ set_comp_sep(void)
     ol = zlemetaline;
     addedx = 1;
     noerrs = 1;
-    lexsave();
+    zcontext_save();
     lexflags = LEXFLAGS_ZLE;
     /*
      * tl is the length of the temporary string including
@@ -1673,7 +1673,7 @@ set_comp_sep(void)
     inpop();
     errflag &= ~ERRFLAG_ERROR;
     noerrs = ne;
-    lexrestore();
+    zcontext_restore();
     wb = owb;
     we = owe;
     zlemetaline = ol;
diff --git a/Src/Zle/compctl.c b/Src/Zle/compctl.c
index 2a80e6c..43dd4e2 100644
--- a/Src/Zle/compctl.c
+++ b/Src/Zle/compctl.c
@@ -2795,7 +2795,7 @@ sep_comp_string(char *ss, char *s, int noffs)
      * get the words we have to expand.                        */
     addedx = 1;
     noerrs = 1;
-    lexsave();
+    zcontext_save();
     lexflags = LEXFLAGS_ZLE;
     tmp = (char *) zhalloc(tl = sl + 3 + strlen(s));
     strcpy(tmp, ss);
@@ -2849,7 +2849,7 @@ sep_comp_string(char *ss, char *s, int noffs)
     inpop();
     errflag &= ~ERRFLAG_ERROR;
     noerrs = ne;
-    lexrestore();
+    zcontext_restore();
     wb = owb;
     we = owe;
     zlemetacs = ocs;
@@ -3707,7 +3707,7 @@ makecomplistflags(Compctl cc, char *s, int incmd, int compadd)
 
 	/* Put the string in the lexer buffer and call the lexer to *
 	 * get the words we have to expand.                        */
-	lexsave();
+	zcontext_save();
 	lexflags = LEXFLAGS_ZLE;
 	tmpbuf = (char *)zhalloc(strlen(cc->str) + 5);
 	sprintf(tmpbuf, "foo %s", cc->str); /* KLUDGE! */
@@ -3726,7 +3726,7 @@ makecomplistflags(Compctl cc, char *s, int incmd, int compadd)
 	strinend();
 	inpop();
 	errflag &= ~ERRFLAG_ERROR;
-	lexrestore();
+	zcontext_restore();
 	/* Fine, now do full expansion. */
 	prefork(foo, 0);
 	if (!errflag) {
diff --git a/Src/Zle/textobjects.c b/Src/Zle/textobjects.c
index 37d2c0a..9b3277a 100644
--- a/Src/Zle/textobjects.c
+++ b/Src/Zle/textobjects.c
@@ -241,7 +241,7 @@ selectargument(UNUSED(char **args))
 
     addedx = 0;
     noerrs = 1;
-    lexsave();
+    zcontext_save();
     lexflags = LEXFLAGS_ACTIVE;
     linein = zlegetline(&ll, &cs);
     zlemetall = ll;
@@ -277,7 +277,7 @@ selectargument(UNUSED(char **args))
     inpop();
     errflag &= ~ERRFLAG_ERROR;
     noerrs = ne;
-    lexrestore();
+    zcontext_restore();
     zlemetacs = ocs;
     wb = owb;
     we = owe;
diff --git a/Src/Zle/zle_tricky.c b/Src/Zle/zle_tricky.c
index 950c22f..f18ad17 100644
--- a/Src/Zle/zle_tricky.c
+++ b/Src/Zle/zle_tricky.c
@@ -698,7 +698,7 @@ docomplete(int lst)
     freeheap();
     /* Save the lexer state, in case the completion code uses the lexer *
      * somewhere (e.g. when processing a compctl -s flag).              */
-    lexsave();
+    zcontext_save();
     if (inwhat == IN_ENV)
 	lincmd = 0;
     if (s) {
@@ -868,7 +868,7 @@ docomplete(int lst)
     } else
 	ret = 1;
     /* Reset the lexer state, pop the heap. */
-    lexrestore();
+    zcontext_restore();
     popheap();
 
     dat[0] = lst;
@@ -1164,7 +1164,7 @@ get_comp_string(void)
     varname = NULL;
     insubscr = 0;
     clwpos = -1;
-    lexsave();
+    zcontext_save();
     lexflags = LEXFLAGS_ZLE;
     inpush(dupstrspace(linptr), 0, NULL);
     strinbeg(0);
@@ -1422,7 +1422,7 @@ get_comp_string(void)
 		zlemetall -= parend;
 		zlemetaline[zlemetall + addedx] = '\0';
 	    }
-	    lexrestore();
+	    zcontext_restore();
 	    tt = NULL;
 	    goto start;
 	}
@@ -1496,12 +1496,12 @@ get_comp_string(void)
 	if (tmp) {
 	    tmp = NULL;
 	    linptr = zlemetaline;
-	    lexrestore();
+	    zcontext_restore();
 	    addedx = 0;
 	    goto start;
 	}
 	noaliases = ona;
-	lexrestore();
+	zcontext_restore();
 	return NULL;
     }
 
@@ -2151,7 +2151,7 @@ get_comp_string(void)
 	    offs = boffs;
 	}
     }
-    lexrestore();
+    zcontext_restore();
 
     return (char *)s;
 }
@@ -2791,7 +2791,7 @@ doexpandhist(void)
     expanding = 1;
     excs = zlemetacs;
     zlemetall = zlemetacs = 0;
-    lexsave();
+    zcontext_save();
     /* We push ol as it will remain unchanged */
     inpush(ol, 0, NULL);
     strinbeg(1);
@@ -2803,7 +2803,7 @@ doexpandhist(void)
     } while (tok != ENDINPUT && tok != LEXERR);
     while (!lexstop)
 	hgetc();
-    /* We have to save errflags because it's reset in lexrestore. Since  *
+    /* We have to save errflags because it's reset in zcontext_restore. Since  *
      * noerrs was set to 1 errflag is true if there was a habort() which *
      * means that the expanded string is unusable.                       */
     err = errflag;
@@ -2811,7 +2811,7 @@ doexpandhist(void)
     noaliases = ona;
     strinend();
     inpop();
-    lexrestore();
+    zcontext_restore();
     expanding = 0;
 
     if (!err) {
@@ -2910,7 +2910,7 @@ getcurcmd(void)
     int curlincmd;
     char *s = NULL;
 
-    lexsave();
+    zcontext_save();
     lexflags = LEXFLAGS_ZLE;
     metafy_line();
     inpush(dupstrspace(zlemetaline), 0, NULL);
@@ -2934,7 +2934,7 @@ getcurcmd(void)
     inpop();
     errflag &= ~ERRFLAG_ERROR;
     unmetafy_line();
-    lexrestore();
+    zcontext_restore();
 
     return s;
 }
diff --git a/Src/builtin.c b/Src/builtin.c
index d210826..8abe728 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -6102,7 +6102,7 @@ bin_test(char *name, char **argv, UNUSED(Options ops), int func)
 	}
     }
 
-    lexsave();
+    zcontext_save();
     testargs = argv;
     tok = NULLTOK;
     condlex = testlex;
@@ -6112,16 +6112,16 @@ bin_test(char *name, char **argv, UNUSED(Options ops), int func)
 
     if (errflag) {
 	errflag &= ~ERRFLAG_ERROR;
-	lexrestore();
+	zcontext_restore();
 	return 1;
     }
 
     if (!prog || tok == LEXERR) {
 	zwarnnam(name, tokstr ? "parse error" : "argument expected");
-	lexrestore();
+	zcontext_restore();
 	return 1;
     }
-    lexrestore();
+    zcontext_restore();
 
     if (*curtestarg) {
 	zwarnnam(name, "too many arguments");
diff --git a/Src/context.c b/Src/context.c
new file mode 100644
index 0000000..bd8d191
--- /dev/null
+++ b/Src/context.c
@@ -0,0 +1,136 @@
+/*
+ * context.c - context save and restore
+ *
+ * This file is part of zsh, the Z shell.
+ *
+ * Copyright (c) 2015 Peter Stephenson
+ * All rights reserved.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and to distribute modified versions of this software for any
+ * purpose, provided that the above copyright notice and the following
+ * two paragraphs appear in all copies of this software.
+ *
+ * In no event shall Peter Stephenson or the Zsh Development Group be liable
+ * to any party for direct, indirect, special, incidental, or consequential
+ * damages arising out of the use of this software and its documentation,
+ * even if Peter Stephenson and the Zsh Development Group have been advised of
+ * the possibility of such damage.
+ *
+ * Peter Stephenson and the Zsh Development Group specifically disclaim any
+ * warranties, including, but not limited to, the implied warranties of
+ * merchantability and fitness for a particular purpose.  The software
+ * provided hereunder is on an "as is" basis, and Peter Stephenson and the
+ * Zsh Development Group have no obligation to provide maintenance,
+ * support, updates, enhancements, or modifications.
+ *
+ */
+/*
+ * This short file provides a home for the stack of saved contexts.
+ * The actions for saving and restoring are encapsulated within
+ * individual modules.
+ */
+
+#include "zsh.mdh"
+#include "context.pro"
+
+/*
+ * One saved context.  Only the members selected by the parts argument
+ * of zcontext_save_partial() are filled in, so a context must be
+ * restored with the same parts it was saved with.
+ */
+struct context_stack {
+    struct context_stack *next;
+
+    struct hist_stack hist_stack;
+    struct lex_stack lex_stack;
+    struct parse_stack parse_stack;
+};
+
+/* Most recently saved context; NULL when nothing is saved */
+static struct context_stack *cstack;
+
+/*
+ * Save some or all of the current context.
+ *
+ * parts is a bitwise OR of ZCONTEXT_HIST, ZCONTEXT_LEX and ZCONTEXT_PARSE
+ * selecting which modules save their state.  Each module is also told
+ * whether this is the outermost save, i.e. nothing was saved already.
+ */
+
+/**/
+mod_export void
+zcontext_save_partial(int parts)
+{
+    struct context_stack *cs;
+
+    /* zalloc() exits on out of memory rather than returning NULL */
+    cs = (struct context_stack *)zalloc(sizeof(struct context_stack));
+
+    if (parts & ZCONTEXT_HIST) {
+	hist_context_save(&cs->hist_stack, !cstack);
+    }
+    if (parts & ZCONTEXT_LEX) {
+	lex_context_save(&cs->lex_stack, !cstack);
+    }
+    if (parts & ZCONTEXT_PARSE) {
+	parse_context_save(&cs->parse_stack, !cstack);
+    }
+
+    cs->next = cstack;
+    cstack = cs;
+}
+
+/* save context in full */
+
+/**/
+mod_export void
+zcontext_save(void)
+{
+    zcontext_save_partial(ZCONTEXT_HIST|ZCONTEXT_LEX|ZCONTEXT_PARSE);
+}
+
+/*
+ * Restore the most recently saved context, or the parts of it chosen
+ * by parts, which takes the same values as for zcontext_save_partial().
+ * The saved context is removed from the stack whichever parts are used.
+ */
+
+/**/
+mod_export void
+zcontext_restore_partial(int parts)
+{
+    struct context_stack *cs = cstack;
+
+    DPUTS(!cstack, "BUG: zcontext_restore() without zcontext_save()");
+    /* DPUTS() only reports the problem: don't go on to use a null pointer */
+    if (!cs)
+	return;
+
+    queue_signals();
+    cstack = cstack->next;
+
+    if (parts & ZCONTEXT_HIST) {
+	hist_context_restore(&cs->hist_stack, !cstack);
+    }
+    if (parts & ZCONTEXT_LEX) {
+	lex_context_restore(&cs->lex_stack, !cstack);
+    }
+    if (parts & ZCONTEXT_PARSE) {
+	parse_context_restore(&cs->parse_stack, !cstack);
+    }
+
+    zfree(cs, sizeof(struct context_stack));
+
+    unqueue_signals();
+}
+
+/* restore full context */
+
+/**/
+mod_export void
+zcontext_restore(void)
+{
+    zcontext_restore_partial(ZCONTEXT_HIST|ZCONTEXT_LEX|ZCONTEXT_PARSE);
+}
diff --git a/Src/exec.c b/Src/exec.c
index ab92910..7b64951 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -217,7 +217,7 @@ parse_string(char *s, int reset_lineno)
     Eprog p;
     zlong oldlineno;
 
-    lexsave();
+    zcontext_save();
     inpush(s, INP_LINENO, NULL);
     strinbeg(0);
     oldlineno = lineno;
@@ -229,7 +229,7 @@ parse_string(char *s, int reset_lineno)
 	lastval = 1;
     strinend();
     inpop();
-    lexrestore();
+    zcontext_restore();
     return p;
 }
 
@@ -3349,9 +3349,9 @@ execcmd(Estate state, int input, int output, int how, int last1)
 		 * The copy uses the wordcode parsing area, so save and
 		 * restore state.
 		 */
-		lexsave();
+		zcontext_save();
 		redir_prog = eccopyredirs(&s);
-		lexrestore();
+		zcontext_restore();
 	    } else
 		redir_prog = NULL;
 	    
diff --git a/Src/hist.c b/Src/hist.c
index e65d78b..447f00e 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -222,6 +222,85 @@ static int histsave_stack_pos = 0;
 
 static zlong histfile_linect;
 
+/* save history context in hs; toplevel is set for the outermost save */
+
+/**/
+void
+hist_context_save(struct hist_stack *hs, int toplevel)
+{
+    if (toplevel) {
+	/* top level, make this version visible to ZLE */
+	zle_chline = chline;
+	/* ensure line stored is NUL-terminated */
+	if (hptr)
+	    *hptr = '\0';
+    }
+    hs->histactive = histactive;
+    hs->histdone = histdone;
+    hs->stophist = stophist;
+    hs->hline = chline;
+    hs->hptr = hptr;
+    hs->chwords = chwords;
+    hs->chwordlen = chwordlen;
+    hs->chwordpos = chwordpos;
+    hs->hwgetword = hwgetword;
+    hs->hgetc = hgetc;
+    hs->hungetc = hungetc;
+    hs->hwaddc = hwaddc;
+    hs->hwbegin = hwbegin;
+    hs->hwend = hwend;
+    hs->addtoline = addtoline;
+    hs->hlinesz = hlinesz;
+    /*
+     * We save and restore the command stack with history
+     * as it's visible to the user interactively, so if
+     * we're preserving history state we'll continue to
+     * show the current set of commands from input.
+     */
+    hs->cstack = cmdstack;
+    hs->csp = cmdsp;
+    /* start afresh: no history line, and a new, empty command stack */
+    stophist = 0;
+    chline = NULL;
+    hptr = NULL;
+    histactive = 0;
+    cmdstack = (unsigned char *)zalloc(CMDSTACKSZ);
+    cmdsp = 0;
+}
+
+/* restore history context saved in hs by hist_context_save() */
+
+/**/
+void
+hist_context_restore(const struct hist_stack *hs, int toplevel)
+{
+    if (toplevel) {
+	/* Back to top level: don't need special ZLE value */
+	DPUTS(hs->hline != zle_chline, "BUG: Ouch, wrong chline for ZLE");
+	zle_chline = NULL;
+    }
+    histactive = hs->histactive;
+    histdone = hs->histdone;
+    stophist = hs->stophist;
+    chline = hs->hline;
+    hptr = hs->hptr;
+    chwords = hs->chwords;
+    chwordlen = hs->chwordlen;
+    chwordpos = hs->chwordpos;
+    hwgetword = hs->hwgetword;
+    hgetc = hs->hgetc;
+    hungetc = hs->hungetc;
+    hwaddc = hs->hwaddc;
+    hwbegin = hs->hwbegin;
+    hwend = hs->hwend;
+    addtoline = hs->addtoline;
+    hlinesz = hs->hlinesz;
+    if (cmdstack) /* the stack in use now is dropped in favour of the saved one */
+	zfree(cmdstack, CMDSTACKSZ);
+    cmdstack = hs->cstack;
+    cmdsp = hs->csp;
+}
+
 /* add a character to the current history word */
 
 static void
@@ -815,6 +894,11 @@ strinbeg(int dohist)
     strin++;
     hbegin(dohist);
     lexinit();
+    /*
+     * Also initialise some variables owned by the parser but
+     * used for communication between the parser and lexer.
+     */
+    init_parse_status();
 }
 
 /* done reading a string */
@@ -2992,7 +3076,7 @@ bufferwords(LinkList list, char *buf, int *index, int flags)
     opts[RCQUOTES] = 0;
     addedx = 0;
     noerrs = 1;
-    lexsave();
+    zcontext_save();
     lexflags = flags | LEXFLAGS_ACTIVE;
     /*
      * Are we handling comments?
@@ -3189,7 +3273,7 @@ bufferwords(LinkList list, char *buf, int *index, int flags)
     errflag &= ~ERRFLAG_ERROR;
     nocomments = onc;
     noerrs = ne;
-    lexrestore();
+    zcontext_restore();
     zlemetacs = ocs;
     zlemetall = oll;
     wb = owb;
diff --git a/Src/init.c b/Src/init.c
index 080fc85..e7d86fe 100644
--- a/Src/init.c
+++ b/Src/init.c
@@ -107,7 +107,7 @@ loop(int toplevel, int justonce)
 
     pushheap();
     if (!toplevel)
-	lexsave();
+	zcontext_save();
     for (;;) {
 	freeheap();
 	if (stophist == 3)	/* re-entry via preprompt() */
@@ -227,7 +227,7 @@ loop(int toplevel, int justonce)
     }
     err = errflag;
     if (!toplevel)
-	lexrestore();
+	zcontext_restore();
     popheap();
 
     if (err)
diff --git a/Src/lex.c b/Src/lex.c
index d440f3d..69441b2 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -203,263 +203,64 @@ static int dbparens;
 static int len = 0, bsiz = 256;
 static char *bptr;
 
-struct lexstack {
-    struct lexstack *next;
-
-    int incmdpos;
-    int incond;
-    int incasepat;
-    int dbparens;
-    int isfirstln;
-    int isfirstch;
-    int histactive;
-    int histdone;
-    int lexflags;
-    int stophist;
-    int hlinesz;
-    char *hline;
-    char *hptr;
-    enum lextok tok;
-    int isnewlin;
-    char *tokstr;
-    char *zshlextext;
-    char *bptr;
-    int bsiz;
-    int len;
-    int lex_add_raw;
-    char *tokstr_raw;
-    char *bptr_raw;
-    int bsiz_raw;
-    int len_raw;
-    short *chwords;
-    int chwordlen;
-    int chwordpos;
-    int hwgetword;
-    int lexstop;
-    struct heredocs *hdocs;
-    int (*hgetc) _((void));
-    void (*hungetc) _((int));
-    void (*hwaddc) _((int));
-    void (*hwbegin) _((int));
-    void (*hwend) _((void));
-    void (*addtoline) _((int));
-
-    int eclen, ecused, ecnpats;
-    Wordcode ecbuf;
-    Eccstr ecstrs;
-    int ecsoffs, ecssub, ecnfunc;
-
-    unsigned char *cstack;
-    int csp;
-    zlong toklineno;
-};
-
-static struct lexstack *lstack = NULL;
-
-/* save the context or parts thereof */
-
-/* is this a hack or what? */
+/* save lexical context */
 
 /**/
-mod_export void
-lexsave_partial(int parts)
-{
-    struct lexstack *ls;
-
-    ls = (struct lexstack *)malloc(sizeof(struct lexstack));
-
-    if (parts & ZCONTEXT_LEX) {
-	ls->incmdpos = incmdpos;
-	ls->incond = incond;
-	ls->incasepat = incasepat;
-	ls->dbparens = dbparens;
-	ls->isfirstln = isfirstln;
-	ls->isfirstch = isfirstch;
-	ls->lexflags = lexflags;
-
-	ls->tok = tok;
-	ls->isnewlin = isnewlin;
-	ls->tokstr = tokstr;
-	ls->zshlextext = zshlextext;
-	ls->bptr = bptr;
-	ls->bsiz = bsiz;
-	ls->len = len;
-	ls->lex_add_raw = lex_add_raw;
-	ls->tokstr_raw = tokstr_raw;
-	ls->bptr_raw = bptr_raw;
-	ls->bsiz_raw = bsiz_raw;
-	ls->len_raw = len_raw;
-	ls->lexstop = lexstop;
-	ls->toklineno = toklineno;
-
-	tokstr = zshlextext = bptr = NULL;
-	bsiz = 256;
-	tokstr_raw = bptr_raw = NULL;
-	bsiz_raw = len_raw = lex_add_raw = 0;
-
-	inredir = 0;
-    }
-    if (parts & ZCONTEXT_HIST) {
-	if (!lstack) {
-	    /* top level, make this version visible to ZLE */
-	    zle_chline = chline;
-	    /* ensure line stored is NULL-terminated */
-	    if (hptr)
-		*hptr = '\0';
-	}
-	ls->histactive = histactive;
-	ls->histdone = histdone;
-	ls->stophist = stophist;
-	ls->hline = chline;
-	ls->hptr = hptr;
-	ls->chwords = chwords;
-	ls->chwordlen = chwordlen;
-	ls->chwordpos = chwordpos;
-	ls->hwgetword = hwgetword;
-	ls->hgetc = hgetc;
-	ls->hungetc = hungetc;
-	ls->hwaddc = hwaddc;
-	ls->hwbegin = hwbegin;
-	ls->hwend = hwend;
-	ls->addtoline = addtoline;
-	ls->hlinesz = hlinesz;
-	/*
-	 * We save and restore the command stack with history
-	 * as it's visible to the user interactively, so if
-	 * we're preserving history state we'll continue to
-	 * show the current set of commands from input.
-	 */
-	ls->cstack = cmdstack;
-	ls->csp = cmdsp;
-
-	stophist = 0;
-	chline = NULL;
-	hptr = NULL;
-	histactive = 0;
-	cmdstack = (unsigned char *)zalloc(CMDSTACKSZ);
-	cmdsp = 0;
-    }
-    if (parts & ZCONTEXT_PARSE) {
-	ls->hdocs = hdocs;
-	ls->eclen = eclen;
-	ls->ecused = ecused;
-	ls->ecnpats = ecnpats;
-	ls->ecbuf = ecbuf;
-	ls->ecstrs = ecstrs;
-	ls->ecsoffs = ecsoffs;
-	ls->ecssub = ecssub;
-	ls->ecnfunc = ecnfunc;
-	ecbuf = NULL;
-	hdocs = NULL;
-    }
-
-    ls->next = lstack;
-    lstack = ls;
-}
-
-/* save context in full */
-
-/**/
-mod_export void
-lexsave(void)
-{
-    lexsave_partial(ZCONTEXT_HIST|ZCONTEXT_LEX|ZCONTEXT_PARSE);
-}
-
-/* restore context or part therefore */
-
-/**/
-mod_export void
-lexrestore_partial(int parts)
+void
+lex_context_save(struct lex_stack *ls, int toplevel)
 {
-    struct lexstack *ln = lstack;
-
-    DPUTS(!lstack, "BUG: lexrestore() without lexsave()");
-
-    queue_signals();
-    lstack = lstack->next;
-
-    if (parts & ZCONTEXT_LEX) {
-	incmdpos = ln->incmdpos;
-	incond = ln->incond;
-	incasepat = ln->incasepat;
-	dbparens = ln->dbparens;
-	isfirstln = ln->isfirstln;
-	isfirstch = ln->isfirstch;
-	lexflags = ln->lexflags;
-	tok = ln->tok;
-	isnewlin = ln->isnewlin;
-	tokstr = ln->tokstr;
-	zshlextext = ln->zshlextext;
-	bptr = ln->bptr;
-	bsiz = ln->bsiz;
-	len = ln->len;
-	lex_add_raw = ln->lex_add_raw;
-	tokstr_raw = ln->tokstr_raw;
-	bptr_raw = ln->bptr_raw;
-	bsiz_raw = ln->bsiz_raw;
-	len_raw = ln->len_raw;
-	lexstop = ln->lexstop;
-	toklineno = ln->toklineno;
-    }
-
-    if (parts & ZCONTEXT_HIST) {
-	if (!lstack) {
-	    /* Back to top level: don't need special ZLE value */
-	    DPUTS(ln->hline != zle_chline, "BUG: Ouch, wrong chline for ZLE");
-	    zle_chline = NULL;
-	}
-	histactive = ln->histactive;
-	histdone = ln->histdone;
-	stophist = ln->stophist;
-	chline = ln->hline;
-	hptr = ln->hptr;
-	chwords = ln->chwords;
-	chwordlen = ln->chwordlen;
-	chwordpos = ln->chwordpos;
-	hwgetword = ln->hwgetword;
-	hgetc = ln->hgetc;
-	hungetc = ln->hungetc;
-	hwaddc = ln->hwaddc;
-	hwbegin = ln->hwbegin;
-	hwend = ln->hwend;
-	addtoline = ln->addtoline;
-	hlinesz = ln->hlinesz;
-	if (cmdstack)
-	    zfree(cmdstack, CMDSTACKSZ);
-	cmdstack = ln->cstack;
-	cmdsp = ln->csp;
-    }
-
-    if (parts & ZCONTEXT_PARSE) {
-	if (ecbuf)
-	    zfree(ecbuf, eclen);
-
-	hdocs = ln->hdocs;
-	eclen = ln->eclen;
-	ecused = ln->ecused;
-	ecnpats = ln->ecnpats;
-	ecbuf = ln->ecbuf;
-	ecstrs = ln->ecstrs;
-	ecsoffs = ln->ecsoffs;
-	ecssub = ln->ecssub;
-	ecnfunc = ln->ecnfunc;
-
-	errflag &= ~ERRFLAG_ERROR;
-    }
-
-    free(ln);
-
-    unqueue_signals();
+    (void)toplevel; /* not used by the lexer */
+
+    ls->dbparens = dbparens;
+    ls->isfirstln = isfirstln;
+    ls->isfirstch = isfirstch;
+    ls->lexflags = lexflags;
+
+    ls->tok = tok;
+    ls->tokstr = tokstr;
+    ls->zshlextext = zshlextext;
+    ls->bptr = bptr;
+    ls->bsiz = bsiz;
+    ls->len = len;
+    ls->lex_add_raw = lex_add_raw;
+    ls->tokstr_raw = tokstr_raw;
+    ls->bptr_raw = bptr_raw;
+    ls->bsiz_raw = bsiz_raw;
+    ls->len_raw = len_raw;
+    ls->lexstop = lexstop;
+    ls->toklineno = toklineno;
+
+    tokstr = zshlextext = bptr = NULL;
+    bsiz = 256;
+    tokstr_raw = bptr_raw = NULL;
+    bsiz_raw = len_raw = lex_add_raw = 0;
 }
 
-/* complete restore context */
+/* restore lexical context saved in ls by lex_context_save() */
 
 /**/
 mod_export void
-lexrestore(void)
+lex_context_restore(const struct lex_stack *ls, int toplevel)
 {
-    lexrestore_partial(ZCONTEXT_HIST|ZCONTEXT_LEX|ZCONTEXT_PARSE);
+    (void)toplevel;
+    /* put back every variable that lex_context_save() stored */
+    dbparens = ls->dbparens;
+    isfirstln = ls->isfirstln;
+    isfirstch = ls->isfirstch;
+    lexflags = ls->lexflags;
+    tok = ls->tok;
+    tokstr = ls->tokstr;
+    zshlextext = ls->zshlextext;
+    bptr = ls->bptr;
+    bsiz = ls->bsiz;
+    len = ls->len;
+    lex_add_raw = ls->lex_add_raw;
+    tokstr_raw = ls->tokstr_raw;
+    bptr_raw = ls->bptr_raw;
+    bsiz_raw = ls->bsiz_raw;
+    len_raw = ls->len_raw;
+    lexstop = ls->lexstop;
+    toklineno = ls->toklineno;
 }
 
 /**/
@@ -634,9 +435,7 @@ initlextabs(void)
 void
 lexinit(void)
 {
-    incond = incasepat = nocorrect =
-    infor = dbparens = lexstop = 0;
-    incmdpos = 1;
+    nocorrect = dbparens = lexstop = 0;
     tok = ENDINPUT;
 }
 
@@ -1725,7 +1524,7 @@ parsestrnoerr(char *s)
 {
     int l = strlen(s), err;
 
-    lexsave();
+    zcontext_save();
     untokenize(s);
     inpush(dupstring(s), 0, NULL);
     strinbeg(0);
@@ -1737,7 +1536,7 @@ parsestrnoerr(char *s)
     strinend();
     inpop();
     DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty.");
-    lexrestore();
+    zcontext_restore();
     return err;
 }
 
@@ -1756,7 +1555,7 @@ parse_subscript(char *s, int sub, int endchar)
 
     if (!*s || *s == endchar)
 	return 0;
-    lexsave();
+    zcontext_save();
     untokenize(t = dupstring(s));
     inpush(t, 0, NULL);
     strinbeg(0);
@@ -1776,7 +1575,7 @@ parse_subscript(char *s, int sub, int endchar)
     strinend();
     inpop();
     DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");
-    lexrestore();
+    zcontext_restore();
     return s;
 }
 
@@ -1794,7 +1593,7 @@ parse_subst_string(char *s)
 
     if (!*s || !strcmp(s, nulstring))
 	return 0;
-    lexsave();
+    zcontext_save();
     untokenize(s);
     inpush(dupstring(s), 0, NULL);
     strinbeg(0);
@@ -1807,7 +1606,7 @@ parse_subst_string(char *s)
     strinend();
     inpop();
     DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty.");
-    lexrestore();
+    zcontext_restore();
     /* Keep any interrupt error status */
     errflag = err | (errflag & ERRFLAG_INT);
     if (ctok == LEXERR) {
@@ -1817,7 +1616,7 @@ parse_subst_string(char *s)
 #ifdef DEBUG
     /*
      * Historical note: we used to check here for olen (the value of len
-     * before lexrestore()) == l, but that's not necessarily the case if
+     * before zcontext_restore()) == l, but that's not necessarily the case if
      * we stripped an RCQUOTE.
      */
     if (ctok != STRING || (errflag && !noerrs)) {
@@ -2047,7 +1846,7 @@ skipcomm(void)
 	new_len = len;
 	new_bsiz = bsiz;
 
-	lexsave_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
+	zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
     } else {
 	/*
 	 * Set up for nested command subsitution, however
@@ -2063,7 +1862,7 @@ skipcomm(void)
 	new_len = len_raw;
 	new_bsiz = bsiz_raw;
 
-	lexsave_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
+	zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
     }
     tokstr_raw = new_tokstr;
     bsiz_raw = new_bsiz;
@@ -2090,7 +1889,7 @@ skipcomm(void)
      */
     new_lexstop = lexstop;
 
-    lexrestore_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
+    zcontext_restore_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
 
     if (lex_add_raw) {
 	/*
diff --git a/Src/parse.c b/Src/parse.c
index fa37ca3..0b54a90 100644
--- a/Src/parse.c
+++ b/Src/parse.c
@@ -31,7 +31,7 @@
 #include "parse.pro"
 
 /* != 0 if we are about to read a command word */
- 
+
 /**/
 mod_export int incmdpos;
 
@@ -242,6 +242,67 @@ int ecsoffs, ecssub, ecnfunc;
 #define EC_DOUBLE_THRESHOLD  32768
 #define EC_INCREMENT         1024
 
+/* save parse context in ps and detach the wordcode buffer and here-docs */
+
+/**/
+void
+parse_context_save(struct parse_stack *ps, int toplevel)
+{
+    (void)toplevel;
+    /* parser status variables */
+    ps->incmdpos = incmdpos;
+    ps->aliasspaceflag = aliasspaceflag;
+    ps->incond = incond;
+    ps->inredir = inredir;
+    ps->incasepat = incasepat;
+    ps->isnewlin = isnewlin;
+    ps->infor = infor;
+    /* here-documents and the state of the wordcode buffer */
+    ps->hdocs = hdocs;
+    ps->eclen = eclen;
+    ps->ecused = ecused;
+    ps->ecnpats = ecnpats;
+    ps->ecbuf = ecbuf;
+    ps->ecstrs = ecstrs;
+    ps->ecsoffs = ecsoffs;
+    ps->ecssub = ecssub;
+    ps->ecnfunc = ecnfunc;
+    ecbuf = NULL; /* ps now holds the only pointer to the old buffer */
+    hdocs = NULL;
+}
+
+/* restore parse context saved in ps by parse_context_save() */
+
+/**/
+void
+parse_context_restore(const struct parse_stack *ps, int toplevel)
+{
+    (void)toplevel;
+
+    /* free the wordcode buffer of the context being abandoned, if any */
+    if (ecbuf)
+	zfree(ecbuf, eclen);
+
+    incmdpos = ps->incmdpos;
+    aliasspaceflag = ps->aliasspaceflag;
+    incond = ps->incond;
+    inredir = ps->inredir;
+    incasepat = ps->incasepat;
+    isnewlin = ps->isnewlin;
+    infor = ps->infor;
+
+    hdocs = ps->hdocs;
+    eclen = ps->eclen;
+    ecused = ps->ecused;
+    ecnpats = ps->ecnpats;
+    ecbuf = ps->ecbuf;
+    ecstrs = ps->ecstrs;
+    ecsoffs = ps->ecsoffs;
+    ecssub = ps->ecssub;
+    ecnfunc = ps->ecnfunc;
+    /* the error bit is cleared here: save errflag first if it is needed */
+    errflag &= ~ERRFLAG_ERROR;
+}
 
 /* Adjust pointers in here-doc structs. */
 
@@ -359,6 +420,21 @@ ecstrcode(char *s)
     } while (0)
 
 
+/**/
+mod_export void
+init_parse_status(void)
+{
+    /*
+     * These variables are currently declared by the parser, so we
+     * initialise them here.  Possibly they are more naturally declared
+     * by the lexical analyser; however, as they are used for signalling
+     * between the two it's a bit ambiguous.  We clear them when
+     * using the lexical analyser for strings as well as here.
+     */
+    incasepat = incond = inredir = infor = 0;
+    incmdpos = 1; /* we are about to read a command word */
+}
+
 /* Initialise wordcode buffer. */
 
 /**/
@@ -373,6 +449,8 @@ init_parse(void)
     ecsoffs = ecnpats = 0;
     ecssub = 0;
     ecnfunc = 0;
+
+    init_parse_status();
 }
 
 /* Build eprog. */
@@ -539,9 +617,8 @@ parse_list(void)
     int c = 0;
 
     tok = ENDINPUT;
-    incmdpos = 1;
-    zshlex();
     init_parse();
+    zshlex();
     par_list(&c);
     if (tok != ENDINPUT) {
         clear_hdocs();
diff --git a/Src/signals.c b/Src/signals.c
index 899f121..3950ad1 100644
--- a/Src/signals.c
+++ b/Src/signals.c
@@ -1210,7 +1210,7 @@ dotrapargs(int sig, int *sigtr, void *sigfn)
     intrap++;
     *sigtr |= ZSIG_IGNORED;
 
-    lexsave();
+    zcontext_save();
     /* execsave will save the old trap_return and trap_state */
     execsave();
     breaks = retflag = 0;
@@ -1265,7 +1265,7 @@ dotrapargs(int sig, int *sigtr, void *sigfn)
     new_trap_return = trap_return;
 
     execrestore();
-    lexrestore();
+    zcontext_restore();
 
     if (new_trap_state == TRAP_STATE_FORCE_RETURN &&
 	/* zero return from function isn't special */
diff --git a/Src/zsh.h b/Src/zsh.h
index 475b782..8fb4f97 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -2691,6 +2691,71 @@ struct sortelt {
 
 typedef struct sortelt *SortElt;
 
+/*********************************************************/
+/* Structures to save and restore for individual modules */
+/*********************************************************/
+
+/* History */
+struct hist_stack {
+    int histactive;
+    int histdone;
+    int stophist;
+    int hlinesz;
+    char *hline;
+    char *hptr;
+    short *chwords;
+    int chwordlen;
+    int chwordpos;
+    int hwgetword;
+    int (*hgetc) _((void));
+    void (*hungetc) _((int));
+    void (*hwaddc) _((int));
+    void (*hwbegin) _((int));
+    void (*hwend) _((void));
+    void (*addtoline) _((int));
+    unsigned char *cstack;
+    int csp;
+};
+
+/* Lexical analyser */
+struct lex_stack {
+    int dbparens;
+    int isfirstln;
+    int isfirstch;
+    int lexflags;
+    enum lextok tok;
+    char *tokstr;
+    char *zshlextext;
+    char *bptr;
+    int bsiz;
+    int len;
+    int lex_add_raw;
+    char *tokstr_raw;
+    char *bptr_raw;
+    int bsiz_raw;
+    int len_raw;
+    int lexstop;
+    zlong toklineno;
+};
+
+/* Parser */
+struct parse_stack {
+    struct heredocs *hdocs;
+
+    int incmdpos;
+    int aliasspaceflag;
+    int incond;
+    int inredir;
+    int incasepat;
+    int isnewlin;
+    int infor;
+
+    int eclen, ecused, ecnpats;
+    Wordcode ecbuf;
+    Eccstr ecstrs;
+    int ecsoffs, ecssub, ecnfunc;
+};
+
 /************************/
 /* Flags to casemodifiy */
 /************************/
diff --git a/Src/zsh.mdd b/Src/zsh.mdd
index 9a8c923..f0379d2 100644
--- a/Src/zsh.mdd
+++ b/Src/zsh.mdd
@@ -9,7 +9,8 @@ alwayslink=1
 
 # autobins not specified because of alwayslink
 
-objects="builtin.o compat.o cond.o exec.o glob.o hashtable.o hashnameddir.o \
+objects="builtin.o compat.o cond.o context.o \
+exec.o glob.o hashtable.o hashnameddir.o \
 hist.o init.o input.o jobs.o lex.o linklist.o loop.o math.o \
 mem.o module.o options.o params.o parse.o pattern.o prompt.o signals.o \
 signames.o sort.o string.o subst.o text.o utils.o watch.o"
-- 
2.1.0



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: PATCH: fix command substitution parsing
  2015-01-09 21:26 ` Peter Stephenson
@ 2015-01-10  1:28   ` Bart Schaefer
  0 siblings, 0 replies; 6+ messages in thread
From: Bart Schaefer @ 2015-01-10  1:28 UTC (permalink / raw)
  To: Zsh Hackers' List

On Jan 9,  9:26pm, Peter Stephenson wrote:
}
} + * This file is part of zsh, the Z shell.
} + *
} + * Copyright (c) 1992-1997 Paul Falstad
} + * All rights reserved.

Er, no.  You can't put Paul's copyright in there, because you're not Paul,
and anyway it's meaningless to assert a copyright that's 17+ years in the
past.  You're supposed to put your own name and the current date in any
newly-created files, just with the rest of the "Zsh Development Group"
verbiage.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: PATCH: fix command substitution parsing
  2015-01-07 16:48 PATCH: fix command substitution parsing Peter Stephenson
  2015-01-09 21:26 ` Peter Stephenson
@ 2015-01-10 19:56 ` Mikael Magnusson
  2015-01-10 20:25   ` Peter Stephenson
  2015-01-11 19:02 ` Peter Stephenson
  2 siblings, 1 reply; 6+ messages in thread
From: Mikael Magnusson @ 2015-01-10 19:56 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh Hackers' List

On Wed, Jan 7, 2015 at 5:48 PM, Peter Stephenson
<p.stephenson@samsung.com> wrote:
> According to the test suite enhanced with a few appropriate tests, this
> fixes the problem where syntactically significant but irrelevant
> unmatched closing parentheses caused command substitution to abort
> early.
>
> I don't believe that either, so we'll need to tease out whatever
> oddities remain.  I think it's basically good enough to push, and the
> problems will only emerge when I do, but I'll use it myself for a day or
> so first.

Daniel Hahler found the following and reported it on irc, bisected to
this commit,
% alias foo=bar
% f() $(foo)
% which f
f () {
    $(foobar)
}

-- 
Mikael Magnusson


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: PATCH: fix command substitution parsing
  2015-01-10 19:56 ` Mikael Magnusson
@ 2015-01-10 20:25   ` Peter Stephenson
  0 siblings, 0 replies; 6+ messages in thread
From: Peter Stephenson @ 2015-01-10 20:25 UTC (permalink / raw)
  To: Zsh Hackers' List

On Sat, 10 Jan 2015 20:56:58 +0100
Mikael Magnusson <mikachu@gmail.com> wrote:
> % alias foo=bar
> % f() $(foo)
> % which f
> f () {
>     $(foobar)
> }

diff --git a/Src/input.c b/Src/input.c
index 04dda5a..2ecac7b 100644
--- a/Src/input.c
+++ b/Src/input.c
@@ -537,6 +537,12 @@ inpush(char *str, int flags, Alias inalias)
 static void
 inpoptop(void)
 {
+    if (!lexstop) {
+	inbufflags &= ~INP_ALCONT;
+	while (inbufptr > inbuf)
+	    inungetc(inbufptr[-1]);
+    }
+
     if (inbuf && (inbufflags & INP_FREE))
 	free(inbuf);
 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: PATCH: fix command substitution parsing
  2015-01-07 16:48 PATCH: fix command substitution parsing Peter Stephenson
  2015-01-09 21:26 ` Peter Stephenson
  2015-01-10 19:56 ` Mikael Magnusson
@ 2015-01-11 19:02 ` Peter Stephenson
  2 siblings, 0 replies; 6+ messages in thread
From: Peter Stephenson @ 2015-01-11 19:02 UTC (permalink / raw)
  To: Zsh Hackers' List

On Wed, 07 Jan 2015 16:48:36 +0000
Peter Stephenson <p.stephenson@samsung.com> wrote:
> Also, that stuff in skipcomm() is calling out for structs

Trivial, though removing that file static variable "len" looks like a
good deed: there was, in fact, one shadowed variable.

pws

diff --git a/Src/lex.c b/Src/lex.c
index 69441b2..b0cd86c 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -155,8 +155,8 @@ static int lex_add_raw;
 
 /* variables associated with the above */
 
-static char *tokstr_raw, *bptr_raw;
-static int len_raw, bsiz_raw;
+static char *tokstr_raw;
+static struct lexbufstate lexbuf_raw;
 
 /* text of punctuation tokens */
 
@@ -200,8 +200,7 @@ mod_export char *tokstrings[WHILE + 1] = {
 /* lexical state */
 
 static int dbparens;
-static int len = 0, bsiz = 256;
-static char *bptr;
+static struct lexbufstate lexbuf = { NULL, 256, 0 };
 
 /* save lexical context */
 
@@ -219,21 +218,17 @@ lex_context_save(struct lex_stack *ls, int toplevel)
     ls->tok = tok;
     ls->tokstr = tokstr;
     ls->zshlextext = zshlextext;
-    ls->bptr = bptr;
-    ls->bsiz = bsiz;
-    ls->len = len;
+    ls->lexbuf = lexbuf;
     ls->lex_add_raw = lex_add_raw;
     ls->tokstr_raw = tokstr_raw;
-    ls->bptr_raw = bptr_raw;
-    ls->bsiz_raw = bsiz_raw;
-    ls->len_raw = len_raw;
+    ls->lexbuf_raw = lexbuf_raw;
     ls->lexstop = lexstop;
     ls->toklineno = toklineno;
 
-    tokstr = zshlextext = bptr = NULL;
-    bsiz = 256;
-    tokstr_raw = bptr_raw = NULL;
-    bsiz_raw = len_raw = lex_add_raw = 0;
+    tokstr = zshlextext = lexbuf.ptr = NULL;
+    lexbuf.siz = 256;
+    tokstr_raw = lexbuf_raw.ptr = NULL;
+    lexbuf_raw.siz = lexbuf_raw.len = lex_add_raw = 0;
 }
 
 /* restore lexical context */
@@ -251,14 +246,10 @@ lex_context_restore(const struct lex_stack *ls, int toplevel)
     tok = ls->tok;
     tokstr = ls->tokstr;
     zshlextext = ls->zshlextext;
-    bptr = ls->bptr;
-    bsiz = ls->bsiz;
-    len = ls->len;
+    lexbuf = ls->lexbuf;
     lex_add_raw = ls->lex_add_raw;
     tokstr_raw = ls->tokstr_raw;
-    bptr_raw = ls->bptr_raw;
-    bsiz_raw = ls->bsiz_raw;
-    len_raw = ls->len_raw;
+    lexbuf_raw = ls->lexbuf_raw;
     lexstop = ls->lexstop;
     toklineno = ls->toklineno;
 }
@@ -445,17 +436,18 @@ lexinit(void)
 void
 add(int c)
 {
-    *bptr++ = c;
-    if (bsiz == ++len) {
-	int newbsiz = bsiz * 2;
+    *lexbuf.ptr++ = c;
+    if (lexbuf.siz == ++lexbuf.len) {
+	int newbsiz = lexbuf.siz * 2;
 
-	if (newbsiz > inbufct && inbufct > bsiz)
+	if (newbsiz > inbufct && inbufct > lexbuf.siz)
 	    newbsiz = inbufct;
 
-	bptr = len + (tokstr = (char *)hrealloc(tokstr, bsiz, newbsiz));
+	tokstr = (char *)hrealloc(tokstr, lexbuf.siz, newbsiz);
+	lexbuf.ptr = tokstr + lexbuf.len;
-	/* len == bsiz, so bptr is at the start of newly allocated memory */
+	/* len == siz, so lexbuf.ptr is at the start of newly allocated memory */
-	memset(bptr, 0, newbsiz - bsiz);
-	bsiz = newbsiz;
+	memset(lexbuf.ptr, 0, newbsiz - lexbuf.siz);
+	lexbuf.siz = newbsiz;
     }
 }
 
@@ -482,13 +474,13 @@ add(int c)
 static int
 cmd_or_math(int cs_type)
 {
-    int oldlen = len;
+    int oldlen = lexbuf.len;
     int c;
 
     cmdpush(cs_type);
     c = dquote_parse(')', 0);
     cmdpop();
-    *bptr = '\0';
+    *lexbuf.ptr = '\0';
     if (!c) {
 	/* Successfully parsed, see if it was math */
 	c = hgetc();
@@ -504,9 +496,10 @@ cmd_or_math(int cs_type)
     /* else unsuccessful: unget the whole thing */
     hungetc(c);
     lexstop = 0;
-    while (len > oldlen) {
-	len--;
-	hungetc(itok(*--bptr) ? ztokens[*bptr - Pound] : *bptr);
+    while (lexbuf.len > oldlen) {
+	lexbuf.len--;
+	hungetc(itok(*--lexbuf.ptr) ?
+		ztokens[*lexbuf.ptr - Pound] : *lexbuf.ptr);
     }
     hungetc('(');
     return 0;
@@ -531,8 +524,8 @@ cmd_or_math_sub(void)
 	}
 	if (ret == 2)
 	    return 1;
-	bptr -= 2;
-	len -= 2;
+	lexbuf.ptr -= 2;
+	lexbuf.len -= 2;
     } else {
 	hungetc(c);
 	lexstop = 0;
@@ -596,13 +589,13 @@ gettok(void)
     hwbegin(-1-(qbang && c == bangchar));
     /* word includes the last character read and possibly \ before ! */
     if (dbparens) {
-	len = 0;
-	bptr = tokstr = (char *) hcalloc(bsiz = LEX_HEAP_SIZE);
+	lexbuf.len = 0;
+	lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
 	hungetc(c);
 	cmdpush(CS_MATH);
 	c = dquote_parse(infor ? ';' : ')', 0);
 	cmdpop();
-	*bptr = '\0';
+	*lexbuf.ptr = '\0';
 	if (!c && infor) {
 	    infor--;
 	    return DINPAR;
@@ -650,8 +643,9 @@ gettok(void)
 	 * newlines being inserted into the history. */
 
 	if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
-	    len = 0;
-	    bptr = tokstr = (char *)hcalloc(bsiz = LEX_HEAP_SIZE);
+	    lexbuf.len = 0;
+	    lexbuf.ptr = tokstr =
+		(char *)hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
 	    add(c);
 	}
 	hwend();
@@ -666,7 +660,7 @@ gettok(void)
 	    peek = LEXERR;
 	else {
 	    if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
-		*bptr = '\0';
+		*lexbuf.ptr = '\0';
 		if (!lexstop)
 		    hungetc(c);
 		peek = STRING;
@@ -752,8 +746,9 @@ gettok(void)
 		return DINPAR;
 	    }
 	    if (incmdpos || (isset(SHGLOB) && !isset(KSHGLOB))) {
-		len = 0;
-		bptr = tokstr = (char *) hcalloc(bsiz = LEX_HEAP_SIZE);
+		lexbuf.len = 0;
+		lexbuf.ptr = tokstr = (char *)
+		    hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
 		switch (cmd_or_math(CS_MATH)) {
 		case 1:
 		    return DINPAR;
@@ -902,8 +897,8 @@ gettokstr(int c, int sub)
 
     peek = STRING;
     if (!sub) {
-	len = 0;
-	bptr = tokstr = (char *) hcalloc(bsiz = LEX_HEAP_SIZE);
+	lexbuf.len = 0;
+	lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
     }
     for (;;) {
 	int act;
@@ -939,7 +934,7 @@ gettokstr(int c, int sub)
 	    if (fdpar) {
 		/* this is a single word `(   )', treat as INOUTPAR */
 		add(c);
-		*bptr = '\0';
+		*lexbuf.ptr = '\0';
 		return INOUTPAR;
 	    }
 	    if ((sub || in_brace_param) && isset(SHGLOB))
@@ -1014,9 +1009,9 @@ gettokstr(int c, int sub)
 	    if (isset(SHGLOB)) {
 		if (sub || in_brace_param)
 		    break;
-		if (incasepat && !len)
+		if (incasepat && !lexbuf.len)
 		    return INPAR;
-		if (!isset(KSHGLOB) && len)
+		if (!isset(KSHGLOB) && lexbuf.len)
 		    goto brk;
 	    }
 	    if (!in_brace_param) {
@@ -1073,9 +1068,9 @@ gettokstr(int c, int sub)
 	    if (isset(IGNOREBRACES) || sub)
 		c = '{';
 	    else {
-		if (!len && incmdpos) {
+		if (!lexbuf.len && incmdpos) {
 		    add('{');
-		    *bptr = '\0';
+		    *lexbuf.ptr = '\0';
 		    return STRING;
 		}
 		if (in_brace_param) {
@@ -1161,23 +1156,23 @@ gettokstr(int c, int sub)
 			   incmdpos && !bct && !brct) {
 		    char *t = tokstr;
 		    if (idigit(*t))
-			while (++t < bptr && idigit(*t));
+			while (++t < lexbuf.ptr && idigit(*t));
 		    else {
-			int sav = *bptr;
-			*bptr = '\0';
+			int sav = *lexbuf.ptr;
+			*lexbuf.ptr = '\0';
 			t = itype_end(t, IIDENT, 0);
-			if (t < bptr) {
+			if (t < lexbuf.ptr) {
 			    skipparens(Inbrack, Outbrack, &t);
 			} else {
-			    *bptr = sav;
+			    *lexbuf.ptr = sav;
 			}
 		    }
 		    if (*t == '+')
 			t++;
-		    if (t == bptr) {
+		    if (t == lexbuf.ptr) {
 			e = hgetc();
 			if (e == '(' && incmdpos) {
-			    *bptr = '\0';
+			    *lexbuf.ptr = '\0';
 			    return ENVARRAY;
 			}
 			hungetc(e);
@@ -1214,7 +1209,7 @@ gettokstr(int c, int sub)
 		goto brk;
 	    break;
 	case LX2_QUOTE: {
-	    int strquote = (len && bptr[-1] == String);
+	    int strquote = (lexbuf.len && lexbuf.ptr[-1] == String);
 
 	    add(Snull);
 	    cmdpush(CS_QUOTE);
@@ -1237,8 +1232,8 @@ gettokstr(int c, int sub)
 			else
 			    add('\\');
 		    } else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
-			if (bptr[-1] == '\\')
-			    bptr--, len--;
+			if (lexbuf.ptr[-1] == '\\')
+			    lexbuf.ptr--, lexbuf.len--;
 			else
 			    break;
 		    }
@@ -1328,15 +1323,16 @@ gettokstr(int c, int sub)
 	while(bct-- >= in_brace_param)
 	    cmdpop();
 	zerr("closing brace expected");
-    } else if (unset(IGNOREBRACES) && !sub && len > 1 &&
-	       peek == STRING && bptr[-1] == '}' && bptr[-2] != Bnull) {
+    } else if (unset(IGNOREBRACES) && !sub && lexbuf.len > 1 &&
+	       peek == STRING && lexbuf.ptr[-1] == '}' &&
+	       lexbuf.ptr[-2] != Bnull) {
 	/* hack to get {foo} command syntax work */
-	bptr--;
-	len--;
+	lexbuf.ptr--;
+	lexbuf.len--;
 	lexstop = 0;
 	hungetc('}');
     }
-    *bptr = '\0';
+    *lexbuf.ptr = '\0';
     DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed.");
     return peek;
 }
@@ -1528,11 +1524,11 @@ parsestrnoerr(char *s)
     untokenize(s);
     inpush(dupstring(s), 0, NULL);
     strinbeg(0);
-    len = 0;
-    bptr = tokstr = s;
-    bsiz = l + 1;
+    lexbuf.len = 0;
+    lexbuf.ptr = tokstr = s;
+    lexbuf.siz = l + 1;
     err = dquote_parse('\0', 1);
-    *bptr = '\0';
+    *lexbuf.ptr = '\0';
     strinend();
     inpop();
     DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty.");
@@ -1559,18 +1555,18 @@ parse_subscript(char *s, int sub, int endchar)
     untokenize(t = dupstring(s));
     inpush(t, 0, NULL);
     strinbeg(0);
-    len = 0;
-    bptr = tokstr = s;
-    bsiz = l + 1;
+    lexbuf.len = 0;
+    lexbuf.ptr = tokstr = s;
+    lexbuf.siz = l + 1;
     err = dquote_parse(endchar, sub);
     if (err) {
-	err = *bptr;
-	*bptr = '\0';
+	err = *lexbuf.ptr;
+	*lexbuf.ptr = '\0';
 	untokenize(s);
-	*bptr = err;
+	*lexbuf.ptr = err;
 	s = NULL;
     } else {
-	s = bptr;
+	s = lexbuf.ptr;
     }
     strinend();
     inpop();
@@ -1597,9 +1593,9 @@ parse_subst_string(char *s)
     untokenize(s);
     inpush(dupstring(s), 0, NULL);
     strinbeg(0);
-    len = 0;
-    bptr = tokstr = s;
-    bsiz = l + 1;
+    lexbuf.len = 0;
+    lexbuf.ptr = tokstr = s;
+    lexbuf.siz = l + 1;
     c = hgetc();
     ctok = gettokstr(c, 1);
     err = errflag;
@@ -1615,7 +1611,7 @@ parse_subst_string(char *s)
     }
 #ifdef DEBUG
     /*
-     * Historical note: we used to check here for olen (the value of len
+     * Historical note: we used to check here for olen (the value of lexbuf.len
      * before zcontext_restore()) == l, but that's not necessarily the case if
      * we stripped an RCQUOTE.
      */
@@ -1782,14 +1778,14 @@ zshlex_raw_add(int c)
     if (!lex_add_raw)
 	return;
 
-    *bptr_raw++ = c;
-    if (bsiz_raw == ++len_raw) {
-	int newbsiz = bsiz_raw * 2;
+    *lexbuf_raw.ptr++ = c;
+    if (lexbuf_raw.siz == ++lexbuf_raw.len) {
+	int newbsiz = lexbuf_raw.siz * 2;
 
-	tokstr_raw = (char *)hrealloc(tokstr_raw, bsiz_raw, newbsiz);
-	bptr_raw = tokstr_raw + len_raw;
-	memset(bptr_raw, 0, newbsiz - bsiz_raw);
-	bsiz_raw = newbsiz;
+	tokstr_raw = (char *)hrealloc(tokstr_raw, lexbuf_raw.siz, newbsiz);
+	lexbuf_raw.ptr = tokstr_raw + lexbuf_raw.len;
+	memset(lexbuf_raw.ptr, 0, newbsiz - lexbuf_raw.siz);
+	lexbuf_raw.siz = newbsiz;
     }
 }
 
@@ -1799,8 +1795,8 @@ zshlex_raw_back(void)
 {
     if (!lex_add_raw)
 	return;
-    bptr_raw--;
-    len_raw--;
+    lexbuf_raw.ptr--;
+    lexbuf_raw.len--;
 }
 
 /*
@@ -1817,8 +1813,9 @@ zshlex_raw_back(void)
 static int
 skipcomm(void)
 {
-    char *new_tokstr, *new_bptr = bptr_raw;
-    int new_len, new_bsiz, new_lexstop, new_lex_add_raw;
+    char *new_tokstr;
+    int new_lexstop, new_lex_add_raw;
+    struct lexbufstate new_lexbuf;
 
     cmdpush(CS_CMDSUBST);
     SETPARBEGIN
@@ -1842,9 +1839,7 @@ skipcomm(void)
 	 * to keep the same history context.
 	 */
 	new_tokstr = tokstr;
-	new_bptr = bptr;
-	new_len = len;
-	new_bsiz = bsiz;
+	new_lexbuf = lexbuf;
 
 	zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
     } else {
@@ -1858,16 +1853,12 @@ skipcomm(void)
 	 * otherwise by cleared, though.
 	 */
 	new_tokstr = tokstr_raw;
-	new_bptr = bptr_raw;
-	new_len = len_raw;
-	new_bsiz = bsiz_raw;
+	new_lexbuf = lexbuf_raw;
 
 	zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
     }
     tokstr_raw = new_tokstr;
-    bsiz_raw = new_bsiz;
-    len_raw = new_len;
-    bptr_raw = new_bptr;
+    lexbuf_raw = new_lexbuf;
     lex_add_raw = new_lex_add_raw;
 
     if (!parse_event(OUTPAR) || tok != OUTPAR)
@@ -1879,9 +1870,7 @@ skipcomm(void)
      * as the current token string after popping the stack.
      */
     new_tokstr = tokstr_raw;
-    new_bptr = bptr_raw;
-    new_len = len_raw;
-    new_bsiz = bsiz_raw;
+    new_lexbuf = lexbuf_raw;
     /*
      * We're also going to propagate the lexical state:
      * if we couldn't parse the command substitution we
@@ -1896,14 +1885,12 @@ skipcomm(void)
 	 * Keep going, so retain the raw variables.
 	 */
 	tokstr_raw = new_tokstr;
-	bptr_raw = new_bptr;
-	len_raw = new_len;
-	bsiz_raw = new_bsiz;
+	lexbuf_raw = new_lexbuf;
     } else {
 	if (!new_lexstop) {
 	    /* Ignore the ')' added on input */
-	    new_len--;
-	    *--new_bptr = '\0';
+	    new_lexbuf.len--;
+	    *--new_lexbuf.ptr = '\0';
 	}
 
 	/*
@@ -1911,9 +1898,7 @@ skipcomm(void)
 	 * all along.
 	 */
 	tokstr = new_tokstr;
-	bptr = new_bptr;
-	len = new_len;
-	bsiz = new_bsiz;
+	lexbuf = new_lexbuf;
 	lexstop = new_lexstop;
     }
 
diff --git a/Src/zsh.h b/Src/zsh.h
index 8fb4f97..94e9ffc 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -2717,6 +2717,27 @@ struct hist_stack {
     int csp;
 };
 
+/*
+ * State of a lexical token buffer: write position, capacity and fill.
+ *
+ * The start of the buffer is kept separately (tokstr for lexbuf,
+ * tokstr_raw for lexbuf_raw).  It would be neater to hold it here, but
+ * tokstr is visible in lots of places in the current code structure,
+ * so that's not convenient.
+ */
+
+struct lexbufstate {
+    /*
+     * Where the next character will be stored; moves in step with len.
+     * Set to NULL when the buffer is to be visible from elsewhere.
+     */
+    char *ptr;
+    /* Allocated buffer size; the raw buffer is doubled when len hits it */
+    int siz;
+    /* Number of characters currently in use */
+    int len;
+};
+
 /* Lexical analyser */
 struct lex_stack {
     int dbparens;
@@ -2726,14 +2747,10 @@ struct lex_stack {
     enum lextok tok;
     char *tokstr;
     char *zshlextext;
-    char *bptr;
-    int bsiz;
-    int len;
+    struct lexbufstate lexbuf;
     int lex_add_raw;
     char *tokstr_raw;
-    char *bptr_raw;
-    int bsiz_raw;
-    int len_raw;
+    struct lexbufstate lexbuf_raw;
     int lexstop;
     zlong toklineno;
 };


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-01-11 19:02 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-07 16:48 PATCH: fix command substitution parsing Peter Stephenson
2015-01-09 21:26 ` Peter Stephenson
2015-01-10  1:28   ` Bart Schaefer
2015-01-10 19:56 ` Mikael Magnusson
2015-01-10 20:25   ` Peter Stephenson
2015-01-11 19:02 ` Peter Stephenson

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).