zsh-workers
 help / color / mirror / code / Atom feed
* [PATCH] Improved implementation of ${{var} cmd} etc.
@ 2024-03-30 21:47 Bart Schaefer
  2024-03-31  3:47 ` Bart Schaefer
  0 siblings, 1 reply; 2+ messages in thread
From: Bart Schaefer @ 2024-03-30 21:47 UTC (permalink / raw)
  To: Zsh hackers list

[-- Attachment #1: Type: text/plain, Size: 1882 bytes --]

This REPLACES the patch in workers/52831
for changing ${|var|cmd} to ${{var} cmd}.

Besides generally that, it implements the following (also mentioning
changes vs 52831):

Clarify or expand comments, use more meaningful variable names.
Refactor to avoid scanning "{var} cmd" three times to separate "var" from "cmd".
Require space after {var} to limit vulnerability to ${{var}} vs ${${var}} typos.
Lift tokenize() block to avoid code duplication.
Signal handling paranoia.
Update tests and add additional special case tests.
Now only ${|cmd} has magic local $REPLY, rather than all 3 forms.

Some details about the latter:  I've stepped onto the slippery slope
of having a ".zsh" namespace for internal parameters.  ${{var} cmd}
doesn't use a local at all now, and ${|cmd} creates a local $REPLY as
before, but ${ cmd } creates a local readonly ${.zsh.cmdsubst}.
Making it readonly (and unset) prevents user code in cmd from
attempting to use that name nefariously.  After executing the cmd, the
readonly state is cleared and the output is assigned to it.

Then later when we're ready to substitute the value and apply any
state passed down from ${(flags)${|cmd}} etc., each of {var} or REPLY
or .zsh.cmdsubst can be referenced the same way.  There's probably
some optimization to be had, i.e., avoid assignment only to fetch the
value back immediately, but the two forms where user code assigns to a
parameter are required to work that way, so it's no worse to have the
stdout branch do the same under the hood.

RE the "edge cases" in the other thread:

${{var}} is a bad substitution because of the required space, noted above.
${{var} }, ${{var};}, ${{} }, etc., substitute the empty string rather
than erroring.
${{REPLY[2]} REPLY=abc} now substitutes "b" because REPLY is not
implicitly local.


This does not yet update the Doc, nor do anything about the "break" situation.

[-- Attachment #2: nofork-doublecurly-p2.txt --]
[-- Type: text/plain, Size: 11362 bytes --]

diff --git a/Src/lex.c b/Src/lex.c
index 31b130b07..700af2da1 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -1423,7 +1423,7 @@ gettokstr(int c, int sub)
 	if (lexstop)
 	    break;
 	if (!cmdsubst && in_brace_param && act == LX2_STRING &&
-	    (c == '|' || c == Bar || inblank(c))) {
+	    (c == '|' || c == Bar || c == '{' || c == Inbrace || inblank(c))) {
 	    cmdsubst = in_brace_param;
 	    cmdpush(CS_CURSH);
 	} else if (in_pattern == 2 && c != '/')
diff --git a/Src/subst.c b/Src/subst.c
index 9d20a2d0e..f0d6c7a7b 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -1866,10 +1866,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
      * joining the array into a string (for compatibility with ksh/bash).
      */
     int quoted_array_with_offset = 0;
-    /* Indicates ${|...;} */
-    char *rplyvar = NULL;
-    /* Indicates ${ ... ;} */
-    char *rplytmp = NULL;
+    /*
+     * Nofork substitution controls
+     */
+    char *rplyvar = NULL;    /* Indicates ${|...;} or ${{var} ...;} */
+    char *rplytmp = NULL;    /* Indicates ${ ... ;} */
 
     *s++ = '\0';
     /*
@@ -1897,14 +1898,16 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
      * flags in parentheses, but also one ksh hack.
      */
     if (c == Inbrace) {
-	/* The command string to be run by ${|...;} */
-	char *cmdarg = NULL;
+	/* For processing nofork command substitution string */
+	char *cmdarg = NULL, *endvar = NULL, inchar = *++s;
+	char *outbracep = s, sav = *s;
+	Param rplypm = NULL;
 	size_t slen = 0;
 	int trim = (!EMULATION(EMULATE_ZSH)) ? 2 : !qt;
-	inbrace = 1;
-	s++;
 
-        /* Short-path for the nofork command substitution ${|cmd;}
+	inbrace = 1;	/* Outer scope boolean, see above */
+
+        /* Handling for nofork command substitution e.g. ${|cmd;}
 	 * See other comments about kludges for why this is here.
 	 *
          * The command string is extracted and executed, and the
@@ -1913,48 +1916,77 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
          * should not be part of command substitution in any case.
          * Use ${(U)${|cmd;}} as you would for ${(U)$(cmd;)}.
 	 */
-	if (*s == '|' || *s == Bar || inblank(*s)) {
-	    char *outbracep = s;
-	    char sav = *s;
+	if (inchar == '|' || inchar == Bar || inblank(inchar)) {
 	    *s = Inbrace;
-	    if (skipparens(Inbrace, Outbrace, &outbracep) == 0) {
+	    if (skipparens(Inbrace, Outbrace, &outbracep) == 0)
 		slen = outbracep - s - 1;
-		if ((*s = sav) != Bar) {
-		    sav = *outbracep;
-		    *outbracep = '\0';
-		    tokenize(s);
-		    *outbracep = sav;
+	    *s = sav;
+	    if (inchar == '|')
+		inchar = Bar;	/* Simplify later compares */
+	} else if (inchar == '{' || inchar == Inbrace) {
+	    *s = Inbrace;
+	    if ((outbracep = itype_end(s+1, INAMESPC, 0))) {
+		if (*outbracep == Inbrack &&
+		    (outbracep = parse_subscript(++outbracep, 1, ']')))
+		    ++outbracep;
+	    }
+
+	    /* If we reached the first close brace, find the last */
+	    if (outbracep && *outbracep == Outbrace) {
+		char outchar = inchar == Inbrace ? Outbrace : '}';
+		endvar = outbracep++;
+
+		/* Require space to avoid ${{var}} typo for ${${var}} */
+		if (!inblank(*outbracep)) {
+		    zerr("bad substitution");
+		    return NULL;
 		}
+
+		*endvar = '|';	/* Almost anything but braces/brackets */
+		outbracep = s;
+		if (skipparens(Inbrace, outchar, &outbracep) == 0)
+		    *endvar = Outbrace;
+		else {	/* Never happens? */
+		    *endvar = outchar;
+		    outbracep = endvar + 1;
+		}
+		slen = outbracep - s - 1;
+		if (inchar != Inbrace)
+		    outbracep[-1] = Outbrace;
+		*s = sav;
+		inchar = Inbrace;	/* Simplify later compares */
+	    } else {
+		zerr("bad substitution");
+		return NULL;
 	    }
 	}
 	if (slen > 1) {
 	    char *outbracep = s + slen;
+	    if (!itok(*s) || inblank(inchar)) {
+		/* This tokenize() is important */
+		char sav = *outbracep;
+		*outbracep = '\0';
+		tokenize(s);
+		*outbracep = sav;
+	    }
 	    if (*outbracep == Outbrace) {
-		if ((rplyvar = itype_end(s+1, INAMESPC, 0))) {
-		    if (*rplyvar == Inbrack &&
-			(rplyvar = parse_subscript(++rplyvar, 1, ']')))
-			++rplyvar;
-		}
-		if (rplyvar == s+1 && *rplyvar == Bar) {
-		    /* Is ${||...} a subtitution error or a syntax error?
-		    zerr("bad substitution");
-		    return NULL;
-		    */
+		if (endvar == s+1) {
+		    /* For consistency with ${} we allow ${{}...} */
 		    rplyvar = NULL;
 		}
-		if (rplyvar && *rplyvar == Bar) {
-		    cmdarg = dupstrpfx(rplyvar+1, outbracep-rplyvar-1);
-		    rplyvar = dupstrpfx(s+1,rplyvar-s-1);
+		if (endvar && *endvar == Outbrace) {
+		    cmdarg = dupstrpfx(endvar+1, outbracep-endvar-1);
+		    rplyvar = dupstrpfx(s+1,endvar-s-1);
 		} else {
 		    cmdarg = dupstrpfx(s+1, outbracep-s-1);
 		    rplyvar = "REPLY";
 		}
-		if (inblank(*s)) {
+		if (inblank(inchar)) {
 		    /*
-		     * Admittedly a hack.  Take advantage of the enforced
-		     * locality of REPLY and the semantics of $(<file) to
+		     * Admittedly a hack.  Take advantage of the added
+		     * parameter scope and the semantics of $(<file) to
 		     * construct a command to write/read a temporary file.
-		     * Then fall through to the regular handling of $REPLY
+		     * Then fall through to the regular parameter handling
 		     * to manage word splitting, expansion flags, etc.
 		     */
 		    char *outfmt = ">| %s {\n%s\n;}";	/* 13 */
@@ -1977,22 +2009,39 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
 	}
 
 	if (rplyvar) {
-	    Param pm;
-	    /* char *rplyval = getsparam("REPLY"); */
+	    /* char *rplyval = getsparam("REPLY");  cf. Future? below */
 	    startparamscope(); /* "local" behaves as if in a function */
-	    pm = createparam("REPLY", PM_LOCAL|PM_UNSET);
-	    if (pm)	/* Shouldn't createparam() do this? */
-		pm->level = locallevel;
-	    /* if (rplyval) setsparam("REPLY", ztrdup(rplyval)); */
+	    if (inchar == Bar) {
+		/* rplyvar should be REPLY at this point, but create
+		 * hardwired name anyway to expose any bugs elsewhere
+		 */
+		rplypm = createparam("REPLY", PM_LOCAL|PM_UNSET|PM_HIDE);
+		if (rplypm)	/* Shouldn't createparam() do this? */
+		    rplypm->level = locallevel;
+		/* Future?  Expose global value of $REPLY if any? */
+		/* if (rplyval) setsparam("REPLY", ztrdup(rplyval)); */
+	    } else if (inblank(inchar)) {
+		rplypm = createparam(".zsh.cmdsubst",
+				     PM_LOCAL|PM_UNSET|PM_HIDE|
+				     PM_READONLY_SPECIAL);
+		if (rplypm)
+		    rplypm->level = locallevel;
+	    }
+	    if (inchar != Inbrace && !rplypm) {
+		zerr("failed to create scope for command substitution");
+		return NULL;
+	    }
 	}
 
 	if (rplyvar && cmdarg && *cmdarg) {
 	    int obreaks = breaks;
 	    Eprog cmdprog;
 	    /* Execute the shell command */
+	    queue_signals();
 	    untokenize(cmdarg);
 	    cmdprog = parse_string(cmdarg, 0);
 	    if (cmdprog) {
+		/* exec.c handles dont_queue_signals() */
 		execode(cmdprog, 1, 0, "cmdsubst");
 		cmdoutval = lastval;
 		/* "return" behaves as if in a function */
@@ -2002,6 +2051,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
 		}
 	    } else	/* parse error */
 		errflag |= ERRFLAG_ERROR;
+	    if (rplypm)
+		rplypm->node.flags &= ~PM_READONLY_SPECIAL;
 	    if (rplytmp && !errflag) {
 		int onoerrs = noerrs, rplylen;
 		noerrs = 2;
@@ -2017,15 +2068,16 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
 		}
 		noerrs = onoerrs;
 		if (rplylen >= 0)
-		    setsparam("REPLY", metafy(cmdarg, rplylen, META_REALLOC));
+		    setsparam(rplyvar, metafy(cmdarg, rplylen, META_REALLOC));
 	    }
+	    unqueue_signals();
 	}
 
 	if (rplytmp)
 	    unlink(rplytmp);
 	if (rplyvar) {
-	    if (strcmp(rplyvar, "REPLY") == 0) {
-		if ((val = dupstring(getsparam("REPLY"))))
+	    if (inchar != Inbrace) {
+		if ((val = dupstring(getsparam(rplyvar))))
 		    vunset = 0;
 		else {
 		    vunset = 1;
diff --git a/Test/D10nofork.ztst b/Test/D10nofork.ztst
index fc6b84613..5bb10266f 100644
--- a/Test/D10nofork.ztst
+++ b/Test/D10nofork.ztst
@@ -14,6 +14,28 @@
 0:Basic substitution and REPLY scoping
 >INNER OUTER
 
+  reply=(x OUTER x)
+  purl ${{reply} reply=(\{ INNER \})} $reply
+0:Basic substitution, brace quoting, and array result
+>{
+>INNER
+>}
+>{
+>INNER
+>}
+
+  () {
+    setopt localoptions ignorebraces
+    purl ${{reply} reply=({ INNER })} $reply
+  }
+0:Basic substitution, ignorebraces, and array result
+>{
+>INNER
+>}
+>{
+>INNER
+>}
+
   purr ${| REPLY=first}:${| REPLY=second}:$REPLY
 0:re-scoping of REPLY in one statement
 >first:second:OUTER
@@ -229,7 +251,7 @@ F:Why not use this error in the previous case as well?
 >26
 
   unset reply
-  purl ${|reply| reply=(1 2 ${| REPLY=3 } 4) }
+  purl ${{reply} reply=(1 2 ${| REPLY=3 } 4) }
   typeset -p reply
 0:array behavior with global assignment
 >1
@@ -315,7 +337,7 @@ F:status of "print" should hide return
 
   unset zz
   outer=GLOBAL
-  purr "${|zz|
+  purr "${{zz}
    local outer=LOCAL
    zz=NONLOCAL
   } $outer $?"
@@ -440,7 +462,8 @@ F:must do this before evaluating the next test block
 0:ignored braces, part 1
 >buried}
 
-  purr "${ purr ${REPLY:-buried}}}"
+  # Global $REPLY still set from earlier test
+  purr "${ purr ${REPLY:+buried}}}"
 0:ignored braces, part 2
 >buried
 >}
@@ -453,6 +476,7 @@ F:must do this before evaluating the next test block
 1:ignored braces, part 4
 ?(eval):3: parse error near `}'
 
+  unsetopt ignorebraces
   # "break" blocks function calls in outer loop
   # Could use print, but that might get fixed
   repeat 3 do purr ${
@@ -467,11 +491,25 @@ F:must do this before evaluating the next test block
 ?1
 ?2
 
-  print -u $ZTST_fd ${ZTST_testname}: TEST COMPLETE
-0:make sure we got to the end
-F:some tests might silently break the test harness
+ # Cannot "purr": break skips pending function calls
+ # Use "repeat" to avoid infinite loop on failure 
+ repeat 3 do; echo ${|REPLY=x; break }; done
+ repeat 3 do; echo ${{x} x=y; break }; done
+ repeat 3 do; echo ${ echo z; break }; done
+0:break after assignment completes the assignment
+>x
+>y
+>z
+
+ # Subshell because error exits
+ ( purr ${ echo ${unset?oops} } )
+1:error handling (without crashing)
+*?*unset: oops
+
+ purr ${ .zsh.cmdsubst=error }
+1:reserved parameter name (without crashing)
+*?*.zsh.cmdsubst: can't modify read-only parameter
 
 %clean
 
   unfunction purr purl
-  unsetopt ignorebraces
diff --git a/Test/V10private.ztst b/Test/V10private.ztst
index ed51316f3..26004a2dc 100644
--- a/Test/V10private.ztst
+++ b/Test/V10private.ztst
@@ -497,7 +497,7 @@ F:Better if caught in checkclobberparam() but exec.c doesn't know scope
  () {
    private z=outer
    print ${(t)z} $z
-   print ${| REPLY=${|z| z=nofork} }
+   print ${| REPLY=${{z} z=nofork} }
    print ${(t)z} $z
  }
 0:nofork may write to private in calling function
@@ -518,9 +518,9 @@ F:Better if caught in checkclobberparam() but exec.c doesn't know scope
  () {
    private z=outer
    print ${(t)z} $z
-   print ${|z|
+   print ${{z}
      private q
-     z=${|q| q=nofork}
+     z=${{q} q=nofork}
    }
    print ${(t)z} $z
  }
@@ -533,7 +533,7 @@ F:Better if caught in checkclobberparam() but exec.c doesn't know scope
    print ${|
      () { REPLY="{$q}" }
    }
-   print ${|q|
+   print ${{q}
      () { q=nofork }
    }
  }

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-03-31  3:47 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-30 21:47 [PATCH] Improved implementation of ${{var} cmd} etc Bart Schaefer
2024-03-31  3:47 ` Bart Schaefer

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).