zsh-workers
 help / color / mirror / code / Atom feed
From: Peter Stephenson <p.w.stephenson@ntlworld.com>
To: zsh-workers@zsh.org
Subject: Re: Bug with bash emulation regarding ':'
Date: Fri, 10 Feb 2012 21:04:11 +0000	[thread overview]
Message-ID: <20120210210411.4e14cc3f@pws-pc.ntlworld.com> (raw)
In-Reply-To: <120205132146.ZM7315@torch.brasslantern.com>

On Sun, 05 Feb 2012 13:21:46 -0800
Bart Schaefer <schaefer@brasslantern.com> wrote:
> (I wonder if changing the boolean "split" parameter to multsub() into a
> set of bitflags would suffice.)

It took some thinking about, but I think the following is about the most
minimal code.  Even allowing for the fact that explicit splitting is
separate from sh-wordsplitting (it's a stronger condition), it needed a
third flag to distinguish the cases...

- SHWORDSPLIT is on normally.
- SHWORDSPLIT isn't on, but is required by a recursive substitution.
- SHWORDSPLIT needs to be forced off by a recursive substitution.

At least I think so.

There's now no temporary setting of SHWORDSPLIT itself and the special
flags are only tested on the way through to paramsubst(), so command
substitution doesn't seem them.

I don't know if this helps with the other issues you mentioned.

Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.130
diff -p -u -r1.130 subst.c
--- Src/subst.c	21 Dec 2011 22:39:28 -0000	1.130
+++ Src/subst.c	10 Feb 2012 20:54:23 -0000
@@ -74,7 +74,10 @@ prefork(LinkList list, int flags)
 	     */
 	    setdata(node, cptr);
 	}
-	if (!(node = stringsubst(list, node, flags & PF_SINGLE, asssub))) {
+	if (!(node = stringsubst(list, node,
+				 flags & (PF_SINGLE|PF_SPLIT|
+					  PF_SHWORDSPLIT|PF_NOSHWORDSPLIT),
+				 asssub))) {
 	    unqueue_signals();
 	    return;
 	}
@@ -145,7 +148,7 @@ stringsubstquote(char *strstart, char **
 
 /**/
 static LinkNode
-stringsubst(LinkList list, LinkNode node, int ssub, int asssub)
+stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub)
 {
     int qt;
     char *str3 = (char *)getdata(node);
@@ -213,7 +216,11 @@ stringsubst(LinkList list, LinkNode node
 		setdata(node, (void *) str3);
 		continue;
 	    } else {
-		node = paramsubst(list, node, &str, qt, ssub);
+		if ((isset(SHWORDSPLIT) && !(pf_flags & PF_NOSHWORDSPLIT)) ||
+		    (pf_flags & PF_SPLIT))
+		    pf_flags |= PF_SHWORDSPLIT;
+		node = paramsubst(list, node, &str, qt,
+				  pf_flags & (PF_SINGLE|PF_SHWORDSPLIT));
 		if (errflag || !node)
 		    return NULL;
 		str3 = (char *)getdata(node);
@@ -268,7 +275,7 @@ stringsubst(LinkList list, LinkNode node
 		       (qt && str[1] == '"'))))
 		    *str = ztokens[c - Pound];
 	    str++;
-	    if (!(pl = getoutput(str2 + 1, qt || ssub))) {
+	    if (!(pl = getoutput(str2 + 1, qt || (pf_flags & PF_SINGLE)))) {
 		zerr("parse error in command substitution");
 		return NULL;
 	    }
@@ -278,7 +285,7 @@ stringsubst(LinkList list, LinkNode node
 		str = strcpy(str2, str);
 		continue;
 	    }
-	    if (!qt && ssub && isset(GLOBSUBST))
+	    if (!qt && (pf_flags & PF_SINGLE) && isset(GLOBSUBST))
 		shtokenize(s);
 	    l1 = str2 - str3;
 	    l2 = strlen(s);
@@ -306,7 +313,7 @@ stringsubst(LinkList list, LinkNode node
 	     * We are in a normal argument which looks like an assignment
 	     * and is to be treated like one, with no word splitting.
 	     */
-	    ssub = 1;
+	    pf_flags |= PF_SINGLE;
 	}
 	str++;
     }
@@ -392,13 +399,13 @@ singsub(char **s)
 
 /**/
 static int
-multsub(char **s, int split, char ***a, int *isarr, char *sep)
+multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep)
 {
     int l;
     char **r, **p, *x = *s;
     local_list1(foo);
 
-    if (split) {
+    if (pf_flags & PF_SPLIT) {
 	/*
 	 * This doesn't handle multibyte characters, but we're
 	 * looking for whitespace separators which must be ASCII.
@@ -413,7 +420,7 @@ multsub(char **s, int split, char ***a, 
 
     init_list1(foo, x);
 
-    if (split) {
+    if (pf_flags & PF_SPLIT) {
 	LinkNode n = firstnode(&foo);
 	int inq = 0, inp = 0;
 	MB_METACHARINIT();
@@ -467,7 +474,7 @@ multsub(char **s, int split, char ***a, 
 	}
     }
 
-    prefork(&foo, 0);
+    prefork(&foo, pf_flags);
     if (errflag) {
 	if (isarr)
 	    *isarr = 0;
@@ -1437,7 +1444,7 @@ check_colon_subscript(char *str, char **
 
 /**/
 static LinkNode
-paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
+paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 {
     char *aptr = *str, c, cc;
     char *s = aptr, *fstr, *idbeg, *idend, *ostr = (char *) getdata(n);
@@ -1514,7 +1521,7 @@ paramsubst(LinkList l, LinkNode n, char 
      * where we shouldn't, in particular on the multsubs for
      * handling embedded values for ${...=...} and the like.
      */
-    int spbreak = isset(SHWORDSPLIT) && !ssub && !qt;
+    int spbreak = (pf_flags & PF_SHWORDSPLIT) && !(pf_flags & PF_SINGLE) && !qt;
     /* Scalar and array value, see isarr above */
     char *val = NULL, **aval = NULL;
     /*
@@ -1564,6 +1571,11 @@ paramsubst(LinkList l, LinkNode n, char 
      */
     int shsplit = 0;
     /*
+     * "ssub" is true when we are called from singsub (via prefork):
+     * it means that we must join arrays and should not split words.
+     */
+    int ssub = (pf_flags & PF_SINGLE);
+    /*
      * The separator from (j) and (s) respectively, or (F) and (f)
      * respectively (hardwired to "\n" in that case).  Slightly
      * confusingly also used for ${#pm}, thought that's at least
@@ -1620,7 +1632,7 @@ paramsubst(LinkList l, LinkNode n, char 
      * This is one of the things that decides whether multsub
      * will produce an array, but in an extremely indirect fashion.
      */
-    int nojoin = isset(SHWORDSPLIT) ? !(ifs && *ifs) : 0;
+    int nojoin = (pf_flags & PF_SHWORDSPLIT) ? !(ifs && *ifs) : 0;
     /*
      * != 0 means ${...}, otherwise $...  What works without braces
      * is largely a historical artefact (everything works with braces,
@@ -2618,7 +2630,7 @@ paramsubst(LinkList l, LinkNode n, char 
 	/* Fall Through! */
 	case '-':
 	    if (vunset) {
-		int ws = opts[SHWORDSPLIT];
+		int split_flags;
 		val = dupstring(s);
 		/* If word-splitting is enabled, we ask multsub() to split
 		 * the substituted string at unquoted whitespace.  Then, we
@@ -2627,9 +2639,20 @@ paramsubst(LinkList l, LinkNode n, char 
 		 * keep its array splits, and weird constructs such as
 		 * ${str+"one two" "3 2 1" foo "$str"} to only be split
 		 * at the unquoted spaces. */
-		opts[SHWORDSPLIT] = spbreak;
-		multsub(&val, spbreak && !aspar, (aspar ? NULL : &aval), &isarr, NULL);
-		opts[SHWORDSPLIT] = ws;
+		if (spbreak) {
+		    split_flags = PF_SHWORDSPLIT;
+		    if (!aspar)
+			split_flags |= PF_SPLIT;
+		} else {
+		    /*
+		     * It's not good enough not passing the flag to use
+		     * SHWORDSPLIT, because when we get to a nested
+		     * paramsubst we need to ignore isset(SHWORDSPLIT).
+		     */
+		    split_flags = PF_NOSHWORDSPLIT;
+		}
+		multsub(&val, split_flags, (aspar ? NULL : &aval),
+			&isarr, NULL);
 		copied = 1;
 		spbreak = 0;
 		/* Leave globsubst on if forced */
@@ -2647,21 +2670,21 @@ paramsubst(LinkList l, LinkNode n, char 
 	case '=':
 	case Equals:
 	    if (vunset) {
-		int ws = opts[SHWORDSPLIT];
 		char sav = *idend;
-		int l;
+		int l, split_flags;
 
 		*idend = '\0';
 		val = dupstring(s);
 		if (spsep || !arrasg) {
-		    opts[SHWORDSPLIT] = 0;
-		    multsub(&val, 0, NULL, &isarr, NULL);
+		    multsub(&val, PF_NOSHWORDSPLIT, NULL, &isarr, NULL);
 		} else {
-		    opts[SHWORDSPLIT] = spbreak;
-		    multsub(&val, spbreak, &aval, &isarr, NULL);
+		    if (spbreak)
+			split_flags = PF_SPLIT|PF_SHWORDSPLIT;
+		    else
+			split_flags = PF_NOSHWORDSPLIT;
+		    multsub(&val, split_flags, &aval, &isarr, NULL);
 		    spbreak = 0;
 		}
-		opts[SHWORDSPLIT] = ws;
 		if (arrasg) {
 		    /* This is an array assignment. */
 		    char *arr[2], **t, **a, **p;
@@ -3118,8 +3141,7 @@ paramsubst(LinkList l, LinkNode n, char 
      * (afterward) may split the joined value (e.g. (s:-:) sets "spsep").  One
      * exception is that ${name:-word} and ${name:+word} will have already
      * done any requested splitting of the word value with quoting preserved.
-     * "ssub" is true when we are called from singsub (via prefork):
-     * it means that we must join arrays and should not split words. */
+     */
     if (ssub || (spbreak && isarr >= 0) || spsep || sep) {
 	if (isarr) {
 	    val = sepjoin(aval, sep, 1);
Index: Src/zsh.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/zsh.h,v
retrieving revision 1.179
diff -p -u -r1.179 zsh.h
--- Src/zsh.h	8 Jan 2012 16:02:55 -0000	1.179
+++ Src/zsh.h	10 Feb 2012 20:54:23 -0000
@@ -1648,6 +1648,9 @@ enum {
 #define PF_TYPESET	0x01	/* argument handled like typeset foo=bar */
 #define PF_ASSIGN	0x02	/* argument handled like the RHS of foo=bar */
 #define PF_SINGLE	0x04	/* single word substitution */
+#define PF_SPLIT        0x08    /* explicitly split nested substitution */
+#define PF_SHWORDSPLIT  0x10    /* SHWORDSPLIT in parameter expn */
+#define PF_NOSHWORDSPLIT 0x20   /* SHWORDSPLIT forced off in nested subst */
 
 /*
  * Structure for adding parameters in a module.
Index: Test/D04parameter.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/D04parameter.ztst,v
retrieving revision 1.60
diff -p -u -r1.60 D04parameter.ztst
--- Test/D04parameter.ztst	17 Aug 2011 19:00:10 -0000	1.60
+++ Test/D04parameter.ztst	10 Feb 2012 20:54:23 -0000
@@ -255,6 +255,20 @@
 >two
 >words
 
+  (setopt shwordsplit # ensure this doesn't get set in main shell...
+  test_splitting ()
+  {
+    array="one two three"
+    for e in $array; do
+      echo "'$e'"
+    done
+  }
+  test_split_var=
+  : ${test_split_var:=$(test_splitting)}
+  echo "_${test_split_var}_")
+0:SH_WORD_SPLIT inside $(...) inside ${...}
+>_'one' 'two' 'three'_
+
   print -l "${(f)$(print first line\\nsecond line\\nthird line)}"
 0:${(f)$(...)}
 >first line

-- 
Peter Stephenson <p.w.stephenson@ntlworld.com>
Web page now at http://homepage.ntlworld.com/p.w.stephenson/


  reply	other threads:[~2012-02-10 21:11 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-01-29  2:46 Felipe Contreras
2012-01-29  5:31 ` Benjamin R. Haskell
2012-01-29 13:44   ` Felipe Contreras
2012-01-29 14:25     ` Mikael Magnusson
2012-01-29 14:27       ` Mikael Magnusson
2012-01-29 18:38       ` Vincent Lefevre
2012-01-29 18:53         ` Mikael Magnusson
2012-01-29 19:01           ` Vincent Lefevre
2012-01-29 20:30             ` Peter Stephenson
2012-01-29 21:05               ` Mikael Magnusson
2012-01-30 22:15       ` Felipe Contreras
2012-01-30 22:18         ` Mikael Magnusson
2012-01-29 18:36 ` Peter Stephenson
2012-02-01  4:29   ` Bart Schaefer
2012-02-01 16:29     ` Bart Schaefer
2012-02-05 20:11       ` Peter Stephenson
2012-02-05 21:21         ` Bart Schaefer
2012-02-10 21:04           ` Peter Stephenson [this message]
2012-02-11 21:08             ` Bart Schaefer
2012-02-12 20:29               ` Peter Stephenson
2012-02-14 17:41         ` Peter Stephenson
2012-02-14 23:30           ` Chet Ramey
2012-02-15 12:36             ` Peter Stephenson
2012-02-19 23:45               ` Bart Schaefer
2012-02-20  8:48                 ` Bart Schaefer
2012-02-20 10:59                 ` Peter Stephenson
2012-02-20 17:09                   ` Bart Schaefer
2012-01-30 19:22 ` Felipe Contreras
2012-02-10 11:08 Jilles Tjoelker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120210210411.4e14cc3f@pws-pc.ntlworld.com \
    --to=p.w.stephenson@ntlworld.com \
    --cc=zsh-workers@zsh.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).