zsh-workers
 help / color / mirror / code / Atom feed
From: Peter Stephenson <p.stephenson@samsung.com>
To: zsh-workers@zsh.org
Subject: Re: Inconsistency with SHWORDSPLIT and leading spaces
Date: Fri, 06 Nov 2015 17:00:07 +0000	[thread overview]
Message-ID: <20151106170007.5196bd5e@pwslap01u.europe.root.pri> (raw)
In-Reply-To: <87a8qr75za.fsf@gmail.com>

On Fri, 6 Nov 2015 11:54:49 +0100
Christian Neukirchen <chneukirchen@gmail.com> wrote:
> juno ~% zsh -c 'setopt shwordsplit; x=foo; echo x${x:+ $x}' 
> xfoo

This is unlikely to be deliberate --- we should do *something* with the
space, not pretend it's not there.

This makes already insane code even worse, but it doesn't seem to have
broken anything in the tests.

Possible breakage is I haven't spotted some case where we use multsub()
with word splitting but we need to reset the knock-on splitting
(ws_at_start) for some reason similar to transforming the result into a
length (though I've picked that particular one up).

pws

diff --git a/Src/subst.c b/Src/subst.c
index 021d234..5289121 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -430,11 +430,16 @@ singsub(char **s)
  * set to 1.  Otherwise, *isarr is set to 0, and the result is put into *s,
  * with any necessary joining of multiple elements using sep (which can be
  * NULL to use IFS).  The return value is true iff the expansion resulted
- * in an empty list. */
+ * in an empty list.
+ *
+ * *ws_at_start is set to 1 if the string had whitespace at thes start
+ * that should cause word splitting against any preceeding string.
+ */
 
 /**/
 static int
-multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep)
+multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep,
+	int *ws_at_start)
 {
     int l;
     char **r, **p, *x = *s;
@@ -450,6 +455,7 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep)
 	    l++;
 	    if (!iwsep(STOUC(c)))
 		break;
+	    *ws_at_start = 1;
 	}
     }
 
@@ -1717,6 +1723,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
      * This is for compatibility.
      */
     int horrible_offset_hack = 0;
+    /*
+     * Signal back from multsub: with something like
+     *   x${:- $foo}
+     * with word-splitting active we need to split on that leading
+     * whitespace.  However, if there's no "x" the whitespace is
+     * simply removed.
+     */
+    int ws_at_start = 0;
 
     *s++ = '\0';
     /*
@@ -2265,7 +2279,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 	 * remove the aspar test and extract a value from an array, if
 	 * necessary, when we handle (P) lower down.
 	 */
-	if (multsub(&val, 0, (aspar ? NULL : &aval), &isarr, NULL) && quoted) {
+	if (multsub(&val, 0, (aspar ? NULL : &aval), &isarr, NULL,
+		    &ws_at_start) && quoted) {
 	    /* Empty quoted string --- treat as null string, not elided */
 	    isarr = -1;
 	    aval = (char **) hcalloc(sizeof(char *));
@@ -2736,7 +2751,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 		    split_flags = PREFORK_NOSHWORDSPLIT;
 		}
 		multsub(&val, split_flags, (aspar ? NULL : &aval),
-			&isarr, NULL);
+			&isarr, NULL, &ws_at_start);
 		copied = 1;
 		spbreak = 0;
 		/* Leave globsubst on if forced */
@@ -2765,13 +2780,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 		     * behavior on caller choice of PREFORK_SHWORDSPLIT. */
 		    multsub(&val,
 			    spbreak ? PREFORK_SINGLE : PREFORK_NOSHWORDSPLIT,
-			    NULL, &isarr, NULL);
+			    NULL, &isarr, NULL, &ws_at_start);
 		} else {
 		    if (spbreak)
 			split_flags = PREFORK_SPLIT|PREFORK_SHWORDSPLIT;
 		    else
 			split_flags = PREFORK_NOSHWORDSPLIT;
-		    multsub(&val, split_flags, &aval, &isarr, NULL);
+		    multsub(&val, split_flags, &aval, &isarr, NULL,
+			    &ws_at_start);
 		    spbreak = 0;
 		}
 		if (arrasg) {
@@ -3303,6 +3319,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 	}
 	if (haserr || errflag)
 	    return NULL;
+	ws_at_start = 0;
     }
     /*
      * This handles taking a length with ${#foo} and variations.
@@ -3341,6 +3358,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 	sprintf(buf, "%ld", len);
 	val = dupstring(buf);
 	isarr = 0;
+	ws_at_start = 0;
     }
     /* At this point we make sure that our arrayness has affected the
      * arrayness of the linked list.  Then, we can turn our value into
@@ -3370,6 +3388,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 	if (isarr) {
 	    val = sepjoin(aval, sep, 1);
 	    isarr = 0;
+	    ws_at_start = 0;
 	}
 	if (!ssub && (spbreak || spsep)) {
 	    aval = sepsplit(val, spsep, 0, 1);
@@ -3649,6 +3668,15 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
      * equivalent and only locally decide if we need to treat it
      * as a scalar.)
      */
+
+    /*
+     * If a multsub result had whitespace at the start and we're
+     * splitting and there's a previous string, now's the time to do so.
+     */
+    if (ws_at_start && aptr > ostr) {
+	insertlinknode(l, n, dupstrpfx(ostr, aptr - ostr)), incnode(n);
+	ostr = aptr;
+    }
     if (isarr) {
 	char *x;
 	char *y;
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index cb7079c..59c14a4 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -1744,3 +1744,57 @@
 >3_4_5_6
 >6
 >1_2_3_4_5_6
+
+ (setopt shwordsplit
+ do_test() {
+   print $#: "$@"
+ }
+ foo=bar
+ foo2="bar bar"
+ do_test ${:- foo}
+ do_test ${:- foo bar}
+ do_test ${:- $foo}
+ do_test ${:- $foo2}
+ do_test x${:- foo}
+ do_test x${:- foo bar}
+ do_test x${:- $foo}
+ do_test x${:- $foo2}
+ do_test x${foo:+ $foo}
+ )
+0:We Love SH_WORD_SPLIT Day celebrated with space at start of internal subst
+>1: foo
+>2: foo bar
+>1: bar
+>2: bar bar
+>2: x foo
+>3: x foo bar
+>2: x bar
+>3: x bar bar
+>2: x bar
+
+ (unsetopt shwordsplit # default, for clarity
+ do_test() {
+   print $#: "$@"
+ }
+ foo=bar
+ foo2="bar bar"
+ do_test ${:- foo}
+ do_test ${:- foo bar}
+ do_test ${:- $foo}
+ do_test ${:- $foo2}
+ do_test x${:- foo}
+ do_test x${:- foo bar}
+ do_test x${:- $foo}
+ do_test x${:- $foo2}
+ do_test x${foo:+ $foo}
+ )
+0:We Love NO_SH_WORD_SPLIT Even More Day celebrated as sanity check
+>1:  foo
+>1:  foo bar
+>1:  bar
+>1:  bar bar
+>1: x foo
+>1: x foo bar
+>1: x bar
+>1: x bar bar
+>1: x bar


  reply	other threads:[~2015-11-06 17:10 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-06 10:54 Christian Neukirchen
2015-11-06 17:00 ` Peter Stephenson [this message]
2015-11-07 17:42   ` Peter Stephenson
2015-11-07 19:43     ` Bart Schaefer
2015-11-08 18:18       ` Peter Stephenson
2015-11-08 18:55         ` Bart Schaefer
2015-11-09  6:03           ` Bart Schaefer
2015-11-11 17:49         ` PATCH: nested ${(P)} (formerly SHWORDSPLIT and leading spaces) Peter Stephenson
2015-11-11 18:13           ` Bart Schaefer
2015-11-11 21:55           ` Peter Stephenson
2015-11-12  9:46             ` Peter Stephenson
2015-11-12 14:19               ` Peter Stephenson
2015-11-13  8:17                 ` Jun T.
2015-11-13 15:07                   ` Jun T.
2015-11-14  1:33                     ` Bart Schaefer
2015-11-14  9:45                       ` Peter Stephenson
2015-11-14 18:51                         ` Bart Schaefer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151106170007.5196bd5e@pwslap01u.europe.root.pri \
    --to=p.stephenson@samsung.com \
    --cc=zsh-workers@zsh.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).