zsh-workers
 help / color / mirror / code / Atom feed
* PATCH: get off my case
@ 2015-05-21  8:54 Peter Stephenson
  2015-05-21  9:29 ` Mikael Magnusson
  2015-05-21  9:34 ` Peter Stephenson
  0 siblings, 2 replies; 3+ messages in thread
From: Peter Stephenson @ 2015-05-21  8:54 UTC (permalink / raw)
  To: Zsh Hackers' List

On closer examination, it turns out the previous "case" featurama could
be divided cleanly (if that's an appropriate word in this context) into
two parts.

First, there was code to handle sh-style parenthesis handling, what you
get when you set SH_GLOB, by sticking words back together with a "|" and
treating the result as a single pattern.  This is now redundant with the
alternations being handled separately.  So sh-style cases can be left
alone and continue to work properly without having to force initial
parentheses to match singly with SH_GLOB off.

Second, there was code to handle zsh patterns when a fully parenthesised
expression turned up with SH_GLOB off.  This is where the hackfest on
the internal whitespace took place.  This part is still needed to
reconcile the two worlds when we revert to only matching separate
opening parentheses in the SH_GLOB case.  However, it can be resurrected
without damage to the other part.

Note there are no further changes to wordcode here.

Case solved.

pws

diff --git a/Src/lex.c b/Src/lex.c
index 87b0cd3..841fb0b 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -761,8 +761,6 @@ gettok(void)
 	lexstop = 0;
 	return BAR;
     case LX1_INPAR:
-	if (incasepat == 2)
-	    return INPAR;
 	d = hgetc();
 	if (d == '(') {
 	    if (infor) {
diff --git a/Src/parse.c b/Src/parse.c
index c486699..7d618cd 100644
--- a/Src/parse.c
+++ b/Src/parse.c
@@ -1152,7 +1152,7 @@ par_case(int *cmplx)
 	YYERRORV(oecused);
     }
     brflag = (tok == INBRACE);
-    incasepat = 2;
+    incasepat = 1;
     incmdpos = 0;
     noaliases = ona;
     nocorrect = onc;
@@ -1165,10 +1165,8 @@ par_case(int *cmplx)
 	    zshlex();
 	if (tok == OUTBRACE)
 	    break;
-	if (tok == INPAR) {
-	    incasepat = 1;
+	if (tok == INPAR)
 	    zshlex();
-	}
 	if (tok != STRING)
 	    YYERRORV(oecused);
 	if (!strcmp(tokstr, "esac"))
@@ -1178,19 +1176,96 @@ par_case(int *cmplx)
 	pp = ecadd(0);
 	palts = ecadd(0);
 	nalts = 0;
+	/*
+	 * Hack here.
+	 *
+	 * [Pause for astonished hubbub to subside.]
+	 *
+	 * The next token we get may be
+	 * - ")" or "|" if we're looking at an honest-to-god
+	 *   "case" pattern, either because there's no opening
+	 *   parenthesis, or because SH_GLOB is set and we
+	 *   managed to grab an initial "(" to mark the start
+	 *   of the case pattern.
+	 * - Something else --- we don't care what --- because
+	 *   we're parsing a complete "(...)" as a complete
+	 *   zsh pattern.  In that case, we treat this as a
+	 *   single instance of a case pattern but we pretend
+	 *   we're doing proper case parsing --- in which the
+	 *   parentheses and bar are in different words from
+	 *   the string, so may be separated by whitespace.
+	 *   So we quietly massage the whitespace and hope
+	 *   no one noticed.  This is horrible, but it's
+	 *   unfortunately too difficult to combine traditional
+	 *   zsh patterns with a properly parsed case pattern
+	 *   without generating incompatibilities which aren't
+	 *   all that popular (I've discovered).
+	 * - We can also end up with something other than ")" or "|"
+	 *   just because we're looking at garbage.
+	 *
+	 * Because of the second case, what happens next might
+	 * be the start of the command after the pattern, so we
+	 * need to treat it as in command position.  Luckily
+	 * this doesn't affect our ability to match a | or ) as
+	 * these are valid on command lines.
+	 */
+	incasepat = 0;
+	incmdpos = 1;
 	for (;;) {
-	    ecstr(str);
-	    ecadd(ecnpats++);
-	    nalts++;
-
 	    zshlex();
 	    if (tok == OUTPAR) {
+		ecstr(str);
+		ecadd(ecnpats++);
+		nalts++;
+
 		incasepat = 0;
 		incmdpos = 1;
 		zshlex();
 		break;
-	    } else if (tok != BAR)
+	    } else if (tok == BAR) {
+		ecstr(str);
+		ecadd(ecnpats++);
+		nalts++;
+
+		incasepat = 1;
+		incmdpos = 0;
+	    } else {
+		if (!nalts && str[0] == Inpar) {
+		    int pct = 0, sl;
+		    char *s;
+
+		    for (s = str; *s; s++) {
+			if (*s == Inpar)
+			    pct++;
+			if (!pct)
+			    break;
+			if (pct == 1) {
+			    if (*s == Bar || *s == Inpar)
+				while (iblank(s[1]))
+				    chuck(s+1);
+			    if (*s == Bar || *s == Outpar)
+				while (iblank(s[-1]) &&
+				       (s < str + 1 || s[-2] != Meta))
+				    chuck(--s);
+			}
+			if (*s == Outpar)
+			    pct--;
+		    }
+		    if (*s || pct || s == str)
+			YYERRORV(oecused);
+		    /* Simplify pattern by removing surrounding (...) */
+		    sl = strlen(str);
+		    DPUTS(*str != Inpar || str[sl - 1] != Outpar,
+			  "BUG: strange case pattern");
+		    str[sl - 1] = '\0';
+		    chuck(str);
+		    ecstr(str);
+		    ecadd(ecnpats++);
+		    nalts++;
+		    break;
+		}
 		YYERRORV(oecused);
+	    }
 
 	    zshlex();
 	    if (tok != STRING)
@@ -1208,7 +1283,7 @@ par_case(int *cmplx)
 	    break;
 	if (tok != DSEMI && tok != SEMIAMP && tok != SEMIBAR)
 	    YYERRORV(oecused);
-	incasepat = 2;
+	incasepat = 1;
 	incmdpos = 0;
 	zshlex();
     }
diff --git a/Test/A01grammar.ztst b/Test/A01grammar.ztst
index 41fb486..50058e2 100644
--- a/Test/A01grammar.ztst
+++ b/Test/A01grammar.ztst
@@ -614,7 +614,8 @@
 >mytrue
 >END
 
-  fn() {
+  (emulate sh -c '
+   fn() {
     case $1 in
       ( one | two | three )
       print Matched $1
@@ -627,6 +628,7 @@
       ;;
     esac
   }
+  '
   which fn
   fn one
   fn two
@@ -635,8 +637,8 @@
   fn five
   fn six
   fn abecedinarian
-  fn xylophone
-0: case word handling
+  fn xylophone)
+0: case word handling in sh emulation (SH_GLOB parentheses)
 >fn () {
 >	case $1 in
 >		(one | two | three) print Matched $1 ;;
@@ -665,3 +667,31 @@
 0: case patterns within words
 >1 OK
 >2 OK
+
+  case horrible in
+    ([a-m])(|[n-z])rr(|ib(um|le|ah)))
+    print It worked
+    ;;
+  esac
+  case "a string with separate words" in
+    (*with separate*))
+    print That worked, too
+    ;;
+  esac
+0:Unbalanced parentheses and spaces with zsh pattern
+>It worked
+>That worked, too
+
+  case horrible in
+    (([a-m])(|[n-z])rr(|ib(um|le|ah)))
+    print It worked
+    ;;
+  esac
+  case "a string with separate words" in
+    (*with separate*)
+    print That worked, too
+    ;;
+  esac
+0:Balanced parentheses and spaces with zsh pattern
+>It worked
+>That worked, too


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: PATCH: get off my case
  2015-05-21  8:54 PATCH: get off my case Peter Stephenson
@ 2015-05-21  9:29 ` Mikael Magnusson
  2015-05-21  9:34 ` Peter Stephenson
  1 sibling, 0 replies; 3+ messages in thread
From: Mikael Magnusson @ 2015-05-21  9:29 UTC (permalink / raw)
  To: Peter Stephenson; +Cc: Zsh Hackers' List

On Thu, May 21, 2015 at 10:54 AM, Peter Stephenson
<p.stephenson@samsung.com> wrote:
> On closer examination, it turns out the previous "case" featurama could
> be divided cleanly (if that's an appropriate word in this context) into
> two parts.
>
> First, there was code to handle sh-style parenthesis handling, what you
> get when you set SH_GLOB, by sticking words back together with a "|" and
> treating the result as a single pattern.  This is now redundant with the
> alternations being handled separately.  So sh-style cases can be left
> alone and continue to work properly without having to force initial
> parentheses to match singly with SH_GLOB off.
>
> Second, there was code to handle zsh patterns when a fully parenthesised
> expression turned up with SH_GLOB off.  This is where the hackfest on
> the internal whitespace took place.  This part is still needed to
> reconcile the two worlds when we revert to only matching separate
> opening parentheses in the SH_GLOB case.  However, it can be resurrected
> without damage to the other part.
>
> Note there are no further changes to wordcode here.
>
> Case solved.

I can confirm that zcompile of the distributed functions succeeds now,
as well as my own functions.

> diff --git a/Src/lex.c b/Src/lex.c
> index 87b0cd3..841fb0b 100644
> --- a/Src/lex.c
> +++ b/Src/lex.c
> +        *   "case" patten, either because there's no opening

pattern

> +        *   unfortunately too difficult to comine traditional

combine

-- 
Mikael Magnusson
http://mika.l3ib.org/tmp/2015-05-21-112239.png


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: PATCH: get off my case
  2015-05-21  8:54 PATCH: get off my case Peter Stephenson
  2015-05-21  9:29 ` Mikael Magnusson
@ 2015-05-21  9:34 ` Peter Stephenson
  1 sibling, 0 replies; 3+ messages in thread
From: Peter Stephenson @ 2015-05-21  9:34 UTC (permalink / raw)
  To: Zsh Hackers' List

Fix the documentation.

pws

diff --git a/Doc/Zsh/grammar.yo b/Doc/Zsh/grammar.yo
index a3f4927..4476fc3 100644
--- a/Doc/Zsh/grammar.yo
+++ b/Doc/Zsh/grammar.yo
@@ -239,11 +239,17 @@ item(tt(case) var(word) tt(in) [ [tt(LPAR())] var(pattern) [ tt(|) var(pattern)
 Execute the var(list) associated with the first var(pattern)
 that matches var(word), if any.  The form of the patterns
 is the same as that used for filename generation.  See
-noderef(Filename Generation).  Note further that the whole
-pattern with alternatives is treated by the shell as equivalent
-to a group of patterns within parentheses, even though in
-the tt(case) syntax whitespace may appear about the parentheses and the
-vertical bar.
+noderef(Filename Generation).
+
+Note further that, unless the tt(SH_GLOB) option is set, the whole
+pattern with alternatives is treated by the shell as equivalent to a
+group of patterns within parentheses, although white space may appear
+about the parentheses and the vertical bar and will be stripped from the
+pattern at those points.  White space may appear elsewhere in the
+pattern; this is not stripped.  If the tt(SH_GLOB) option is set, so
+that an opening parenthesis can be unambiguously treated as part of the
+case syntax, the expression is parsed into separate words and these are
+treated as strict alternatives (as in other shells).
 
 If the var(list) that is executed is terminated with tt(;&) rather than
 tt(;;), the following list is also executed.  The rule for


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-05-21  9:35 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-21  8:54 PATCH: get off my case Peter Stephenson
2015-05-21  9:29 ` Mikael Magnusson
2015-05-21  9:34 ` Peter Stephenson

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).