zsh-workers
 help / color / mirror / code / Atom feed
* PATCH: multibyte delimiters for substitutions and parameter flags
@ 2006-11-02 18:37 Peter Stephenson
  0 siblings, 0 replies; only message in thread
From: Peter Stephenson @ 2006-11-02 18:37 UTC (permalink / raw)
  To: Zsh hackers list

This is supposed to fix multibyte delimiters for substitutions in
modifiers when used in globbing or parameters (not yet history,
which is separate code), and for delimiters in the arguments of
parameter flags and similar.

Index: Src/glob.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/glob.c,v
retrieving revision 1.54
diff -u -r1.54 glob.c
--- Src/glob.c	1 Nov 2006 12:25:22 -0000	1.54
+++ Src/glob.c	2 Nov 2006 18:33:58 -0000
@@ -1243,9 +1243,10 @@
 		    else {
 			/* ... or a user name */
 			char sav, *tt;
+			int arglen;
 
 			/* Find matching delimiters */
-			tt = get_strarg(s);
+			tt = get_strarg(s, &arglen);
 			if (!*tt) {
 			    zerr("missing end of name");
 			    data = 0;
@@ -1255,7 +1256,7 @@
 			    sav = *tt;
 			    *tt = '\0';
 
-			    if ((pw = getpwnam(s + 1)))
+			    if ((pw = getpwnam(s + arglen)))
 				data = pw->pw_uid;
 			    else {
 				zerr("unknown user");
@@ -1268,7 +1269,7 @@
 			    data = 0;
 #endif /* !USE_GETPWNAM */
 			    if (sav)
-				s = tt + 1;
+				s = tt + arglen;
 			    else
 				s = tt;
 			}
@@ -1283,8 +1284,9 @@
 		    else {
 			/* ...or a delimited group name. */
 			char sav, *tt;
+			int arglen;
 
-			tt = get_strarg(s);
+			tt = get_strarg(s, &arglen);
 			if (!*tt) {
 			    zerr("missing end of name");
 			    data = 0;
@@ -1294,7 +1296,7 @@
 			    sav = *tt;
 			    *tt = '\0';
 
-			    if ((gr = getgrnam(s + 1)))
+			    if ((gr = getgrnam(s + arglen)))
 				data = gr->gr_gid;
 			    else {
 				zerr("unknown group");
@@ -1307,7 +1309,7 @@
 			    data = 0;
 #endif /* !USE_GETGRNAM */
 			    if (sav)
-				s = tt + 1;
+				s = tt + arglen;
 			    else
 				s = tt;
 			}
@@ -1438,8 +1440,7 @@
 			    tt = NULL;
 			}
 		    } else {
-			plus = 1;
-			tt = get_strarg(s);
+			tt = get_strarg(s, &plus);
 			if (!*tt)
 			{
 			    zerr("missing end of string");
Index: Src/params.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/params.c,v
retrieving revision 1.120
diff -u -r1.120 params.c
--- Src/params.c	11 Sep 2006 11:09:15 -0000	1.120
+++ Src/params.c	2 Nov 2006 18:34:00 -0000
@@ -947,7 +947,7 @@
        int *prevcharlen, int *nextcharlen)
 {
     int hasbeg = 0, word = 0, rev = 0, ind = 0, down = 0, l, i, ishash;
-    int keymatch = 0, needtok = 0;
+    int keymatch = 0, needtok = 0, arglen;
     char *s = *str, *sep = NULL, *t, sav, *d, **ta, **p, *tt, c;
     zlong num = 1, beg = 0, r = 0;
     Patprog pprog = NULL;
@@ -1004,28 +1004,28 @@
 		 * special interpretation by getindex() of `*' or `@'. */
 		break;
 	    case 'n':
-		t = get_strarg(++s);
+		t = get_strarg(++s, &arglen);
 		if (!*t)
 		    goto flagerr;
 		sav = *t;
 		*t = '\0';
-		num = mathevalarg(s + 1, &d);
+		num = mathevalarg(s + arglen, &d);
 		if (!num)
 		    num = 1;
 		*t = sav;
-		s = t;
+		s = t + arglen - 1;
 		break;
 	    case 'b':
 		hasbeg = 1;
-		t = get_strarg(++s);
+		t = get_strarg(++s, &arglen);
 		if (!*t)
 		    goto flagerr;
 		sav = *t;
 		*t = '\0';
-		if ((beg = mathevalarg(s + 1, &d)) > 0)
+		if ((beg = mathevalarg(s + arglen, &d)) > 0)
 		    beg--;
 		*t = sav;
-		s = t;
+		s = t + arglen - 1;
 		break;
 	    case 'p':
 		escapes = 1;
@@ -1033,15 +1033,16 @@
 	    case 's':
 		/* This gives the string that separates words *
 		 * (for use with the `w' flag).               */
-		t = get_strarg(++s);
+		t = get_strarg(++s, &arglen);
 		if (!*t)
 		    goto flagerr;
 		sav = *t;
 		*t = '\0';
-		sep = escapes ? getkeystring(s + 1, &waste, GETKEYS_SEP, NULL)
-		    : dupstring(s + 1);
+		s += arglen;
+		sep = escapes ? getkeystring(s, &waste, GETKEYS_SEP, NULL)
+		    : dupstring(s);
 		*t = sav;
-		s = t;
+		s = t + arglen - 1;
 		break;
 	    default:
 	      flagerr:
Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.65
diff -u -r1.65 subst.c
--- Src/subst.c	1 Nov 2006 12:25:22 -0000	1.65
+++ Src/subst.c	2 Nov 2006 18:34:01 -0000
@@ -1137,62 +1137,113 @@
     return ret;
 }
 
+
+/*
+ * Look for a delimited portion of a string.  The first (possibly
+ * multibyte) character at s is the delimiter.  Various forms
+ * of brackets are treated separately, as documented.
+ *
+ * Returns a pointer to the final delimiter.  Sets *lenp to the
+ * length of the final delimiter; a NUL causes *lenp to be set
+ * to zero since we shouldn't advance past it.  (The string is
+ * tokenized, so a NUL is a real end of string.)
+ */
+
 /**/
 char *
-get_strarg(char *s)
+get_strarg(char *s, int *lenp)
 {
-    char t = *s++;
-
-    if (!t)
-	return s - 1;
+    convchar_t del;
+    int len;
+    char tok = 0;
+
+    MB_METACHARINIT();
+    len = MB_METACHARLENCONV(s, &del);
+    if (!len) {
+	*lenp = 0;
+	return s;
+    }
 
-    switch (t) {
-    case '(':
-	t = ')';
+#ifdef MULTIBYTE_SUPPORT
+    if (del == WEOF)
+	del = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s);
+#endif
+    s += len;
+    switch (del) {
+    case ZWC('('):
+	del = ZWC(')');
 	break;
     case '[':
-	t = ']';
+	del = ZWC(']');
 	break;
     case '{':
-	t = '}';
+	del = ZWC('}');
 	break;
     case '<':
-	t = '>';
+	del = ZWC('>');
 	break;
     case Inpar:
-	t = Outpar;
+	tok = Outpar;
 	break;
     case Inang:
-	t = Outang;
+	tok = Outang;
 	break;
     case Inbrace:
-	t = Outbrace;
+	tok = Outbrace;
 	break;
     case Inbrack:
-	t = Outbrack;
+	tok = Outbrack;
 	break;
     }
 
-    while (*s && *s != t)
-	s++;
+    if (tok) {
+	/*
+	 * Looking for a matching token; we want the literal byte,
+	 * not a decoded multibyte character, so search specially.
+	 */
+	while (*s && *s != tok)
+	    s++;
+    } else {
+	convchar_t del2;
+	len = 0;
+	while (*s) {
+	    len = MB_METACHARLENCONV(s, &del2);
+#ifdef MULTIBYTE_SUPPORT
+	    if (del2 == WEOF)
+		del2 = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s);
+#endif
+	    if (del == del2)
+		break;
+	    s += len;
+	}
+    }
 
+    *lenp = len;
     return s;
 }
 
+/*
+ * Get an integer argument; update *s to the end of the
+ * final delimiter.  *delmatchp is set to 1 if we have matching
+ * delimiters and there was no error in the evaluation, else 0.
+ */
+
 /**/
 static int
-get_intarg(char **s)
+get_intarg(char **s, int *delmatchp)
 {
-    char *t = get_strarg(*s + 1);
+    int arglen;
+    char *t = get_strarg(*s, &arglen);
     char *p, sav;
     zlong ret;
 
+    *delmatchp = 0;
     if (!*t)
 	return -1;
     sav = *t;
     *t = '\0';
-    p = dupstring(*s + 2);
-    *s = t;
+    p = dupstring(*s + arglen);
+    *s = t + arglen;
     *t = sav;
     if (parsestr(p))
 	return -1;
@@ -1204,6 +1255,7 @@
 	return -1;
     if (ret < 0)
 	ret = -ret;
+    *delmatchp = 1;
     return ret < 0 ? -ret : ret;
 }
 
@@ -1540,8 +1592,8 @@
 	    int escapes = 0;
 	    int klen;
 #define UNTOK(C)  (itok(C) ? ztokens[(C) - Pound] : (C))
-#define UNTOK_AND_ESCAPE(X) {\
-		untokenize(X = dupstring(s + 1));\
+#define UNTOK_AND_ESCAPE(X, S) {\
+		untokenize(X = dupstring(S));\
 		if (escapes) {\
 		    X = getkeystring(X, &klen, GETKEYS_SEP, NULL);\
 		    X = metafy(X, klen, META_HREALLOC);\
@@ -1549,6 +1601,9 @@
 	    }
 
 	    for (s++; (c = *s) != ')' && c != Outpar; s++, tt = 0) {
+		int arglen;	/* length of modifier argument */
+		int delmatch;	/* integer delimiters matched OK */
+
 		switch (c) {
 		case ')':
 		case Outpar:
@@ -1578,9 +1633,11 @@
 		    flags |= SUB_SUBSTR;
 		    break;
 		case 'I':
-		    flnum = get_intarg(&s);
+		    s++;
+		    flnum = get_intarg(&s, &delmatch);
 		    if (flnum < 0)
 			goto flagerr;
+		    s--;
 		    break;
 
 		case 'L':
@@ -1658,16 +1715,16 @@
 		    tt = 1;
 		/* fall through */
 		case 'j':
-		    t = get_strarg(++s);
+		    t = get_strarg(++s, &arglen);
 		    if (*t) {
 			sav = *t;
 			*t = '\0';
 			if (tt)
-			    UNTOK_AND_ESCAPE(spsep)
+			    UNTOK_AND_ESCAPE(spsep, s + arglen)
 			else
-			    UNTOK_AND_ESCAPE(sep)
+			    UNTOK_AND_ESCAPE(sep, s + arglen)
 			*t = sav;
-			s = t;
+			s = t + arglen - 1;
 		    } else
 			goto flagerr;
 		    break;
@@ -1676,43 +1733,43 @@
 		    tt = 1;
 		/* fall through */
 		case 'r':
-		    sav = s[1];
-		    num = get_intarg(&s);
+		    s++;
+		    num = get_intarg(&s, &delmatch);
 		    if (num < 0)
 			goto flagerr;
 		    if (tt)
 			prenum = num;
 		    else
 			postnum = num;
-		    if (UNTOK(s[1]) != UNTOK(sav))
+		    if (!delmatch)
 			break;
-		    t = get_strarg(++s);
+		    t = get_strarg(s, &arglen);
 		    if (!*t)
 			goto flagerr;
 		    sav = *t;
 		    *t = '\0';
 		    if (tt)
-			UNTOK_AND_ESCAPE(premul)
+			UNTOK_AND_ESCAPE(premul, s + arglen)
 		    else
-			UNTOK_AND_ESCAPE(postmul)
+			UNTOK_AND_ESCAPE(postmul, s + arglen)
 		    *t = sav;
 		    sav = *s;
-		    s = t + 1;
+		    s = t + arglen;
 		    if (UNTOK(*s) != UNTOK(sav)) {
 			s--;
 			break;
 		    }
-		    t = get_strarg(s);
+		    t = get_strarg(s, &arglen);
 		    if (!*t)
 			goto flagerr;
 		    sav = *t;
 		    *t = '\0';
 		    if (tt)
-			UNTOK_AND_ESCAPE(preone)
+			UNTOK_AND_ESCAPE(preone, s + arglen)
 		    else
-			UNTOK_AND_ESCAPE(postone)
+			UNTOK_AND_ESCAPE(postone, s + arglen)
 		    *t = sav;
-		    s = t;
+		    s = t + arglen - 1;
 		    break;
 
 		case 'm':
@@ -3251,9 +3308,10 @@
 void
 modify(char **str, char **ptr)
 {
-    char *ptr1, *ptr2, *ptr3, del, *lptr, c, *test, *sep, *t, *tt, tc, *e;
-    char *copy, *all, *tmp, sav;
-    int gbal, wall, rec, al, nl;
+    char *ptr1, *ptr2, *ptr3, *lptr, c, *test, *sep, *t, *tt, tc, *e;
+    char *copy, *all, *tmp, sav, sav1, *ptr1end;
+    int gbal, wall, rec, al, nl, charlen, delmatch;
+    convchar_t del;
 
     test = NULL;
 
@@ -3282,20 +3340,48 @@
 		break;
 
 	    case 's':
-		/* TODO: multibyte delimiter */
 		c = **ptr;
 		(*ptr)++;
 		ptr1 = *ptr;
-		del = *ptr1++;
-		for (ptr2 = ptr1; *ptr2 != del && *ptr2; ptr2++);
+		MB_METACHARINIT();
+		charlen = MB_METACHARLENCONV(ptr1, &del);
+#ifdef MULTIBYTE_SUPPORT
+		if (del == WEOF)
+		    del = (wint_t)((*ptr1 == Meta) ? ptr1[1] ^ 32 : *ptr1);
+#endif
+		ptr1 += charlen;
+		for (ptr2 = ptr1, charlen = 0; *ptr2; ptr2 += charlen) {
+		    convchar_t del2;
+		    charlen = MB_METACHARLENCONV(ptr2, &del2);
+#ifdef MULTIBYTE_SUPPORT
+		    if (del2 == WEOF)
+			del2 = (wint_t)((*ptr2 == Meta) ?
+					ptr2[1] ^ 32 : *ptr2);
+#endif
+		    if (del2 == del)
+			break;
+		}
 		if (!*ptr2) {
 		    zerr("bad substitution");
 		    return;
 		}
-		*ptr2++ = '\0';
-		for (ptr3 = ptr2; *ptr3 != del && *ptr3; ptr3++);
-		if ((sav = *ptr3))
-		    *ptr3++ = '\0';
+		ptr1end = ptr2;
+		ptr2 += charlen;
+		sav1 = *ptr1end;
+		*ptr1end = '\0';
+		for (ptr3 = ptr2, charlen = 0; *ptr3; ptr3 += charlen) {
+		    convchar_t del3;
+		    charlen = MB_METACHARLENCONV(ptr3, &del3);
+#ifdef MULTIBYTE_SUPPORT
+		    if (del3 == WEOF)
+			del3 = (wint_t)((*ptr3 == Meta) ?
+					ptr3[1] ^ 32 : *ptr3);
+#endif
+		    if (del3 == del)
+			break;
+		}
+		sav = *ptr3;
+		*ptr3 = '\0';
 		if (*ptr1) {
 		    zsfree(hsubl);
 		    hsubl = ztrdup(ptr1);
@@ -3313,10 +3399,9 @@
 		for (tt = hsubr = ztrdup(ptr2); *tt; tt++)
 		    if (inull(*tt) && *tt != Bnullkeep)
 			chuck(tt--);
-		ptr2[-1] = del;
-		if (sav)
-		    ptr3[-1] = sav;
-		*ptr = ptr3 - 1;
+		*ptr1end = sav1;
+		*ptr3 = sav;
+		*ptr = ptr3 + charlen - 1;
 		break;
 
 	    case '&':
@@ -3335,13 +3420,13 @@
 	    case 'W':
 		wall = 1;
 		(*ptr)++;
-		ptr1 = get_strarg(ptr2 = *ptr);
+		ptr1 = get_strarg(ptr2 = *ptr, &charlen);
 		if ((sav = *ptr1))
 		    *ptr1 = '\0';
-		sep = dupstring(ptr2 + 1);
+		sep = dupstring(ptr2 + charlen);
 		if (sav)
 		    *ptr1 = sav;
-		*ptr = ptr1 + 1;
+		*ptr = ptr1 + charlen;
 		c = '\0';
 		break;
 
@@ -3350,8 +3435,8 @@
 		(*ptr)++;
 		break;
 	    case 'F':
-		rec = get_intarg(ptr);
 		(*ptr)++;
+		rec = get_intarg(ptr, &delmatch);
 		break;
 	    default:
 		*ptr = lptr;
Index: Test/D04parameter.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/D04parameter.ztst,v
retrieving revision 1.21
diff -u -r1.21 D04parameter.ztst
--- Test/D04parameter.ztst	13 Sep 2006 20:55:30 -0000	1.21
+++ Test/D04parameter.ztst	2 Nov 2006 18:34:01 -0000
@@ -867,3 +867,17 @@
 >andsomekept
 >andsomekept
 
+  file=/one/two/three/four
+  print ${file:fh}
+  print ${file:F.1.h}
+  print ${file:F+2+h}
+  print ${file:F(3)h}
+  print ${file:F<4>h}
+  print ${file:F{5}h}
+0:Modifiers with repetition
+>/
+>/one/two/three
+>/one/two
+>/one
+>/
+>/
Index: Test/D07multibyte.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/D07multibyte.ztst,v
retrieving revision 1.10
diff -u -r1.10 D07multibyte.ztst
--- Test/D07multibyte.ztst	13 Sep 2006 20:55:30 -0000	1.10
+++ Test/D07multibyte.ztst	2 Nov 2006 18:34:01 -0000
@@ -297,3 +297,17 @@
 >«κατέβην ¥«χθὲς»£ ¥¥«εἰς»£ «Πειραιᾶ
 >ςκατέβην ηςχθὲςΓλ τηςεἰςΓλ ςΠειραιᾶ
 # er... yeah, that looks right...
+
+  foo=picobarn
+  print ${foo:s£bar£rod£:s¥rod¥stick¥}
+0:Delimiters in modifiers
+>picostickn
+
+# TODO: if we get paired multibyte bracket delimiters to work
+# (as Emacs does, the smug so-and-so), the following should change.
+  foo=bar
+  print ${(r£5£¥X¥)foo}
+  print ${(l«10«»Y»£HI£)foo}
+0:Delimiters in parameter flags
+>barXX
+>YYYYYHIbar

-- 
Peter Stephenson <pws@csr.com>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


To access the latest news from CSR copy this link into a web browser:  http://www.csr.com/email_sig.php


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2006-11-02 18:37 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-11-02 18:37 PATCH: multibyte delimiters for substitutions and parameter flags Peter Stephenson

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).