From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 12529 invoked from network); 11 Oct 2005 11:43:57 -0000 Received: from news.dotsrc.org (HELO a.mx.sunsite.dk) (130.225.247.88) by ns1.primenet.com.au with SMTP; 11 Oct 2005 11:43:57 -0000 Received: (qmail 44095 invoked from network); 11 Oct 2005 11:43:50 -0000 Received: from sunsite.dk (130.225.247.90) by a.mx.sunsite.dk with SMTP; 11 Oct 2005 11:43:50 -0000 Received: (qmail 27583 invoked by alias); 11 Oct 2005 11:43:38 -0000 Mailing-List: contact zsh-workers-help@sunsite.dk; run by ezmlm Precedence: bulk X-No-Archive: yes X-Seq: 21863 Received: (qmail 27491 invoked from network); 11 Oct 2005 11:43:37 -0000 Received: from news.dotsrc.org (HELO a.mx.sunsite.dk) (130.225.247.88) by sunsite.dk with SMTP; 11 Oct 2005 11:43:37 -0000 Received: (qmail 41640 invoked from network); 11 Oct 2005 11:43:37 -0000 Received: from cluster-c.mailcontrol.com (HELO rly07c.srv.mailcontrol.com) (168.143.177.190) by a.mx.sunsite.dk with SMTP; 11 Oct 2005 11:43:35 -0000 Received: from exchange03.csr.com (mailhost1.csr.com [81.105.217.43]) by rly07c.srv.mailcontrol.com (MailControl) with ESMTP id j9BBhTU8020344 for ; Tue, 11 Oct 2005 12:43:29 +0100 Received: from news01 ([10.103.143.38]) by exchange03.csr.com with Microsoft SMTPSVC(5.0.2195.6713); Tue, 11 Oct 2005 12:45:33 +0100 Date: Tue, 11 Oct 2005 12:43:28 +0100 From: Peter Stephenson To: zsh-workers@sunsite.dk Subject: Re: [bug] backslash stripped in sh/ksh emulation Message-Id: <20051011124328.10ba41f8.pws@csr.com> In-Reply-To: <20051011123624.70fc9626.pws@csr.com> References: <20051011083842.GA5380@sc> <20051011123624.70fc9626.pws@csr.com> Organization: Cambridge Silicon Radio X-Mailer: Sylpheed version 0.9.12 (GTK+ 1.2.10; i386-redhat-linux-gnu) Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit X-OriginalArrivalTime: 11 Oct 2005 11:45:33.0190 (UTC) FILETIME=[4995EA60:01C5CE59] X-Scanned-By: MailControl A-05-40-00 
(www.mailcontrol.com) on 10.67.0.117 X-Spam-Checker-Version: SpamAssassin 3.0.4 (2005-06-05) on f.primenet.com.au X-Spam-Level: X-Spam-Status: No, score=-2.5 required=5.0 tests=AWL,BAYES_00 autolearn=ham version=3.0.4 Peter Stephenson wrote: > So I've introduced a variant of Bnull, the ghost of a backslash, called > Bnullkeep. This is only inserted in the code used for globsubst, isn't > removed by remnulargs(), and is explicitly ignored by pattern matching. If > the pattern match failed then untokenize() will restore the backslash to > output the original string. Er, and here's the actual code... Note there is a minor fix to ztst.zsh which was garbling \'s in output from diff because it used echo's backslash convention. Index: Src/glob.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/glob.c,v retrieving revision 1.46 diff -u -r1.46 glob.c --- Src/glob.c 18 Aug 2005 10:17:52 -0000 1.46 +++ Src/glob.c 11 Oct 2005 11:12:35 -0000 @@ -2487,19 +2487,29 @@ mod_export void tokenize(char *s) { - zshtokenize(s, 0); + zshtokenize(s, 0, 0); } +/* + * shtokenize is used when we tokenize a string with GLOB_SUBST set. + * In that case we need to retain backslashes when we turn the + * pattern back into a string, so that the string is not + * modified if it failed to match a pattern. + * + * It may be modified by the effect of SH_GLOB which turns off + * various zsh-specific options. + */ + /**/ mod_export void shtokenize(char *s) { - zshtokenize(s, isset(SHGLOB)); + zshtokenize(s, 1, isset(SHGLOB)); } /**/ static void -zshtokenize(char *s, int shglob) +zshtokenize(char *s, int glbsbst, int shglob) { char *t; int bslash = 0; @@ -2508,9 +2518,10 @@ cont: switch (*s) { case Bnull: + case Bnullkeep: case '\\': if (bslash) { - s[-1] = Bnull; + s[-1] = glbsbst ? Bnullkeep : Bnull; break; } bslash = 1; @@ -2519,7 +2530,7 @@ if (shglob) break; if (bslash) { - s[-1] = Bnull; + s[-1] = glbsbst ? 
Bnullkeep : Bnull; break; } t = s; @@ -2549,7 +2560,7 @@ for (t = ztokens; *t; t++) if (*t == *s) { if (bslash) - s[-1] = Bnull; + s[-1] = glbsbst ? Bnullkeep : Bnull; else *s = (t - ztokens) + Pound; break; @@ -2569,12 +2580,23 @@ char *o = s, c; while ((c = *s++)) - if (INULL(c)) { + if (c == Bnullkeep) { + /* + * An active backslash that needs to be turned back into + * a real backslash for output. However, we don't + * do that yet since we need to ignore it during + * pattern matching. + */ + continue; + } else if (INULL(c)) { char *t = s - 1; - while ((c = *s++)) - if (!INULL(c)) + while ((c = *s++)) { + if (c == Bnullkeep) + *t++ = '\\'; + else if (!INULL(c)) *t++ = c; + } *t = '\0'; if (!*o) { o[0] = Nularg; Index: Src/lex.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/lex.c,v retrieving revision 1.30 diff -u -r1.30 lex.c --- Src/lex.c 10 Aug 2005 10:56:41 -0000 1.30 +++ Src/lex.c 11 Oct 2005 11:12:35 -0000 @@ -33,7 +33,7 @@ /* tokens */ /**/ -mod_export char ztokens[] = "#$^*()$=|{}[]`<>?~`,'\"\\"; +mod_export char ztokens[] = "#$^*()$=|{}[]`<>?~`,'\"\\\\"; /* parts of the current token */ Index: Src/pattern.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/pattern.c,v retrieving revision 1.28 diff -u -r1.28 pattern.c --- Src/pattern.c 20 Sep 2005 15:10:27 -0000 1.28 +++ Src/pattern.c 11 Oct 2005 11:12:36 -0000 @@ -260,13 +260,13 @@ static char endstr[] = { '/', /* file only */ - '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, + '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, Bnullkeep, /* all patterns */ Tilde, Hat, Pound /* extended glob only */ }; -#define PATENDSTRLEN_NORM 9 -#define PATENDSTRLEN_EXT 12 +#define PATENDSTRLEN_NORM 10 +#define PATENDSTRLEN_EXT 13 /* Default size for pattern buffer */ @@ -1240,6 +1240,13 @@ */ return 0; break; + case Bnullkeep: + /* + * Marker for restoring a backslash in output: + * does not match a 
character. + */ + return patcomppiece(flagp); + break; #ifdef DEBUG default: dputs("BUG: character not handled in patcomppiece"); Index: Src/subst.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/subst.c,v retrieving revision 1.40 diff -u -r1.40 subst.c --- Src/subst.c 7 Dec 2004 16:55:03 -0000 1.40 +++ Src/subst.c 11 Oct 2005 11:12:36 -0000 @@ -1945,7 +1945,7 @@ */ for (ptr = s; (c = *ptr) && c != '/'; ptr++) { - if ((c == Bnull || c == '\\') && ptr[1]) + if ((c == Bnull || c == Bnullkeep || c == '\\') && ptr[1]) { if (ptr[1] == '/') chuck(ptr); @@ -2846,11 +2846,11 @@ } zsfree(hsubr); for (tt = hsubl; *tt; tt++) - if (INULL(*tt)) + if (INULL(*tt) && *tt != Bnullkeep) chuck(tt--); untokenize(hsubl); for (tt = hsubr = ztrdup(ptr2); *tt; tt++) - if (INULL(*tt)) + if (INULL(*tt) && *tt != Bnullkeep) chuck(tt--); ptr2[-1] = del; if (sav) Index: Src/zsh.h =================================================================== RCS file: /cvsroot/zsh/zsh/Src/zsh.h,v retrieving revision 1.76 diff -u -r1.76 zsh.h --- Src/zsh.h 8 Aug 2005 16:49:11 -0000 1.76 +++ Src/zsh.h 11 Oct 2005 11:12:36 -0000 @@ -120,7 +120,10 @@ #define DEFAULT_IFS " \t\n\203 " -/* Character tokens */ +/* + * Character tokens. + * These should match the characters in ztokens, defined in lex.c + */ #define Pound ((char) 0x84) #define String ((char) 0x85) #define Hat ((char) 0x86) @@ -141,15 +144,33 @@ #define Tilde ((char) 0x95) #define Qtick ((char) 0x96) #define Comma ((char) 0x97) +/* + * Null arguments: placeholders for single and double quotes + * and backslashes. + */ #define Snull ((char) 0x98) #define Dnull ((char) 0x99) #define Bnull ((char) 0x9a) -#define Nularg ((char) 0x9b) +/* + * Backslash which will be returned to "\" instead of being stripped + * when we turn the string into a printable format. + */ +#define Bnullkeep ((char) 0x9b) +/* + * Null argument that does not correspond to any character. 
+ * This should be last as it does not appear in ztokens and + * is used to initialise the IMETA type in inittyptab(). + */ +#define Nularg ((char) 0x9c) -#define INULL(x) (((x) & 0xfc) == 0x98) +#define INULL(x) (((x) & 0xf8) == 0x98) +/* + * Take care to update the use of IMETA appropriately when adding + * tokens here. + */ /* Marker used in paramsubst for rc_expand_param */ -#define Marker ((char) 0x9c) +#define Marker ((char) 0xa0) /* chars that need to be quoted if meant literally */ Index: Test/D04parameter.ztst =================================================================== RCS file: /cvsroot/zsh/zsh/Test/D04parameter.ztst,v retrieving revision 1.12 diff -u -r1.12 D04parameter.ztst --- Test/D04parameter.ztst 22 Aug 2005 11:43:36 -0000 1.12 +++ Test/D04parameter.ztst 11 Oct 2005 11:12:36 -0000 @@ -196,6 +196,20 @@ >* boringfile evenmoreboringfile boringfile evenmoreboringfile >boringfile evenmoreboringfile +# The following tests a bug where globsubst didn't preserve +# backslashes when printing out the original string. + str1='\\*\\' + ( + setopt globsubst nonomatch + [[ \\\\ = $str1 ]] && print -r '\\ matched by' $str1 + [[ \\foo\\ = $str1 ]] && print -r '\\foo matched by' $str1 + [[ a\\b\\ = $str1 ]] || print -r 'a\\b not matched by' $str1 + ) +0:globsubst with backslashes +>\\ matched by \\*\\ +>\\foo matched by \\*\\ +>a\\b not matched by \\*\\ + print -l "${$(print one word)}" "${=$(print two words)}" 0:splitting of $(...) inside ${...} >one word Index: Test/ztst.zsh =================================================================== RCS file: /cvsroot/zsh/zsh/Test/ztst.zsh,v retrieving revision 1.22 diff -u -r1.22 ztst.zsh --- Test/ztst.zsh 9 Aug 2005 06:51:40 -0000 1.22 +++ Test/ztst.zsh 11 Oct 2005 11:12:36 -0000 @@ -280,7 +280,7 @@ diff_out=$(diff "$@") diff_ret="$?" 
if [[ "$diff_ret" != "0" ]]; then - echo "$diff_out" + print -r "$diff_out" fi return "$diff_ret" -- Peter Stephenson Software Engineer CSR PLC, Churchill House, Cambridge Business Park, Cowley Road Cambridge, CB4 0WZ, UK Tel: +44 (0)1223 692070 This message has been scanned for viruses by BlackSpider MailControl - www.blackspider.com