From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 17635 invoked from network); 20 Aug 2007 23:05:50 -0000 X-Spam-Checker-Version: SpamAssassin 3.2.1 (2007-05-02) on f.primenet.com.au X-Spam-Level: X-Spam-Status: No, score=-2.5 required=5.0 tests=AWL,BAYES_00 autolearn=ham version=3.2.1 Received: from news.dotsrc.org (HELO a.mx.sunsite.dk) (130.225.247.88) by ns1.primenet.com.au with SMTP; 20 Aug 2007 23:05:50 -0000 Received-SPF: none (ns1.primenet.com.au: domain at sunsite.dk does not designate permitted sender hosts) Received: (qmail 70041 invoked from network); 20 Aug 2007 21:19:01 -0000 Received: from sunsite.dk (130.225.247.90) by a.mx.sunsite.dk with SMTP; 20 Aug 2007 21:19:01 -0000 Received: (qmail 11690 invoked by alias); 20 Aug 2007 21:18:58 -0000 Mailing-List: contact zsh-workers-help@sunsite.dk; run by ezmlm Precedence: bulk X-No-Archive: yes X-Seq: 23784 Received: (qmail 11679 invoked from network); 20 Aug 2007 21:18:57 -0000 Received: from news.dotsrc.org (HELO a.mx.sunsite.dk) (130.225.247.88) by sunsite.dk with SMTP; 20 Aug 2007 21:18:57 -0000 Received: (qmail 69766 invoked from network); 20 Aug 2007 21:18:57 -0000 Received: from mtaout01-winn.ispmail.ntl.com (81.103.221.47) by a.mx.sunsite.dk with SMTP; 20 Aug 2007 21:18:50 -0000 Received: from aamtaout03-winn.ispmail.ntl.com ([81.103.221.35]) by mtaout01-winn.ispmail.ntl.com with ESMTP id <20070820211848.NZCX1783.mtaout01-winn.ispmail.ntl.com@aamtaout03-winn.ispmail.ntl.com>; Mon, 20 Aug 2007 22:18:48 +0100 Received: from pws-pc.ntlworld.com ([81.107.45.67]) by aamtaout03-winn.ispmail.ntl.com with SMTP id <20070820211848.BNBK26699.aamtaout03-winn.ispmail.ntl.com@pws-pc.ntlworld.com>; Mon, 20 Aug 2007 22:18:48 +0100 Date: Mon, 20 Aug 2007 22:18:29 +0100 From: Peter Stephenson To: zsh-workers@sunsite.dk, 419832-forwarded@bugs.debian.org, Alan Curry Subject: Re: Bug#419832: zsh: expanding non-ASCII filenames with Message-Id: <20070820221829.ef5abfc5.p.w.stephenson@ntlworld.com> In-Reply-To: <070817075515.ZM26686@torch.brasslantern.com> References: <20070817001222.GA19399@scowler.net> <200708170905.l7H9521T1534406@shell01.TheWorld.com> <20070817120844.GA9936@scowler.net> <20070817152210.6bb9559b@news01.csr.com> <070817075515.ZM26686@torch.brasslantern.com> X-Mailer: Sylpheed 2.3.1 (GTK+ 2.10.14; x86_64-redhat-linux-gnu) Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit On Fri, 17 Aug 2007 07:55:15 -0700 Bart Schaefer wrote: > torch% cat* > torch% cat a b > > So the recipe PWS wants is just to run "zsh -f". This and the version Clint posted both go through quotestring() with QT_BACKSLASH. So the fix is to improve handling of unprintable characters in that case. All the tests still pass, but I could have messed up some unusual case. By the way, I don't speak octal and I'd personally prefer \x, but it doesn't matter that much. Index: Src/utils.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/utils.c,v retrieving revision 1.164 diff -u -r1.164 utils.c --- Src/utils.c 10 May 2007 11:36:24 -0000 1.164 +++ Src/utils.c 20 Aug 2007 21:15:46 -0000 @@ -4124,6 +4124,51 @@ return 0; } + +static char * +addunprintable(char *v, const char *u, const char *uend) +{ + for (; u < uend; u++) { + /* + * Just do this byte by byte; there's no great + * advantage in being clever with multibyte + * characters if we don't think they're printable. + */ + int c; + if (*u == Meta) + c = STOUC(*++u ^ 32); + else + c = STOUC(*u); + switch (c) { + case '\0': + *v++ = '\\'; + *v++ = '0'; + if ('0' <= u[1] && u[1] <= '7') { + *v++ = '0'; + *v++ = '0'; + } + break; + + case '\007': *v++ = '\\'; *v++ = 'a'; break; + case '\b': *v++ = '\\'; *v++ = 'b'; break; + case '\f': *v++ = '\\'; *v++ = 'f'; break; + case '\n': *v++ = '\\'; *v++ = 'n'; break; + case '\r': *v++ = '\\'; *v++ = 'r'; break; + case '\t': *v++ = '\\'; *v++ = 't'; break; + case '\v': *v++ = '\\'; *v++ = 'v'; break; + + default: + *v++ = '\\'; + *v++ = '0' + ((c >> 6) & 7); + *v++ = '0' + ((c >> 3) & 7); + *v++ = '0' + (c & 7); + break; + } + } + + return v; +} + /* * Quote the string s and return the result. * @@ -4142,8 +4187,16 @@ { const char *u, *tt; char *v; - char *buf = hcalloc(4 * strlen(s) + 1); + /* + * With QT_BACKSLASH we may need to use $'\300' stuff. + * Keep memory usage within limits by allocating temporary + * storage and using heap for correct size at end. + */ + int alloclen = (instring == QT_BACKSLASH ? 7 : 4) * strlen(s) + 1; + char *buf = zshcalloc(alloclen); int sf = 0; + convchar_t cc; + const char *uend; DPUTS(instring < QT_BACKSLASH || instring > QT_DOLLARS, "BUG: bad quote type in quotestring"); @@ -4154,10 +4207,9 @@ * As we test for printability here we need to be able * to look for multibyte characters. */ - convchar_t cc; MB_METACHARINIT(); while (*u) { - const char *uend = u + MB_METACHARLENCONV(u, &cc); + uend = u + MB_METACHARLENCONV(u, &cc); if (e && !sf && *e <= u) { *e = v; @@ -4183,53 +4235,19 @@ *v++ = *u++; } else { /* Not printable */ - for (; u < uend; u++) { - /* - * Just do this byte by byte; there's no great - * advantage in being clever with multibyte - * characters if we don't think they're printable. - */ - int c; - if (*u == Meta) - c = STOUC(*++u ^ 32); - else - c = STOUC(*u); - switch (c) { - case '\0': - *v++ = '\\'; - *v++ = '0'; - if ('0' <= u[1] && u[1] <= '7') { - *v++ = '0'; - *v++ = '0'; - } - break; - - case '\007': *v++ = '\\'; *v++ = 'a'; break; - case '\b': *v++ = '\\'; *v++ = 'b'; break; - case '\f': *v++ = '\\'; *v++ = 'f'; break; - case '\n': *v++ = '\\'; *v++ = 'n'; break; - case '\r': *v++ = '\\'; *v++ = 'r'; break; - case '\t': *v++ = '\\'; *v++ = 't'; break; - case '\v': *v++ = '\\'; *v++ = 'v'; break; - - default: - *v++ = '\\'; - *v++ = '0' + ((c >> 6) & 7); - *v++ = '0' + ((c >> 3) & 7); - *v++ = '0' + (c & 7); - break; - } - } + v = addunprintable(v, u, uend); + u = uend; } } } else { /* - * Here the only special characters are syntactic, so - * we can go through bytewise. + * Here there are syntactic special characters, so + * we start by going through bytewise. */ - for (; *u; u++) { + while (*u) { + int dobackslash = 0; if (e && *e == u) *e = v, sf = 1; if (*u == Tick || *u == Qtick) { @@ -4239,8 +4257,6 @@ while (*u && *u != c) *v++ = *u++; *v++ = c; - if (!*u) - u--; continue; } else if ((*u == Qstring || *u == '$') && u[1] == '\'' && instring == QT_DOUBLE) { @@ -4268,9 +4284,7 @@ *v++ = *u++; } if (*u) - *v++ = *u; - else - u--; + *v++ = *u++; continue; } else if (ispecial(*u) && @@ -4296,13 +4310,51 @@ *v++ = '"', *v++ = '\n', *v++ = '"'; else *v++ = '\'', *v++ = '\''; + u++; continue; - } else - *v++ = '\\'; + } else { + /* + * We'll need a backslash, but don't add it + * yet since if the character isn't printable + * we'll have to upgrade it to $'...'. + */ + dobackslash = 1; + } } - if(*u == Meta) + + if (itok(*u) || instring != QT_BACKSLASH) { + /* Needs to be passed straight through. */ + if (dobackslash) + *v++ = '\\'; *v++ = *u++; - *v++ = *u; + continue; + } + + /* + * Now check if the output is unprintable in the + * current character set. + */ + uend = u + MB_METACHARLENCONV(u, &cc); + if ( +#ifdef MULTIBYTE_SUPPORT + cc != WEOF && +#endif + WC_ISPRINT(cc)) { + if (dobackslash) + *v++ = '\\'; + while (u < uend) { + if (*u == Meta) + *v++ = *u++; + *v++ = *u++; + } + } else { + /* Not printable */ + *v++ = '$'; + *v++ = '\''; + v = addunprintable(v, u, uend); + *v++ = '\''; + u = uend; + } } } *v = '\0'; @@ -4311,7 +4363,9 @@ *e = v, sf = 1; DPUTS(e && !sf, "BUG: Wild pointer *e in quotestring()"); - return buf; + v = dupstring(buf); + zfree(buf, alloclen); + return v; } /* Unmetafy and output a string, quoted if it contains special characters. */ -- Peter Stephenson Web page now at http://homepage.ntlworld.com/p.w.stephenson/