From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 25084 invoked by alias); 4 Dec 2015 22:01:51 -0000 Mailing-List: contact zsh-workers-help@zsh.org; run by ezmlm Precedence: bulk X-No-Archive: yes List-Id: Zsh Workers List List-Post: List-Help: X-Seq: 37314 Received: (qmail 22848 invoked from network); 4 Dec 2015 22:01:48 -0000 X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on f.primenet.com.au X-Spam-Level: X-Spam-Status: No, score=-1.9 required=5.0 tests=BAYES_00 autolearn=ham autolearn_force=no version=3.4.0 X-Originating-IP: [86.6.158.222] X-Spam: 0 X-Authority: v=2.1 cv=TYVrzkkh c=1 sm=1 tr=0 a=2SBOh4l1h08DI0L+aujZyQ==:117 a=2SBOh4l1h08DI0L+aujZyQ==:17 a=NLZqzBF-AAAA:8 a=kj9zAlcOel0A:10 a=ElmV1ERSWcEA:10 a=hD80L64hAAAA:8 a=RxJLj-ffZdOnC2Q3fncA:9 a=CjuIK1q_8ugA:10 Date: Fri, 4 Dec 2015 21:56:11 +0000 From: Peter Stephenson To: zsh-workers@zsh.org Subject: Re: The "set" utility outputs binary data Message-ID: <20151204215611.17d060fb@ntlworld.com> In-Reply-To: <20151204142900.0c035c6a@pwslap01u.europe.root.pri> References: <20151203140558.GA17469@zira.vinc17.org> <20151203142533.5aae65f6@pwslap01u.europe.root.pri> <20151204142900.0c035c6a@pwslap01u.europe.root.pri> X-Mailer: Claws Mail 3.11.1 (GTK+ 2.24.28; x86_64-redhat-linux-gnu) MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit On Fri, 04 Dec 2015 14:29:00 +0000 Peter Stephenson wrote: > It looks like the strategy would be to upgrade quotedzputs() to > interact better with nicezputs() and nicechar(). The code that's not > there at the moment is to pick the right sort of quotes, and you only > know that after the event at the moment, so the interface to those two > needs expanding. > > I'd propose not bothering to do this in the case where multibyte mode > isn't available (i.e. is not even compiled in). It's not useful enough > and wouldn't get much testing.
This seems to be going the right way; let me know of any oddities or unwanted side effects. Note a few "nice" representations have changed to fit $'..' conventions. pws diff --git a/Src/utils.c b/Src/utils.c index ca810de..d131383 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -411,7 +411,7 @@ putshout(int c) mod_export char * nicechar(int c) { - static char buf[6]; + static char buf[10]; char *s = buf; c &= 0xff; if (ZISPRINT(c)) @@ -427,7 +427,9 @@ nicechar(int c) goto done; } if (c == 0x7f) { - *s++ = '^'; + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; c = '?'; } else if (c == '\n') { *s++ = '\\'; @@ -436,7 +438,9 @@ nicechar(int c) *s++ = '\\'; c = 't'; } else if (c < 0x20) { - *s++ = '^'; + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; c += 0x40; } done: @@ -455,6 +459,22 @@ nicechar(int c) return buf; } +/* + * Return 1 if nicechar() would reformat this character. + */ + +/**/ +mod_export int +is_nicechar(int c) +{ + c &= 0xff; + if (ZISPRINT(c)) + return 0; + if (c & 0x80) + return !isset(PRINTEIGHTBIT); + return (c == 0x7f || c == '\n' || c == '\t' || c < 0x20); +} + /**/ #ifdef MULTIBYTE_SUPPORT static mbstate_t mb_shiftstate; @@ -532,7 +552,9 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) s = buf; if (!iswprint(c) && (c < 0x80 || !isset(PRINTEIGHTBIT))) { if (c == 0x7f) { - *s++ = '^'; + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; c = '?'; } else if (c == L'\n') { *s++ = '\\'; @@ -541,7 +563,9 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) *s++ = '\\'; c = 't'; } else if (c < 0x20) { - *s++ = '^'; + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; c += 0x40; } else if (c >= 0x80) { ret = -1; @@ -611,6 +635,23 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) return buf; } +/* + * Return 1 if wcs_nicechar() would reformat this character for display. 
+ */ + +/**/ +mod_export int is_wcs_nicechar(wchar_t c) +{ + if (!iswprint(c) && (c < 0x80 || !isset(PRINTEIGHTBIT))) { + if (c == 0x7f || c == L'\n' || c == L'\t' || c < 0x20) + return 1; + if (c >= 0x80) { + return (c >= 0x100); + } + } + return 0; +} + /**/ mod_export int zwcwidth(wint_t wc) @@ -4834,12 +4875,15 @@ niceztrlen(char const *s) * If outstrp is not NULL, set *outstrp to a zalloc'd version of * the output (still metafied). * - * If "heap" is non-zero, use the heap for *outstrp, else zalloc. + * If flags contains NICEFLAG_HEAP, use the heap for *outstrp, else + * zalloc. + * If flags contains NICEFLAG_QUOTE, the output is going to be within + * $'...', so quote "'" with a backslash. */ /**/ mod_export size_t -mb_niceformat(const char *s, FILE *stream, char **outstrp, int heap) +mb_niceformat(const char *s, FILE *stream, char **outstrp, int flags) { size_t l = 0, newl; int umlen, outalloc, outleft, eol = 0; @@ -4886,7 +4930,10 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int heap) cnt = 1; /* FALL THROUGH */ default: - fmt = wcs_nicechar(c, &newl, NULL); + if (c == L'\'' && (flags & NICEFLAG_QUOTE)) + fmt = "\\'"; + else + fmt = wcs_nicechar(c, &newl, NULL); break; } @@ -4920,13 +4967,71 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int heap) if (outstrp) { *outptr = '\0'; /* Use more efficient storage for returned string */ - *outstrp = heap ? dupstring(outstr) : ztrdup(outstr); + *outstrp = (flags & NICEFLAG_HEAP) ? dupstring(outstr) : ztrdup(outstr); free(outstr); } return l; } +/* + * Return 1 if mb_niceformat() would reformat this string, else 0. + */ + +/**/ +mod_export int +is_mb_niceformat(const char *s) +{ + int umlen, eol = 0, ret = 0; + wchar_t c; + char *ums, *ptr; + mbstate_t mbs; + + ums = ztrdup(s); + untokenize(ums); + ptr = unmetafy(ums, &umlen); + + memset(&mbs, 0, sizeof mbs); + while (umlen > 0) { + size_t cnt = eol ?
MB_INVALID : mbrtowc(&c, ptr, umlen, &mbs); + + switch (cnt) { + case MB_INCOMPLETE: + eol = 1; + /* FALL THROUGH */ + case MB_INVALID: + /* The byte didn't convert, so output it as a \M-... sequence. */ + if (is_nicechar(*ptr)) { + ret = 1; + break; + } + cnt = 1; + /* Get mbs out of its undefined state. */ + memset(&mbs, 0, sizeof mbs); + break; + case 0: + /* Careful: converting '\0' returns 0, but a '\0' is a + * real character for us, so we should consume 1 byte. */ + cnt = 1; + /* FALL THROUGH */ + default: + if (is_wcs_nicechar(c)) + ret = 1; + break; + } + + if (ret) + break; + + umlen -= cnt; + ptr += cnt; + } + + free(ums); + + return ret; +} + /* ztrdup multibyte string with nice formatting */ /**/ @@ -4935,7 +5040,7 @@ nicedup(const char *s, int heap) { char *retstr; - (void)mb_niceformat(s, NULL, &retstr, heap); + (void)mb_niceformat(s, NULL, &retstr, heap ? NICEFLAG_HEAP : 0); return retstr; } @@ -5717,22 +5822,35 @@ quotestring(const char *s, char **e, int instring) /* Unmetafy and output a string, quoted if it contains special characters. 
*/ /**/ -mod_export int +mod_export void quotedzputs(char const *s, FILE *stream) { int inquote = 0, c; /* check for empty string */ - if(!*s) - return fputs("''", stream); + if(!*s) { + fputs("''", stream); + return; + } - if (!hasspecial(s)) - return zputs(s, stream); +#ifdef MULTIBYTE_SUPPORT + if (is_mb_niceformat(s)) { + fputs("$'", stream); + mb_niceformat(s, stream, NULL, NICEFLAG_QUOTE); + fputc('\'', stream); + return; + } +#endif /* MULTIBYTE_SUPPORT */ + + if (!hasspecial(s)) { + zputs(s, stream); + return; + } if (isset(RCQUOTES)) { /* use rc-style quotes-within-quotes for the whole string */ if(fputc('\'', stream) < 0) - return EOF; + return; while(*s) { if (*s == Meta) c = *++s ^ 32; @@ -5741,16 +5859,16 @@ quotedzputs(char const *s, FILE *stream) s++; if (c == '\'') { if(fputc('\'', stream) < 0) - return EOF; + return; } else if(c == '\n' && isset(CSHJUNKIEQUOTES)) { if(fputc('\\', stream) < 0) - return EOF; + return; } if(fputc(c, stream) < 0) - return EOF; + return; } if(fputc('\'', stream) < 0) - return EOF; + return; } else { /* use Bourne-style quoting, avoiding empty quoted strings */ while(*s) { @@ -5762,31 +5880,30 @@ quotedzputs(char const *s, FILE *stream) if (c == '\'') { if(inquote) { if(fputc('\'', stream) < 0) - return EOF; + return; inquote=0; } if(fputs("\\'", stream) < 0) - return EOF; + return; } else { if (!inquote) { if(fputc('\'', stream) < 0) - return EOF; + return; inquote=1; } if(c == '\n' && isset(CSHJUNKIEQUOTES)) { if(fputc('\\', stream) < 0) - return EOF; + return; } if(fputc(c, stream) < 0) - return EOF; + return; } } if (inquote) { if(fputc('\'', stream) < 0) - return EOF; + return; } } - return 0; } /* Double-quote a metafied string. 
*/ diff --git a/Src/zsh.h b/Src/zsh.h index d3bfcef..caf7def 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -3051,6 +3051,12 @@ enum { #define AFTERTRAPHOOK (zshhooks + 2) #ifdef MULTIBYTE_SUPPORT +/* Final argument to mb_niceformat() */ +enum { + NICEFLAG_HEAP = 1, /* Heap allocation where needed */ + NICEFLAG_QUOTE = 2, /* Result will appear in $'...' */ +}; + /* Metafied input */ #define nicezputs(str, outs) (void)mb_niceformat((str), (outs), NULL, 0) #define MB_METACHARINIT() mb_charinit()