From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 19440 invoked by alias); 7 Dec 2015 21:45:05 -0000 Mailing-List: contact zsh-workers-help@zsh.org; run by ezmlm Precedence: bulk X-No-Archive: yes List-Id: Zsh Workers List List-Post: List-Help: X-Seq: 37344 Received: (qmail 15293 invoked from network); 7 Dec 2015 21:45:02 -0000 X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on f.primenet.com.au X-Spam-Level: X-Spam-Status: No, score=-1.9 required=5.0 tests=BAYES_00 autolearn=ham autolearn_force=no version=3.4.0 X-Originating-IP: [86.6.158.222] X-Spam: 0 X-Authority: v=2.1 cv=RLtOZNW+ c=1 sm=1 tr=0 a=2SBOh4l1h08DI0L+aujZyQ==:117 a=2SBOh4l1h08DI0L+aujZyQ==:17 a=NLZqzBF-AAAA:8 a=kj9zAlcOel0A:10 a=ElmV1ERSWcEA:10 a=q2GGsy2AAAAA:8 a=fB9SRNqz1oeCsJSIqZwA:9 a=-hQIhpVIMpbQ3YuD:21 a=Dqg5ct8Bl38W8IPh:21 a=CjuIK1q_8ugA:10 Date: Mon, 7 Dec 2015 21:39:27 +0000 From: Peter Stephenson To: Bart Schaefer Cc: zsh-workers@zsh.org Subject: Re: The "set" utility outputs binary data Message-ID: <20151207213927.269719e2@ntlworld.com> In-Reply-To: <151207101308.ZM5827@torch.brasslantern.com> References: <20151203140558.GA17469@zira.vinc17.org> <20151203142533.5aae65f6@pwslap01u.europe.root.pri> <20151204142900.0c035c6a@pwslap01u.europe.root.pri> <20151204215611.17d060fb@ntlworld.com> <151206150844.ZM31242@torch.brasslantern.com> <20151207102449.5993404a@pwslap01u.europe.root.pri> <151207101308.ZM5827@torch.brasslantern.com> X-Mailer: Claws Mail 3.11.1 (GTK+ 2.24.28; x86_64-redhat-linux-gnu) MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit On Mon, 7 Dec 2015 10:13:08 -0800 Bart Schaefer wrote: > On Dec 7, 10:24am, Peter Stephenson wrote: > } > } (You can't seriously be complaining that "typeset -m IFS" now outputs > } > } IFS=$' \t\n\C-@' > } > } instead of raw binary, can you?) > > Goodness, no. 
Just the ${(V)...} substitution, mostly (the test cases > 37335 updates) and anyplace where it's in human-informational output > rather than machine-re-readable output. This attempts to restore the short form when not called from quotedzputs(). As a "free" bonus (that is, it's free to everyone else), you can use ${(q+)...} to get the same effect as the new quoting within parameters (so (V) does what it used to but (q+) gives you something a bit similar but readbackinable). I'll write some tests one day. I suppose you'll be wanting it to work, next. diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo index 564c70d..c6e7b6f 100644 --- a/Doc/Zsh/expn.yo +++ b/Doc/Zsh/expn.yo @@ -1067,6 +1067,11 @@ If a tt(q-) is given (only a single tt(q) may appear), a minimal form of single quoting is used that only quotes the string if needed to protect special characters. Typically this form gives the most readable output. + +If a tt(q+) is given, an extended form of minmal quoting is used that +causes unprintable characters to be rendered using tt($')var(...)tt('). +This quoting is similar to that used by the output of values by the +tt(typeset) family of commands. ) item(tt(Q))( Remove one level of quotes from the resulting words. diff --git a/Src/subst.c b/Src/subst.c index d9c9d24..bb1dd89 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -1887,12 +1887,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, if (quotetype == QT_DOLLARS || quotetype == QT_BACKSLASH_PATTERN) goto flagerr; - if (s[1] == '-') { + if (s[1] == '-' || s[1] == '+') { if (quotemod) goto flagerr; s++; quotemod = 1; - quotetype = QT_SINGLE_OPTIONAL; + quotetype = (*s == '-') ?
QT_SINGLE_OPTIONAL : + QT_QUOTEDZPUTS; } else { if (quotetype == QT_SINGLE_OPTIONAL) { /* extra q's after '-' not allowed */ @@ -3583,7 +3584,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, ap = aval; if (quotemod > 0) { - if (quotetype > QT_BACKSLASH) { + if (quotetype == QT_QUOTEDZPUTS) { + for (; *ap; ap++) + *ap = quotedzputs(*ap, NULL); + } else if (quotetype > QT_BACKSLASH) { int sl; char *tmp; @@ -3626,7 +3630,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, if (!copied) val = dupstring(val), copied = 1; if (quotemod > 0) { - if (quotetype > QT_BACKSLASH) { + if (quotetype == QT_QUOTEDZPUTS) { + val = quotedzputs(val, NULL); + } else if (quotetype > QT_BACKSLASH) { int sl; char *tmp; tmp = quotestring(val, NULL, quotetype); diff --git a/Src/utils.c b/Src/utils.c index fc2b192..1554fa0 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -387,6 +387,7 @@ putshout(int c) return 0; } +#ifdef MULTIBYTE_SUPPORT /* * Turn a character into a visible representation thereof. The visible * string is put together in a static buffer, and this function returns @@ -409,6 +410,73 @@ putshout(int c) /**/ mod_export char * +nicechar_sel(int c, int quotable) +{ + static char buf[10]; + char *s = buf; + c &= 0xff; + if (ZISPRINT(c)) + goto done; + if (c & 0x80) { + if (isset(PRINTEIGHTBIT)) + goto done; + *s++ = '\\'; + *s++ = 'M'; + *s++ = '-'; + c &= 0x7f; + if(ZISPRINT(c)) + goto done; + } + if (c == 0x7f) { + if (quotable) { + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; + } else + *s++ = '^'; + c = '?'; + } else if (c == '\n') { + *s++ = '\\'; + c = 'n'; + } else if (c == '\t') { + *s++ = '\\'; + c = 't'; + } else if (c < 0x20) { + if (quotable) { + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; + } else + *s++ = '^'; + c += 0x40; + } + done: + /* + * The resulting string is still metafied, so check if + * we are returning a character in the range that needs metafication. 
+ * This can't happen if the character is printed "nicely", so + * this results in a maximum of two bytes total (plus the null). + */ + if (imeta(c)) { + *s++ = Meta; + *s++ = c ^ 32; + } else + *s++ = c; + *s = 0; + return buf; +} + +/**/ +mod_export char * +nicechar(int c) +{ + return nicechar_sel(c, 0); +} + +#else /* MULTIBYTE_SUPPORT */ + +/**/ +mod_export char * nicechar(int c) { static char buf[10]; @@ -459,6 +527,8 @@ nicechar(int c) return buf; } +#endif /* MULTIBYTE_SUPPORT */ + /* * Return 1 if nicechar() would reformat this character. */ @@ -527,7 +597,7 @@ mb_charinit(void) /**/ mod_export char * -wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) +wcs_nicechar_sel(wchar_t c, size_t *widthp, char **swidep, int quotable) { static char *buf; static int bufalloc = 0, newalloc; @@ -552,9 +622,12 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) s = buf; if (!iswprint(c) && (c < 0x80 || !isset(PRINTEIGHTBIT))) { if (c == 0x7f) { - *s++ = '\\'; - *s++ = 'C'; - *s++ = '-'; + if (quotable) { + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; + } else + *s++ = '^'; c = '?'; } else if (c == L'\n') { *s++ = '\\'; @@ -563,9 +636,12 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) *s++ = '\\'; c = 't'; } else if (c < 0x20) { - *s++ = '\\'; - *s++ = 'C'; - *s++ = '-'; + if (quotable) { + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; + } else + *s++ = '^'; c += 0x40; } else if (c >= 0x80) { ret = -1; @@ -635,6 +711,13 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) return buf; } +/**/ +mod_export char * +wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) +{ + return wcs_nicechar_sel(c, widthp, swidep, 0); +} + /* * Return 1 if wcs_nicechar() would reformat this character for display. */ @@ -4918,7 +5001,7 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int flags) /* FALL THROUGH */ case MB_INVALID: /* The byte didn't convert, so output it as a \M-... sequence. 
*/ - fmt = nicechar(*ptr); + fmt = nicechar_sel(*ptr, flags & NICEFLAG_QUOTE); newl = strlen(fmt); cnt = 1; /* Get mbs out of its undefined state. */ @@ -4933,7 +5016,7 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int flags) if (c == L'\'' && (flags & NICEFLAG_QUOTE)) fmt = "\\'"; else - fmt = wcs_nicechar(c, &newl, NULL); + fmt = wcs_nicechar_sel(c, &newl, NULL, flags & NICEFLAG_QUOTE); break; } @@ -4967,8 +5050,13 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int flags) if (outstrp) { *outptr = '\0'; /* Use more efficient storage for returned string */ - *outstrp = (flags & NICEFLAG_HEAP) ? dupstring(outstr) : ztrdup(outstr); - free(outstr); + if (flags & NICEFLAG_NODUP) + *outstrp = outstr; + else { + *outstrp = (flags & NICEFLAG_HEAP) ? dupstring(outstr) : + ztrdup(outstr); + free(outstr); + } } return l; @@ -5834,38 +5922,76 @@ quotestring(const char *s, char **e, int instring) return v; } -/* Unmetafy and output a string, quoted if it contains special characters. */ +/* + * Unmetafy and output a string, quoted if it contains special + * characters. + * + * If stream is NULL, return the same output with any allocation on the + * heap. 
+ */ /**/ -mod_export void +mod_export char * quotedzputs(char const *s, FILE *stream) { int inquote = 0, c; + char *outstr, *ptr; /* check for empty string */ if(!*s) { + if (!stream) + return "''"; fputs("''", stream); - return; + return NULL; } #ifdef MULTIBYTE_SUPPORT if (is_mb_niceformat(s)) { - fputs("$'", stream); - mb_niceformat(s, stream, NULL, NICEFLAG_QUOTE); - fputc('\'', stream); - return; + if (stream) { + fputs("$'", stream); + mb_niceformat(s, stream, NULL, NICEFLAG_QUOTE); + fputc('\'', stream); + return NULL; + } else { + char *substr; + mb_niceformat(s, NULL, &substr, NICEFLAG_QUOTE|NICEFLAG_NODUP); + outstr = (char *)zhalloc(4 + strlen(substr)); + sprintf(outstr, "$'%s'", substr); + free(substr); + return outstr; + } } #endif /* MULTIBYTE_SUPPORT */ if (!hasspecial(s)) { - zputs(s, stream); - return; + if (stream) { + zputs(s, stream); + return NULL; + } else { + return dupstring(s); + } } + if (!stream) { + const char *cptr; + int l = strlen(s) + 2; + for (cptr = s; *cptr; cptr++) { + if (*cptr == Meta) + cptr++; + else if (*cptr == '\'') + l += isset(RCQUOTES) ? 
1 : 3; + } + ptr = outstr = zhalloc(l + 1); + } else { + ptr = outstr = NULL; + } if (isset(RCQUOTES)) { /* use rc-style quotes-within-quotes for the whole string */ - if(fputc('\'', stream) < 0) - return; + if (stream) { + if (fputc('\'', stream) < 0) + return NULL; + } else + *ptr++ = '\''; while(*s) { if (*s == Meta) c = *++s ^ 32; @@ -5873,52 +5999,98 @@ quotedzputs(char const *s, FILE *stream) c = *s; s++; if (c == '\'') { - if(fputc('\'', stream) < 0) - return; - } else if(c == '\n' && isset(CSHJUNKIEQUOTES)) { - if(fputc('\\', stream) < 0) - return; + if (stream) { + if (fputc('\'', stream) < 0) + return NULL; + } else + *ptr++ = '\''; + } else if (c == '\n' && isset(CSHJUNKIEQUOTES)) { + if (stream) { + if (fputc('\\', stream) < 0) + return NULL; + } else + *ptr++ = '\\'; + } + if (stream) { + if (fputc(c, stream) < 0) + return NULL; + } else { + if (imeta(c)) { + *ptr++ = Meta; + *ptr++ = c ^ 32; + } else + *ptr++ = c; } - if(fputc(c, stream) < 0) - return; } - if(fputc('\'', stream) < 0) - return; + if (stream) { + if (fputc('\'', stream) < 0) + return NULL; + } else + *ptr++ = '\''; } else { /* use Bourne-style quoting, avoiding empty quoted strings */ - while(*s) { + while (*s) { if (*s == Meta) c = *++s ^ 32; else c = *s; s++; if (c == '\'') { - if(inquote) { - if(fputc('\'', stream) < 0) - return; + if (inquote) { + if (stream) { + if (putc('\'', stream) < 0) + return NULL; + } else + *ptr++ = '\''; inquote=0; } - if(fputs("\\'", stream) < 0) - return; + if (stream) { + if (fputs("\\'", stream) < 0) + return NULL; + } else { + *ptr++ = '\\'; + *ptr++ = '\''; + } } else { if (!inquote) { - if(fputc('\'', stream) < 0) - return; + if (stream) { + if (fputc('\'', stream) < 0) + return NULL; + } else + *ptr++ = '\''; inquote=1; } - if(c == '\n' && isset(CSHJUNKIEQUOTES)) { - if(fputc('\\', stream) < 0) - return; + if (c == '\n' && isset(CSHJUNKIEQUOTES)) { + if (stream) { + if (fputc('\\', stream) < 0) + return NULL; + } else + *ptr++ = '\\'; + } + if 
(stream) { + if (fputc(c, stream) < 0) + return NULL; + } else { + if (imeta(c)) { + *ptr++ = Meta; + *ptr++ = c ^ 32; + } else + *ptr++ = c; } - if(fputc(c, stream) < 0) - return; } } if (inquote) { - if(fputc('\'', stream) < 0) - return; + if (stream) { + if (fputc('\'', stream) < 0) + return NULL; + } else + *ptr++ = '\''; } } + if (!stream) + *ptr++ = '\0'; + + return outstr; } /* Double-quote a metafied string. */ diff --git a/Src/zsh.h b/Src/zsh.h index caf7def..0302d68 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -272,7 +272,12 @@ enum { /* * As QT_BACKSLASH, but a NULL string is shown as ''. */ - QT_BACKSLASH_SHOWNULL + QT_BACKSLASH_SHOWNULL, + /* + * Quoting as produced by quotedzputs(), used for human + * readability of parameter values. + */ + QT_QUOTEDZPUTS }; #define QT_IS_SINGLE(x) ((x) == QT_SINGLE || (x) == QT_SINGLE_OPTIONAL) @@ -3055,6 +3060,7 @@ enum { enum { NICEFLAG_HEAP = 1, /* Heap allocation where needed */ NICEFLAG_QUOTE = 2, /* Result will appear in $'...' */ + NICEFLAG_NODUP = 4, /* Leave allocated */ }; /* Metafied input */ diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst index 2b46e06..1460ff6 100644 --- a/Test/D04parameter.ztst +++ b/Test/D04parameter.ztst @@ -398,7 +398,7 @@ foo=$'\x7f\x00' print -r -- ${(V)foo} 0:${(V)...} ->\C-?\C-@ +>^?^@ foo='playing '\''stupid'\'' "games" \w\i\t\h $quoting.' print -r ${(q)foo} diff --git a/Test/V09datetime.ztst b/Test/V09datetime.ztst index 831421d..7905155 100644 --- a/Test/V09datetime.ztst +++ b/Test/V09datetime.ztst @@ -71,4 +71,4 @@ print -r -- ${(V)"$(strftime $'%Y\0%m\0%d' 100000000)"} 0:Embedded nulls ->1973\C-@03\C-@03 +>1973^@03^@03