zsh-workers
 help / color / mirror / code / Atom feed
From: Peter Stephenson <p.w.stephenson@ntlworld.com>
To: zsh-workers@zsh.org
Subject: Re: The "set" utility outputs binary data
Date: Fri, 4 Dec 2015 21:56:11 +0000	[thread overview]
Message-ID: <20151204215611.17d060fb@ntlworld.com> (raw)
In-Reply-To: <20151204142900.0c035c6a@pwslap01u.europe.root.pri>

On Fri, 04 Dec 2015 14:29:00 +0000
Peter Stephenson <p.stephenson@samsung.com> wrote:
> It looks like the strategy would be to upgrade quotedzputs() to
> interact better with nicezputs() and nicechar().  The code that's not
> there at the moment is to pick the right sort of quotes, and you only
> know that after the event at the moment, so the interface to those two
> needs expanding.
> 
> I'd propose not bothering to do this in the case where multibyte mode
> isn't available (i.e. is not even compiled in).  It's not useful enough
> and wouldn't get much testing.

This seems to be going the right way; let me know of any oddities or
unwanted side effects.  Note a few "nice" representations have changed
to fit $'..' conventions.

pws

diff --git a/Src/utils.c b/Src/utils.c
index ca810de..d131383 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -411,7 +411,7 @@ putshout(int c)
 mod_export char *
 nicechar(int c)
 {
-    static char buf[6];
+    static char buf[10];
     char *s = buf;
     c &= 0xff;
     if (ZISPRINT(c))
@@ -427,7 +427,9 @@ nicechar(int c)
 	    goto done;
     }
     if (c == 0x7f) {
-	*s++ = '^';
+	*s++ = '\\';
+	*s++ = 'C';
+	*s++ = '-';
 	c = '?';
     } else if (c == '\n') {
 	*s++ = '\\';
@@ -436,7 +438,9 @@ nicechar(int c)
 	*s++ = '\\';
 	c = 't';
     } else if (c < 0x20) {
-	*s++ = '^';
+	*s++ = '\\';
+	*s++ = 'C';
+	*s++ = '-';
 	c += 0x40;
     }
     done:
@@ -455,6 +459,22 @@ nicechar(int c)
     return buf;
 }
 
+/* Return 1 if nicechar() would reformat this character, else 0.
+ * Only the low eight bits of c are examined.  A byte that fails ZISPRINT()
+ * and has the top bit set is reported as 1 unless PRINTEIGHTBIT is set. */
+
+/**/
+mod_export int
+is_nicechar(int c)
+{
+    c &= 0xff;
+    if (ZISPRINT(c))
+	return 0;
+    if (c & 0x80)
+	return !isset(PRINTEIGHTBIT);
+    return (c == 0x7f || c == '\n' || c == '\t' || c < 0x20);
+}
+
 /**/
 #ifdef MULTIBYTE_SUPPORT
 static mbstate_t mb_shiftstate;
@@ -532,7 +552,9 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep)
     s = buf;
     if (!iswprint(c) && (c < 0x80 || !isset(PRINTEIGHTBIT))) {
 	if (c == 0x7f) {
-	    *s++ = '^';
+	    *s++ = '\\';
+	    *s++ = 'C';
+	    *s++ = '-';
 	    c = '?';
 	} else if (c == L'\n') {
 	    *s++ = '\\';
@@ -541,7 +563,9 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep)
 	    *s++ = '\\';
 	    c = 't';
 	} else if (c < 0x20) {
-	    *s++ = '^';
+	    *s++ = '\\';
+	    *s++ = 'C';
+	    *s++ = '-';
 	    c += 0x40;
 	} else if (c >= 0x80) {
 	    ret = -1;
@@ -611,6 +635,23 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep)
     return buf;
 }
 
+/* Return 1 if wcs_nicechar() would reformat this character for display,
+ * else 0.  Any value in the range 0x80..0xff yields 0 here; values of 0x100
+ * and above yield 1 only if !iswprint(c) and PRINTEIGHTBIT is unset. */
+
+/**/
+mod_export int is_wcs_nicechar(wchar_t c)
+{
+    if (!iswprint(c) && (c < 0x80 || !isset(PRINTEIGHTBIT))) {
+	if (c == 0x7f || c == L'\n' || c == L'\t' || c < 0x20)
+	    return 1;
+	if (c >= 0x80) {
+	    return (c >= 0x100);
+	}
+    }
+    return 0;
+}
+
 /**/
 mod_export int
 zwcwidth(wint_t wc)
@@ -4834,12 +4875,15 @@ niceztrlen(char const *s)
  * If outstrp is not NULL, set *outstrp to a zalloc'd version of
  * the output (still metafied).
  *
- * If "heap" is non-zero, use the heap for *outstrp, else zalloc.
+ * If flags contains NICEFLAG_HEAP, use the heap for *outstrp, else
+ * zalloc.
+ * If flags contains NICEFLAG_QUOTE, the output is going to be within
+ * $'...', so quote "'" with a backslash.
  */
 
 /**/
 mod_export size_t
-mb_niceformat(const char *s, FILE *stream, char **outstrp, int heap)
+mb_niceformat(const char *s, FILE *stream, char **outstrp, int flags)
 {
     size_t l = 0, newl;
     int umlen, outalloc, outleft, eol = 0;
@@ -4886,7 +4930,10 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int heap)
 	    cnt = 1;
 	    /* FALL THROUGH */
 	default:
-	    fmt = wcs_nicechar(c, &newl, NULL);
+	    if (c == L'\'' && (flags & NICEFLAG_QUOTE))
+		fmt = "\\'";
+	    else
+		fmt = wcs_nicechar(c, &newl, NULL);
 	    break;
 	}
 
@@ -4920,13 +4967,71 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int heap)
     if (outstrp) {
 	*outptr = '\0';
 	/* Use more efficient storage for returned string */
-	*outstrp = heap ? dupstring(outstr) : ztrdup(outstr);
+	*outstrp = (flags & NICEFLAG_HEAP) ? dupstring(outstr) : ztrdup(outstr);
 	free(outstr);
     }
 
     return l;
 }
 
+/* Return 1 if mb_niceformat() would reformat this string, else 0.
+ * s is copied, untokenized and unmetafied before being scanned; the scan
+ * stops at the first character found that would be reformatted. */
+
+/**/
+mod_export int
+is_mb_niceformat(const char *s)
+{
+    int umlen, eol = 0, ret = 0;
+    wchar_t c;
+    char *ums, *ptr;
+    mbstate_t mbs;
+
+    ums = ztrdup(s);
+    untokenize(ums);
+    ptr = unmetafy(ums, &umlen);
+
+    memset(&mbs, 0, sizeof mbs);
+    while (umlen > 0) {
+	size_t cnt = eol ? MB_INVALID : mbrtowc(&c, ptr, umlen, &mbs);
+
+	switch (cnt) {
+	case MB_INCOMPLETE:
+	    eol = 1;
+	    /* FALL THROUGH */
+	case MB_INVALID:
+	    /* The byte didn't convert; test it on its own with is_nicechar(). */
+	    if (is_nicechar(*ptr))  {
+		ret = 1;
+		break;
+	    }
+	    cnt = 1;
+	    /* Get mbs out of its undefined state. */
+	    memset(&mbs, 0, sizeof mbs);
+	    break;
+	case 0:
+	    /* Careful:  converting '\0' returns 0, but a '\0' is a
+	     * real character for us, so we should consume 1 byte. */
+	    cnt = 1;
+	    /* FALL THROUGH */
+	default:
+	    if (is_wcs_nicechar(c))
+		ret = 1;
+	    break;
+	}
+
+	if (ret)
+	    break;
+
+	umlen -= cnt;
+	ptr += cnt;
+    }
+
+    free(ums);
+
+    return ret;
+}
+
 /* ztrdup multibyte string with nice formatting */
 
 /**/
@@ -4935,7 +5040,7 @@ nicedup(const char *s, int heap)
 {
     char *retstr;
 
-    (void)mb_niceformat(s, NULL, &retstr, heap);
+    (void)mb_niceformat(s, NULL, &retstr, heap ? NICEFLAG_HEAP : 0);
 
     return retstr;
 }
@@ -5717,22 +5822,35 @@ quotestring(const char *s, char **e, int instring)
 /* Unmetafy and output a string, quoted if it contains special characters. */
 
 /**/
-mod_export int
+mod_export void
 quotedzputs(char const *s, FILE *stream)
 {
     int inquote = 0, c;
 
     /* check for empty string */
-    if(!*s)
-	return fputs("''", stream);
+    if(!*s) {
+	fputs("''", stream);
+	return;
+    }
 
-    if (!hasspecial(s))
-	return zputs(s, stream);
+#ifdef MULTIBYTE_SUPPORT
+    if (is_mb_niceformat(s)) {
+	fputs("$'", stream);
+	mb_niceformat(s, stream, NULL, NICEFLAG_QUOTE);
+	fputc('\'', stream);
+	return;
+    }
+#endif /* MULTIBYTE_SUPPORT */
+
+    if (!hasspecial(s)) {
+	zputs(s, stream);
+	return;
+    }
 
     if (isset(RCQUOTES)) {
 	/* use rc-style quotes-within-quotes for the whole string */
 	if(fputc('\'', stream) < 0)
-	    return EOF;
+	    return;
 	while(*s) {
 	    if (*s == Meta)
 		c = *++s ^ 32;
@@ -5741,16 +5859,16 @@ quotedzputs(char const *s, FILE *stream)
 	    s++;
 	    if (c == '\'') {
 		if(fputc('\'', stream) < 0)
-		    return EOF;
+		    return;
 	    } else if(c == '\n' && isset(CSHJUNKIEQUOTES)) {
 		if(fputc('\\', stream) < 0)
-		    return EOF;
+		    return;
 	    }
 	    if(fputc(c, stream) < 0)
-		return EOF;
+		return;
 	}
 	if(fputc('\'', stream) < 0)
-	    return EOF;
+	    return;
     } else {
 	/* use Bourne-style quoting, avoiding empty quoted strings */
 	while(*s) {
@@ -5762,31 +5880,30 @@ quotedzputs(char const *s, FILE *stream)
 	    if (c == '\'') {
 		if(inquote) {
 		    if(fputc('\'', stream) < 0)
-			return EOF;
+			return;
 		    inquote=0;
 		}
 		if(fputs("\\'", stream) < 0)
-		    return EOF;
+		    return;
 	    } else {
 		if (!inquote) {
 		    if(fputc('\'', stream) < 0)
-			return EOF;
+			return;
 		    inquote=1;
 		}
 		if(c == '\n' && isset(CSHJUNKIEQUOTES)) {
 		    if(fputc('\\', stream) < 0)
-			return EOF;
+			return;
 		}
 		if(fputc(c, stream) < 0)
-		    return EOF;
+		    return;
 	    }
 	}
 	if (inquote) {
 	    if(fputc('\'', stream) < 0)
-		return EOF;
+		return;
 	}
     }
-    return 0;
 }
 
 /* Double-quote a metafied string. */
diff --git a/Src/zsh.h b/Src/zsh.h
index d3bfcef..caf7def 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -3051,6 +3051,12 @@ enum {
 #define AFTERTRAPHOOK  (zshhooks + 2)
 
 #ifdef MULTIBYTE_SUPPORT
+/* Flag bits for the final argument to mb_niceformat() */
+enum {
+    NICEFLAG_HEAP = 1,		/* Heap allocation where needed */
+    NICEFLAG_QUOTE = 2,		/* Output goes in $'...': "'" becomes \' */
+};
+
 /* Metafied input */
 #define nicezputs(str, outs)	(void)mb_niceformat((str), (outs), NULL, 0)
 #define MB_METACHARINIT()	mb_charinit()


  reply	other threads:[~2015-12-04 22:01 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-03 14:05 Vincent Lefevre
2015-12-03 14:25 ` Peter Stephenson
2015-12-04 14:29   ` Peter Stephenson
2015-12-04 21:56     ` Peter Stephenson [this message]
2015-12-06 23:08       ` Bart Schaefer
2015-12-07 10:24         ` Peter Stephenson
2015-12-07 18:13           ` Bart Schaefer
2015-12-07 21:39             ` Peter Stephenson
2015-12-07 18:29           ` Nikolay Aleksandrovich Pavlov (ZyX)
2015-12-03 14:46 ` Stephane Chazelas
2015-12-03 23:43 ` Daniel Shahaf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151204215611.17d060fb@ntlworld.com \
    --to=p.w.stephenson@ntlworld.com \
    --cc=zsh-workers@zsh.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).