zsh-workers
 help / color / mirror / code / Atom feed
* [PATCH] fix failure of D07multibyte on Mac OS X
@ 2015-03-02 15:47 Jun T.
  2015-03-04 16:57 ` Jun T.
  2015-05-08 15:42 ` Oliver Kiddle
  0 siblings, 2 replies; 4+ messages in thread
From: Jun T. @ 2015-03-02 15:47 UTC (permalink / raw)
  To: zsh-workers

D07multibyte fails on Mac OS X since the following commit:

commit b237ba0a8eaa5001283ac8448872021723b90aff
Author: Peter Stephenson <pws@zsh.org>
Date:   Fri Feb 20 16:25:47 2015 +0000

    34587: ensure multibyte characters don't overflow.

The failure is as follows:

*** 1 ****
! ./test_bad_param:1: command not found: $\M-i#
--- 1 ----
! ./test_bad_param:1: command not found: $?#

The character '?' above is 0xe9.

The problem is not in the commit but in a strange behavior of
isprint() on Mac OS X; under a UTF-8 locale, it returns true for all the
characters in the range from 0xa0 to 0xff. Thus 0xe9 is printed as is.
(It seems isprint(c) is behaving like iswprint(c).)

The problem has been there for a long time; any character which should
be printed as \M-x has been printed as a raw byte (as if PRINT_EIGHT_BIT
is on).

I feel this is a bug of Apple's isprint(), but they may have a
different opinion. Anyway, I think the only possible workaround is to
replace the broken isprint() by an alternative.

In the following patch most of the code is borrowed from the handling
of BROKEN_WCWIDTH. In order to minimize the possibility of breaking
anything on platforms other than Mac, isprint() is replaced by an
alternative only if isprint() is broken *and* building on Mac OS X,
assuming the problem exists only on Mac.

Any comments?

Jun


diff --git a/Src/compat.c b/Src/compat.c
index b0bcb62..21e2a5e 100644
--- a/Src/compat.c
+++ b/Src/compat.c
@@ -951,3 +951,19 @@ int mk_wcswidth_cjk(const wchar_t *pwcs, size_t n)
 /**/
 #endif /* BROKEN_WCWIDTH && (__STDC_ISO_10646__ || __APPLE__) */
 
+/**/
+#if defined(__APPLE__) && defined(BROKEN_ISPRINT)
+
+/**/
+int
+isprint_ascii(int c)
+{
+    char *locale = setlocale(LC_CTYPE, NULL);
+    if (strcasestr(locale, "utf-8") || strcasestr(locale, "utf8"))
+	return (c >= 0x20 && c <= 0x7e);
+    else
+	return isprint(c);
+}
+
+/**/
+#endif /* __APPLE__ && BROKEN_ISPRINT */
diff --git a/Src/pattern.c b/Src/pattern.c
index df5e602..17cd40c 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -3622,7 +3622,7 @@ patmatchrange(char *range, int ch, int *indptr, int *mtp)
 		    return 1;
 		break;
 	    case PP_PRINT:
-		if (isprint(ch))
+		if (ISPRINT(ch))
 		    return 1;
 		break;
 	    case PP_PUNCT:
diff --git a/Src/utils.c b/Src/utils.c
index 1bcceb0..3d12807 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -414,7 +414,7 @@ nicechar(int c)
     static char buf[6];
     char *s = buf;
     c &= 0xff;
-    if (isprint(c))
+    if (ISPRINT(c))
 	goto done;
     if (c & 0x80) {
 	if (isset(PRINTEIGHTBIT))
@@ -423,7 +423,7 @@ nicechar(int c)
 	*s++ = 'M';
 	*s++ = '-';
 	c &= 0x7f;
-	if(isprint(c))
+	if(ISPRINT(c))
 	    goto done;
     }
     if (c == 0x7f) {
diff --git a/Src/ztype.h b/Src/ztype.h
index eef0f23..d1bef0a 100644
--- a/Src/ztype.h
+++ b/Src/ztype.h
@@ -75,3 +75,9 @@
 #define WC_ZISTYPE(X,Y)	zistype((X),(Y))
 #define WC_ISPRINT(X)	isprint(X)
 #endif
+
+#if defined(__APPLE__) && defined(BROKEN_ISPRINT)
+#define ISPRINT(c)  isprint_ascii(c)
+#else
+#define ISPRINT(c)  isprint(c)
+#endif
diff --git a/configure.ac b/configure.ac
index bfc02b2..7e770cd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2567,6 +2567,8 @@ AH_TEMPLATE([MULTIBYTE_SUPPORT],
 [Define to 1 if you want support for multibyte character sets.])
 AH_TEMPLATE([BROKEN_WCWIDTH],
 [Define to 1 if the wcwidth() function is present but broken.])
+AH_TEMPLATE([BROKEN_ISPRINT],
+[Define to 1 if the isprint() function is broken under UTF-8 locale.])
 if test x$zsh_cv_c_unicode_support = xyes; then
   AC_DEFINE(MULTIBYTE_SUPPORT)
 
@@ -2622,6 +2624,38 @@ if test x$zsh_cv_c_unicode_support = xyes; then
   if test x$zsh_cv_c_broken_wcwidth = xyes; then
     AC_DEFINE(BROKEN_WCWIDTH)
   fi
+
+  dnl Check if isprint() behaves correctly under UTF-8 locale.
+  dnl On some platform (maybe only on Mac OS X), isprint() returns
+  dnl true for all characters in the range from 0xa0 to 0xff if
+  dnl called under UTF-8 locale.
+  [locale_prog='char *my_locales[] = {
+  "en_US.UTF-8", "en_GB.UTF-8", "en.UTF-8", '
+  locale_prog="$locale_prog"`locale -a 2>/dev/null | \
+    sed -e 's/utf8/UTF-8/' | grep UTF-8 | \
+    while read line; do echo " \"$line\","; done;`
+  locale_prog="$locale_prog 0 };
+  #include <locale.h>
+  #include <ctype.h>
+
+  int main() {
+    char **localep;
+    for (localep = my_locales; *localep; localep++)
+      if (setlocale(LC_ALL, *localep) && isprint(0xa0))
+	return 0;
+    return 1;
+  }
+  "]
+
+  AC_CACHE_CHECK(if the isprint() function is broken,
+  zsh_cv_c_broken_isprint,
+  [AC_TRY_RUN([$locale_prog],
+  zsh_cv_c_broken_isprint=yes,
+  zsh_cv_c_broken_isprint=no,
+  zsh_cv_c_broken_isprint=no)])
+  if test x$zsh_cv_c_broken_isprint = xyes; then
+    AC_DEFINE(BROKEN_ISPRINT)
+  fi
 fi
 
 dnl




^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] fix failure of D07multibyte on Mac OS X
  2015-03-02 15:47 [PATCH] fix failure of D07multibyte on Mac OS X Jun T.
@ 2015-03-04 16:57 ` Jun T.
  2015-05-08 15:42 ` Oliver Kiddle
  1 sibling, 0 replies; 4+ messages in thread
From: Jun T. @ 2015-03-04 16:57 UTC (permalink / raw)
  To: zsh-workers

I've pushed the patch with the following modification.


diff --git a/Src/compat.c b/Src/compat.c
index 21e2a5e..09b3d6a 100644
--- a/Src/compat.c
+++ b/Src/compat.c
@@ -958,8 +958,7 @@ int mk_wcswidth_cjk(const wchar_t *pwcs, size_t n)
 int
 isprint_ascii(int c)
 {
-    char *locale = setlocale(LC_CTYPE, NULL);
-    if (strcasestr(locale, "utf-8") || strcasestr(locale, "utf8"))
+    if (!strcmp(nl_langinfo(CODESET), "UTF-8"))
 	return (c >= 0x20 && c <= 0x7e);
     else
 	return isprint(c);



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] fix failure of D07multibyte on Mac OS X
  2015-03-02 15:47 [PATCH] fix failure of D07multibyte on Mac OS X Jun T.
  2015-03-04 16:57 ` Jun T.
@ 2015-05-08 15:42 ` Oliver Kiddle
  2015-05-09  5:52   ` Jun T.
  1 sibling, 1 reply; 4+ messages in thread
From: Oliver Kiddle @ 2015-05-08 15:42 UTC (permalink / raw)
  To: zsh-workers

> +#if defined(__APPLE__) && defined(BROKEN_ISPRINT)
> +#define ISPRINT(c)  isprint_ascii(c)
> +#else
> +#define ISPRINT(c)  isprint(c)
> +#endif

It seems that an effect of this patch is that a build on Solaris now
results in lots of warnings of this form:
"../../Src/ztype.h", line 82: warning: macro redefined: ISPRINT

Doing a grep for ISPRINT in /usr/include gives these two definitions,
though I've not traced which if either is being picked up.
/usr/include/netinet/ip_compat.h:#define        ISPRINT(x) isprint((u_char)(x))
/usr/include/sys/euc.h:#define  ISPRINT(c, wp)  (wp._multibyte && !ISASCII(c) || isprint(c))

It's only a warning but it may be nicer to rename the macro to better
avoid the conflict.

Oliver


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] fix failure of D07multibyte on Mac OS X
  2015-05-08 15:42 ` Oliver Kiddle
@ 2015-05-09  5:52   ` Jun T.
  0 siblings, 0 replies; 4+ messages in thread
From: Jun T. @ 2015-05-09  5:52 UTC (permalink / raw)
  To: zsh-workers


2015/05/09 00:42, Oliver Kiddle <okiddle@yahoo.co.uk> wrote:

> "../../Src/ztype.h", line 82: warning: macro redefined: ISPRINT

Renaming ISPRINT --> ZISPRINT.

Jun


diff --git a/Src/pattern.c b/Src/pattern.c
index 17cd40c..05dcb29 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -3622,7 +3622,7 @@ patmatchrange(char *range, int ch, int *indptr, int *mtp)
 		    return 1;
 		break;
 	    case PP_PRINT:
-		if (ISPRINT(ch))
+		if (ZISPRINT(ch))
 		    return 1;
 		break;
 	    case PP_PUNCT:
diff --git a/Src/utils.c b/Src/utils.c
index 3d12807..13d4b83 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -414,7 +414,7 @@ nicechar(int c)
     static char buf[6];
     char *s = buf;
     c &= 0xff;
-    if (ISPRINT(c))
+    if (ZISPRINT(c))
 	goto done;
     if (c & 0x80) {
 	if (isset(PRINTEIGHTBIT))
@@ -423,7 +423,7 @@ nicechar(int c)
 	*s++ = 'M';
 	*s++ = '-';
 	c &= 0x7f;
-	if(ISPRINT(c))
+	if(ZISPRINT(c))
 	    goto done;
     }
     if (c == 0x7f) {
diff --git a/Src/ztype.h b/Src/ztype.h
index d1bef0a..b73e3f8 100644
--- a/Src/ztype.h
+++ b/Src/ztype.h
@@ -77,7 +77,7 @@
 #endif
 
 #if defined(__APPLE__) && defined(BROKEN_ISPRINT)
-#define ISPRINT(c)  isprint_ascii(c)
+#define ZISPRINT(c)  isprint_ascii(c)
 #else
-#define ISPRINT(c)  isprint(c)
+#define ZISPRINT(c)  isprint(c)
 #endif


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-05-09  6:28 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-02 15:47 [PATCH] fix failure of D07multibyte on Mac OS X Jun T.
2015-03-04 16:57 ` Jun T.
2015-05-08 15:42 ` Oliver Kiddle
2015-05-09  5:52   ` Jun T.

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).