zsh-workers
 help / color / mirror / code / Atom feed
* configure tests for iconv
@ 2005-02-24 16:39 Oliver Kiddle
  2005-03-01 12:46 ` Peter Stephenson
  0 siblings, 1 reply; 9+ messages in thread
From: Oliver Kiddle @ 2005-02-24 16:39 UTC (permalink / raw)
  To: Zsh workers

The following is an attempt to get the configure tests for iconv to work
properly. I've not actually been able to test this on any of the
critical systems such as Cygwin or Mac OS X (or any system that needs
-liconv for that matter) so I don't know for sure that it works.

I've also tried to clear up the compiler warnings. This includes a
section of configure to find out if the iconv() prototype uses const.
Bits of this were copied from various other configure scripts.

Oliver

Index: configure.ac
===================================================================
RCS file: /cvsroot/zsh/zsh/configure.ac,v
retrieving revision 1.28
diff -u -r1.28 configure.ac
--- configure.ac	14 Feb 2005 13:56:21 -0000	1.28
+++ configure.ac	24 Feb 2005 16:25:26 -0000
@@ -728,13 +728,45 @@
 
 AC_CHECK_LIB(socket, socket)
 
-AC_CHECK_LIB(iconv, iconv)
+dnl ---------------
+dnl CHECK FOR ICONV
+dnl ---------------
 
-case "$host_os" in
-  cygwin | darwin*)
-    dnl cygwin iconv() is really libiconv()
-    AC_CHECK_LIB(iconv, libiconv) ;;
-esac
+dnl Find iconv. It may be in libiconv and may be iconv() or libiconv()
+if test "x$ac_cv_header_iconv_h" = "xyes"; then
+  AC_CHECK_FUNC(iconv, ac_found_iconv=yes, ac_found_iconv=no)
+  if test "x$ac_found_iconv" = "xno"; then
+    AC_CHECK_LIB(iconv, iconv, ac_found_iconv=yes)
+    if test "x$ac_found_iconv" = "xno"; then
+      AC_CHECK_LIB(iconv, libiconv, ac_found_iconv=yes)
+    fi
+    if test "x$ac_found_iconv" != "xno"; then
+      LIBS="-liconv $LIBS"
+    fi
+  fi
+fi
+if test "x$ac_found_iconv" = xyes; then
+  AC_DEFINE(HAVE_ICONV, 1, [Define if you have the iconv() function.])
+fi
+
+dnl Check if iconv uses const in prototype declaration
+if test "x$ac_found_iconv" = "xyes"; then
+  AC_CACHE_CHECK(for iconv declaration, ac_cv_iconv_const,
+    [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <stdlib.h>
+        #include <iconv.h>]],
+        [[#ifdef __cplusplus
+          "C"
+          #endif
+          #if defined(__STDC__) || defined(__cplusplus)
+          size_t iconv (iconv_t cd, char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft);
+          #else
+          size_t iconv();
+          #endif]])],
+      [ac_cv_iconv_const=],
+      [ac_cv_iconv_const=const])])
+  AC_DEFINE_UNQUOTED([ICONV_CONST], $ac_cv_iconv_const,
+    [Define as const if the declaration of iconv() needs const.])
+fi
 
 if test x$enable_pcre = xyes; then
 dnl pcre-config should probably be employed here
Index: Src/system.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/system.h,v
retrieving revision 1.29
diff -u -r1.29 system.h
--- Src/system.h	22 Feb 2005 13:12:55 -0000	1.29
+++ Src/system.h	24 Feb 2005 16:25:26 -0000
@@ -701,7 +701,7 @@
 #else
 # ifdef HAVE_LANGINFO_H
 #   include <langinfo.h>
-#   if defined(HAVE_ICONV_H) || defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
+#   ifdef HAVE_ICONV
 #     include <iconv.h>
 #   endif
 # endif
Index: Src/utils.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/utils.c,v
retrieving revision 1.73
diff -u -r1.73 utils.c
--- Src/utils.c	22 Feb 2005 18:24:45 -0000	1.73
+++ Src/utils.c	24 Feb 2005 16:25:27 -0000
@@ -3456,6 +3456,7 @@
 # if defined(HAVE_NL_LANGINFO) && defined(CODESET) && !defined(__STDC_ISO_10646__)
 /* Convert a character from UCS4 encoding to UTF-8 */
 
+/**/
 size_t
 ucs4toutf8(char *dest, unsigned int wval)
 {
@@ -3480,7 +3481,7 @@
     case 4: dest[3] = (wval & 0x3f) | 0x80; wval >>= 6;
     case 3: dest[2] = (wval & 0x3f) | 0x80; wval >>= 6;
     case 2: dest[1] = (wval & 0x3f) | 0x80; wval >>= 6;
-	*dest = wval | (0xfc << (6 - len)) & 0xfc;
+	*dest = wval | ((0xfc << (6 - len)) & 0xfc);
 	break;
     case 1: *dest = wval;
     }
@@ -3522,11 +3523,10 @@
     size_t count;
 #else
     unsigned int wval;
-# if defined(HAVE_NL_LANGINFO) && defined(CODESET) && (defined(HAVE_ICONV_H) || defined(HAVE_ICONV) || defined(HAVE_LIBICONV))
+# if defined(HAVE_NL_LANGINFO) && defined(CODESET) && defined(HAVE_ICONV)
     iconv_t cd;
     char inbuf[4];
     size_t inbytes, outbytes;
-    char *inptr;
     size_t count;
 # endif
 #endif
@@ -3643,10 +3643,10 @@
 		    t += ucs4toutf8(t, wval);
 		    continue;
 		} else {
-#   if defined(HAVE_ICONV_H) || defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
+#   ifdef HAVE_ICONV
+		    ICONV_CONST char *inptr = inbuf;
     	    	    inbytes = 4;
 		    outbytes = 6;
-    	    	    inptr = inbuf;
 		    /* assume big endian convention for UCS-4 */
 		    for (i=3;i>=0;i--) {
 			inbuf[i] = wval & 0xff;
@@ -3664,7 +3664,7 @@
 			*len = t - buf;
 			return buf;
 		    }
-                    count = iconv(cd, (char **)&inptr, &inbytes, &t, &outbytes);
+                    count = iconv(cd, &inptr, &inbytes, &t, &outbytes);
 		    iconv_close(cd);
 		    if (count == (size_t)-1) {
                         zerr("cannot do charset conversion", NULL, 0);


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: configure tests for iconv
  2005-02-24 16:39 configure tests for iconv Oliver Kiddle
@ 2005-03-01 12:46 ` Peter Stephenson
  2005-03-01 18:02   ` Oliver Kiddle
  0 siblings, 1 reply; 9+ messages in thread
From: Peter Stephenson @ 2005-03-01 12:46 UTC (permalink / raw)
  To: Zsh workers

Oliver Kiddle wrote:
> The following is an attempt to get the configure tests for iconv to work
> properly. I've not actually been able to test this on any of the
> critical systems such as Cygwin or Mac OS X (or any system that needs
> -liconv for that matter) so I don't know for sure that it works.

I finally got around to trying this under a couple of versions of
Cygwin.  It compiles and links with libiconv fine.  It announces it
can't do charset conversion, but as far as I can find out it always did;
probably something outside the C code needs setting up.

-- 
Peter Stephenson <pws@csr.com>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


**********************************************************************
This email and any files transmitted with it are confidential and
intended solely for the use of the individual or entity to whom they
are addressed. If you have received this email in error please notify
the system manager.

**********************************************************************


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: configure tests for iconv
  2005-03-01 12:46 ` Peter Stephenson
@ 2005-03-01 18:02   ` Oliver Kiddle
  2005-03-02 16:08     ` Peter Stephenson
  0 siblings, 1 reply; 9+ messages in thread
From: Oliver Kiddle @ 2005-03-01 18:02 UTC (permalink / raw)
  To: Zsh workers

Peter wrote:
> I finally got around to trying this under a couple of versions of
> Cygwin.  It compiles and links with libiconv fine.  It announces it

That probably means the configure changes are right.

> can't do charset conversion, but as far as I can find out it always did;
> probably something outside the C code needs setting up.

Have you tried with a simple character in the basic ASCII range such as
\\u0061? If that works then it may be related to however locales are
configured in Cygwin.

There are a few places in the C code where it produces that error message
so it'd be useful to establish which one applies. Could you perhaps edit
the four instances of the error message to identify which is printed?
Either that or check what the various defines such as HAVE_NL_LANGINFO
and HAVE_ICONV are set to? If HAVE_ICONV is undefined, my AC_DEFINE may
be clashing with the AC_CHECK_FUNC. That could be solved by renaming
HAVE_ICONV in the AC_DEFINE.

Does /usr/bin/printf's \u work?
Also worth checking is
  zmodload zsh/langinfo
  print $langinfo[CODESET]

Thanks

Oliver


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: configure tests for iconv
  2005-03-01 18:02   ` Oliver Kiddle
@ 2005-03-02 16:08     ` Peter Stephenson
  2005-03-02 17:32       ` Andrey Borzenkov
  2005-03-03 11:10       ` Oliver Kiddle
  0 siblings, 2 replies; 9+ messages in thread
From: Peter Stephenson @ 2005-03-02 16:08 UTC (permalink / raw)
  To: Zsh workers

Oliver Kiddle wrote:
> > can't do charset conversion, but as far as I can find out it always did;
> > probably something outside the C code needs setting up.
> 
> Have you tried with a simple character in the basic ASCII range such as
> \\u0061. If that works then it may be related to however locales are
> configured in Cygwin.

No, that doesn't work either.  The error is from returning -1 from
    	    	    cd = iconv_open(nl_langinfo(CODESET), "ISO-10646");

> Does /usr/bin/printf's \u work?

This fails too, but with the slightly odd error "invalid universal
character name".  It's not a problem with the input format, however,
since that complains about missing hexadecimal digits if you get it wrong.

> Also worth checking is
>   zmodload zsh/langinfo
>   print $langinfo[CODESET]

This gives US-ASCII, which might be part of the problem, though I really
haven't the faintest idea.  A quick scan of the regional and language
settings didn't suggest anything.

-- 
Peter Stephenson <pws@csr.com>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


**********************************************************************
This email and any files transmitted with it are confidential and
intended solely for the use of the individual or entity to whom they
are addressed. If you have received this email in error please notify
the system manager.

**********************************************************************


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: configure tests for iconv
  2005-03-02 16:08     ` Peter Stephenson
@ 2005-03-02 17:32       ` Andrey Borzenkov
  2005-03-02 18:59         ` Peter A. Castro
  2005-03-03 11:10       ` Oliver Kiddle
  1 sibling, 1 reply; 9+ messages in thread
From: Andrey Borzenkov @ 2005-03-02 17:32 UTC (permalink / raw)
  To: zsh-workers

On Wednesday 02 March 2005 19:08, Peter Stephenson wrote:
> > Also worth checking is
> >   zmodload zsh/langinfo
> >   print $langinfo[CODESET]
>
> This gives US-ASCII, which might be part of the problem, though I really
> haven't the faintest idea.  A quick scan of the regional and language
> settings didn't suggest anything.

I do not think that cygwin really has locale support besides what internal 
Win32 API already provides. I wonder if it is using 8 or 16 bit interfaces 
(most Win32 functions have two versions).


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: configure tests for iconv
  2005-03-02 17:32       ` Andrey Borzenkov
@ 2005-03-02 18:59         ` Peter A. Castro
  0 siblings, 0 replies; 9+ messages in thread
From: Peter A. Castro @ 2005-03-02 18:59 UTC (permalink / raw)
  To: Andrey Borzenkov; +Cc: zsh-workers

On Wed, 2 Mar 2005, Andrey Borzenkov wrote:

> On Wednesday 02 March 2005 19:08, Peter Stephenson wrote:
> > > Also worth checking is
> > >   zmodload zsh/langinfo
> > >   print $langinfo[CODESET]
> >
> > This gives US-ASCII, which might be part of the problem, though I really
> > haven't the faintest idea.  A quick scan of the regional and language
> > settings didn't suggest anything.
>
> I do not think that cygwin really has locale support besides what internal
> Win32 API already provides. I wonder if it is using 8 or 16 bit interfaces
> (most Win32 functions have two versions).

Cygwin does support locales via the setlocale() function.  Not sure
whether it's using the 8 or 16 bit interfaces (it's part of libc, I'll
have to check).  There is a problem, currently, where locales aren't
being set and instead are reporting "C".  I'll see if I can find anything
further.

-- 
Peter A. Castro <doctor@fruitbat.org> or <Peter.Castro@oracle.com>
	"Cats are just autistic Dogs" -- Dr. Tony Attwood


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: configure tests for iconv
  2005-03-02 16:08     ` Peter Stephenson
  2005-03-02 17:32       ` Andrey Borzenkov
@ 2005-03-03 11:10       ` Oliver Kiddle
  2005-03-03 11:26         ` Peter Stephenson
  1 sibling, 1 reply; 9+ messages in thread
From: Oliver Kiddle @ 2005-03-03 11:10 UTC (permalink / raw)
  To: Zsh workers

Peter wrote:
> 
> No, that doesn't work either.  The error is from returning -1 from
>     	    	    cd = iconv_open(nl_langinfo(CODESET), "ISO-10646");

I tried downloading GNU libiconv and, sure enough, it doesn't like
"ISO-10646". I had imagined libiconv was the same code as glibc uses but
perhaps not. At least with this being the problem, I'm now fairly
confident that the configure tests are working.

Trying a few different systems, it seems UCS-4BE is a much more portable
choice of name to identify the character encoding by. Given that the
endianness is explicit, that might be a better choice anyway. So with
the following patch it should now work. If this breaks for any system we
can always try multiple names for the encoding.

Incidentally, the patch also helps on Solaris 8. The Solaris machines I
have access to didn't previously have any of the UTF-8 iconv packages
installed so I had assumed it simply couldn't do the necessary
conversions. Below is also a patch against _iconv to pick up these
character encodings on Solaris.

> > Does /usr/bin/printf's \u work?
> 
> This fails too, but with the slightly odd error "invalid universal
> character name".  It's not a problem with the input format, however,

I think it's telling you that it refuses to convert characters in that
particular range and not that it especially *can't* convert them. It
won't handle the basic ASCII characters on any system. I think it also
prints that message for certain reserved or unallocated ranges. I really
can't see the point of that but it's a GNU coreutils issue.

> This gives US-ASCII, which might be part of the problem, though I really
> haven't the faintest idea.  A quick scan of the regional and language
> settings didn't suggest anything.

Well with the patch below, it should hopefully now cope with stuff like
\\u0061 which is as much as we can hope for in a US-ASCII locale. The
rest is obviously a Cygwin issue. Perhaps we should add an UNKNOWN_CHAR
variable or similar system to allow something else to be substituted
instead of an error message.

Oliver

Index: Completion/Unix/Command/_iconv
===================================================================
RCS file: /cvsroot/zsh/zsh/Completion/Unix/Command/_iconv,v
retrieving revision 1.4
diff -u -r1.4 _iconv
--- Completion/Unix/Command/_iconv	17 Jun 2004 13:12:26 -0000	1.4
+++ Completion/Unix/Command/_iconv	3 Mar 2005 10:29:49 -0000
@@ -1,7 +1,8 @@
 #compdef iconv
 
-local expl curcontext="$curcontext" state line codeset ret=1
+local expl curcontext="$curcontext" state line ret=1
 local LOCPATH="${LOCPATH:-/usr/lib/nls/loc}"
+local -U codeset
 
 if _pick_variant gnu=GNU unix --version; then
 
@@ -40,6 +41,7 @@
   if [[ $state = codeset ]]; then
     if [[ -f /usr/lib/iconv/iconv_data ]]; then  # IRIX & Solaris
       codeset=( ${${(f)"$(</usr/lib/iconv/iconv_data)"}%%[[:blank:]]*} )
+      codeset+=( /usr/lib/iconv/*%*.so(Ne.'reply=( ${${REPLY:t}%%%*} ${${REPLY:r}#*%} )'.) )
     elif [[ -d $LOCPATH/iconv ]]; then  # OSF
       codeset=( $LOCPATH/iconv/*(N:t) )
       codeset=( ${(j:_:s:_:)codeset} )
Index: Src/utils.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/utils.c,v
retrieving revision 1.75
diff -u -r1.75 utils.c
--- Src/utils.c	25 Feb 2005 10:21:01 -0000	1.75
+++ Src/utils.c	3 Mar 2005 10:29:50 -0000
@@ -3617,13 +3617,13 @@
 		    ICONV_CONST char *inptr = inbuf;
     	    	    inbytes = 4;
 		    outbytes = 6;
-		    /* assume big endian convention for UCS-4 */
+		    /* store value in big endian form */
 		    for (i=3;i>=0;i--) {
 			inbuf[i] = wval & 0xff;
 			wval >>= 8;
 		    }
 
-    	    	    cd = iconv_open(nl_langinfo(CODESET), "ISO-10646");
+    	    	    cd = iconv_open(nl_langinfo(CODESET), "UCS-4BE");
 		    if (cd == (iconv_t)-1) {
 			zerr("cannot do charset conversion", NULL, 0);
 			if (fromwhere == 4) {


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: configure tests for iconv
  2005-03-03 11:10       ` Oliver Kiddle
@ 2005-03-03 11:26         ` Peter Stephenson
  2005-03-03 13:51           ` Oliver Kiddle
  0 siblings, 1 reply; 9+ messages in thread
From: Peter Stephenson @ 2005-03-03 11:26 UTC (permalink / raw)
  To: Zsh workers

Oliver Kiddle wrote:
> Trying a few different systems, it seems UCS-4BE is a much more portable
> choice of name to identify the character encoding by. Given that the
> endianness is explicit, that might be a better choice anyway. So with
> the following patch it should now work. If this breaks for any system we
> can always try multiple names for the encoding.

OK, Cygwin now converts characters up to and including 0x7f.  After that
it reports an error.  I can certainly believe this is down to Windows
thinking the locale doesn't support 8-bit characters.  (Simple tweaking
of LANG didn't seem to improve matters.)

Would it be worth changing the error message for that error (the second
one in the iconv branch) to suggest the error was with the particular
character, rather than with character sets in general?

-- 
Peter Stephenson <pws@csr.com>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


**********************************************************************
This email and any files transmitted with it are confidential and
intended solely for the use of the individual or entity to whom they
are addressed. If you have received this email in error please notify
the system manager.

**********************************************************************


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: configure tests for iconv
  2005-03-03 11:26         ` Peter Stephenson
@ 2005-03-03 13:51           ` Oliver Kiddle
  0 siblings, 0 replies; 9+ messages in thread
From: Oliver Kiddle @ 2005-03-03 13:51 UTC (permalink / raw)
  To: Zsh workers

Peter wrote:
> OK, Cygwin now converts characters up to and including 0x7f.  After that

Good. Thanks for your help with this.

> Would it be worth changing the error message for that error (the second
> one in the iconv branch) to suggest the error was with the particular
> character, rather than with character sets in general?

When using wctomb() instead of iconv() it actually prints "character not
in range" for that situation so it certainly makes sense that we should
be consistent with that.

Oliver

--- utils.c.bak 2005-03-03 14:45:33.772178204 +0100
+++ utils.c     2005-03-03 14:45:36.562694413 +0100
@@ -3637,7 +3637,7 @@ getkeystring(char *s, int *len, int from
                     count = iconv(cd, &inptr, &inbytes, &t, &outbytes);
                    iconv_close(cd);
                    if (count == (size_t)-1) {
-                        zerr("cannot do charset conversion", NULL, 0);
+                        zerr("character not in range", NULL, 0);
                        *t = '\0';
                        *len = t - buf;
                        return buf;


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2005-03-03 13:51 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-02-24 16:39 configure tests for iconv Oliver Kiddle
2005-03-01 12:46 ` Peter Stephenson
2005-03-01 18:02   ` Oliver Kiddle
2005-03-02 16:08     ` Peter Stephenson
2005-03-02 17:32       ` Andrey Borzenkov
2005-03-02 18:59         ` Peter A. Castro
2005-03-03 11:10       ` Oliver Kiddle
2005-03-03 11:26         ` Peter Stephenson
2005-03-03 13:51           ` Oliver Kiddle

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).