From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 22526 invoked from network); 21 Apr 2008 11:46:14 -0000 X-Spam-Checker-Version: SpamAssassin 3.2.4 (2008-01-01) on f.primenet.com.au X-Spam-Level: X-Spam-Status: No, score=-2.4 required=5.0 tests=AWL,BAYES_00 autolearn=ham version=3.2.4 Received: from news.dotsrc.org (HELO a.mx.sunsite.dk) (130.225.247.88) by ns1.primenet.com.au with SMTP; 21 Apr 2008 11:46:14 -0000 Received-SPF: none (ns1.primenet.com.au: domain at sunsite.dk does not designate permitted sender hosts) Received: (qmail 59808 invoked from network); 21 Apr 2008 11:46:07 -0000 Received: from sunsite.dk (130.225.247.90) by a.mx.sunsite.dk with SMTP; 21 Apr 2008 11:46:07 -0000 Received: (qmail 25208 invoked by alias); 21 Apr 2008 11:46:02 -0000 Mailing-List: contact zsh-workers-help@sunsite.dk; run by ezmlm Precedence: bulk X-No-Archive: yes X-Seq: 24856 Received: (qmail 25181 invoked from network); 21 Apr 2008 11:46:01 -0000 Received: from bifrost.dotsrc.org (130.225.254.106) by sunsite.dk with SMTP; 21 Apr 2008 11:46:01 -0000 Received: from cluster-g.mailcontrol.com (cluster-g.mailcontrol.com [85.115.41.190]) by bifrost.dotsrc.org (Postfix) with ESMTP id D9F11808A389 for ; Mon, 21 Apr 2008 13:45:55 +0200 (CEST) Received: from cameurexb01.EUROPE.ROOT.PRI ([62.189.241.200]) by rly17g.srv.mailcontrol.com (MailControl) with ESMTP id m3LBjqVx011801 for ; Mon, 21 Apr 2008 12:45:52 +0100 Received: from news01.csr.com ([10.103.143.38]) by cameurexb01.EUROPE.ROOT.PRI with Microsoft SMTPSVC(6.0.3790.3959); Mon, 21 Apr 2008 12:45:51 +0100 Received: from news01.csr.com (localhost.localdomain [127.0.0.1]) by news01.csr.com (8.14.2/8.13.4) with ESMTP id m3LBjpPD020466 for ; Mon, 21 Apr 2008 12:45:51 +0100 Received: from csr.com (pws@localhost) by news01.csr.com (8.14.2/8.14.2/Submit) with ESMTP id m3LBjpqO020463 for ; Mon, 21 Apr 2008 12:45:51 +0100 X-Authentication-Warning: news01.csr.com: pws owned process doing -bs To: zsh-workers@sunsite.dk (Zsh hackers list) Subject: PATCH: fix tests for combining and base characters X-Mailer: MH-E 8.0.3; nmh 1.2-20070115cvs; GNU Emacs 22.1.1 Date: Mon, 21 Apr 2008 12:45:51 +0100 Message-ID: <20462.1208778351@csr.com> From: Peter Stephenson X-OriginalArrivalTime: 21 Apr 2008 11:45:51.0650 (UTC) FILETIME=[3FDDF020:01C8A3A5] X-Scanned-By: MailControl A-08-00-04 (www.mailcontrol.com) on 10.71.0.127 X-Virus-Scanned: ClamAV 0.91.2/6861/Mon Apr 21 09:50:29 2008 on bifrost X-Virus-Status: Clean This is a nice, easy patch to fix tests for combining characters along the lines already discussed. If the test for combining characters needs to be made safer when we use an imported wcwidth(), our options are fairly limited; adding a test for !iswcntrl() is about the best I can think of. Index: Src/utils.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/utils.c,v retrieving revision 1.186 diff -u -r1.186 utils.c --- Src/utils.c 20 Apr 2008 21:17:29 -0000 1.186 +++ Src/utils.c 21 Apr 2008 11:44:44 -0000 @@ -3088,7 +3088,7 @@ * logically they are still part of the word, even if they * don't get displayed properly, so always do this. */ - if (iswpunct(c) && wcwidth(c) == 0) + if (IS_COMBINING(c)) return 1; return !!wmemchr(wordchars_wide.chars, c, wordchars_wide.len); Index: Src/zsh.h =================================================================== RCS file: /cvsroot/zsh/zsh/Src/zsh.h,v retrieving revision 1.123 diff -u -r1.123 zsh.h --- Src/zsh.h 20 Apr 2008 21:17:29 -0000 1.123 +++ Src/zsh.h 21 Apr 2008 11:44:45 -0000 @@ -2281,7 +2281,34 @@ #define ZWC(c) L ## c #define ZWS(s) L ## s -#else +/* + * Test for a combining character. + * + * wc is assumed to be a wchar_t (i.e. we don't need zwcwidth). + * + * This may need to be more careful if we import a wcwidth() for + * compatibility to try to avoid clashes with the system library. + * + * Pedantic note: in Unicode, a combining character need not be + * zero length. However, we are concerned here about display; + * we simply need to know whether the character will be displayed + * on top of another one. We use "combining character" in this + * sense throughout the shell. I am not aware of a way of + * detecting the Unicode trait in standard libraries. + */ +#define IS_COMBINING(wc) (wcwidth(wc) == 0) +/* + * Test for the base of a combining character. + * + * We assume a combining character can be successfully displayed with + * any non-space printable character, which is what a graphic character + * is, as long as it has non-zero width. We need to avoid all forms of + * space because the shell will split words on any whitespace. + */ +#define IS_BASECHAR(wc) (iswgraph(wc) && wcwidth(wc) > 0) + +#else /* not MULTIBYTE_SUPPORT */ + #define MB_METACHARINIT() typedef int convchar_t; #define MB_METACHARLENCONV(str, cp) metacharlenconv((str), (cp)) @@ -2296,4 +2323,4 @@ #define ZWC(c) c #define ZWS(s) s -#endif +#endif /* MULTIBYTE_SUPPORT */ Index: Src/Zle/zle_move.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_move.c,v retrieving revision 1.15 diff -u -r1.15 zle_move.c --- Src/Zle/zle_move.c 21 Apr 2008 09:06:10 -0000 1.15 +++ Src/Zle/zle_move.c 21 Apr 2008 11:44:45 -0000 @@ -54,22 +54,20 @@ if (!isset(COMBININGCHARS) || loccs == zlell || loccs == 0) return 0; - /* need to be on zero-width punctuation character */ - if (!iswpunct(zleline[loccs]) || wcwidth(zleline[loccs]) != 0) + /* need to be on combining character */ + if (!IS_COMBINING(zleline[loccs])) return 0; /* yes, go left */ loccs--; for (;;) { - /* second test here is paranoia */ - if (iswalnum(zleline[loccs]) && wcwidth(zleline[loccs]) > 0) { + if (IS_BASECHAR(zleline[loccs])) { /* found start position */ if (setpos) *pos = loccs; return 1; - } else if (!iswpunct(zleline[loccs]) || - wcwidth(zleline[loccs]) != 0) { + } else if (!IS_COMBINING(zleline[loccs])) { /* no go */ return 0; } @@ -103,7 +101,7 @@ while (loccs < zlell) { /* Anything other than a combining char will do here */ - if (!iswpunct(zleline[loccs]) || wcwidth(zleline[loccs]) != 0) { + if (!IS_COMBINING(zleline[loccs])) { if (setpos) *pos = loccs; return 1; @@ -221,16 +219,14 @@ *retchr = wc; return ptr; } - /* HERE: test for combining char, fix when test changes */ - if (!iswpunct(wc) || wcwidth(wc) != 0) { + if (!IS_COMBINING(wc)) { /* not a combining character... */ if (last) { /* * ... but we were looking for a suitable base character, * test it. */ - /* HERE this test will change too */ - if (iwsalnum(wc) && wcwidth(wc) > 0) { + if (IS_BASECHAR(wc)) { /* * Yes, this will do. */ Index: Src/Zle/zle_refresh.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_refresh.c,v retrieving revision 1.59 diff -u -r1.59 zle_refresh.c --- Src/Zle/zle_refresh.c 20 Apr 2008 21:17:30 -0000 1.59 +++ Src/Zle/zle_refresh.c 21 Apr 2008 11:44:46 -0000 @@ -1245,13 +1245,12 @@ rpms.nvcs = rpms.s - nbuf[rpms.nvln = rpms.ln]; } } - if (isset(COMBININGCHARS) && iswalnum(*t)) { + if (isset(COMBININGCHARS) && IS_BASECHAR(*t)) { /* - * Look for combining characters: trailing punctuation - * characters with printing width zero. + * Look for combining characters. */ for (ichars = 1; tmppos + ichars < tmpll; ichars++) { - if (!iswpunct(t[ichars]) || wcwidth(t[ichars]) != 0) + if (!IS_COMBINING(t[ichars])) break; } } else @@ -2267,9 +2266,8 @@ #ifdef MULTIBYTE_SUPPORT else if (iswprint(tmpline[t0]) && (width = wcwidth(tmpline[t0]) > 0)) { vsiz += width; - if (isset(COMBININGCHARS) && iswalnum(tmpline[t0])) { - while (t0 < tmpll-1 && iswpunct(tmpline[t0+1]) && - wcwidth(tmpline[t0+1]) == 0) + if (isset(COMBININGCHARS) && IS_BASECHAR(tmpline[t0])) { + while (t0 < tmpll-1 && IS_COMBINING(tmpline[t0+1])) t0++; } } @@ -2344,14 +2342,12 @@ } else if (iswprint(tmpline[t0]) && (width = wcwidth(tmpline[t0])) > 0) { int ichars; - if (isset(COMBININGCHARS) && iswalnum(tmpline[t0])) { + if (isset(COMBININGCHARS) && IS_BASECHAR(tmpline[t0])) { /* - * Look for combining characters: trailing printable - * characters with printing width zero. + * Look for combining characters. */ for (ichars = 1; t0 + ichars < tmpll; ichars++) { - if (!iswpunct(tmpline[t0+ichars]) || - wcwidth(tmpline[t0+ichars]) != 0) + if (!IS_COMBINING(tmpline[t0+ichars])) break; } } else -- Peter Stephenson Software Engineer CSR PLC, Churchill House, Cambridge Business Park, Cowley Road Cambridge, CB4 0WZ, UK Tel: +44 (0)1223 692070