From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 24927 invoked from network); 17 Apr 2008 12:08:43 -0000 X-Spam-Checker-Version: SpamAssassin 3.2.4 (2008-01-01) on f.primenet.com.au X-Spam-Level: X-Spam-Status: No, score=-2.3 required=5.0 tests=AWL,BAYES_00 autolearn=ham version=3.2.4 Received: from news.dotsrc.org (HELO a.mx.sunsite.dk) (130.225.247.88) by ns1.primenet.com.au with SMTP; 17 Apr 2008 12:08:43 -0000 Received-SPF: none (ns1.primenet.com.au: domain at sunsite.dk does not designate permitted sender hosts) Received: (qmail 55795 invoked from network); 17 Apr 2008 12:08:37 -0000 Received: from sunsite.dk (130.225.247.90) by a.mx.sunsite.dk with SMTP; 17 Apr 2008 12:08:37 -0000 Received: (qmail 14458 invoked by alias); 17 Apr 2008 12:08:34 -0000 Mailing-List: contact zsh-workers-help@sunsite.dk; run by ezmlm Precedence: bulk X-No-Archive: yes X-Seq: 24825 Received: (qmail 14441 invoked from network); 17 Apr 2008 12:08:34 -0000 Received: from bifrost.dotsrc.org (130.225.254.106) by sunsite.dk with SMTP; 17 Apr 2008 12:08:34 -0000 Received: from cluster-d.mailcontrol.com (cluster-d.mailcontrol.com [217.69.20.190]) by bifrost.dotsrc.org (Postfix) with ESMTP id 00D028043AC7 for ; Thu, 17 Apr 2008 14:08:24 +0200 (CEST) Received: from cameurexb01.EUROPE.ROOT.PRI ([62.189.241.200]) by rly41d.srv.mailcontrol.com (MailControl) with ESMTP id m3HC89Wf005196 for ; Thu, 17 Apr 2008 13:08:27 +0100 Received: from news01.csr.com ([10.103.143.38]) by cameurexb01.EUROPE.ROOT.PRI with Microsoft SMTPSVC(6.0.3790.3959); Thu, 17 Apr 2008 13:08:24 +0100 Received: from news01.csr.com (localhost.localdomain [127.0.0.1]) by news01.csr.com (8.14.2/8.13.4) with ESMTP id m3HC8ONG002638 for ; Thu, 17 Apr 2008 13:08:24 +0100 Received: from csr.com (pws@localhost) by news01.csr.com (8.14.2/8.14.2/Submit) with ESMTP id m3HC8OXf002635 for ; Thu, 17 Apr 2008 13:08:24 +0100 X-Authentication-Warning: news01.csr.com: pws owned process doing -bs To: zsh-workers@sunsite.dk 
(Zsh hackers list) Subject: PATCH: fix the word boundary stuff X-Mailer: MH-E 8.0.3; nmh 1.2-20070115cvs; GNU Emacs 22.1.1 Date: Thu, 17 Apr 2008 13:08:24 +0100 Message-ID: <2634.1208434104@csr.com> From: Peter Stephenson X-OriginalArrivalTime: 17 Apr 2008 12:08:24.0128 (UTC) FILETIME=[BC5AB400:01C8A083] X-Scanned-By: MailControl A-08-00-04 (www.mailcontrol.com) on 10.68.0.151 X-Virus-Scanned: ClamAV 0.91.2/6810/Thu Apr 17 13:25:25 2008 on bifrost X-Virus-Status: Clean As I flagged up earlier, I think on reflection that zero-width punctuation characters should be treated as part of a word even if COMBINING_CHARS is off, since logically it doesn't really matter how they're displayed. This doesn't fix the ...-match function widgets. Index: Src/utils.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/utils.c,v retrieving revision 1.184 diff -u -r1.184 utils.c --- Src/utils.c 13 Apr 2008 16:58:42 -0000 1.184 +++ Src/utils.c 17 Apr 2008 12:06:34 -0000 @@ -3082,12 +3082,13 @@ if (iswalnum(c)) return 1; /* - * If we are handling combining characters, anything - * printable with zero width needs to be considered - * part of a word. + * If we are handling combining characters, any punctuation + * characters with zero width need to be considered part of + * a word. If we are not handling combining characters then + * logically they are still part of the word, even if they + * don't get displayed properly, so always do this. 
*/ - if (isset(COMBININGCHARS) && - iswprint(c) && wcwidth(c) == 0) + if (iswpunct(c) && wcwidth(c) == 0) return 1; return !!wmemchr(wordchars_wide.chars, c, wordchars_wide.len); Index: Src/Zle/zle_word.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_word.c,v retrieving revision 1.10 diff -u -r1.10 zle_word.c --- Src/Zle/zle_word.c 13 Apr 2008 16:58:44 -0000 1.10 +++ Src/Zle/zle_word.c 17 Apr 2008 12:06:35 -0000 @@ -31,9 +31,13 @@ #include "zle_word.pro" /* - * HERE: our handling of combining characters may be wrong. We - * should make sure we only consider a combining character part of - * a word if the base character is. + * In principle we shouldn't consider a zero-width punctuation + * character (i.e. a modifier of some sort) part of the word unless + * the base character is. However, we only consider them part of + * a word if we so consider all alphanumerics, so the distinction + * only applies if the characters are modifying something they probably + * ought not to be modifying. It's not really clear we need to + * be clever about this not very useful case. 
*/ /**/ @@ -147,11 +151,20 @@ if (n < 0) return 1; while (n--) { - /* HERE: the zlecs + 1 here is suspect */ - while (zlecs != zlell && ZC_iblank(zleline[zlecs + 1])) - INCCS(); - while (zlecs != zlell && !ZC_iblank(zleline[zlecs + 1])) - INCCS(); + while (zlecs != zlell) { + int pos = zlecs; + INCPOS(pos); + if (!ZC_iblank(zleline[pos])) + break; + zlecs = pos; + } + while (zlecs != zlell) { + int pos = zlecs; + INCPOS(pos); + if (ZC_iblank(zleline[pos])) + break; + zlecs = pos; + } } if (zlecs != zlell && virangeflag) INCCS(); @@ -173,15 +186,37 @@ } while (n--) { /* HERE: the zlecs + 1 here is suspect */ - if (ZC_iblank(zleline[zlecs + 1])) - while (zlecs != zlell && ZC_iblank(zleline[zlecs + 1])) - INCCS(); - if (Z_vialnum(zleline[zlecs + 1])) - while (zlecs != zlell && Z_vialnum(zleline[zlecs + 1])) - INCCS(); - else - while (zlecs != zlell && !Z_vialnum(zleline[zlecs + 1]) && !ZC_iblank(zleline[zlecs + 1])) - INCCS(); + int pos; + while (zlecs != zlell) { + pos = zlecs; + INCPOS(pos); + if (!ZC_inblank(zleline[pos])) + break; + zlecs = pos; + } + if (zlecs != zlell) { + pos = zlecs; + INCPOS(pos); + if (Z_vialnum(zleline[pos])) { + for (;;) { + zlecs = pos; + if (zlecs == zlell) + break; + INCPOS(pos); + if (!Z_vialnum(zleline[pos])) + break; + } + } else { + for (;;) { + zlecs = pos; + if (zlecs == zlell) + break; + INCPOS(pos); + if (Z_vialnum(zleline[pos]) || ZC_iblank(zleline[pos])) + break; + } + } + } } if (zlecs != zlell && virangeflag) INCCS(); @@ -202,11 +237,20 @@ return ret; } while (n--) { - /* HERE: the zlecs - 1 here is suspect */ - while (zlecs && !ZC_iword(zleline[zlecs - 1])) - DECCS(); - while (zlecs && ZC_iword(zleline[zlecs - 1])) - DECCS(); + while (zlecs) { + int pos = zlecs; + DECPOS(pos); + if (ZC_iword(zleline[pos])) + break; + zlecs = pos; + } + while (zlecs) { + int pos = zlecs; + DECPOS(pos); + if (!ZC_iword(zleline[pos])) + break; + zlecs = pos; + } } return 0; } @@ -225,15 +269,36 @@ return ret; } while (n--) { - /* HERE: the 
zlecs - 1 here is suspect */ - while (zlecs && ZC_iblank(zleline[zlecs - 1])) - DECCS(); - if (Z_vialnum(zleline[zlecs - 1])) - while (zlecs && Z_vialnum(zleline[zlecs - 1])) - DECCS(); - else - while (zlecs && !Z_vialnum(zleline[zlecs - 1]) && !ZC_iblank(zleline[zlecs - 1])) - DECCS(); + while (zlecs) { + int pos = zlecs; + DECPOS(pos); + if (!ZC_iblank(zleline[pos])) + break; + zlecs = pos; + } + if (zlecs) { + int pos = zlecs; + DECPOS(pos); + if (Z_vialnum(zleline[pos])) { + for (;;) { + zlecs = pos; + if (zlecs == 0) + break; + DECPOS(pos); + if (!Z_vialnum(zleline[pos])) + break; + } + } else { + for (;;) { + zlecs = pos; + if (zlecs == 0) + break; + DECPOS(pos); + if (Z_vialnum(zleline[pos]) || ZC_iblank(zleline[pos])) + break; + } + } + } } return 0; } @@ -252,10 +317,20 @@ return ret; } while (n--) { - while (zlecs && ZC_iblank(zleline[zlecs - 1])) - DECCS(); - while (zlecs && !ZC_iblank(zleline[zlecs - 1])) - DECCS(); + while (zlecs) { + int pos = zlecs; + DECPOS(pos); + if (!ZC_iblank(zleline[pos])) + break; + zlecs = pos; + } + while (zlecs) { + int pos = zlecs; + DECPOS(pos); + if (ZC_iblank(zleline[pos])) + break; + zlecs = pos; + } } return 0; } @@ -274,10 +349,20 @@ return ret; } while (n--) { - while (zlecs && !ZC_iword(zleline[zlecs - 1])) - DECCS(); - while (zlecs && ZC_iword(zleline[zlecs - 1])) - DECCS(); + while (zlecs) { + int pos = zlecs; + DECPOS(pos); + if (ZC_iword(zleline[pos])) + break; + zlecs = pos; + } + while (zlecs) { + int pos = zlecs; + DECPOS(pos); + if (!ZC_iword(zleline[pos])) + break; + zlecs = pos; + } } return 0; } @@ -296,14 +381,20 @@ return ret; } while (n--) { - /* - * HERE: the zlecs - 1 here is suspect, and we should - * do the DECCS() thing. 
- */ - while (x && !ZC_iword(zleline[x - 1])) - x--; - while (x && ZC_iword(zleline[x - 1])) - x--; + while (x) { + int pos = x; + DECPOS(pos); + if (ZC_iword(zleline[pos])) + break; + x = pos; + } + while (x) { + int pos = x; + DECPOS(pos); + if (!ZC_iword(zleline[pos])) + break; + x = pos; + } } backdel(zlecs - x, CUT_RAW); return 0; @@ -320,18 +411,36 @@ return 1; /* this taken from "vibackwardword" */ while (n--) { - /* - * HERE: the zlecs - 1 here is suspect, and we should - * do the DECCS() thing. - */ - while ((x > lim) && ZC_iblank(zleline[x - 1])) - x--; - if (Z_vialnum(zleline[x - 1])) - while ((x > lim) && Z_vialnum(zleline[x - 1])) - x--; - else - while ((x > lim) && !Z_vialnum(zleline[x - 1]) && !ZC_iblank(zleline[x - 1])) - x--; + while (x > lim) { + int pos = x; + DECPOS(pos); + if (!ZC_iblank(zleline[pos])) + break; + x = pos; + } + if (x > lim) { + int pos = x; + DECPOS(pos); + if (Z_vialnum(zleline[pos])) { + for (;;) { + x = pos; + if (x <= lim) + break; + DECPOS(pos); + if (!Z_vialnum(zleline[pos])) + break; + } + } else { + for (;;) { + x = pos; + if (x <= lim) + break; + DECPOS(pos); + if (Z_vialnum(zleline[pos]) || ZC_iblank(zleline[pos])) + break; + } + } + } } backkill(zlecs - x, CUT_FRONT); return 0; @@ -352,14 +461,20 @@ return ret; } while (n--) { - /* - * HERE: the zlecs - 1 here is suspect, and we should - * do the DECCS() thing. 
- */ - while (x && !ZC_iword(zleline[x - 1])) - x--; - while (x && ZC_iword(zleline[x - 1])) - x--; + while (x) { + int pos = x; + DECPOS(pos); + if (ZC_iword(zleline[x])) + break; + x = pos; + } + while (x) { + int pos = x; + DECPOS(pos); + if (!ZC_iword(zleline[x])) + break; + x = pos; + } } backkill(zlecs - x, CUT_FRONT); return 0; @@ -451,11 +566,10 @@ return ret; } while (n--) { - /* HERE: we should do the INCCS() thing */ while (x != zlell && !ZC_iword(zleline[x])) - x++; + INCPOS(x); while (x != zlell && ZC_iword(zleline[x])) - x++; + INCPOS(x); } foredel(x - zlecs, CUT_RAW); return 0; @@ -476,11 +590,10 @@ return ret; } while (n--) { - /* HERE: we should do the INCCS() thing */ while (x != zlell && !ZC_iword(zleline[x])) - x++; + INCPOS(x); while (x != zlell && ZC_iword(zleline[x])) - x++; + INCPOS(x); } forekill(x - zlecs, CUT_RAW); return 0; @@ -490,7 +603,7 @@ int transposewords(UNUSED(char **args)) { - int p1, p2, p3, p4, len, x = zlecs; + int p1, p2, p3, p4, len, x = zlecs, pos; ZLE_STRING_T temp, pp; int n = zmult; int neg = n < 0, ocs = zlecs; @@ -498,28 +611,54 @@ if (neg) n = -n; while (n--) { - /* - * HERE: we should do the INCCS() thing. - * A great deal of the following needs rewriting. 
- */ while (x != zlell && zleline[x] != ZWC('\n') && !ZC_iword(zleline[x])) - x++; + INCPOS(x); if (x == zlell || zleline[x] == ZWC('\n')) { x = zlecs; - while (x && zleline[x - 1] != ZWC('\n') && !ZC_iword(zleline[x])) - x--; - if (!x || zleline[x - 1] == ZWC('\n')) + while (x) { + if (ZC_iword(zleline[x])) + break; + pos = x; + DECPOS(pos); + if (zleline[pos] == ZWC('\n')) + break; + x = pos; + } + if (!x) + return 1; + pos = x; + DECPOS(pos); + if (zleline[pos] == ZWC('\n')) return 1; + x = pos; + } + for (p4 = x; p4 != zlell && ZC_iword(zleline[p4]); INCPOS(p4)) + ; + for (p3 = p4; p3; ) { + pos = p3; + DECPOS(pos); + if (!ZC_iword(zleline[pos])) + break; + p3 = pos; } - for (p4 = x; p4 != zlell && ZC_iword(zleline[p4]); p4++); - for (p3 = p4; p3 && ZC_iword(zleline[p3 - 1]); p3--); if (!p3) return 1; - for (p2 = p3; p2 && !ZC_iword(zleline[p2 - 1]); p2--); + for (p2 = p3; p2; ) { + pos = p2; + DECPOS(pos); + if (ZC_iword(zleline[pos])) + break; + p2 = pos; + } if (!p2) return 1; - for (p1 = p2; p1 && ZC_iword(zleline[p1 - 1]); p1--); - + for (p1 = p2; p1; ) { + pos = p1; + DECPOS(pos); + if (!ZC_iword(zleline[pos])) + break; + p1 = pos; + } pp = temp = (ZLE_STRING_T)zhalloc((p4 - p1)*ZLE_CHAR_SIZE); len = p4 - p3; ZS_memcpy(pp, zleline + p3, len); -- Peter Stephenson Software Engineer CSR PLC, Churchill House, Cambridge Business Park, Cowley Road Cambridge, CB4 0WZ, UK Tel: +44 (0)1223 692070