From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 9881 invoked from network); 28 Feb 2005 18:14:37 -0000 Received: from news.dotsrc.org (HELO a.mx.sunsite.dk) (130.225.247.88) by ns1.primenet.com.au with SMTP; 28 Feb 2005 18:14:37 -0000 Received: (qmail 76711 invoked from network); 28 Feb 2005 18:14:32 -0000 Received: from sunsite.dk (130.225.247.90) by a.mx.sunsite.dk with SMTP; 28 Feb 2005 18:14:32 -0000 Received: (qmail 5741 invoked by alias); 28 Feb 2005 18:14:24 -0000 Mailing-List: contact zsh-workers-help@sunsite.dk; run by ezmlm Precedence: bulk X-No-Archive: yes X-Seq: 20892 Received: (qmail 5731 invoked from network); 28 Feb 2005 18:14:24 -0000 Received: from news.dotsrc.org (HELO a.mx.sunsite.dk) (130.225.247.88) by sunsite.dk with SMTP; 28 Feb 2005 18:14:24 -0000 Received: (qmail 76297 invoked from network); 28 Feb 2005 18:14:23 -0000 Received: from vms048pub.verizon.net (206.46.252.48) by a.mx.sunsite.dk with SMTP; 28 Feb 2005 18:14:19 -0000 Received: from candle.brasslantern.com ([4.11.1.68]) by vms048.mailsrvcs.net (Sun Java System Messaging Server 6.2 HotFix 0.04 (built Dec 24 2004)) with ESMTPA id <0ICM00FYWVZTK921@vms048.mailsrvcs.net> for zsh-workers@sunsite.dk; Mon, 28 Feb 2005 12:14:18 -0600 (CST) Received: from candle.brasslantern.com (IDENT:schaefer@localhost [127.0.0.1]) by candle.brasslantern.com (8.12.11/8.12.11) with ESMTP id j1SIEGFn026795 for ; Mon, 28 Feb 2005 10:14:16 -0800 Received: (from schaefer@localhost) by candle.brasslantern.com (8.12.11/8.12.11/Submit) id j1SIEFh0026794 for zsh-workers@sunsite.dk; Mon, 28 Feb 2005 10:14:15 -0800 Date: Mon, 28 Feb 2005 18:14:15 +0000 From: Bart Schaefer Subject: Re: PATCH: Apply spell correction to autocd In-reply-to: <200502281044.j1SAiJPP005690@news01.csr.com> To: zsh-workers@sunsite.dk Message-id: <1050228181415.ZM26793@candle.brasslantern.com> MIME-version: 1.0 X-Mailer: Z-Mail (5.0.0 30July97) Content-type: text/plain; charset=us-ascii References: <1050227204407.ZM19297@candle.brasslantern.com> <1050228065407.ZM20816@candle.brasslantern.com> <200502281044.j1SAiJPP005690@news01.csr.com> Comments: In reply to Peter Stephenson "Re: PATCH: Apply spell correction to autocd" (Feb 28, 10:44am) X-Spam-Checker-Version: SpamAssassin 3.0.2 on a.mx.sunsite.dk X-Spam-Level: X-Spam-Status: No, score=-2.6 required=6.0 tests=BAYES_00 autolearn=ham version=3.0.2 X-Spam-Hits: -2.6 On Feb 28, 10:44am, Peter Stephenson wrote: } } I don't think the internal spellchecking stuff has ever had a major } overhaul (as distinct from having extra bits grafted on). It's not } surprising if it's weird. I expect tidying it up would be a good idea. OK, here's a stab at it. See embedded comments (gasp). Apply this instead of the previous (20882) patch, not on top of it. Index: Src/utils.c =================================================================== RCS file: /extra/cvsroot/zsh/zsh-4.0/Src/utils.c,v retrieving revision 1.21 diff -c -r1.21 utils.c --- Src/utils.c 18 Feb 2005 17:05:17 -0000 1.21 +++ Src/utils.c 28 Feb 2005 18:06:44 -0000 @@ -1647,11 +1659,12 @@ mod_export void spckword(char **s, int hist, int cmd, int ask) { - char *t, *u; + char *t; int x; char ic = '\0'; int ne; int preflen = 0; + int autocd = cmd && isset(AUTOCD) && strcmp(*s, ".") && strcmp(*s, ".."); if ((histdone & HISTFLAG_NOEXEC) || **s == '-' || **s == '%') return; @@ -1715,8 +1728,7 @@ } if (access(unmeta(guess), F_OK) == 0) return; - if ((u = spname(guess)) != guess) - best = u; + best = spname(guess); if (!*t && cmd) { if (hashcmd(guess, pathchecked)) return; @@ -1726,12 +1738,28 @@ scanhashtable(shfunctab, 1, 0, 0, spscan, 0); scanhashtable(builtintab, 1, 0, 0, spscan, 0); scanhashtable(cmdnamtab, 1, 0, 0, spscan, 0); + if (autocd) { + char **pp; + for (pp = cdpath; *pp; pp++) { + char bestcd[PATH_MAX + 1]; + int thisdist; + /* Less than d here, instead of less than or equal * + * as used in spscan(), so that an autocd is chosen * + * only when it is better than anything so far, and * + * so we prefer directories earlier in the cdpath. */ + if ((thisdist = mindist(*pp, *s, bestcd)) < d) { + best = dupstring(bestcd); + d = thisdist; + } + } + } } } if (errflag) return; if (best && (int)strlen(best) > 1 && strcmp(best, guess)) { if (ic) { + char *u; if (preflen) { /* do not correct the result of an expansion */ if (strncmp(guess, best, preflen)) @@ -2421,10 +2449,14 @@ { char *p, spnameguess[PATH_MAX + 1], spnamebest[PATH_MAX + 1]; static char newname[PATH_MAX + 1]; - char *new = newname, *old; - int bestdist = 200, thisdist; + char *new = newname, *old = oldname; + int bestdist = 0, thisdist, thresh, maxthresh = 0; - old = oldname; + /* This loop corrects each directory component of the path, stopping * + * when any correction distance would exceed the distance threshold. * + * NULL is returned only if the first component cannot be corrected; * + * otherwise a copy of oldname with a corrected prefix is returned. * + * Rationale for this, if there ever was any, has been forgotten. */ for (;;) { while (*old == '/') *new++ = *old++; @@ -2436,15 +2468,29 @@ if (p < spnameguess + PATH_MAX) *p++ = *old; *p = '\0'; - if ((thisdist = mindist(newname, spnameguess, spnamebest)) >= 3) { - if (bestdist < 3) { + /* Every component is allowed a single distance 2 correction or two * + * distance 1 corrections. Longer ones get additional corrections. */ + thresh = (int)(p - spnameguess) / 4 + 1; + if (thresh < 3) + thresh = 3; + if ((thisdist = mindist(newname, spnameguess, spnamebest)) >= thresh) { + /* The next test is always true, except for the first path * + * component. We could initialize bestdist to some large * + * constant instead, and then compare to that constant here, * + * because an invariant is that we've never exceeded the * + * threshold for any component so far; but I think that looks * + * odd to the human reader, and we may make use of the total * + * distance for all corrections at some point in the future. */ + if (bestdist < maxthresh) { strcpy(new, spnameguess); strcat(new, old); return newname; } else return NULL; - } else - bestdist = thisdist; + } else { + maxthresh = bestdist + thresh; + bestdist += thisdist; + } for (p = spnamebest; (*new = *p++);) new++; } @@ -2487,6 +2533,7 @@ static int spdist(char *s, char *t, int thresh) { + /* TODO: Correction for non-ASCII and multibyte-input keyboards. */ char *p, *q; const char qwertykeymap[] = "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\ @@ -2520,7 +2567,7 @@ if (!strcmp(s, t)) return 0; -/* any number of upper/lower mistakes allowed (dist = 1) */ + /* any number of upper/lower mistakes allowed (dist = 1) */ for (p = s, q = t; *p && tulower(*p) == tulower(*q); p++, q++); if (!*p && !*q) return 1; @@ -2544,7 +2591,7 @@ int t0; char *z; - /* mistyped letter */ + /* mistyped letter */ if (!(z = strchr(keymap, p[0])) || *z == '\n' || *z == '\t') return spdist(p + 1, q + 1, thresh - 1) + 1;