From: Bart Schaefer <schaefer@brasslantern.com>
To: zsh-workers@sunsite.dk
Subject: Re: PATCH: Apply spell correction to autocd
Date: Mon, 28 Feb 2005 18:14:15 +0000 [thread overview]
Message-ID: <1050228181415.ZM26793@candle.brasslantern.com> (raw)
In-Reply-To: <200502281044.j1SAiJPP005690@news01.csr.com>
On Feb 28, 10:44am, Peter Stephenson wrote:
}
} I don't think the internal spellchecking stuff has ever had a major
} overhaul (as distinct from having extra bits grafted on). It's not
} surprising if it's weird. I expect tidying it up would be a good idea.
OK, here's a stab at it. See embedded comments (gasp). Apply this instead
of the previous (20882) patch, not on top of it.
Index: Src/utils.c
===================================================================
RCS file: /extra/cvsroot/zsh/zsh-4.0/Src/utils.c,v
retrieving revision 1.21
diff -c -r1.21 utils.c
--- Src/utils.c 18 Feb 2005 17:05:17 -0000 1.21
+++ Src/utils.c 28 Feb 2005 18:06:44 -0000
@@ -1647,11 +1659,12 @@
mod_export void
spckword(char **s, int hist, int cmd, int ask)
{
- char *t, *u;
+ char *t;
int x;
char ic = '\0';
int ne;
int preflen = 0;
+ int autocd = cmd && isset(AUTOCD) && strcmp(*s, ".") && strcmp(*s, "..");
if ((histdone & HISTFLAG_NOEXEC) || **s == '-' || **s == '%')
return;
@@ -1715,8 +1728,7 @@
}
if (access(unmeta(guess), F_OK) == 0)
return;
- if ((u = spname(guess)) != guess)
- best = u;
+ best = spname(guess);
if (!*t && cmd) {
if (hashcmd(guess, pathchecked))
return;
@@ -1726,12 +1738,28 @@
scanhashtable(shfunctab, 1, 0, 0, spscan, 0);
scanhashtable(builtintab, 1, 0, 0, spscan, 0);
scanhashtable(cmdnamtab, 1, 0, 0, spscan, 0);
+ if (autocd) {
+ char **pp;
+ for (pp = cdpath; *pp; pp++) {
+ char bestcd[PATH_MAX + 1];
+ int thisdist;
+ /* Less than d here, instead of less than or equal *
+ * as used in spscan(), so that an autocd is chosen *
+ * only when it is better than anything so far, and *
+ * so we prefer directories earlier in the cdpath. */
+ if ((thisdist = mindist(*pp, *s, bestcd)) < d) {
+ best = dupstring(bestcd);
+ d = thisdist;
+ }
+ }
+ }
}
}
if (errflag)
return;
if (best && (int)strlen(best) > 1 && strcmp(best, guess)) {
if (ic) {
+ char *u;
if (preflen) {
/* do not correct the result of an expansion */
if (strncmp(guess, best, preflen))
@@ -2421,10 +2449,14 @@
{
char *p, spnameguess[PATH_MAX + 1], spnamebest[PATH_MAX + 1];
static char newname[PATH_MAX + 1];
- char *new = newname, *old;
- int bestdist = 200, thisdist;
+ char *new = newname, *old = oldname;
+ int bestdist = 0, thisdist, thresh, maxthresh = 0;
- old = oldname;
+ /* This loop corrects each directory component of the path, stopping *
+ * when any correction distance would exceed the distance threshold. *
+ * NULL is returned only if the first component cannot be corrected; *
+ * otherwise a copy of oldname with a corrected prefix is returned. *
+ * Rationale for this, if there ever was any, has been forgotten. */
for (;;) {
while (*old == '/')
*new++ = *old++;
@@ -2436,15 +2468,29 @@
if (p < spnameguess + PATH_MAX)
*p++ = *old;
*p = '\0';
- if ((thisdist = mindist(newname, spnameguess, spnamebest)) >= 3) {
- if (bestdist < 3) {
+ /* Every component is allowed a single distance 2 correction or two *
+ * distance 1 corrections. Longer ones get additional corrections. */
+ thresh = (int)(p - spnameguess) / 4 + 1;
+ if (thresh < 3)
+ thresh = 3;
+ if ((thisdist = mindist(newname, spnameguess, spnamebest)) >= thresh) {
+ /* The next test is always true, except for the first path *
+ * component. We could initialize bestdist to some large *
+ * constant instead, and then compare to that constant here, *
+ * because an invariant is that we've never exceeded the *
+ * threshold for any component so far; but I think that looks *
+ * odd to the human reader, and we may make use of the total *
+ * distance for all corrections at some point in the future. */
+ if (bestdist < maxthresh) {
strcpy(new, spnameguess);
strcat(new, old);
return newname;
} else
return NULL;
- } else
- bestdist = thisdist;
+ } else {
+ maxthresh = bestdist + thresh;
+ bestdist += thisdist;
+ }
for (p = spnamebest; (*new = *p++);)
new++;
}
@@ -2487,6 +2533,7 @@
static int
spdist(char *s, char *t, int thresh)
{
+ /* TODO: Correction for non-ASCII and multibyte-input keyboards. */
char *p, *q;
const char qwertykeymap[] =
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
@@ -2520,7 +2567,7 @@
if (!strcmp(s, t))
return 0;
-/* any number of upper/lower mistakes allowed (dist = 1) */
+ /* any number of upper/lower mistakes allowed (dist = 1) */
for (p = s, q = t; *p && tulower(*p) == tulower(*q); p++, q++);
if (!*p && !*q)
return 1;
@@ -2544,7 +2591,7 @@
int t0;
char *z;
- /* mistyped letter */
+ /* mistyped letter */
if (!(z = strchr(keymap, p[0])) || *z == '\n' || *z == '\t')
return spdist(p + 1, q + 1, thresh - 1) + 1;
next prev parent reply other threads:[~2005-02-28 18:14 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-02-27 20:44 Bart Schaefer
2005-02-28 6:54 ` Bart Schaefer
2005-02-28 10:44 ` Peter Stephenson
2005-02-28 18:14 ` Bart Schaefer [this message]
2005-02-28 18:17 ` Clint Adams
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1050228181415.ZM26793@candle.brasslantern.com \
--to=schaefer@brasslantern.com \
--cc=zsh-workers@sunsite.dk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/zsh/
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for its NNTP newsgroup(s).