zsh-workers
 help / color / mirror / code / Atom feed
From: Bart Schaefer <schaefer@brasslantern.com>
To: zsh-workers@sunsite.dk
Subject: Re: PATCH: Apply spell correction to autocd
Date: Mon, 28 Feb 2005 18:14:15 +0000	[thread overview]
Message-ID: <1050228181415.ZM26793@candle.brasslantern.com> (raw)
In-Reply-To: <200502281044.j1SAiJPP005690@news01.csr.com>

On Feb 28, 10:44am, Peter Stephenson wrote:
}
} I don't think the internal spellchecking stuff has ever had a major
} overhaul (as distinct from having extra bits grafted on).  It's not
} surprising if it's weird.  I expect tidying it up would be a good idea.

OK, here's a stab at it.  See embedded comments (gasp).  Apply this instead
of the previous (20882) patch, not on top of it.

Index: Src/utils.c
===================================================================
RCS file: /extra/cvsroot/zsh/zsh-4.0/Src/utils.c,v
retrieving revision 1.21
diff -c -r1.21 utils.c
--- Src/utils.c	18 Feb 2005 17:05:17 -0000	1.21
+++ Src/utils.c	28 Feb 2005 18:06:44 -0000
@@ -1647,11 +1659,12 @@
 mod_export void
 spckword(char **s, int hist, int cmd, int ask)
 {
-    char *t, *u;
+    char *t;
     int x;
     char ic = '\0';
     int ne;
     int preflen = 0;
+    int autocd = cmd && isset(AUTOCD) && strcmp(*s, ".") && strcmp(*s, "..");
 
     if ((histdone & HISTFLAG_NOEXEC) || **s == '-' || **s == '%')
 	return;
@@ -1715,8 +1728,7 @@
 	}
 	if (access(unmeta(guess), F_OK) == 0)
 	    return;
-	if ((u = spname(guess)) != guess)
-	    best = u;
+	best = spname(guess);
 	if (!*t && cmd) {
 	    if (hashcmd(guess, pathchecked))
 		return;
@@ -1726,12 +1738,28 @@
 	    scanhashtable(shfunctab, 1, 0, 0, spscan, 0);
 	    scanhashtable(builtintab, 1, 0, 0, spscan, 0);
 	    scanhashtable(cmdnamtab, 1, 0, 0, spscan, 0);
+	    if (autocd) {
+		char **pp;
+		for (pp = cdpath; *pp; pp++) {
+		    char bestcd[PATH_MAX + 1];
+		    int thisdist;
+		    /* Less than d here, instead of less than or equal  *
+		     * as used in spscan(), so that an autocd is chosen *
+		     * only when it is better than anything so far, and *
+		     * so we prefer directories earlier in the cdpath.  */
+		    if ((thisdist = mindist(*pp, *s, bestcd)) < d) {
+			best = dupstring(bestcd);
+			d = thisdist;
+		    }
+		}
+	    }
 	}
     }
     if (errflag)
 	return;
     if (best && (int)strlen(best) > 1 && strcmp(best, guess)) {
 	if (ic) {
+	    char *u;
 	    if (preflen) {
 		/* do not correct the result of an expansion */
 		if (strncmp(guess, best, preflen))
@@ -2421,10 +2449,14 @@
 {
     char *p, spnameguess[PATH_MAX + 1], spnamebest[PATH_MAX + 1];
     static char newname[PATH_MAX + 1];
-    char *new = newname, *old;
-    int bestdist = 200, thisdist;
+    char *new = newname, *old = oldname;
+    int bestdist = 0, thisdist, thresh, maxthresh = 0;
 
-    old = oldname;
+    /* This loop corrects each directory component of the path, stopping *
+     * when any correction distance would exceed the distance threshold. *
+     * NULL is returned only if the first component cannot be corrected; *
+     * otherwise a copy of oldname with a corrected prefix is returned.  *
+     * Rationale for this, if there ever was any, has been forgotten.    */
     for (;;) {
 	while (*old == '/')
 	    *new++ = *old++;
@@ -2436,15 +2468,29 @@
 	    if (p < spnameguess + PATH_MAX)
 		*p++ = *old;
 	*p = '\0';
-	if ((thisdist = mindist(newname, spnameguess, spnamebest)) >= 3) {
-	    if (bestdist < 3) {
+	/* Every component is allowed a single distance 2 correction or two *
+	 * distance 1 corrections.  Longer ones get additional corrections. */
+	thresh = (int)(p - spnameguess) / 4 + 1;
+	if (thresh < 3)
+	    thresh = 3;
+	if ((thisdist = mindist(newname, spnameguess, spnamebest)) >= thresh) {
+	    /* The next test is always true, except for the first path    *
+	     * component.  We could initialize bestdist to some large     *
+	     * constant instead, and then compare to that constant here,  *
+	     * because an invariant is that we've never exceeded the      *
+	     * threshold for any component so far; but I think that looks *
+	     * odd to the human reader, and we may make use of the total  *
+	     * distance for all corrections at some point in the future.  */
+	    if (bestdist < maxthresh) {
 		strcpy(new, spnameguess);
 		strcat(new, old);
 		return newname;
 	    } else
 	    	return NULL;
-	} else
-	    bestdist = thisdist;
+	} else {
+	    maxthresh = bestdist + thresh;
+	    bestdist += thisdist;
+	}
 	for (p = spnamebest; (*new = *p++);)
 	    new++;
     }
@@ -2487,6 +2533,7 @@
 static int
 spdist(char *s, char *t, int thresh)
 {
+    /* TODO: Correction for non-ASCII and multibyte-input keyboards. */
     char *p, *q;
     const char qwertykeymap[] =
     "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
@@ -2520,7 +2567,7 @@
 
     if (!strcmp(s, t))
 	return 0;
-/* any number of upper/lower mistakes allowed (dist = 1) */
+    /* any number of upper/lower mistakes allowed (dist = 1) */
     for (p = s, q = t; *p && tulower(*p) == tulower(*q); p++, q++);
     if (!*p && !*q)
 	return 1;
@@ -2544,7 +2591,7 @@
 	    int t0;
 	    char *z;
 
-	/* mistyped letter */
+	    /* mistyped letter */
 
 	    if (!(z = strchr(keymap, p[0])) || *z == '\n' || *z == '\t')
 		return spdist(p + 1, q + 1, thresh - 1) + 1;


  reply	other threads:[~2005-02-28 18:14 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-02-27 20:44 Bart Schaefer
2005-02-28  6:54 ` Bart Schaefer
2005-02-28 10:44   ` Peter Stephenson
2005-02-28 18:14     ` Bart Schaefer [this message]
2005-02-28 18:17       ` Clint Adams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1050228181415.ZM26793@candle.brasslantern.com \
    --to=schaefer@brasslantern.com \
    --cc=zsh-workers@sunsite.dk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox:

	https://git.vuxu.org/mirror/zsh/

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).