Index: Src/Zle/compmatch.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/Zle/compmatch.c,v retrieving revision 1.50 diff -u -p -r1.50 compmatch.c --- Src/Zle/compmatch.c 30 Sep 2006 06:53:15 -0000 1.50 +++ Src/Zle/compmatch.c 11 Oct 2006 17:43:58 -0000 @@ -1087,51 +1087,70 @@ comp_match(char *pfx, char *sfx, char *w return r; } -/* Check if the given pattern matches the given string. * - * p and s are either anchor or line pattern and string; +/* + * Try to match single pair of characters from line and word + * Either of line or word patterns (and characters) may be missing + */ +static int +cm_try_single(Cpattern lp, char lc, Cpattern wp, char wc) +{ + char li = 0, wi = 0; + + if (lp) { + li = lp->tab[STOUC(lc)]; + if (!li) + return 0; + } + if (wp) { + wi = wp->tab[STOUC(wc)]; + if (!wi) + return 0; + } + + /* Equiv is expected to be defined on both sides */ + if (lp && lp->equiv && wp && wp->equiv && li != wi) + return 0; + + return 1; +} + +/* + * Check if the given pattern matches the given string. * + * lp and ls are either anchor or line pattern and string; * wp and ws are word (candidate) pattern and string - * - * If only one pattern is given, we just check if characters match - * If both line and word are given, we check that characters match - * for {...} classes by comparing relative numbers in sequence. - * - * Patterns and strings are always passed in pairs, so it is enough - * to check for non-NULL wp. p should always be present. */ /**/ mod_export int -pattern_match(Cpattern p, char *s, Cpattern wp, char *ws) +pattern_match(Cpattern lp, char *ls, Cpattern wp, char *ws) { - unsigned char c; - unsigned char wc; - - while (p && wp && *s && *ws) { - c = p->tab[*((unsigned char *) s)]; - wc = wp->tab[*((unsigned char *) ws)]; - - if (!c || !wc || c != wc) + while (lp && wp && *ls && *ws) { + if (!cm_try_single(lp, *ls, wp, *ws)) return 0; - s++; + ls++; ws++; - p = p->next; + lp = lp->next; wp = wp->next; } - while (p && *s) { - if (!p->tab[*((unsigned char *) s)]) + while (lp && *ls) { + if (!cm_try_single(lp, *ls, NULL, '\0')) return 0; - p = p->next; - s++; + lp = lp->next; + ls++; } while (wp && *ws) { - if (!wp->tab[*((unsigned char *) ws)]) + if (!cm_try_single(NULL, '\0', wp, *ws)) return 0; wp = wp->next; ws++; } + + /* If any line is shorter than pattern, return falure */ + if ((lp && !*ls) || (wp && !*ws)) + return 0; return 1; } @@ -1214,106 +1233,164 @@ bld_parts(char *str, int len, int plen, return ret; } -/* This builds all the possible line patterns for the pattern pat in the - * buffer line. Initially line is the same as lp, but during recursive - * calls lp is incremented for storing successive characters. Whenever - * a full possible string is build, we test if this line matches the - * string given by wlen and word. +/* + * Compare two different line patterns if they can have some common character + * Insert one of common characters in line we are building (it does not matter + * which one) + * mlp - line pattern which has matched before + * mwp - word pattern which has matched before + * nlp - new line pattern that we currently test against mlp + * nwp - new word pattern that we currently test against mwp + * line - line we build; we insert characters there + */ + +/**/ +static int +pattern_compare(Cpattern mlp, Cpattern mwp, Cpattern nlp, Cpattern nwp, + char *line) +{ + while (nlp) { + int i; + + /* + * test to see if mlp and nlp have something in commons + * nlp cannot be less than mlp (we check pattern length before) + * but word pattern may of course be shorter than line ... + */ + for (i = 0; i < 256; i++) + if (mlp->tab[i] && nlp->tab[i]) { + /* for equiv. class they must also match word pattern */ + if (mlp->equiv) { + if (!mwp || !nwp || (mlp->tab[i] == mwp->tab[i] && + nlp->tab[i] == nwp->tab[i])) + break; + } else + break; + } + if (i < 256) { + /* OK we found character that matches both matchers */ + *line++ = (char)i; + } else { + /* No matching character */ + return 0; + } + mlp = mlp->next; + nlp = nlp->next; + if (mwp) mwp = mwp->next; + if (nwp) nwp = nwp->next; + } + + return 1; +} + +/* This tries to find out, if there is common line that may match two + * words (possible matches or parts thereof). When this function is called, + * it is ensured that `mword' has matched word pattern in `matcher'; + * we try to find a string that both matches line pattern in `matcher' + * and another word `word' * - * wpat contains pattern that matched previously - * lpat contains the pattern for line we build - * mword is a string that matched wpat before - * word is string that we try to match now + * matcher - matcher that `mword' has been matched against + * line - buffer for string we build + * lmatch - length of line built so far + * wmatch - length of word matching the above + * mword - word that has matched word pattern in `matcher' before + * word - is string that we try to match now + * wlen - length of `word' + * sfx - if we should match bacwards * - * The return value is the length of the string matched in the word, it + * The return value is the length of the string matched in the `word', it * is zero if we couldn't build a line that matches the word. */ - /**/ static int -bld_line(Cpattern wpat, Cpattern lpat, char *line, char *lp, +bld_line(Cmatcher matcher, char *line, int lmatch, int wmatch, char *mword, char *word, int wlen, int sfx) { - if (lpat) { - /* Still working on the pattern. */ - - int i, l; - unsigned char c = 0; + static Cpattern *mlpa; + static Cpattern *mwpa; + Cmlist ms; + Cmatcher mp; + Cpattern pat; + char *lp; + int llen = matcher->llen, rl = 0, i, clpos, cwpos; - /* Get the number of the character for a correspondence class - * if it has a corresponding class. */ - if (lpat->equiv) - if (wpat && *mword) { - c = wpat->tab[STOUC(*mword)]; - wpat = wpat->next; - mword++; - } + if (!lmatch) { + /* + * Setup array instead of list; this is required for suffix matchs + * Array size is that of line; we are not interested in patterns + * above this length + */ + mlpa = zalloc(sizeof(Cpattern) * llen); + mwpa = zalloc(sizeof(Cpattern) * llen); + for (i = 0, pat = matcher->line; pat; i++, pat = pat->next) + mlpa[i] = pat; + for (i = 0, pat = matcher->word; pat && i < llen; i++, pat = pat->next) + mwpa[i] = pat; + /* In case mword is smaller than line ... */ + for (i = matcher->wlen; i < llen; i++) + mwpa[i] = NULL; + } - /* Walk through the table in the pattern and try the characters - * that may appear in the current position. */ - for (i = 0; i < 256; i++) - if ((lpat->equiv && c) ? (c == lpat->tab[i]) : lpat->tab[i]) { - *lp = i; - /* We stored the character, now call ourselves to build - * the rest. */ - if ((l = bld_line(wpat, lpat->next, line, lp + 1, - mword, word, wlen, sfx))) - return l; - } + if (sfx) { + lp = line + llen - lmatch; + clpos = llen - lmatch - 1; + cwpos = llen - wmatch - 1; } else { - /* We reached the end, i.e. the line string is fully build, now - * see if it matches the given word. */ - - Cmlist ms; - Cmatcher mp; - int l = lp - line, t, rl = 0, ind, add; - - /* Quick test if the strings are exactly the same. */ - if (l == wlen && !strncmp(line, word, l)) - return l; - - if (sfx) { - line = lp; word += wlen; - ind = -1; add = -1; - } else { - ind = 0; add = 1; + lp = line + lmatch; + clpos = lmatch; + cwpos = wmatch; + } + + /* + * First try if next character in word fits mword directly; + * if yes, try to build the rest of line + * Do it only if word is at least as large as line + */ + if (wmatch + 1 <= llen && + cm_try_single(mlpa[clpos], word[clpos], + mwpa[clpos], mword[clpos])) { + /* last character? */ + if (lmatch + 1 == llen) { + rl = 1; + goto cleanup; } - /* We loop through the whole line string built. */ - while (l && wlen) { - if (word[ind] == line[ind]) { - /* The same character in both strings, skip over. */ - line += add; word += add; - l--; wlen--; rl++; - } else { - t = 0; - for (ms = bmatchers; ms && !t; ms = ms->next) { - mp = ms->matcher; - if (mp && !mp->flags && mp->wlen <= wlen && mp->llen <= l && - pattern_match(mp->line, (sfx ? line - mp->llen : line), - mp->word, (sfx ? word - mp->wlen : word))) { - /* Both the line and the word pattern matched, - * now skip over the matched portions. */ - if (sfx) { - line -= mp->llen; word -= mp->wlen; - } else { - line += mp->llen; word += mp->wlen; - } - l -= mp->llen; wlen -= mp->wlen; rl += mp->wlen; - t = 1; - } - } - if (!t) - /* Didn't match, give up. */ - return 0; + if ((rl = bld_line(matcher, line, lmatch + 1, wmatch + 1, + mword, word, wlen, sfx)) >= 0) { + rl++; + goto cleanup; + } + } + + /* nope; try each matcher in turn and see if we can match with it */ + for (ms = bmatchers; ms; ms = ms->next) { + mp = ms->matcher; + if (mp && !mp->flags && mp->wlen <= wlen - wmatch && mp->llen <= llen - lmatch && + pattern_match(mp->word, (sfx ? word - wmatch - mp->wlen : word + wmatch), + NULL, NULL) && + pattern_compare(mlpa[clpos], mwpa[clpos], mp->line, mp->word, + (sfx ? lp - mp->llen : lp))) { + /* Both the line and the word pattern matched, + * now try to match the rest */ + rl = bld_line(matcher, line, lmatch + mp->llen, wmatch + mp->wlen, + mword, word, wlen, sfx); + if (rl) { + rl += mp->wlen; + goto cleanup; } } - if (!l) - /* Unmatched portion in the line built, return matched length. */ - return rl; } - return 0; + + /* Nothing match; return failure */ + +cleanup: + if (!lmatch) { + zfree(mlpa, sizeof(Cpattern) * matcher->llen); + zfree(mwpa, sizeof(Cpattern) * matcher->wlen); + } + + return rl; } /* This builds a string that may be put on the line that fully matches the @@ -1357,7 +1434,7 @@ join_strs(int la, char *sa, int lb, char } /* Now try to build a string that matches the other * string. */ - if ((bl = bld_line(mp->word, mp->line, line, line, + if ((bl = bld_line(mp, line, 0, 0, *ap, *bp, *blp, 0))) { /* Found one, put it into the return string. */ line[mp->llen] = '\0'; @@ -1560,7 +1637,7 @@ join_sub(Cmdata md, char *str, int len, else mw = nw - (sfx ? mp->wlen : 0); - if ((bl = bld_line(mp->word, mp->line, line, line, + if ((bl = bld_line(mp, line, 0, 0, mw, (t ? nw : ow), (t ? nl : ol), sfx))) { /* Yep, one of the lines matched the other * string. */