From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 4617 invoked from network); 9 Jun 2000 07:48:36 -0000 Received: from sunsite.auc.dk (130.225.51.30) by ns1.primenet.com.au with SMTP; 9 Jun 2000 07:48:36 -0000 Received: (qmail 15228 invoked by alias); 9 Jun 2000 07:48:21 -0000 Mailing-List: contact zsh-workers-help@sunsite.auc.dk; run by ezmlm Precedence: bulk X-No-Archive: yes X-Seq: 11833 Received: (qmail 15221 invoked from network); 9 Jun 2000 07:48:20 -0000 Date: Fri, 9 Jun 2000 09:48:09 +0200 (MET DST) Message-Id: <200006090748.JAA21446@beta.informatik.hu-berlin.de> From: Sven Wischnowsky To: zsh-workers@sunsite.auc.dk Subject: PATCH: faster filenames Here is the first patch to make _path_files faster. It uses the thing Felix suggested (accept exact directory matches immediately, we were doing that anyway, only a bit later). Then it tries to improve the glob patterns used by putting some characters from $PREFIX into it. And finally, it does all this in C, saving is several array- assignments. This is only the first patch for this, though. It currently gives up quite early, for example if $compstate[pattern_match] is set, e.g. because globcomplete is set. It also gives up if there are match specs, so if your matcher-list does not have an empty string as the first value, this patch won't help you match with (real) partial path matching, only if the directories in the path on the line are completely typed already. I have ideas to improve both of these things (and left a comment in computil.c for them), just didn't have enough time yesterday, so maybe I'll have another patch after the weekend (which is three days long in this country). I don't have the time to try it with that directory on sourceforge either, only with a local latex-fonts directory with over 6700 files and there the speedup was definitely noticeable. If it's true that the file caching is the major problem on directories like the sourceforge one, then I can't see how we can improve that currently (after all, we have to complete directories and that means stat'ing). Bye Sven Index: Completion/Core/_path_files =================================================================== RCS file: /cvsroot/zsh/zsh/Completion/Core/_path_files,v retrieving revision 1.19 diff -u -r1.19 _path_files --- Completion/Core/_path_files 2000/06/06 12:19:33 1.19 +++ Completion/Core/_path_files 2000/06/09 07:45:22 @@ -5,7 +5,7 @@ local linepath realpath donepath prepath testpath exppath skips skipped local tmp1 tmp2 tmp3 tmp4 i orig eorig pre suf tpre tsuf opre osuf cpre -local pats haspats ignore pfxsfx remt sopt gopt opt sdirs ignpar +local pats haspats ignore pfxsfx sopt gopt opt sdirs ignpar local nm=$compstate[nmatches] menu matcher mopts sort match mid typeset -U prepaths exppaths @@ -312,35 +312,14 @@ # Get the matching files by globbing. - tmp2=( "$tmp1[@]" ) if [[ "$tpre$tsuf" = */* ]]; then - if [[ ! -o globdots && "$PREFIX" = .* ]]; then - tmp1=( ${^tmp1}${skipped}*(-/) ${^tmp1}${skipped}.*(-/) ) - else - tmp1=( ${^tmp1}${skipped}*(-/) ) - fi - if [[ -n "$sdirs" && ( -o globdots || "$PREFIX" = .* ) ]]; then - if [[ "$sdirs" = yes ]]; then - tmp1=( "$tmp1[@]" $^tmp2${skipped}{.,..} ) - elif [[ "$sdirs" = .. ]]; then - tmp1=( "$tmp1[@]" $^tmp2${skipped}.. ) - fi - fi + compfiles -P tmp1 "$skipped" "$_matcher" "$sdirs" + elif [[ "$sopt" = *[/f]* ]]; then + compfiles -p tmp1 "$skipped" "$_matcher" "$sdirs" "$pats" else - if [[ ! -o globdots && "$PREFIX" = .* ]]; then - tmp1=( ${^tmp1}${skipped}${^~pats} ${^tmp1}${skipped}.${^~pats:#.*} ) - else - tmp1=( ${^tmp1}${skipped}${^~pats} ) - fi - if [[ -n "$sdirs" && "$sopt" = *[/f]* && - ( -o globdots || "$PREFIX" = .* ) ]]; then - if [[ "$sdirs" = yes ]]; then - tmp1=( "$tmp1[@]" $^tmp2${skipped}{.,..} ) - elif [[ "$sdirs" = .. ]]; then - tmp1=( "$tmp1[@]" $^tmp2${skipped}.. ) - fi - fi + compfiles -p tmp1 "$skipped" "$_matcher" '' "$pats" fi + tmp1=( $~tmp1 ) if [[ -n "$PREFIX$SUFFIX" ]]; then # See which of them match what's on the line. @@ -354,9 +333,13 @@ compadd -D tmp1 -F _comp_ignore "$matcher[@]" - "${(@)tmp2:t}" fi else - [[ "$tmp1[1]" = */* ]] && tmp2=( "$tmp1[@]" ) + if [[ "$tmp1[1]" = */* ]]; then + tmp2=( "$tmp1[@]" ) + else + tmp2=( '' ) + fi - builtin compadd -D tmp1 -F _comp_ignore "$matcher[@]" - "${(@)tmp1:t}" + compadd -D tmp1 -F _comp_ignore "$matcher[@]" - "${(@)tmp1:t}" fi # If no file matches, save the expanded path and continue with @@ -411,24 +394,12 @@ if [[ -n "$ignpar" && -z "$_comp_no_ignore" && "$tpre$tsuf" != */* && $#tmp1 -ne 0 && ( "$ignpar" != *dir* || "$pats" = '*(-/)' ) && - ( "$ignpar" != *..* || "$tmp1" = *../* ) ]]; then - if [[ "$ignpar" = *parent* ]]; then - for i in ${(M)^tmp1:#*/*}(-/); do - remt="${${i#$prepath$realpath$donepath}%/*}" - while [[ "$remt" = */* && - ! "$prepath$realpath$donepath$remt" -ef "$i" ]]; do - remt="${remt%/*}" - done - [[ "$remt" = */* || "$remt" -ef "$i" ]] && - _comp_ignore=( "$_comp_ignore[@]" "${(q)i}" ) - done - fi - if [[ "$ignpar" = *pwd* ]]; then - for i in ${^tmp1}(-/); do - [[ "$i" -ef "$PWD" ]] && _comp_ignore=( "$_comp_ignore[@]" "${(q)i}" ) - done - fi - (( $#_comp_ignore && $mopts[(I)-F] )) || mopts=( "$mopts[@]" -F _comp_ignore ) + ( "$ignpar" != *..* || "$tmp1[1]" = *../* ) ]]; then + + compfiles -i tmp1 _comp_ignore "$ignpar" "$prepath$realpath$donepath" + + (( $#_comp_ignore && $mopts[(I)-F] )) || + mopts=( "$mopts[@]" -F _comp_ignore ) fi # Step over to the next component, if any. Index: Src/Zle/computil.c =================================================================== RCS file: /cvsroot/zsh/zsh/Src/Zle/computil.c,v retrieving revision 1.26 diff -u -r1.26 computil.c --- Src/Zle/computil.c 2000/05/30 10:54:22 1.26 +++ Src/Zle/computil.c 2000/06/09 07:45:22 @@ -3023,6 +3023,295 @@ return 0; } +#define PATH_MAX2 (PATH_MAX * 2) + +static LinkList +cfp_test_exact(LinkList names, char *skipped) +{ + char buf[PATH_MAX2 + 1], *suf, *p; + int l, sl, found = 0; + struct stat st; + LinkNode node; + LinkList ret = newlinklist(); + + if (!(compprefix && *compprefix) && !(compsuffix && *compsuffix)) + return NULL; + + sl = strlen(skipped) + (compprefix ? strlen(compprefix) : 0) + + (compsuffix ? strlen(compsuffix) : 0); + + if (sl > PATH_MAX2) + return NULL; + + suf = dyncat(skipped, rembslash(dyncat(compprefix, compsuffix))); + + for (node = firstnode(names); node; incnode(node)) { + if ((l = strlen(p = (char *) getdata(node))) && l + sl < PATH_MAX2) { + strcpy(buf, p); + strcpy(buf + l, suf); + + if (!ztat(buf, &st, 0) && S_ISDIR(st.st_mode)) { + found = 1; + addlinknode(ret, dupstring(buf)); + } + } + } + return (found ? ret : NULL); +} + +static void +cfp_opt_pats(char **pats, char *matcher) +{ + char *add, **p, *q, *t, *s; + + /**** For now we don't try to improve the patterns if there are match + * specs. We should work on this some more... + * + * And another one: we can do this with comppatmatch if the word + * doesn't contain wildcards, unless the approximate matcher is + * active. Better: unless there is a compadd function. I.e., we + * need one more piece of information from the shell code, at least + * I'd prefer to get it from _path_files in case we find other + * conditions for not trying to improve patterns. */ + + if ((comppatmatch && *comppatmatch) || *matcher || + !compprefix || !*compprefix) + return; + + add = rembslash(compprefix); + + for (p = pats; *add && (q = *p); p++) { + if (*q) { + q = dupstring(q); + t = q + strlen(q) - 1; + if (*t == ')') { + for (s = t--; t > q; t--) + if (*t == ')' || *t == '|' || *t == '~' || *t == '(') + break; + if (t != q && *t == '(') + *t = '\0'; + } + for (; *q && *add; q++) { + if (*q == '\\' && q[1]) { + for (s = add, q++; *s && *s != *q; s++); + *s = '\0'; + } else if (*q == '<') { + for (s = add; *s && !idigit(*s); s++); + *s = '\0'; + } else if (*q == '[') { + int not, first = 1; + char *x = ++q; + + if ((not = (*x == '!' || *x == '^'))) + x++; + for (; *x && (first || *x != ']'); x++) { + if (x[1] == '-' && x[2]) { + char c1 = *x, c2 = x[2]; + + for (s = add; *s && (*x < c1 || *x > c2); s++); + *s = '\0'; + } else { + for (s = add; *s && *s != *x; s++); + *s = '\0'; + } + } + } else if (*q != '?' && *q != '*' && *q != '(' && *q != ')' && + *q != '|' && *q != '~' && *q != '#') { + for (s = add; *s && *s != *q; s++); + *s = '\0'; + } + } + } + } + if (*add) { + for (p = pats; *p; p++) + if (**p == '*') + *p = dyncat(add, *p); + } +} + +static LinkList +cfp_bld_pats(int dirs, LinkList names, char *skipped, char **pats) +{ + LinkList ret = newlinklist(); + LinkNode node; + int ol, sl = strlen(skipped), pl, dot; + char **p, *o, *str; + + dot = (unset(GLOBDOTS) && compprefix && *compprefix == '.'); + for (node = firstnode(names); node; incnode(node)) { + ol = strlen(o = (char *) getdata(node)); + for (p = pats; *p; p++) { + pl = strlen(*p); + str = (char *) zhalloc(ol + sl + pl + 1); + strcpy(str, o); + strcpy(str + ol, skipped); + strcpy(str + ol + sl, *p); + addlinknode(ret, str); + if (dot && **p != '.') { + str = (char *) zhalloc(ol + sl + pl + 2); + strcpy(str, o); + strcpy(str + ol, skipped); + str[ol + sl] = '.'; + strcpy(str + ol + sl + 1, *p); + addlinknode(ret, str); + } + } + } + return ret; +} + +static LinkList +cfp_add_sdirs(LinkList final, LinkList orig, char *skipped, char *sdirs) +{ + int add = 0; + + if (*sdirs) { + if (!strcmp(sdirs, "yes")) + add = 2; + else if (!strcmp(sdirs, "..")) + add = 1; + } + if (add) { + LinkNode node; + char *s1 = dyncat(skipped, ".."); + char *s2 = (add == 2 ? dyncat(skipped, ".") : NULL), *m; + + for (node = firstnode(orig); node; incnode(node)) { + if (*(m = (char *) getdata(node))) { + addlinknode(final, dyncat((char *) getdata(node), s1)); + if (s2) + addlinknode(final, dyncat((char *) getdata(node), s2)); + } + } + } + return final; +} + +static LinkList +cf_pats(int dirs, LinkList names, char *skipped, char *matcher, char *sdirs, + char **pats) +{ + LinkList ret; + char *dpats[2]; + + if (dirs && (ret = cfp_test_exact(names, skipped))) + return cfp_add_sdirs(ret, names, skipped, sdirs); + + if (dirs) { + dpats[0] = "*(-/)"; + dpats[1] = NULL; + pats = dpats; + } + cfp_opt_pats(pats, matcher); + + return cfp_add_sdirs(cfp_bld_pats(dirs, names, skipped, pats), + names, skipped, sdirs); +} + +static void +cf_ignore(char **names, LinkList ign, char *style, char *path) +{ + int pl = strlen(path), tpar, tpwd, found; + struct stat nst, est, st; + char *n, *c, *e; + + tpar = !!strstr(style, "parent"); + if ((tpwd = !!strstr(style, "pwd")) && stat(pwd, &est)) + tpwd = 0; + + if (!tpar && !tpwd) + return; + + for (; (n = *names); names++) { + if (!ztat(n, &nst, 0) && S_ISDIR(nst.st_mode)) { + if (tpwd && nst.st_dev == est.st_dev && nst.st_ino == est.st_ino) { + addlinknode(ign, bslashquote(n, NULL, 0)); + continue; + } + if (tpar && !strncmp((c = dupstring(n)), path, pl)) { + found = 0; + while ((e = strrchr(c, '/')) && e > c + pl) { + *e = '\0'; + if (!ztat(c, &st, 0) && + st.st_dev == nst.st_dev && st.st_ino == nst.st_ino) { + found = 1; + break; + } + } + if (found || ((e = strrchr(c, '/')) && e > c + pl && + !ztat(c, &st, 0) && st.st_dev == nst.st_dev && + st.st_ino == nst.st_ino)) + addlinknode(ign, bslashquote(n, NULL, 0)); + } + } + } +} + +static int +bin_compfiles(char *nam, char **args, char *ops, int func) +{ + if (**args != '-') { + zwarnnam(nam, "missing option: %s", *args, 0); + return 1; + } + if (args[0][2]) { + zwarnnam(nam, "invalid option: %s", *args, 0); + return 1; + } + switch (args[0][1]) { + case 'p': + case 'P': + { + char **tmp; + LinkList l; + + if (!args[1] || !args[2] || !args[3] || !args[4] || + (args[0][1] == 'p' && !args[5])) { + zwarnnam(nam, "too few arguments", NULL, 0); + return 1; + } + if (!(tmp = getaparam(args[1]))) { + zwarnnam(nam, "unknown parameter: %s", args[1], 0); + return 0; + } + for (l = newlinklist(); *tmp; tmp++) + addlinknode(l, *tmp); + set_list_array(args[1], cf_pats((args[0][1] == 'P'), l, args[2], + args[3], args[4], args + 5)); + return 0; + } + case 'i': + { + char **tmp; + LinkList l; + + if (!args[1] || !args[2] || !args[3] || !args[4]) { + zwarnnam(nam, "too few arguments", NULL, 0); + return 1; + } + if (args[5]) { + zwarnnam(nam, "too many arguments", NULL, 0); + return 1; + } + tmp = getaparam(args[2]); + l = newlinklist(); + if (tmp) + for (; *tmp; tmp++) + addlinknode(l, *tmp); + if (!(tmp = getaparam(args[1]))) { + zwarnnam(nam, "unknown parameter: %s", args[1], 0); + return 0; + } + cf_ignore(tmp, l, args[3], args[4]); + set_list_array(args[2], l); + return 0; + } + } + zwarnnam(nam, "invalid option: %s", *args, 0); + return 1; +} + static struct builtin bintab[] = { BUILTIN("compdescribe", 0, bin_compdescribe, 3, -1, 0, NULL, NULL), BUILTIN("comparguments", 0, bin_comparguments, 1, -1, 0, NULL, NULL), @@ -3031,6 +3320,7 @@ BUILTIN("comptags", 0, bin_comptags, 1, -1, 0, NULL, NULL), BUILTIN("comptry", 0, bin_comptry, 0, -1, 0, NULL, NULL), BUILTIN("compfmt", 0, bin_compfmt, 2, -1, 0, NULL, NULL), + BUILTIN("compfiles", 0, bin_compfiles, 1, -1, 0, NULL, NULL), }; -- Sven Wischnowsky wischnow@informatik.hu-berlin.de