From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 5161 invoked by alias); 12 Jan 2017 20:42:19 -0000 Mailing-List: contact zsh-workers-help@zsh.org; run by ezmlm Precedence: bulk X-No-Archive: yes List-Id: Zsh Workers List List-Post: List-Help: X-Seq: 40342 Received: (qmail 1862 invoked from network); 12 Jan 2017 20:42:19 -0000 X-Qmail-Scanner-Diagnostics: from know-smtprelay-omc-10.server.virginmedia.net by f.primenet.com.au (envelope-from , uid 7791) with qmail-scanner-2.11 (clamdscan: 0.99.2/21882. spamassassin: 3.4.1. Clear:RC:0(80.0.253.74):SA:0(-1.2/5.0):. Processed in 1.55843 secs); 12 Jan 2017 20:42:19 -0000 X-Spam-Checker-Version: SpamAssassin 3.4.1 (2015-04-28) on f.primenet.com.au X-Spam-Level: X-Spam-Status: No, score=-1.2 required=5.0 tests=RCVD_IN_DNSWL_NONE, RCVD_IN_MSPIKE_H2,SPF_PASS autolearn=unavailable autolearn_force=no version=3.4.1 X-Envelope-From: p.w.stephenson@ntlworld.com X-Qmail-Scanner-Mime-Attachments: | X-Qmail-Scanner-Zip-Files: | Received-SPF: pass (ns1.primenet.com.au: SPF record at _smtprelay.virginmedia.com designates 80.0.253.74 as permitted sender) X-Originating-IP: [86.21.219.59] X-Spam: 0 X-Authority: v=2.1 cv=WM+CJSYR c=1 sm=1 tr=0 a=utowdAHh8RITBM/6U1BPxA==:117 a=utowdAHh8RITBM/6U1BPxA==:17 a=L9H7d07YOLsA:10 a=9cW_t1CCXrUA:10 a=s5jvgZ67dGcA:10 a=kj9zAlcOel0A:10 a=MWUjAzoEKyAA:10 a=NLZqzBF-AAAA:8 a=hD80L64hAAAA:8 a=gNt1rXdR9r1-N8cZFr8A:9 a=ntOE1itJnWcUExKu:21 a=q0c7M6oTHsILNKoV:21 a=CjuIK1q_8ugA:10 a=wW_WBVUImv98JQXhvVPZ:22 a=ImLYxMjnSFIJI1KS7skI:22 Date: Thu, 12 Jan 2017 20:42:10 +0000 From: Peter Stephenson To: Zsh hackers list Subject: Re: PATH: autoload with explicit path Message-ID: <20170112204210.14ab1f11@ntlworld.com> In-Reply-To: <20170111205122.2c47e89f@ntlworld.com> References: <20161211221844.5e51affe@ntlworld.com> <161212080550.ZM935@torch.brasslantern.com> <20161212163124.6654f077@pwslap01u.europe.root.pri> <20170110193102.7725620a@ntlworld.com> <20170111114232.04eedd08@pwslap01u.europe.root.pri> <20170111205122.2c47e89f@ntlworld.com> X-Mailer: Claws Mail 3.11.1 (GTK+ 2.24.28; x86_64-redhat-linux-gnu) MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit On Wed, 11 Jan 2017 20:51:22 +0000 Peter Stephenson wrote: > On Wed, 11 Jan 2017 11:42:32 +0000 > Peter Stephenson wrote: > > The main disadvantage is each function record now contains the name of > > the directory, so this takes extra memory --- it looks like a couple of > > dozen k in the case of my function directory which is quite full. This > > could be optimised with indirection but I'm not sure it's worth the work. > > This seemed like too good an idea not to follow up. Now "autoload > /blah/blah/*" is as efficient as the old way of doing it, too. Update based on the previous fix, also fix a memory leak in the cache and add a test to check what happens when bits of the cache are deleted. I think this is sane now. pws diff --git a/Src/builtin.c b/Src/builtin.c index 716ddd4..a683032 100644 --- a/Src/builtin.c +++ b/Src/builtin.c @@ -2958,13 +2958,13 @@ check_autoload(Shfunc shf, char *name, Options ops, int func) } } if (getfpfunc(shf->node.nam, NULL, &dir_path, NULL, 1)) { - zsfree(shf->filename); + dircache_set(&shf->filename, NULL); if (*dir_path != '/') { dir_path = zhtricat(metafy(zgetcwd(), -1, META_HEAPDUP), "/", dir_path); dir_path = xsymlink(dir_path, 1); } - shf->filename = ztrdup(dir_path); + dircache_set(&shf->filename, dir_path); shf->node.flags |= PM_LOADDIR; return 0; } @@ -3029,8 +3029,8 @@ add_autoload_function(Shfunc shf, char *funcname) *nam++ = '\0'; dir = funcname; } - zsfree(shf->filename); - shf->filename = ztrdup(dir); + dircache_set(&shf->filename, NULL); + dircache_set(&shf->filename, dir); shf->node.flags |= PM_LOADDIR; shfunctab->addnode(shfunctab, ztrdup(nam), shf); } else { @@ -3280,8 +3280,8 @@ bin_functions(char *name, char **argv, Options ops, int func) shfunctab->addnode(shfunctab, ztrdup(funcname), shf); } if (*argv) { - zsfree(shf->filename); - shf->filename = ztrdup(*argv); + dircache_set(&shf->filename, NULL); + dircache_set(&shf->filename, *argv); on |= PM_LOADDIR; } shf->node.flags = on; diff --git a/Src/exec.c b/Src/exec.c index 68c455b..7a5d2bd 100644 --- a/Src/exec.c +++ b/Src/exec.c @@ -4902,6 +4902,7 @@ execfuncdef(Estate state, Eprog redir_prog) shf = (Shfunc) zalloc(sizeof(*shf)); shf->funcdef = prog; shf->node.flags = 0; + /* No dircache here, not a directory */ shf->filename = ztrdup(scriptfilename); shf->lineno = lineno; /* @@ -4934,7 +4935,7 @@ execfuncdef(Estate state, Eprog redir_prog) freeeprog(shf->funcdef); if (shf->redir) /* shouldn't be */ freeeprog(shf->redir); - zsfree(shf->filename); + dircache_set(&shf->filename, NULL); zfree(shf, sizeof(*shf)); state->pc = end; return 1; @@ -4965,7 +4966,7 @@ execfuncdef(Estate state, Eprog redir_prog) freeeprog(shf->funcdef); if (shf->redir) /* shouldn't be */ freeeprog(shf->redir); - zsfree(shf->filename); + dircache_set(&shf->filename, NULL); zfree(shf, sizeof(*shf)); break; } else { @@ -4974,7 +4975,7 @@ execfuncdef(Estate state, Eprog redir_prog) (signum = getsignum(s + 4)) != -1) { if (settrap(signum, NULL, ZSIG_FUNC)) { freeeprog(shf->funcdef); - zsfree(shf->filename); + dircache_set(&shf->filename, NULL); zfree(shf, sizeof(*shf)); state->pc = end; return 1; @@ -5205,7 +5206,8 @@ loadautofn(Shfunc shf, int fksh, int autol, int current_fpath) else shf->funcdef = dupeprog(prog, 0); shf->node.flags &= ~(PM_UNDEFINED|PM_LOADDIR); - zsfree(shf->filename); + dircache_set(&shf->filename, NULL); + /* Full filename, don't use dircache */ shf->filename = fname; } else { VARARR(char, n, strlen(shf->node.nam) + 1); @@ -5229,7 +5231,8 @@ loadautofn(Shfunc shf, int fksh, int autol, int current_fpath) else shf->funcdef = dupeprog(stripkshdef(prog, shf->node.nam), 0); shf->node.flags &= ~(PM_UNDEFINED|PM_LOADDIR); - zsfree(shf->filename); + dircache_set(&shf->filename, NULL); + /* Full filename, don't use dircache */ shf->filename = fname; } popheap(); @@ -5620,6 +5623,8 @@ runshfunc(Eprog prog, FuncWrap wrap, char *name) * Search fpath for an undefined function. Finds the file, and returns the * list of its contents. * + * If test is 0, load the function; *fname is set to zalloc'ed location. + * * If test_only is 1, don't load function, just test for it: * - Non-null return means function was found * - *fname points to path at which found (not duplicated) diff --git a/Src/hashtable.c b/Src/hashtable.c index 2a8b585..22ebe34 100644 --- a/Src/hashtable.c +++ b/Src/hashtable.c @@ -889,7 +889,7 @@ freeshfuncnode(HashNode hn) freeeprog(shf->funcdef); if (shf->redir) freeeprog(shf->redir); - zsfree(shf->filename); + dircache_set(&shf->filename, NULL); if (shf->sticky) { if (shf->sticky->n_on_opts) zfree(shf->sticky->on_opts, @@ -1442,3 +1442,140 @@ freehistdata(Histent he, int unlink) } } } + + +/*********************************************************************** + * Directory name cache mechanism + * + * The idea of this is that there are various shell structures, + * notably functions, that record the directories with which they + * are associated. Rather than store the full string each time, + * we store a pointer to the same location and count the references. + * This is optimised so that retrieval is quick at the expense of + * searching the list when setting up the structure, which is a much + * rarer operation. + * + * There is nothing special about the fact that the strings are + * directories, except for the assumptions for efficiency that many + * structures will point to the same one, and that there are not too + * many different directories associated with the shell. + **********************************************************************/ + +struct dircache_entry +{ + /* Name of directory in cache */ + char *name; + /* Number of references to it */ + int refs; +}; + +/* + * dircache is the cache, of length dircache_size. + * dircache_lastentry is the last entry used, an optimisation + * for multiple references to the same directory, e.g + * "autoload /blah/blah/\*". + */ +struct dircache_entry *dircache, *dircache_lastentry; +int dircache_size; + +/* + * Set *name to point to a cached version of value. + * value is copied so may come from any source. + * + * If value is NULL, look for the existing value of *name (safe if this + * too is NULL) and remove a reference to it from the cache. If it's + * not found in the cache, it's assumed to be an allocated string and + * freed --- this currently occurs for a shell function that's been + * loaded as the filename is now a full path, not just a directory, + * though we may one day optimise this to a cached directory plus a + * name, too. Note --- the function does *not* otherwise check + * if *name points to something already cached, so this is + * necessary any time *name may already be in the cache. + */ + +/**/ +mod_export void +dircache_set(char **name, char *value) +{ + struct dircache_entry *dcptr, *dcnew; + + if (!value) { + if (!*name) + return; + if (!dircache_size) { + zsfree(*name); + *name = NULL; + return; + } + + for (dcptr = dircache; dcptr < dircache + dircache_size; dcptr++) + { + /* Must be a pointer much, not a string match */ + if (*name == dcptr->name) + { + --dcptr->refs; + if (!dcptr->refs) { + ptrdiff_t ind = dcptr - dircache; + zsfree(dcptr->name); + --dircache_size; + + if (!dircache_size) { + zfree(dircache, sizeof(*dircache)); + dircache = NULL; + dircache_lastentry = NULL; + return; + } + dcnew = (struct dircache_entry *) + zalloc(dircache_size * sizeof(*dcnew)); + if (ind) + memcpy(dcnew, dircache, ind * sizeof(*dcnew)); + if (ind < dircache_size) + memcpy(dcnew + ind, dcptr + 1, + (dircache_size - ind) * sizeof(*dcnew)); + zfree(dircache, (dircache_size+1)*sizeof(*dcnew)); + dircache = dcnew; + dircache_lastentry = NULL; + } + *name = NULL; + return; + } + } + zsfree(*name); + *name = NULL; + } else { + /* + * We'll maintain the cache at exactly the right size rather + * than overallocating. The rationale here is that typically + * we'll get a lot of functions in a small number of directories + * so the complexity overhead of maintaining a separate count + * isn't really matched by the efficiency gain. + */ + if (dircache_lastentry && + !strcmp(value, dircache_lastentry->name)) { + *name = dircache_lastentry->name; + ++dircache_lastentry->refs; + return; + } else if (!dircache_size) { + dircache_size = 1; + dcptr = dircache = + (struct dircache_entry *)zalloc(sizeof(*dircache)); + } else { + for (dcptr = dircache; dcptr < dircache + dircache_size; dcptr++) + { + if (!strcmp(value, dcptr->name)) { + *name = dcptr->name; + ++dcptr->refs; + return; + } + } + ++dircache_size; + dircache = (struct dircache_entry *) + zrealloc(dircache, sizeof(*dircache) * dircache_size); + dcptr = dircache + dircache_size - 1; + } + dcptr->name = ztrdup(value); + *name = dcptr->name; + dcptr->refs = 1; + dircache_lastentry = dcptr; + } +} diff --git a/Src/signals.c b/Src/signals.c index 9e05add..2de5743 100644 --- a/Src/signals.c +++ b/Src/signals.c @@ -811,6 +811,7 @@ dosavetrap(int sig, int level) newshf->node.nam = ztrdup(shf->node.nam); newshf->node.flags = shf->node.flags; newshf->funcdef = dupeprog(shf->funcdef, 0); + /* Assume this is a real filename, don't use dircache */ newshf->filename = ztrdup(shf->filename); if (shf->sticky) { newshf->sticky = sticky_emulation_dup(shf->sticky, 0); diff --git a/Test/C04funcdef.ztst b/Test/C04funcdef.ztst index 7100280..370394b 100644 --- a/Test/C04funcdef.ztst +++ b/Test/C04funcdef.ztst @@ -429,6 +429,45 @@ 0: autoload -X interaction with absolute filename used for source location >Function was loaded correctly. + ( + fpath=() + mkdir extra2 + for f in fun2a fun2b; do + print "print $f" >extra2/$f + done + repeat 3; do + autoload $PWD/extra2/fun2{a,b} $PWD/extra/spec + fun2a + fun2b + spec + unfunction fun2a fun2b spec + autoload $PWD/extra2/fun2{a,b} $PWD/extra/spec + spec + fun2b + fun2a + unfunction fun2a fun2b spec + done + ) +0: Exercise the directory name cache for autoloads +>fun2a +>fun2b +>I have been loaded by explicit path. +>I have been loaded by explicit path. +>fun2b +>fun2a +>fun2a +>fun2b +>I have been loaded by explicit path. +>I have been loaded by explicit path. +>fun2b +>fun2a +>fun2a +>fun2b +>I have been loaded by explicit path. +>I have been loaded by explicit path. +>fun2b +>fun2a + %clean rm -f file.in file.out