A patch with hashtable optimization, which doesn't work

zsh-workers
 help / color / mirror / code / Atom feed

* A patch with hashtable optimization, which doesn't work
@ 2017-05-18 10:14 Sebastian Gniazdowski
  2017-05-18 12:16 ` Sebastian Gniazdowski
  0 siblings, 1 reply; 4+ messages in thread
From: Sebastian Gniazdowski @ 2017-05-18 10:14 UTC (permalink / raw)
  To: zsh-workers

[-- Attachment #1: Type: text/plain, Size: 1217 bytes --]

Hello,
it is really simple to keep collision lists sorted. However, I get error about typeset not being found. Debugged that typeset is inserted into builtintab. I am pretty exhausted, maybe someone will have a revelation on this. Debugged that typeset is being searched in:

exec.c:729 if ((cn = (Cmdnam) cmdnamtab->getnode(cmdnamtab, arg0))) {

at final stage, so it apparently misses builtintab search before. Below is the main idea, attached is full patch.

-       if (ht->cmpnodes(hp->nam, hn->nam) == 0) {
+        cres = ht->cmpnodes(hp->nam, hn->nam);
+       if ( cres == 0 ) {
            hq->next = hn;
            goto replacing;
-       }
+       } else if ( cres > 0 ) {
+            /* Pointer `hp` is at 'right' position - it points
+             * to a greater element than `hn`, so we should
+             * insert at one-before, that is after `hq` */
+            hq->next = hn;
+            hn->next = hp;
+            if (++ht->ct >= ht->hsize * 2 && !ht->scan)
+                expandhashtable(ht);
+            return NULL;
+        }

--
Sebastian Gniazdowski
psprint /at/ zdharma.org

[-- Attachment #2: hashopt.diff.txt --]
[-- Type: text/plain, Size: 3148 bytes --]

diff --git a/Src/hashtable.c b/Src/hashtable.c
index ba5faad..d820efd 100644
--- a/Src/hashtable.c
+++ b/Src/hashtable.c
@@ -162,6 +162,7 @@ addhashnode(HashTable ht, char *nam, void *nodeptr)
 HashNode
 addhashnode2(HashTable ht, char *nam, void *nodeptr)
 {
+    int cres;
     unsigned hashval;
     HashNode hn, hp, hq;
 
@@ -181,7 +182,8 @@ addhashnode2(HashTable ht, char *nam, void *nodeptr)
     }
 
     /* else check if the first node contains the same key */
-    if (ht->cmpnodes(hp->nam, hn->nam) == 0) {
+    cres = ht->cmpnodes(hp->nam, hn->nam);
+    if ( cres == 0 ) {
 	ht->nodes[hashval] = hn;
 	replacing:
 	hn->next = hp->next;
@@ -196,21 +198,39 @@ addhashnode2(HashTable ht, char *nam, void *nodeptr)
 		ht->scan->u.u = hn;
 	}
 	return hp;
+    } else if ( cres > 0 ) {
+        /* Insert in front of the list, because first
+         * element, nodes[hashval], is larger, so new
+         * element has to be before it */
+        hn->next = hp;
+        ht->nodes[hashval] = hn;
+	if (++ht->ct >= ht->hsize * 2 && !ht->scan)
+	    expandhashtable(ht);
     }
 
     /* else run through the list and check all the keys */
     hq = hp;
     hp = hp->next;
     for (; hp; hq = hp, hp = hp->next) {
-	if (ht->cmpnodes(hp->nam, hn->nam) == 0) {
+        cres = ht->cmpnodes(hp->nam, hn->nam);
+	if ( cres == 0 ) {
 	    hq->next = hn;
 	    goto replacing;
-	}
+	} else if ( cres > 0 ) {
+            /* Pointer `hp` is at 'right' position - it points
+             * to a greater element than `hn`, so we should
+             * insert at one-before, that is after `hq` */
+            hq->next = hn;
+            hn->next = hp;
+            if (++ht->ct >= ht->hsize * 2 && !ht->scan)
+                expandhashtable(ht);
+            return NULL;
+        }
     }
 
-    /* else just add it at the front of the list */
-    hn->next = ht->nodes[hashval];
-    ht->nodes[hashval] = hn;
+    /* else just add it at the end of the list */
+    hq->next = hn;
+    hn->next = NULL;
     if (++ht->ct >= ht->hsize * 2 && !ht->scan)
         expandhashtable(ht);
     return NULL;
@@ -225,17 +245,20 @@ addhashnode2(HashTable ht, char *nam, void *nodeptr)
 mod_export HashNode
 gethashnode(HashTable ht, const char *nam)
 {
+    int cres;
     unsigned hashval;
     HashNode hp;
 
     hashval = ht->hash(nam) % ht->hsize;
     for (hp = ht->nodes[hashval]; hp; hp = hp->next) {
-	if (ht->cmpnodes(hp->nam, nam) == 0) {
+        cres = ht->cmpnodes(hp->nam, nam);
+	if (cres == 0) {
 	    if (hp->flags & DISABLED)
 		return NULL;
 	    else
 		return hp;
-	}
+	} else if (cres > 0)
+            return NULL;
     }
     return NULL;
 }
@@ -249,13 +272,17 @@ gethashnode(HashTable ht, const char *nam)
 mod_export HashNode
 gethashnode2(HashTable ht, const char *nam)
 {
+    int cres;
     unsigned hashval;
     HashNode hp;
 
     hashval = ht->hash(nam) % ht->hsize;
     for (hp = ht->nodes[hashval]; hp; hp = hp->next) {
-	if (ht->cmpnodes(hp->nam, nam) == 0)
+        cres = ht->cmpnodes(hp->nam, nam);
+	if (cres == 0)
 	    return hp;
+        else if (cres > 0)
+            return NULL;
     }
     return NULL;
 }

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: A patch with hashtable optimization, which doesn't work
  2017-05-18 10:14 A patch with hashtable optimization, which doesn't work Sebastian Gniazdowski
@ 2017-05-18 12:16 ` Sebastian Gniazdowski
  2017-05-20  5:06   ` Sebastian Gniazdowski
  0 siblings, 1 reply; 4+ messages in thread
From: Sebastian Gniazdowski @ 2017-05-18 12:16 UTC (permalink / raw)
  To: zsh-workers

On 18 maja 2017 at 12:15:29, Sebastian Gniazdowski (psprint@zdharma.org) wrote:
> Hello,
> it is really simple to keep collision lists sorted. However, I get error about typeset  
> not being found. Debugged that typeset is inserted into builtintab. I am pretty exhausted,  
> maybe someone will have a revelation on this. Debugged that typeset is being searched  
> in:

Found it, missing return NULL here below, writing so that no one wastes time, will now test performance.

+    } else if ( cres > 0 ) {
+        /* Insert in front of the list, because first
+         * element, nodes[hashval], is larger, so new
+         * element has to be before it */
+        hn->next = hp;
+        ht->nodes[hashval] = hn;
+       if (++ht->ct >= ht->hsize * 2 && !ht->scan)
+           expandhashtable(ht);
+        return NULL;

--
Sebastian Gniazdowski
psprint /at/ zdharma.org


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: A patch with hashtable optimization, which doesn't work
  2017-05-18 12:16 ` Sebastian Gniazdowski
@ 2017-05-20  5:06   ` Sebastian Gniazdowski
  2017-05-20 17:08     ` Improving zcompdump (Re: A patch with hashtable optimization, which doesn't work) Bart Schaefer
  0 siblings, 1 reply; 4+ messages in thread
From: Sebastian Gniazdowski @ 2017-05-20  5:06 UTC (permalink / raw)
  To: zsh-workers

[-- Attachment #1: Type: text/plain, Size: 2337 bytes --]

There are no performance gains from keeping the collision list sorted. Tried many tests and nothing.

Tried to optimize mkautofn, to speed up sourcing zcompdump. The idea is simple – make single allocation, not two. However also no performance gains visible:

@@ -3483,15 +3483,15 @@ mkautofn(Shfunc shf)
 {
     Eprog p;

-    p = (Eprog) zalloc(sizeof(*p));
+    p = (Eprog) zalloc(sizeof(*p) + 5 * sizeof(wordcode));
     p->len = 5 * sizeof(wordcode);
-    p->prog = (Wordcode) zalloc(p->len);
+    p->prog = (Wordcode) (((char*)p)+sizeof(*p));
     p->strs = NULL;
     p->shf = shf;
     p->npats = 0;
     p->nref = 1; /* allocated from permanent storage */
     p->pats = (Patprog *) p->prog;
-    p->flags = EF_REAL;
+    p->flags = EF_REAL_SINGLE; /* no allocation for p->prog */

Attached is a full patch. Maybe it will inspire someone. I think that if sourcing time of zcompdump could drop from 55 ms to 35 ms it would be a great success. Tried callgrind on zsh -f sourcing the file, and it said:

22,888,884  _platform_strcmp [/usr/lib/system/libsystem_platform.dylib]
18,067,499  ImageLoaderMachOCompressed::trieWalk(unsigned char const*, unsigned char const*, char const*) [/usr/lib/dyld]
12,041,235  ecstrcode [/usr/local/bin/zsh]
 4,877,588  _pthread_mutex_unlock_slow [/usr/lib/system/libsystem_pthread.dylib]
 3,330,755  _pthread_mutex_lock_slow [/usr/lib/system/libsystem_pthread.dylib]
 2,937,243  ingetc [/usr/local/bin/zsh]
 1,854,600  stringsubst [/usr/local/bin/zsh]

So there's no allocation there. Tried to find something in ecstrcode but no luck.

Best regards,
Sebastian Gniazdowski

On 18 maja 2017 at 14:17:27, Sebastian Gniazdowski (psprint@zdharma.org) wrote:
> On 18 maja 2017 at 12:15:29, Sebastian Gniazdowski (psprint@zdharma.org) wrote:
> > Hello,
> > it is really simple to keep collision lists sorted. However, I get error about typeset  
> > not being found. Debugged that typeset is inserted into builtintab. I am pretty exhausted,  
> > maybe someone will have a revelation on this. Debugged that typeset is being searched  
> > in:
>  
> Found it, missing return NULL here below, writing so that no one wastes time, will now  
> test performance.

--
Sebastian Gniazdowski
psprint /at/ zdharma.org

[-- Attachment #2: prog_opt.diff.txt --]
[-- Type: text/plain, Size: 4194 bytes --]

diff --git a/Src/builtin.c b/Src/builtin.c
index 063644e..680b19a 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -3483,15 +3483,15 @@ mkautofn(Shfunc shf)
 {
     Eprog p;
 
-    p = (Eprog) zalloc(sizeof(*p));
+    p = (Eprog) zalloc(sizeof(*p) + 5 * sizeof(wordcode));
     p->len = 5 * sizeof(wordcode);
-    p->prog = (Wordcode) zalloc(p->len);
+    p->prog = (Wordcode) (((char*)p)+sizeof(*p));
     p->strs = NULL;
     p->shf = shf;
     p->npats = 0;
     p->nref = 1; /* allocated from permanent storage */
     p->pats = (Patprog *) p->prog;
-    p->flags = EF_REAL;
+    p->flags = EF_REAL_SINGLE; /* no allocation for p->prog */
     p->dump = NULL;
 
     p->prog[0] = WCB_LIST((Z_SYNC | Z_END), 0);
diff --git a/Src/exec.c b/Src/exec.c
index debb0ae..088080b 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -4973,7 +4973,13 @@ execfuncdef(Estate state, Eprog redir_prog)
 	    prog = (Eprog) zhalloc(sizeof(*prog));
 	    prog->nref = -1; /* on the heap */
 	} else {
-	    prog = (Eprog) zalloc(sizeof(*prog));
+            if (state->prog->dump || !names) {
+                prog = (Eprog) zalloc(sizeof(*prog));
+            } else {
+                /* The EF_REAL path below */
+                prog = (Eprog) zalloc(sizeof(*prog) + len);
+                prog->pats = pp = (Patprog *) (((char*)prog)+sizeof(*prog));
+            }
 	    prog->nref = 1; /* allocated from permanent storage */
 	}
 	prog->npats = npats;
@@ -4992,8 +4998,11 @@ execfuncdef(Estate state, Eprog redir_prog)
 	    prog->prog = state->pc;
 	    prog->strs = state->strs + sbeg;
 	} else {
-	    prog->flags = EF_REAL;
-	    prog->pats = pp = (Patprog *) zalloc(len);
+            /* The EF_REAL path */
+	    prog->flags = EF_REAL_SINGLE;
+	    // prog->pats = pp = (Patprog *) zalloc(len);
+            // -->
+            // prog->pats = pp = (((char*)prog)+sizeof(*prog));
 	    prog->prog = (Wordcode) (prog->pats + npats);
 	    prog->strs = (char *) (prog->prog + nprg);
 	    prog->dump = NULL;
diff --git a/Src/parse.c b/Src/parse.c
index 8769baa..1e1c528 100644
--- a/Src/parse.c
+++ b/Src/parse.c
@@ -504,18 +504,28 @@ bld_eprog(int heap)
 
     ecadd(WCB_END());
 
-    ret = heap ? (Eprog) zhalloc(sizeof(*ret)) : (Eprog) zalloc(sizeof(*ret));
-    ret->len = ((ecnpats * sizeof(Patprog)) +
-		(ecused * sizeof(wordcode)) +
-		ecsoffs);
-    ret->npats = ecnpats;
-    ret->nref = heap ? -1 : 1;
-    ret->pats = heap ? (Patprog *) zhalloc(ret->len) :
-	(Patprog *) zshcalloc(ret->len);
+    if ( heap ) {
+        ret = (Eprog) zhalloc(sizeof(*ret));
+        ret->len = ((ecnpats * sizeof(Patprog)) +
+                (ecused * sizeof(wordcode)) +
+                ecsoffs);
+        ret->npats = ecnpats;
+        ret->nref = -1;
+        ret->pats = (Patprog *) zhalloc(ret->len);
+    } else {
+        int len = ((ecnpats * sizeof(Patprog)) +
+                (ecused * sizeof(wordcode)) +
+                ecsoffs);
+        ret = (Eprog) zalloc(sizeof(*ret) + len);
+        ret->len = len;
+        ret->npats = ecnpats;
+        ret->nref = 1;
+        ret->pats = (Patprog *) (((char*)ret)+sizeof(*ret));
+    }
     ret->prog = (Wordcode) (ret->pats + ecnpats);
     ret->strs = (char *) (ret->prog + ecused);
     ret->shf = NULL;
-    ret->flags = heap ? EF_HEAP : EF_REAL;
+    ret->flags = heap ? EF_HEAP : EF_REAL_SINGLE;
     ret->dump = NULL;
     for (l = 0; l < ecnpats; l++)
 	ret->pats[l] = dummy_patprog1;
@@ -2709,9 +2719,14 @@ freeeprog(Eprog p)
 	    if (p->dump) {
 		decrdumpcount(p->dump);
 		zfree(p->pats, p->npats * sizeof(Patprog));
-	    } else
-		zfree(p->pats, p->len);
-	    zfree(p, sizeof(*p));
+	    } else {
+                if ( (p->flags & EF_REAL_SINGLE) == 0 )
+                    zfree(p->pats, p->len);
+            }
+            if ( (p->flags & EF_REAL_SINGLE) )
+                zfree(p, sizeof(*p) + p->len);
+            else
+                zfree(p, sizeof(*p));
 	}
     }
 }
diff --git a/Src/zsh.h b/Src/zsh.h
index 22f73f8..ab70cb7 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -797,6 +797,7 @@ struct eprog {
 #define EF_HEAP 2
 #define EF_MAP  4
 #define EF_RUN  8
+#define EF_REAL_SINGLE 16
 
 typedef struct estate *Estate;
 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Improving zcompdump (Re: A patch with hashtable optimization, which doesn't work)
  2017-05-20  5:06   ` Sebastian Gniazdowski
@ 2017-05-20 17:08     ` Bart Schaefer
  0 siblings, 0 replies; 4+ messages in thread
From: Bart Schaefer @ 2017-05-20 17:08 UTC (permalink / raw)
  To: zsh-workers

On May 20,  7:06am, Sebastian Gniazdowski wrote:
}
} There are no performance gains from keeping the collision list sorted.

Yes, I was going to reply to your earlier message to remark that I would
only expect sorting the collision list to *slow down* insertion ops, not
speed up lookups.

} Tried to optimize mkautofn, to speed up sourcing zcompdump.

How much does zcompdump actually help?  Have you compared startup with
and without it?

There's a bunch of stuff in .zcompdump.  Have you investigated whether
certain parts of it are slower than others?

One lesson learned with Completion/Base/Utility/_store_cache is that
parsing array assignments is expensive.  _store_cache now writes out
the array contents as a here-document and splits with ${(Q)${(z)...}}
to avoid parser overhead.  The _comps array is large; maybe there is
a big win to be had by changing the format of the file.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-05-20 17:07 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-18 10:14 A patch with hashtable optimization, which doesn't work Sebastian Gniazdowski
2017-05-18 12:16 ` Sebastian Gniazdowski
2017-05-20  5:06   ` Sebastian Gniazdowski
2017-05-20 17:08     ` Improving zcompdump (Re: A patch with hashtable optimization, which doesn't work) Bart Schaefer

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).