zsh-workers
 help / color / mirror / code / Atom feed
* [PATCH] Finally a startup optimization (hasher in ecstrcode)
@ 2017-07-05  7:35 Sebastian Gniazdowski
  0 siblings, 0 replies; only message in thread
From: Sebastian Gniazdowski @ 2017-07-05  7:35 UTC (permalink / raw)
  To: zsh-workers

[-- Attachment #1: Type: text/plain, Size: 1261 bytes --]

Hello,
without the patch (callgrind_annotate):


26,300,937  ???:_platform_strcmp [libsystem_platform.dylib]
13,288,306  Src/parse.c:ecstrcode [Src/zsh]
 4,486,910  Src/hashtable.c:hasher [Src/zsh]

after:

 2,661,459  ???:_platform_strcmp [libsystem_platform.dylib]
 2,588,524  Src/parse.c:ecstrcode [Src/zsh]
 5,959,019  Src/hashtable.c:hasher [Src/zsh]


So, for ecstrcode & hasher, the gain is 10.5 million, the loss is 1.5 million. Shell startup time measured with:

% repeat 10 { time /usr/local/bin/zsh-5.3.1-dev-0 -i -c exit } 

drops from 205 ms to 192 ms.

With the additional has_token() patch, it drops from 205 ms to 190 ms. I think the overall gain can be assumed to be 15-20 ms.

The 9 million gained shows that this is a serious optimization. With three more such optimizations, the gain would be 45-60 ms, a noticeable difference.

Data: https://github.com/zdharma/hacking-private/tree/master/startup-dump-opt

Any ideas about what more can be done? I'm trying to add meaning to my functions-lexicon feature (automatically managed functions, loaded via autoload -w ...zwc). Dumps seem to be mmapped quite optimally, so I'm not sure where else to look for something related to Eprog.

--
Sebastian Gniazdowski
psprint /at/ zdharma.org

[-- Attachment #2: silence_ecstrcode.diff --]
[-- Type: application/octet-stream, Size: 1102 bytes --]

diff --git a/Src/parse.c b/Src/parse.c
index 8769baa..00a8f6a 100644
--- a/Src/parse.c
+++ b/Src/parse.c
@@ -396,6 +396,8 @@ ecstrcode(char *s)
 {
     int l, t = has_token(s);
 
+    unsigned val = hasher(s);
+
     if ((l = strlen(s) + 1) && l <= 4) {
 	wordcode c = (t ? 3 : 2);
 	switch (l) {
@@ -410,8 +412,9 @@ ecstrcode(char *s)
 	int cmp;
 
 	for (pp = &ecstrs; (p = *pp); ) {
-	    if (!(cmp = p->nfunc - ecnfunc) && !(cmp = strcmp(p->str, s)))
+	    if (!(cmp = p->nfunc - ecnfunc) && !(cmp = (((signed)p->hashval) - ((signed)val))) && !(cmp = strcmp(p->str, s))) {
 		return p->offs;
+            }
 	    pp = (cmp < 0 ? &(p->left) : &(p->right));
 	}
 	p = *pp = (Eccstr) zhalloc(sizeof(*p));
@@ -420,6 +423,7 @@ ecstrcode(char *s)
 	p->aoffs = ecsoffs;
 	p->str = s;
 	p->nfunc = ecnfunc;
+        p->hashval = val;
 	ecsoffs += l;
 
 	return p->offs;
diff --git a/Src/zsh.h b/Src/zsh.h
index 137b2a5..5973cf4 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -813,6 +813,7 @@ struct eccstr {
     char *str;
     wordcode offs, aoffs;
     int nfunc;
+    int hashval;
 };
 
 #define EC_NODUP  0

[-- Attachment #3: less_has_token.diff --]
[-- Type: application/octet-stream, Size: 648 bytes --]

diff --git a/Src/parse.c b/Src/parse.c
index 8769baa..6f193f3 100644
--- a/Src/parse.c
+++ b/Src/parse.c
@@ -394,9 +394,10 @@ ecdel(int p)
 static wordcode
 ecstrcode(char *s)
 {
-    int l, t = has_token(s);
+    int l, t;
 
     if ((l = strlen(s) + 1) && l <= 4) {
+        t = has_token(s);
 	wordcode c = (t ? 3 : 2);
 	switch (l) {
 	case 4: c |= ((wordcode) STOUC(s[2])) << 19;
@@ -414,6 +415,9 @@ ecstrcode(char *s)
 		return p->offs;
 	    pp = (cmp < 0 ? &(p->left) : &(p->right));
 	}
+
+        t = has_token(s);
+
 	p = *pp = (Eccstr) zhalloc(sizeof(*p));
 	p->left = p->right = 0;
 	p->offs = ((ecsoffs - ecssub) << 2) | (t ? 1 : 0);

[-- Attachment #4: O2_repeat10.txt --]
[-- Type: text/plain, Size: 3099 bytes --]

repeat 10 { time /usr/local/bin/zsh-5.3.1-dev-0 -i -c exit }

O2, optimized (hasher use):

/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,05s system 94% cpu 0,189 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,06s system 95% cpu 0,198 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,06s system 94% cpu 0,193 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,06s system 94% cpu 0,200 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,05s system 95% cpu 0,190 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,05s system 94% cpu 0,193 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,05s system 94% cpu 0,190 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,06s system 94% cpu 0,196 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,05s system 95% cpu 0,188 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,12s user 0,05s system 94% cpu 0,189 total

0.189 + 0.198 + 0.193 + 0.200 + 0.190 + 0.193 + 0.190 + 0.196 + 0.188 + 0.189 => 1.926 / 10 = 0.192


O2, no optimizations:

/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,14s user 0,05s system 94% cpu 0,200 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,14s user 0,05s system 95% cpu 0,201 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,14s user 0,05s system 95% cpu 0,209 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,14s user 0,05s system 95% cpu 0,203 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,14s user 0,05s system 95% cpu 0,205 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,15s user 0,05s system 95% cpu 0,216 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,14s user 0,05s system 95% cpu 0,207 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,14s user 0,06s system 95% cpu 0,208 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,14s user 0,05s system 95% cpu 0,206 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,14s user 0,05s system 95% cpu 0,199 total

0.200 + 0.201 + 0.209 + 0.203 + 0.205 + 0.216 + 0.207 + 0.208 + 0.206 + 0.199 => 2.054 / 10 = 0.205


O2, additional has_token optimization (more 0.18* results, 5 vs 3)

/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,05s system 94% cpu 0,192 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,05s system 95% cpu 0,189 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,05s system 95% cpu 0,191 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,05s system 94% cpu 0,189 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,05s system 95% cpu 0,190 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,06s system 93% cpu 0,203 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,12s user 0,05s system 94% cpu 0,187 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,12s user 0,05s system 95% cpu 0,185 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,13s user 0,05s system 95% cpu 0,193 total
/usr/local/bin/zsh-5.3.1-dev-0 -i -c exit  0,12s user 0,05s system 94% cpu 0,184 total

0.192 + 0.189 + 0.191 + 0.189 + 0.190 + 0.203 + 0.187 + 0.185 + 0.193 + 0.184 => 1.903 / 10 = 0.190

[-- Attachment #5: test_zsh_script.txt --]
[-- Type: text/plain, Size: 1105 bytes --]

source "$HOME/.zplugin/bin/zplugin.zsh"
autoload -Uz _zplugin
(( ${+_comps} )) && _comps[zplugin]=_zplugin

autoload -Uz compinit
compinit

zplugin snippet https://github.com/robbyrussell/oh-my-zsh/blob/master/plugins/git/git.plugin.zsh
zplugin snippet https://github.com/robbyrussell/oh-my-zsh/blob/master/lib/git.zsh
zplugin cdclear -q

zstyle ":plugin:zconvey" check_interval "1"
zstyle ":plugin:zconvey" greeting "text"
zstyle ":plugin:zconvey" expire_seconds "20"

#zmodload zsh/zprof
#() {
fpath[1,0]=( ~/.zplugin/lexicon.zwc )
autoload -Uzw ~/.zplugin/lexicon.zwc

zplugin light psprint zsh-navigation-tools
zplugin light psprint zsh-editing-workbench
zplugin light psprint zsh-cmd-architect
zplugin light psprint zsh-select
zplugin light psprint zprompts
zplugin light psprint zzcomplete
zplugin light rimraf k
zplugin light psprint history-search-multi-word
zplugin light zdharma zbrowse
zplugin light oz safe-paste
zplugin light zsh-users zsh-autosuggestions
zplugin light psprint zcommodore
zplugin light zdharma/fast-syntax-highlighting

zplugin cdreplay

#zprof | head -n 14
#}

# vim:ft=zsh

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2017-07-05  7:36 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-05  7:35 [PATCH] Finally a startup optimization (hasher in ecstrcode) Sebastian Gniazdowski

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).