zsh-workers
 help / color / mirror / code / Atom feed
From: Peter Stephenson <p.stephenson@samsung.com>
To: zsh-workers@zsh.org
Subject: Re: Callgrind run
Date: Thu, 10 Nov 2016 12:31:56 +0000	[thread overview]
Message-ID: <20161110123156.1d1699ec@pwslap01u.europe.root.pri> (raw)
In-Reply-To: <1478774232.2371010.783342705.69C81F52@webmail.messagingengine.com>

On Thu, 10 Nov 2016 02:37:12 -0800
Sebastian Gniazdowski <psprint@fastmail.com> wrote:
> Seems that Zsh execution could be greatly optimized if the functions
> remnulargs, untokenize and haswilds could be optimized. Not sure if the
> results are reasonable, as haswilds just iterates over a string and does
> quite a basic switch. The other two functions have nested loops, so they
> look more likely to be time consuming. Maybe the nested loops can be
> changed to something else?

The nested loops aren't "real" nested loops; the inner loop runs to
completion and then breaks if the outer loop detects a condition that
needs handling.

To do a good job optimising here, we really need state information
outside the functions --- in an experiment with my startup files, only
16% of calls to untokenize() actually had any effect.  But recording the
state generally is a very big change.

Some possible optimisations are along the following lines, although a
bit of care is needed, as it's not necessarily the case on all
architectures that the range test the following patch introduces is
faster than the bit test used by itok() that it replaces.  It did seem
faster on this fairly standard Intel CPU.

I probably won't be committing this.

diff --git a/Src/exec.c b/Src/exec.c
index a01a633..a6b01a6 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -1953,26 +1953,24 @@ makecline(LinkList list)
 mod_export void
 untokenize(char *s)
 {
-    if (*s) {
+    if (*s) {			/* "" may be a const string. Ick. */
 	int c;
 
-	while ((c = *s++))
-	    if (itok(c)) {
+	while ((c = *s++)) {
+	    if (c >= FIRST_TOK && c <= LAST_TOK) {
 		char *p = s - 1;
 
 		if (c != Nularg)
-		    *p++ = ztokens[c - Pound];
+		    *p++ = ztoken_to_char[STOUC(c)];
 
 		while ((c = *s++)) {
-		    if (itok(c)) {
-			if (c != Nularg)
-			    *p++ = ztokens[c - Pound];
-		    } else
-			*p++ = c;
+		    if (c != Nularg)
+			*p++ = ztoken_to_char[STOUC(c)];
 		}
 		*p = '\0';
 		break;
 	    }
+	}
     }
 }
 
diff --git a/Src/glob.c b/Src/glob.c
index 50f6dce..4d3fc51 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -3570,7 +3570,7 @@ remnulargs(char *s)
     if (*s) {
 	char *o = s, c;
 
-	while ((c = *s++))
+	while ((c = *s++)) {
 	    if (c == Bnullkeep) {
 		/*
 		 * An active backslash that needs to be turned back into
@@ -3579,7 +3579,7 @@ remnulargs(char *s)
 		 * pattern matching.
 		 */
 		continue;
-	    } else if (inull(c)) {
+	    } else if (c >= FIRST_NULL && c <= LAST_NULL) {
 		char *t = s - 1;
 
 		while ((c = *s++)) {
@@ -3595,6 +3595,7 @@ remnulargs(char *s)
 		}
 		break;
 	    }
+	}
     }
 }
 
diff --git a/Src/lex.c b/Src/lex.c
index 8896128..bfd6b11 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -37,6 +37,18 @@
 /**/
 mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-!'\"\\\\";
 
+/*
+ * Map a possibly tokenized unsigned char to a normal unsigned
+ * char, for use in untokenize().
+ *
+ * Tokens that need untokenizing (everything in ztokens except Nularg)
+ * map to a different character, everything else maps to itself.
+ * In particular, metafied characters are passed through unchanged
+ * (effectively escaping tokens) and do not need special handling.
+ */
+/**/
+mod_export char ztoken_to_char[256];
+
 /* parts of the current token */
 
 /**/
diff --git a/Src/utils.c b/Src/utils.c
index 3d535b8..9fa8a97 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -4012,6 +4012,18 @@ inittyptab(void)
     for (s = PATCHARS; *s; s++)
 	typtab[STOUC(*s)] |= IPATTERN;
 
+    for (t0 = 0; t0 < 256; t0++)
+    {
+	if (itok(t0) && (char)t0 != Nularg)
+	{
+	    ztoken_to_char[t0] = ztokens[t0 - STOUC(Pound)];
+	}
+	else
+	{
+	    ztoken_to_char[t0] = (char)t0;
+	}
+    }
+
     unqueue_signals();
 }
 
diff --git a/Src/zsh.h b/Src/zsh.h
index a5d4455..5065a54 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -170,6 +170,7 @@ struct mathfunc {
  * These should match the characters in ztokens, defined in lex.c
  */
 #define Pound		((char) 0x84)
+#define FIRST_TOK	Pound
 #define String		((char) 0x85)
 #define Hat		((char) 0x86)
 #define Star		((char) 0x87)
@@ -204,6 +205,7 @@ struct mathfunc {
  * and backslashes.
  */
 #define Snull		((char) 0x9d)
+#define FIRST_NULL	Snull
 #define Dnull		((char) 0x9e)
 #define Bnull		((char) 0x9f)
 /*
@@ -217,6 +219,8 @@ struct mathfunc {
  * is used to initialise the IMETA type in inittyptab().
  */
 #define Nularg		((char) 0xa1)
+#define LAST_TOK	Nularg
+#define LAST_NULL	Nularg
 
 /*
  * Take care to update the use of IMETA appropriately when adding


  reply	other threads:[~2016-11-10 12:32 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20161110103845epcas3p3e7cabeffae723219daafa8d3e6b32f12@epcas3p3.samsung.com>
2016-11-10 10:37 ` Sebastian Gniazdowski
2016-11-10 12:31   ` Peter Stephenson [this message]
2016-11-10 14:07     ` Sebastian Gniazdowski
2016-11-10 13:47   ` multibyte optimisations Peter Stephenson
2016-11-10 14:57     ` Sebastian Gniazdowski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161110123156.1d1699ec@pwslap01u.europe.root.pri \
    --to=p.stephenson@samsung.com \
    --cc=zsh-workers@zsh.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).