zsh-workers
 help / color / mirror / code / Atom feed
From: Sebastian Gniazdowski <psprint@fastmail.com>
To: zsh-workers@zsh.org
Subject: [PATCH] Optimization of getarrvalue()
Date: Tue, 08 Nov 2016 12:11:39 -0800	[thread overview]
Message-ID: <1478635899.1897979.781551353.05792438@webmail.messagingengine.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1519 bytes --]

Hello
The function contains arrdup() that includes elements beyond end index.
I've replaced it with arrdup_max() that has limit parameter – will
duplicate at most that limit of elements. Following test code:

test_fun() {
    local -a arr
    repeat 12; do
        arr+=( "abcdefghijklmnop" $arr )
    done

    local -a arr2
    repeat 2000; do
        arr2=( ${arr[1,300]} )
    done
}

generates array of 4095 elements, and the running times are 1099 ms
(optimized) vs 2038 ms (unoptimized). The array generation utilizes
previous array optimization.

More, I suspect a memory leak in following code that has been replaced:

    if (v->end <= v->start)
        s[0] = NULL;
    else if (arrlen_ge(s, v->end - v->start))
        s[v->end - v->start] = NULL;

That code adapts array according to end index – however it seems that
strings after the NULL are then unreachable to freearray() ? That said,
I wasn't able to obtain the memory leak with repeated ${arr[1,2]} use
for $#arr > 2.

Interesting that some tests fail (e.g. ./Y03arguments.ztst) if I here
duplicate nular instead of doing:

    } else if (v->end <= v->start) {
        s = arrdup_max(s, 1);
        s[0] = NULL;

like in the original code. The test output is then:

@@ -6,6 +6,6 @@
 line: {tst r}{}
 line: {tst x}{}
 line: {tst x }{}
-MESSAGE:{no more arguments}
+MESSAGE:{}
 line: {tst x y }{}
-MESSAGE:{no more arguments}
+MESSAGE:{}

-- 
  Sebastian Gniazdowski
  psprint@fastmail.com

[-- Attachment #2: arr_opt.diff --]
[-- Type: text/plain, Size: 1937 bytes --]

diff --git a/Src/params.c b/Src/params.c
index 5fab84a..1e0bece 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -2290,14 +2290,24 @@ getarrvalue(Value v)
 	v->start += arrlen(s);
     if (v->end < 0)
 	v->end += arrlen(s) + 1;
-    if (arrlen_lt(s, v->start) || v->start < 0)
+
+    /* Null if 1) array too short, 2) index still negative */
+    if (arrlen_lt(s, v->start) || v->start < 0) {
 	s = arrdup(nular);
-    else
-	s = arrdup(s + v->start);
-    if (v->end <= v->start)
-	s[0] = NULL;
-    else if (arrlen_ge(s, v->end - v->start))
-	s[v->end - v->start] = NULL;
+    } else if (v->end <= v->start) {
+        s = arrdup_max(s, 1);
+        s[0] = NULL;
+    } else {
+        /* Here copying not till the end of the source array is handled
+         * -- arrdup_max will copy at most v->end - v->start elements,
+         * starting from v->start element. Original code said:
+	 *  s[v->end - v->start] = NULL
+         * which means that there are exactly the same number of
+         * elements as the value of the above *0-based* index.
+         * */
+	s = arrdup_max(s + v->start, v->end - v->start);
+    }
+
     return s;
 }
 
diff --git a/Src/utils.c b/Src/utils.c
index 733f570..e4351ad 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -4231,6 +4231,37 @@ arrdup(char **s)
 
 /**/
 mod_export char **
+arrdup_max(char **s, unsigned max)
+{
+    char **x, **y, **s_bkp, *bkp;
+    int len;
+
+    len = arrlen(s);
+
+    /* Limit has sense only if not equal to len */
+    if (max < len) {
+        s_bkp = s;
+
+        /* Num. of elements == max, sentinel *index* is the same */
+        bkp = s[max];
+        s[max] = NULL;
+    } else {
+        max = len;
+    }
+
+    y = x = (char **) zhalloc(sizeof(char *) * (max + 1));
+
+    while ((*x++ = dupstring(*s++)));
+
+    if (max < len) {
+        s_bkp[max] = bkp;
+    }
+
+    return y;
+}
+
+/**/
+mod_export char **
 zarrdup(char **s)
 {
     char **x, **y;

[-- Attachment #3: testopt5.zsh --]
[-- Type: application/octet-stream, Size: 272 bytes --]

#!Src/zsh-arr5-before
#!Src/zsh-arr5

zmodload zsh/zprof

test_fun() {
    local -a arr
    repeat 12; do
        arr+=( "abcdefghijklmnop" $arr )
    done

    local -a arr2
    repeat 2000; do
        arr2=( ${arr[1,300]} )
    done
}

test_fun
test_fun
test_fun

zprof

             reply	other threads:[~2016-11-08 20:11 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20161108201233epcas1p1e2900e2d67af8b8558ebdb70eb7ad480@epcas1p1.samsung.com>
2016-11-08 20:11 ` Sebastian Gniazdowski [this message]
2016-11-08 21:58   ` Bart Schaefer
2016-11-09  7:11   ` Bart Schaefer
2016-11-09 11:42   ` Peter Stephenson
2016-11-09 16:03     ` Bart Schaefer
2016-11-14 12:32       ` Jun T.
2016-11-14 13:15         ` Jun T.
2016-11-14 13:57         ` Peter Stephenson
2016-11-14 15:35           ` Jun T.
2016-11-14 17:10           ` Bart Schaefer
2016-11-16  7:55             ` Sebastian Gniazdowski
2016-11-15 12:28         ` Peter Stephenson
2016-11-15 19:57         ` Peter Stephenson
2016-11-15 21:11           ` Bart Schaefer
2016-11-16 14:06           ` Jun T.
2016-11-16 16:14             ` Jun T.
2016-11-16 18:50             ` Bart Schaefer
2016-11-21 12:30               ` Jun T.
2016-11-24  0:55                 ` Bart Schaefer
2016-11-24 11:49                   ` Jun T.
2016-11-29  6:11                     ` Array slices that don't exist [was Optimization of getarrvalue()] Bart Schaefer
2016-11-29  9:34                       ` Peter Stephenson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1478635899.1897979.781551353.05792438@webmail.messagingengine.com \
    --to=psprint@fastmail.com \
    --cc=zsh-workers@zsh.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).