zsh-workers
 help / color / mirror / code / Atom feed
* Significant assignstrvalue optimization
@ 2016-11-18 15:51 Sebastian Gniazdowski
  2016-11-20 19:36 ` Peter Stephenson
  0 siblings, 1 reply; 2+ messages in thread
From: Sebastian Gniazdowski @ 2016-11-18 15:51 UTC (permalink / raw)
  To: zsh-workers

[-- Attachment #1: Type: text/plain, Size: 2174 bytes --]

Hello,
first thing – the dupstring_glen() that was in the code isn't needed –
the value allocated was only read, not written to in any place in the
code:

-           z = dupstring_glen(v->pm->gsu.s->getfn(v->pm), (unsigned*)
&zlen);
+            z = v->pm->gsu.s->getfn(v->pm);
+            zlen = strlen(z);

This gives significant speed up – although only for very large buffers –
attached testopt4bb.zsh:

    i=$(( 20000 ))
    while (( i -- )); do
        a+="aaaaaaaaaaaaaaaaaaaaaaaaa"
    done

runs for 953 ms with optimizations, 1576 ms without them. 

Second: the same that I did with setarrvalue – if we assign a[10]="a",
and 10-th element already exists, i.e. size of string doesn't change,
then instead of:

                x = (char *) zalloc(newsize + 1);
                strncpy(x, z, v->start);
                strcpy(x + v->start, val);
                strcat(x + v->start, z + v->end);
                v->pm->gsu.s->setfn(v->pm, x);

It is done:

                /* Size doesn't change, can limit actions to only
                 * overwriting bytes in already allocated string */
                strncpy(z + v->start, val, vlen);

Condition excludes special parameters:

            /* Does new size differ? */
            if ( newsize != zlen || v->pm->node.flags & PM_SPECIAL ) {

Test code:

a="exampletext"
repeat 12 a+="$a"

strtest() {
    repeat 60000; do
        a[10]="a"
    done
}

runs 366 ms vs 876 ms. The string has length 45056 elements, and there
are as much as 60k repetitions, so previous code was actually already
fast despite the extra zalloc, strncpy, strcat – which is little weird.
That said, the optimization can save one's day in certain situations.

Also added extensive tests of string assignments.

PS. The code again removes "+ 1" from new size computation. The comment
explains why:

            /* Characters preceding start index +
               characters of what is assigned +
               characters following end index */
            newsize = v->start + vlen + (zlen - v->end);

-- 
  Sebastian Gniazdowski
  psprint@fastmail.com

[-- Attachment #2: string_big_opt.diff --]
[-- Type: text/plain, Size: 4055 bytes --]

diff --git a/Src/params.c b/Src/params.c
index ef72cba..90f6fdf 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -2436,9 +2436,10 @@ assignstrvalue(Value v, char *val, int flags)
 		v->pm->width = strlen(val);
 	} else {
 	    char *z, *x;
-	    int zlen;
+            int zlen, vlen, newsize;
 
-	    z = dupstring_glen(v->pm->gsu.s->getfn(v->pm), (unsigned*) &zlen);
+            z = v->pm->gsu.s->getfn(v->pm);
+            zlen = strlen(z);
 
 	    if ((v->flags & VALFLAG_INV) && unset(KSHARRAYS))
 		v->start--, v->end--;
@@ -2469,12 +2470,26 @@ assignstrvalue(Value v, char *val, int flags)
 	    }
 	    else if (v->end > zlen)
 		v->end = zlen;
-	    x = (char *) zalloc(v->start + strlen(val) + zlen - v->end + 1);
-	    strncpy(x, z, v->start);
-	    strcpy(x + v->start, val);
-	    strcat(x + v->start, z + v->end);
-	    v->pm->gsu.s->setfn(v->pm, x);
-	    zsfree(val);
+
+            vlen = strlen(val);
+            /* Characters preceding start index +
+               characters of what is assigned +
+               characters following end index */
+            newsize = v->start + vlen + (zlen - v->end);
+
+            /* Does new size differ? */
+            if ( newsize != zlen || v->pm->node.flags & PM_SPECIAL ) {
+                x = (char *) zalloc(newsize + 1);
+                strncpy(x, z, v->start);
+                strcpy(x + v->start, val);
+                strcat(x + v->start, z + v->end);
+                v->pm->gsu.s->setfn(v->pm, x);
+            } else {
+                /* Size doesn't change, can limit actions to only
+                 * overwriting bytes in already allocated string */
+                strncpy(z + v->start, val, vlen);
+            }
+            zsfree(val);
 	}
 	break;
     case PM_INTEGER:
diff --git a/Test/A06assign.ztst b/Test/A06assign.ztst
index da4e3b0..bf39aee 100644
--- a/Test/A06assign.ztst
+++ b/Test/A06assign.ztst
@@ -489,3 +489,143 @@
  print $array
 0:slice beyond length of array
 >FIRST
+
+# tests of string assignments
+
+ a="abc"
+ a[1]=x
+ print $a
+0:overwrite first character in string
+>xbc
+
+ a="abc"
+ a[2]="x"
+ print $a
+0:overwrite middle character in string
+>axc
+
+ a="abc"
+ a[3]="x"
+ print $a
+0:overwrite last character in string
+>abx
+
+ a="abc"
+ a[-1]="x"
+ print $a
+0:overwrite -1 character in string
+>abx
+
+ a="abc"
+ a[-2]="x"
+ print $a
+0:overwrite -2 character (middle) in string
+>axc
+
+ a="ab"
+ a[-2]="x"
+ print $a
+0:overwrite -2 character (first) in string
+>xb
+
+ a="abc"
+ a[-3]="x"
+ print $a
+0:overwrite -3 character (first) in string
+>xbc
+
+ a="abc"
+ a[-4]="x"
+ print $a
+0:overwrite -4 character (before first) in string
+>xabc
+
+ a="abc"
+ a[-5]="x"
+ print $a
+0:overwrite -5 character (before-before first) in string
+>xabc
+
+ a="abc"
+ a[-4,0]="x"
+ print $a
+0:overwrite [-4,0] characters (before first) in string
+>xabc
+
+ a="abc"
+ a[-4,-4]="x"
+ print $a
+0:overwrite [-4,-4] character (before first) in string
+>xabc
+
+ a="abc"
+ a[-40,-30]="x"
+ print $a
+0:overwrite [-40,-30] characters (far before first) in string
+>xabc
+
+ a="abc"
+ a[-40,1]="x"
+ print $a
+0:overwrite [-40,1] characters in short string
+>xbc
+
+ a="abc"
+ a[-40,40]="x"
+ print $a
+0:overwrite [-40,40] characters in short string
+>x
+
+ a="abc"
+ a[2,40]="x"
+ print $a
+0:overwrite [2,40] characters in short string
+>ax
+
+ a="abc"
+ a[2,-1]="x"
+ print $a
+0:overwrite [2,-1] characters in short string
+>ax
+
+ a="abc"
+ a[-2,-1]="x"
+ print $a
+0:overwrite [-2,-1] characters in short string
+>ax
+
+ a="a"
+ a[-1]="xx"
+ print $a
+0:overwrite [-1] character with "xx"
+>xx
+
+ a="a"
+ a[-2]="xx"
+ print $a
+0:overwrite [-2] character (before first) with "xx"
+>xxa
+
+ a="a"
+ a[2]="xx"
+ print $a
+0:overwrite [2] character (after last) with "xx"
+>axx
+
+ a=""
+ a[1]="xx"
+ print $a
+0:overwrite [1] character (string: "") with "xx"
+>xx
+
+ a=""
+ a[-1]="xx"
+ print $a
+0:overwrite [-1] character (string: "") with "xx"
+>xx
+
+ a=""
+ a[2]="xx"
+ print $a
+0:overwrite [2] character (string: "") with "xx"
+>xx

[-- Attachment #3: testopt4bb.zsh --]
[-- Type: application/octet-stream, Size: 205 bytes --]

#!Src/zsh-nodup
#!Src/zsh-nodup-before

zmodload zsh/zprof

strtest() {
    a=""

    i=$(( 20000 ))
    while (( i -- )); do
        a+="aaaaaaaaaaaaaaaaaaaaaaaaa"
    done
}

strtest

zprof | head -n 25

[-- Attachment #4: testopt8.zsh --]
[-- Type: application/octet-stream, Size: 228 bytes --]

#!Src/zsh
#!Src/zsh-nodup-before
#!Src/zsh-nodup

zmodload zsh/zprof

strtest() {
    repeat 60000; do
        a[10]="a"
    done
}

a="exampletext"
repeat 12 a+="$a"

strtest

zprof | head -n 25

echo "Resulting length: ${#a}"

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: Significant assignstrvalue optimization
  2016-11-18 15:51 Significant assignstrvalue optimization Sebastian Gniazdowski
@ 2016-11-20 19:36 ` Peter Stephenson
  0 siblings, 0 replies; 2+ messages in thread
From: Peter Stephenson @ 2016-11-20 19:36 UTC (permalink / raw)
  To: zsh-workers

On Fri, 18 Nov 2016 07:51:25 -0800
Sebastian Gniazdowski <psprint@fastmail.com> wrote:
> Second: the same that I did with setarrvalue – if we assign a[10]="a",
> and 10-th element already exists, i.e. size of string doesn't change,
> then instead of:
> 
>                 x = (char *) zalloc(newsize + 1);
>                 strncpy(x, z, v->start);
>                 strcpy(x + v->start, val);
>                 strcat(x + v->start, z + v->end);
>                 v->pm->gsu.s->setfn(v->pm, x);
> 
> It is done:
> 
>                 /* Size doesn't change, can limit actions to only
>                  * overwriting bytes in already allocated string */
>                 strncpy(z + v->start, val, vlen);

It might be safer both to check explicitly if the setter is the standard
string one, and to implement the remaining functionality of that if it
is.  This shouldn't hit any of the standard cases and I think is now
transparently equivalent to the old code.

diff --git a/Src/params.c b/Src/params.c
index 9d741cb..a79debc 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -2436,9 +2436,10 @@ assignstrvalue(Value v, char *val, int flags)
 		v->pm->width = strlen(val);
 	} else {
 	    char *z, *x;
-	    int zlen;
+            int zlen, vlen, newsize;
 
-	    z = dupstring_glen(v->pm->gsu.s->getfn(v->pm), (unsigned*) &zlen);
+            z = v->pm->gsu.s->getfn(v->pm);
+            zlen = strlen(z);
 
 	    if ((v->flags & VALFLAG_INV) && unset(KSHARRAYS))
 		v->start--, v->end--;
@@ -2469,12 +2470,34 @@ assignstrvalue(Value v, char *val, int flags)
 	    }
 	    else if (v->end > zlen)
 		v->end = zlen;
-	    x = (char *) zalloc(v->start + strlen(val) + zlen - v->end + 1);
-	    strncpy(x, z, v->start);
-	    strcpy(x + v->start, val);
-	    strcat(x + v->start, z + v->end);
-	    v->pm->gsu.s->setfn(v->pm, x);
-	    zsfree(val);
+
+            vlen = strlen(val);
+            /* Characters preceding start index +
+               characters of what is assigned +
+               characters following end index */
+            newsize = v->start + vlen + (zlen - v->end);
+
+            /* Does new size differ? */
+            if (newsize != zlen || v->pm->gsu.s->setfn != strsetfn) {
+                x = (char *) zalloc(newsize + 1);
+                strncpy(x, z, v->start);
+                strcpy(x + v->start, val);
+                strcat(x + v->start, z + v->end);
+                v->pm->gsu.s->setfn(v->pm, x);
+            } else {
+		Param pm = v->pm;
+                /* Size doesn't change, can limit actions to only
+                 * overwriting bytes in already allocated string */
+                strncpy(z + v->start, val, vlen);
+		/* Implement remainder of strsetfn */
+		if (!(pm->node.flags & PM_HASHELEM) &&
+		    ((pm->node.flags & PM_NAMEDDIR) ||
+		     isset(AUTONAMEDIRS))) {
+		    pm->node.flags |= PM_NAMEDDIR;
+		    adduserdir(pm->node.nam, z, 0, 0);
+		}
+            }
+            zsfree(val);
 	}
 	break;
     case PM_INTEGER:
diff --git a/Test/A06assign.ztst b/Test/A06assign.ztst
index da4e3b0..bf39aee 100644
--- a/Test/A06assign.ztst
+++ b/Test/A06assign.ztst
@@ -489,3 +489,143 @@
  print $array
 0:slice beyond length of array
 >FIRST
+
+# tests of string assignments
+
+ a="abc"
+ a[1]=x
+ print $a
+0:overwrite first character in string
+>xbc
+
+ a="abc"
+ a[2]="x"
+ print $a
+0:overwrite middle character in string
+>axc
+
+ a="abc"
+ a[3]="x"
+ print $a
+0:overwrite last character in string
+>abx
+
+ a="abc"
+ a[-1]="x"
+ print $a
+0:overwrite -1 character in string
+>abx
+
+ a="abc"
+ a[-2]="x"
+ print $a
+0:overwrite -2 character (middle) in string
+>axc
+
+ a="ab"
+ a[-2]="x"
+ print $a
+0:overwrite -2 character (first) in string
+>xb
+
+ a="abc"
+ a[-3]="x"
+ print $a
+0:overwrite -3 character (first) in string
+>xbc
+
+ a="abc"
+ a[-4]="x"
+ print $a
+0:overwrite -4 character (before first) in string
+>xabc
+
+ a="abc"
+ a[-5]="x"
+ print $a
+0:overwrite -5 character (before-before first) in string
+>xabc
+
+ a="abc"
+ a[-4,0]="x"
+ print $a
+0:overwrite [-4,0] characters (before first) in string
+>xabc
+
+ a="abc"
+ a[-4,-4]="x"
+ print $a
+0:overwrite [-4,-4] character (before first) in string
+>xabc
+
+ a="abc"
+ a[-40,-30]="x"
+ print $a
+0:overwrite [-40,-30] characters (far before first) in string
+>xabc
+
+ a="abc"
+ a[-40,1]="x"
+ print $a
+0:overwrite [-40,1] characters in short string
+>xbc
+
+ a="abc"
+ a[-40,40]="x"
+ print $a
+0:overwrite [-40,40] characters in short string
+>x
+
+ a="abc"
+ a[2,40]="x"
+ print $a
+0:overwrite [2,40] characters in short string
+>ax
+
+ a="abc"
+ a[2,-1]="x"
+ print $a
+0:overwrite [2,-1] characters in short string
+>ax
+
+ a="abc"
+ a[-2,-1]="x"
+ print $a
+0:overwrite [-2,-1] characters in short string
+>ax
+
+ a="a"
+ a[-1]="xx"
+ print $a
+0:overwrite [-1] character with "xx"
+>xx
+
+ a="a"
+ a[-2]="xx"
+ print $a
+0:overwrite [-2] character (before first) with "xx"
+>xxa
+
+ a="a"
+ a[2]="xx"
+ print $a
+0:overwrite [2] character (after last) with "xx"
+>axx
+
+ a=""
+ a[1]="xx"
+ print $a
+0:overwrite [1] character (string: "") with "xx"
+>xx
+
+ a=""
+ a[-1]="xx"
+ print $a
+0:overwrite [-1] character (string: "") with "xx"
+>xx
+
+ a=""
+ a[2]="xx"
+ print $a
+0:overwrite [2] character (string: "") with "xx"
+>xx


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2016-11-20 19:37 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-18 15:51 Significant assignstrvalue optimization Sebastian Gniazdowski
2016-11-20 19:36 ` Peter Stephenson

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).