zsh-workers
 help / color / mirror / code / Atom feed
* Re: PATCH: Re: 4.0.1-pre-1
@ 2001-03-07 12:54 Sven Wischnowsky
  0 siblings, 0 replies; 7+ messages in thread
From: Sven Wischnowsky @ 2001-03-07 12:54 UTC (permalink / raw)
  To: zsh-workers


Alexandre Duret-Lutz wrote:

> ...
> 
> Here zsh now consumes less memory that bash, and the testsuite
> is really usable, that's great!  However it is still
> significantly slower:
> 
> % time bash testsuite > /dev/null
> bash testsuite > /dev/null  0.88s user 0.44s system 101% cpu 1.302 total
> % time zsh testsuite > /dev/null
> zsh testsuite > /dev/null  7.13s user 0.27s system 100% cpu 7.357 total
> 
> (this is a testsuite that exists before the first test, as I explained.)
> 
> I don't really care myself, 7s doesn't sound too much for such a
> big script, but maybe you'll have another great idea... This
> testsuite seems like a good benchmark for the parsing code.

Whew, been playing with gprof...


There were two main reasons:

1) ecstrcode() used a list of strings alreayd used to avoid having to
encode strings more than once. Searching that list was rather
expensive (lots of calls to strcmp()), so the patch below changes this 
to use a binary tree.

Interestingly, though, changing the function to not search for
duplicates made things even slower because then we had to allocate
more and that was even more expensive. I hadn't expected that when I
wrote ecstrcode(). Interesting.

2) zhalloc(), the main culprit. There we have this loop that searches
a heap with enough free memory to satisfy the request. With many small 
allocations as in the script this meant several millon tries, because
new heaps are appended to the list. Slightly changing the way we use
fheap (a pointer that may point to the first heap with free memory)
made it significantly faster (for the test case I used gprof reported
0.03 seconds instead of 2.80). The change is simply that we let fheap
point to a newly allocated heap if we just created it (lots of free
memory and the loop doesn't loop).


With these changes zsh is even slightly faster than bash here (without 
memory debugging code, with -O2 and all that).

Bye
 Sven

Index: Src/mem.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/mem.c,v
retrieving revision 1.4
diff -u -r1.4 mem.c
--- Src/mem.c	2001/01/16 13:44:20	1.4
+++ Src/mem.c	2001/03/07 12:53:29
@@ -105,7 +105,8 @@
 
 static Heap heaps;
 
-/* first heap with free space, not always correct */
+/* a heap with free space, not always correct (it will be the last heap
+ * if that was newly allocated but it may also be another one) */
 
 static Heap fheap;
 
@@ -297,7 +298,8 @@
 
     /* find a heap with enough free space */
 
-    for (h = (fheap ? fheap : heaps); h; h = h->next) {
+    for (h = ((fheap && HEAP_ARENA_SIZE >= (size + fheap->used)) ? fheap : heaps);
+	 h; h = h->next) {
 	if (HEAP_ARENA_SIZE >= (n = size + h->used)) {
 	    void *ret;
 
@@ -364,7 +366,7 @@
 	    hp->next = h;
 	else
 	    heaps = h;
-	fheap = NULL;
+	fheap = h;
 
 	unqueue_signals();
 	return arena(h);
Index: Src/parse.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/parse.c,v
retrieving revision 1.18
diff -u -r1.18 parse.c
--- Src/parse.c	2001/03/06 13:00:41	1.18
+++ Src/parse.c	2001/03/07 12:53:30
@@ -211,9 +211,9 @@
  * containing the characters. Longer strings are encoded as the offset
  * into the strs character array stored in the eprog struct shifted by
  * two and ored with the bit pattern 0x.
- * The ecstr() function that adds the code for a string uses a simple
- * list of strings already added so that long strings are encoded only
- * once.
+ * The ecstrcode() function that adds the code for a string uses a simple
+ * binary tree of strings already added so that long strings are encoded
+ * only once.
  *
  * Note also that in the eprog struct the pattern, code, and string
  * arrays all point to the same memory block.
@@ -320,19 +320,18 @@
 	}
 	return c;
     } else {
-	Eccstr p, q = NULL;
+	Eccstr p, *pp;
+	int cmp;
 
-	for (p = ecstrs; p; q = p, p = p->next)
-	    if (p->nfunc == ecnfunc && !strcmp(s, p->str))
+	for (pp = &ecstrs; (p = *pp); ) {
+	    if (!(cmp = p->nfunc - ecnfunc) && !(cmp = strcmp(p->str, s)))
 		return p->offs;
-
-	p = (Eccstr) zhalloc(sizeof(*p));
-	p->next = NULL;
-	if (q)
-	    q->next = p;
-	else
-	    ecstrs = p;
+	    pp = (cmp < 0 ? &(p->left) : &(p->right));
+	}
+	p = *pp = (Eccstr) zhalloc(sizeof(*p));
+	p->left = p->right = 0;
 	p->offs = ((ecsoffs - ecssub) << 2) | (t ? 1 : 0);
+	p->aoffs = ecsoffs;
 	p->str = s;
 	p->nfunc = ecnfunc;
 	ecsoffs += l;
@@ -341,12 +340,7 @@
     }
 }
 
-static int
-ecstr(char *s)
-{
-    return ecadd(ecstrcode(s));
-}
-
+#define ecstr(S) ecadd(ecstrcode(S))
 
 #define par_save_list(C) \
     do { \
@@ -379,12 +373,20 @@
 
 /* Build eprog. */
 
+static void
+copy_ecstr(Eccstr s, char *p)
+{
+    while (s) {
+	memcpy(p + s->aoffs, s->str, strlen(s->str) + 1);
+	copy_ecstr(s->left, p);
+	s = s->right;
+    }
+}
+
 static Eprog
 bld_eprog(void)
 {
     Eprog ret;
-    Eccstr p;
-    char *q;
     int l;
 
     ecadd(WCB_END());
@@ -403,10 +405,8 @@
     for (l = 0; l < ecnpats; l++)
 	ret->pats[l] = dummy_patprog1;
     memcpy(ret->prog, ecbuf, ecused * sizeof(wordcode));
-    for (p = ecstrs, q = ret->strs; p; p = p->next, q += l) {
-	l = strlen(p->str) + 1;
-	memcpy(q, p->str, l);
-    }
+    copy_ecstr(ecstrs, ret->strs);
+
     zfree(ecbuf, eclen);
     ecbuf = NULL;
 
Index: Src/zsh.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/zsh.h,v
retrieving revision 1.25
diff -u -r1.25 zsh.h
--- Src/zsh.h	2001/02/28 09:12:57	1.25
+++ Src/zsh.h	2001/03/07 12:53:31
@@ -522,9 +522,9 @@
 typedef struct eccstr *Eccstr;
 
 struct eccstr {
-    Eccstr next;
+    Eccstr left, right;
     char *str;
-    wordcode offs;
+    wordcode offs, aoffs;
     int nfunc;
 };
 

--
Sven Wischnowsky                         wischnow@informatik.hu-berlin.de


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: PATCH: Re: 4.0.1-pre-1
  2001-03-06 15:37 ` Bart Schaefer
  2001-03-06 16:02   ` Peter Stephenson
@ 2001-03-06 16:14   ` Bart Schaefer
  1 sibling, 0 replies; 7+ messages in thread
From: Bart Schaefer @ 2001-03-06 16:14 UTC (permalink / raw)
  To: Sven Wischnowsky, zsh-workers

On Mar 6,  3:37pm, Bart Schaefer wrote:
} Subject: Re: PATCH: Re: 4.0.1-pre-1
}
} It runs "make check" correctly when linked with ElectricFence, but I
} don't believe that detects memory leaks, only memory under/over-runs.
} 
} However, with the latest CVS this morning, I'm getting a failure in
} 54compmatch:

Sorry, bad report.  54compmatch is clean without efence, but won't run
with it (and the output I included was misleading).

All the other tests pass even with efence.

-- 
Bart Schaefer                                 Brass Lantern Enterprises
http://www.well.com/user/barts              http://www.brasslantern.com

Zsh: http://www.zsh.org | PHPerl Project: http://phperl.sourceforge.net   


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: PATCH: Re: 4.0.1-pre-1
  2001-03-06 12:57 Sven Wischnowsky
  2001-03-06 15:37 ` Bart Schaefer
@ 2001-03-06 16:07 ` Alexandre Duret-Lutz
  1 sibling, 0 replies; 7+ messages in thread
From: Alexandre Duret-Lutz @ 2001-03-06 16:07 UTC (permalink / raw)
  To: zsh-workers

>>> "Sven" == Sven Wischnowsky <wischnow@informatik.hu-berlin.de> writes:

[...]

 Sven> Yes, lots ;-)

As always :)

[...]

 Sven> The patch makes that testsuite work, roughly consuming as much memory
 Sven> as bash on this machine here (both sh and ksh use less memory, sigh).

Here zsh now consumes less memory that bash, and the testsuite
is really usable, that's great!  However it is still
significantly slower:

% time bash testsuite > /dev/null
bash testsuite > /dev/null  0.88s user 0.44s system 101% cpu 1.302 total
% time zsh testsuite > /dev/null
zsh testsuite > /dev/null  7.13s user 0.27s system 100% cpu 7.357 total

(this is a testsuite that exists before the first test, as I explained.)

I don't really care myself, 7s doesn't sound too much for such a
big script, but maybe you'll have another great idea... This
testsuite seems like a good benchmark for the parsing code.

[...]

Thanks!
-- 
Alexandre Duret-Lutz


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: PATCH: Re: 4.0.1-pre-1
  2001-03-06 15:37 ` Bart Schaefer
@ 2001-03-06 16:02   ` Peter Stephenson
  2001-03-06 16:14   ` Bart Schaefer
  1 sibling, 0 replies; 7+ messages in thread
From: Peter Stephenson @ 2001-03-06 16:02 UTC (permalink / raw)
  To: Zsh hackers list

> (This reminds me again that we'd discussed renumbering the test suite to
> group similar tests together and perform them in vaguely dependency order.
> Are we going to get to that and to re-organizing Completion/ before PWS
> decides to shove 4.0.1 out the door?)

This will probably depend who `we' are.  I haven't actually thought about
this in any detail, but if there are any suggestions, I'm prepared to
apply my mind to posting interested-sounding followups.

-- 
Peter Stephenson <pws@csr.com>                  Software Engineer
CSR Ltd., Unit 300, Science Park, Milton Road,
Cambridge, CB4 0XL, UK                          Tel: +44 (0)1223 392070


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: PATCH: Re: 4.0.1-pre-1
  2001-03-06 12:57 Sven Wischnowsky
@ 2001-03-06 15:37 ` Bart Schaefer
  2001-03-06 16:02   ` Peter Stephenson
  2001-03-06 16:14   ` Bart Schaefer
  2001-03-06 16:07 ` Alexandre Duret-Lutz
  1 sibling, 2 replies; 7+ messages in thread
From: Bart Schaefer @ 2001-03-06 15:37 UTC (permalink / raw)
  To: Sven Wischnowsky, zsh-workers

On Mar 6,  1:57pm, Sven Wischnowsky wrote:
} Subject: PATCH: Re: 4.0.1-pre-1
}
} The patch below changes the code in parse.c to use real memory for the 
} `ecbuf'. It would be very nice if we could avoid that and instead
} write a better hrealloc(), but that's non-trivial to say the least.
} 
} I think I got the freeing right, but I'd be thankful if someone with 
} an allocation profiler could verify that.

It runs "make check" correctly when linked with ElectricFence, but I
don't believe that detects memory leaks, only memory under/over-runs.

However, with the latest CVS this morning, I'm getting a failure in
54compmatch:

*** /tmp/zsh.ztst.out.22091     Tue Mar  6 07:31:47 2001
--- /tmp/zsh.ztst.tout.22091    Tue Mar  6 07:31:48 2001
***************
*** 1,3 ****
! line: {tst .:}{}
  COMPADD:{}
  INSERT_POSITIONS:{4:5:6}
--- 1,3 ----
! line: {tst }{.:}
  COMPADD:{}
  INSERT_POSITIONS:{4:5:6}
Test ../../zsh-3.1.6/Test/54compmatch.ztst failed: output differs from expected as shown above for:
 workers_11388_matcher='r:|[:.]=** r:|=*'
 workers_11388_list=(a.b:0 c.d:1)
 test_code $workers_11388_matcher workers_11388_list
 comptest $'tst :\t'
Was testing: Non-bug from workers 11388
../../zsh-3.1.6/Test/54compmatch.ztst: test failed.


(This reminds me again that we'd discussed renumbering the test suite to
group similar tests together and perform them in vaguely dependency order.
Are we going to get to that and to re-organizing Completion/ before PWS
decides to shove 4.0.1 out the door?)

-- 
Bart Schaefer                                 Brass Lantern Enterprises
http://www.well.com/user/barts              http://www.brasslantern.com

Zsh: http://www.zsh.org | PHPerl Project: http://phperl.sourceforge.net   


^ permalink raw reply	[flat|nested] 7+ messages in thread

* PATCH: Re: 4.0.1-pre-1
@ 2001-03-06 12:57 Sven Wischnowsky
  2001-03-06 15:37 ` Bart Schaefer
  2001-03-06 16:07 ` Alexandre Duret-Lutz
  0 siblings, 2 replies; 7+ messages in thread
From: Sven Wischnowsky @ 2001-03-06 12:57 UTC (permalink / raw)
  To: zsh-workers


Alexandre Duret-Lutz wrote:

> X-Seq: 13573
> 
> Hi!
> 
> >>> "Peter" == Peter Stephenson <pws@csr.com> writes:
> 
> [...]
> 
>  Peter> If there are any outstanding bugs you feel still need to
>  Peter> be fixed, please report them again.
> 
> [...]
> 
> I don't know if this can be considered as an `outstanding bug'
> but I haven't seen any mention of zsh-users/3574 in this thread.
> 
> I've tried to run that script (testsuite) with Zsh: it takes a
> *while* to start (i.e., to parse, I guess), acquires all the
> memory, most of the swap, brings the machine to its knees, and
> eventually run script as would other shells.  Althought it
> actually works, this behavior is quite uncomfortable :)  
> Any idea?

Yes, lots ;-)

About this particular problem: the reason was hrealloc(), a function
I've never really been comfortable with. The parser used heap memory
when building the word code and used hrealloc() to increase the size
of the memory block when needed. Unfortunately, hrealloc() is only
seldom able to do what its name seems to imply. Often (very often) it
has to allocate a new block -- and since this is heap memory, the old
one won't be freed. That lead to the out-of-memory we were seeing.

The patch below changes the code in parse.c to use real memory for the 
`ecbuf'. It would be very nice if we could avoid that and instead
write a better hrealloc(), but that's non-trivial to say the least.

The patch makes that testsuite work, roughly consuming as much memory
as bash on this machine here (both sh and ksh use less memory, sigh).

I think I got the freeing right, but I'd be thankful if someone with 
an allocation profiler could verify that.

Bye
 Sven

Index: Src/lex.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/lex.c,v
retrieving revision 1.15
diff -u -r1.15 lex.c
--- Src/lex.c	2001/02/28 09:12:57	1.15
+++ Src/lex.c	2001/03/06 12:56:12
@@ -270,6 +270,7 @@
     inredir = 0;
     hdocs = NULL;
     histactive = 0;
+    ecbuf = NULL;
 
     ls->next = lstack;
     lstack = ls;
@@ -318,6 +319,8 @@
     hwbegin = lstack->hwbegin;
     hwend = lstack->hwend;
     addtoline = lstack->addtoline;
+    if (ecbuf)
+	zfree(ecbuf, eclen);
     eclen = lstack->eclen;
     ecused = lstack->ecused;
     ecnpats = lstack->ecnpats;
Index: Src/parse.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/parse.c,v
retrieving revision 1.17
diff -u -r1.17 parse.c
--- Src/parse.c	2001/01/22 12:03:55	1.17
+++ Src/parse.c	2001/03/06 12:56:13
@@ -235,6 +235,11 @@
 /**/
 int ecsoffs, ecssub, ecnfunc;
 
+#define EC_INIT_SIZE         256
+#define EC_DOUBLE_THRESHOLD  32768
+#define EC_INCREMENT         1024
+
+
 /* Adjust pointers in here-doc structs. */
 
 static void
@@ -255,10 +260,11 @@
     int m;
 
     if ((eclen - ecused) < n) {
-	int a = (n > 256 ? n : 256);
+	int a = (eclen < EC_DOUBLE_THRESHOLD ? eclen : EC_INCREMENT);
 
-	ecbuf = (Wordcode) hrealloc((char *) ecbuf, eclen * sizeof(wordcode),
-				    (eclen + a) * sizeof(wordcode));
+	if (n > a) a = n;
+
+	ecbuf = (Wordcode) zrealloc((char *) ecbuf, (eclen + a) * sizeof(wordcode));
 	eclen += a;
     }
     if ((m = ecused - p) > 0)
@@ -273,9 +279,10 @@
 ecadd(wordcode c)
 {
     if ((eclen - ecused) < 1) {
-	ecbuf = (Wordcode) hrealloc((char *) ecbuf, eclen * sizeof(wordcode),
-				    (eclen + 256) * sizeof(wordcode));
-	eclen += 256;
+	int a = (eclen < EC_DOUBLE_THRESHOLD ? eclen : EC_INCREMENT);
+
+	ecbuf = (Wordcode) zrealloc((char *) ecbuf, (eclen + a) * sizeof(wordcode));
+	eclen += a;
     }
     ecbuf[ecused] = c;
     ecused++;
@@ -360,7 +367,9 @@
 static void
 init_parse(void)
 {
-    ecbuf = (Wordcode) zhalloc((eclen = 256) * sizeof(wordcode));
+    if (ecbuf) zfree(ecbuf, eclen);
+
+    ecbuf = (Wordcode) zalloc((eclen = EC_INIT_SIZE) * sizeof(wordcode));
     ecused = 0;
     ecstrs = NULL;
     ecsoffs = ecnpats = 0;
@@ -398,6 +407,9 @@
 	l = strlen(p->str) + 1;
 	memcpy(q, p->str, l);
     }
+    zfree(ecbuf, eclen);
+    ecbuf = NULL;
+
     return ret;
 }
 

--
Sven Wischnowsky                         wischnow@informatik.hu-berlin.de


^ permalink raw reply	[flat|nested] 7+ messages in thread

* PATCH: Re: 4.0.1-pre-1
  2001-02-14 17:32 4.0.1-pre-1 Bart Schaefer
@ 2001-02-14 17:50 ` Peter Stephenson
  0 siblings, 0 replies; 7+ messages in thread
From: Peter Stephenson @ 2001-02-14 17:50 UTC (permalink / raw)
  To: Zsh hackers list

> Any particular reason we skipped 4.0.0?

It just felt wrong to have the first release ending in 0 instead of 1.

> Any reason not to do the doc fix suggested in users/3524?
> 
> Minor "make *clean" fix was suggested in 13245.

These are easy.

Index: Makefile.in
===================================================================
RCS file: /cvsroot/zsh/zsh/Makefile.in,v
retrieving revision 1.6
diff -u -r1.6 Makefile.in
--- Makefile.in	2000/12/06 08:28:20	1.6
+++ Makefile.in	2001/02/14 17:47:19
@@ -114,7 +114,7 @@
 @CLEAN_MK@
 
 distclean-here:
-	rm -f Makefile config.h config.status config.log config.cache stamp-h Config/defs.mk
+	rm -f Makefile config.h config.status config.log config.cache config.modules stamp-h Config/defs.mk
 
 realclean-here:
 	cd $(sdir) && rm -f config.h.in stamp-h.in configure
Index: Doc/Zsh/options.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/options.yo,v
retrieving revision 1.12
diff -u -r1.12 options.yo
--- Doc/Zsh/options.yo	2000/08/10 16:19:12	1.12
+++ Doc/Zsh/options.yo	2001/02/14 17:47:19
@@ -343,7 +343,7 @@
 delete the pattern from the argument list;
 do not report an error unless all the patterns
 in a command have no matches.
-Overrides tt(NULL_GLOB).
+Overrides tt(NOMATCH).
 )
 pindex(DVORAK)
 item(tt(DVORAK))(


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2001-03-07 12:55 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2001-03-07 12:54 PATCH: Re: 4.0.1-pre-1 Sven Wischnowsky
  -- strict thread matches above, loose matches on Subject: below --
2001-03-06 12:57 Sven Wischnowsky
2001-03-06 15:37 ` Bart Schaefer
2001-03-06 16:02   ` Peter Stephenson
2001-03-06 16:14   ` Bart Schaefer
2001-03-06 16:07 ` Alexandre Duret-Lutz
2001-02-14 17:32 4.0.1-pre-1 Bart Schaefer
2001-02-14 17:50 ` PATCH: 4.0.1-pre-1 Peter Stephenson

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).