zsh-workers
 help / color / Atom feed
* PATCH: readhistfile(): do not call ftell() in loop
@ 2020-05-03  9:33 xzfcpw
  2020-05-04 15:30 ` Peter Stephenson
  0 siblings, 1 reply; 2+ messages in thread
From: xzfcpw @ 2020-05-03  9:33 UTC (permalink / raw)
  To: zsh-workers

Prior to this patch, readhistfile() performed one lseek() syscall (inside ftell()) for each history entry.
This is time-consuming on large histfiles.

This patch replaces this call to ftell() with manual tracking of bytes read.

Benchmark on my histfile (5.5M bytes, 138103 lines):
hyperfine --warmup 5 './zsh -is < /dev/null'
old: 708.8 ms
new: 498.9 ms


Also, a minor code style fix: `fseek(in, 0, 0)` is replaced with `fseek(in, 0, SEEK_SET)`.


diff --git a/Src/hist.c b/Src/hist.c
index 5281e8718..3a8eb9a16 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -2575,10 +2575,11 @@ resizehistents(void)
 }
 
 static int
-readhistline(int start, char **bufp, int *bufsiz, FILE *in)
+readhistline(int start, char **bufp, int *bufsiz, off_t *fpos, FILE *in)
 {
     char *buf = *bufp;
     if (fgets(buf + start, *bufsiz - start, in)) {
+	fpos += strlen(buf + start);
 	int len = start + strlen(buf + start);
 	if (len == start)
 	    return -1;
@@ -2588,7 +2589,7 @@ readhistline(int start, char **bufp, int *bufsiz, FILE *in)
 		    return -1;
 		*bufp = zrealloc(buf, 2 * (*bufsiz));
 		*bufsiz = 2 * (*bufsiz);
-		return readhistline(len, bufp, bufsiz, in);
+		return readhistline(len, bufp, bufsiz, fpos, in);
 	    }
 	}
 	else {
@@ -2596,7 +2597,7 @@ readhistline(int start, char **bufp, int *bufsiz, FILE *in)
 	    if (len > 1 && buf[len - 2] == '\\') {
 		buf[--len - 1] = '\n';
 		if (!feof(in))
-		    return readhistline(len, bufp, bufsiz, in);
+		    return readhistline(len, bufp, bufsiz, fpos, in);
 	    }
 	}
 	return len;
@@ -2612,7 +2613,7 @@ readhistfile(char *fn, int err, int readflags)
     FILE *in;
     Histent he;
     time_t stim, ftim, tim = time(NULL);
-    off_t fpos;
+    off_t fpos, lfpos;
     short *words;
     struct stat sb;
     int nwordpos, nwords, bufsiz;
@@ -2664,7 +2665,8 @@ readhistfile(char *fn, int err, int readflags)
 	if (readflags & HFILE_SKIPOLD
 	 || (hist_ignore_all_dups && newflags & hist_skip_flags))
 	    newflags |= HIST_MAKEUNIQUE;
-	while (fpos = ftell(in), (l = readhistline(0, &buf, &bufsiz, in))) {
+	fpos = ftell(in);
+	while (lfpos = fpos, l = readhistline(0, &buf, &bufsiz, &fpos, in)) {
 	    char *pt;
 	    int remeta = 0;
 
@@ -2723,7 +2725,8 @@ readhistfile(char *fn, int err, int readflags)
 		     && histstrcmp(pt, lasthist.text) == 0)
 			searching = 0;
 		    else {
-			fseek(in, 0, 0);
+			fseek(in, 0, SEEK_SET);
+			fpos = 0;
 			histfile_linect = 0;
 			searching = -1;
 		    }
@@ -2738,7 +2741,7 @@ readhistfile(char *fn, int err, int readflags)
 
 	    if (readflags & HFILE_USE_OPTIONS) {
 		histfile_linect++;
-		lasthist.fpos = fpos;
+		lasthist.fpos = lfpos;
 		lasthist.stim = stim;
 	    }
 


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: PATCH: readhistfile(): do not call ftell() in loop
  2020-05-03  9:33 PATCH: readhistfile(): do not call ftell() in loop xzfcpw
@ 2020-05-04 15:30 ` Peter Stephenson
  0 siblings, 0 replies; 2+ messages in thread
From: Peter Stephenson @ 2020-05-04 15:30 UTC (permalink / raw)
  To: xzfcpw, zsh-workers

> On 03 May 2020 at 10:33 xzfcpw@gmail.com wrote:
> 
> 
> Prior to this patch, readhistfile() performed one lseek() syscall (inside ftell()) per each history entry.
> This is time-consuming on large histfiles.
> 
> This patch replaces this call to ftell() with manual tracking of bytes read.

Looks fine to me --- can't think of any gotchas with file position.  I'll commit if no one else
has any comments.

pws

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, back to index

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-03  9:33 PATCH: readhistfile(): do not call ftell() in loop xzfcpw
2020-05-04 15:30 ` Peter Stephenson

zsh-workers

Archives are clonable: git clone --mirror http://inbox.vuxu.org/zsh-workers

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://inbox.vuxu.org/vuxu.archive.zsh.workers


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git