zsh-workers
 help / color / mirror / code / Atom feed
From: xzfcpw@gmail.com
To: zsh-workers@zsh.org
Subject: PATCH: readhistfile(): do not call ftell() in loop
Date: Sun, 03 May 2020 09:33:05 +0000	[thread overview]
Message-ID: <cef5c98b3d0fb920e93d90c7cd44ce0f2da3c68d.camel@gmail.com> (raw)

Prior to this patch, readhistfile() performed one lseek() syscall (inside ftell()) for every history entry.
This is time-consuming on large histfiles.

This patch replaces this call to ftell() with manual tracking of bytes read.

Benchmark on my histfile (5.5M bytes, 138103 lines):
hyperfine --warmup 5 './zsh -is < /dev/null'
old: 708.8 ms
new: 498.9 ms


Also, a minor code style fix: `fseek(in, 0, 0)` is replaced with `fseek(in, 0, SEEK_SET)`.


diff --git a/Src/hist.c b/Src/hist.c
index 5281e8718..3a8eb9a16 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -2575,10 +2575,11 @@ resizehistents(void)
 }
 
 static int
-readhistline(int start, char **bufp, int *bufsiz, FILE *in)
+readhistline(int start, char **bufp, int *bufsiz, off_t *fpos, FILE *in)
 {
     char *buf = *bufp;
     if (fgets(buf + start, *bufsiz - start, in)) {
+	fpos += strlen(buf + start);
 	int len = start + strlen(buf + start);
 	if (len == start)
 	    return -1;
@@ -2588,7 +2589,7 @@ readhistline(int start, char **bufp, int *bufsiz, FILE *in)
 		    return -1;
 		*bufp = zrealloc(buf, 2 * (*bufsiz));
 		*bufsiz = 2 * (*bufsiz);
-		return readhistline(len, bufp, bufsiz, in);
+		return readhistline(len, bufp, bufsiz, fpos, in);
 	    }
 	}
 	else {
@@ -2596,7 +2597,7 @@ readhistline(int start, char **bufp, int *bufsiz, FILE *in)
 	    if (len > 1 && buf[len - 2] == '\\') {
 		buf[--len - 1] = '\n';
 		if (!feof(in))
-		    return readhistline(len, bufp, bufsiz, in);
+		    return readhistline(len, bufp, bufsiz, fpos, in);
 	    }
 	}
 	return len;
@@ -2612,7 +2613,7 @@ readhistfile(char *fn, int err, int readflags)
     FILE *in;
     Histent he;
     time_t stim, ftim, tim = time(NULL);
-    off_t fpos;
+    off_t fpos, lfpos;
     short *words;
     struct stat sb;
     int nwordpos, nwords, bufsiz;
@@ -2664,7 +2665,8 @@ readhistfile(char *fn, int err, int readflags)
 	if (readflags & HFILE_SKIPOLD
 	 || (hist_ignore_all_dups && newflags & hist_skip_flags))
 	    newflags |= HIST_MAKEUNIQUE;
-	while (fpos = ftell(in), (l = readhistline(0, &buf, &bufsiz, in))) {
+	fpos = ftell(in);
+	while (lfpos = fpos, l = readhistline(0, &buf, &bufsiz, &fpos, in)) {
 	    char *pt;
 	    int remeta = 0;
 
@@ -2723,7 +2725,8 @@ readhistfile(char *fn, int err, int readflags)
 		     && histstrcmp(pt, lasthist.text) == 0)
 			searching = 0;
 		    else {
-			fseek(in, 0, 0);
+			fseek(in, 0, SEEK_SET);
+			fpos = 0;
 			histfile_linect = 0;
 			searching = -1;
 		    }
@@ -2738,7 +2741,7 @@ readhistfile(char *fn, int err, int readflags)
 
 	    if (readflags & HFILE_USE_OPTIONS) {
 		histfile_linect++;
-		lasthist.fpos = fpos;
+		lasthist.fpos = lfpos;
 		lasthist.stim = stim;
 	    }
 


             reply	other threads:[~2020-05-03  9:33 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-03  9:33 xzfcpw [this message]
2020-05-04 15:30 ` Peter Stephenson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cef5c98b3d0fb920e93d90c7cd44ce0f2da3c68d.camel@gmail.com \
    --to=xzfcpw@gmail.com \
    --cc=zsh-workers@zsh.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).