From: xzfcpw@gmail.com
To: zsh-workers@zsh.org
Subject: PATCH: readhistfile(): do not call ftell() in loop
Date: Sun, 03 May 2020 09:33:05 +0000 [thread overview]
Message-ID: <cef5c98b3d0fb920e93d90c7cd44ce0f2da3c68d.camel@gmail.com> (raw)
Prior to this patch, readhistfile() performed one lseek() syscall (inside ftell()) for each history entry.
This is time-consuming on large histfiles.
This patch replaces this call to ftell() with manual tracking of bytes read.
Benchmark on my histfile (5.5M bytes, 138103 lines):
hyperfine --warmup 5 './zsh -is < /dev/null'
old: 708.8 ms
new: 498.9 ms
Also, a minor code style fix: `fseek(in, 0, 0)` is replaced with `fseek(in, 0, SEEK_SET)`.
diff --git a/Src/hist.c b/Src/hist.c
index 5281e8718..3a8eb9a16 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -2575,10 +2575,11 @@ resizehistents(void)
}
static int
-readhistline(int start, char **bufp, int *bufsiz, FILE *in)
+readhistline(int start, char **bufp, int *bufsiz, off_t *fpos, FILE *in)
{
char *buf = *bufp;
if (fgets(buf + start, *bufsiz - start, in)) {
+ fpos += strlen(buf + start);
int len = start + strlen(buf + start);
if (len == start)
return -1;
@@ -2588,7 +2589,7 @@ readhistline(int start, char **bufp, int *bufsiz, FILE *in)
return -1;
*bufp = zrealloc(buf, 2 * (*bufsiz));
*bufsiz = 2 * (*bufsiz);
- return readhistline(len, bufp, bufsiz, in);
+ return readhistline(len, bufp, bufsiz, fpos, in);
}
}
else {
@@ -2596,7 +2597,7 @@ readhistline(int start, char **bufp, int *bufsiz, FILE *in)
if (len > 1 && buf[len - 2] == '\\') {
buf[--len - 1] = '\n';
if (!feof(in))
- return readhistline(len, bufp, bufsiz, in);
+ return readhistline(len, bufp, bufsiz, fpos, in);
}
}
return len;
@@ -2612,7 +2613,7 @@ readhistfile(char *fn, int err, int readflags)
FILE *in;
Histent he;
time_t stim, ftim, tim = time(NULL);
- off_t fpos;
+ off_t fpos, lfpos;
short *words;
struct stat sb;
int nwordpos, nwords, bufsiz;
@@ -2664,7 +2665,8 @@ readhistfile(char *fn, int err, int readflags)
if (readflags & HFILE_SKIPOLD
|| (hist_ignore_all_dups && newflags & hist_skip_flags))
newflags |= HIST_MAKEUNIQUE;
- while (fpos = ftell(in), (l = readhistline(0, &buf, &bufsiz, in))) {
+ fpos = ftell(in);
+ while (lfpos = fpos, l = readhistline(0, &buf, &bufsiz, &fpos, in)) {
char *pt;
int remeta = 0;
@@ -2723,7 +2725,8 @@ readhistfile(char *fn, int err, int readflags)
&& histstrcmp(pt, lasthist.text) == 0)
searching = 0;
else {
- fseek(in, 0, 0);
+ fseek(in, 0, SEEK_SET);
+ fpos = 0;
histfile_linect = 0;
searching = -1;
}
@@ -2738,7 +2741,7 @@ readhistfile(char *fn, int err, int readflags)
if (readflags & HFILE_USE_OPTIONS) {
histfile_linect++;
- lasthist.fpos = fpos;
+ lasthist.fpos = lfpos;
lasthist.stim = stim;
}
next reply other threads:[~2020-05-03 9:33 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-05-03 9:33 xzfcpw [this message]
2020-05-04 15:30 ` Peter Stephenson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=cef5c98b3d0fb920e93d90c7cd44ce0f2da3c68d.camel@gmail.com \
--to=xzfcpw@gmail.com \
--cc=zsh-workers@zsh.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/zsh/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).