From: ori@eigenstate.org
To: 9front@9front.org
Subject: Re: [9front] git: tune deltification
Date: Sat, 12 Feb 2022 22:49:56 -0500 [thread overview]
Message-ID: <64111FCCFEB91FC5434307463B7BAFDE@eigenstate.org> (raw)
In-Reply-To: <5628E6D857F1F7842D6A50FE216B3316@eigenstate.org>
Quoth ori@eigenstate.org:
> Dropping the chunk size reduces pack sizes
> by about 15%, from 120 megs to 100.
>
> Replacing sha1 with murmurhash3 when hashing
> deltas drops the time to repack the 9front
> repo by about 20 seconds.
Updated patch, changing u64int -> u32int where appropriate.
diff 2367a2aeaec8432e6b059135e49c2fa86e415ae5 uncommitted
--- a/sys/src/cmd/git/delta.c
+++ b/sys/src/cmd/git/delta.c
@@ -4,10 +4,9 @@
#include "git.h"
enum {
- Minchunk = 128,
+ Minchunk = 32,
+ Splitmask = 0x7f,
Maxchunk = 8192,
- Splitmask = (1<<8)-1,
-
};
static u32int geartab[] = {
@@ -45,16 +44,47 @@
0x9984a4f4, 0xd5de43cc, 0xd294daed, 0xbecba2d2, 0xf1f6e72c, 0x5551128a, 0x83af87e2, 0x6f0342ba,
};
-static u64int
-hash(void *p, int n)
+/* murmurhash3 */
+u32int
+hash(void *ptr, int len)
{
- uchar buf[SHA1dlen];
- sha1((uchar*)p, n, buf, nil);
- return GETBE64(buf);
+ u32int h, k, s;
+ uchar *p;
+ int i;
+
+ /* Read in groups of 4. */
+ h = 2928213749ul;
+ p = ptr;
+ for (i = len >> 2; i; i--) {
+ k = *(u32int*)p;
+ s = k * 0xcc9e2d51;
+ s = (s << 15) | (s >> 17);
+ h ^= s*0x1b873593;
+ h = (h << 13) | (h >> 19);
+ h = h * 5 + 0xe6546b64;
+ p += 4;
+ }
+ /* Read the rest. */
+ k = 0;
+ for (i = len & 3; i; i--) {
+ k <<= 8;
+ k |= p[i - 1];
+ }
+ s = k * 0xcc9e2d51;
+ s = (s << 15) | (s >> 17);
+ h ^= s*0x1b873593;
+ /* Finalize. */
+ h ^= len;
+ h ^= h >> 16;
+ h *= 0x85ebca6b;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35;
+ h ^= h >> 16;
+ return h;
}
static void
-addblk(Dtab *dt, void *buf, int len, int off, u64int h)
+addblk(Dtab *dt, void *buf, int len, int off, u32int h)
{
int i, sz, probe;
Dblock *db;
@@ -88,7 +118,7 @@
lookup(Dtab *dt, uchar *p, int n)
{
int probe;
- u64int h;
+ u32int h;
h = hash(p, n);
for(probe = h % dt->sz; dt->b[probe].buf != nil; probe = (probe + 1) % dt->sz){
@@ -127,7 +157,7 @@
dtinit(Dtab *dt, Object *obj)
{
uchar *s, *e;
- u64int h;
+ u32int h;
vlong n, o;
o = 0;
--- a/sys/src/cmd/git/git.h
+++ b/sys/src/cmd/git/git.h
@@ -183,7 +183,7 @@
uchar *buf;
int len;
int off;
- u64int hash;
+ u32int hash;
};
struct Delta {
next prev parent reply other threads:[~2022-02-13 3:58 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-10 6:15 ori
2022-02-13 3:49 ` ori [this message]
2022-02-10 14:18 ori
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=64111FCCFEB91FC5434307463B7BAFDE@eigenstate.org \
--to=ori@eigenstate.org \
--cc=9front@9front.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for the NNTP newsgroup(s).