9front - general discussion about 9front
 help / color / mirror / Atom feed
From: ori@eigenstate.org
To: 9front@9front.org
Subject: Re: [9front] git: tune deltification
Date: Sat, 12 Feb 2022 22:49:56 -0500	[thread overview]
Message-ID: <64111FCCFEB91FC5434307463B7BAFDE@eigenstate.org> (raw)
In-Reply-To: <5628E6D857F1F7842D6A50FE216B3316@eigenstate.org>

Quoth ori@eigenstate.org:
> Dropping the chunk size reduces pack sizes
> by about 15%, from 120 megs to 100.
> 
> Replacing sha1 with murmurhash3 when hashing
> deltas drops the time to repack the 9front
> repo by about 20 seconds.

Updated patch, changing u64int -> u32int where appropriate.

diff 2367a2aeaec8432e6b059135e49c2fa86e415ae5 uncommitted
--- a/sys/src/cmd/git/delta.c
+++ b/sys/src/cmd/git/delta.c
@@ -4,10 +4,9 @@
 #include "git.h"
 
 enum {
-	Minchunk	= 128,
+	Minchunk	= 32,
+	Splitmask	= 0x7f,
 	Maxchunk	= 8192,
-	Splitmask	= (1<<8)-1,
-	
 };
 
 static u32int geartab[] = {
@@ -45,16 +44,47 @@
     0x9984a4f4, 0xd5de43cc, 0xd294daed, 0xbecba2d2, 0xf1f6e72c, 0x5551128a, 0x83af87e2, 0x6f0342ba,
 };
 
-static u64int
-hash(void *p, int n)
+/* murmurhash3 */
+u32int
+hash(void *ptr, int len)
 {
-	uchar buf[SHA1dlen];
-	sha1((uchar*)p, n, buf, nil);
-	return GETBE64(buf);
+	u32int h, k, s;
+	uchar *p;
+	int i;
+
+	/* Read in groups of 4. */
+	h = 2928213749ul;
+	p = ptr;
+	for (i = len >> 2; i; i--) {
+		k = *(u32int*)p;
+		s = k * 0xcc9e2d51;
+		s = (s << 15) | (s >> 17);
+		h ^= s*0x1b873593;
+		h = (h << 13) | (h >> 19);
+		h = h * 5 + 0xe6546b64;
+		p += 4;
+	}
+	/* Read the rest. */
+	k = 0;
+	for (i = len & 3; i; i--) {
+		k <<= 8;
+		k |= p[i - 1];
+	}
+	s = k * 0xcc9e2d51;
+	s = (s << 15) | (s >> 17);
+	h ^= s*0x1b873593;
+	/* Finalize. */
+	h ^= len;
+	h ^= h >> 16;
+	h *= 0x85ebca6b;
+	h ^= h >> 13;
+	h *= 0xc2b2ae35;
+	h ^= h >> 16;
+	return h;
 }
 
 static void
-addblk(Dtab *dt, void *buf, int len, int off, u64int h)
+addblk(Dtab *dt, void *buf, int len, int off, u32int h)
 {
 	int i, sz, probe;
 	Dblock *db;
@@ -88,7 +118,7 @@
 lookup(Dtab *dt, uchar *p, int n)
 {
 	int probe;
-	u64int h;
+	u32int h;
 
 	h = hash(p, n);
 	for(probe = h % dt->sz; dt->b[probe].buf != nil; probe = (probe + 1) % dt->sz){
@@ -127,7 +157,7 @@
 dtinit(Dtab *dt, Object *obj)
 {
 	uchar *s, *e;
-	u64int h;
+	u32int h;
 	vlong n, o;
 	
 	o = 0;
--- a/sys/src/cmd/git/git.h
+++ b/sys/src/cmd/git/git.h
@@ -183,7 +183,7 @@
 	uchar	*buf;
 	int	len;
 	int	off;
-	u64int	hash;
+	u32int	hash;
 };
 
 struct Delta {


  reply	other threads:[~2022-02-13  3:58 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-10  6:15 ori
2022-02-13  3:49 ` ori [this message]
2022-02-10 14:18 ori

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=64111FCCFEB91FC5434307463B7BAFDE@eigenstate.org \
    --to=ori@eigenstate.org \
    --cc=9front@9front.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).