9front - general discussion about 9front
 help / color / mirror / Atom feed
* [9front] [v2] git: new index format, 235x faster walks
@ 2023-09-18  2:21 ori
  2023-09-18  3:49 ` ori
  0 siblings, 1 reply; 2+ messages in thread
From: ori @ 2023-09-18  2:21 UTC (permalink / raw)
  To: 9front

New revision for testing, with a few bugs fixed:

- The first revision disabled the opportunistic rewrite
  of the index; I'd turned it off for testing, but forgot
  to turn it back on; with this revision, you'll actually
  see the promised speedups.

- The mode is now added to the index, so that we can detect
  changes to the execute bit and such.

- The index is not regenerated as eagerly when checking
  if a file is checked in, which should reduce churn on
  the dump.

- When passing walk a filter list, we still load the whole
  thing so that we don't remove files from the index by
  accident. This was hidden by the bug in the first rev.


diff cf6987b1f35c33402fe3061eae9f8ff526fee2a8 uncommitted
--- a/sys/src/cmd/git/add
+++ b/sys/src/cmd/git/add
@@ -7,32 +7,17 @@
 flagfmt='r:remove'; args='file ...'
 eval `''{aux/getflags $*} || exec aux/usage
 
-add='tracked'
-del='removed'
-if(~ $remove 1){
-	add='removed'
-	del='tracked'
-}
+s=A
+if(~ $remove 1)
+	s=R
 if(~ $#* 0)
 	exec aux/usage
 
 paths=`$nl{cleanname -d $gitrel $* | drop $gitroot}
-if(~ $add tracked)
+if(~ $s A)
 	files=`$nl{walk -f ./$paths}
 if not
-	files=`$nl{cd .git/index9/tracked/ && walk -f ./$paths}
-
-for(f in $files){
-	if(! ~ `$nl{cleanname $f} .git/*){
-		addpath=.git/index9/$add/$f
-		delpath=.git/index9/$del/$f
-		mkdir -p `$nl{basename -d $addpath}
-		mkdir -p `$nl{basename -d $delpath}
-		# We don't want a matching qid, so that
-		# git/walk doesn't think this came from
-		# a checkout.
-		echo -n > $addpath
-		rm -f $delpath
-	}
-}
+	files=`$nl{cd .git/fs/HEAD/tree && walk -f ./$paths}
+for(f in $files)
+	echo $s NOQID 0 $f >> .git/INDEX9
 exit ''
--- a/sys/src/cmd/git/clone
+++ b/sys/src/cmd/git/clone
@@ -87,11 +87,8 @@
 		git/fs
 		@ {builtin cd $tree && tar cif /fd/1 .} | @ {tar xf /fd/0} \
 			|| die 'checkout failed:' $status
-		for(f in `$nl{walk -f $tree | drop $tree}){
-			idx=.git/index9/tracked/$f
-			mkdir -p `$nl{basename -d $idx}
-			walk -eq ./$f > $idx
-		}
+		{for(f in `$nl{cd $tree && walk -f})
+			echo 'T NOQID 0 '$f} > .git/INDEX9
 	}
 	if not{
 		echo no default branch >[1=2]
--- a/sys/src/cmd/git/commit
+++ b/sys/src/cmd/git/commit
@@ -65,8 +65,8 @@
 fn parents{
 	if(! ~ $#revise 0)
 		parents=`{cat $gitfs/HEAD/parent}
-	if not if(test -f .git/index9/merge-parents)
-		parents=`{cat .git/index9/merge-parents | sort | uniq}
+	if not if(test -f .git/merge-parents)
+		parents=`{cat .git/merge-parents | sort | uniq}
 	if not if(~ $initial true)
 		parents=()
 	if not
@@ -78,7 +78,7 @@
 	if(! ~ $#parents 0)
 		pflags='-p'^$parents
 	hash=`{git/save -n $"name -e $"email -m $"msg $pflags $files || die $status}
-	rm -f .git/index9/merge-parents
+	rm -f .git/merge-parents
 }
 
 fn update{
@@ -89,14 +89,10 @@
 	echo $branch: $hash
 	echo $hash > $refpath
 	for(f in $files){
-		if(test -e .git/index9/removed/$f || ! test -e $f){
-			rm -f .git/index9/removed/$f
-			rm -f .git/index9/tracked/$f
-		}
-		if not{
-			mkdir -p `{basename -d $f}
-			walk -eq $f > .git/index9/tracked/$f
-		}
+		if(! test -e $f && ! test -e .git/object/$hash/tree/$f)
+			echo R NOQID 0 $f >> .git/INDEX9
+		if not
+			echo T NOQID 0 $f >> .git/INDEX9
 	}
 }
 
@@ -120,12 +116,12 @@
 }
 
 files=()
-if(test -f .git/index9/merge-parents)
-	files=`$nl{git/query -c `{cat .git/index9/merge-parents} | sed 's/^..//'}
+if(test -f .git/merge-parents)
+	files=`$nl{git/query -c `{cat .git/merge-parents} | sed 's/^..//'}
 if(! ~ $#* 0)
 	files=($files `$nl{git/walk -c `$nl{cleanname -d $gitrel $*}})
-if(~ $status '' || ~ $#files 0 && ! test -f .git/index9/merge-parents && ~ $#revise 0)
-	die 'nothing to commit' $status
+if(~ $status '' || ~ $#files 0 && ! test -f .git/merge-parents && ~ $#revise 0)
+	die 'nothing to commit'
 @{
 	flag e +
 	whoami
--- a/sys/src/cmd/git/conf.c
+++ b/sys/src/cmd/git/conf.c
@@ -59,7 +59,7 @@
 main(int argc, char **argv)
 {
 	char repo[512], *p, *s;
-	int i, j;
+	int i, j, nrel;
 
 	ARGBEGIN{
 	case 'f':	file[nfile++]=EARGF(usage());	break;
@@ -69,7 +69,7 @@
 	}ARGEND;
 
 	if(findroot){
-		if(findrepo(repo, sizeof(repo)) == -1)
+		if(findrepo(repo, sizeof(repo), &nrel) == -1)
 			sysfatal("%r");
 		print("%s\n", repo);
 		exits(nil);
--- a/sys/src/cmd/git/diff
+++ b/sys/src/cmd/git/diff
@@ -7,8 +7,10 @@
 flagfmt='c:commit branch, s:summarize'; args='[file ...]'
 eval `''{aux/getflags $*} || exec aux/usage
 
-if(~ $#commit 0)
+if(~ $#commit 0){
 	commit=HEAD
+	cparam=()
+}
 
 files=()
 if(! ~ $#* 0)
@@ -16,7 +18,7 @@
 
 branch=`{git/query -p $commit}
 if(~ $summarize 1){
-	git/walk -fMAR -b $commit $files
+	git/walk -fMAR $cparam $files
 	exit
 }
 
@@ -24,7 +26,7 @@
 mntgen /mnt/scratch
 bind $branch/tree/ /mnt/scratch/a
 bind . /mnt/scratch/b
-for(f in `$nl{git/walk -c -fRMA -b $commit $files}){
+for(f in `$nl{git/walk -c -fRMA $cparam $files}){
 	if(~ $#showed 0){
 		echo diff `{git/query $commit} uncommitted
 		showed=1
--- a/sys/src/cmd/git/git.h
+++ b/sys/src/cmd/git/git.h
@@ -21,6 +21,7 @@
 typedef struct Dblock	Dblock;
 typedef struct Objq	Objq;
 typedef struct Qelt	Qelt;
+typedef struct Idxent	Idxent;
 
 enum {
 	Pathmax		= 512,
@@ -190,6 +191,13 @@
 	int	len;
 };
 
+struct Idxent {
+	char	*path;
+	Qid	qid;
+	int	mode;
+	int	order;
+	char	state;
+};
 
 #define GETBE16(b)\
 		((((b)[0] & 0xFFul) <<  8) | \
@@ -301,9 +309,10 @@
 int	hassuffix(char *, char *);
 int	swapsuffix(char *, int, char *, char *, char *);
 char	*strip(char *);
-int	findrepo(char *, int);
+int	findrepo(char *, int, int*);
 int	showprogress(int, int);
 u64int	murmurhash2(void*, usize);
+Qid	parseqid(char*);
 
 /* packing */
 void	dtinit(Dtab *, Object*);
--- a/sys/src/cmd/git/import
+++ b/sys/src/cmd/git/import
@@ -98,7 +98,8 @@
 		if(~ $#nocommit 0){
 			if(hash=`{git/save -n $aname -e $amail -N $name -E $email -m $msg -d $date $parents $files}){
 				echo $hash > $refpath
-				rm -f .git/index9/removed/$files
+				for(f in $files)
+					echo T NOQID 0 $f >> .git/INDEX9
 			}
 		}
 		status=''''
--- a/sys/src/cmd/git/init
+++ b/sys/src/cmd/git/init
@@ -31,7 +31,7 @@
 	echo '[branch "'$branch'"]'
 	echo '	remote = origin'
 }
-
+>$dir/.git/INDEX9
 >$dir/.git/HEAD {
 	echo ref: refs/heads/$branch
 }
--- a/sys/src/cmd/git/log.c
+++ b/sys/src/cmd/git/log.c
@@ -239,7 +239,7 @@
 main(int argc, char **argv)
 {
 	char path[1024], repo[1024], *p, *r;
-	int i, nrepo;
+	int i, nrel, nrepo;
 
 	ARGBEGIN{
 	case 'e':
@@ -259,7 +259,7 @@
 		break;
 	}ARGEND;
 
-	if(findrepo(repo, sizeof(repo)) == -1)
+	if(findrepo(repo, sizeof(repo), &nrel) == -1)
 		sysfatal("find root: %r");
 	nrepo = strlen(repo);
 	if(argc != 0){
--- a/sys/src/cmd/git/merge
+++ b/sys/src/cmd/git/merge
@@ -38,8 +38,8 @@
 	git/revert .
 	exit ''
 }
-echo $ours >> .git/index9/merge-parents
-echo $theirs >> .git/index9/merge-parents
+echo $ours >> .git/merge-parents
+echo $theirs >> .git/merge-parents
 
 merge $ours $base $theirs
 >[1=2] echo 'merge complete: remember to commit'
--- a/sys/src/cmd/git/pack.c
+++ b/sys/src/cmd/git/pack.c
@@ -66,7 +66,7 @@
 Object *lruhead;
 Object *lrutail;
 vlong	ncache;
-vlong	cachemax = 512*MiB;
+vlong	cachemax = 1024*MiB;
 Packf	*packf;
 int	npackf;
 int	openpacks;
--- a/sys/src/cmd/git/query.c
+++ b/sys/src/cmd/git/query.c
@@ -152,10 +152,10 @@
 void
 main(int argc, char **argv)
 {
-	int i, j, n;
-	Hash *h;
-	char *p, *e, *s, *objpfx;
 	char query[2048], repo[512];
+	char *p, *e, *s, *objpfx;
+	int i, j, n, nrel;
+	Hash *h;
 
 	ARGBEGIN{
 	case 'd':	chattygit++;	break;
@@ -170,7 +170,7 @@
 
 	if(argc == 0)
 		usage();
-	if(findrepo(repo, sizeof(repo)) == -1)
+	if(findrepo(repo, sizeof(repo), &nrel) == -1)
 		sysfatal("find root: %r");
 	if(chdir(repo) == -1)
 		sysfatal("chdir: %r");
--- a/sys/src/cmd/git/save.c
+++ b/sys/src/cmd/git/save.c
@@ -10,6 +10,7 @@
 	char *dat;
 	int ndat;
 };
+
 enum {
 	Maxparents = 16,
 };
@@ -21,7 +22,9 @@
 char	*commitmsg;
 Hash	parents[Maxparents];
 int	nparents;
-
+Idxent	*idx;
+int	idxsz;
+int	nidx;
 int
 gitmode(Dirent *e)
 {
@@ -38,6 +41,20 @@
 }
 
 int
+idxcmp(void *pa, void *pb)
+{
+	Idxent *a, *b;
+	int c;
+
+	a = (Idxent*)pa;
+	b = (Idxent*)pb;
+	if((c = strcmp(a->path, b->path)) != 0)
+		return c;
+	assert(a->order != b->order);
+	return a-> order < b->order ? -1 : 1;
+}
+
+int
 entcmp(void *pa, void *pb)
 {
 	char abuf[256], bbuf[256], *ae, *be;
@@ -184,27 +201,22 @@
 int
 tracked(char *path)
 {
-	char ipath[256];
-	Dir *d;
+	int lo, hi, mid, r;
 
-	/* Explicitly removed. */
-	snprint(ipath, sizeof(ipath), ".git/index9/removed/%s", path);
-	if(strstr(cleanname(ipath), ".git/index9/removed") != ipath)
-		sysfatal("path %s leaves index", ipath);
-	d = dirstat(ipath);
-	if(d != nil && d->qid.type != QTDIR){
-		free(d);
-		return 0;
+	r = -1;
+	lo = 0;
+	hi = nidx-1;
+	while(lo <= hi){
+		mid = (hi + lo) / 2;
+		r = strcmp(path, idx[mid].path);
+		if(r < 0)
+			hi = mid-1;
+		else if(r > 0)
+			lo = mid+1;
+		else
+			break;
 	}
-
-	/* Explicitly added. */
-	snprint(ipath, sizeof(ipath), ".git/index9/tracked/%s", path);
-	if(strstr(cleanname(ipath), ".git/index9/tracked") != ipath)
-		sysfatal("path %s leaves index", ipath);
-	if(access(ipath, AEXIST) == 0)
-		return 1;
-
-	return 0;
+	return r == 0;
 }
 
 int
@@ -354,10 +366,11 @@
 void
 main(int argc, char **argv)
 {
+	char *ln, *dstr, *parts[4], cwd[1024];
+	int i, r, line, ncwd;
 	Hash th, ch;
-	char *dstr, cwd[1024];
-	int i, r, ncwd;
 	vlong date;
+	Biobuf *f;
 	Object *t;
 
 	gitinit();
@@ -425,6 +438,33 @@
 	}
 
 	t = findroot();
+	nidx = 0;
+	idxsz = 32;
+	idx = emalloc(idxsz*sizeof(Idxent));
+	if((f = Bopen(".git/INDEX9", OREAD)) == nil)
+		sysfatal("open index: %r");
+	line = 0;
+	while((ln = Brdstr(f, '\n', 1)) != nil){
+		line++;
+		if(ln[0] == 0 || ln[0] == '\n')
+			continue;
+		if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts))
+			sysfatal(".git/INDEX9:%d: corrupt index", line);
+		if(nidx == idxsz){
+			idxsz += idxsz/2;
+			idx = realloc(idx, idxsz*sizeof(Idxent));
+		}
+		cleanname(parts[3]);
+		idx[nidx].state = *parts[0];
+		idx[nidx].qid = parseqid(parts[1]);
+		idx[nidx].mode = strtol(parts[2], nil, 8);
+		idx[nidx].path = strdup(parts[3]);
+		idx[nidx].order = nidx;
+		nidx++;
+		free(ln);
+	}
+	Bterm(f);
+	qsort(idx, nidx, sizeof(Idxent), idxcmp);
 	r = treeify(t, argv, argv + argc, 0, &th);
 	if(r == -1)
 		sysfatal("could not commit: %r\n");
--- a/sys/src/cmd/git/test/basic.rc
+++ b/sys/src/cmd/git/test/basic.rc
@@ -64,4 +64,3 @@
 q diff -c scratch/upstream/file.txt scratch/downstream/file.txt || die mismatch
 ! test -e scratch/upstream/file2.txt || die mismatch
 ! test -e scratch/downstream/file2.txt || die mismatch
-
--- a/sys/src/cmd/git/test/util.rc
+++ b/sys/src/cmd/git/test/util.rc
@@ -1,5 +1,5 @@
 fn q {
-	{$* > /tmp/out.$pid && rm /tmp/out.$pid} || cat /tmp/out
+	{$* > /tmp/out.$pid && rm /tmp/out.$pid} || cat /tmp/out.$pid
 }
 fn qq {
 	$* >/dev/null >[2]/dev/null
--- a/sys/src/cmd/git/util.c
+++ b/sys/src/cmd/git/util.c
@@ -192,8 +192,10 @@
 	Qid q;
 
 	q = va_arg(fmt->args, Qid);
-	return fmtprint(fmt, "Qid{path=0x%llx(dir:%d,obj:%lld), vers=%ld, type=%d}",
-	    q.path, QDIR(&q), (q.path >> 8), q.vers, q.type);
+	if(q.path == ~0ULL && q.vers == ~0UL && q.type == 0xff)
+		return fmtprint(fmt, "NOQID");
+	else
+		return fmtprint(fmt, "%llux.%lud.%hhx", q.path, q.vers, q.type);
 }
 
 void
@@ -294,7 +296,7 @@
 
 /* Finds the directory containing the git repo. */
 int
-findrepo(char *buf, int nbuf)
+findrepo(char *buf, int nbuf, int *nrel)
 {
 	char *p, *suff;
 
@@ -302,6 +304,7 @@
 	if(getwd(buf, nbuf - strlen(suff) - 1) == nil)
 		return -1;
 
+	*nrel = 0;
 	for(p = buf + strlen(buf); p != nil; p = strrchr(buf, '/')){
 		strcpy(p, suff);
 		if(access(buf, AEXIST) == 0){
@@ -308,6 +311,7 @@
 			p[p == buf] = '\0';
 			return 0;
 		}
+		*nrel += 1;
 		*p = '\0';
 	}
 	werrstr("not a git repository");
@@ -441,4 +445,25 @@
 	h ^= h >> 15;
 
 	return h;
+}
+
+Qid
+parseqid(char *s)
+{
+	char *e;
+	Qid q;
+
+	if(strcmp(s, "NOQID") == 0)
+		return (Qid){-1, -1, -1};		
+	e = s;
+	q.path = strtoull(e, &e, 16);
+	if(*e != '.')
+		sysfatal("corrupt qid: %s (%s)\n", s, e);
+	q.vers = strtoul(e+1, &e, 10);
+	if(*e != '.')
+		sysfatal("corrupt qid: %s (%s)\n", s, e);
+	q.type = strtoul(e+1, &e, 16);
+	if(*e != '\0')
+		sysfatal("corrupt qid: %s (%x)\n", s, *e);
+	return q;
 }
--- a/sys/src/cmd/git/walk.c
+++ b/sys/src/cmd/git/walk.c
@@ -2,53 +2,72 @@
 #include <libc.h>
 #include "git.h"
 
+typedef struct Seen	Seen;
+typedef struct Idxed	Idxed;
+typedef struct Idxent	Idxent;
+
 #define NCACHE 4096
-#define TDIR ".git/index9/tracked"
-#define RDIR ".git/index9/removed"
-typedef struct Cache	Cache;
-typedef struct Wres	Wres;
-struct Cache {
+
+enum {
+	Rflg	= 1 << 0,
+	Mflg	= 1 << 1,
+	Aflg	= 1 << 2,
+	Uflg	= 1 << 3,
+	/* everything after this is not an error */
+	Tflg	= 1 << 4,
+};
+
+struct Seen {
 	Dir*	cache;
 	int	n;
 	int	max;
 };
 
-struct Wres {
-	char	**path;
-	int	npath;
-	int	pathsz;
+struct Idxed {
+	char**	cache;
+	int	n;
+	int	max;
 };
 
-enum {
-	Rflg	= 1 << 0,
-	Mflg	= 1 << 1,
-	Aflg	= 1 << 2,
-	Tflg	= 1 << 3,
-};
+Seen	seentab[NCACHE];
+Idxed	idxtab[NCACHE];
+char	repopath[1024];
+char	wdirpath[1024];
+char	*rstr	= "R ";
+char	*mstr	= "M ";
+char	*astr	= "A ";
+char	*ustr	= "U ";
+char	*tstr 	= "T ";
+char	*bdir = ".git/fs/HEAD/tree";
+int	useidx	= 1;
+int	nrel;
+int	quiet;
+int	dirty;
+int	printflg;
 
-Cache seencache[NCACHE];
-int quiet;
-int useindex;
-int printflg;
-char *bdir = ".git/fs/HEAD/tree";
-char *rstr = "R ";
-char *tstr = "T ";
-char *mstr = "M ";
-char *astr = "A ";
+Idxent	*idx;
+int	idxsz;
+int	nidx;
+int	staleidx;
+Idxent	*wdir;
+int	wdirsz;
+int	nwdir;
 
+int	loadwdir(char*);
+
 int
 seen(Dir *dir)
 {
+	Seen *c;
 	Dir *dp;
 	int i;
-	Cache *c;
 
-	c = &seencache[dir->qid.path&(NCACHE-1)];
+	c = &seentab[dir->qid.path&(NCACHE-1)];
 	dp = c->cache;
 	for(i=0; i<c->n; i++, dp++)
-		if(dir->qid.path == dp->qid.path &&
-		   dir->type == dp->type &&
-		   dir->dev == dp->dev)
+		if(dir->qid.path == dp->qid.path
+		&& dir->qid.type == dp->qid.type
+		&& dir->dev == dp->dev)
 			return 1;
 	if(c->n == c->max){
 		if (c->max == 0)
@@ -63,129 +82,100 @@
 	return 0;
 }
 
-void
-grow(Wres *r)
-{
-	if(r->npath == r->pathsz){
-		r->pathsz = 2*r->pathsz + 1;
-		r->path = erealloc(r->path, r->pathsz * sizeof(char*));
-	}
-}
-
 int
-readpaths(Wres *r, char *pfx, char *dir)
+checkedin(Idxent *e, int change)
 {
-	char *f, *sub, *full, *sep;
-	Dir *d;
-	int fd, ret, i, n;
+	char *p;
+	int r;
 
-	d = nil;
-	ret = -1;
-	sep = "";
-	if(dir[0] != 0)
-		sep = "/";
-	if((full = smprint("%s/%s", pfx, dir)) == nil)
-		sysfatal("smprint: %r");
-	if((fd = open(full, OREAD)) < 0)
-		goto error;
-	while((n = dirread(fd, &d)) > 0){
-		for(i = 0; i < n; i++){
-			if(seen(&d[i]))
-				continue;
-			if(d[i].qid.type & QTDIR){
-				if((sub = smprint("%s%s%s", dir, sep, d[i].name)) == nil)
-					sysfatal("smprint: %r");
-				if(readpaths(r, pfx, sub) == -1){
-					free(sub);
-					goto error;
-				}
-				free(sub);
-			}else{
-				grow(r);
-				if((f = smprint("%s%s%s", dir, sep, d[i].name)) == nil)
-					sysfatal("smprint: %r");
-				r->path[r->npath++] = f;
-			}
-		}
-		free(d);
+	p = smprint("%s/%s", bdir, e->path);
+	r = access(p, AEXIST);
+	if(r == 0 && change){
+		e->state = 'T';
+		staleidx = 1;
 	}
-	ret = r->npath;
-error:
-	close(fd);
-	free(full);
-	return ret;
+	free(p);
+	return r == 0;
 }
 
 int
-cmp(void *pa, void *pb)
+indexed(char *path, int isdir)
 {
-	return strcmp(*(char **)pa, *(char **)pb);
-}
+	int lo, hi, mid, n, r;
+	char *s;
 
-void
-dedup(Wres *r)
-{
-	int i, o;
-
-	if(r->npath <= 1)
-		return;
-	o = 0;
-	qsort(r->path, r->npath, sizeof(r->path[0]), cmp);
-	for(i = 1; i < r->npath; i++)
-		if(strcmp(r->path[o], r->path[i]) != 0)
-			r->path[++o] = r->path[i];
-	r->npath = o + 1;
+	if(!useidx){
+		s = smprint("%s/%s", bdir, path);
+		r = access(s, AEXIST);
+		free(s);
+		return r == 0;
+	}
+	s = path;
+	if(isdir)
+		s = smprint("%s/", path);
+	r = -1;
+	lo = 0;
+	hi = nidx-1;
+	n = strlen(s);
+	while(lo <= hi){
+		mid = (hi + lo) / 2;
+		if(isdir)
+			r = strncmp(s, idx[mid].path, n);
+		else
+			r = strcmp(s, idx[mid].path);
+		if(r < 0)
+			hi = mid-1;
+		else if(r > 0)
+			lo = mid+1;
+		else
+			break;
+	}
+	if(isdir)
+		free(s);
+	return r == 0;
 }
 
 int
-sameqid(Dir *d, char *qf)
+idxcmp(void *pa, void *pb)
 {
-	char indexqid[64], fileqid[64], *p;
-	int fd, n;
+	Idxent *a, *b;
+	int c;
 
-	if(!d)
-		return 0;
-	if((fd = open(qf, OREAD)) == -1)
-		return 0;
-	if((n = readn(fd, indexqid, sizeof(indexqid) - 1)) == -1)
-		return 0;
-	indexqid[n] = 0;
-	close(fd);
-	if((p = strpbrk(indexqid, "  \t\n\r")) != nil)
-		*p = 0;
-
-	snprint(fileqid, sizeof(fileqid), "%ullx.%uld.%.2uhhx",
-		d->qid.path, d->qid.vers, d->qid.type);
-
-	if(strcmp(indexqid, fileqid) == 0)
-		return 1;
-	return 0;
+	a = (Idxent*)pa;
+	b = (Idxent*)pb;
+	if((c = strcmp(a->path, b->path)) != 0)
+		return c;
+	assert(a->order != b->order);
+	return a-> order < b->order ? -1 : 1;
 }
 
-void
-writeqid(Dir *d, char *qf)
-{
-	int fd;
-
-	if((fd = create(qf, OWRITE, 0666)) == -1)
-		return;
-	fprint(fd, "%ullx.%uld.%.2uhhx\n",
-		d->qid.path, d->qid.vers, d->qid.type);
-	close(fd);
-}
-
 int
-samedata(char *pa, char *pb)
+samedata(Idxent *a, Idxent *b)
 {
-	char ba[32*1024], bb[32*1024];
+	char *gitpath, ba[IOUNIT], bb[IOUNIT];
 	int fa, fb, na, nb, same;
+	Dir *da, *db;
 
+	if(a != nil){
+		if(a->qid.path == b->qid.path
+		&& a->qid.vers == b->qid.vers
+		&& a->qid.type == b->qid.type
+		&& a->mode == b->mode
+		&& a->mode != 0)
+			return 1;
+	}
+
+	da = nil;
+	db = nil;
 	same = 0;
-	fa = open(pa, OREAD);
-	fb = open(pb, OREAD);
-	if(fa == -1 || fb == -1){
+	if((gitpath = smprint("%s/%s", bdir, b->path)) == nil)
+		sysfatal("smprint: %r");
+	fa = open(b->path, OREAD);
+	fb = open(gitpath, OREAD);
+	if(fa == -1 || fb == -1)
 		goto mismatch;
-	}
+	da = dirfstat(fa);
+	db = dirfstat(fb);
 	while(1){
 		if((na = readn(fa, ba, sizeof(ba))) == -1)
 			goto mismatch;
@@ -198,8 +188,19 @@
 		if(memcmp(ba, bb, na) != 0)
 			goto mismatch;
 	}
+	if(a != nil){
+		a->qid = b->qid;
+		if(da != nil)
+			a->mode = da->mode;
+	}
+	staleidx = 1;
+	if(da->mode != db->mode)
+		goto mismatch;
 	same = 1;
+
 mismatch:
+	free(da);
+	free(db);
 	if(fa != -1)
 		close(fa);
 	if(fb != -1)
@@ -207,7 +208,129 @@
 	return same;
 }
 
+int
+loadent(char *dir, Dir *d, int fullpath)
+{
+	char *path;
+	int ret, isdir;
+	Idxent *e;
+
+	if(fullpath)
+		path = strdup(dir);
+	else
+		path = smprint("%s/%s", dir, d->name);
+	if(path == nil)
+		sysfatal("smprint: %r");
+
+	ret = 0;
+	isdir = d->qid.type & QTDIR;
+	cleanname(path);
+	if((printflg & Uflg) == 0 && !indexed(path, isdir))
+		return 0;
+	if(isdir){
+		ret = loadwdir(path);
+		free(path);
+	}else{
+		if(nwdir == wdirsz){
+			wdirsz += wdirsz/2;
+			wdir = erealloc(wdir, wdirsz*sizeof(Idxent));
+		}
+		e = wdir + nwdir;
+		e->path = path;
+		e->qid = d->qid;
+		e->mode = d->mode;
+		e->order = nwdir;
+		e->state = 'T';
+		nwdir++;
+	}
+	return ret;
+}
+
+int
+loadwdir(char *path)
+{
+	int fd, ret, i, n;
+	Dir *d, *e;
+
+	d = nil;
+	e = nil;
+	ret = -1;
+
+	cleanname(path);
+	if(strncmp(path, ".git/", 5) == 0)
+		return 0;
+	if((fd = open(path, OREAD)) < 0)
+		goto error;
+	if((e = dirfstat(fd)) == nil)
+		sysfatal("fstat: %r");
+	if(e->qid.type & QTDIR)
+		while((n = dirread(fd, &d)) > 0){
+			for(i = 0; i < n; i++)
+				if(loadent(path, &d[i], 0) == -1)
+					goto error;
+			free(d);
+		}
+	else{
+		if(loadent(path, e, 1) == -1)
+			goto error;
+	}
+	ret = 0;
+error:
+	free(e);
+	if(fd != -1)
+		close(fd);
+	return ret;
+}
+
+int
+pfxmatch(char *p, char **pfx, int *pfxlen, int npfx)
+{
+	int i;
+
+	if(npfx == 0)
+		return 1;
+	for(i = 0; i < npfx; i++){
+		if(strncmp(p, pfx[i], pfxlen[i]) == 0
+		&& (p[pfxlen[i]] == '/' || p[pfxlen[i]] == 0))
+			return 1;
+		if(strcmp(pfx[i], ".") == 0 || *pfx[i] == 0)
+			return 1;
+	}
+	return 0;
+}
+
+
+char*
+reporel(char *s)
+{
+	char *p;
+	int n;
+
+	if(*s == '/')
+		s = strdup(s);
+	else
+		s = smprint("%s/%s", wdirpath, s);
+	p = cleanname(s);
+	n = strlen(repopath);
+	if(strncmp(s, repopath, n) == 0)
+		p = s + n;
+	else
+		sysfatal("path outside repo: %s", s);
+	if(*p == '/')
+		p++;
+	memmove(s, p, strlen(p)+1);
+	return s;
+}
+
 void
+show(Biobuf *o, int flg, char *str, char *path)
+{
+	dirty |= flg;
+	if(!quiet && (printflg & flg))
+		Bprint(o, "%s%s\n", str, path);
+}
+
+void
 usage(void)
 {
 	fprint(2, "usage: %s [-qbc] [-f filt] [-b base] [paths...]\n", argv0);
@@ -217,14 +340,21 @@
 void
 main(int argc, char **argv)
 {
-	char *rpath, *tpath, *bpath, *base, buf[8], repo[512];
-	char *p, *e;
-	int i, dirty;
-	Wres r;
+	char *p, *e, *ln, *base, **argrel, *parts[4], xbuf[8];
+	int i, j, c, line, wfd, *argn;
+	Biobuf *f, *o, *w;
 	Hash h;
-	Dir *d;
+	Dir rn;
 
 	gitinit();
+	if(access(".git/fs/ctl", AEXIST) != 0)
+		sysfatal("no running git/fs");
+	if(getwd(wdirpath, sizeof(wdirpath)) == nil)
+		sysfatal("getwd: %r");
+	if(findrepo(repopath, sizeof(repopath), &nrel) == -1)
+		sysfatal("find root: %r");
+	if(chdir(repopath) == -1)
+		sysfatal("chdir: %r");
 
 	ARGBEGIN{
 	case 'q':
@@ -235,6 +365,7 @@
 		tstr = "";
 		mstr = "";
 		astr = "";
+		ustr = "";
 		break;
 	case 'f':
 		for(p = EARGF(usage()); *p; p++)
@@ -243,109 +374,160 @@
 			case 'A':	printflg |= Aflg;	break;
 			case 'M':	printflg |= Mflg;	break;
 			case 'R':	printflg |= Rflg;	break;
+			case 'U':	printflg |= Uflg;	break;
 			default:	usage();		break;
 		}
 		break;
 	case 'b':
-		useindex = 0;
+		useidx = 0;
 		base = EARGF(usage());
 		if(resolveref(&h, base) == -1)
-			sysfatal("no such ref '%s'", base);
+			sysfatal("no such reXf '%s'", base);
 		bdir = smprint(".git/fs/object/%H/tree", h);
 		break;
 	default:
 		usage();
-	}ARGEND
+	}ARGEND;
 
-	gitinit();
-	if(findrepo(repo, sizeof(repo)) == -1)
-		sysfatal("find root: %r");
-	if(chdir(repo) == -1)
-		sysfatal("chdir: %r");
-	if(access(".git/fs/ctl", AEXIST) != 0)
-		sysfatal("no running git/fs");
-	dirty = 0;
-	memset(&r, 0, sizeof(r));
 	if(printflg == 0)
 		printflg = Tflg | Aflg | Mflg | Rflg;
-	if(argc == 0){
-		if(access(TDIR, AEXIST) == 0 && readpaths(&r, TDIR, "") == -1)
-			sysfatal("read tracked: %r");
-		if(access(RDIR, AEXIST) == 0 && readpaths(&r, RDIR, "") == -1)
-			sysfatal("read removed: %r");
-		if(access(bdir, AEXIST) == 0 && readpaths(&r, bdir, "") == -1)
-			sysfatal("read base: %r");
-	}else{
-		for(i = 0; i < argc; i++){
-			tpath = smprint(TDIR"/%s", argv[i]);
-			rpath = smprint(RDIR"/%s", argv[i]);
-			bpath = smprint("%s/%s", bdir, argv[i]);
-			if((d = dirstat(tpath)) == nil && (d = dirstat(rpath)) == nil && (d = dirstat(bpath)) == nil)
-				goto nextarg;
-			if(d->mode & DMDIR){
-				readpaths(&r, TDIR, argv[i]);
-				readpaths(&r, RDIR, argv[i]);
-				readpaths(&r, bdir, argv[i]);
-			}else{
-				grow(&r);
-				r.path[r.npath++] = estrdup(argv[i]);
+
+	nidx = 0;
+	idxsz = 32;
+	idx = emalloc(idxsz*sizeof(Idxent));
+	nwdir = 0;
+	wdirsz = 32;
+	wdir = emalloc(wdirsz*sizeof(Idxent));
+	argrel = emalloc(argc*sizeof(int));
+	argn = emalloc(argc*sizeof(int));
+	for(i = 0; i < argc; i++){
+		argrel[i] = reporel(argv[i]);
+		argn[i] = strlen(argrel[i]);
+	}
+	if((o = Bfdopen(1, OWRITE)) == nil)
+		sysfatal("open out: %r");
+	if(useidx){
+		if((f = Bopen(".git/INDEX9", OREAD)) == nil){
+			fprint(2, "open index: %r\n");
+			if(access(".git/index9", AEXIST) == 0){
+				fprint(2, "index format conversion needed:\n");
+				fprint(2, "\tcd %s && git/fs\n", repopath);
+				fprint(2, "\t@{cd .git/fs/HEAD/tree && walk -f | sed 's/^/T NOQID 0 /'} >> .git/INDEX9\n");
+				fprint(2, "\t@{cd .git/index9/removed && walk -f | sed 's/^/R NOQID 0 /'} >> .git/INDEX9\n");
 			}
-nextarg:
-			free(bpath);
-			free(tpath);
-			free(rpath);
-			free(d);
+			exits("noindex");
 		}
+		line = 0;
+		while((ln = Brdstr(f, '\n', 1)) != nil){
+			line++;
+			/* allow blank lines */
+			if(ln[0] == 0 || ln[0] == '\n')
+				continue;
+			if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts))
+				sysfatal(".git/INDEX9:%d: corrupt index", line);
+			if(nidx == idxsz){
+				idxsz += idxsz/2;
+				idx = realloc(idx, idxsz*sizeof(Idxent));
+			}
+			cleanname(parts[3]);
+			idx[nidx].state = *parts[0];
+			idx[nidx].qid = parseqid(parts[1]);
+			idx[nidx].mode = strtol(parts[2], nil, 8);
+			idx[nidx].path = strdup(parts[3]);
+			idx[nidx].order = nidx;
+			nidx++;
+			free(ln);
+		}
+		qsort(idx, nidx, sizeof(Idxent), idxcmp);
 	}
-	dedup(&r);
 
-	for(i = 0; i < r.npath; i++){
-		p = r.path[i];
-		d = dirstat(p);
-		if(d && d->mode & DMDIR)
-			goto next;
-		rpath = smprint(RDIR"/%s", p);
-		tpath = smprint(TDIR"/%s", p);
-		bpath = smprint("%s/%s", bdir, p);
-		/* Fast path: we don't want to force access to the rpath. */
-		if(useindex && d && sameqid(d, tpath)) {
-			if(!quiet && (printflg & Tflg))
-				print("%s%s\n", tstr, p);
+	if(argc == 0)
+		loadwdir(".");
+	else for(i = 0; i < argc; i++)
+		loadwdir(argrel[i]);
+	qsort(wdir, nwdir, sizeof(Idxent), idxcmp);
+	i = 0;
+	j = 0;
+	while(i < nidx || j < nwdir){
+		/* find the last entry we tracked for a path */
+		while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0){
+			staleidx = 1;
+			i++;
+		}
+		while(j+1 < nwdir && strcmp(wdir[j].path, wdir[j+1].path) == 0)
+			j++;
+		if(!pfxmatch(idx[i].path, argrel, argn, argc)){
+			i++;
+			continue;
+		}
+		if(i >= nidx)
+			c = 1;
+		else if(j >= nwdir)
+			c = -1;
+		else
+			c = strcmp(idx[i].path, wdir[j].path);
+		/* exists in both index and on disk */
+		if(c == 0){
+			if(idx[i].state == 'R' && checkedin(&idx[i], 0))
+				show(o, Rflg, rstr, idx[i].path);
+			else if(idx[i].state == 'A' && !checkedin(&idx[i], 1))
+				show(o, Aflg, astr, idx[i].path);
+			else if(!samedata(&idx[i], &wdir[j]))
+				show(o, Mflg, mstr, idx[i].path);
+			else
+				show(o, Tflg, tstr, idx[i].path);
+			i++;
+			j++;
+		/* only exists in index */
+		}else if(c < 0){
+			show(o, Rflg, rstr, idx[i].path);
+			i++;
+		/* only exists on disk */
 		}else{
-			if(d == nil || (useindex && access(rpath, AEXIST) == 0)){
-				if(access(bpath, AEXIST) == 0){
-					dirty |= Rflg;
-					if(!quiet && (printflg & Rflg))
-						print("%s%s\n", rstr, p);
-				}
-			}else if(access(bpath, AEXIST) == -1) {
-				dirty |= Aflg;
-				if(!quiet && (printflg & Aflg))
-					print("%s%s\n", astr, p);
-			}else if(samedata(p, bpath)){
-				if(!quiet && (printflg & Tflg))
-					print("%s%s\n", tstr, p);
-				if(useindex)
-					writeqid(d, tpath);
-			}else{
-				dirty |= Mflg;
-				if(!quiet && (printflg & Mflg))
-					print("%s%s\n", mstr, p);
-			}
+			if(!useidx && checkedin(&wdir[j], 0)){
+				if(samedata(nil, &wdir[j]))
+					show(o, Tflg, tstr, wdir[j].path);
+				else
+					show(o, Mflg, mstr, wdir[j].path);
+			}else if(printflg & Uflg)
+				show(o, Uflg, ustr, wdir[j].path);
+			j++;
 		}
-		free(rpath);
-		free(tpath);
-		free(bpath);
-next:
-		free(d);
 	}
+	Bterm(o);
+
+	if(useidx && staleidx)
+	if((wfd = create(".git/INDEX9.new", OWRITE, 0644)) != -1){
+		if((w = Bfdopen(wfd, OWRITE)) == nil){
+			close(wfd);
+			goto Nope;
+		}
+		for(i = 0; i < nidx; i++){
+			while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0)
+				i++;
+			Bprint(w, "%c %Q %o %s\n",
+				idx[i].state,
+				idx[i].qid, 
+				idx[i].mode,
+				idx[i].path);
+		}
+		Bterm(w);
+		nulldir(&rn);
+		rn.name = "INDEX9";
+		if(remove(".git/INDEX9") == -1)
+			goto Nope;
+		if(dirwstat(".git/INDEX9.new", &rn) == -1)
+			sysfatal("rename: %r");
+	}
+
+Nope:
 	if(!dirty)
 		exits(nil);
 
-	p = buf;
-	e = buf + sizeof(buf);
+	p = xbuf;
+	e = p + sizeof(xbuf);
 	for(i = 0; (1 << i) != Tflg; i++)
 		if(dirty & (1 << i))
-			p = seprint(p, e, "%c", "DMAT"[i]);
-	exits(buf);
+			p = seprint(p, e, "%c", "RMAUT"[i]);
+	exits(xbuf);
 }


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [9front] [v2] git: new index format, 235x faster walks
  2023-09-18  2:21 [9front] [v2] git: new index format, 235x faster walks ori
@ 2023-09-18  3:49 ` ori
  0 siblings, 0 replies; 2+ messages in thread
From: ori @ 2023-09-18  3:49 UTC (permalink / raw)
  To: 9front

Sorry for the spam; One more revision; found a couple of missing
changes to the new 4 column index format.

diff 597ff0b59658032e141553f286e02b9370f995b5 uncommitted
--- a/sys/src/cmd/git/add
+++ b/sys/src/cmd/git/add
@@ -7,32 +7,17 @@
 flagfmt='r:remove'; args='file ...'
 eval `''{aux/getflags $*} || exec aux/usage
 
-add='tracked'
-del='removed'
-if(~ $remove 1){
-	add='removed'
-	del='tracked'
-}
+s=A
+if(~ $remove 1)
+	s=R
 if(~ $#* 0)
 	exec aux/usage
 
 paths=`$nl{cleanname -d $gitrel $* | drop $gitroot}
-if(~ $add tracked)
+if(~ $s A)
 	files=`$nl{walk -f ./$paths}
 if not
-	files=`$nl{cd .git/index9/tracked/ && walk -f ./$paths}
-
-for(f in $files){
-	if(! ~ `$nl{cleanname $f} .git/*){
-		addpath=.git/index9/$add/$f
-		delpath=.git/index9/$del/$f
-		mkdir -p `$nl{basename -d $addpath}
-		mkdir -p `$nl{basename -d $delpath}
-		# We don't want a matching qid, so that
-		# git/walk doesn't think this came from
-		# a checkout.
-		echo -n > $addpath
-		rm -f $delpath
-	}
-}
+	files=`$nl{cd .git/fs/HEAD/tree && walk -f ./$paths}
+for(f in $files)
+	echo $s NOQID 0 $f >> .git/INDEX9
 exit ''
--- a/sys/src/cmd/git/branch
+++ b/sys/src/cmd/git/branch
@@ -84,7 +84,6 @@
 for(m in $cleanpaths){
 	d=`$nl{basename -d $m}
 	mkdir -p $d
-	mkdir -p .git/index9/tracked/$d
 	# Modifications can turn a file into
 	# a directory, or vice versa, so we
 	# need to delete and copy the files
@@ -97,11 +96,11 @@
 		b=file
 	if(! ~ $a $b){
 		rm -rf $m
-		rm -rf .git/index9/tracked/$m
+		echo R NOQID 0 $m >> .git/INDEX9
 	}
 	if(~ $b file){
 		cp -x -- $basedir/tree/$m $m
-		walk -eq $m > .git/index9/tracked/$m
+		echo T NOQID 0 $m >> .git/INDEX9
 		touch $m
 	}
 }
@@ -112,9 +111,9 @@
 	merge1 $ours $ours $common $theirs
 }
 
-if(! ~ $#deleted 0){
-	rm -f $deleted
-	rm -f .git/index9/tracked/$deleted
+for(d in $deleted){
+	rm -f $d
+	echo R NOQID 0 $d >> .git/INDEX9
 }
 
 echo ref: $new > .git/HEAD
--- a/sys/src/cmd/git/clone
+++ b/sys/src/cmd/git/clone
@@ -87,11 +87,8 @@
 		git/fs
 		@ {builtin cd $tree && tar cif /fd/1 .} | @ {tar xf /fd/0} \
 			|| die 'checkout failed:' $status
-		for(f in `$nl{walk -f $tree | drop $tree}){
-			idx=.git/index9/tracked/$f
-			mkdir -p `$nl{basename -d $idx}
-			walk -eq ./$f > $idx
-		}
+		{for(f in `$nl{cd $tree && walk -f})
+			echo 'T NOQID 0 '$f} > .git/INDEX9
 	}
 	if not{
 		echo no default branch >[1=2]
--- a/sys/src/cmd/git/commit
+++ b/sys/src/cmd/git/commit
@@ -65,8 +65,8 @@
 fn parents{
 	if(! ~ $#revise 0)
 		parents=`{cat $gitfs/HEAD/parent}
-	if not if(test -f .git/index9/merge-parents)
-		parents=`{cat .git/index9/merge-parents | sort | uniq}
+	if not if(test -f .git/merge-parents)
+		parents=`{cat .git/merge-parents | sort | uniq}
 	if not if(~ $initial true)
 		parents=()
 	if not
@@ -78,7 +78,7 @@
 	if(! ~ $#parents 0)
 		pflags='-p'^$parents
 	hash=`{git/save -n $"name -e $"email -m $"msg $pflags $files || die $status}
-	rm -f .git/index9/merge-parents
+	rm -f .git/merge-parents
 }
 
 fn update{
@@ -89,14 +89,10 @@
 	echo $branch: $hash
 	echo $hash > $refpath
 	for(f in $files){
-		if(test -e .git/index9/removed/$f || ! test -e $f){
-			rm -f .git/index9/removed/$f
-			rm -f .git/index9/tracked/$f
-		}
-		if not{
-			mkdir -p `{basename -d $f}
-			walk -eq $f > .git/index9/tracked/$f
-		}
+		if(! test -e $f && ! test -e .git/object/$hash/tree/$f)
+			echo R NOQID 0 $f >> .git/INDEX9
+		if not
+			echo T NOQID 0 $f >> .git/INDEX9
 	}
 }
 
@@ -120,12 +116,12 @@
 }
 
 files=()
-if(test -f .git/index9/merge-parents)
-	files=`$nl{git/query -c `{cat .git/index9/merge-parents} | sed 's/^..//'}
+if(test -f .git/merge-parents)
+	files=`$nl{git/query -c `{cat .git/merge-parents} | sed 's/^..//'}
 if(! ~ $#* 0)
 	files=($files `$nl{git/walk -c `$nl{cleanname -d $gitrel $*}})
-if(~ $status '' || ~ $#files 0 && ! test -f .git/index9/merge-parents && ~ $#revise 0)
-	die 'nothing to commit' $status
+if(~ $status '' || ~ $#files 0 && ! test -f .git/merge-parents && ~ $#revise 0)
+	die 'nothing to commit'
 @{
 	flag e +
 	whoami
--- a/sys/src/cmd/git/conf.c
+++ b/sys/src/cmd/git/conf.c
@@ -59,7 +59,7 @@
 main(int argc, char **argv)
 {
 	char repo[512], *p, *s;
-	int i, j;
+	int i, j, nrel;
 
 	ARGBEGIN{
 	case 'f':	file[nfile++]=EARGF(usage());	break;
@@ -69,7 +69,7 @@
 	}ARGEND;
 
 	if(findroot){
-		if(findrepo(repo, sizeof(repo)) == -1)
+		if(findrepo(repo, sizeof(repo), &nrel) == -1)
 			sysfatal("%r");
 		print("%s\n", repo);
 		exits(nil);
--- a/sys/src/cmd/git/diff
+++ b/sys/src/cmd/git/diff
@@ -7,8 +7,10 @@
 flagfmt='c:commit branch, s:summarize'; args='[file ...]'
 eval `''{aux/getflags $*} || exec aux/usage
 
-if(~ $#commit 0)
+if(~ $#commit 0){
 	commit=HEAD
+	cparam=()
+}
 
 files=()
 if(! ~ $#* 0)
@@ -16,7 +18,7 @@
 
 branch=`{git/query -p $commit}
 if(~ $summarize 1){
-	git/walk -fMAR -b $commit $files
+	git/walk -fMAR $cparam $files
 	exit
 }
 
@@ -24,7 +26,7 @@
 mntgen /mnt/scratch
 bind $branch/tree/ /mnt/scratch/a
 bind . /mnt/scratch/b
-for(f in `$nl{git/walk -c -fRMA -b $commit $files}){
+for(f in `$nl{git/walk -c -fRMA $cparam $files}){
 	if(~ $#showed 0){
 		echo diff `{git/query $commit} uncommitted
 		showed=1
--- a/sys/src/cmd/git/git.h
+++ b/sys/src/cmd/git/git.h
@@ -21,6 +21,7 @@
 typedef struct Dblock	Dblock;
 typedef struct Objq	Objq;
 typedef struct Qelt	Qelt;
+typedef struct Idxent	Idxent;
 
 enum {
 	Pathmax		= 512,
@@ -190,6 +191,13 @@
 	int	len;
 };
 
+struct Idxent {
+	char	*path;
+	Qid	qid;
+	int	mode;
+	int	order;
+	char	state;
+};
 
 #define GETBE16(b)\
 		((((b)[0] & 0xFFul) <<  8) | \
@@ -301,9 +309,10 @@
 int	hassuffix(char *, char *);
 int	swapsuffix(char *, int, char *, char *, char *);
 char	*strip(char *);
-int	findrepo(char *, int);
+int	findrepo(char *, int, int*);
 int	showprogress(int, int);
 u64int	murmurhash2(void*, usize);
+Qid	parseqid(char*);
 
 /* packing */
 void	dtinit(Dtab *, Object*);
--- a/sys/src/cmd/git/import
+++ b/sys/src/cmd/git/import
@@ -98,7 +98,8 @@
 		if(~ $#nocommit 0){
 			if(hash=`{git/save -n $aname -e $amail -N $name -E $email -m $msg -d $date $parents $files}){
 				echo $hash > $refpath
-				rm -f .git/index9/removed/$files
+				for(f in $files)
+					echo T NOQID 0 $f >> .git/INDEX9
 			}
 		}
 		status=''''
--- a/sys/src/cmd/git/init
+++ b/sys/src/cmd/git/init
@@ -31,7 +31,7 @@
 	echo '[branch "'$branch'"]'
 	echo '	remote = origin'
 }
-
+>$dir/.git/INDEX9
 >$dir/.git/HEAD {
 	echo ref: refs/heads/$branch
 }
--- a/sys/src/cmd/git/log.c
+++ b/sys/src/cmd/git/log.c
@@ -239,7 +239,7 @@
 main(int argc, char **argv)
 {
 	char path[1024], repo[1024], *p, *r;
-	int i, nrepo;
+	int i, nrel, nrepo;
 
 	ARGBEGIN{
 	case 'e':
@@ -259,7 +259,7 @@
 		break;
 	}ARGEND;
 
-	if(findrepo(repo, sizeof(repo)) == -1)
+	if(findrepo(repo, sizeof(repo), &nrel) == -1)
 		sysfatal("find root: %r");
 	nrepo = strlen(repo);
 	if(argc != 0){
--- a/sys/src/cmd/git/merge
+++ b/sys/src/cmd/git/merge
@@ -38,8 +38,8 @@
 	git/revert .
 	exit ''
 }
-echo $ours >> .git/index9/merge-parents
-echo $theirs >> .git/index9/merge-parents
+echo $ours >> .git/merge-parents
+echo $theirs >> .git/merge-parents
 
 merge $ours $base $theirs
 >[1=2] echo 'merge complete: remember to commit'
--- a/sys/src/cmd/git/pack.c
+++ b/sys/src/cmd/git/pack.c
@@ -66,7 +66,7 @@
 Object *lruhead;
 Object *lrutail;
 vlong	ncache;
-vlong	cachemax = 512*MiB;
+vlong	cachemax = 1024*MiB;
 Packf	*packf;
 int	npackf;
 int	openpacks;
--- a/sys/src/cmd/git/query.c
+++ b/sys/src/cmd/git/query.c
@@ -152,10 +152,10 @@
 void
 main(int argc, char **argv)
 {
-	int i, j, n;
-	Hash *h;
-	char *p, *e, *s, *objpfx;
 	char query[2048], repo[512];
+	char *p, *e, *s, *objpfx;
+	int i, j, n, nrel;
+	Hash *h;
 
 	ARGBEGIN{
 	case 'd':	chattygit++;	break;
@@ -170,7 +170,7 @@
 
 	if(argc == 0)
 		usage();
-	if(findrepo(repo, sizeof(repo)) == -1)
+	if(findrepo(repo, sizeof(repo), &nrel) == -1)
 		sysfatal("find root: %r");
 	if(chdir(repo) == -1)
 		sysfatal("chdir: %r");
--- a/sys/src/cmd/git/save.c
+++ b/sys/src/cmd/git/save.c
@@ -10,6 +10,7 @@
 	char *dat;
 	int ndat;
 };
+
 enum {
 	Maxparents = 16,
 };
@@ -21,7 +22,9 @@
 char	*commitmsg;
 Hash	parents[Maxparents];
 int	nparents;
-
+Idxent	*idx;
+int	idxsz;
+int	nidx;
 int
 gitmode(Dirent *e)
 {
@@ -38,6 +41,20 @@
 }
 
 int
+idxcmp(void *pa, void *pb)
+{
+	Idxent *a, *b;
+	int c;
+
+	a = (Idxent*)pa;
+	b = (Idxent*)pb;
+	if((c = strcmp(a->path, b->path)) != 0)
+		return c;
+	assert(a->order != b->order);
+	return a-> order < b->order ? -1 : 1;
+}
+
+int
 entcmp(void *pa, void *pb)
 {
 	char abuf[256], bbuf[256], *ae, *be;
@@ -184,27 +201,22 @@
 int
 tracked(char *path)
 {
-	char ipath[256];
-	Dir *d;
+	int lo, hi, mid, r;
 
-	/* Explicitly removed. */
-	snprint(ipath, sizeof(ipath), ".git/index9/removed/%s", path);
-	if(strstr(cleanname(ipath), ".git/index9/removed") != ipath)
-		sysfatal("path %s leaves index", ipath);
-	d = dirstat(ipath);
-	if(d != nil && d->qid.type != QTDIR){
-		free(d);
-		return 0;
+	r = -1;
+	lo = 0;
+	hi = nidx-1;
+	while(lo <= hi){
+		mid = (hi + lo) / 2;
+		r = strcmp(path, idx[mid].path);
+		if(r < 0)
+			hi = mid-1;
+		else if(r > 0)
+			lo = mid+1;
+		else
+			break;
 	}
-
-	/* Explicitly added. */
-	snprint(ipath, sizeof(ipath), ".git/index9/tracked/%s", path);
-	if(strstr(cleanname(ipath), ".git/index9/tracked") != ipath)
-		sysfatal("path %s leaves index", ipath);
-	if(access(ipath, AEXIST) == 0)
-		return 1;
-
-	return 0;
+	return r == 0;
 }
 
 int
@@ -354,10 +366,11 @@
 void
 main(int argc, char **argv)
 {
+	char *ln, *dstr, *parts[4], cwd[1024];
+	int i, r, line, ncwd;
 	Hash th, ch;
-	char *dstr, cwd[1024];
-	int i, r, ncwd;
 	vlong date;
+	Biobuf *f;
 	Object *t;
 
 	gitinit();
@@ -425,6 +438,33 @@
 	}
 
 	t = findroot();
+	nidx = 0;
+	idxsz = 32;
+	idx = emalloc(idxsz*sizeof(Idxent));
+	if((f = Bopen(".git/INDEX9", OREAD)) == nil)
+		sysfatal("open index: %r");
+	line = 0;
+	while((ln = Brdstr(f, '\n', 1)) != nil){
+		line++;
+		if(ln[0] == 0 || ln[0] == '\n')
+			continue;
+		if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts))
+			sysfatal(".git/INDEX9:%d: corrupt index", line);
+		if(nidx == idxsz){
+			idxsz += idxsz/2;
+			idx = realloc(idx, idxsz*sizeof(Idxent));
+		}
+		cleanname(parts[3]);
+		idx[nidx].state = *parts[0];
+		idx[nidx].qid = parseqid(parts[1]);
+		idx[nidx].mode = strtol(parts[2], nil, 8);
+		idx[nidx].path = strdup(parts[3]);
+		idx[nidx].order = nidx;
+		nidx++;
+		free(ln);
+	}
+	Bterm(f);
+	qsort(idx, nidx, sizeof(Idxent), idxcmp);
 	r = treeify(t, argv, argv + argc, 0, &th);
 	if(r == -1)
 		sysfatal("could not commit: %r\n");
--- a/sys/src/cmd/git/test/basic.rc
+++ b/sys/src/cmd/git/test/basic.rc
@@ -64,4 +64,3 @@
 q diff -c scratch/upstream/file.txt scratch/downstream/file.txt || die mismatch
 ! test -e scratch/upstream/file2.txt || die mismatch
 ! test -e scratch/downstream/file2.txt || die mismatch
-
--- a/sys/src/cmd/git/test/util.rc
+++ b/sys/src/cmd/git/test/util.rc
@@ -1,5 +1,5 @@
 fn q {
-	{$* > /tmp/out.$pid && rm /tmp/out.$pid} || cat /tmp/out
+	{$* > /tmp/out.$pid && rm /tmp/out.$pid} || cat /tmp/out.$pid
 }
 fn qq {
 	$* >/dev/null >[2]/dev/null
--- a/sys/src/cmd/git/util.c
+++ b/sys/src/cmd/git/util.c
@@ -192,8 +192,10 @@
 	Qid q;
 
 	q = va_arg(fmt->args, Qid);
-	return fmtprint(fmt, "Qid{path=0x%llx(dir:%d,obj:%lld), vers=%ld, type=%d}",
-	    q.path, QDIR(&q), (q.path >> 8), q.vers, q.type);
+	if(q.path == ~0ULL && q.vers == ~0UL && q.type == 0xff)
+		return fmtprint(fmt, "NOQID");
+	else
+		return fmtprint(fmt, "%llux.%lud.%hhx", q.path, q.vers, q.type);
 }
 
 void
@@ -294,7 +296,7 @@
 
 /* Finds the directory containing the git repo. */
 int
-findrepo(char *buf, int nbuf)
+findrepo(char *buf, int nbuf, int *nrel)
 {
 	char *p, *suff;
 
@@ -302,6 +304,7 @@
 	if(getwd(buf, nbuf - strlen(suff) - 1) == nil)
 		return -1;
 
+	*nrel = 0;
 	for(p = buf + strlen(buf); p != nil; p = strrchr(buf, '/')){
 		strcpy(p, suff);
 		if(access(buf, AEXIST) == 0){
@@ -308,6 +311,7 @@
 			p[p == buf] = '\0';
 			return 0;
 		}
+		*nrel += 1;
 		*p = '\0';
 	}
 	werrstr("not a git repository");
@@ -441,4 +445,25 @@
 	h ^= h >> 15;
 
 	return h;
+}
+
+Qid
+parseqid(char *s)
+{
+	char *e;
+	Qid q;
+
+	if(strcmp(s, "NOQID") == 0)
+		return (Qid){-1, -1, -1};		
+	e = s;
+	q.path = strtoull(e, &e, 16);
+	if(*e != '.')
+		sysfatal("corrupt qid: %s (%s)\n", s, e);
+	q.vers = strtoul(e+1, &e, 10);
+	if(*e != '.')
+		sysfatal("corrupt qid: %s (%s)\n", s, e);
+	q.type = strtoul(e+1, &e, 16);
+	if(*e != '\0')
+		sysfatal("corrupt qid: %s (%x)\n", s, *e);
+	return q;
 }
--- a/sys/src/cmd/git/walk.c
+++ b/sys/src/cmd/git/walk.c
@@ -2,53 +2,72 @@
 #include <libc.h>
 #include "git.h"
 
+typedef struct Seen	Seen;
+typedef struct Idxed	Idxed;
+typedef struct Idxent	Idxent;
+
 #define NCACHE 4096
-#define TDIR ".git/index9/tracked"
-#define RDIR ".git/index9/removed"
-typedef struct Cache	Cache;
-typedef struct Wres	Wres;
-struct Cache {
+
+enum {
+	Rflg	= 1 << 0,
+	Mflg	= 1 << 1,
+	Aflg	= 1 << 2,
+	Uflg	= 1 << 3,
+	/* everything after this is not an error */
+	Tflg	= 1 << 4,
+};
+
+struct Seen {
 	Dir*	cache;
 	int	n;
 	int	max;
 };
 
-struct Wres {
-	char	**path;
-	int	npath;
-	int	pathsz;
+struct Idxed {
+	char**	cache;
+	int	n;
+	int	max;
 };
 
-enum {
-	Rflg	= 1 << 0,
-	Mflg	= 1 << 1,
-	Aflg	= 1 << 2,
-	Tflg	= 1 << 3,
-};
+Seen	seentab[NCACHE];
+Idxed	idxtab[NCACHE];
+char	repopath[1024];
+char	wdirpath[1024];
+char	*rstr	= "R ";
+char	*mstr	= "M ";
+char	*astr	= "A ";
+char	*ustr	= "U ";
+char	*tstr 	= "T ";
+char	*bdir = ".git/fs/HEAD/tree";
+int	useidx	= 1;
+int	nrel;
+int	quiet;
+int	dirty;
+int	printflg;
 
-Cache seencache[NCACHE];
-int quiet;
-int useindex;
-int printflg;
-char *bdir = ".git/fs/HEAD/tree";
-char *rstr = "R ";
-char *tstr = "T ";
-char *mstr = "M ";
-char *astr = "A ";
+Idxent	*idx;
+int	idxsz;
+int	nidx;
+int	staleidx;
+Idxent	*wdir;
+int	wdirsz;
+int	nwdir;
 
+int	loadwdir(char*);
+
 int
 seen(Dir *dir)
 {
+	Seen *c;
 	Dir *dp;
 	int i;
-	Cache *c;
 
-	c = &seencache[dir->qid.path&(NCACHE-1)];
+	c = &seentab[dir->qid.path&(NCACHE-1)];
 	dp = c->cache;
 	for(i=0; i<c->n; i++, dp++)
-		if(dir->qid.path == dp->qid.path &&
-		   dir->type == dp->type &&
-		   dir->dev == dp->dev)
+		if(dir->qid.path == dp->qid.path
+		&& dir->qid.type == dp->qid.type
+		&& dir->dev == dp->dev)
 			return 1;
 	if(c->n == c->max){
 		if (c->max == 0)
@@ -63,129 +82,100 @@
 	return 0;
 }
 
-void
-grow(Wres *r)
-{
-	if(r->npath == r->pathsz){
-		r->pathsz = 2*r->pathsz + 1;
-		r->path = erealloc(r->path, r->pathsz * sizeof(char*));
-	}
-}
-
 int
-readpaths(Wres *r, char *pfx, char *dir)
+checkedin(Idxent *e, int change)
 {
-	char *f, *sub, *full, *sep;
-	Dir *d;
-	int fd, ret, i, n;
+	char *p;
+	int r;
 
-	d = nil;
-	ret = -1;
-	sep = "";
-	if(dir[0] != 0)
-		sep = "/";
-	if((full = smprint("%s/%s", pfx, dir)) == nil)
-		sysfatal("smprint: %r");
-	if((fd = open(full, OREAD)) < 0)
-		goto error;
-	while((n = dirread(fd, &d)) > 0){
-		for(i = 0; i < n; i++){
-			if(seen(&d[i]))
-				continue;
-			if(d[i].qid.type & QTDIR){
-				if((sub = smprint("%s%s%s", dir, sep, d[i].name)) == nil)
-					sysfatal("smprint: %r");
-				if(readpaths(r, pfx, sub) == -1){
-					free(sub);
-					goto error;
-				}
-				free(sub);
-			}else{
-				grow(r);
-				if((f = smprint("%s%s%s", dir, sep, d[i].name)) == nil)
-					sysfatal("smprint: %r");
-				r->path[r->npath++] = f;
-			}
-		}
-		free(d);
+	p = smprint("%s/%s", bdir, e->path);
+	r = access(p, AEXIST);
+	if(r == 0 && change){
+		e->state = 'T';
+		staleidx = 1;
 	}
-	ret = r->npath;
-error:
-	close(fd);
-	free(full);
-	return ret;
+	free(p);
+	return r == 0;
 }
 
 int
-cmp(void *pa, void *pb)
+indexed(char *path, int isdir)
 {
-	return strcmp(*(char **)pa, *(char **)pb);
-}
+	int lo, hi, mid, n, r;
+	char *s;
 
-void
-dedup(Wres *r)
-{
-	int i, o;
-
-	if(r->npath <= 1)
-		return;
-	o = 0;
-	qsort(r->path, r->npath, sizeof(r->path[0]), cmp);
-	for(i = 1; i < r->npath; i++)
-		if(strcmp(r->path[o], r->path[i]) != 0)
-			r->path[++o] = r->path[i];
-	r->npath = o + 1;
+	if(!useidx){
+		s = smprint("%s/%s", bdir, path);
+		r = access(s, AEXIST);
+		free(s);
+		return r == 0;
+	}
+	s = path;
+	if(isdir)
+		s = smprint("%s/", path);
+	r = -1;
+	lo = 0;
+	hi = nidx-1;
+	n = strlen(s);
+	while(lo <= hi){
+		mid = (hi + lo) / 2;
+		if(isdir)
+			r = strncmp(s, idx[mid].path, n);
+		else
+			r = strcmp(s, idx[mid].path);
+		if(r < 0)
+			hi = mid-1;
+		else if(r > 0)
+			lo = mid+1;
+		else
+			break;
+	}
+	if(isdir)
+		free(s);
+	return r == 0;
 }
 
 int
-sameqid(Dir *d, char *qf)
+idxcmp(void *pa, void *pb)
 {
-	char indexqid[64], fileqid[64], *p;
-	int fd, n;
+	Idxent *a, *b;
+	int c;
 
-	if(!d)
-		return 0;
-	if((fd = open(qf, OREAD)) == -1)
-		return 0;
-	if((n = readn(fd, indexqid, sizeof(indexqid) - 1)) == -1)
-		return 0;
-	indexqid[n] = 0;
-	close(fd);
-	if((p = strpbrk(indexqid, "  \t\n\r")) != nil)
-		*p = 0;
-
-	snprint(fileqid, sizeof(fileqid), "%ullx.%uld.%.2uhhx",
-		d->qid.path, d->qid.vers, d->qid.type);
-
-	if(strcmp(indexqid, fileqid) == 0)
-		return 1;
-	return 0;
+	a = (Idxent*)pa;
+	b = (Idxent*)pb;
+	if((c = strcmp(a->path, b->path)) != 0)
+		return c;
+	assert(a->order != b->order);
+	return a-> order < b->order ? -1 : 1;
 }
 
-void
-writeqid(Dir *d, char *qf)
-{
-	int fd;
-
-	if((fd = create(qf, OWRITE, 0666)) == -1)
-		return;
-	fprint(fd, "%ullx.%uld.%.2uhhx\n",
-		d->qid.path, d->qid.vers, d->qid.type);
-	close(fd);
-}
-
 int
-samedata(char *pa, char *pb)
+samedata(Idxent *a, Idxent *b)
 {
-	char ba[32*1024], bb[32*1024];
+	char *gitpath, ba[IOUNIT], bb[IOUNIT];
 	int fa, fb, na, nb, same;
+	Dir *da, *db;
 
+	if(a != nil){
+		if(a->qid.path == b->qid.path
+		&& a->qid.vers == b->qid.vers
+		&& a->qid.type == b->qid.type
+		&& a->mode == b->mode
+		&& a->mode != 0)
+			return 1;
+	}
+
+	da = nil;
+	db = nil;
 	same = 0;
-	fa = open(pa, OREAD);
-	fb = open(pb, OREAD);
-	if(fa == -1 || fb == -1){
+	if((gitpath = smprint("%s/%s", bdir, b->path)) == nil)
+		sysfatal("smprint: %r");
+	fa = open(b->path, OREAD);
+	fb = open(gitpath, OREAD);
+	if(fa == -1 || fb == -1)
 		goto mismatch;
-	}
+	da = dirfstat(fa);
+	db = dirfstat(fb);
 	while(1){
 		if((na = readn(fa, ba, sizeof(ba))) == -1)
 			goto mismatch;
@@ -198,8 +188,19 @@
 		if(memcmp(ba, bb, na) != 0)
 			goto mismatch;
 	}
+	if(a != nil){
+		a->qid = b->qid;
+		if(da != nil)
+			a->mode = da->mode;
+	}
+	staleidx = 1;
+	if(da->mode != db->mode)
+		goto mismatch;
 	same = 1;
+
 mismatch:
+	free(da);
+	free(db);
 	if(fa != -1)
 		close(fa);
 	if(fb != -1)
@@ -207,7 +208,129 @@
 	return same;
 }
 
+int
+loadent(char *dir, Dir *d, int fullpath)
+{
+	char *path;
+	int ret, isdir;
+	Idxent *e;
+
+	if(fullpath)
+		path = strdup(dir);
+	else
+		path = smprint("%s/%s", dir, d->name);
+	if(path == nil)
+		sysfatal("smprint: %r");
+
+	ret = 0;
+	isdir = d->qid.type & QTDIR;
+	cleanname(path);
+	if((printflg & Uflg) == 0 && !indexed(path, isdir))
+		return 0;
+	if(isdir){
+		ret = loadwdir(path);
+		free(path);
+	}else{
+		if(nwdir == wdirsz){
+			wdirsz += wdirsz/2;
+			wdir = erealloc(wdir, wdirsz*sizeof(Idxent));
+		}
+		e = wdir + nwdir;
+		e->path = path;
+		e->qid = d->qid;
+		e->mode = d->mode;
+		e->order = nwdir;
+		e->state = 'T';
+		nwdir++;
+	}
+	return ret;
+}
+
+int
+loadwdir(char *path)
+{
+	int fd, ret, i, n;
+	Dir *d, *e;
+
+	d = nil;
+	e = nil;
+	ret = -1;
+
+	cleanname(path);
+	if(strncmp(path, ".git/", 5) == 0)
+		return 0;
+	if((fd = open(path, OREAD)) < 0)
+		goto error;
+	if((e = dirfstat(fd)) == nil)
+		sysfatal("fstat: %r");
+	if(e->qid.type & QTDIR)
+		while((n = dirread(fd, &d)) > 0){
+			for(i = 0; i < n; i++)
+				if(loadent(path, &d[i], 0) == -1)
+					goto error;
+			free(d);
+		}
+	else{
+		if(loadent(path, e, 1) == -1)
+			goto error;
+	}
+	ret = 0;
+error:
+	free(e);
+	if(fd != -1)
+		close(fd);
+	return ret;
+}
+
+int
+pfxmatch(char *p, char **pfx, int *pfxlen, int npfx)
+{
+	int i;
+
+	if(npfx == 0)
+		return 1;
+	for(i = 0; i < npfx; i++){
+		if(strncmp(p, pfx[i], pfxlen[i]) == 0
+		&& (p[pfxlen[i]] == '/' || p[pfxlen[i]] == 0))
+			return 1;
+		if(strcmp(pfx[i], ".") == 0 || *pfx[i] == 0)
+			return 1;
+	}
+	return 0;
+}
+
+
+char*
+reporel(char *s)
+{
+	char *p;
+	int n;
+
+	if(*s == '/')
+		s = strdup(s);
+	else
+		s = smprint("%s/%s", wdirpath, s);
+	p = cleanname(s);
+	n = strlen(repopath);
+	if(strncmp(s, repopath, n) == 0)
+		p = s + n;
+	else
+		sysfatal("path outside repo: %s", s);
+	if(*p == '/')
+		p++;
+	memmove(s, p, strlen(p)+1);
+	return s;
+}
+
 void
+show(Biobuf *o, int flg, char *str, char *path)
+{
+	dirty |= flg;
+	if(!quiet && (printflg & flg))
+		Bprint(o, "%s%s\n", str, path);
+}
+
+void
 usage(void)
 {
 	fprint(2, "usage: %s [-qbc] [-f filt] [-b base] [paths...]\n", argv0);
@@ -217,14 +340,21 @@
 void
 main(int argc, char **argv)
 {
-	char *rpath, *tpath, *bpath, *base, buf[8], repo[512];
-	char *p, *e;
-	int i, dirty;
-	Wres r;
-	Hash h;
-	Dir *d;
+	char *p, *e, *ln, *base, **argrel, *parts[4], xbuf[8];
+	int i, j, c, line, wfd, *argn;
+	Biobuf *f, *o, *w;
+	Hash h, hd;
+	Dir rn;
 
 	gitinit();
+	if(access(".git/fs/ctl", AEXIST) != 0)
+		sysfatal("no running git/fs");
+	if(getwd(wdirpath, sizeof(wdirpath)) == nil)
+		sysfatal("getwd: %r");
+	if(findrepo(repopath, sizeof(repopath), &nrel) == -1)
+		sysfatal("find root: %r");
+	if(chdir(repopath) == -1)
+		sysfatal("chdir: %r");
 
 	ARGBEGIN{
 	case 'q':
@@ -235,6 +365,7 @@
 		tstr = "";
 		mstr = "";
 		astr = "";
+		ustr = "";
 		break;
 	case 'f':
 		for(p = EARGF(usage()); *p; p++)
@@ -243,109 +374,164 @@
 			case 'A':	printflg |= Aflg;	break;
 			case 'M':	printflg |= Mflg;	break;
 			case 'R':	printflg |= Rflg;	break;
+			case 'U':	printflg |= Uflg;	break;
 			default:	usage();		break;
 		}
 		break;
 	case 'b':
-		useindex = 0;
+		useidx = 0;
 		base = EARGF(usage());
 		if(resolveref(&h, base) == -1)
 			sysfatal("no such ref '%s'", base);
+		/* optimization: we're a lot faster when using the index */
+		if(resolveref(&hd, "HEAD") == 0 && hasheq(&h, &hd))
+			useidx = 1;
 		bdir = smprint(".git/fs/object/%H/tree", h);
 		break;
 	default:
 		usage();
-	}ARGEND
+	}ARGEND;
 
-	gitinit();
-	if(findrepo(repo, sizeof(repo)) == -1)
-		sysfatal("find root: %r");
-	if(chdir(repo) == -1)
-		sysfatal("chdir: %r");
-	if(access(".git/fs/ctl", AEXIST) != 0)
-		sysfatal("no running git/fs");
-	dirty = 0;
-	memset(&r, 0, sizeof(r));
 	if(printflg == 0)
 		printflg = Tflg | Aflg | Mflg | Rflg;
-	if(argc == 0){
-		if(access(TDIR, AEXIST) == 0 && readpaths(&r, TDIR, "") == -1)
-			sysfatal("read tracked: %r");
-		if(access(RDIR, AEXIST) == 0 && readpaths(&r, RDIR, "") == -1)
-			sysfatal("read removed: %r");
-		if(access(bdir, AEXIST) == 0 && readpaths(&r, bdir, "") == -1)
-			sysfatal("read base: %r");
-	}else{
-		for(i = 0; i < argc; i++){
-			tpath = smprint(TDIR"/%s", argv[i]);
-			rpath = smprint(RDIR"/%s", argv[i]);
-			bpath = smprint("%s/%s", bdir, argv[i]);
-			if((d = dirstat(tpath)) == nil && (d = dirstat(rpath)) == nil && (d = dirstat(bpath)) == nil)
-				goto nextarg;
-			if(d->mode & DMDIR){
-				readpaths(&r, TDIR, argv[i]);
-				readpaths(&r, RDIR, argv[i]);
-				readpaths(&r, bdir, argv[i]);
-			}else{
-				grow(&r);
-				r.path[r.npath++] = estrdup(argv[i]);
+
+	nidx = 0;
+	idxsz = 32;
+	idx = emalloc(idxsz*sizeof(Idxent));
+	nwdir = 0;
+	wdirsz = 32;
+	wdir = emalloc(wdirsz*sizeof(Idxent));
+	argrel = emalloc(argc*sizeof(int));
+	argn = emalloc(argc*sizeof(int));
+	for(i = 0; i < argc; i++){
+		argrel[i] = reporel(argv[i]);
+		argn[i] = strlen(argrel[i]);
+	}
+	if((o = Bfdopen(1, OWRITE)) == nil)
+		sysfatal("open out: %r");
+	if(useidx){
+		if((f = Bopen(".git/INDEX9", OREAD)) == nil){
+			fprint(2, "open index: %r\n");
+			if(access(".git/index9", AEXIST) == 0){
+				fprint(2, "index format conversion needed:\n");
+				fprint(2, "\tcd %s && git/fs\n", repopath);
+				fprint(2, "\t@{cd .git/fs/HEAD/tree && walk -f | sed 's/^/T NOQID 0 /'} >> .git/INDEX9\n");
+				fprint(2, "\t@{cd .git/index9/removed && walk -f | sed 's/^/R NOQID 0 /'} >> .git/INDEX9\n");
 			}
-nextarg:
-			free(bpath);
-			free(tpath);
-			free(rpath);
-			free(d);
+			exits("noindex");
 		}
+		line = 0;
+		while((ln = Brdstr(f, '\n', 1)) != nil){
+			line++;
+			/* allow blank lines */
+			if(ln[0] == 0 || ln[0] == '\n')
+				continue;
+			if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts))
+				sysfatal(".git/INDEX9:%d: corrupt index", line);
+			if(nidx == idxsz){
+				idxsz += idxsz/2;
+				idx = realloc(idx, idxsz*sizeof(Idxent));
+			}
+			cleanname(parts[3]);
+			idx[nidx].state = *parts[0];
+			idx[nidx].qid = parseqid(parts[1]);
+			idx[nidx].mode = strtol(parts[2], nil, 8);
+			idx[nidx].path = strdup(parts[3]);
+			idx[nidx].order = nidx;
+			nidx++;
+			free(ln);
+		}
+		qsort(idx, nidx, sizeof(Idxent), idxcmp);
 	}
-	dedup(&r);
 
-	for(i = 0; i < r.npath; i++){
-		p = r.path[i];
-		d = dirstat(p);
-		if(d && d->mode & DMDIR)
-			goto next;
-		rpath = smprint(RDIR"/%s", p);
-		tpath = smprint(TDIR"/%s", p);
-		bpath = smprint("%s/%s", bdir, p);
-		/* Fast path: we don't want to force access to the rpath. */
-		if(useindex && d && sameqid(d, tpath)) {
-			if(!quiet && (printflg & Tflg))
-				print("%s%s\n", tstr, p);
+	if(argc == 0)
+		loadwdir(".");
+	else for(i = 0; i < argc; i++)
+		loadwdir(argrel[i]);
+	qsort(wdir, nwdir, sizeof(Idxent), idxcmp);
+	i = 0;
+	j = 0;
+	while(i < nidx || j < nwdir){
+		/* find the last entry we tracked for a path */
+		while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0){
+			staleidx = 1;
+			i++;
+		}
+		while(j+1 < nwdir && strcmp(wdir[j].path, wdir[j+1].path) == 0)
+			j++;
+		if(i < nidx && !pfxmatch(idx[i].path, argrel, argn, argc)){
+			i++;
+			continue;
+		}
+		if(i >= nidx)
+			c = 1;
+		else if(j >= nwdir)
+			c = -1;
+		else
+			c = strcmp(idx[i].path, wdir[j].path);
+		/* exists in both index and on disk */
+		if(c == 0){
+			if(idx[i].state == 'R' && checkedin(&idx[i], 0))
+				show(o, Rflg, rstr, idx[i].path);
+			else if(idx[i].state == 'A' && !checkedin(&idx[i], 1))
+				show(o, Aflg, astr, idx[i].path);
+			else if(!samedata(&idx[i], &wdir[j]))
+				show(o, Mflg, mstr, idx[i].path);
+			else
+				show(o, Tflg, tstr, idx[i].path);
+			i++;
+			j++;
+		/* only exists in index */
+		}else if(c < 0){
+			if(checkedin(&idx[i], 0))
+				show(o, Rflg, rstr, idx[i].path);
+			i++;
+		/* only exists on disk */
 		}else{
-			if(d == nil || (useindex && access(rpath, AEXIST) == 0)){
-				if(access(bpath, AEXIST) == 0){
-					dirty |= Rflg;
-					if(!quiet && (printflg & Rflg))
-						print("%s%s\n", rstr, p);
-				}
-			}else if(access(bpath, AEXIST) == -1) {
-				dirty |= Aflg;
-				if(!quiet && (printflg & Aflg))
-					print("%s%s\n", astr, p);
-			}else if(samedata(p, bpath)){
-				if(!quiet && (printflg & Tflg))
-					print("%s%s\n", tstr, p);
-				if(useindex)
-					writeqid(d, tpath);
-			}else{
-				dirty |= Mflg;
-				if(!quiet && (printflg & Mflg))
-					print("%s%s\n", mstr, p);
-			}
+			if(!useidx && checkedin(&wdir[j], 0)){
+				if(samedata(nil, &wdir[j]))
+					show(o, Tflg, tstr, wdir[j].path);
+				else
+					show(o, Mflg, mstr, wdir[j].path);
+			}else if(printflg & Uflg)
+				show(o, Uflg, ustr, wdir[j].path);
+			j++;
 		}
-		free(rpath);
-		free(tpath);
-		free(bpath);
-next:
-		free(d);
 	}
+	Bterm(o);
+
+	if(useidx && staleidx)
+	if((wfd = create(".git/INDEX9.new", OWRITE, 0644)) != -1){
+		if((w = Bfdopen(wfd, OWRITE)) == nil){
+			close(wfd);
+			goto Nope;
+		}
+		for(i = 0; i < nidx; i++){
+			while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0)
+				i++;
+			Bprint(w, "%c %Q %o %s\n",
+				idx[i].state,
+				idx[i].qid, 
+				idx[i].mode,
+				idx[i].path);
+		}
+		Bterm(w);
+		nulldir(&rn);
+		rn.name = "INDEX9";
+		if(remove(".git/INDEX9") == -1)
+			goto Nope;
+		if(dirwstat(".git/INDEX9.new", &rn) == -1)
+			sysfatal("rename: %r");
+	}
+
+Nope:
 	if(!dirty)
 		exits(nil);
 
-	p = buf;
-	e = buf + sizeof(buf);
+	p = xbuf;
+	e = p + sizeof(xbuf);
 	for(i = 0; (1 << i) != Tflg; i++)
 		if(dirty & (1 << i))
-			p = seprint(p, e, "%c", "DMAT"[i]);
-	exits(buf);
+			p = seprint(p, e, "%c", "RMAUT"[i]);
+	exits(xbuf);
 }


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-09-18  3:51 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-18  2:21 [9front] [v2] git: new index format, 235x faster walks ori
2023-09-18  3:49 ` ori

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).