9front - general discussion about 9front
 help / color / mirror / Atom feed
From: ori@eigenstate.org
To: 9front@9front.org
Subject: Re: [9front] [v2] git: new index format, 235x faster walks
Date: Sun, 17 Sep 2023 23:49:59 -0400	[thread overview]
Message-ID: <91B9FC3B10A4C61A17F2EA04575388E7@eigenstate.org> (raw)
In-Reply-To: <C700D47DA6F5C801827FD8F2D29D274D@eigenstate.org>

Sorry for the spam; One more revision; found a couple of missing
changes to the new 4 column index format.

diff 597ff0b59658032e141553f286e02b9370f995b5 uncommitted
--- a/sys/src/cmd/git/add
+++ b/sys/src/cmd/git/add
@@ -7,32 +7,17 @@
 flagfmt='r:remove'; args='file ...'
 eval `''{aux/getflags $*} || exec aux/usage
 
-add='tracked'
-del='removed'
-if(~ $remove 1){
-	add='removed'
-	del='tracked'
-}
+s=A
+if(~ $remove 1)
+	s=R
 if(~ $#* 0)
 	exec aux/usage
 
 paths=`$nl{cleanname -d $gitrel $* | drop $gitroot}
-if(~ $add tracked)
+if(~ $s A)
 	files=`$nl{walk -f ./$paths}
 if not
-	files=`$nl{cd .git/index9/tracked/ && walk -f ./$paths}
-
-for(f in $files){
-	if(! ~ `$nl{cleanname $f} .git/*){
-		addpath=.git/index9/$add/$f
-		delpath=.git/index9/$del/$f
-		mkdir -p `$nl{basename -d $addpath}
-		mkdir -p `$nl{basename -d $delpath}
-		# We don't want a matching qid, so that
-		# git/walk doesn't think this came from
-		# a checkout.
-		echo -n > $addpath
-		rm -f $delpath
-	}
-}
+	files=`$nl{cd .git/fs/HEAD/tree && walk -f ./$paths}
+for(f in $files)
+	echo $s NOQID 0 $f >> .git/INDEX9
 exit ''
--- a/sys/src/cmd/git/branch
+++ b/sys/src/cmd/git/branch
@@ -84,7 +84,6 @@
 for(m in $cleanpaths){
 	d=`$nl{basename -d $m}
 	mkdir -p $d
-	mkdir -p .git/index9/tracked/$d
 	# Modifications can turn a file into
 	# a directory, or vice versa, so we
 	# need to delete and copy the files
@@ -97,11 +96,11 @@
 		b=file
 	if(! ~ $a $b){
 		rm -rf $m
-		rm -rf .git/index9/tracked/$m
+		echo R NOQID 0 $m >> .git/INDEX9
 	}
 	if(~ $b file){
 		cp -x -- $basedir/tree/$m $m
-		walk -eq $m > .git/index9/tracked/$m
+		echo T NOQID 0 $m >> .git/INDEX9
 		touch $m
 	}
 }
@@ -112,9 +111,9 @@
 	merge1 $ours $ours $common $theirs
 }
 
-if(! ~ $#deleted 0){
-	rm -f $deleted
-	rm -f .git/index9/tracked/$deleted
+for(d in $deleted){
+	rm -f $d
+	echo R NOQID 0 $d >> .git/INDEX9
 }
 
 echo ref: $new > .git/HEAD
--- a/sys/src/cmd/git/clone
+++ b/sys/src/cmd/git/clone
@@ -87,11 +87,8 @@
 		git/fs
 		@ {builtin cd $tree && tar cif /fd/1 .} | @ {tar xf /fd/0} \
 			|| die 'checkout failed:' $status
-		for(f in `$nl{walk -f $tree | drop $tree}){
-			idx=.git/index9/tracked/$f
-			mkdir -p `$nl{basename -d $idx}
-			walk -eq ./$f > $idx
-		}
+		{for(f in `$nl{cd $tree && walk -f})
+			echo 'T NOQID 0 '$f} > .git/INDEX9
 	}
 	if not{
 		echo no default branch >[1=2]
--- a/sys/src/cmd/git/commit
+++ b/sys/src/cmd/git/commit
@@ -65,8 +65,8 @@
 fn parents{
 	if(! ~ $#revise 0)
 		parents=`{cat $gitfs/HEAD/parent}
-	if not if(test -f .git/index9/merge-parents)
-		parents=`{cat .git/index9/merge-parents | sort | uniq}
+	if not if(test -f .git/merge-parents)
+		parents=`{cat .git/merge-parents | sort | uniq}
 	if not if(~ $initial true)
 		parents=()
 	if not
@@ -78,7 +78,7 @@
 	if(! ~ $#parents 0)
 		pflags='-p'^$parents
 	hash=`{git/save -n $"name -e $"email -m $"msg $pflags $files || die $status}
-	rm -f .git/index9/merge-parents
+	rm -f .git/merge-parents
 }
 
 fn update{
@@ -89,14 +89,10 @@
 	echo $branch: $hash
 	echo $hash > $refpath
 	for(f in $files){
-		if(test -e .git/index9/removed/$f || ! test -e $f){
-			rm -f .git/index9/removed/$f
-			rm -f .git/index9/tracked/$f
-		}
-		if not{
-			mkdir -p `{basename -d $f}
-			walk -eq $f > .git/index9/tracked/$f
-		}
+		if(! test -e $f && ! test -e .git/object/$hash/tree/$f)
+			echo R NOQID 0 $f >> .git/INDEX9
+		if not
+			echo T NOQID 0 $f >> .git/INDEX9
 	}
 }
 
@@ -120,12 +116,12 @@
 }
 
 files=()
-if(test -f .git/index9/merge-parents)
-	files=`$nl{git/query -c `{cat .git/index9/merge-parents} | sed 's/^..//'}
+if(test -f .git/merge-parents)
+	files=`$nl{git/query -c `{cat .git/merge-parents} | sed 's/^..//'}
 if(! ~ $#* 0)
 	files=($files `$nl{git/walk -c `$nl{cleanname -d $gitrel $*}})
-if(~ $status '' || ~ $#files 0 && ! test -f .git/index9/merge-parents && ~ $#revise 0)
-	die 'nothing to commit' $status
+if(~ $status '' || ~ $#files 0 && ! test -f .git/merge-parents && ~ $#revise 0)
+	die 'nothing to commit'
 @{
 	flag e +
 	whoami
--- a/sys/src/cmd/git/conf.c
+++ b/sys/src/cmd/git/conf.c
@@ -59,7 +59,7 @@
 main(int argc, char **argv)
 {
 	char repo[512], *p, *s;
-	int i, j;
+	int i, j, nrel;
 
 	ARGBEGIN{
 	case 'f':	file[nfile++]=EARGF(usage());	break;
@@ -69,7 +69,7 @@
 	}ARGEND;
 
 	if(findroot){
-		if(findrepo(repo, sizeof(repo)) == -1)
+		if(findrepo(repo, sizeof(repo), &nrel) == -1)
 			sysfatal("%r");
 		print("%s\n", repo);
 		exits(nil);
--- a/sys/src/cmd/git/diff
+++ b/sys/src/cmd/git/diff
@@ -7,8 +7,10 @@
 flagfmt='c:commit branch, s:summarize'; args='[file ...]'
 eval `''{aux/getflags $*} || exec aux/usage
 
-if(~ $#commit 0)
+if(~ $#commit 0){
 	commit=HEAD
+	cparam=()
+}
 
 files=()
 if(! ~ $#* 0)
@@ -16,7 +18,7 @@
 
 branch=`{git/query -p $commit}
 if(~ $summarize 1){
-	git/walk -fMAR -b $commit $files
+	git/walk -fMAR $cparam $files
 	exit
 }
 
@@ -24,7 +26,7 @@
 mntgen /mnt/scratch
 bind $branch/tree/ /mnt/scratch/a
 bind . /mnt/scratch/b
-for(f in `$nl{git/walk -c -fRMA -b $commit $files}){
+for(f in `$nl{git/walk -c -fRMA $cparam $files}){
 	if(~ $#showed 0){
 		echo diff `{git/query $commit} uncommitted
 		showed=1
--- a/sys/src/cmd/git/git.h
+++ b/sys/src/cmd/git/git.h
@@ -21,6 +21,7 @@
 typedef struct Dblock	Dblock;
 typedef struct Objq	Objq;
 typedef struct Qelt	Qelt;
+typedef struct Idxent	Idxent;
 
 enum {
 	Pathmax		= 512,
@@ -190,6 +191,13 @@
 	int	len;
 };
 
+struct Idxent {
+	char	*path;
+	Qid	qid;
+	int	mode;
+	int	order;
+	char	state;
+};
 
 #define GETBE16(b)\
 		((((b)[0] & 0xFFul) <<  8) | \
@@ -301,9 +309,10 @@
 int	hassuffix(char *, char *);
 int	swapsuffix(char *, int, char *, char *, char *);
 char	*strip(char *);
-int	findrepo(char *, int);
+int	findrepo(char *, int, int*);
 int	showprogress(int, int);
 u64int	murmurhash2(void*, usize);
+Qid	parseqid(char*);
 
 /* packing */
 void	dtinit(Dtab *, Object*);
--- a/sys/src/cmd/git/import
+++ b/sys/src/cmd/git/import
@@ -98,7 +98,8 @@
 		if(~ $#nocommit 0){
 			if(hash=`{git/save -n $aname -e $amail -N $name -E $email -m $msg -d $date $parents $files}){
 				echo $hash > $refpath
-				rm -f .git/index9/removed/$files
+				for(f in $files)
+					echo T NOQID 0 $f >> .git/INDEX9
 			}
 		}
 		status=''''
--- a/sys/src/cmd/git/init
+++ b/sys/src/cmd/git/init
@@ -31,7 +31,7 @@
 	echo '[branch "'$branch'"]'
 	echo '	remote = origin'
 }
-
+>$dir/.git/INDEX9
 >$dir/.git/HEAD {
 	echo ref: refs/heads/$branch
 }
--- a/sys/src/cmd/git/log.c
+++ b/sys/src/cmd/git/log.c
@@ -239,7 +239,7 @@
 main(int argc, char **argv)
 {
 	char path[1024], repo[1024], *p, *r;
-	int i, nrepo;
+	int i, nrel, nrepo;
 
 	ARGBEGIN{
 	case 'e':
@@ -259,7 +259,7 @@
 		break;
 	}ARGEND;
 
-	if(findrepo(repo, sizeof(repo)) == -1)
+	if(findrepo(repo, sizeof(repo), &nrel) == -1)
 		sysfatal("find root: %r");
 	nrepo = strlen(repo);
 	if(argc != 0){
--- a/sys/src/cmd/git/merge
+++ b/sys/src/cmd/git/merge
@@ -38,8 +38,8 @@
 	git/revert .
 	exit ''
 }
-echo $ours >> .git/index9/merge-parents
-echo $theirs >> .git/index9/merge-parents
+echo $ours >> .git/merge-parents
+echo $theirs >> .git/merge-parents
 
 merge $ours $base $theirs
 >[1=2] echo 'merge complete: remember to commit'
--- a/sys/src/cmd/git/pack.c
+++ b/sys/src/cmd/git/pack.c
@@ -66,7 +66,7 @@
 Object *lruhead;
 Object *lrutail;
 vlong	ncache;
-vlong	cachemax = 512*MiB;
+vlong	cachemax = 1024*MiB;
 Packf	*packf;
 int	npackf;
 int	openpacks;
--- a/sys/src/cmd/git/query.c
+++ b/sys/src/cmd/git/query.c
@@ -152,10 +152,10 @@
 void
 main(int argc, char **argv)
 {
-	int i, j, n;
-	Hash *h;
-	char *p, *e, *s, *objpfx;
 	char query[2048], repo[512];
+	char *p, *e, *s, *objpfx;
+	int i, j, n, nrel;
+	Hash *h;
 
 	ARGBEGIN{
 	case 'd':	chattygit++;	break;
@@ -170,7 +170,7 @@
 
 	if(argc == 0)
 		usage();
-	if(findrepo(repo, sizeof(repo)) == -1)
+	if(findrepo(repo, sizeof(repo), &nrel) == -1)
 		sysfatal("find root: %r");
 	if(chdir(repo) == -1)
 		sysfatal("chdir: %r");
--- a/sys/src/cmd/git/save.c
+++ b/sys/src/cmd/git/save.c
@@ -10,6 +10,7 @@
 	char *dat;
 	int ndat;
 };
+
 enum {
 	Maxparents = 16,
 };
@@ -21,7 +22,9 @@
 char	*commitmsg;
 Hash	parents[Maxparents];
 int	nparents;
-
+Idxent	*idx;
+int	idxsz;
+int	nidx;
 int
 gitmode(Dirent *e)
 {
@@ -38,6 +41,20 @@
 }
 
 int
+idxcmp(void *pa, void *pb)
+{
+	Idxent *a, *b;
+	int c;
+
+	a = (Idxent*)pa;
+	b = (Idxent*)pb;
+	if((c = strcmp(a->path, b->path)) != 0)
+		return c;
+	assert(a->order != b->order);
+	return a-> order < b->order ? -1 : 1;
+}
+
+int
 entcmp(void *pa, void *pb)
 {
 	char abuf[256], bbuf[256], *ae, *be;
@@ -184,27 +201,22 @@
 int
 tracked(char *path)
 {
-	char ipath[256];
-	Dir *d;
+	int lo, hi, mid, r;
 
-	/* Explicitly removed. */
-	snprint(ipath, sizeof(ipath), ".git/index9/removed/%s", path);
-	if(strstr(cleanname(ipath), ".git/index9/removed") != ipath)
-		sysfatal("path %s leaves index", ipath);
-	d = dirstat(ipath);
-	if(d != nil && d->qid.type != QTDIR){
-		free(d);
-		return 0;
+	r = -1;
+	lo = 0;
+	hi = nidx-1;
+	while(lo <= hi){
+		mid = (hi + lo) / 2;
+		r = strcmp(path, idx[mid].path);
+		if(r < 0)
+			hi = mid-1;
+		else if(r > 0)
+			lo = mid+1;
+		else
+			break;
 	}
-
-	/* Explicitly added. */
-	snprint(ipath, sizeof(ipath), ".git/index9/tracked/%s", path);
-	if(strstr(cleanname(ipath), ".git/index9/tracked") != ipath)
-		sysfatal("path %s leaves index", ipath);
-	if(access(ipath, AEXIST) == 0)
-		return 1;
-
-	return 0;
+	return r == 0;
 }
 
 int
@@ -354,10 +366,11 @@
 void
 main(int argc, char **argv)
 {
+	char *ln, *dstr, *parts[4], cwd[1024];
+	int i, r, line, ncwd;
 	Hash th, ch;
-	char *dstr, cwd[1024];
-	int i, r, ncwd;
 	vlong date;
+	Biobuf *f;
 	Object *t;
 
 	gitinit();
@@ -425,6 +438,33 @@
 	}
 
 	t = findroot();
+	nidx = 0;
+	idxsz = 32;
+	idx = emalloc(idxsz*sizeof(Idxent));
+	if((f = Bopen(".git/INDEX9", OREAD)) == nil)
+		sysfatal("open index: %r");
+	line = 0;
+	while((ln = Brdstr(f, '\n', 1)) != nil){
+		line++;
+		if(ln[0] == 0 || ln[0] == '\n')
+			continue;
+		if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts))
+			sysfatal(".git/INDEX9:%d: corrupt index", line);
+		if(nidx == idxsz){
+			idxsz += idxsz/2;
+			idx = realloc(idx, idxsz*sizeof(Idxent));
+		}
+		cleanname(parts[3]);
+		idx[nidx].state = *parts[0];
+		idx[nidx].qid = parseqid(parts[1]);
+		idx[nidx].mode = strtol(parts[2], nil, 8);
+		idx[nidx].path = strdup(parts[3]);
+		idx[nidx].order = nidx;
+		nidx++;
+		free(ln);
+	}
+	Bterm(f);
+	qsort(idx, nidx, sizeof(Idxent), idxcmp);
 	r = treeify(t, argv, argv + argc, 0, &th);
 	if(r == -1)
 		sysfatal("could not commit: %r\n");
--- a/sys/src/cmd/git/test/basic.rc
+++ b/sys/src/cmd/git/test/basic.rc
@@ -64,4 +64,3 @@
 q diff -c scratch/upstream/file.txt scratch/downstream/file.txt || die mismatch
 ! test -e scratch/upstream/file2.txt || die mismatch
 ! test -e scratch/downstream/file2.txt || die mismatch
-
--- a/sys/src/cmd/git/test/util.rc
+++ b/sys/src/cmd/git/test/util.rc
@@ -1,5 +1,5 @@
 fn q {
-	{$* > /tmp/out.$pid && rm /tmp/out.$pid} || cat /tmp/out
+	{$* > /tmp/out.$pid && rm /tmp/out.$pid} || cat /tmp/out.$pid
 }
 fn qq {
 	$* >/dev/null >[2]/dev/null
--- a/sys/src/cmd/git/util.c
+++ b/sys/src/cmd/git/util.c
@@ -192,8 +192,10 @@
 	Qid q;
 
 	q = va_arg(fmt->args, Qid);
-	return fmtprint(fmt, "Qid{path=0x%llx(dir:%d,obj:%lld), vers=%ld, type=%d}",
-	    q.path, QDIR(&q), (q.path >> 8), q.vers, q.type);
+	if(q.path == ~0ULL && q.vers == ~0UL && q.type == 0xff)
+		return fmtprint(fmt, "NOQID");
+	else
+		return fmtprint(fmt, "%llux.%lud.%hhx", q.path, q.vers, q.type);
 }
 
 void
@@ -294,7 +296,7 @@
 
 /* Finds the directory containing the git repo. */
 int
-findrepo(char *buf, int nbuf)
+findrepo(char *buf, int nbuf, int *nrel)
 {
 	char *p, *suff;
 
@@ -302,6 +304,7 @@
 	if(getwd(buf, nbuf - strlen(suff) - 1) == nil)
 		return -1;
 
+	*nrel = 0;
 	for(p = buf + strlen(buf); p != nil; p = strrchr(buf, '/')){
 		strcpy(p, suff);
 		if(access(buf, AEXIST) == 0){
@@ -308,6 +311,7 @@
 			p[p == buf] = '\0';
 			return 0;
 		}
+		*nrel += 1;
 		*p = '\0';
 	}
 	werrstr("not a git repository");
@@ -441,4 +445,25 @@
 	h ^= h >> 15;
 
 	return h;
+}
+
+Qid
+parseqid(char *s)
+{
+	char *e;
+	Qid q;
+
+	if(strcmp(s, "NOQID") == 0)
+		return (Qid){-1, -1, -1};		
+	e = s;
+	q.path = strtoull(e, &e, 16);
+	if(*e != '.')
+		sysfatal("corrupt qid: %s (%s)\n", s, e);
+	q.vers = strtoul(e+1, &e, 10);
+	if(*e != '.')
+		sysfatal("corrupt qid: %s (%s)\n", s, e);
+	q.type = strtoul(e+1, &e, 16);
+	if(*e != '\0')
+		sysfatal("corrupt qid: %s (%x)\n", s, *e);
+	return q;
 }
--- a/sys/src/cmd/git/walk.c
+++ b/sys/src/cmd/git/walk.c
@@ -2,53 +2,72 @@
 #include <libc.h>
 #include "git.h"
 
+typedef struct Seen	Seen;
+typedef struct Idxed	Idxed;
+typedef struct Idxent	Idxent;
+
 #define NCACHE 4096
-#define TDIR ".git/index9/tracked"
-#define RDIR ".git/index9/removed"
-typedef struct Cache	Cache;
-typedef struct Wres	Wres;
-struct Cache {
+
+enum {
+	Rflg	= 1 << 0,
+	Mflg	= 1 << 1,
+	Aflg	= 1 << 2,
+	Uflg	= 1 << 3,
+	/* everything after this is not an error */
+	Tflg	= 1 << 4,
+};
+
+struct Seen {
 	Dir*	cache;
 	int	n;
 	int	max;
 };
 
-struct Wres {
-	char	**path;
-	int	npath;
-	int	pathsz;
+struct Idxed {
+	char**	cache;
+	int	n;
+	int	max;
 };
 
-enum {
-	Rflg	= 1 << 0,
-	Mflg	= 1 << 1,
-	Aflg	= 1 << 2,
-	Tflg	= 1 << 3,
-};
+Seen	seentab[NCACHE];
+Idxed	idxtab[NCACHE];
+char	repopath[1024];
+char	wdirpath[1024];
+char	*rstr	= "R ";
+char	*mstr	= "M ";
+char	*astr	= "A ";
+char	*ustr	= "U ";
+char	*tstr 	= "T ";
+char	*bdir = ".git/fs/HEAD/tree";
+int	useidx	= 1;
+int	nrel;
+int	quiet;
+int	dirty;
+int	printflg;
 
-Cache seencache[NCACHE];
-int quiet;
-int useindex;
-int printflg;
-char *bdir = ".git/fs/HEAD/tree";
-char *rstr = "R ";
-char *tstr = "T ";
-char *mstr = "M ";
-char *astr = "A ";
+Idxent	*idx;
+int	idxsz;
+int	nidx;
+int	staleidx;
+Idxent	*wdir;
+int	wdirsz;
+int	nwdir;
 
+int	loadwdir(char*);
+
 int
 seen(Dir *dir)
 {
+	Seen *c;
 	Dir *dp;
 	int i;
-	Cache *c;
 
-	c = &seencache[dir->qid.path&(NCACHE-1)];
+	c = &seentab[dir->qid.path&(NCACHE-1)];
 	dp = c->cache;
 	for(i=0; i<c->n; i++, dp++)
-		if(dir->qid.path == dp->qid.path &&
-		   dir->type == dp->type &&
-		   dir->dev == dp->dev)
+		if(dir->qid.path == dp->qid.path
+		&& dir->qid.type == dp->qid.type
+		&& dir->dev == dp->dev)
 			return 1;
 	if(c->n == c->max){
 		if (c->max == 0)
@@ -63,129 +82,100 @@
 	return 0;
 }
 
-void
-grow(Wres *r)
-{
-	if(r->npath == r->pathsz){
-		r->pathsz = 2*r->pathsz + 1;
-		r->path = erealloc(r->path, r->pathsz * sizeof(char*));
-	}
-}
-
 int
-readpaths(Wres *r, char *pfx, char *dir)
+checkedin(Idxent *e, int change)
 {
-	char *f, *sub, *full, *sep;
-	Dir *d;
-	int fd, ret, i, n;
+	char *p;
+	int r;
 
-	d = nil;
-	ret = -1;
-	sep = "";
-	if(dir[0] != 0)
-		sep = "/";
-	if((full = smprint("%s/%s", pfx, dir)) == nil)
-		sysfatal("smprint: %r");
-	if((fd = open(full, OREAD)) < 0)
-		goto error;
-	while((n = dirread(fd, &d)) > 0){
-		for(i = 0; i < n; i++){
-			if(seen(&d[i]))
-				continue;
-			if(d[i].qid.type & QTDIR){
-				if((sub = smprint("%s%s%s", dir, sep, d[i].name)) == nil)
-					sysfatal("smprint: %r");
-				if(readpaths(r, pfx, sub) == -1){
-					free(sub);
-					goto error;
-				}
-				free(sub);
-			}else{
-				grow(r);
-				if((f = smprint("%s%s%s", dir, sep, d[i].name)) == nil)
-					sysfatal("smprint: %r");
-				r->path[r->npath++] = f;
-			}
-		}
-		free(d);
+	p = smprint("%s/%s", bdir, e->path);
+	r = access(p, AEXIST);
+	if(r == 0 && change){
+		e->state = 'T';
+		staleidx = 1;
 	}
-	ret = r->npath;
-error:
-	close(fd);
-	free(full);
-	return ret;
+	free(p);
+	return r == 0;
 }
 
 int
-cmp(void *pa, void *pb)
+indexed(char *path, int isdir)
 {
-	return strcmp(*(char **)pa, *(char **)pb);
-}
+	int lo, hi, mid, n, r;
+	char *s;
 
-void
-dedup(Wres *r)
-{
-	int i, o;
-
-	if(r->npath <= 1)
-		return;
-	o = 0;
-	qsort(r->path, r->npath, sizeof(r->path[0]), cmp);
-	for(i = 1; i < r->npath; i++)
-		if(strcmp(r->path[o], r->path[i]) != 0)
-			r->path[++o] = r->path[i];
-	r->npath = o + 1;
+	if(!useidx){
+		s = smprint("%s/%s", bdir, path);
+		r = access(s, AEXIST);
+		free(s);
+		return r == 0;
+	}
+	s = path;
+	if(isdir)
+		s = smprint("%s/", path);
+	r = -1;
+	lo = 0;
+	hi = nidx-1;
+	n = strlen(s);
+	while(lo <= hi){
+		mid = (hi + lo) / 2;
+		if(isdir)
+			r = strncmp(s, idx[mid].path, n);
+		else
+			r = strcmp(s, idx[mid].path);
+		if(r < 0)
+			hi = mid-1;
+		else if(r > 0)
+			lo = mid+1;
+		else
+			break;
+	}
+	if(isdir)
+		free(s);
+	return r == 0;
 }
 
 int
-sameqid(Dir *d, char *qf)
+idxcmp(void *pa, void *pb)
 {
-	char indexqid[64], fileqid[64], *p;
-	int fd, n;
+	Idxent *a, *b;
+	int c;
 
-	if(!d)
-		return 0;
-	if((fd = open(qf, OREAD)) == -1)
-		return 0;
-	if((n = readn(fd, indexqid, sizeof(indexqid) - 1)) == -1)
-		return 0;
-	indexqid[n] = 0;
-	close(fd);
-	if((p = strpbrk(indexqid, "  \t\n\r")) != nil)
-		*p = 0;
-
-	snprint(fileqid, sizeof(fileqid), "%ullx.%uld.%.2uhhx",
-		d->qid.path, d->qid.vers, d->qid.type);
-
-	if(strcmp(indexqid, fileqid) == 0)
-		return 1;
-	return 0;
+	a = (Idxent*)pa;
+	b = (Idxent*)pb;
+	if((c = strcmp(a->path, b->path)) != 0)
+		return c;
+	assert(a->order != b->order);
+	return a-> order < b->order ? -1 : 1;
 }
 
-void
-writeqid(Dir *d, char *qf)
-{
-	int fd;
-
-	if((fd = create(qf, OWRITE, 0666)) == -1)
-		return;
-	fprint(fd, "%ullx.%uld.%.2uhhx\n",
-		d->qid.path, d->qid.vers, d->qid.type);
-	close(fd);
-}
-
 int
-samedata(char *pa, char *pb)
+samedata(Idxent *a, Idxent *b)
 {
-	char ba[32*1024], bb[32*1024];
+	char *gitpath, ba[IOUNIT], bb[IOUNIT];
 	int fa, fb, na, nb, same;
+	Dir *da, *db;
 
+	if(a != nil){
+		if(a->qid.path == b->qid.path
+		&& a->qid.vers == b->qid.vers
+		&& a->qid.type == b->qid.type
+		&& a->mode == b->mode
+		&& a->mode != 0)
+			return 1;
+	}
+
+	da = nil;
+	db = nil;
 	same = 0;
-	fa = open(pa, OREAD);
-	fb = open(pb, OREAD);
-	if(fa == -1 || fb == -1){
+	if((gitpath = smprint("%s/%s", bdir, b->path)) == nil)
+		sysfatal("smprint: %r");
+	fa = open(b->path, OREAD);
+	fb = open(gitpath, OREAD);
+	if(fa == -1 || fb == -1)
 		goto mismatch;
-	}
+	da = dirfstat(fa);
+	db = dirfstat(fb);
 	while(1){
 		if((na = readn(fa, ba, sizeof(ba))) == -1)
 			goto mismatch;
@@ -198,8 +188,19 @@
 		if(memcmp(ba, bb, na) != 0)
 			goto mismatch;
 	}
+	if(a != nil){
+		a->qid = b->qid;
+		if(da != nil)
+			a->mode = da->mode;
+	}
+	staleidx = 1;
+	if(da->mode != db->mode)
+		goto mismatch;
 	same = 1;
+
 mismatch:
+	free(da);
+	free(db);
 	if(fa != -1)
 		close(fa);
 	if(fb != -1)
@@ -207,7 +208,129 @@
 	return same;
 }
 
+int
+loadent(char *dir, Dir *d, int fullpath)
+{
+	char *path;
+	int ret, isdir;
+	Idxent *e;
+
+	if(fullpath)
+		path = strdup(dir);
+	else
+		path = smprint("%s/%s", dir, d->name);
+	if(path == nil)
+		sysfatal("smprint: %r");
+
+	ret = 0;
+	isdir = d->qid.type & QTDIR;
+	cleanname(path);
+	if((printflg & Uflg) == 0 && !indexed(path, isdir))
+		return 0;
+	if(isdir){
+		ret = loadwdir(path);
+		free(path);
+	}else{
+		if(nwdir == wdirsz){
+			wdirsz += wdirsz/2;
+			wdir = erealloc(wdir, wdirsz*sizeof(Idxent));
+		}
+		e = wdir + nwdir;
+		e->path = path;
+		e->qid = d->qid;
+		e->mode = d->mode;
+		e->order = nwdir;
+		e->state = 'T';
+		nwdir++;
+	}
+	return ret;
+}
+
+int
+loadwdir(char *path)
+{
+	int fd, ret, i, n;
+	Dir *d, *e;
+
+	d = nil;
+	e = nil;
+	ret = -1;
+
+	cleanname(path);
+	if(strncmp(path, ".git/", 5) == 0)
+		return 0;
+	if((fd = open(path, OREAD)) < 0)
+		goto error;
+	if((e = dirfstat(fd)) == nil)
+		sysfatal("fstat: %r");
+	if(e->qid.type & QTDIR)
+		while((n = dirread(fd, &d)) > 0){
+			for(i = 0; i < n; i++)
+				if(loadent(path, &d[i], 0) == -1)
+					goto error;
+			free(d);
+		}
+	else{
+		if(loadent(path, e, 1) == -1)
+			goto error;
+	}
+	ret = 0;
+error:
+	free(e);
+	if(fd != -1)
+		close(fd);
+	return ret;
+}
+
+int
+pfxmatch(char *p, char **pfx, int *pfxlen, int npfx)
+{
+	int i;
+
+	if(npfx == 0)
+		return 1;
+	for(i = 0; i < npfx; i++){
+		if(strncmp(p, pfx[i], pfxlen[i]) == 0
+		&& (p[pfxlen[i]] == '/' || p[pfxlen[i]] == 0))
+			return 1;
+		if(strcmp(pfx[i], ".") == 0 || *pfx[i] == 0)
+			return 1;
+	}
+	return 0;
+}
+
+
+char*
+reporel(char *s)
+{
+	char *p;
+	int n;
+
+	if(*s == '/')
+		s = strdup(s);
+	else
+		s = smprint("%s/%s", wdirpath, s);
+	p = cleanname(s);
+	n = strlen(repopath);
+	if(strncmp(s, repopath, n) == 0)
+		p = s + n;
+	else
+		sysfatal("path outside repo: %s", s);
+	if(*p == '/')
+		p++;
+	memmove(s, p, strlen(p)+1);
+	return s;
+}
+
 void
+show(Biobuf *o, int flg, char *str, char *path)
+{
+	dirty |= flg;
+	if(!quiet && (printflg & flg))
+		Bprint(o, "%s%s\n", str, path);
+}
+
+void
 usage(void)
 {
 	fprint(2, "usage: %s [-qbc] [-f filt] [-b base] [paths...]\n", argv0);
@@ -217,14 +340,21 @@
 void
 main(int argc, char **argv)
 {
-	char *rpath, *tpath, *bpath, *base, buf[8], repo[512];
-	char *p, *e;
-	int i, dirty;
-	Wres r;
-	Hash h;
-	Dir *d;
+	char *p, *e, *ln, *base, **argrel, *parts[4], xbuf[8];
+	int i, j, c, line, wfd, *argn;
+	Biobuf *f, *o, *w;
+	Hash h, hd;
+	Dir rn;
 
 	gitinit();
+	if(access(".git/fs/ctl", AEXIST) != 0)
+		sysfatal("no running git/fs");
+	if(getwd(wdirpath, sizeof(wdirpath)) == nil)
+		sysfatal("getwd: %r");
+	if(findrepo(repopath, sizeof(repopath), &nrel) == -1)
+		sysfatal("find root: %r");
+	if(chdir(repopath) == -1)
+		sysfatal("chdir: %r");
 
 	ARGBEGIN{
 	case 'q':
@@ -235,6 +365,7 @@
 		tstr = "";
 		mstr = "";
 		astr = "";
+		ustr = "";
 		break;
 	case 'f':
 		for(p = EARGF(usage()); *p; p++)
@@ -243,109 +374,164 @@
 			case 'A':	printflg |= Aflg;	break;
 			case 'M':	printflg |= Mflg;	break;
 			case 'R':	printflg |= Rflg;	break;
+			case 'U':	printflg |= Uflg;	break;
 			default:	usage();		break;
 		}
 		break;
 	case 'b':
-		useindex = 0;
+		useidx = 0;
 		base = EARGF(usage());
 		if(resolveref(&h, base) == -1)
 			sysfatal("no such ref '%s'", base);
+		/* optimization: we're a lot faster when using the index */
+		if(resolveref(&hd, "HEAD") == 0 && hasheq(&h, &hd))
+			useidx = 1;
 		bdir = smprint(".git/fs/object/%H/tree", h);
 		break;
 	default:
 		usage();
-	}ARGEND
+	}ARGEND;
 
-	gitinit();
-	if(findrepo(repo, sizeof(repo)) == -1)
-		sysfatal("find root: %r");
-	if(chdir(repo) == -1)
-		sysfatal("chdir: %r");
-	if(access(".git/fs/ctl", AEXIST) != 0)
-		sysfatal("no running git/fs");
-	dirty = 0;
-	memset(&r, 0, sizeof(r));
 	if(printflg == 0)
 		printflg = Tflg | Aflg | Mflg | Rflg;
-	if(argc == 0){
-		if(access(TDIR, AEXIST) == 0 && readpaths(&r, TDIR, "") == -1)
-			sysfatal("read tracked: %r");
-		if(access(RDIR, AEXIST) == 0 && readpaths(&r, RDIR, "") == -1)
-			sysfatal("read removed: %r");
-		if(access(bdir, AEXIST) == 0 && readpaths(&r, bdir, "") == -1)
-			sysfatal("read base: %r");
-	}else{
-		for(i = 0; i < argc; i++){
-			tpath = smprint(TDIR"/%s", argv[i]);
-			rpath = smprint(RDIR"/%s", argv[i]);
-			bpath = smprint("%s/%s", bdir, argv[i]);
-			if((d = dirstat(tpath)) == nil && (d = dirstat(rpath)) == nil && (d = dirstat(bpath)) == nil)
-				goto nextarg;
-			if(d->mode & DMDIR){
-				readpaths(&r, TDIR, argv[i]);
-				readpaths(&r, RDIR, argv[i]);
-				readpaths(&r, bdir, argv[i]);
-			}else{
-				grow(&r);
-				r.path[r.npath++] = estrdup(argv[i]);
+
+	nidx = 0;
+	idxsz = 32;
+	idx = emalloc(idxsz*sizeof(Idxent));
+	nwdir = 0;
+	wdirsz = 32;
+	wdir = emalloc(wdirsz*sizeof(Idxent));
+	argrel = emalloc(argc*sizeof(int));
+	argn = emalloc(argc*sizeof(int));
+	for(i = 0; i < argc; i++){
+		argrel[i] = reporel(argv[i]);
+		argn[i] = strlen(argrel[i]);
+	}
+	if((o = Bfdopen(1, OWRITE)) == nil)
+		sysfatal("open out: %r");
+	if(useidx){
+		if((f = Bopen(".git/INDEX9", OREAD)) == nil){
+			fprint(2, "open index: %r\n");
+			if(access(".git/index9", AEXIST) == 0){
+				fprint(2, "index format conversion needed:\n");
+				fprint(2, "\tcd %s && git/fs\n", repopath);
+				fprint(2, "\t@{cd .git/fs/HEAD/tree && walk -f | sed 's/^/T NOQID 0 /'} >> .git/INDEX9\n");
+				fprint(2, "\t@{cd .git/index9/removed && walk -f | sed 's/^/R NOQID 0 /'} >> .git/INDEX9\n");
 			}
-nextarg:
-			free(bpath);
-			free(tpath);
-			free(rpath);
-			free(d);
+			exits("noindex");
 		}
+		line = 0;
+		while((ln = Brdstr(f, '\n', 1)) != nil){
+			line++;
+			/* allow blank lines */
+			if(ln[0] == 0 || ln[0] == '\n')
+				continue;
+			if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts))
+				sysfatal(".git/INDEX9:%d: corrupt index", line);
+			if(nidx == idxsz){
+				idxsz += idxsz/2;
+				idx = realloc(idx, idxsz*sizeof(Idxent));
+			}
+			cleanname(parts[3]);
+			idx[nidx].state = *parts[0];
+			idx[nidx].qid = parseqid(parts[1]);
+			idx[nidx].mode = strtol(parts[2], nil, 8);
+			idx[nidx].path = strdup(parts[3]);
+			idx[nidx].order = nidx;
+			nidx++;
+			free(ln);
+		}
+		qsort(idx, nidx, sizeof(Idxent), idxcmp);
 	}
-	dedup(&r);
 
-	for(i = 0; i < r.npath; i++){
-		p = r.path[i];
-		d = dirstat(p);
-		if(d && d->mode & DMDIR)
-			goto next;
-		rpath = smprint(RDIR"/%s", p);
-		tpath = smprint(TDIR"/%s", p);
-		bpath = smprint("%s/%s", bdir, p);
-		/* Fast path: we don't want to force access to the rpath. */
-		if(useindex && d && sameqid(d, tpath)) {
-			if(!quiet && (printflg & Tflg))
-				print("%s%s\n", tstr, p);
+	if(argc == 0)
+		loadwdir(".");
+	else for(i = 0; i < argc; i++)
+		loadwdir(argrel[i]);
+	qsort(wdir, nwdir, sizeof(Idxent), idxcmp);
+	i = 0;
+	j = 0;
+	while(i < nidx || j < nwdir){
+		/* find the last entry we tracked for a path */
+		while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0){
+			staleidx = 1;
+			i++;
+		}
+		while(j+1 < nwdir && strcmp(wdir[j].path, wdir[j+1].path) == 0)
+			j++;
+		if(i < nidx && !pfxmatch(idx[i].path, argrel, argn, argc)){
+			i++;
+			continue;
+		}
+		if(i >= nidx)
+			c = 1;
+		else if(j >= nwdir)
+			c = -1;
+		else
+			c = strcmp(idx[i].path, wdir[j].path);
+		/* exists in both index and on disk */
+		if(c == 0){
+			if(idx[i].state == 'R' && checkedin(&idx[i], 0))
+				show(o, Rflg, rstr, idx[i].path);
+			else if(idx[i].state == 'A' && !checkedin(&idx[i], 1))
+				show(o, Aflg, astr, idx[i].path);
+			else if(!samedata(&idx[i], &wdir[j]))
+				show(o, Mflg, mstr, idx[i].path);
+			else
+				show(o, Tflg, tstr, idx[i].path);
+			i++;
+			j++;
+		/* only exists in index */
+		}else if(c < 0){
+			if(checkedin(&idx[i], 0))
+				show(o, Rflg, rstr, idx[i].path);
+			i++;
+		/* only exists on disk */
 		}else{
-			if(d == nil || (useindex && access(rpath, AEXIST) == 0)){
-				if(access(bpath, AEXIST) == 0){
-					dirty |= Rflg;
-					if(!quiet && (printflg & Rflg))
-						print("%s%s\n", rstr, p);
-				}
-			}else if(access(bpath, AEXIST) == -1) {
-				dirty |= Aflg;
-				if(!quiet && (printflg & Aflg))
-					print("%s%s\n", astr, p);
-			}else if(samedata(p, bpath)){
-				if(!quiet && (printflg & Tflg))
-					print("%s%s\n", tstr, p);
-				if(useindex)
-					writeqid(d, tpath);
-			}else{
-				dirty |= Mflg;
-				if(!quiet && (printflg & Mflg))
-					print("%s%s\n", mstr, p);
-			}
+			if(!useidx && checkedin(&wdir[j], 0)){
+				if(samedata(nil, &wdir[j]))
+					show(o, Tflg, tstr, wdir[j].path);
+				else
+					show(o, Mflg, mstr, wdir[j].path);
+			}else if(printflg & Uflg)
+				show(o, Uflg, ustr, wdir[j].path);
+			j++;
 		}
-		free(rpath);
-		free(tpath);
-		free(bpath);
-next:
-		free(d);
 	}
+	Bterm(o);
+
+	if(useidx && staleidx)
+	if((wfd = create(".git/INDEX9.new", OWRITE, 0644)) != -1){
+		if((w = Bfdopen(wfd, OWRITE)) == nil){
+			close(wfd);
+			goto Nope;
+		}
+		for(i = 0; i < nidx; i++){
+			while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0)
+				i++;
+			Bprint(w, "%c %Q %o %s\n",
+				idx[i].state,
+				idx[i].qid, 
+				idx[i].mode,
+				idx[i].path);
+		}
+		Bterm(w);
+		nulldir(&rn);
+		rn.name = "INDEX9";
+		if(remove(".git/INDEX9") == -1)
+			goto Nope;
+		if(dirwstat(".git/INDEX9.new", &rn) == -1)
+			sysfatal("rename: %r");
+	}
+
+Nope:
 	if(!dirty)
 		exits(nil);
 
-	p = buf;
-	e = buf + sizeof(buf);
+	p = xbuf;
+	e = p + sizeof(xbuf);
 	for(i = 0; (1 << i) != Tflg; i++)
 		if(dirty & (1 << i))
-			p = seprint(p, e, "%c", "DMAT"[i]);
-	exits(buf);
+			p = seprint(p, e, "%c", "RMAUT"[i]);
+	exits(xbuf);
 }


      reply	other threads:[~2023-09-18  3:51 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-18  2:21 ori
2023-09-18  3:49 ` ori [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=91B9FC3B10A4C61A17F2EA04575388E7@eigenstate.org \
    --to=ori@eigenstate.org \
    --cc=9front@9front.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).