* [9front] git: new index format, 235x faster walks
@ 2023-09-17 23:57 ori
0 siblings, 0 replies; only message in thread
From: ori @ 2023-09-17 23:57 UTC (permalink / raw)
To: 9front
This change replaces our index format with a flat file, where
each line is in the form of:
T qid path
R qid path
A qid path
where T is a tracked file, R is a file that's flagged for
removal, and A is a path that is added but not yet committed.
Lines may be repeated, and the last line in the file is used
to represent the status of a path.
The QID is in the file so that we can skip reading the data
from both the repo and the disk to see if a file has changed,
and may be set to the special string NOQID to force a recheck,
or to avoid needing to stat stuff. When runnig git/walk, we
will opportunistically rewrite this index file to update the
cached QIDs.
Tested on the 9front repo:
Old walk:
@{cd / && time /sys/src/cmd/git/6.owalk -q}
0.08u 1.73s 2.82r /sys/src/cmd/git/6.owalk -q # status= DM
New walk:
@{cd / && time git/walk -q}
0.05u 0.02s 0.12r git/walk -q # status= RM
The tests pass, but I'm sure there are still bugs. Testers
wanted, and bug reports higly desired.
diff cf6987b1f35c33402fe3061eae9f8ff526fee2a8 uncommitted
--- a/sys/src/cmd/git/add
+++ b/sys/src/cmd/git/add
@@ -7,32 +7,18 @@
flagfmt='r:remove'; args='file ...'
eval `''{aux/getflags $*} || exec aux/usage
-add='tracked'
-del='removed'
-if(~ $remove 1){
- add='removed'
- del='tracked'
-}
+s=A
+if(~ $remove 1)
+ s=R
if(~ $#* 0)
exec aux/usage
paths=`$nl{cleanname -d $gitrel $* | drop $gitroot}
-if(~ $add tracked)
+if(~ $s A)
files=`$nl{walk -f ./$paths}
if not
- files=`$nl{cd .git/index9/tracked/ && walk -f ./$paths}
-
+ files=`$nl{cd .git/fs/HEAD/tree && walk -f ./$paths}
for(f in $files){
- if(! ~ `$nl{cleanname $f} .git/*){
- addpath=.git/index9/$add/$f
- delpath=.git/index9/$del/$f
- mkdir -p `$nl{basename -d $addpath}
- mkdir -p `$nl{basename -d $delpath}
- # We don't want a matching qid, so that
- # git/walk doesn't think this came from
- # a checkout.
- echo -n > $addpath
- rm -f $delpath
- }
+ echo $s NOQID $f >> .git/INDEX9
}
exit ''
--- a/sys/src/cmd/git/clone
+++ b/sys/src/cmd/git/clone
@@ -87,11 +87,8 @@
git/fs
@ {builtin cd $tree && tar cif /fd/1 .} | @ {tar xf /fd/0} \
|| die 'checkout failed:' $status
- for(f in `$nl{walk -f $tree | drop $tree}){
- idx=.git/index9/tracked/$f
- mkdir -p `$nl{basename -d $idx}
- walk -eq ./$f > $idx
- }
+ {for(f in `$nl{cd $tree && walk -f})
+ echo 'T NOQID '$f} > .git/INDEX9
}
if not{
echo no default branch >[1=2]
--- a/sys/src/cmd/git/commit
+++ b/sys/src/cmd/git/commit
@@ -65,8 +65,8 @@
fn parents{
if(! ~ $#revise 0)
parents=`{cat $gitfs/HEAD/parent}
- if not if(test -f .git/index9/merge-parents)
- parents=`{cat .git/index9/merge-parents | sort | uniq}
+ if not if(test -f .git/merge-parents)
+ parents=`{cat .git/merge-parents | sort | uniq}
if not if(~ $initial true)
parents=()
if not
@@ -78,7 +78,7 @@
if(! ~ $#parents 0)
pflags='-p'^$parents
hash=`{git/save -n $"name -e $"email -m $"msg $pflags $files || die $status}
- rm -f .git/index9/merge-parents
+ rm -f .git/merge-parents
}
fn update{
@@ -89,14 +89,10 @@
echo $branch: $hash
echo $hash > $refpath
for(f in $files){
- if(test -e .git/index9/removed/$f || ! test -e $f){
- rm -f .git/index9/removed/$f
- rm -f .git/index9/tracked/$f
- }
- if not{
- mkdir -p `{basename -d $f}
- walk -eq $f > .git/index9/tracked/$f
- }
+ if(! test -e $f && ! test -e .git/object/$hash/tree/$f)
+ echo R NOQID $f >> .git/INDEX9
+ if not
+ echo T NOQID $f >> .git/INDEX9
}
}
@@ -120,11 +116,11 @@
}
files=()
-if(test -f .git/index9/merge-parents)
- files=`$nl{git/query -c `{cat .git/index9/merge-parents} | sed 's/^..//'}
+if(test -f .git/merge-parents)
+ files=`$nl{git/query -c `{cat .git/merge-parents} | sed 's/^..//'}
if(! ~ $#* 0)
files=($files `$nl{git/walk -c `$nl{cleanname -d $gitrel $*}})
-if(~ $status '' || ~ $#files 0 && ! test -f .git/index9/merge-parents && ~ $#revise 0)
+if(~ $status '' || ~ $#files 0 && ! test -f .git/merge-parents && ~ $#revise 0)
die 'nothing to commit' $status
@{
flag e +
--- a/sys/src/cmd/git/conf.c
+++ b/sys/src/cmd/git/conf.c
@@ -59,7 +59,7 @@
main(int argc, char **argv)
{
char repo[512], *p, *s;
- int i, j;
+ int i, j, nrel;
ARGBEGIN{
case 'f': file[nfile++]=EARGF(usage()); break;
@@ -69,7 +69,7 @@
}ARGEND;
if(findroot){
- if(findrepo(repo, sizeof(repo)) == -1)
+ if(findrepo(repo, sizeof(repo), &nrel) == -1)
sysfatal("%r");
print("%s\n", repo);
exits(nil);
--- a/sys/src/cmd/git/diff
+++ b/sys/src/cmd/git/diff
@@ -7,8 +7,10 @@
flagfmt='c:commit branch, s:summarize'; args='[file ...]'
eval `''{aux/getflags $*} || exec aux/usage
-if(~ $#commit 0)
+if(~ $#commit 0){
commit=HEAD
+ cparam=()
+}
files=()
if(! ~ $#* 0)
@@ -16,7 +18,7 @@
branch=`{git/query -p $commit}
if(~ $summarize 1){
- git/walk -fMAR -b $commit $files
+ git/walk -fMAR $cparam $files
exit
}
@@ -24,7 +26,7 @@
mntgen /mnt/scratch
bind $branch/tree/ /mnt/scratch/a
bind . /mnt/scratch/b
-for(f in `$nl{git/walk -c -fRMA -b $commit $files}){
+for(f in `$nl{git/walk -c -fRMA $cparam $files}){
if(~ $#showed 0){
echo diff `{git/query $commit} uncommitted
showed=1
--- a/sys/src/cmd/git/git.h
+++ b/sys/src/cmd/git/git.h
@@ -301,9 +301,10 @@
int hassuffix(char *, char *);
int swapsuffix(char *, int, char *, char *, char *);
char *strip(char *);
-int findrepo(char *, int);
+int findrepo(char *, int, int*);
int showprogress(int, int);
u64int murmurhash2(void*, usize);
+Qid parseqid(char*);
/* packing */
void dtinit(Dtab *, Object*);
--- a/sys/src/cmd/git/import
+++ b/sys/src/cmd/git/import
@@ -98,7 +98,8 @@
if(~ $#nocommit 0){
if(hash=`{git/save -n $aname -e $amail -N $name -E $email -m $msg -d $date $parents $files}){
echo $hash > $refpath
- rm -f .git/index9/removed/$files
+ for(f in $files)
+ echo T NOQID $f >> .git/INDEX9
}
}
status=''''
--- a/sys/src/cmd/git/init
+++ b/sys/src/cmd/git/init
@@ -31,7 +31,9 @@
echo '[branch "'$branch'"]'
echo ' remote = origin'
}
-
+>$dir/.git/INDEX9 {
+ echo
+}
>$dir/.git/HEAD {
echo ref: refs/heads/$branch
}
--- a/sys/src/cmd/git/log.c
+++ b/sys/src/cmd/git/log.c
@@ -239,7 +239,7 @@
main(int argc, char **argv)
{
char path[1024], repo[1024], *p, *r;
- int i, nrepo;
+ int i, nrel, nrepo;
ARGBEGIN{
case 'e':
@@ -259,7 +259,7 @@
break;
}ARGEND;
- if(findrepo(repo, sizeof(repo)) == -1)
+ if(findrepo(repo, sizeof(repo), &nrel) == -1)
sysfatal("find root: %r");
nrepo = strlen(repo);
if(argc != 0){
--- a/sys/src/cmd/git/merge
+++ b/sys/src/cmd/git/merge
@@ -38,8 +38,8 @@
git/revert .
exit ''
}
-echo $ours >> .git/index9/merge-parents
-echo $theirs >> .git/index9/merge-parents
+echo $ours >> .git/merge-parents
+echo $theirs >> .git/merge-parents
merge $ours $base $theirs
>[1=2] echo 'merge complete: remember to commit'
--- /dev/null
+++ b/sys/src/cmd/git/owalk.c
@@ -1,0 +1,346 @@
+#include <u.h>
+#include <libc.h>
+#include "git.h"
+
+#define NCACHE 4096
+#define TDIR ".git/index9/tracked"
+#define RDIR ".git/index9/removed"
+typedef struct Cache Cache;
+typedef struct Wres Wres;
+struct Cache {
+ Dir* cache;
+ int n;
+ int max;
+};
+
+struct Wres {
+ char **path;
+ int npath;
+ int pathsz;
+};
+
+enum {
+ Rflg = 1 << 0,
+ Mflg = 1 << 1,
+ Aflg = 1 << 2,
+ Tflg = 1 << 3,
+};
+
+Cache seencache[NCACHE];
+int quiet;
+int useindex;
+int printflg;
+char *bdir = ".git/fs/HEAD/tree";
+char *rstr = "R ";
+char *tstr = "T ";
+char *mstr = "M ";
+char *astr = "A ";
+
+int
+seen(Dir *dir)
+{
+ Dir *dp;
+ int i;
+ Cache *c;
+
+ c = &seencache[dir->qid.path&(NCACHE-1)];
+ dp = c->cache;
+ for(i=0; i<c->n; i++, dp++)
+ if(dir->qid.path == dp->qid.path &&
+ dir->type == dp->type &&
+ dir->dev == dp->dev)
+ return 1;
+ if(c->n == c->max){
+ if (c->max == 0)
+ c->max = 8;
+ else
+ c->max += c->max/2;
+ c->cache = realloc(c->cache, c->max*sizeof(Dir));
+ if(c->cache == nil)
+ sysfatal("realloc: %r");
+ }
+ c->cache[c->n++] = *dir;
+ return 0;
+}
+
+void
+grow(Wres *r)
+{
+ if(r->npath == r->pathsz){
+ r->pathsz = 2*r->pathsz + 1;
+ r->path = erealloc(r->path, r->pathsz * sizeof(char*));
+ }
+}
+
+int
+readpaths(Wres *r, char *pfx, char *dir)
+{
+ char *f, *sub, *full, *sep;
+ Dir *d;
+ int fd, ret, i, n;
+
+ d = nil;
+ ret = -1;
+ sep = "";
+ if(dir[0] != 0)
+ sep = "/";
+ if((full = smprint("%s/%s", pfx, dir)) == nil)
+ sysfatal("smprint: %r");
+ if((fd = open(full, OREAD)) < 0)
+ goto error;
+ while((n = dirread(fd, &d)) > 0){
+ for(i = 0; i < n; i++){
+ if(seen(&d[i]))
+ continue;
+ if(d[i].qid.type & QTDIR){
+ if((sub = smprint("%s%s%s", dir, sep, d[i].name)) == nil)
+ sysfatal("smprint: %r");
+ if(readpaths(r, pfx, sub) == -1){
+ free(sub);
+ goto error;
+ }
+ free(sub);
+ }else{
+ grow(r);
+ if((f = smprint("%s%s%s", dir, sep, d[i].name)) == nil)
+ sysfatal("smprint: %r");
+ r->path[r->npath++] = f;
+ }
+ }
+ free(d);
+ }
+ ret = r->npath;
+error:
+ close(fd);
+ free(full);
+ return ret;
+}
+
+int
+cmp(void *pa, void *pb)
+{
+ return strcmp(*(char **)pa, *(char **)pb);
+}
+
+void
+dedup(Wres *r)
+{
+ int i, o;
+
+ if(r->npath <= 1)
+ return;
+ o = 0;
+ qsort(r->path, r->npath, sizeof(r->path[0]), cmp);
+ for(i = 1; i < r->npath; i++)
+ if(strcmp(r->path[o], r->path[i]) != 0)
+ r->path[++o] = r->path[i];
+ r->npath = o + 1;
+}
+
+int
+sameqid(Dir *d, char *qf)
+{
+ char indexqid[64], fileqid[64], *p;
+ int fd, n;
+
+ if(!d)
+ return 0;
+ if((fd = open(qf, OREAD)) == -1)
+ return 0;
+ if((n = readn(fd, indexqid, sizeof(indexqid) - 1)) == -1)
+ return 0;
+ indexqid[n] = 0;
+ close(fd);
+ if((p = strpbrk(indexqid, " \t\n\r")) != nil)
+ *p = 0;
+
+ snprint(fileqid, sizeof(fileqid), "%ullx.%uld.%.2uhhx",
+ d->qid.path, d->qid.vers, d->qid.type);
+
+ if(strcmp(indexqid, fileqid) == 0)
+ return 1;
+ return 0;
+}
+
+void
+writeqid(Dir *d, char *qf)
+{
+ int fd;
+
+ if((fd = create(qf, OWRITE, 0666)) == -1)
+ return;
+ fprint(fd, "%ullx.%uld.%.2uhhx\n",
+ d->qid.path, d->qid.vers, d->qid.type);
+ close(fd);
+}
+
+int
+samedata(char *pa, char *pb)
+{
+ char ba[32*1024], bb[32*1024];
+ int fa, fb, na, nb, same;
+
+ same = 0;
+ fa = open(pa, OREAD);
+ fb = open(pb, OREAD);
+ if(fa == -1 || fb == -1){
+ goto mismatch;
+ }
+ while(1){
+ if((na = readn(fa, ba, sizeof(ba))) == -1)
+ goto mismatch;
+ if((nb = readn(fb, bb, sizeof(bb))) == -1)
+ goto mismatch;
+ if(na != nb)
+ goto mismatch;
+ if(na == 0)
+ break;
+ if(memcmp(ba, bb, na) != 0)
+ goto mismatch;
+ }
+ same = 1;
+mismatch:
+ if(fa != -1)
+ close(fa);
+ if(fb != -1)
+ close(fb);
+ return same;
+}
+
+void
+usage(void)
+{
+ fprint(2, "usage: %s [-qbc] [-f filt] [-b base] [paths...]\n", argv0);
+ exits("usage");
+}
+
+void
+main(int argc, char **argv)
+{
+ char *rpath, *tpath, *bpath, *base, buf[8], repo[512];
+ char *p, *e;
+ int i, dirty;
+ Wres r;
+ Hash h;
+ Dir *d;
+
+ gitinit();
+
+ ARGBEGIN{
+ case 'q':
+ quiet++;
+ break;
+ case 'c':
+ rstr = "";
+ tstr = "";
+ mstr = "";
+ astr = "";
+ break;
+ case 'f':
+ for(p = EARGF(usage()); *p; p++)
+ switch(*p){
+ case 'T': printflg |= Tflg; break;
+ case 'A': printflg |= Aflg; break;
+ case 'M': printflg |= Mflg; break;
+ case 'R': printflg |= Rflg; break;
+ default: usage(); break;
+ }
+ break;
+ case 'b':
+ useindex = 0;
+ base = EARGF(usage());
+ if(resolveref(&h, base) == -1)
+ sysfatal("no such ref '%s'", base);
+ bdir = smprint(".git/fs/object/%H/tree", h);
+ break;
+ default:
+ usage();
+ }ARGEND
+
+ if(access(".git/fs/ctl", AEXIST) != 0)
+ sysfatal("no running git/fs");
+ dirty = 0;
+ memset(&r, 0, sizeof(r));
+ if(printflg == 0)
+ printflg = Tflg | Aflg | Mflg | Rflg;
+ if(argc == 0){
+ if(access(TDIR, AEXIST) == 0 && readpaths(&r, TDIR, "") == -1)
+ sysfatal("read tracked: %r");
+ if(access(RDIR, AEXIST) == 0 && readpaths(&r, RDIR, "") == -1)
+ sysfatal("read removed: %r");
+ if(access(bdir, AEXIST) == 0 && readpaths(&r, bdir, "") == -1)
+ sysfatal("read base: %r");
+ }else{
+ for(i = 0; i < argc; i++){
+ tpath = smprint(TDIR"/%s", argv[i]);
+ rpath = smprint(RDIR"/%s", argv[i]);
+ bpath = smprint("%s/%s", bdir, argv[i]);
+ if((d = dirstat(tpath)) == nil && (d = dirstat(rpath)) == nil && (d = dirstat(bpath)) == nil)
+ goto nextarg;
+ if(d->mode & DMDIR){
+ readpaths(&r, TDIR, argv[i]);
+ readpaths(&r, RDIR, argv[i]);
+ readpaths(&r, bdir, argv[i]);
+ }else{
+ grow(&r);
+ r.path[r.npath++] = estrdup(argv[i]);
+ }
+nextarg:
+ free(bpath);
+ free(tpath);
+ free(rpath);
+ free(d);
+ }
+ }
+ dedup(&r);
+
+ for(i = 0; i < r.npath; i++){
+ p = r.path[i];
+ d = dirstat(p);
+ if(d && d->mode & DMDIR)
+ goto next;
+ rpath = smprint(RDIR"/%s", p);
+ tpath = smprint(TDIR"/%s", p);
+ bpath = smprint("%s/%s", bdir, p);
+ /* Fast path: we don't want to force access to the rpath. */
+ if(useindex && d && sameqid(d, tpath)) {
+ if(!quiet && (printflg & Tflg))
+ print("%s%s\n", tstr, p);
+ }else{
+ if(d == nil || (useindex && access(rpath, AEXIST) == 0)){
+ if(access(bpath, AEXIST) == 0){
+ dirty |= Rflg;
+ if(!quiet && (printflg & Rflg))
+ print("%s%s\n", rstr, p);
+ }
+ }else if(access(bpath, AEXIST) == -1) {
+ dirty |= Aflg;
+ if(!quiet && (printflg & Aflg))
+ print("%s%s\n", astr, p);
+ }else if(samedata(p, bpath)){
+ if(!quiet && (printflg & Tflg))
+ print("%s%s\n", tstr, p);
+ if(useindex)
+ writeqid(d, tpath);
+ }else{
+ dirty |= Mflg;
+ if(!quiet && (printflg & Mflg))
+ print("%s%s\n", mstr, p);
+ }
+ }
+ free(rpath);
+ free(tpath);
+ free(bpath);
+next:
+ free(d);
+ }
+ if(!dirty)
+ exits(nil);
+
+ p = buf;
+ e = buf + sizeof(buf);
+ for(i = 0; (1 << i) != Tflg; i++)
+ if(dirty & (1 << i))
+ p = seprint(p, e, "%c", "DMAT"[i]);
+ exits(buf);
+}
--- a/sys/src/cmd/git/pack.c
+++ b/sys/src/cmd/git/pack.c
@@ -66,7 +66,7 @@
Object *lruhead;
Object *lrutail;
vlong ncache;
-vlong cachemax = 512*MiB;
+vlong cachemax = 1024*MiB;
Packf *packf;
int npackf;
int openpacks;
--- a/sys/src/cmd/git/query.c
+++ b/sys/src/cmd/git/query.c
@@ -152,10 +152,10 @@
void
main(int argc, char **argv)
{
- int i, j, n;
- Hash *h;
- char *p, *e, *s, *objpfx;
char query[2048], repo[512];
+ char *p, *e, *s, *objpfx;
+ int i, j, n, nrel;
+ Hash *h;
ARGBEGIN{
case 'd': chattygit++; break;
@@ -170,7 +170,7 @@
if(argc == 0)
usage();
- if(findrepo(repo, sizeof(repo)) == -1)
+ if(findrepo(repo, sizeof(repo), &nrel) == -1)
sysfatal("find root: %r");
if(chdir(repo) == -1)
sysfatal("chdir: %r");
--- a/sys/src/cmd/git/save.c
+++ b/sys/src/cmd/git/save.c
@@ -10,6 +10,14 @@
char *dat;
int ndat;
};
+
+struct Idxent {
+ char *path;
+ Qid qid;
+ char state;
+ int order;
+};
+
enum {
Maxparents = 16,
};
@@ -21,7 +29,9 @@
char *commitmsg;
Hash parents[Maxparents];
int nparents;
-
+Idxent *idx;
+int idxsz;
+int nidx;
int
gitmode(Dirent *e)
{
@@ -38,6 +48,20 @@
}
int
+idxcmp(void *pa, void *pb)
+{
+ Idxent *a, *b;
+ int c;
+
+ a = (Idxent*)pa;
+ b = (Idxent*)pb;
+ if((c = strcmp(a->path, b->path)) != 0)
+ return c;
+ assert(a->order != b->order);
+ return a-> order < b->order ? -1 : 1;
+}
+
+int
entcmp(void *pa, void *pb)
{
char abuf[256], bbuf[256], *ae, *be;
@@ -184,27 +208,22 @@
int
tracked(char *path)
{
- char ipath[256];
- Dir *d;
+ int lo, hi, mid, r;
- /* Explicitly removed. */
- snprint(ipath, sizeof(ipath), ".git/index9/removed/%s", path);
- if(strstr(cleanname(ipath), ".git/index9/removed") != ipath)
- sysfatal("path %s leaves index", ipath);
- d = dirstat(ipath);
- if(d != nil && d->qid.type != QTDIR){
- free(d);
- return 0;
+ r = -1;
+ lo = 0;
+ hi = nidx-1;
+ while(lo <= hi){
+ mid = (hi + lo) / 2;
+ r = strcmp(path, idx[mid].path);
+ if(r < 0)
+ hi = mid-1;
+ else if(r > 0)
+ lo = mid+1;
+ else
+ break;
}
-
- /* Explicitly added. */
- snprint(ipath, sizeof(ipath), ".git/index9/tracked/%s", path);
- if(strstr(cleanname(ipath), ".git/index9/tracked") != ipath)
- sysfatal("path %s leaves index", ipath);
- if(access(ipath, AEXIST) == 0)
- return 1;
-
- return 0;
+ return r == 0;
}
int
@@ -354,10 +373,11 @@
void
main(int argc, char **argv)
{
+ char *ln, *dstr, *parts[3], cwd[1024];
+ int i, r, line, ncwd;
Hash th, ch;
- char *dstr, cwd[1024];
- int i, r, ncwd;
vlong date;
+ Biobuf *f;
Object *t;
gitinit();
@@ -425,6 +445,32 @@
}
t = findroot();
+ nidx = 0;
+ idxsz = 32;
+ idx = emalloc(idxsz*sizeof(Idxent));
+ if((f = Bopen(".git/INDEX9", OREAD)) == nil)
+ sysfatal("open index: %r");
+ line = 0;
+ while((ln = Brdstr(f, '\n', 1)) != nil){
+ line++;
+ if(ln[0] == 0 || ln[0] == '\n')
+ continue;
+ if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts))
+ sysfatal(".git/INDEX9:%d: corrupt index", line);
+ if(nidx == idxsz){
+ idxsz += idxsz/2;
+ idx = realloc(idx, idxsz*sizeof(Idxent));
+ }
+ cleanname(parts[2]);
+ idx[nidx].state = *parts[0];
+ idx[nidx].qid = parseqid(parts[1]);
+ idx[nidx].path = strdup(parts[2]);
+ idx[nidx].order = nidx;
+ nidx++;
+ free(ln);
+ }
+ Bterm(f);
+ qsort(idx, nidx, sizeof(Idxent), idxcmp);
r = treeify(t, argv, argv + argc, 0, &th);
if(r == -1)
sysfatal("could not commit: %r\n");
--- a/sys/src/cmd/git/util.c
+++ b/sys/src/cmd/git/util.c
@@ -294,7 +294,7 @@
/* Finds the directory containing the git repo. */
int
-findrepo(char *buf, int nbuf)
+findrepo(char *buf, int nbuf, int *nrel)
{
char *p, *suff;
@@ -302,6 +302,7 @@
if(getwd(buf, nbuf - strlen(suff) - 1) == nil)
return -1;
+ *nrel = 0;
for(p = buf + strlen(buf); p != nil; p = strrchr(buf, '/')){
strcpy(p, suff);
if(access(buf, AEXIST) == 0){
@@ -308,6 +309,7 @@
p[p == buf] = '\0';
return 0;
}
+ *nrel += 1;
*p = '\0';
}
werrstr("not a git repository");
@@ -441,4 +443,25 @@
h ^= h >> 15;
return h;
+}
+
+Qid
+parseqid(char *s)
+{
+ char *e;
+ Qid q;
+
+ if(strcmp(s, "NOQID") == 0)
+ return (Qid){-1, -1, -1};
+ e = s;
+ q.path = strtoull(e, &e, 16);
+ if(*e != '.')
+ sysfatal("corrupt qid: %s (%s)\n", s, e);
+ q.vers = strtoul(e+1, &e, 10);
+ if(*e != '.')
+ sysfatal("corrupt qid: %s (%s)\n", s, e);
+ q.type = strtoul(e+1, &e, 16);
+ if(*e != '\0')
+ sysfatal("corrupt qid: %s (%x)\n", s, *e);
+ return q;
}
--- a/sys/src/cmd/git/walk.c
+++ b/sys/src/cmd/git/walk.c
@@ -2,53 +2,79 @@
#include <libc.h>
#include "git.h"
+typedef struct Seen Seen;
+typedef struct Idxed Idxed;
+typedef struct Idxent Idxent;
+
#define NCACHE 4096
-#define TDIR ".git/index9/tracked"
-#define RDIR ".git/index9/removed"
-typedef struct Cache Cache;
-typedef struct Wres Wres;
-struct Cache {
+
+enum {
+ Rflg = 1 << 0,
+ Mflg = 1 << 1,
+ Aflg = 1 << 2,
+ Uflg = 1 << 3,
+ /* everything after this is not an error */
+ Tflg = 1 << 4,
+};
+
+struct Idxent {
+ char *path;
+ Qid qid;
+ char state;
+ int order;
+};
+
+struct Seen {
Dir* cache;
int n;
int max;
};
-struct Wres {
- char **path;
- int npath;
- int pathsz;
+struct Idxed {
+ char** cache;
+ int n;
+ int max;
};
-enum {
- Rflg = 1 << 0,
- Mflg = 1 << 1,
- Aflg = 1 << 2,
- Tflg = 1 << 3,
-};
+Seen seentab[NCACHE];
+Idxed idxtab[NCACHE];
+char repopath[1024];
+char wdirpath[1024];
+char *rstr = "R ";
+char *mstr = "M ";
+char *astr = "A ";
+char *ustr = "U ";
+char *tstr = "T ";
+char *bdir = ".git/fs/HEAD/tree";
+int useidx = 1;
+int nrel;
+int quiet;
+int dirty;
+int printflg;
-Cache seencache[NCACHE];
-int quiet;
-int useindex;
-int printflg;
-char *bdir = ".git/fs/HEAD/tree";
-char *rstr = "R ";
-char *tstr = "T ";
-char *mstr = "M ";
-char *astr = "A ";
+Idxent *idx;
+int idxsz;
+int nidx;
+int staleidx;
+Idxent *wdir;
+int wdirsz;
+int nwdir;
+int loadwdir(char*);
+
int
seen(Dir *dir)
{
+ Seen *c;
Dir *dp;
int i;
- Cache *c;
- c = &seencache[dir->qid.path&(NCACHE-1)];
+ c = &seentab[dir->qid.path&(NCACHE-1)];
dp = c->cache;
for(i=0; i<c->n; i++, dp++)
- if(dir->qid.path == dp->qid.path &&
- dir->type == dp->type &&
- dir->dev == dp->dev)
+ if(dir->qid.path == dp->qid.path
+ && dir->qid.type == dp->qid.type
+ && dir->dev == dp->dev)
return 1;
if(c->n == c->max){
if (c->max == 0)
@@ -63,129 +89,91 @@
return 0;
}
-void
-grow(Wres *r)
-{
- if(r->npath == r->pathsz){
- r->pathsz = 2*r->pathsz + 1;
- r->path = erealloc(r->path, r->pathsz * sizeof(char*));
- }
-}
-
int
-readpaths(Wres *r, char *pfx, char *dir)
+checkedin(Idxent *e)
{
- char *f, *sub, *full, *sep;
- Dir *d;
- int fd, ret, i, n;
+ char *p;
+ int r;
- d = nil;
- ret = -1;
- sep = "";
- if(dir[0] != 0)
- sep = "/";
- if((full = smprint("%s/%s", pfx, dir)) == nil)
- sysfatal("smprint: %r");
- if((fd = open(full, OREAD)) < 0)
- goto error;
- while((n = dirread(fd, &d)) > 0){
- for(i = 0; i < n; i++){
- if(seen(&d[i]))
- continue;
- if(d[i].qid.type & QTDIR){
- if((sub = smprint("%s%s%s", dir, sep, d[i].name)) == nil)
- sysfatal("smprint: %r");
- if(readpaths(r, pfx, sub) == -1){
- free(sub);
- goto error;
- }
- free(sub);
- }else{
- grow(r);
- if((f = smprint("%s%s%s", dir, sep, d[i].name)) == nil)
- sysfatal("smprint: %r");
- r->path[r->npath++] = f;
- }
- }
- free(d);
- }
- ret = r->npath;
-error:
- close(fd);
- free(full);
- return ret;
+ p = smprint("%s/%s", bdir, e->path);
+ r = access(p, AEXIST);
+ e->state = 'T';
+ staleidx = 1;
+ free(p);
+ return r == 0;
}
int
-cmp(void *pa, void *pb)
+indexed(char *path, int isdir)
{
- return strcmp(*(char **)pa, *(char **)pb);
-}
+ int lo, hi, mid, n, r;
+ char *s;
-void
-dedup(Wres *r)
-{
- int i, o;
-
- if(r->npath <= 1)
- return;
- o = 0;
- qsort(r->path, r->npath, sizeof(r->path[0]), cmp);
- for(i = 1; i < r->npath; i++)
- if(strcmp(r->path[o], r->path[i]) != 0)
- r->path[++o] = r->path[i];
- r->npath = o + 1;
+ if(!useidx){
+ s = smprint("%s/%s", bdir, path);
+ r = access(s, AEXIST);
+ free(s);
+ return r == 0;
+ }
+ s = path;
+ if(isdir)
+ s = smprint("%s/", path);
+ r = -1;
+ lo = 0;
+ hi = nidx-1;
+ n = strlen(s);
+ while(lo <= hi){
+ mid = (hi + lo) / 2;
+ if(isdir)
+ r = strncmp(s, idx[mid].path, n);
+ else
+ r = strcmp(s, idx[mid].path);
+ if(r < 0)
+ hi = mid-1;
+ else if(r > 0)
+ lo = mid+1;
+ else
+ break;
+ }
+ if(isdir)
+ free(s);
+ return r == 0;
}
int
-sameqid(Dir *d, char *qf)
+idxcmp(void *pa, void *pb)
{
- char indexqid[64], fileqid[64], *p;
- int fd, n;
+ Idxent *a, *b;
+ int c;
- if(!d)
- return 0;
- if((fd = open(qf, OREAD)) == -1)
- return 0;
- if((n = readn(fd, indexqid, sizeof(indexqid) - 1)) == -1)
- return 0;
- indexqid[n] = 0;
- close(fd);
- if((p = strpbrk(indexqid, " \t\n\r")) != nil)
- *p = 0;
-
- snprint(fileqid, sizeof(fileqid), "%ullx.%uld.%.2uhhx",
- d->qid.path, d->qid.vers, d->qid.type);
-
- if(strcmp(indexqid, fileqid) == 0)
- return 1;
- return 0;
+ a = (Idxent*)pa;
+ b = (Idxent*)pb;
+ if((c = strcmp(a->path, b->path)) != 0)
+ return c;
+ assert(a->order != b->order);
+ return a-> order < b->order ? -1 : 1;
}
-void
-writeqid(Dir *d, char *qf)
-{
- int fd;
-
- if((fd = create(qf, OWRITE, 0666)) == -1)
- return;
- fprint(fd, "%ullx.%uld.%.2uhhx\n",
- d->qid.path, d->qid.vers, d->qid.type);
- close(fd);
-}
-
int
-samedata(char *pa, char *pb)
+samedata(Idxent *a, Idxent *b)
{
- char ba[32*1024], bb[32*1024];
+ char *gitpath, ba[IOUNIT], bb[IOUNIT];
int fa, fb, na, nb, same;
+ if(a != nil){
+ if(a->qid.path == b->qid.path
+ && a->qid.vers == b->qid.vers
+ && a->qid.type == b->qid.type)
+ return 1;
+ }
+
same = 0;
- fa = open(pa, OREAD);
- fb = open(pb, OREAD);
- if(fa == -1 || fb == -1){
+ if((gitpath = smprint("%s/%s", bdir, b->path)) == nil)
+ sysfatal("smprint: %r");
+ fa = open(b->path, OREAD);
+ fb = open(gitpath, OREAD);
+ if(fa == -1 || fb == -1)
goto mismatch;
- }
while(1){
if((na = readn(fa, ba, sizeof(ba))) == -1)
goto mismatch;
@@ -198,7 +186,11 @@
if(memcmp(ba, bb, na) != 0)
goto mismatch;
}
+ if(a != nil)
+ a->qid = b->qid;
same = 1;
+ staleidx = 1;
+
mismatch:
if(fa != -1)
close(fa);
@@ -207,7 +199,128 @@
return same;
}
+int
+loadent(char *dir, Dir *d, int fullpath)
+{
+ char *path;
+ int ret, isdir;
+ Idxent *e;
+
+ if(fullpath)
+ path = strdup(dir);
+ else
+ path = smprint("%s/%s", dir, d->name);
+ if(path == nil)
+ sysfatal("smprint: %r");
+
+ ret = 0;
+ isdir = d->qid.type & QTDIR;
+ cleanname(path);
+ if((printflg & Uflg) == 0 && !indexed(path, isdir))
+ return 0;
+ if(isdir){
+ ret = loadwdir(path);
+ free(path);
+ }else{
+ if(nwdir == wdirsz){
+ wdirsz += wdirsz/2;
+ wdir = erealloc(wdir, wdirsz*sizeof(Idxent));
+ }
+ e = wdir + nwdir;
+ e->path = path;
+ e->qid = d->qid;
+ e->order = nwdir;
+ e->state = 'T';
+ nwdir++;
+ }
+ return ret;
+}
+
+int
+loadwdir(char *path)
+{
+ int fd, ret, i, n;
+ Dir *d, *e;
+
+ d = nil;
+ e = nil;
+ ret = -1;
+
+ cleanname(path);
+ if(strncmp(path, ".git/", 5) == 0)
+ return 0;
+ if((fd = open(path, OREAD)) < 0)
+ goto error;
+ if((e = dirfstat(fd)) == nil)
+ sysfatal("fstat: %r");
+ if(e->qid.type & QTDIR)
+ while((n = dirread(fd, &d)) > 0){
+ for(i = 0; i < n; i++)
+ if(loadent(path, &d[i], 0) == -1)
+ goto error;
+ free(d);
+ }
+ else{
+ if(loadent(path, e, 1) == -1)
+ goto error;
+ }
+ ret = 0;
+error:
+ free(e);
+ if(fd != -1)
+ close(fd);
+ return ret;
+}
+
+int
+pfxmatch(char *p, char **pfx, int *pfxlen, int npfx)
+{
+ int i;
+
+ if(npfx == 0)
+ return 1;
+ for(i = 0; i < npfx; i++){
+ if(strncmp(p, pfx[i], pfxlen[i]) == 0
+ && (p[pfxlen[i]] == '/' || p[pfxlen[i]] == 0))
+ return 1;
+ if(strcmp(pfx[i], ".") == 0 || *pfx[i] == 0)
+ return 1;
+ }
+ return 0;
+}
+
+
+char*
+reporel(char *s)
+{
+ char *p;
+ int n;
+
+ if(*s == '/')
+ s = strdup(s);
+ else
+ s = smprint("%s/%s", wdirpath, s);
+ p = cleanname(s);
+ n = strlen(repopath);
+ if(strncmp(s, repopath, n) == 0)
+ p = s + n;
+ else
+ sysfatal("path outside repo: %s", s);
+ if(*p == '/')
+ p++;
+ memmove(s, p, strlen(p)+1);
+ return s;
+}
+
void
+show(Biobuf *o, int flg, char *str, char *path)
+{
+ dirty |= flg;
+ if(!quiet && (printflg & flg))
+ Bprint(o, "%s%s\n", str, path);
+}
+
+void
usage(void)
{
fprint(2, "usage: %s [-qbc] [-f filt] [-b base] [paths...]\n", argv0);
@@ -217,14 +330,19 @@
void
main(int argc, char **argv)
{
- char *rpath, *tpath, *bpath, *base, buf[8], repo[512];
- char *p, *e;
- int i, dirty;
- Wres r;
+ char *p, *e, *ln, *base, **argrel, *parts[3], xbuf[8];
+ int i, j, c, line, wfd, *argn;
+ Biobuf *f, *o, *w;
Hash h;
- Dir *d;
+ Dir rn;
gitinit();
+ if(getwd(wdirpath, sizeof(wdirpath)) == nil)
+ sysfatal("getwd: %r");
+ if(findrepo(repopath, sizeof(repopath), &nrel) == -1)
+ sysfatal("find root: %r");
+ if(chdir(repopath) == -1)
+ sysfatal("chdir: %r");
ARGBEGIN{
case 'q':
@@ -235,6 +353,7 @@
tstr = "";
mstr = "";
astr = "";
+ ustr = "";
break;
case 'f':
for(p = EARGF(usage()); *p; p++)
@@ -243,109 +362,155 @@
case 'A': printflg |= Aflg; break;
case 'M': printflg |= Mflg; break;
case 'R': printflg |= Rflg; break;
+ case 'U': printflg |= Uflg; break;
default: usage(); break;
}
break;
case 'b':
- useindex = 0;
+ useidx = 0;
base = EARGF(usage());
if(resolveref(&h, base) == -1)
- sysfatal("no such ref '%s'", base);
+ sysfatal("no such reXf '%s'", base);
bdir = smprint(".git/fs/object/%H/tree", h);
break;
default:
usage();
- }ARGEND
+ }ARGEND;
- gitinit();
- if(findrepo(repo, sizeof(repo)) == -1)
- sysfatal("find root: %r");
- if(chdir(repo) == -1)
- sysfatal("chdir: %r");
- if(access(".git/fs/ctl", AEXIST) != 0)
- sysfatal("no running git/fs");
- dirty = 0;
- memset(&r, 0, sizeof(r));
if(printflg == 0)
printflg = Tflg | Aflg | Mflg | Rflg;
- if(argc == 0){
- if(access(TDIR, AEXIST) == 0 && readpaths(&r, TDIR, "") == -1)
- sysfatal("read tracked: %r");
- if(access(RDIR, AEXIST) == 0 && readpaths(&r, RDIR, "") == -1)
- sysfatal("read removed: %r");
- if(access(bdir, AEXIST) == 0 && readpaths(&r, bdir, "") == -1)
- sysfatal("read base: %r");
- }else{
- for(i = 0; i < argc; i++){
- tpath = smprint(TDIR"/%s", argv[i]);
- rpath = smprint(RDIR"/%s", argv[i]);
- bpath = smprint("%s/%s", bdir, argv[i]);
- if((d = dirstat(tpath)) == nil && (d = dirstat(rpath)) == nil && (d = dirstat(bpath)) == nil)
- goto nextarg;
- if(d->mode & DMDIR){
- readpaths(&r, TDIR, argv[i]);
- readpaths(&r, RDIR, argv[i]);
- readpaths(&r, bdir, argv[i]);
- }else{
- grow(&r);
- r.path[r.npath++] = estrdup(argv[i]);
+
+ nidx = 0;
+ idxsz = 32;
+ idx = emalloc(idxsz*sizeof(Idxent));
+ nwdir = 0;
+ wdirsz = 32;
+ wdir = emalloc(wdirsz*sizeof(Idxent));
+ argrel = emalloc(argc*sizeof(int));
+ argn = emalloc(argc*sizeof(int));
+ for(i = 0; i < argc; i++){
+ argrel[i] = reporel(argv[i]);
+ argn[i] = strlen(argrel[i]);
+ }
+ if((o = Bfdopen(1, OWRITE)) == nil)
+ sysfatal("open out: %r");
+ if(useidx){
+ if((f = Bopen(".git/INDEX9", OREAD)) == nil){
+ if(access(".git9/index9", AEXIST) == 0){
+ fprint(2, "index format conversion needed:\n");
+ fprint(2, "\t@{cd .git/index9/tracked && walk -f} | sed 's/^/T NOQID '} >> .git/INDEX9\n");
+ fprint(2, "\t@{cd .git/index9/removed && walk -f} | sed 's/^/R NOQID '} >> .git/INDEX9\n");
}
-nextarg:
- free(bpath);
- free(tpath);
- free(rpath);
- free(d);
+ sysfatal("open index: %r");
}
+ line = 0;
+ while((ln = Brdstr(f, '\n', 1)) != nil){
+ line++;
+ /* allow blank lines */
+ if(ln[0] == 0 || ln[0] == '\n')
+ continue;
+ if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts))
+ sysfatal(".git/INDEX9:%d: corrupt index", line);
+ if(nidx == idxsz){
+ idxsz += idxsz/2;
+ idx = realloc(idx, idxsz*sizeof(Idxent));
+ }
+ cleanname(parts[2]);
+ if(pfxmatch(parts[2], argrel, argn, argc)){
+ idx[nidx].state = *parts[0];
+ idx[nidx].qid = parseqid(parts[1]);
+ idx[nidx].path = strdup(parts[2]);
+ idx[nidx].order = nidx;
+ nidx++;
+ }
+ free(ln);
+ }
+ qsort(idx, nidx, sizeof(Idxent), idxcmp);
}
- dedup(&r);
- for(i = 0; i < r.npath; i++){
- p = r.path[i];
- d = dirstat(p);
- if(d && d->mode & DMDIR)
- goto next;
- rpath = smprint(RDIR"/%s", p);
- tpath = smprint(TDIR"/%s", p);
- bpath = smprint("%s/%s", bdir, p);
- /* Fast path: we don't want to force access to the rpath. */
- if(useindex && d && sameqid(d, tpath)) {
- if(!quiet && (printflg & Tflg))
- print("%s%s\n", tstr, p);
+ if(argc == 0)
+ loadwdir(".");
+ else for(i = 0; i < argc; i++)
+ loadwdir(argrel[i]);
+ qsort(wdir, nwdir, sizeof(Idxent), idxcmp);
+ i = 0;
+ j = 0;
+ while(i < nidx || j < nwdir){
+ /* find the last entry we tracked for a path */
+ while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0){
+ staleidx = 1;
+ i++;
+ }
+ while(j+1 < nwdir && strcmp(wdir[j].path, wdir[j+1].path) == 0)
+ j++;
+
+ if(i >= nidx)
+ c = 1;
+ else if(j >= nwdir)
+ c = -1;
+ else
+ c = strcmp(idx[i].path, wdir[j].path);
+ /* exists in both index and on disk */
+ if(c == 0){
+ if(idx[i].state == 'R')
+ show(o, Rflg, rstr, idx[i].path);
+ else if(idx[i].state == 'A' && !checkedin(&idx[i]))
+ show(o, Aflg, astr, idx[i].path);
+ else if(!samedata(&idx[i], &wdir[j]))
+ show(o, Mflg, mstr, idx[i].path);
+ else
+ show(o, Tflg, tstr, idx[i].path);
+ i++;
+ j++;
+ /* only exists in index */
+ }else if(c < 0){
+ show(o, Rflg, rstr, idx[i].path);
+ i++;
+ /* only exists on disk */
}else{
- if(d == nil || (useindex && access(rpath, AEXIST) == 0)){
- if(access(bpath, AEXIST) == 0){
- dirty |= Rflg;
- if(!quiet && (printflg & Rflg))
- print("%s%s\n", rstr, p);
- }
- }else if(access(bpath, AEXIST) == -1) {
- dirty |= Aflg;
- if(!quiet && (printflg & Aflg))
- print("%s%s\n", astr, p);
- }else if(samedata(p, bpath)){
- if(!quiet && (printflg & Tflg))
- print("%s%s\n", tstr, p);
- if(useindex)
- writeqid(d, tpath);
- }else{
- dirty |= Mflg;
- if(!quiet && (printflg & Mflg))
- print("%s%s\n", mstr, p);
- }
+ if(!useidx && checkedin(&wdir[j])){
+ if(samedata(nil, &wdir[j]))
+ show(o, Tflg, tstr, wdir[j].path);
+ else
+ show(o, Mflg, mstr, wdir[j].path);
+ }else if(printflg & Uflg)
+ show(o, Uflg, ustr, wdir[j].path);
+ j++;
}
- free(rpath);
- free(tpath);
- free(bpath);
-next:
- free(d);
}
+ Bterm(o);
+
+ if(0 && useidx && staleidx)
+ if((wfd = create(".git/INDEX9.new", OWRITE, 0644)) != -1){
+ if((w = Bfdopen(wfd, OWRITE)) == nil){
+ close(wfd);
+ goto Nope;
+ }
+ for(i = 0; i < nidx; i++){
+ while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0)
+ i++;
+ Bprint(w, "%c %llx.%ld.%hhx %s\n",
+ idx[i].state,
+ idx[i].qid.path, idx[i].qid.vers, idx[i].qid.type,
+ idx[i].path);
+ }
+ Bterm(w);
+ nulldir(&rn);
+ rn.name = "INDEX9";
+ if(remove(".git/INDEX9") == -1)
+ goto Nope;
+ if(dirwstat(".git/INDEX9.new", &rn) == -1)
+ sysfatal("rename: %r");
+ }
+
+Nope:
if(!dirty)
exits(nil);
- p = buf;
- e = buf + sizeof(buf);
+ p = xbuf;
+ e = p + sizeof(xbuf);
for(i = 0; (1 << i) != Tflg; i++)
if(dirty & (1 << i))
- p = seprint(p, e, "%c", "DMAT"[i]);
- exits(buf);
+ p = seprint(p, e, "%c", "RMAUT"[i]);
+ exits(xbuf);
}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-09-18 0:01 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-17 23:57 [9front] git: new index format, 235x faster walks ori
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).