From mboxrd@z Thu Jan 1 00:00:00 1970 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on inbox.vuxu.org X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=none autolearn=ham autolearn_force=no version=3.4.4 Received: (qmail 29384 invoked from network); 18 Sep 2023 00:01:16 -0000 Received: from 9front.inri.net (168.235.81.73) by inbox.vuxu.org with ESMTPUTF8; 18 Sep 2023 00:01:16 -0000 Received: from mimir.eigenstate.org ([206.124.132.107]) by 9front; Sun Sep 17 19:58:02 -0400 2023 Received: from abbatoir (pool-108-6-24-2.nycmny.fios.verizon.net [108.6.24.2]) by mimir.eigenstate.org (OpenSMTPD) with ESMTPSA id 9f5daf5f (TLSv1.2:ECDHE-RSA-AES256-SHA:256:NO) for <9front@9front.org>; Sun, 17 Sep 2023 16:57:58 -0700 (PDT) Message-ID: To: 9front@9front.org Date: Sun, 17 Sep 2023 19:57:57 -0400 From: ori@eigenstate.org MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit List-ID: <9front.9front.org> List-Help: X-Glyph: ➈ X-Bullshit: managed webscale strategy-oriented blockchain Subject: [9front] git: new index format, 235x faster walks Reply-To: 9front@9front.org Precedence: bulk This change replaces our index format with a flat file, where each line is in the form of: T qid path R qid path A qid path where T is a tracked file, R is a file that's flagged for removal, and A is a path that is added but not yet committed. Lines may be repeated, and the last line in the file is used to represent the status of a path. The QID is in the file so that we can skip reading the data from both the repo and the disk to see if a file has changed, and may be set to the special string NOQID to force a recheck, or to avoid needing to stat stuff. When runnig git/walk, we will opportunistically rewrite this index file to update the cached QIDs. Tested on the 9front repo: Old walk: @{cd / && time /sys/src/cmd/git/6.owalk -q} 0.08u 1.73s 2.82r /sys/src/cmd/git/6.owalk -q # status= DM New walk: @{cd / && time git/walk -q} 0.05u 0.02s 0.12r git/walk -q # status= RM The tests pass, but I'm sure there are still bugs. Testers wanted, and bug reports higly desired. diff cf6987b1f35c33402fe3061eae9f8ff526fee2a8 uncommitted --- a/sys/src/cmd/git/add +++ b/sys/src/cmd/git/add @@ -7,32 +7,18 @@ flagfmt='r:remove'; args='file ...' eval `''{aux/getflags $*} || exec aux/usage -add='tracked' -del='removed' -if(~ $remove 1){ - add='removed' - del='tracked' -} +s=A +if(~ $remove 1) + s=R if(~ $#* 0) exec aux/usage paths=`$nl{cleanname -d $gitrel $* | drop $gitroot} -if(~ $add tracked) +if(~ $s A) files=`$nl{walk -f ./$paths} if not - files=`$nl{cd .git/index9/tracked/ && walk -f ./$paths} - + files=`$nl{cd .git/fs/HEAD/tree && walk -f ./$paths} for(f in $files){ - if(! ~ `$nl{cleanname $f} .git/*){ - addpath=.git/index9/$add/$f - delpath=.git/index9/$del/$f - mkdir -p `$nl{basename -d $addpath} - mkdir -p `$nl{basename -d $delpath} - # We don't want a matching qid, so that - # git/walk doesn't think this came from - # a checkout. - echo -n > $addpath - rm -f $delpath - } + echo $s NOQID $f >> .git/INDEX9 } exit '' --- a/sys/src/cmd/git/clone +++ b/sys/src/cmd/git/clone @@ -87,11 +87,8 @@ git/fs @ {builtin cd $tree && tar cif /fd/1 .} | @ {tar xf /fd/0} \ || die 'checkout failed:' $status - for(f in `$nl{walk -f $tree | drop $tree}){ - idx=.git/index9/tracked/$f - mkdir -p `$nl{basename -d $idx} - walk -eq ./$f > $idx - } + {for(f in `$nl{cd $tree && walk -f}) + echo 'T NOQID '$f} > .git/INDEX9 } if not{ echo no default branch >[1=2] --- a/sys/src/cmd/git/commit +++ b/sys/src/cmd/git/commit @@ -65,8 +65,8 @@ fn parents{ if(! ~ $#revise 0) parents=`{cat $gitfs/HEAD/parent} - if not if(test -f .git/index9/merge-parents) - parents=`{cat .git/index9/merge-parents | sort | uniq} + if not if(test -f .git/merge-parents) + parents=`{cat .git/merge-parents | sort | uniq} if not if(~ $initial true) parents=() if not @@ -78,7 +78,7 @@ if(! ~ $#parents 0) pflags='-p'^$parents hash=`{git/save -n $"name -e $"email -m $"msg $pflags $files || die $status} - rm -f .git/index9/merge-parents + rm -f .git/merge-parents } fn update{ @@ -89,14 +89,10 @@ echo $branch: $hash echo $hash > $refpath for(f in $files){ - if(test -e .git/index9/removed/$f || ! test -e $f){ - rm -f .git/index9/removed/$f - rm -f .git/index9/tracked/$f - } - if not{ - mkdir -p `{basename -d $f} - walk -eq $f > .git/index9/tracked/$f - } + if(! test -e $f && ! test -e .git/object/$hash/tree/$f) + echo R NOQID $f >> .git/INDEX9 + if not + echo T NOQID $f >> .git/INDEX9 } } @@ -120,11 +116,11 @@ } files=() -if(test -f .git/index9/merge-parents) - files=`$nl{git/query -c `{cat .git/index9/merge-parents} | sed 's/^..//'} +if(test -f .git/merge-parents) + files=`$nl{git/query -c `{cat .git/merge-parents} | sed 's/^..//'} if(! ~ $#* 0) files=($files `$nl{git/walk -c `$nl{cleanname -d $gitrel $*}}) -if(~ $status '' || ~ $#files 0 && ! test -f .git/index9/merge-parents && ~ $#revise 0) +if(~ $status '' || ~ $#files 0 && ! test -f .git/merge-parents && ~ $#revise 0) die 'nothing to commit' $status @{ flag e + --- a/sys/src/cmd/git/conf.c +++ b/sys/src/cmd/git/conf.c @@ -59,7 +59,7 @@ main(int argc, char **argv) { char repo[512], *p, *s; - int i, j; + int i, j, nrel; ARGBEGIN{ case 'f': file[nfile++]=EARGF(usage()); break; @@ -69,7 +69,7 @@ }ARGEND; if(findroot){ - if(findrepo(repo, sizeof(repo)) == -1) + if(findrepo(repo, sizeof(repo), &nrel) == -1) sysfatal("%r"); print("%s\n", repo); exits(nil); --- a/sys/src/cmd/git/diff +++ b/sys/src/cmd/git/diff @@ -7,8 +7,10 @@ flagfmt='c:commit branch, s:summarize'; args='[file ...]' eval `''{aux/getflags $*} || exec aux/usage -if(~ $#commit 0) +if(~ $#commit 0){ commit=HEAD + cparam=() +} files=() if(! ~ $#* 0) @@ -16,7 +18,7 @@ branch=`{git/query -p $commit} if(~ $summarize 1){ - git/walk -fMAR -b $commit $files + git/walk -fMAR $cparam $files exit } @@ -24,7 +26,7 @@ mntgen /mnt/scratch bind $branch/tree/ /mnt/scratch/a bind . /mnt/scratch/b -for(f in `$nl{git/walk -c -fRMA -b $commit $files}){ +for(f in `$nl{git/walk -c -fRMA $cparam $files}){ if(~ $#showed 0){ echo diff `{git/query $commit} uncommitted showed=1 --- a/sys/src/cmd/git/git.h +++ b/sys/src/cmd/git/git.h @@ -301,9 +301,10 @@ int hassuffix(char *, char *); int swapsuffix(char *, int, char *, char *, char *); char *strip(char *); -int findrepo(char *, int); +int findrepo(char *, int, int*); int showprogress(int, int); u64int murmurhash2(void*, usize); +Qid parseqid(char*); /* packing */ void dtinit(Dtab *, Object*); --- a/sys/src/cmd/git/import +++ b/sys/src/cmd/git/import @@ -98,7 +98,8 @@ if(~ $#nocommit 0){ if(hash=`{git/save -n $aname -e $amail -N $name -E $email -m $msg -d $date $parents $files}){ echo $hash > $refpath - rm -f .git/index9/removed/$files + for(f in $files) + echo T NOQID $f >> .git/INDEX9 } } status='''' --- a/sys/src/cmd/git/init +++ b/sys/src/cmd/git/init @@ -31,7 +31,9 @@ echo '[branch "'$branch'"]' echo ' remote = origin' } - +>$dir/.git/INDEX9 { + echo +} >$dir/.git/HEAD { echo ref: refs/heads/$branch } --- a/sys/src/cmd/git/log.c +++ b/sys/src/cmd/git/log.c @@ -239,7 +239,7 @@ main(int argc, char **argv) { char path[1024], repo[1024], *p, *r; - int i, nrepo; + int i, nrel, nrepo; ARGBEGIN{ case 'e': @@ -259,7 +259,7 @@ break; }ARGEND; - if(findrepo(repo, sizeof(repo)) == -1) + if(findrepo(repo, sizeof(repo), &nrel) == -1) sysfatal("find root: %r"); nrepo = strlen(repo); if(argc != 0){ --- a/sys/src/cmd/git/merge +++ b/sys/src/cmd/git/merge @@ -38,8 +38,8 @@ git/revert . exit '' } -echo $ours >> .git/index9/merge-parents -echo $theirs >> .git/index9/merge-parents +echo $ours >> .git/merge-parents +echo $theirs >> .git/merge-parents merge $ours $base $theirs >[1=2] echo 'merge complete: remember to commit' --- /dev/null +++ b/sys/src/cmd/git/owalk.c @@ -1,0 +1,346 @@ +#include +#include +#include "git.h" + +#define NCACHE 4096 +#define TDIR ".git/index9/tracked" +#define RDIR ".git/index9/removed" +typedef struct Cache Cache; +typedef struct Wres Wres; +struct Cache { + Dir* cache; + int n; + int max; +}; + +struct Wres { + char **path; + int npath; + int pathsz; +}; + +enum { + Rflg = 1 << 0, + Mflg = 1 << 1, + Aflg = 1 << 2, + Tflg = 1 << 3, +}; + +Cache seencache[NCACHE]; +int quiet; +int useindex; +int printflg; +char *bdir = ".git/fs/HEAD/tree"; +char *rstr = "R "; +char *tstr = "T "; +char *mstr = "M "; +char *astr = "A "; + +int +seen(Dir *dir) +{ + Dir *dp; + int i; + Cache *c; + + c = &seencache[dir->qid.path&(NCACHE-1)]; + dp = c->cache; + for(i=0; in; i++, dp++) + if(dir->qid.path == dp->qid.path && + dir->type == dp->type && + dir->dev == dp->dev) + return 1; + if(c->n == c->max){ + if (c->max == 0) + c->max = 8; + else + c->max += c->max/2; + c->cache = realloc(c->cache, c->max*sizeof(Dir)); + if(c->cache == nil) + sysfatal("realloc: %r"); + } + c->cache[c->n++] = *dir; + return 0; +} + +void +grow(Wres *r) +{ + if(r->npath == r->pathsz){ + r->pathsz = 2*r->pathsz + 1; + r->path = erealloc(r->path, r->pathsz * sizeof(char*)); + } +} + +int +readpaths(Wres *r, char *pfx, char *dir) +{ + char *f, *sub, *full, *sep; + Dir *d; + int fd, ret, i, n; + + d = nil; + ret = -1; + sep = ""; + if(dir[0] != 0) + sep = "/"; + if((full = smprint("%s/%s", pfx, dir)) == nil) + sysfatal("smprint: %r"); + if((fd = open(full, OREAD)) < 0) + goto error; + while((n = dirread(fd, &d)) > 0){ + for(i = 0; i < n; i++){ + if(seen(&d[i])) + continue; + if(d[i].qid.type & QTDIR){ + if((sub = smprint("%s%s%s", dir, sep, d[i].name)) == nil) + sysfatal("smprint: %r"); + if(readpaths(r, pfx, sub) == -1){ + free(sub); + goto error; + } + free(sub); + }else{ + grow(r); + if((f = smprint("%s%s%s", dir, sep, d[i].name)) == nil) + sysfatal("smprint: %r"); + r->path[r->npath++] = f; + } + } + free(d); + } + ret = r->npath; +error: + close(fd); + free(full); + return ret; +} + +int +cmp(void *pa, void *pb) +{ + return strcmp(*(char **)pa, *(char **)pb); +} + +void +dedup(Wres *r) +{ + int i, o; + + if(r->npath <= 1) + return; + o = 0; + qsort(r->path, r->npath, sizeof(r->path[0]), cmp); + for(i = 1; i < r->npath; i++) + if(strcmp(r->path[o], r->path[i]) != 0) + r->path[++o] = r->path[i]; + r->npath = o + 1; +} + +int +sameqid(Dir *d, char *qf) +{ + char indexqid[64], fileqid[64], *p; + int fd, n; + + if(!d) + return 0; + if((fd = open(qf, OREAD)) == -1) + return 0; + if((n = readn(fd, indexqid, sizeof(indexqid) - 1)) == -1) + return 0; + indexqid[n] = 0; + close(fd); + if((p = strpbrk(indexqid, " \t\n\r")) != nil) + *p = 0; + + snprint(fileqid, sizeof(fileqid), "%ullx.%uld.%.2uhhx", + d->qid.path, d->qid.vers, d->qid.type); + + if(strcmp(indexqid, fileqid) == 0) + return 1; + return 0; +} + +void +writeqid(Dir *d, char *qf) +{ + int fd; + + if((fd = create(qf, OWRITE, 0666)) == -1) + return; + fprint(fd, "%ullx.%uld.%.2uhhx\n", + d->qid.path, d->qid.vers, d->qid.type); + close(fd); +} + +int +samedata(char *pa, char *pb) +{ + char ba[32*1024], bb[32*1024]; + int fa, fb, na, nb, same; + + same = 0; + fa = open(pa, OREAD); + fb = open(pb, OREAD); + if(fa == -1 || fb == -1){ + goto mismatch; + } + while(1){ + if((na = readn(fa, ba, sizeof(ba))) == -1) + goto mismatch; + if((nb = readn(fb, bb, sizeof(bb))) == -1) + goto mismatch; + if(na != nb) + goto mismatch; + if(na == 0) + break; + if(memcmp(ba, bb, na) != 0) + goto mismatch; + } + same = 1; +mismatch: + if(fa != -1) + close(fa); + if(fb != -1) + close(fb); + return same; +} + +void +usage(void) +{ + fprint(2, "usage: %s [-qbc] [-f filt] [-b base] [paths...]\n", argv0); + exits("usage"); +} + +void +main(int argc, char **argv) +{ + char *rpath, *tpath, *bpath, *base, buf[8], repo[512]; + char *p, *e; + int i, dirty; + Wres r; + Hash h; + Dir *d; + + gitinit(); + + ARGBEGIN{ + case 'q': + quiet++; + break; + case 'c': + rstr = ""; + tstr = ""; + mstr = ""; + astr = ""; + break; + case 'f': + for(p = EARGF(usage()); *p; p++) + switch(*p){ + case 'T': printflg |= Tflg; break; + case 'A': printflg |= Aflg; break; + case 'M': printflg |= Mflg; break; + case 'R': printflg |= Rflg; break; + default: usage(); break; + } + break; + case 'b': + useindex = 0; + base = EARGF(usage()); + if(resolveref(&h, base) == -1) + sysfatal("no such ref '%s'", base); + bdir = smprint(".git/fs/object/%H/tree", h); + break; + default: + usage(); + }ARGEND + + if(access(".git/fs/ctl", AEXIST) != 0) + sysfatal("no running git/fs"); + dirty = 0; + memset(&r, 0, sizeof(r)); + if(printflg == 0) + printflg = Tflg | Aflg | Mflg | Rflg; + if(argc == 0){ + if(access(TDIR, AEXIST) == 0 && readpaths(&r, TDIR, "") == -1) + sysfatal("read tracked: %r"); + if(access(RDIR, AEXIST) == 0 && readpaths(&r, RDIR, "") == -1) + sysfatal("read removed: %r"); + if(access(bdir, AEXIST) == 0 && readpaths(&r, bdir, "") == -1) + sysfatal("read base: %r"); + }else{ + for(i = 0; i < argc; i++){ + tpath = smprint(TDIR"/%s", argv[i]); + rpath = smprint(RDIR"/%s", argv[i]); + bpath = smprint("%s/%s", bdir, argv[i]); + if((d = dirstat(tpath)) == nil && (d = dirstat(rpath)) == nil && (d = dirstat(bpath)) == nil) + goto nextarg; + if(d->mode & DMDIR){ + readpaths(&r, TDIR, argv[i]); + readpaths(&r, RDIR, argv[i]); + readpaths(&r, bdir, argv[i]); + }else{ + grow(&r); + r.path[r.npath++] = estrdup(argv[i]); + } +nextarg: + free(bpath); + free(tpath); + free(rpath); + free(d); + } + } + dedup(&r); + + for(i = 0; i < r.npath; i++){ + p = r.path[i]; + d = dirstat(p); + if(d && d->mode & DMDIR) + goto next; + rpath = smprint(RDIR"/%s", p); + tpath = smprint(TDIR"/%s", p); + bpath = smprint("%s/%s", bdir, p); + /* Fast path: we don't want to force access to the rpath. */ + if(useindex && d && sameqid(d, tpath)) { + if(!quiet && (printflg & Tflg)) + print("%s%s\n", tstr, p); + }else{ + if(d == nil || (useindex && access(rpath, AEXIST) == 0)){ + if(access(bpath, AEXIST) == 0){ + dirty |= Rflg; + if(!quiet && (printflg & Rflg)) + print("%s%s\n", rstr, p); + } + }else if(access(bpath, AEXIST) == -1) { + dirty |= Aflg; + if(!quiet && (printflg & Aflg)) + print("%s%s\n", astr, p); + }else if(samedata(p, bpath)){ + if(!quiet && (printflg & Tflg)) + print("%s%s\n", tstr, p); + if(useindex) + writeqid(d, tpath); + }else{ + dirty |= Mflg; + if(!quiet && (printflg & Mflg)) + print("%s%s\n", mstr, p); + } + } + free(rpath); + free(tpath); + free(bpath); +next: + free(d); + } + if(!dirty) + exits(nil); + + p = buf; + e = buf + sizeof(buf); + for(i = 0; (1 << i) != Tflg; i++) + if(dirty & (1 << i)) + p = seprint(p, e, "%c", "DMAT"[i]); + exits(buf); +} --- a/sys/src/cmd/git/pack.c +++ b/sys/src/cmd/git/pack.c @@ -66,7 +66,7 @@ Object *lruhead; Object *lrutail; vlong ncache; -vlong cachemax = 512*MiB; +vlong cachemax = 1024*MiB; Packf *packf; int npackf; int openpacks; --- a/sys/src/cmd/git/query.c +++ b/sys/src/cmd/git/query.c @@ -152,10 +152,10 @@ void main(int argc, char **argv) { - int i, j, n; - Hash *h; - char *p, *e, *s, *objpfx; char query[2048], repo[512]; + char *p, *e, *s, *objpfx; + int i, j, n, nrel; + Hash *h; ARGBEGIN{ case 'd': chattygit++; break; @@ -170,7 +170,7 @@ if(argc == 0) usage(); - if(findrepo(repo, sizeof(repo)) == -1) + if(findrepo(repo, sizeof(repo), &nrel) == -1) sysfatal("find root: %r"); if(chdir(repo) == -1) sysfatal("chdir: %r"); --- a/sys/src/cmd/git/save.c +++ b/sys/src/cmd/git/save.c @@ -10,6 +10,14 @@ char *dat; int ndat; }; + +struct Idxent { + char *path; + Qid qid; + char state; + int order; +}; + enum { Maxparents = 16, }; @@ -21,7 +29,9 @@ char *commitmsg; Hash parents[Maxparents]; int nparents; - +Idxent *idx; +int idxsz; +int nidx; int gitmode(Dirent *e) { @@ -38,6 +48,20 @@ } int +idxcmp(void *pa, void *pb) +{ + Idxent *a, *b; + int c; + + a = (Idxent*)pa; + b = (Idxent*)pb; + if((c = strcmp(a->path, b->path)) != 0) + return c; + assert(a->order != b->order); + return a-> order < b->order ? -1 : 1; +} + +int entcmp(void *pa, void *pb) { char abuf[256], bbuf[256], *ae, *be; @@ -184,27 +208,22 @@ int tracked(char *path) { - char ipath[256]; - Dir *d; + int lo, hi, mid, r; - /* Explicitly removed. */ - snprint(ipath, sizeof(ipath), ".git/index9/removed/%s", path); - if(strstr(cleanname(ipath), ".git/index9/removed") != ipath) - sysfatal("path %s leaves index", ipath); - d = dirstat(ipath); - if(d != nil && d->qid.type != QTDIR){ - free(d); - return 0; + r = -1; + lo = 0; + hi = nidx-1; + while(lo <= hi){ + mid = (hi + lo) / 2; + r = strcmp(path, idx[mid].path); + if(r < 0) + hi = mid-1; + else if(r > 0) + lo = mid+1; + else + break; } - - /* Explicitly added. */ - snprint(ipath, sizeof(ipath), ".git/index9/tracked/%s", path); - if(strstr(cleanname(ipath), ".git/index9/tracked") != ipath) - sysfatal("path %s leaves index", ipath); - if(access(ipath, AEXIST) == 0) - return 1; - - return 0; + return r == 0; } int @@ -354,10 +373,11 @@ void main(int argc, char **argv) { + char *ln, *dstr, *parts[3], cwd[1024]; + int i, r, line, ncwd; Hash th, ch; - char *dstr, cwd[1024]; - int i, r, ncwd; vlong date; + Biobuf *f; Object *t; gitinit(); @@ -425,6 +445,32 @@ } t = findroot(); + nidx = 0; + idxsz = 32; + idx = emalloc(idxsz*sizeof(Idxent)); + if((f = Bopen(".git/INDEX9", OREAD)) == nil) + sysfatal("open index: %r"); + line = 0; + while((ln = Brdstr(f, '\n', 1)) != nil){ + line++; + if(ln[0] == 0 || ln[0] == '\n') + continue; + if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts)) + sysfatal(".git/INDEX9:%d: corrupt index", line); + if(nidx == idxsz){ + idxsz += idxsz/2; + idx = realloc(idx, idxsz*sizeof(Idxent)); + } + cleanname(parts[2]); + idx[nidx].state = *parts[0]; + idx[nidx].qid = parseqid(parts[1]); + idx[nidx].path = strdup(parts[2]); + idx[nidx].order = nidx; + nidx++; + free(ln); + } + Bterm(f); + qsort(idx, nidx, sizeof(Idxent), idxcmp); r = treeify(t, argv, argv + argc, 0, &th); if(r == -1) sysfatal("could not commit: %r\n"); --- a/sys/src/cmd/git/util.c +++ b/sys/src/cmd/git/util.c @@ -294,7 +294,7 @@ /* Finds the directory containing the git repo. */ int -findrepo(char *buf, int nbuf) +findrepo(char *buf, int nbuf, int *nrel) { char *p, *suff; @@ -302,6 +302,7 @@ if(getwd(buf, nbuf - strlen(suff) - 1) == nil) return -1; + *nrel = 0; for(p = buf + strlen(buf); p != nil; p = strrchr(buf, '/')){ strcpy(p, suff); if(access(buf, AEXIST) == 0){ @@ -308,6 +309,7 @@ p[p == buf] = '\0'; return 0; } + *nrel += 1; *p = '\0'; } werrstr("not a git repository"); @@ -441,4 +443,25 @@ h ^= h >> 15; return h; +} + +Qid +parseqid(char *s) +{ + char *e; + Qid q; + + if(strcmp(s, "NOQID") == 0) + return (Qid){-1, -1, -1}; + e = s; + q.path = strtoull(e, &e, 16); + if(*e != '.') + sysfatal("corrupt qid: %s (%s)\n", s, e); + q.vers = strtoul(e+1, &e, 10); + if(*e != '.') + sysfatal("corrupt qid: %s (%s)\n", s, e); + q.type = strtoul(e+1, &e, 16); + if(*e != '\0') + sysfatal("corrupt qid: %s (%x)\n", s, *e); + return q; } --- a/sys/src/cmd/git/walk.c +++ b/sys/src/cmd/git/walk.c @@ -2,53 +2,79 @@ #include #include "git.h" +typedef struct Seen Seen; +typedef struct Idxed Idxed; +typedef struct Idxent Idxent; + #define NCACHE 4096 -#define TDIR ".git/index9/tracked" -#define RDIR ".git/index9/removed" -typedef struct Cache Cache; -typedef struct Wres Wres; -struct Cache { + +enum { + Rflg = 1 << 0, + Mflg = 1 << 1, + Aflg = 1 << 2, + Uflg = 1 << 3, + /* everything after this is not an error */ + Tflg = 1 << 4, +}; + +struct Idxent { + char *path; + Qid qid; + char state; + int order; +}; + +struct Seen { Dir* cache; int n; int max; }; -struct Wres { - char **path; - int npath; - int pathsz; +struct Idxed { + char** cache; + int n; + int max; }; -enum { - Rflg = 1 << 0, - Mflg = 1 << 1, - Aflg = 1 << 2, - Tflg = 1 << 3, -}; +Seen seentab[NCACHE]; +Idxed idxtab[NCACHE]; +char repopath[1024]; +char wdirpath[1024]; +char *rstr = "R "; +char *mstr = "M "; +char *astr = "A "; +char *ustr = "U "; +char *tstr = "T "; +char *bdir = ".git/fs/HEAD/tree"; +int useidx = 1; +int nrel; +int quiet; +int dirty; +int printflg; -Cache seencache[NCACHE]; -int quiet; -int useindex; -int printflg; -char *bdir = ".git/fs/HEAD/tree"; -char *rstr = "R "; -char *tstr = "T "; -char *mstr = "M "; -char *astr = "A "; +Idxent *idx; +int idxsz; +int nidx; +int staleidx; +Idxent *wdir; +int wdirsz; +int nwdir; +int loadwdir(char*); + int seen(Dir *dir) { + Seen *c; Dir *dp; int i; - Cache *c; - c = &seencache[dir->qid.path&(NCACHE-1)]; + c = &seentab[dir->qid.path&(NCACHE-1)]; dp = c->cache; for(i=0; in; i++, dp++) - if(dir->qid.path == dp->qid.path && - dir->type == dp->type && - dir->dev == dp->dev) + if(dir->qid.path == dp->qid.path + && dir->qid.type == dp->qid.type + && dir->dev == dp->dev) return 1; if(c->n == c->max){ if (c->max == 0) @@ -63,129 +89,91 @@ return 0; } -void -grow(Wres *r) -{ - if(r->npath == r->pathsz){ - r->pathsz = 2*r->pathsz + 1; - r->path = erealloc(r->path, r->pathsz * sizeof(char*)); - } -} - int -readpaths(Wres *r, char *pfx, char *dir) +checkedin(Idxent *e) { - char *f, *sub, *full, *sep; - Dir *d; - int fd, ret, i, n; + char *p; + int r; - d = nil; - ret = -1; - sep = ""; - if(dir[0] != 0) - sep = "/"; - if((full = smprint("%s/%s", pfx, dir)) == nil) - sysfatal("smprint: %r"); - if((fd = open(full, OREAD)) < 0) - goto error; - while((n = dirread(fd, &d)) > 0){ - for(i = 0; i < n; i++){ - if(seen(&d[i])) - continue; - if(d[i].qid.type & QTDIR){ - if((sub = smprint("%s%s%s", dir, sep, d[i].name)) == nil) - sysfatal("smprint: %r"); - if(readpaths(r, pfx, sub) == -1){ - free(sub); - goto error; - } - free(sub); - }else{ - grow(r); - if((f = smprint("%s%s%s", dir, sep, d[i].name)) == nil) - sysfatal("smprint: %r"); - r->path[r->npath++] = f; - } - } - free(d); - } - ret = r->npath; -error: - close(fd); - free(full); - return ret; + p = smprint("%s/%s", bdir, e->path); + r = access(p, AEXIST); + e->state = 'T'; + staleidx = 1; + free(p); + return r == 0; } int -cmp(void *pa, void *pb) +indexed(char *path, int isdir) { - return strcmp(*(char **)pa, *(char **)pb); -} + int lo, hi, mid, n, r; + char *s; -void -dedup(Wres *r) -{ - int i, o; - - if(r->npath <= 1) - return; - o = 0; - qsort(r->path, r->npath, sizeof(r->path[0]), cmp); - for(i = 1; i < r->npath; i++) - if(strcmp(r->path[o], r->path[i]) != 0) - r->path[++o] = r->path[i]; - r->npath = o + 1; + if(!useidx){ + s = smprint("%s/%s", bdir, path); + r = access(s, AEXIST); + free(s); + return r == 0; + } + s = path; + if(isdir) + s = smprint("%s/", path); + r = -1; + lo = 0; + hi = nidx-1; + n = strlen(s); + while(lo <= hi){ + mid = (hi + lo) / 2; + if(isdir) + r = strncmp(s, idx[mid].path, n); + else + r = strcmp(s, idx[mid].path); + if(r < 0) + hi = mid-1; + else if(r > 0) + lo = mid+1; + else + break; + } + if(isdir) + free(s); + return r == 0; } int -sameqid(Dir *d, char *qf) +idxcmp(void *pa, void *pb) { - char indexqid[64], fileqid[64], *p; - int fd, n; + Idxent *a, *b; + int c; - if(!d) - return 0; - if((fd = open(qf, OREAD)) == -1) - return 0; - if((n = readn(fd, indexqid, sizeof(indexqid) - 1)) == -1) - return 0; - indexqid[n] = 0; - close(fd); - if((p = strpbrk(indexqid, " \t\n\r")) != nil) - *p = 0; - - snprint(fileqid, sizeof(fileqid), "%ullx.%uld.%.2uhhx", - d->qid.path, d->qid.vers, d->qid.type); - - if(strcmp(indexqid, fileqid) == 0) - return 1; - return 0; + a = (Idxent*)pa; + b = (Idxent*)pb; + if((c = strcmp(a->path, b->path)) != 0) + return c; + assert(a->order != b->order); + return a-> order < b->order ? -1 : 1; } -void -writeqid(Dir *d, char *qf) -{ - int fd; - - if((fd = create(qf, OWRITE, 0666)) == -1) - return; - fprint(fd, "%ullx.%uld.%.2uhhx\n", - d->qid.path, d->qid.vers, d->qid.type); - close(fd); -} - int -samedata(char *pa, char *pb) +samedata(Idxent *a, Idxent *b) { - char ba[32*1024], bb[32*1024]; + char *gitpath, ba[IOUNIT], bb[IOUNIT]; int fa, fb, na, nb, same; + if(a != nil){ + if(a->qid.path == b->qid.path + && a->qid.vers == b->qid.vers + && a->qid.type == b->qid.type) + return 1; + } + same = 0; - fa = open(pa, OREAD); - fb = open(pb, OREAD); - if(fa == -1 || fb == -1){ + if((gitpath = smprint("%s/%s", bdir, b->path)) == nil) + sysfatal("smprint: %r"); + fa = open(b->path, OREAD); + fb = open(gitpath, OREAD); + if(fa == -1 || fb == -1) goto mismatch; - } while(1){ if((na = readn(fa, ba, sizeof(ba))) == -1) goto mismatch; @@ -198,7 +186,11 @@ if(memcmp(ba, bb, na) != 0) goto mismatch; } + if(a != nil) + a->qid = b->qid; same = 1; + staleidx = 1; + mismatch: if(fa != -1) close(fa); @@ -207,7 +199,128 @@ return same; } +int +loadent(char *dir, Dir *d, int fullpath) +{ + char *path; + int ret, isdir; + Idxent *e; + + if(fullpath) + path = strdup(dir); + else + path = smprint("%s/%s", dir, d->name); + if(path == nil) + sysfatal("smprint: %r"); + + ret = 0; + isdir = d->qid.type & QTDIR; + cleanname(path); + if((printflg & Uflg) == 0 && !indexed(path, isdir)) + return 0; + if(isdir){ + ret = loadwdir(path); + free(path); + }else{ + if(nwdir == wdirsz){ + wdirsz += wdirsz/2; + wdir = erealloc(wdir, wdirsz*sizeof(Idxent)); + } + e = wdir + nwdir; + e->path = path; + e->qid = d->qid; + e->order = nwdir; + e->state = 'T'; + nwdir++; + } + return ret; +} + +int +loadwdir(char *path) +{ + int fd, ret, i, n; + Dir *d, *e; + + d = nil; + e = nil; + ret = -1; + + cleanname(path); + if(strncmp(path, ".git/", 5) == 0) + return 0; + if((fd = open(path, OREAD)) < 0) + goto error; + if((e = dirfstat(fd)) == nil) + sysfatal("fstat: %r"); + if(e->qid.type & QTDIR) + while((n = dirread(fd, &d)) > 0){ + for(i = 0; i < n; i++) + if(loadent(path, &d[i], 0) == -1) + goto error; + free(d); + } + else{ + if(loadent(path, e, 1) == -1) + goto error; + } + ret = 0; +error: + free(e); + if(fd != -1) + close(fd); + return ret; +} + +int +pfxmatch(char *p, char **pfx, int *pfxlen, int npfx) +{ + int i; + + if(npfx == 0) + return 1; + for(i = 0; i < npfx; i++){ + if(strncmp(p, pfx[i], pfxlen[i]) == 0 + && (p[pfxlen[i]] == '/' || p[pfxlen[i]] == 0)) + return 1; + if(strcmp(pfx[i], ".") == 0 || *pfx[i] == 0) + return 1; + } + return 0; +} + + +char* +reporel(char *s) +{ + char *p; + int n; + + if(*s == '/') + s = strdup(s); + else + s = smprint("%s/%s", wdirpath, s); + p = cleanname(s); + n = strlen(repopath); + if(strncmp(s, repopath, n) == 0) + p = s + n; + else + sysfatal("path outside repo: %s", s); + if(*p == '/') + p++; + memmove(s, p, strlen(p)+1); + return s; +} + void +show(Biobuf *o, int flg, char *str, char *path) +{ + dirty |= flg; + if(!quiet && (printflg & flg)) + Bprint(o, "%s%s\n", str, path); +} + +void usage(void) { fprint(2, "usage: %s [-qbc] [-f filt] [-b base] [paths...]\n", argv0); @@ -217,14 +330,19 @@ void main(int argc, char **argv) { - char *rpath, *tpath, *bpath, *base, buf[8], repo[512]; - char *p, *e; - int i, dirty; - Wres r; + char *p, *e, *ln, *base, **argrel, *parts[3], xbuf[8]; + int i, j, c, line, wfd, *argn; + Biobuf *f, *o, *w; Hash h; - Dir *d; + Dir rn; gitinit(); + if(getwd(wdirpath, sizeof(wdirpath)) == nil) + sysfatal("getwd: %r"); + if(findrepo(repopath, sizeof(repopath), &nrel) == -1) + sysfatal("find root: %r"); + if(chdir(repopath) == -1) + sysfatal("chdir: %r"); ARGBEGIN{ case 'q': @@ -235,6 +353,7 @@ tstr = ""; mstr = ""; astr = ""; + ustr = ""; break; case 'f': for(p = EARGF(usage()); *p; p++) @@ -243,109 +362,155 @@ case 'A': printflg |= Aflg; break; case 'M': printflg |= Mflg; break; case 'R': printflg |= Rflg; break; + case 'U': printflg |= Uflg; break; default: usage(); break; } break; case 'b': - useindex = 0; + useidx = 0; base = EARGF(usage()); if(resolveref(&h, base) == -1) - sysfatal("no such ref '%s'", base); + sysfatal("no such reXf '%s'", base); bdir = smprint(".git/fs/object/%H/tree", h); break; default: usage(); - }ARGEND + }ARGEND; - gitinit(); - if(findrepo(repo, sizeof(repo)) == -1) - sysfatal("find root: %r"); - if(chdir(repo) == -1) - sysfatal("chdir: %r"); - if(access(".git/fs/ctl", AEXIST) != 0) - sysfatal("no running git/fs"); - dirty = 0; - memset(&r, 0, sizeof(r)); if(printflg == 0) printflg = Tflg | Aflg | Mflg | Rflg; - if(argc == 0){ - if(access(TDIR, AEXIST) == 0 && readpaths(&r, TDIR, "") == -1) - sysfatal("read tracked: %r"); - if(access(RDIR, AEXIST) == 0 && readpaths(&r, RDIR, "") == -1) - sysfatal("read removed: %r"); - if(access(bdir, AEXIST) == 0 && readpaths(&r, bdir, "") == -1) - sysfatal("read base: %r"); - }else{ - for(i = 0; i < argc; i++){ - tpath = smprint(TDIR"/%s", argv[i]); - rpath = smprint(RDIR"/%s", argv[i]); - bpath = smprint("%s/%s", bdir, argv[i]); - if((d = dirstat(tpath)) == nil && (d = dirstat(rpath)) == nil && (d = dirstat(bpath)) == nil) - goto nextarg; - if(d->mode & DMDIR){ - readpaths(&r, TDIR, argv[i]); - readpaths(&r, RDIR, argv[i]); - readpaths(&r, bdir, argv[i]); - }else{ - grow(&r); - r.path[r.npath++] = estrdup(argv[i]); + + nidx = 0; + idxsz = 32; + idx = emalloc(idxsz*sizeof(Idxent)); + nwdir = 0; + wdirsz = 32; + wdir = emalloc(wdirsz*sizeof(Idxent)); + argrel = emalloc(argc*sizeof(int)); + argn = emalloc(argc*sizeof(int)); + for(i = 0; i < argc; i++){ + argrel[i] = reporel(argv[i]); + argn[i] = strlen(argrel[i]); + } + if((o = Bfdopen(1, OWRITE)) == nil) + sysfatal("open out: %r"); + if(useidx){ + if((f = Bopen(".git/INDEX9", OREAD)) == nil){ + if(access(".git9/index9", AEXIST) == 0){ + fprint(2, "index format conversion needed:\n"); + fprint(2, "\t@{cd .git/index9/tracked && walk -f} | sed 's/^/T NOQID '} >> .git/INDEX9\n"); + fprint(2, "\t@{cd .git/index9/removed && walk -f} | sed 's/^/R NOQID '} >> .git/INDEX9\n"); } -nextarg: - free(bpath); - free(tpath); - free(rpath); - free(d); + sysfatal("open index: %r"); } + line = 0; + while((ln = Brdstr(f, '\n', 1)) != nil){ + line++; + /* allow blank lines */ + if(ln[0] == 0 || ln[0] == '\n') + continue; + if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts)) + sysfatal(".git/INDEX9:%d: corrupt index", line); + if(nidx == idxsz){ + idxsz += idxsz/2; + idx = realloc(idx, idxsz*sizeof(Idxent)); + } + cleanname(parts[2]); + if(pfxmatch(parts[2], argrel, argn, argc)){ + idx[nidx].state = *parts[0]; + idx[nidx].qid = parseqid(parts[1]); + idx[nidx].path = strdup(parts[2]); + idx[nidx].order = nidx; + nidx++; + } + free(ln); + } + qsort(idx, nidx, sizeof(Idxent), idxcmp); } - dedup(&r); - for(i = 0; i < r.npath; i++){ - p = r.path[i]; - d = dirstat(p); - if(d && d->mode & DMDIR) - goto next; - rpath = smprint(RDIR"/%s", p); - tpath = smprint(TDIR"/%s", p); - bpath = smprint("%s/%s", bdir, p); - /* Fast path: we don't want to force access to the rpath. */ - if(useindex && d && sameqid(d, tpath)) { - if(!quiet && (printflg & Tflg)) - print("%s%s\n", tstr, p); + if(argc == 0) + loadwdir("."); + else for(i = 0; i < argc; i++) + loadwdir(argrel[i]); + qsort(wdir, nwdir, sizeof(Idxent), idxcmp); + i = 0; + j = 0; + while(i < nidx || j < nwdir){ + /* find the last entry we tracked for a path */ + while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0){ + staleidx = 1; + i++; + } + while(j+1 < nwdir && strcmp(wdir[j].path, wdir[j+1].path) == 0) + j++; + + if(i >= nidx) + c = 1; + else if(j >= nwdir) + c = -1; + else + c = strcmp(idx[i].path, wdir[j].path); + /* exists in both index and on disk */ + if(c == 0){ + if(idx[i].state == 'R') + show(o, Rflg, rstr, idx[i].path); + else if(idx[i].state == 'A' && !checkedin(&idx[i])) + show(o, Aflg, astr, idx[i].path); + else if(!samedata(&idx[i], &wdir[j])) + show(o, Mflg, mstr, idx[i].path); + else + show(o, Tflg, tstr, idx[i].path); + i++; + j++; + /* only exists in index */ + }else if(c < 0){ + show(o, Rflg, rstr, idx[i].path); + i++; + /* only exists on disk */ }else{ - if(d == nil || (useindex && access(rpath, AEXIST) == 0)){ - if(access(bpath, AEXIST) == 0){ - dirty |= Rflg; - if(!quiet && (printflg & Rflg)) - print("%s%s\n", rstr, p); - } - }else if(access(bpath, AEXIST) == -1) { - dirty |= Aflg; - if(!quiet && (printflg & Aflg)) - print("%s%s\n", astr, p); - }else if(samedata(p, bpath)){ - if(!quiet && (printflg & Tflg)) - print("%s%s\n", tstr, p); - if(useindex) - writeqid(d, tpath); - }else{ - dirty |= Mflg; - if(!quiet && (printflg & Mflg)) - print("%s%s\n", mstr, p); - } + if(!useidx && checkedin(&wdir[j])){ + if(samedata(nil, &wdir[j])) + show(o, Tflg, tstr, wdir[j].path); + else + show(o, Mflg, mstr, wdir[j].path); + }else if(printflg & Uflg) + show(o, Uflg, ustr, wdir[j].path); + j++; } - free(rpath); - free(tpath); - free(bpath); -next: - free(d); } + Bterm(o); + + if(0 && useidx && staleidx) + if((wfd = create(".git/INDEX9.new", OWRITE, 0644)) != -1){ + if((w = Bfdopen(wfd, OWRITE)) == nil){ + close(wfd); + goto Nope; + } + for(i = 0; i < nidx; i++){ + while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0) + i++; + Bprint(w, "%c %llx.%ld.%hhx %s\n", + idx[i].state, + idx[i].qid.path, idx[i].qid.vers, idx[i].qid.type, + idx[i].path); + } + Bterm(w); + nulldir(&rn); + rn.name = "INDEX9"; + if(remove(".git/INDEX9") == -1) + goto Nope; + if(dirwstat(".git/INDEX9.new", &rn) == -1) + sysfatal("rename: %r"); + } + +Nope: if(!dirty) exits(nil); - p = buf; - e = buf + sizeof(buf); + p = xbuf; + e = p + sizeof(xbuf); for(i = 0; (1 << i) != Tflg; i++) if(dirty & (1 << i)) - p = seprint(p, e, "%c", "DMAT"[i]); - exits(buf); + p = seprint(p, e, "%c", "RMAUT"[i]); + exits(xbuf); }