From mboxrd@z Thu Jan 1 00:00:00 1970 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on inbox.vuxu.org X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=none autolearn=ham autolearn_force=no version=3.4.4 Received: (qmail 3741 invoked from network); 18 Sep 2023 02:22:27 -0000 Received: from 9front.inri.net (168.235.81.73) by inbox.vuxu.org with ESMTPUTF8; 18 Sep 2023 02:22:27 -0000 Received: from mimir.eigenstate.org ([206.124.132.107]) by 9front; Sun Sep 17 22:21:12 -0400 2023 Received: from abbatoir (pool-108-6-24-2.nycmny.fios.verizon.net [108.6.24.2]) by mimir.eigenstate.org (OpenSMTPD) with ESMTPSA id 063f5879 (TLSv1.2:ECDHE-RSA-AES256-SHA:256:NO) for <9front@9front.org>; Sun, 17 Sep 2023 19:21:09 -0700 (PDT) Message-ID: To: 9front@9front.org Date: Sun, 17 Sep 2023 22:21:07 -0400 From: ori@eigenstate.org MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit List-ID: <9front.9front.org> List-Help: X-Glyph: ➈ X-Bullshit: overflow-preventing responsive rich-client ACPI over ORM service STM strategy Subject: [9front] [v2] git: new index format, 235x faster walks Reply-To: 9front@9front.org Precedence: bulk New revision for testing, with a few bugs fixed: - The first revision disabled the opportunistic rewrite of the index; I'd turned it off for testing, but forgot to turn it back on; with this revision, you'll actually see the promised speedups. - The mode is now added to the index, so that we can detect changes to the execute bit and such. - The index is not regenerated as eagerly when checking if a file is checked in, which should reduce churn on the dump. - When passing walk a filter list, we still load the whole thing so that we don't remove files from the index by accident. This was hidden by the bug in the first rev. diff cf6987b1f35c33402fe3061eae9f8ff526fee2a8 uncommitted --- a/sys/src/cmd/git/add +++ b/sys/src/cmd/git/add @@ -7,32 +7,17 @@ flagfmt='r:remove'; args='file ...' eval `''{aux/getflags $*} || exec aux/usage -add='tracked' -del='removed' -if(~ $remove 1){ - add='removed' - del='tracked' -} +s=A +if(~ $remove 1) + s=R if(~ $#* 0) exec aux/usage paths=`$nl{cleanname -d $gitrel $* | drop $gitroot} -if(~ $add tracked) +if(~ $s A) files=`$nl{walk -f ./$paths} if not - files=`$nl{cd .git/index9/tracked/ && walk -f ./$paths} - -for(f in $files){ - if(! ~ `$nl{cleanname $f} .git/*){ - addpath=.git/index9/$add/$f - delpath=.git/index9/$del/$f - mkdir -p `$nl{basename -d $addpath} - mkdir -p `$nl{basename -d $delpath} - # We don't want a matching qid, so that - # git/walk doesn't think this came from - # a checkout. - echo -n > $addpath - rm -f $delpath - } -} + files=`$nl{cd .git/fs/HEAD/tree && walk -f ./$paths} +for(f in $files) + echo $s NOQID 0 $f >> .git/INDEX9 exit '' --- a/sys/src/cmd/git/clone +++ b/sys/src/cmd/git/clone @@ -87,11 +87,8 @@ git/fs @ {builtin cd $tree && tar cif /fd/1 .} | @ {tar xf /fd/0} \ || die 'checkout failed:' $status - for(f in `$nl{walk -f $tree | drop $tree}){ - idx=.git/index9/tracked/$f - mkdir -p `$nl{basename -d $idx} - walk -eq ./$f > $idx - } + {for(f in `$nl{cd $tree && walk -f}) + echo 'T NOQID 0 '$f} > .git/INDEX9 } if not{ echo no default branch >[1=2] --- a/sys/src/cmd/git/commit +++ b/sys/src/cmd/git/commit @@ -65,8 +65,8 @@ fn parents{ if(! ~ $#revise 0) parents=`{cat $gitfs/HEAD/parent} - if not if(test -f .git/index9/merge-parents) - parents=`{cat .git/index9/merge-parents | sort | uniq} + if not if(test -f .git/merge-parents) + parents=`{cat .git/merge-parents | sort | uniq} if not if(~ $initial true) parents=() if not @@ -78,7 +78,7 @@ if(! ~ $#parents 0) pflags='-p'^$parents hash=`{git/save -n $"name -e $"email -m $"msg $pflags $files || die $status} - rm -f .git/index9/merge-parents + rm -f .git/merge-parents } fn update{ @@ -89,14 +89,10 @@ echo $branch: $hash echo $hash > $refpath for(f in $files){ - if(test -e .git/index9/removed/$f || ! test -e $f){ - rm -f .git/index9/removed/$f - rm -f .git/index9/tracked/$f - } - if not{ - mkdir -p `{basename -d $f} - walk -eq $f > .git/index9/tracked/$f - } + if(! test -e $f && ! test -e .git/object/$hash/tree/$f) + echo R NOQID 0 $f >> .git/INDEX9 + if not + echo T NOQID 0 $f >> .git/INDEX9 } } @@ -120,12 +116,12 @@ } files=() -if(test -f .git/index9/merge-parents) - files=`$nl{git/query -c `{cat .git/index9/merge-parents} | sed 's/^..//'} +if(test -f .git/merge-parents) + files=`$nl{git/query -c `{cat .git/merge-parents} | sed 's/^..//'} if(! ~ $#* 0) files=($files `$nl{git/walk -c `$nl{cleanname -d $gitrel $*}}) -if(~ $status '' || ~ $#files 0 && ! test -f .git/index9/merge-parents && ~ $#revise 0) - die 'nothing to commit' $status +if(~ $status '' || ~ $#files 0 && ! test -f .git/merge-parents && ~ $#revise 0) + die 'nothing to commit' @{ flag e + whoami --- a/sys/src/cmd/git/conf.c +++ b/sys/src/cmd/git/conf.c @@ -59,7 +59,7 @@ main(int argc, char **argv) { char repo[512], *p, *s; - int i, j; + int i, j, nrel; ARGBEGIN{ case 'f': file[nfile++]=EARGF(usage()); break; @@ -69,7 +69,7 @@ }ARGEND; if(findroot){ - if(findrepo(repo, sizeof(repo)) == -1) + if(findrepo(repo, sizeof(repo), &nrel) == -1) sysfatal("%r"); print("%s\n", repo); exits(nil); --- a/sys/src/cmd/git/diff +++ b/sys/src/cmd/git/diff @@ -7,8 +7,10 @@ flagfmt='c:commit branch, s:summarize'; args='[file ...]' eval `''{aux/getflags $*} || exec aux/usage -if(~ $#commit 0) +if(~ $#commit 0){ commit=HEAD + cparam=() +} files=() if(! ~ $#* 0) @@ -16,7 +18,7 @@ branch=`{git/query -p $commit} if(~ $summarize 1){ - git/walk -fMAR -b $commit $files + git/walk -fMAR $cparam $files exit } @@ -24,7 +26,7 @@ mntgen /mnt/scratch bind $branch/tree/ /mnt/scratch/a bind . /mnt/scratch/b -for(f in `$nl{git/walk -c -fRMA -b $commit $files}){ +for(f in `$nl{git/walk -c -fRMA $cparam $files}){ if(~ $#showed 0){ echo diff `{git/query $commit} uncommitted showed=1 --- a/sys/src/cmd/git/git.h +++ b/sys/src/cmd/git/git.h @@ -21,6 +21,7 @@ typedef struct Dblock Dblock; typedef struct Objq Objq; typedef struct Qelt Qelt; +typedef struct Idxent Idxent; enum { Pathmax = 512, @@ -190,6 +191,13 @@ int len; }; +struct Idxent { + char *path; + Qid qid; + int mode; + int order; + char state; +}; #define GETBE16(b)\ ((((b)[0] & 0xFFul) << 8) | \ @@ -301,9 +309,10 @@ int hassuffix(char *, char *); int swapsuffix(char *, int, char *, char *, char *); char *strip(char *); -int findrepo(char *, int); +int findrepo(char *, int, int*); int showprogress(int, int); u64int murmurhash2(void*, usize); +Qid parseqid(char*); /* packing */ void dtinit(Dtab *, Object*); --- a/sys/src/cmd/git/import +++ b/sys/src/cmd/git/import @@ -98,7 +98,8 @@ if(~ $#nocommit 0){ if(hash=`{git/save -n $aname -e $amail -N $name -E $email -m $msg -d $date $parents $files}){ echo $hash > $refpath - rm -f .git/index9/removed/$files + for(f in $files) + echo T NOQID 0 $f >> .git/INDEX9 } } status='''' --- a/sys/src/cmd/git/init +++ b/sys/src/cmd/git/init @@ -31,7 +31,7 @@ echo '[branch "'$branch'"]' echo ' remote = origin' } - +>$dir/.git/INDEX9 >$dir/.git/HEAD { echo ref: refs/heads/$branch } --- a/sys/src/cmd/git/log.c +++ b/sys/src/cmd/git/log.c @@ -239,7 +239,7 @@ main(int argc, char **argv) { char path[1024], repo[1024], *p, *r; - int i, nrepo; + int i, nrel, nrepo; ARGBEGIN{ case 'e': @@ -259,7 +259,7 @@ break; }ARGEND; - if(findrepo(repo, sizeof(repo)) == -1) + if(findrepo(repo, sizeof(repo), &nrel) == -1) sysfatal("find root: %r"); nrepo = strlen(repo); if(argc != 0){ --- a/sys/src/cmd/git/merge +++ b/sys/src/cmd/git/merge @@ -38,8 +38,8 @@ git/revert . exit '' } -echo $ours >> .git/index9/merge-parents -echo $theirs >> .git/index9/merge-parents +echo $ours >> .git/merge-parents +echo $theirs >> .git/merge-parents merge $ours $base $theirs >[1=2] echo 'merge complete: remember to commit' --- a/sys/src/cmd/git/pack.c +++ b/sys/src/cmd/git/pack.c @@ -66,7 +66,7 @@ Object *lruhead; Object *lrutail; vlong ncache; -vlong cachemax = 512*MiB; +vlong cachemax = 1024*MiB; Packf *packf; int npackf; int openpacks; --- a/sys/src/cmd/git/query.c +++ b/sys/src/cmd/git/query.c @@ -152,10 +152,10 @@ void main(int argc, char **argv) { - int i, j, n; - Hash *h; - char *p, *e, *s, *objpfx; char query[2048], repo[512]; + char *p, *e, *s, *objpfx; + int i, j, n, nrel; + Hash *h; ARGBEGIN{ case 'd': chattygit++; break; @@ -170,7 +170,7 @@ if(argc == 0) usage(); - if(findrepo(repo, sizeof(repo)) == -1) + if(findrepo(repo, sizeof(repo), &nrel) == -1) sysfatal("find root: %r"); if(chdir(repo) == -1) sysfatal("chdir: %r"); --- a/sys/src/cmd/git/save.c +++ b/sys/src/cmd/git/save.c @@ -10,6 +10,7 @@ char *dat; int ndat; }; + enum { Maxparents = 16, }; @@ -21,7 +22,9 @@ char *commitmsg; Hash parents[Maxparents]; int nparents; - +Idxent *idx; +int idxsz; +int nidx; int gitmode(Dirent *e) { @@ -38,6 +41,20 @@ } int +idxcmp(void *pa, void *pb) +{ + Idxent *a, *b; + int c; + + a = (Idxent*)pa; + b = (Idxent*)pb; + if((c = strcmp(a->path, b->path)) != 0) + return c; + assert(a->order != b->order); + return a-> order < b->order ? -1 : 1; +} + +int entcmp(void *pa, void *pb) { char abuf[256], bbuf[256], *ae, *be; @@ -184,27 +201,22 @@ int tracked(char *path) { - char ipath[256]; - Dir *d; + int lo, hi, mid, r; - /* Explicitly removed. */ - snprint(ipath, sizeof(ipath), ".git/index9/removed/%s", path); - if(strstr(cleanname(ipath), ".git/index9/removed") != ipath) - sysfatal("path %s leaves index", ipath); - d = dirstat(ipath); - if(d != nil && d->qid.type != QTDIR){ - free(d); - return 0; + r = -1; + lo = 0; + hi = nidx-1; + while(lo <= hi){ + mid = (hi + lo) / 2; + r = strcmp(path, idx[mid].path); + if(r < 0) + hi = mid-1; + else if(r > 0) + lo = mid+1; + else + break; } - - /* Explicitly added. */ - snprint(ipath, sizeof(ipath), ".git/index9/tracked/%s", path); - if(strstr(cleanname(ipath), ".git/index9/tracked") != ipath) - sysfatal("path %s leaves index", ipath); - if(access(ipath, AEXIST) == 0) - return 1; - - return 0; + return r == 0; } int @@ -354,10 +366,11 @@ void main(int argc, char **argv) { + char *ln, *dstr, *parts[4], cwd[1024]; + int i, r, line, ncwd; Hash th, ch; - char *dstr, cwd[1024]; - int i, r, ncwd; vlong date; + Biobuf *f; Object *t; gitinit(); @@ -425,6 +438,33 @@ } t = findroot(); + nidx = 0; + idxsz = 32; + idx = emalloc(idxsz*sizeof(Idxent)); + if((f = Bopen(".git/INDEX9", OREAD)) == nil) + sysfatal("open index: %r"); + line = 0; + while((ln = Brdstr(f, '\n', 1)) != nil){ + line++; + if(ln[0] == 0 || ln[0] == '\n') + continue; + if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts)) + sysfatal(".git/INDEX9:%d: corrupt index", line); + if(nidx == idxsz){ + idxsz += idxsz/2; + idx = realloc(idx, idxsz*sizeof(Idxent)); + } + cleanname(parts[3]); + idx[nidx].state = *parts[0]; + idx[nidx].qid = parseqid(parts[1]); + idx[nidx].mode = strtol(parts[2], nil, 8); + idx[nidx].path = strdup(parts[3]); + idx[nidx].order = nidx; + nidx++; + free(ln); + } + Bterm(f); + qsort(idx, nidx, sizeof(Idxent), idxcmp); r = treeify(t, argv, argv + argc, 0, &th); if(r == -1) sysfatal("could not commit: %r\n"); --- a/sys/src/cmd/git/test/basic.rc +++ b/sys/src/cmd/git/test/basic.rc @@ -64,4 +64,3 @@ q diff -c scratch/upstream/file.txt scratch/downstream/file.txt || die mismatch ! test -e scratch/upstream/file2.txt || die mismatch ! test -e scratch/downstream/file2.txt || die mismatch - --- a/sys/src/cmd/git/test/util.rc +++ b/sys/src/cmd/git/test/util.rc @@ -1,5 +1,5 @@ fn q { - {$* > /tmp/out.$pid && rm /tmp/out.$pid} || cat /tmp/out + {$* > /tmp/out.$pid && rm /tmp/out.$pid} || cat /tmp/out.$pid } fn qq { $* >/dev/null >[2]/dev/null --- a/sys/src/cmd/git/util.c +++ b/sys/src/cmd/git/util.c @@ -192,8 +192,10 @@ Qid q; q = va_arg(fmt->args, Qid); - return fmtprint(fmt, "Qid{path=0x%llx(dir:%d,obj:%lld), vers=%ld, type=%d}", - q.path, QDIR(&q), (q.path >> 8), q.vers, q.type); + if(q.path == ~0ULL && q.vers == ~0UL && q.type == 0xff) + return fmtprint(fmt, "NOQID"); + else + return fmtprint(fmt, "%llux.%lud.%hhx", q.path, q.vers, q.type); } void @@ -294,7 +296,7 @@ /* Finds the directory containing the git repo. */ int -findrepo(char *buf, int nbuf) +findrepo(char *buf, int nbuf, int *nrel) { char *p, *suff; @@ -302,6 +304,7 @@ if(getwd(buf, nbuf - strlen(suff) - 1) == nil) return -1; + *nrel = 0; for(p = buf + strlen(buf); p != nil; p = strrchr(buf, '/')){ strcpy(p, suff); if(access(buf, AEXIST) == 0){ @@ -308,6 +311,7 @@ p[p == buf] = '\0'; return 0; } + *nrel += 1; *p = '\0'; } werrstr("not a git repository"); @@ -441,4 +445,25 @@ h ^= h >> 15; return h; +} + +Qid +parseqid(char *s) +{ + char *e; + Qid q; + + if(strcmp(s, "NOQID") == 0) + return (Qid){-1, -1, -1}; + e = s; + q.path = strtoull(e, &e, 16); + if(*e != '.') + sysfatal("corrupt qid: %s (%s)\n", s, e); + q.vers = strtoul(e+1, &e, 10); + if(*e != '.') + sysfatal("corrupt qid: %s (%s)\n", s, e); + q.type = strtoul(e+1, &e, 16); + if(*e != '\0') + sysfatal("corrupt qid: %s (%x)\n", s, *e); + return q; } --- a/sys/src/cmd/git/walk.c +++ b/sys/src/cmd/git/walk.c @@ -2,53 +2,72 @@ #include #include "git.h" +typedef struct Seen Seen; +typedef struct Idxed Idxed; +typedef struct Idxent Idxent; + #define NCACHE 4096 -#define TDIR ".git/index9/tracked" -#define RDIR ".git/index9/removed" -typedef struct Cache Cache; -typedef struct Wres Wres; -struct Cache { + +enum { + Rflg = 1 << 0, + Mflg = 1 << 1, + Aflg = 1 << 2, + Uflg = 1 << 3, + /* everything after this is not an error */ + Tflg = 1 << 4, +}; + +struct Seen { Dir* cache; int n; int max; }; -struct Wres { - char **path; - int npath; - int pathsz; +struct Idxed { + char** cache; + int n; + int max; }; -enum { - Rflg = 1 << 0, - Mflg = 1 << 1, - Aflg = 1 << 2, - Tflg = 1 << 3, -}; +Seen seentab[NCACHE]; +Idxed idxtab[NCACHE]; +char repopath[1024]; +char wdirpath[1024]; +char *rstr = "R "; +char *mstr = "M "; +char *astr = "A "; +char *ustr = "U "; +char *tstr = "T "; +char *bdir = ".git/fs/HEAD/tree"; +int useidx = 1; +int nrel; +int quiet; +int dirty; +int printflg; -Cache seencache[NCACHE]; -int quiet; -int useindex; -int printflg; -char *bdir = ".git/fs/HEAD/tree"; -char *rstr = "R "; -char *tstr = "T "; -char *mstr = "M "; -char *astr = "A "; +Idxent *idx; +int idxsz; +int nidx; +int staleidx; +Idxent *wdir; +int wdirsz; +int nwdir; +int loadwdir(char*); + int seen(Dir *dir) { + Seen *c; Dir *dp; int i; - Cache *c; - c = &seencache[dir->qid.path&(NCACHE-1)]; + c = &seentab[dir->qid.path&(NCACHE-1)]; dp = c->cache; for(i=0; in; i++, dp++) - if(dir->qid.path == dp->qid.path && - dir->type == dp->type && - dir->dev == dp->dev) + if(dir->qid.path == dp->qid.path + && dir->qid.type == dp->qid.type + && dir->dev == dp->dev) return 1; if(c->n == c->max){ if (c->max == 0) @@ -63,129 +82,100 @@ return 0; } -void -grow(Wres *r) -{ - if(r->npath == r->pathsz){ - r->pathsz = 2*r->pathsz + 1; - r->path = erealloc(r->path, r->pathsz * sizeof(char*)); - } -} - int -readpaths(Wres *r, char *pfx, char *dir) +checkedin(Idxent *e, int change) { - char *f, *sub, *full, *sep; - Dir *d; - int fd, ret, i, n; + char *p; + int r; - d = nil; - ret = -1; - sep = ""; - if(dir[0] != 0) - sep = "/"; - if((full = smprint("%s/%s", pfx, dir)) == nil) - sysfatal("smprint: %r"); - if((fd = open(full, OREAD)) < 0) - goto error; - while((n = dirread(fd, &d)) > 0){ - for(i = 0; i < n; i++){ - if(seen(&d[i])) - continue; - if(d[i].qid.type & QTDIR){ - if((sub = smprint("%s%s%s", dir, sep, d[i].name)) == nil) - sysfatal("smprint: %r"); - if(readpaths(r, pfx, sub) == -1){ - free(sub); - goto error; - } - free(sub); - }else{ - grow(r); - if((f = smprint("%s%s%s", dir, sep, d[i].name)) == nil) - sysfatal("smprint: %r"); - r->path[r->npath++] = f; - } - } - free(d); + p = smprint("%s/%s", bdir, e->path); + r = access(p, AEXIST); + if(r == 0 && change){ + e->state = 'T'; + staleidx = 1; } - ret = r->npath; -error: - close(fd); - free(full); - return ret; + free(p); + return r == 0; } int -cmp(void *pa, void *pb) +indexed(char *path, int isdir) { - return strcmp(*(char **)pa, *(char **)pb); -} + int lo, hi, mid, n, r; + char *s; -void -dedup(Wres *r) -{ - int i, o; - - if(r->npath <= 1) - return; - o = 0; - qsort(r->path, r->npath, sizeof(r->path[0]), cmp); - for(i = 1; i < r->npath; i++) - if(strcmp(r->path[o], r->path[i]) != 0) - r->path[++o] = r->path[i]; - r->npath = o + 1; + if(!useidx){ + s = smprint("%s/%s", bdir, path); + r = access(s, AEXIST); + free(s); + return r == 0; + } + s = path; + if(isdir) + s = smprint("%s/", path); + r = -1; + lo = 0; + hi = nidx-1; + n = strlen(s); + while(lo <= hi){ + mid = (hi + lo) / 2; + if(isdir) + r = strncmp(s, idx[mid].path, n); + else + r = strcmp(s, idx[mid].path); + if(r < 0) + hi = mid-1; + else if(r > 0) + lo = mid+1; + else + break; + } + if(isdir) + free(s); + return r == 0; } int -sameqid(Dir *d, char *qf) +idxcmp(void *pa, void *pb) { - char indexqid[64], fileqid[64], *p; - int fd, n; + Idxent *a, *b; + int c; - if(!d) - return 0; - if((fd = open(qf, OREAD)) == -1) - return 0; - if((n = readn(fd, indexqid, sizeof(indexqid) - 1)) == -1) - return 0; - indexqid[n] = 0; - close(fd); - if((p = strpbrk(indexqid, " \t\n\r")) != nil) - *p = 0; - - snprint(fileqid, sizeof(fileqid), "%ullx.%uld.%.2uhhx", - d->qid.path, d->qid.vers, d->qid.type); - - if(strcmp(indexqid, fileqid) == 0) - return 1; - return 0; + a = (Idxent*)pa; + b = (Idxent*)pb; + if((c = strcmp(a->path, b->path)) != 0) + return c; + assert(a->order != b->order); + return a-> order < b->order ? -1 : 1; } -void -writeqid(Dir *d, char *qf) -{ - int fd; - - if((fd = create(qf, OWRITE, 0666)) == -1) - return; - fprint(fd, "%ullx.%uld.%.2uhhx\n", - d->qid.path, d->qid.vers, d->qid.type); - close(fd); -} - int -samedata(char *pa, char *pb) +samedata(Idxent *a, Idxent *b) { - char ba[32*1024], bb[32*1024]; + char *gitpath, ba[IOUNIT], bb[IOUNIT]; int fa, fb, na, nb, same; + Dir *da, *db; + if(a != nil){ + if(a->qid.path == b->qid.path + && a->qid.vers == b->qid.vers + && a->qid.type == b->qid.type + && a->mode == b->mode + && a->mode != 0) + return 1; + } + + da = nil; + db = nil; same = 0; - fa = open(pa, OREAD); - fb = open(pb, OREAD); - if(fa == -1 || fb == -1){ + if((gitpath = smprint("%s/%s", bdir, b->path)) == nil) + sysfatal("smprint: %r"); + fa = open(b->path, OREAD); + fb = open(gitpath, OREAD); + if(fa == -1 || fb == -1) goto mismatch; - } + da = dirfstat(fa); + db = dirfstat(fb); while(1){ if((na = readn(fa, ba, sizeof(ba))) == -1) goto mismatch; @@ -198,8 +188,19 @@ if(memcmp(ba, bb, na) != 0) goto mismatch; } + if(a != nil){ + a->qid = b->qid; + if(da != nil) + a->mode = da->mode; + } + staleidx = 1; + if(da->mode != db->mode) + goto mismatch; same = 1; + mismatch: + free(da); + free(db); if(fa != -1) close(fa); if(fb != -1) @@ -207,7 +208,129 @@ return same; } +int +loadent(char *dir, Dir *d, int fullpath) +{ + char *path; + int ret, isdir; + Idxent *e; + + if(fullpath) + path = strdup(dir); + else + path = smprint("%s/%s", dir, d->name); + if(path == nil) + sysfatal("smprint: %r"); + + ret = 0; + isdir = d->qid.type & QTDIR; + cleanname(path); + if((printflg & Uflg) == 0 && !indexed(path, isdir)) + return 0; + if(isdir){ + ret = loadwdir(path); + free(path); + }else{ + if(nwdir == wdirsz){ + wdirsz += wdirsz/2; + wdir = erealloc(wdir, wdirsz*sizeof(Idxent)); + } + e = wdir + nwdir; + e->path = path; + e->qid = d->qid; + e->mode = d->mode; + e->order = nwdir; + e->state = 'T'; + nwdir++; + } + return ret; +} + +int +loadwdir(char *path) +{ + int fd, ret, i, n; + Dir *d, *e; + + d = nil; + e = nil; + ret = -1; + + cleanname(path); + if(strncmp(path, ".git/", 5) == 0) + return 0; + if((fd = open(path, OREAD)) < 0) + goto error; + if((e = dirfstat(fd)) == nil) + sysfatal("fstat: %r"); + if(e->qid.type & QTDIR) + while((n = dirread(fd, &d)) > 0){ + for(i = 0; i < n; i++) + if(loadent(path, &d[i], 0) == -1) + goto error; + free(d); + } + else{ + if(loadent(path, e, 1) == -1) + goto error; + } + ret = 0; +error: + free(e); + if(fd != -1) + close(fd); + return ret; +} + +int +pfxmatch(char *p, char **pfx, int *pfxlen, int npfx) +{ + int i; + + if(npfx == 0) + return 1; + for(i = 0; i < npfx; i++){ + if(strncmp(p, pfx[i], pfxlen[i]) == 0 + && (p[pfxlen[i]] == '/' || p[pfxlen[i]] == 0)) + return 1; + if(strcmp(pfx[i], ".") == 0 || *pfx[i] == 0) + return 1; + } + return 0; +} + + +char* +reporel(char *s) +{ + char *p; + int n; + + if(*s == '/') + s = strdup(s); + else + s = smprint("%s/%s", wdirpath, s); + p = cleanname(s); + n = strlen(repopath); + if(strncmp(s, repopath, n) == 0) + p = s + n; + else + sysfatal("path outside repo: %s", s); + if(*p == '/') + p++; + memmove(s, p, strlen(p)+1); + return s; +} + void +show(Biobuf *o, int flg, char *str, char *path) +{ + dirty |= flg; + if(!quiet && (printflg & flg)) + Bprint(o, "%s%s\n", str, path); +} + +void usage(void) { fprint(2, "usage: %s [-qbc] [-f filt] [-b base] [paths...]\n", argv0); @@ -217,14 +340,21 @@ void main(int argc, char **argv) { - char *rpath, *tpath, *bpath, *base, buf[8], repo[512]; - char *p, *e; - int i, dirty; - Wres r; + char *p, *e, *ln, *base, **argrel, *parts[4], xbuf[8]; + int i, j, c, line, wfd, *argn; + Biobuf *f, *o, *w; Hash h; - Dir *d; + Dir rn; gitinit(); + if(access(".git/fs/ctl", AEXIST) != 0) + sysfatal("no running git/fs"); + if(getwd(wdirpath, sizeof(wdirpath)) == nil) + sysfatal("getwd: %r"); + if(findrepo(repopath, sizeof(repopath), &nrel) == -1) + sysfatal("find root: %r"); + if(chdir(repopath) == -1) + sysfatal("chdir: %r"); ARGBEGIN{ case 'q': @@ -235,6 +365,7 @@ tstr = ""; mstr = ""; astr = ""; + ustr = ""; break; case 'f': for(p = EARGF(usage()); *p; p++) @@ -243,109 +374,160 @@ case 'A': printflg |= Aflg; break; case 'M': printflg |= Mflg; break; case 'R': printflg |= Rflg; break; + case 'U': printflg |= Uflg; break; default: usage(); break; } break; case 'b': - useindex = 0; + useidx = 0; base = EARGF(usage()); if(resolveref(&h, base) == -1) - sysfatal("no such ref '%s'", base); + sysfatal("no such reXf '%s'", base); bdir = smprint(".git/fs/object/%H/tree", h); break; default: usage(); - }ARGEND + }ARGEND; - gitinit(); - if(findrepo(repo, sizeof(repo)) == -1) - sysfatal("find root: %r"); - if(chdir(repo) == -1) - sysfatal("chdir: %r"); - if(access(".git/fs/ctl", AEXIST) != 0) - sysfatal("no running git/fs"); - dirty = 0; - memset(&r, 0, sizeof(r)); if(printflg == 0) printflg = Tflg | Aflg | Mflg | Rflg; - if(argc == 0){ - if(access(TDIR, AEXIST) == 0 && readpaths(&r, TDIR, "") == -1) - sysfatal("read tracked: %r"); - if(access(RDIR, AEXIST) == 0 && readpaths(&r, RDIR, "") == -1) - sysfatal("read removed: %r"); - if(access(bdir, AEXIST) == 0 && readpaths(&r, bdir, "") == -1) - sysfatal("read base: %r"); - }else{ - for(i = 0; i < argc; i++){ - tpath = smprint(TDIR"/%s", argv[i]); - rpath = smprint(RDIR"/%s", argv[i]); - bpath = smprint("%s/%s", bdir, argv[i]); - if((d = dirstat(tpath)) == nil && (d = dirstat(rpath)) == nil && (d = dirstat(bpath)) == nil) - goto nextarg; - if(d->mode & DMDIR){ - readpaths(&r, TDIR, argv[i]); - readpaths(&r, RDIR, argv[i]); - readpaths(&r, bdir, argv[i]); - }else{ - grow(&r); - r.path[r.npath++] = estrdup(argv[i]); + + nidx = 0; + idxsz = 32; + idx = emalloc(idxsz*sizeof(Idxent)); + nwdir = 0; + wdirsz = 32; + wdir = emalloc(wdirsz*sizeof(Idxent)); + argrel = emalloc(argc*sizeof(int)); + argn = emalloc(argc*sizeof(int)); + for(i = 0; i < argc; i++){ + argrel[i] = reporel(argv[i]); + argn[i] = strlen(argrel[i]); + } + if((o = Bfdopen(1, OWRITE)) == nil) + sysfatal("open out: %r"); + if(useidx){ + if((f = Bopen(".git/INDEX9", OREAD)) == nil){ + fprint(2, "open index: %r\n"); + if(access(".git/index9", AEXIST) == 0){ + fprint(2, "index format conversion needed:\n"); + fprint(2, "\tcd %s && git/fs\n", repopath); + fprint(2, "\t@{cd .git/fs/HEAD/tree && walk -f | sed 's/^/T NOQID 0 /'} >> .git/INDEX9\n"); + fprint(2, "\t@{cd .git/index9/removed && walk -f | sed 's/^/R NOQID 0 /'} >> .git/INDEX9\n"); } -nextarg: - free(bpath); - free(tpath); - free(rpath); - free(d); + exits("noindex"); } + line = 0; + while((ln = Brdstr(f, '\n', 1)) != nil){ + line++; + /* allow blank lines */ + if(ln[0] == 0 || ln[0] == '\n') + continue; + if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts)) + sysfatal(".git/INDEX9:%d: corrupt index", line); + if(nidx == idxsz){ + idxsz += idxsz/2; + idx = realloc(idx, idxsz*sizeof(Idxent)); + } + cleanname(parts[3]); + idx[nidx].state = *parts[0]; + idx[nidx].qid = parseqid(parts[1]); + idx[nidx].mode = strtol(parts[2], nil, 8); + idx[nidx].path = strdup(parts[3]); + idx[nidx].order = nidx; + nidx++; + free(ln); + } + qsort(idx, nidx, sizeof(Idxent), idxcmp); } - dedup(&r); - for(i = 0; i < r.npath; i++){ - p = r.path[i]; - d = dirstat(p); - if(d && d->mode & DMDIR) - goto next; - rpath = smprint(RDIR"/%s", p); - tpath = smprint(TDIR"/%s", p); - bpath = smprint("%s/%s", bdir, p); - /* Fast path: we don't want to force access to the rpath. */ - if(useindex && d && sameqid(d, tpath)) { - if(!quiet && (printflg & Tflg)) - print("%s%s\n", tstr, p); + if(argc == 0) + loadwdir("."); + else for(i = 0; i < argc; i++) + loadwdir(argrel[i]); + qsort(wdir, nwdir, sizeof(Idxent), idxcmp); + i = 0; + j = 0; + while(i < nidx || j < nwdir){ + /* find the last entry we tracked for a path */ + while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0){ + staleidx = 1; + i++; + } + while(j+1 < nwdir && strcmp(wdir[j].path, wdir[j+1].path) == 0) + j++; + if(!pfxmatch(idx[i].path, argrel, argn, argc)){ + i++; + continue; + } + if(i >= nidx) + c = 1; + else if(j >= nwdir) + c = -1; + else + c = strcmp(idx[i].path, wdir[j].path); + /* exists in both index and on disk */ + if(c == 0){ + if(idx[i].state == 'R' && checkedin(&idx[i], 0)) + show(o, Rflg, rstr, idx[i].path); + else if(idx[i].state == 'A' && !checkedin(&idx[i], 1)) + show(o, Aflg, astr, idx[i].path); + else if(!samedata(&idx[i], &wdir[j])) + show(o, Mflg, mstr, idx[i].path); + else + show(o, Tflg, tstr, idx[i].path); + i++; + j++; + /* only exists in index */ + }else if(c < 0){ + show(o, Rflg, rstr, idx[i].path); + i++; + /* only exists on disk */ }else{ - if(d == nil || (useindex && access(rpath, AEXIST) == 0)){ - if(access(bpath, AEXIST) == 0){ - dirty |= Rflg; - if(!quiet && (printflg & Rflg)) - print("%s%s\n", rstr, p); - } - }else if(access(bpath, AEXIST) == -1) { - dirty |= Aflg; - if(!quiet && (printflg & Aflg)) - print("%s%s\n", astr, p); - }else if(samedata(p, bpath)){ - if(!quiet && (printflg & Tflg)) - print("%s%s\n", tstr, p); - if(useindex) - writeqid(d, tpath); - }else{ - dirty |= Mflg; - if(!quiet && (printflg & Mflg)) - print("%s%s\n", mstr, p); - } + if(!useidx && checkedin(&wdir[j], 0)){ + if(samedata(nil, &wdir[j])) + show(o, Tflg, tstr, wdir[j].path); + else + show(o, Mflg, mstr, wdir[j].path); + }else if(printflg & Uflg) + show(o, Uflg, ustr, wdir[j].path); + j++; } - free(rpath); - free(tpath); - free(bpath); -next: - free(d); } + Bterm(o); + + if(useidx && staleidx) + if((wfd = create(".git/INDEX9.new", OWRITE, 0644)) != -1){ + if((w = Bfdopen(wfd, OWRITE)) == nil){ + close(wfd); + goto Nope; + } + for(i = 0; i < nidx; i++){ + while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0) + i++; + Bprint(w, "%c %Q %o %s\n", + idx[i].state, + idx[i].qid, + idx[i].mode, + idx[i].path); + } + Bterm(w); + nulldir(&rn); + rn.name = "INDEX9"; + if(remove(".git/INDEX9") == -1) + goto Nope; + if(dirwstat(".git/INDEX9.new", &rn) == -1) + sysfatal("rename: %r"); + } + +Nope: if(!dirty) exits(nil); - p = buf; - e = buf + sizeof(buf); + p = xbuf; + e = p + sizeof(xbuf); for(i = 0; (1 << i) != Tflg; i++) if(dirty & (1 << i)) - p = seprint(p, e, "%c", "DMAT"[i]); - exits(buf); + p = seprint(p, e, "%c", "RMAUT"[i]); + exits(xbuf); }