From mboxrd@z Thu Jan 1 00:00:00 1970 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on inbox.vuxu.org X-Spam-Level: X-Spam-Status: No, score=0.0 required=5.0 tests=none autolearn=ham autolearn_force=no version=3.4.4 Received: (qmail 7512 invoked from network); 18 Sep 2023 03:51:17 -0000 Received: from 9front.inri.net (168.235.81.73) by inbox.vuxu.org with ESMTPUTF8; 18 Sep 2023 03:51:17 -0000 Received: from mimir.eigenstate.org ([206.124.132.107]) by 9front; Sun Sep 17 23:50:04 -0400 2023 Received: from abbatoir (pool-108-6-24-2.nycmny.fios.verizon.net [108.6.24.2]) by mimir.eigenstate.org (OpenSMTPD) with ESMTPSA id 6354b089 (TLSv1.2:ECDHE-RSA-AES256-SHA:256:NO) for <9front@9front.org>; Sun, 17 Sep 2023 20:50:01 -0700 (PDT) Message-ID: <91B9FC3B10A4C61A17F2EA04575388E7@eigenstate.org> To: 9front@9front.org Date: Sun, 17 Sep 2023 23:49:59 -0400 From: ori@eigenstate.org In-Reply-To: MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit List-ID: <9front.9front.org> List-Help: X-Glyph: ➈ X-Bullshit: ISO-certified template-oriented factory DOM-scale locator Subject: Re: [9front] [v2] git: new index format, 235x faster walks Reply-To: 9front@9front.org Precedence: bulk Sorry for the spam; One more revision; found a couple of missing changes to the new 4 column index format. diff 597ff0b59658032e141553f286e02b9370f995b5 uncommitted --- a/sys/src/cmd/git/add +++ b/sys/src/cmd/git/add @@ -7,32 +7,17 @@ flagfmt='r:remove'; args='file ...' eval `''{aux/getflags $*} || exec aux/usage -add='tracked' -del='removed' -if(~ $remove 1){ - add='removed' - del='tracked' -} +s=A +if(~ $remove 1) + s=R if(~ $#* 0) exec aux/usage paths=`$nl{cleanname -d $gitrel $* | drop $gitroot} -if(~ $add tracked) +if(~ $s A) files=`$nl{walk -f ./$paths} if not - files=`$nl{cd .git/index9/tracked/ && walk -f ./$paths} - -for(f in $files){ - if(! ~ `$nl{cleanname $f} .git/*){ - addpath=.git/index9/$add/$f - delpath=.git/index9/$del/$f - mkdir -p `$nl{basename -d $addpath} - mkdir -p `$nl{basename -d $delpath} - # We don't want a matching qid, so that - # git/walk doesn't think this came from - # a checkout. - echo -n > $addpath - rm -f $delpath - } -} + files=`$nl{cd .git/fs/HEAD/tree && walk -f ./$paths} +for(f in $files) + echo $s NOQID 0 $f >> .git/INDEX9 exit '' --- a/sys/src/cmd/git/branch +++ b/sys/src/cmd/git/branch @@ -84,7 +84,6 @@ for(m in $cleanpaths){ d=`$nl{basename -d $m} mkdir -p $d - mkdir -p .git/index9/tracked/$d # Modifications can turn a file into # a directory, or vice versa, so we # need to delete and copy the files @@ -97,11 +96,11 @@ b=file if(! ~ $a $b){ rm -rf $m - rm -rf .git/index9/tracked/$m + echo R NOQID 0 $m >> .git/INDEX9 } if(~ $b file){ cp -x -- $basedir/tree/$m $m - walk -eq $m > .git/index9/tracked/$m + echo T NOQID 0 $m >> .git/INDEX9 touch $m } } @@ -112,9 +111,9 @@ merge1 $ours $ours $common $theirs } -if(! ~ $#deleted 0){ - rm -f $deleted - rm -f .git/index9/tracked/$deleted +for(d in $deleted){ + rm -f $d + echo R NOQID 0 $d >> .git/INDEX9 } echo ref: $new > .git/HEAD --- a/sys/src/cmd/git/clone +++ b/sys/src/cmd/git/clone @@ -87,11 +87,8 @@ git/fs @ {builtin cd $tree && tar cif /fd/1 .} | @ {tar xf /fd/0} \ || die 'checkout failed:' $status - for(f in `$nl{walk -f $tree | drop $tree}){ - idx=.git/index9/tracked/$f - mkdir -p `$nl{basename -d $idx} - walk -eq ./$f > $idx - } + {for(f in `$nl{cd $tree && walk -f}) + echo 'T NOQID 0 '$f} > .git/INDEX9 } if not{ echo no default branch >[1=2] --- a/sys/src/cmd/git/commit +++ b/sys/src/cmd/git/commit @@ -65,8 +65,8 @@ fn parents{ if(! ~ $#revise 0) parents=`{cat $gitfs/HEAD/parent} - if not if(test -f .git/index9/merge-parents) - parents=`{cat .git/index9/merge-parents | sort | uniq} + if not if(test -f .git/merge-parents) + parents=`{cat .git/merge-parents | sort | uniq} if not if(~ $initial true) parents=() if not @@ -78,7 +78,7 @@ if(! ~ $#parents 0) pflags='-p'^$parents hash=`{git/save -n $"name -e $"email -m $"msg $pflags $files || die $status} - rm -f .git/index9/merge-parents + rm -f .git/merge-parents } fn update{ @@ -89,14 +89,10 @@ echo $branch: $hash echo $hash > $refpath for(f in $files){ - if(test -e .git/index9/removed/$f || ! test -e $f){ - rm -f .git/index9/removed/$f - rm -f .git/index9/tracked/$f - } - if not{ - mkdir -p `{basename -d $f} - walk -eq $f > .git/index9/tracked/$f - } + if(! test -e $f && ! test -e .git/object/$hash/tree/$f) + echo R NOQID 0 $f >> .git/INDEX9 + if not + echo T NOQID 0 $f >> .git/INDEX9 } } @@ -120,12 +116,12 @@ } files=() -if(test -f .git/index9/merge-parents) - files=`$nl{git/query -c `{cat .git/index9/merge-parents} | sed 's/^..//'} +if(test -f .git/merge-parents) + files=`$nl{git/query -c `{cat .git/merge-parents} | sed 's/^..//'} if(! ~ $#* 0) files=($files `$nl{git/walk -c `$nl{cleanname -d $gitrel $*}}) -if(~ $status '' || ~ $#files 0 && ! test -f .git/index9/merge-parents && ~ $#revise 0) - die 'nothing to commit' $status +if(~ $status '' || ~ $#files 0 && ! test -f .git/merge-parents && ~ $#revise 0) + die 'nothing to commit' @{ flag e + whoami --- a/sys/src/cmd/git/conf.c +++ b/sys/src/cmd/git/conf.c @@ -59,7 +59,7 @@ main(int argc, char **argv) { char repo[512], *p, *s; - int i, j; + int i, j, nrel; ARGBEGIN{ case 'f': file[nfile++]=EARGF(usage()); break; @@ -69,7 +69,7 @@ }ARGEND; if(findroot){ - if(findrepo(repo, sizeof(repo)) == -1) + if(findrepo(repo, sizeof(repo), &nrel) == -1) sysfatal("%r"); print("%s\n", repo); exits(nil); --- a/sys/src/cmd/git/diff +++ b/sys/src/cmd/git/diff @@ -7,8 +7,10 @@ flagfmt='c:commit branch, s:summarize'; args='[file ...]' eval `''{aux/getflags $*} || exec aux/usage -if(~ $#commit 0) +if(~ $#commit 0){ commit=HEAD + cparam=() +} files=() if(! ~ $#* 0) @@ -16,7 +18,7 @@ branch=`{git/query -p $commit} if(~ $summarize 1){ - git/walk -fMAR -b $commit $files + git/walk -fMAR $cparam $files exit } @@ -24,7 +26,7 @@ mntgen /mnt/scratch bind $branch/tree/ /mnt/scratch/a bind . /mnt/scratch/b -for(f in `$nl{git/walk -c -fRMA -b $commit $files}){ +for(f in `$nl{git/walk -c -fRMA $cparam $files}){ if(~ $#showed 0){ echo diff `{git/query $commit} uncommitted showed=1 --- a/sys/src/cmd/git/git.h +++ b/sys/src/cmd/git/git.h @@ -21,6 +21,7 @@ typedef struct Dblock Dblock; typedef struct Objq Objq; typedef struct Qelt Qelt; +typedef struct Idxent Idxent; enum { Pathmax = 512, @@ -190,6 +191,13 @@ int len; }; +struct Idxent { + char *path; + Qid qid; + int mode; + int order; + char state; +}; #define GETBE16(b)\ ((((b)[0] & 0xFFul) << 8) | \ @@ -301,9 +309,10 @@ int hassuffix(char *, char *); int swapsuffix(char *, int, char *, char *, char *); char *strip(char *); -int findrepo(char *, int); +int findrepo(char *, int, int*); int showprogress(int, int); u64int murmurhash2(void*, usize); +Qid parseqid(char*); /* packing */ void dtinit(Dtab *, Object*); --- a/sys/src/cmd/git/import +++ b/sys/src/cmd/git/import @@ -98,7 +98,8 @@ if(~ $#nocommit 0){ if(hash=`{git/save -n $aname -e $amail -N $name -E $email -m $msg -d $date $parents $files}){ echo $hash > $refpath - rm -f .git/index9/removed/$files + for(f in $files) + echo T NOQID 0 $f >> .git/INDEX9 } } status='''' --- a/sys/src/cmd/git/init +++ b/sys/src/cmd/git/init @@ -31,7 +31,7 @@ echo '[branch "'$branch'"]' echo ' remote = origin' } - +>$dir/.git/INDEX9 >$dir/.git/HEAD { echo ref: refs/heads/$branch } --- a/sys/src/cmd/git/log.c +++ b/sys/src/cmd/git/log.c @@ -239,7 +239,7 @@ main(int argc, char **argv) { char path[1024], repo[1024], *p, *r; - int i, nrepo; + int i, nrel, nrepo; ARGBEGIN{ case 'e': @@ -259,7 +259,7 @@ break; }ARGEND; - if(findrepo(repo, sizeof(repo)) == -1) + if(findrepo(repo, sizeof(repo), &nrel) == -1) sysfatal("find root: %r"); nrepo = strlen(repo); if(argc != 0){ --- a/sys/src/cmd/git/merge +++ b/sys/src/cmd/git/merge @@ -38,8 +38,8 @@ git/revert . exit '' } -echo $ours >> .git/index9/merge-parents -echo $theirs >> .git/index9/merge-parents +echo $ours >> .git/merge-parents +echo $theirs >> .git/merge-parents merge $ours $base $theirs >[1=2] echo 'merge complete: remember to commit' --- a/sys/src/cmd/git/pack.c +++ b/sys/src/cmd/git/pack.c @@ -66,7 +66,7 @@ Object *lruhead; Object *lrutail; vlong ncache; -vlong cachemax = 512*MiB; +vlong cachemax = 1024*MiB; Packf *packf; int npackf; int openpacks; --- a/sys/src/cmd/git/query.c +++ b/sys/src/cmd/git/query.c @@ -152,10 +152,10 @@ void main(int argc, char **argv) { - int i, j, n; - Hash *h; - char *p, *e, *s, *objpfx; char query[2048], repo[512]; + char *p, *e, *s, *objpfx; + int i, j, n, nrel; + Hash *h; ARGBEGIN{ case 'd': chattygit++; break; @@ -170,7 +170,7 @@ if(argc == 0) usage(); - if(findrepo(repo, sizeof(repo)) == -1) + if(findrepo(repo, sizeof(repo), &nrel) == -1) sysfatal("find root: %r"); if(chdir(repo) == -1) sysfatal("chdir: %r"); --- a/sys/src/cmd/git/save.c +++ b/sys/src/cmd/git/save.c @@ -10,6 +10,7 @@ char *dat; int ndat; }; + enum { Maxparents = 16, }; @@ -21,7 +22,9 @@ char *commitmsg; Hash parents[Maxparents]; int nparents; - +Idxent *idx; +int idxsz; +int nidx; int gitmode(Dirent *e) { @@ -38,6 +41,20 @@ } int +idxcmp(void *pa, void *pb) +{ + Idxent *a, *b; + int c; + + a = (Idxent*)pa; + b = (Idxent*)pb; + if((c = strcmp(a->path, b->path)) != 0) + return c; + assert(a->order != b->order); + return a-> order < b->order ? -1 : 1; +} + +int entcmp(void *pa, void *pb) { char abuf[256], bbuf[256], *ae, *be; @@ -184,27 +201,22 @@ int tracked(char *path) { - char ipath[256]; - Dir *d; + int lo, hi, mid, r; - /* Explicitly removed. */ - snprint(ipath, sizeof(ipath), ".git/index9/removed/%s", path); - if(strstr(cleanname(ipath), ".git/index9/removed") != ipath) - sysfatal("path %s leaves index", ipath); - d = dirstat(ipath); - if(d != nil && d->qid.type != QTDIR){ - free(d); - return 0; + r = -1; + lo = 0; + hi = nidx-1; + while(lo <= hi){ + mid = (hi + lo) / 2; + r = strcmp(path, idx[mid].path); + if(r < 0) + hi = mid-1; + else if(r > 0) + lo = mid+1; + else + break; } - - /* Explicitly added. */ - snprint(ipath, sizeof(ipath), ".git/index9/tracked/%s", path); - if(strstr(cleanname(ipath), ".git/index9/tracked") != ipath) - sysfatal("path %s leaves index", ipath); - if(access(ipath, AEXIST) == 0) - return 1; - - return 0; + return r == 0; } int @@ -354,10 +366,11 @@ void main(int argc, char **argv) { + char *ln, *dstr, *parts[4], cwd[1024]; + int i, r, line, ncwd; Hash th, ch; - char *dstr, cwd[1024]; - int i, r, ncwd; vlong date; + Biobuf *f; Object *t; gitinit(); @@ -425,6 +438,33 @@ } t = findroot(); + nidx = 0; + idxsz = 32; + idx = emalloc(idxsz*sizeof(Idxent)); + if((f = Bopen(".git/INDEX9", OREAD)) == nil) + sysfatal("open index: %r"); + line = 0; + while((ln = Brdstr(f, '\n', 1)) != nil){ + line++; + if(ln[0] == 0 || ln[0] == '\n') + continue; + if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts)) + sysfatal(".git/INDEX9:%d: corrupt index", line); + if(nidx == idxsz){ + idxsz += idxsz/2; + idx = realloc(idx, idxsz*sizeof(Idxent)); + } + cleanname(parts[3]); + idx[nidx].state = *parts[0]; + idx[nidx].qid = parseqid(parts[1]); + idx[nidx].mode = strtol(parts[2], nil, 8); + idx[nidx].path = strdup(parts[3]); + idx[nidx].order = nidx; + nidx++; + free(ln); + } + Bterm(f); + qsort(idx, nidx, sizeof(Idxent), idxcmp); r = treeify(t, argv, argv + argc, 0, &th); if(r == -1) sysfatal("could not commit: %r\n"); --- a/sys/src/cmd/git/test/basic.rc +++ b/sys/src/cmd/git/test/basic.rc @@ -64,4 +64,3 @@ q diff -c scratch/upstream/file.txt scratch/downstream/file.txt || die mismatch ! test -e scratch/upstream/file2.txt || die mismatch ! test -e scratch/downstream/file2.txt || die mismatch - --- a/sys/src/cmd/git/test/util.rc +++ b/sys/src/cmd/git/test/util.rc @@ -1,5 +1,5 @@ fn q { - {$* > /tmp/out.$pid && rm /tmp/out.$pid} || cat /tmp/out + {$* > /tmp/out.$pid && rm /tmp/out.$pid} || cat /tmp/out.$pid } fn qq { $* >/dev/null >[2]/dev/null --- a/sys/src/cmd/git/util.c +++ b/sys/src/cmd/git/util.c @@ -192,8 +192,10 @@ Qid q; q = va_arg(fmt->args, Qid); - return fmtprint(fmt, "Qid{path=0x%llx(dir:%d,obj:%lld), vers=%ld, type=%d}", - q.path, QDIR(&q), (q.path >> 8), q.vers, q.type); + if(q.path == ~0ULL && q.vers == ~0UL && q.type == 0xff) + return fmtprint(fmt, "NOQID"); + else + return fmtprint(fmt, "%llux.%lud.%hhx", q.path, q.vers, q.type); } void @@ -294,7 +296,7 @@ /* Finds the directory containing the git repo. */ int -findrepo(char *buf, int nbuf) +findrepo(char *buf, int nbuf, int *nrel) { char *p, *suff; @@ -302,6 +304,7 @@ if(getwd(buf, nbuf - strlen(suff) - 1) == nil) return -1; + *nrel = 0; for(p = buf + strlen(buf); p != nil; p = strrchr(buf, '/')){ strcpy(p, suff); if(access(buf, AEXIST) == 0){ @@ -308,6 +311,7 @@ p[p == buf] = '\0'; return 0; } + *nrel += 1; *p = '\0'; } werrstr("not a git repository"); @@ -441,4 +445,25 @@ h ^= h >> 15; return h; +} + +Qid +parseqid(char *s) +{ + char *e; + Qid q; + + if(strcmp(s, "NOQID") == 0) + return (Qid){-1, -1, -1}; + e = s; + q.path = strtoull(e, &e, 16); + if(*e != '.') + sysfatal("corrupt qid: %s (%s)\n", s, e); + q.vers = strtoul(e+1, &e, 10); + if(*e != '.') + sysfatal("corrupt qid: %s (%s)\n", s, e); + q.type = strtoul(e+1, &e, 16); + if(*e != '\0') + sysfatal("corrupt qid: %s (%x)\n", s, *e); + return q; } --- a/sys/src/cmd/git/walk.c +++ b/sys/src/cmd/git/walk.c @@ -2,53 +2,72 @@ #include #include "git.h" +typedef struct Seen Seen; +typedef struct Idxed Idxed; +typedef struct Idxent Idxent; + #define NCACHE 4096 -#define TDIR ".git/index9/tracked" -#define RDIR ".git/index9/removed" -typedef struct Cache Cache; -typedef struct Wres Wres; -struct Cache { + +enum { + Rflg = 1 << 0, + Mflg = 1 << 1, + Aflg = 1 << 2, + Uflg = 1 << 3, + /* everything after this is not an error */ + Tflg = 1 << 4, +}; + +struct Seen { Dir* cache; int n; int max; }; -struct Wres { - char **path; - int npath; - int pathsz; +struct Idxed { + char** cache; + int n; + int max; }; -enum { - Rflg = 1 << 0, - Mflg = 1 << 1, - Aflg = 1 << 2, - Tflg = 1 << 3, -}; +Seen seentab[NCACHE]; +Idxed idxtab[NCACHE]; +char repopath[1024]; +char wdirpath[1024]; +char *rstr = "R "; +char *mstr = "M "; +char *astr = "A "; +char *ustr = "U "; +char *tstr = "T "; +char *bdir = ".git/fs/HEAD/tree"; +int useidx = 1; +int nrel; +int quiet; +int dirty; +int printflg; -Cache seencache[NCACHE]; -int quiet; -int useindex; -int printflg; -char *bdir = ".git/fs/HEAD/tree"; -char *rstr = "R "; -char *tstr = "T "; -char *mstr = "M "; -char *astr = "A "; +Idxent *idx; +int idxsz; +int nidx; +int staleidx; +Idxent *wdir; +int wdirsz; +int nwdir; +int loadwdir(char*); + int seen(Dir *dir) { + Seen *c; Dir *dp; int i; - Cache *c; - c = &seencache[dir->qid.path&(NCACHE-1)]; + c = &seentab[dir->qid.path&(NCACHE-1)]; dp = c->cache; for(i=0; in; i++, dp++) - if(dir->qid.path == dp->qid.path && - dir->type == dp->type && - dir->dev == dp->dev) + if(dir->qid.path == dp->qid.path + && dir->qid.type == dp->qid.type + && dir->dev == dp->dev) return 1; if(c->n == c->max){ if (c->max == 0) @@ -63,129 +82,100 @@ return 0; } -void -grow(Wres *r) -{ - if(r->npath == r->pathsz){ - r->pathsz = 2*r->pathsz + 1; - r->path = erealloc(r->path, r->pathsz * sizeof(char*)); - } -} - int -readpaths(Wres *r, char *pfx, char *dir) +checkedin(Idxent *e, int change) { - char *f, *sub, *full, *sep; - Dir *d; - int fd, ret, i, n; + char *p; + int r; - d = nil; - ret = -1; - sep = ""; - if(dir[0] != 0) - sep = "/"; - if((full = smprint("%s/%s", pfx, dir)) == nil) - sysfatal("smprint: %r"); - if((fd = open(full, OREAD)) < 0) - goto error; - while((n = dirread(fd, &d)) > 0){ - for(i = 0; i < n; i++){ - if(seen(&d[i])) - continue; - if(d[i].qid.type & QTDIR){ - if((sub = smprint("%s%s%s", dir, sep, d[i].name)) == nil) - sysfatal("smprint: %r"); - if(readpaths(r, pfx, sub) == -1){ - free(sub); - goto error; - } - free(sub); - }else{ - grow(r); - if((f = smprint("%s%s%s", dir, sep, d[i].name)) == nil) - sysfatal("smprint: %r"); - r->path[r->npath++] = f; - } - } - free(d); + p = smprint("%s/%s", bdir, e->path); + r = access(p, AEXIST); + if(r == 0 && change){ + e->state = 'T'; + staleidx = 1; } - ret = r->npath; -error: - close(fd); - free(full); - return ret; + free(p); + return r == 0; } int -cmp(void *pa, void *pb) +indexed(char *path, int isdir) { - return strcmp(*(char **)pa, *(char **)pb); -} + int lo, hi, mid, n, r; + char *s; -void -dedup(Wres *r) -{ - int i, o; - - if(r->npath <= 1) - return; - o = 0; - qsort(r->path, r->npath, sizeof(r->path[0]), cmp); - for(i = 1; i < r->npath; i++) - if(strcmp(r->path[o], r->path[i]) != 0) - r->path[++o] = r->path[i]; - r->npath = o + 1; + if(!useidx){ + s = smprint("%s/%s", bdir, path); + r = access(s, AEXIST); + free(s); + return r == 0; + } + s = path; + if(isdir) + s = smprint("%s/", path); + r = -1; + lo = 0; + hi = nidx-1; + n = strlen(s); + while(lo <= hi){ + mid = (hi + lo) / 2; + if(isdir) + r = strncmp(s, idx[mid].path, n); + else + r = strcmp(s, idx[mid].path); + if(r < 0) + hi = mid-1; + else if(r > 0) + lo = mid+1; + else + break; + } + if(isdir) + free(s); + return r == 0; } int -sameqid(Dir *d, char *qf) +idxcmp(void *pa, void *pb) { - char indexqid[64], fileqid[64], *p; - int fd, n; + Idxent *a, *b; + int c; - if(!d) - return 0; - if((fd = open(qf, OREAD)) == -1) - return 0; - if((n = readn(fd, indexqid, sizeof(indexqid) - 1)) == -1) - return 0; - indexqid[n] = 0; - close(fd); - if((p = strpbrk(indexqid, " \t\n\r")) != nil) - *p = 0; - - snprint(fileqid, sizeof(fileqid), "%ullx.%uld.%.2uhhx", - d->qid.path, d->qid.vers, d->qid.type); - - if(strcmp(indexqid, fileqid) == 0) - return 1; - return 0; + a = (Idxent*)pa; + b = (Idxent*)pb; + if((c = strcmp(a->path, b->path)) != 0) + return c; + assert(a->order != b->order); + return a-> order < b->order ? -1 : 1; } -void -writeqid(Dir *d, char *qf) -{ - int fd; - - if((fd = create(qf, OWRITE, 0666)) == -1) - return; - fprint(fd, "%ullx.%uld.%.2uhhx\n", - d->qid.path, d->qid.vers, d->qid.type); - close(fd); -} - int -samedata(char *pa, char *pb) +samedata(Idxent *a, Idxent *b) { - char ba[32*1024], bb[32*1024]; + char *gitpath, ba[IOUNIT], bb[IOUNIT]; int fa, fb, na, nb, same; + Dir *da, *db; + if(a != nil){ + if(a->qid.path == b->qid.path + && a->qid.vers == b->qid.vers + && a->qid.type == b->qid.type + && a->mode == b->mode + && a->mode != 0) + return 1; + } + + da = nil; + db = nil; same = 0; - fa = open(pa, OREAD); - fb = open(pb, OREAD); - if(fa == -1 || fb == -1){ + if((gitpath = smprint("%s/%s", bdir, b->path)) == nil) + sysfatal("smprint: %r"); + fa = open(b->path, OREAD); + fb = open(gitpath, OREAD); + if(fa == -1 || fb == -1) goto mismatch; - } + da = dirfstat(fa); + db = dirfstat(fb); while(1){ if((na = readn(fa, ba, sizeof(ba))) == -1) goto mismatch; @@ -198,8 +188,19 @@ if(memcmp(ba, bb, na) != 0) goto mismatch; } + if(a != nil){ + a->qid = b->qid; + if(da != nil) + a->mode = da->mode; + } + staleidx = 1; + if(da->mode != db->mode) + goto mismatch; same = 1; + mismatch: + free(da); + free(db); if(fa != -1) close(fa); if(fb != -1) @@ -207,7 +208,129 @@ return same; } +int +loadent(char *dir, Dir *d, int fullpath) +{ + char *path; + int ret, isdir; + Idxent *e; + + if(fullpath) + path = strdup(dir); + else + path = smprint("%s/%s", dir, d->name); + if(path == nil) + sysfatal("smprint: %r"); + + ret = 0; + isdir = d->qid.type & QTDIR; + cleanname(path); + if((printflg & Uflg) == 0 && !indexed(path, isdir)) + return 0; + if(isdir){ + ret = loadwdir(path); + free(path); + }else{ + if(nwdir == wdirsz){ + wdirsz += wdirsz/2; + wdir = erealloc(wdir, wdirsz*sizeof(Idxent)); + } + e = wdir + nwdir; + e->path = path; + e->qid = d->qid; + e->mode = d->mode; + e->order = nwdir; + e->state = 'T'; + nwdir++; + } + return ret; +} + +int +loadwdir(char *path) +{ + int fd, ret, i, n; + Dir *d, *e; + + d = nil; + e = nil; + ret = -1; + + cleanname(path); + if(strncmp(path, ".git/", 5) == 0) + return 0; + if((fd = open(path, OREAD)) < 0) + goto error; + if((e = dirfstat(fd)) == nil) + sysfatal("fstat: %r"); + if(e->qid.type & QTDIR) + while((n = dirread(fd, &d)) > 0){ + for(i = 0; i < n; i++) + if(loadent(path, &d[i], 0) == -1) + goto error; + free(d); + } + else{ + if(loadent(path, e, 1) == -1) + goto error; + } + ret = 0; +error: + free(e); + if(fd != -1) + close(fd); + return ret; +} + +int +pfxmatch(char *p, char **pfx, int *pfxlen, int npfx) +{ + int i; + + if(npfx == 0) + return 1; + for(i = 0; i < npfx; i++){ + if(strncmp(p, pfx[i], pfxlen[i]) == 0 + && (p[pfxlen[i]] == '/' || p[pfxlen[i]] == 0)) + return 1; + if(strcmp(pfx[i], ".") == 0 || *pfx[i] == 0) + return 1; + } + return 0; +} + + +char* +reporel(char *s) +{ + char *p; + int n; + + if(*s == '/') + s = strdup(s); + else + s = smprint("%s/%s", wdirpath, s); + p = cleanname(s); + n = strlen(repopath); + if(strncmp(s, repopath, n) == 0) + p = s + n; + else + sysfatal("path outside repo: %s", s); + if(*p == '/') + p++; + memmove(s, p, strlen(p)+1); + return s; +} + void +show(Biobuf *o, int flg, char *str, char *path) +{ + dirty |= flg; + if(!quiet && (printflg & flg)) + Bprint(o, "%s%s\n", str, path); +} + +void usage(void) { fprint(2, "usage: %s [-qbc] [-f filt] [-b base] [paths...]\n", argv0); @@ -217,14 +340,21 @@ void main(int argc, char **argv) { - char *rpath, *tpath, *bpath, *base, buf[8], repo[512]; - char *p, *e; - int i, dirty; - Wres r; - Hash h; - Dir *d; + char *p, *e, *ln, *base, **argrel, *parts[4], xbuf[8]; + int i, j, c, line, wfd, *argn; + Biobuf *f, *o, *w; + Hash h, hd; + Dir rn; gitinit(); + if(access(".git/fs/ctl", AEXIST) != 0) + sysfatal("no running git/fs"); + if(getwd(wdirpath, sizeof(wdirpath)) == nil) + sysfatal("getwd: %r"); + if(findrepo(repopath, sizeof(repopath), &nrel) == -1) + sysfatal("find root: %r"); + if(chdir(repopath) == -1) + sysfatal("chdir: %r"); ARGBEGIN{ case 'q': @@ -235,6 +365,7 @@ tstr = ""; mstr = ""; astr = ""; + ustr = ""; break; case 'f': for(p = EARGF(usage()); *p; p++) @@ -243,109 +374,164 @@ case 'A': printflg |= Aflg; break; case 'M': printflg |= Mflg; break; case 'R': printflg |= Rflg; break; + case 'U': printflg |= Uflg; break; default: usage(); break; } break; case 'b': - useindex = 0; + useidx = 0; base = EARGF(usage()); if(resolveref(&h, base) == -1) sysfatal("no such ref '%s'", base); + /* optimization: we're a lot faster when using the index */ + if(resolveref(&hd, "HEAD") == 0 && hasheq(&h, &hd)) + useidx = 1; bdir = smprint(".git/fs/object/%H/tree", h); break; default: usage(); - }ARGEND + }ARGEND; - gitinit(); - if(findrepo(repo, sizeof(repo)) == -1) - sysfatal("find root: %r"); - if(chdir(repo) == -1) - sysfatal("chdir: %r"); - if(access(".git/fs/ctl", AEXIST) != 0) - sysfatal("no running git/fs"); - dirty = 0; - memset(&r, 0, sizeof(r)); if(printflg == 0) printflg = Tflg | Aflg | Mflg | Rflg; - if(argc == 0){ - if(access(TDIR, AEXIST) == 0 && readpaths(&r, TDIR, "") == -1) - sysfatal("read tracked: %r"); - if(access(RDIR, AEXIST) == 0 && readpaths(&r, RDIR, "") == -1) - sysfatal("read removed: %r"); - if(access(bdir, AEXIST) == 0 && readpaths(&r, bdir, "") == -1) - sysfatal("read base: %r"); - }else{ - for(i = 0; i < argc; i++){ - tpath = smprint(TDIR"/%s", argv[i]); - rpath = smprint(RDIR"/%s", argv[i]); - bpath = smprint("%s/%s", bdir, argv[i]); - if((d = dirstat(tpath)) == nil && (d = dirstat(rpath)) == nil && (d = dirstat(bpath)) == nil) - goto nextarg; - if(d->mode & DMDIR){ - readpaths(&r, TDIR, argv[i]); - readpaths(&r, RDIR, argv[i]); - readpaths(&r, bdir, argv[i]); - }else{ - grow(&r); - r.path[r.npath++] = estrdup(argv[i]); + + nidx = 0; + idxsz = 32; + idx = emalloc(idxsz*sizeof(Idxent)); + nwdir = 0; + wdirsz = 32; + wdir = emalloc(wdirsz*sizeof(Idxent)); + argrel = emalloc(argc*sizeof(int)); + argn = emalloc(argc*sizeof(int)); + for(i = 0; i < argc; i++){ + argrel[i] = reporel(argv[i]); + argn[i] = strlen(argrel[i]); + } + if((o = Bfdopen(1, OWRITE)) == nil) + sysfatal("open out: %r"); + if(useidx){ + if((f = Bopen(".git/INDEX9", OREAD)) == nil){ + fprint(2, "open index: %r\n"); + if(access(".git/index9", AEXIST) == 0){ + fprint(2, "index format conversion needed:\n"); + fprint(2, "\tcd %s && git/fs\n", repopath); + fprint(2, "\t@{cd .git/fs/HEAD/tree && walk -f | sed 's/^/T NOQID 0 /'} >> .git/INDEX9\n"); + fprint(2, "\t@{cd .git/index9/removed && walk -f | sed 's/^/R NOQID 0 /'} >> .git/INDEX9\n"); } -nextarg: - free(bpath); - free(tpath); - free(rpath); - free(d); + exits("noindex"); } + line = 0; + while((ln = Brdstr(f, '\n', 1)) != nil){ + line++; + /* allow blank lines */ + if(ln[0] == 0 || ln[0] == '\n') + continue; + if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts)) + sysfatal(".git/INDEX9:%d: corrupt index", line); + if(nidx == idxsz){ + idxsz += idxsz/2; + idx = realloc(idx, idxsz*sizeof(Idxent)); + } + cleanname(parts[3]); + idx[nidx].state = *parts[0]; + idx[nidx].qid = parseqid(parts[1]); + idx[nidx].mode = strtol(parts[2], nil, 8); + idx[nidx].path = strdup(parts[3]); + idx[nidx].order = nidx; + nidx++; + free(ln); + } + qsort(idx, nidx, sizeof(Idxent), idxcmp); } - dedup(&r); - for(i = 0; i < r.npath; i++){ - p = r.path[i]; - d = dirstat(p); - if(d && d->mode & DMDIR) - goto next; - rpath = smprint(RDIR"/%s", p); - tpath = smprint(TDIR"/%s", p); - bpath = smprint("%s/%s", bdir, p); - /* Fast path: we don't want to force access to the rpath. */ - if(useindex && d && sameqid(d, tpath)) { - if(!quiet && (printflg & Tflg)) - print("%s%s\n", tstr, p); + if(argc == 0) + loadwdir("."); + else for(i = 0; i < argc; i++) + loadwdir(argrel[i]); + qsort(wdir, nwdir, sizeof(Idxent), idxcmp); + i = 0; + j = 0; + while(i < nidx || j < nwdir){ + /* find the last entry we tracked for a path */ + while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0){ + staleidx = 1; + i++; + } + while(j+1 < nwdir && strcmp(wdir[j].path, wdir[j+1].path) == 0) + j++; + if(i < nidx && !pfxmatch(idx[i].path, argrel, argn, argc)){ + i++; + continue; + } + if(i >= nidx) + c = 1; + else if(j >= nwdir) + c = -1; + else + c = strcmp(idx[i].path, wdir[j].path); + /* exists in both index and on disk */ + if(c == 0){ + if(idx[i].state == 'R' && checkedin(&idx[i], 0)) + show(o, Rflg, rstr, idx[i].path); + else if(idx[i].state == 'A' && !checkedin(&idx[i], 1)) + show(o, Aflg, astr, idx[i].path); + else if(!samedata(&idx[i], &wdir[j])) + show(o, Mflg, mstr, idx[i].path); + else + show(o, Tflg, tstr, idx[i].path); + i++; + j++; + /* only exists in index */ + }else if(c < 0){ + if(checkedin(&idx[i], 0)) + show(o, Rflg, rstr, idx[i].path); + i++; + /* only exists on disk */ }else{ - if(d == nil || (useindex && access(rpath, AEXIST) == 0)){ - if(access(bpath, AEXIST) == 0){ - dirty |= Rflg; - if(!quiet && (printflg & Rflg)) - print("%s%s\n", rstr, p); - } - }else if(access(bpath, AEXIST) == -1) { - dirty |= Aflg; - if(!quiet && (printflg & Aflg)) - print("%s%s\n", astr, p); - }else if(samedata(p, bpath)){ - if(!quiet && (printflg & Tflg)) - print("%s%s\n", tstr, p); - if(useindex) - writeqid(d, tpath); - }else{ - dirty |= Mflg; - if(!quiet && (printflg & Mflg)) - print("%s%s\n", mstr, p); - } + if(!useidx && checkedin(&wdir[j], 0)){ + if(samedata(nil, &wdir[j])) + show(o, Tflg, tstr, wdir[j].path); + else + show(o, Mflg, mstr, wdir[j].path); + }else if(printflg & Uflg) + show(o, Uflg, ustr, wdir[j].path); + j++; } - free(rpath); - free(tpath); - free(bpath); -next: - free(d); } + Bterm(o); + + if(useidx && staleidx) + if((wfd = create(".git/INDEX9.new", OWRITE, 0644)) != -1){ + if((w = Bfdopen(wfd, OWRITE)) == nil){ + close(wfd); + goto Nope; + } + for(i = 0; i < nidx; i++){ + while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0) + i++; + Bprint(w, "%c %Q %o %s\n", + idx[i].state, + idx[i].qid, + idx[i].mode, + idx[i].path); + } + Bterm(w); + nulldir(&rn); + rn.name = "INDEX9"; + if(remove(".git/INDEX9") == -1) + goto Nope; + if(dirwstat(".git/INDEX9.new", &rn) == -1) + sysfatal("rename: %r"); + } + +Nope: if(!dirty) exits(nil); - p = buf; - e = buf + sizeof(buf); + p = xbuf; + e = p + sizeof(xbuf); for(i = 0; (1 << i) != Tflg; i++) if(dirty & (1 << i)) - p = seprint(p, e, "%c", "DMAT"[i]); - exits(buf); + p = seprint(p, e, "%c", "RMAUT"[i]); + exits(xbuf); }