From: ori@eigenstate.org
To: 9front@9front.org
Subject: Re: [9front] [v2] git: new index format, 235x faster walks
Date: Sun, 17 Sep 2023 23:49:59 -0400 [thread overview]
Message-ID: <91B9FC3B10A4C61A17F2EA04575388E7@eigenstate.org> (raw)
In-Reply-To: <C700D47DA6F5C801827FD8F2D29D274D@eigenstate.org>
Sorry for the spam; One more revision; found a couple of missing
changes to the new 4 column index format.
diff 597ff0b59658032e141553f286e02b9370f995b5 uncommitted
--- a/sys/src/cmd/git/add
+++ b/sys/src/cmd/git/add
@@ -7,32 +7,17 @@
flagfmt='r:remove'; args='file ...'
eval `''{aux/getflags $*} || exec aux/usage
-add='tracked'
-del='removed'
-if(~ $remove 1){
- add='removed'
- del='tracked'
-}
+s=A
+if(~ $remove 1)
+ s=R
if(~ $#* 0)
exec aux/usage
paths=`$nl{cleanname -d $gitrel $* | drop $gitroot}
-if(~ $add tracked)
+if(~ $s A)
files=`$nl{walk -f ./$paths}
if not
- files=`$nl{cd .git/index9/tracked/ && walk -f ./$paths}
-
-for(f in $files){
- if(! ~ `$nl{cleanname $f} .git/*){
- addpath=.git/index9/$add/$f
- delpath=.git/index9/$del/$f
- mkdir -p `$nl{basename -d $addpath}
- mkdir -p `$nl{basename -d $delpath}
- # We don't want a matching qid, so that
- # git/walk doesn't think this came from
- # a checkout.
- echo -n > $addpath
- rm -f $delpath
- }
-}
+ files=`$nl{cd .git/fs/HEAD/tree && walk -f ./$paths}
+for(f in $files)
+ echo $s NOQID 0 $f >> .git/INDEX9
exit ''
--- a/sys/src/cmd/git/branch
+++ b/sys/src/cmd/git/branch
@@ -84,7 +84,6 @@
for(m in $cleanpaths){
d=`$nl{basename -d $m}
mkdir -p $d
- mkdir -p .git/index9/tracked/$d
# Modifications can turn a file into
# a directory, or vice versa, so we
# need to delete and copy the files
@@ -97,11 +96,11 @@
b=file
if(! ~ $a $b){
rm -rf $m
- rm -rf .git/index9/tracked/$m
+ echo R NOQID 0 $m >> .git/INDEX9
}
if(~ $b file){
cp -x -- $basedir/tree/$m $m
- walk -eq $m > .git/index9/tracked/$m
+ echo T NOQID 0 $m >> .git/INDEX9
touch $m
}
}
@@ -112,9 +111,9 @@
merge1 $ours $ours $common $theirs
}
-if(! ~ $#deleted 0){
- rm -f $deleted
- rm -f .git/index9/tracked/$deleted
+for(d in $deleted){
+ rm -f $d
+ echo R NOQID 0 $d >> .git/INDEX9
}
echo ref: $new > .git/HEAD
--- a/sys/src/cmd/git/clone
+++ b/sys/src/cmd/git/clone
@@ -87,11 +87,8 @@
git/fs
@ {builtin cd $tree && tar cif /fd/1 .} | @ {tar xf /fd/0} \
|| die 'checkout failed:' $status
- for(f in `$nl{walk -f $tree | drop $tree}){
- idx=.git/index9/tracked/$f
- mkdir -p `$nl{basename -d $idx}
- walk -eq ./$f > $idx
- }
+ {for(f in `$nl{cd $tree && walk -f})
+ echo 'T NOQID 0 '$f} > .git/INDEX9
}
if not{
echo no default branch >[1=2]
--- a/sys/src/cmd/git/commit
+++ b/sys/src/cmd/git/commit
@@ -65,8 +65,8 @@
fn parents{
if(! ~ $#revise 0)
parents=`{cat $gitfs/HEAD/parent}
- if not if(test -f .git/index9/merge-parents)
- parents=`{cat .git/index9/merge-parents | sort | uniq}
+ if not if(test -f .git/merge-parents)
+ parents=`{cat .git/merge-parents | sort | uniq}
if not if(~ $initial true)
parents=()
if not
@@ -78,7 +78,7 @@
if(! ~ $#parents 0)
pflags='-p'^$parents
hash=`{git/save -n $"name -e $"email -m $"msg $pflags $files || die $status}
- rm -f .git/index9/merge-parents
+ rm -f .git/merge-parents
}
fn update{
@@ -89,14 +89,10 @@
echo $branch: $hash
echo $hash > $refpath
for(f in $files){
- if(test -e .git/index9/removed/$f || ! test -e $f){
- rm -f .git/index9/removed/$f
- rm -f .git/index9/tracked/$f
- }
- if not{
- mkdir -p `{basename -d $f}
- walk -eq $f > .git/index9/tracked/$f
- }
+ if(! test -e $f && ! test -e .git/object/$hash/tree/$f)
+ echo R NOQID 0 $f >> .git/INDEX9
+ if not
+ echo T NOQID 0 $f >> .git/INDEX9
}
}
@@ -120,12 +116,12 @@
}
files=()
-if(test -f .git/index9/merge-parents)
- files=`$nl{git/query -c `{cat .git/index9/merge-parents} | sed 's/^..//'}
+if(test -f .git/merge-parents)
+ files=`$nl{git/query -c `{cat .git/merge-parents} | sed 's/^..//'}
if(! ~ $#* 0)
files=($files `$nl{git/walk -c `$nl{cleanname -d $gitrel $*}})
-if(~ $status '' || ~ $#files 0 && ! test -f .git/index9/merge-parents && ~ $#revise 0)
- die 'nothing to commit' $status
+if(~ $status '' || ~ $#files 0 && ! test -f .git/merge-parents && ~ $#revise 0)
+ die 'nothing to commit'
@{
flag e +
whoami
--- a/sys/src/cmd/git/conf.c
+++ b/sys/src/cmd/git/conf.c
@@ -59,7 +59,7 @@
main(int argc, char **argv)
{
char repo[512], *p, *s;
- int i, j;
+ int i, j, nrel;
ARGBEGIN{
case 'f': file[nfile++]=EARGF(usage()); break;
@@ -69,7 +69,7 @@
}ARGEND;
if(findroot){
- if(findrepo(repo, sizeof(repo)) == -1)
+ if(findrepo(repo, sizeof(repo), &nrel) == -1)
sysfatal("%r");
print("%s\n", repo);
exits(nil);
--- a/sys/src/cmd/git/diff
+++ b/sys/src/cmd/git/diff
@@ -7,8 +7,10 @@
flagfmt='c:commit branch, s:summarize'; args='[file ...]'
eval `''{aux/getflags $*} || exec aux/usage
-if(~ $#commit 0)
+if(~ $#commit 0){
commit=HEAD
+ cparam=()
+}
files=()
if(! ~ $#* 0)
@@ -16,7 +18,7 @@
branch=`{git/query -p $commit}
if(~ $summarize 1){
- git/walk -fMAR -b $commit $files
+ git/walk -fMAR $cparam $files
exit
}
@@ -24,7 +26,7 @@
mntgen /mnt/scratch
bind $branch/tree/ /mnt/scratch/a
bind . /mnt/scratch/b
-for(f in `$nl{git/walk -c -fRMA -b $commit $files}){
+for(f in `$nl{git/walk -c -fRMA $cparam $files}){
if(~ $#showed 0){
echo diff `{git/query $commit} uncommitted
showed=1
--- a/sys/src/cmd/git/git.h
+++ b/sys/src/cmd/git/git.h
@@ -21,6 +21,7 @@
typedef struct Dblock Dblock;
typedef struct Objq Objq;
typedef struct Qelt Qelt;
+typedef struct Idxent Idxent;
enum {
Pathmax = 512,
@@ -190,6 +191,13 @@
int len;
};
+struct Idxent {
+ char *path;
+ Qid qid;
+ int mode;
+ int order;
+ char state;
+};
#define GETBE16(b)\
((((b)[0] & 0xFFul) << 8) | \
@@ -301,9 +309,10 @@
int hassuffix(char *, char *);
int swapsuffix(char *, int, char *, char *, char *);
char *strip(char *);
-int findrepo(char *, int);
+int findrepo(char *, int, int*);
int showprogress(int, int);
u64int murmurhash2(void*, usize);
+Qid parseqid(char*);
/* packing */
void dtinit(Dtab *, Object*);
--- a/sys/src/cmd/git/import
+++ b/sys/src/cmd/git/import
@@ -98,7 +98,8 @@
if(~ $#nocommit 0){
if(hash=`{git/save -n $aname -e $amail -N $name -E $email -m $msg -d $date $parents $files}){
echo $hash > $refpath
- rm -f .git/index9/removed/$files
+ for(f in $files)
+ echo T NOQID 0 $f >> .git/INDEX9
}
}
status=''''
--- a/sys/src/cmd/git/init
+++ b/sys/src/cmd/git/init
@@ -31,7 +31,7 @@
echo '[branch "'$branch'"]'
echo ' remote = origin'
}
-
+>$dir/.git/INDEX9
>$dir/.git/HEAD {
echo ref: refs/heads/$branch
}
--- a/sys/src/cmd/git/log.c
+++ b/sys/src/cmd/git/log.c
@@ -239,7 +239,7 @@
main(int argc, char **argv)
{
char path[1024], repo[1024], *p, *r;
- int i, nrepo;
+ int i, nrel, nrepo;
ARGBEGIN{
case 'e':
@@ -259,7 +259,7 @@
break;
}ARGEND;
- if(findrepo(repo, sizeof(repo)) == -1)
+ if(findrepo(repo, sizeof(repo), &nrel) == -1)
sysfatal("find root: %r");
nrepo = strlen(repo);
if(argc != 0){
--- a/sys/src/cmd/git/merge
+++ b/sys/src/cmd/git/merge
@@ -38,8 +38,8 @@
git/revert .
exit ''
}
-echo $ours >> .git/index9/merge-parents
-echo $theirs >> .git/index9/merge-parents
+echo $ours >> .git/merge-parents
+echo $theirs >> .git/merge-parents
merge $ours $base $theirs
>[1=2] echo 'merge complete: remember to commit'
--- a/sys/src/cmd/git/pack.c
+++ b/sys/src/cmd/git/pack.c
@@ -66,7 +66,7 @@
Object *lruhead;
Object *lrutail;
vlong ncache;
-vlong cachemax = 512*MiB;
+vlong cachemax = 1024*MiB;
Packf *packf;
int npackf;
int openpacks;
--- a/sys/src/cmd/git/query.c
+++ b/sys/src/cmd/git/query.c
@@ -152,10 +152,10 @@
void
main(int argc, char **argv)
{
- int i, j, n;
- Hash *h;
- char *p, *e, *s, *objpfx;
char query[2048], repo[512];
+ char *p, *e, *s, *objpfx;
+ int i, j, n, nrel;
+ Hash *h;
ARGBEGIN{
case 'd': chattygit++; break;
@@ -170,7 +170,7 @@
if(argc == 0)
usage();
- if(findrepo(repo, sizeof(repo)) == -1)
+ if(findrepo(repo, sizeof(repo), &nrel) == -1)
sysfatal("find root: %r");
if(chdir(repo) == -1)
sysfatal("chdir: %r");
--- a/sys/src/cmd/git/save.c
+++ b/sys/src/cmd/git/save.c
@@ -10,6 +10,7 @@
char *dat;
int ndat;
};
+
enum {
Maxparents = 16,
};
@@ -21,7 +22,9 @@
char *commitmsg;
Hash parents[Maxparents];
int nparents;
-
+Idxent *idx;
+int idxsz;
+int nidx;
int
gitmode(Dirent *e)
{
@@ -38,6 +41,20 @@
}
int
+idxcmp(void *pa, void *pb)
+{
+ Idxent *a, *b;
+ int c;
+
+ a = (Idxent*)pa;
+ b = (Idxent*)pb;
+ if((c = strcmp(a->path, b->path)) != 0)
+ return c;
+ assert(a->order != b->order);
+ return a-> order < b->order ? -1 : 1;
+}
+
+int
entcmp(void *pa, void *pb)
{
char abuf[256], bbuf[256], *ae, *be;
@@ -184,27 +201,22 @@
int
tracked(char *path)
{
- char ipath[256];
- Dir *d;
+ int lo, hi, mid, r;
- /* Explicitly removed. */
- snprint(ipath, sizeof(ipath), ".git/index9/removed/%s", path);
- if(strstr(cleanname(ipath), ".git/index9/removed") != ipath)
- sysfatal("path %s leaves index", ipath);
- d = dirstat(ipath);
- if(d != nil && d->qid.type != QTDIR){
- free(d);
- return 0;
+ r = -1;
+ lo = 0;
+ hi = nidx-1;
+ while(lo <= hi){
+ mid = (hi + lo) / 2;
+ r = strcmp(path, idx[mid].path);
+ if(r < 0)
+ hi = mid-1;
+ else if(r > 0)
+ lo = mid+1;
+ else
+ break;
}
-
- /* Explicitly added. */
- snprint(ipath, sizeof(ipath), ".git/index9/tracked/%s", path);
- if(strstr(cleanname(ipath), ".git/index9/tracked") != ipath)
- sysfatal("path %s leaves index", ipath);
- if(access(ipath, AEXIST) == 0)
- return 1;
-
- return 0;
+ return r == 0;
}
int
@@ -354,10 +366,11 @@
void
main(int argc, char **argv)
{
+ char *ln, *dstr, *parts[4], cwd[1024];
+ int i, r, line, ncwd;
Hash th, ch;
- char *dstr, cwd[1024];
- int i, r, ncwd;
vlong date;
+ Biobuf *f;
Object *t;
gitinit();
@@ -425,6 +438,33 @@
}
t = findroot();
+ nidx = 0;
+ idxsz = 32;
+ idx = emalloc(idxsz*sizeof(Idxent));
+ if((f = Bopen(".git/INDEX9", OREAD)) == nil)
+ sysfatal("open index: %r");
+ line = 0;
+ while((ln = Brdstr(f, '\n', 1)) != nil){
+ line++;
+ if(ln[0] == 0 || ln[0] == '\n')
+ continue;
+ if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts))
+ sysfatal(".git/INDEX9:%d: corrupt index", line);
+ if(nidx == idxsz){
+ idxsz += idxsz/2;
+ idx = realloc(idx, idxsz*sizeof(Idxent));
+ }
+ cleanname(parts[3]);
+ idx[nidx].state = *parts[0];
+ idx[nidx].qid = parseqid(parts[1]);
+ idx[nidx].mode = strtol(parts[2], nil, 8);
+ idx[nidx].path = strdup(parts[3]);
+ idx[nidx].order = nidx;
+ nidx++;
+ free(ln);
+ }
+ Bterm(f);
+ qsort(idx, nidx, sizeof(Idxent), idxcmp);
r = treeify(t, argv, argv + argc, 0, &th);
if(r == -1)
sysfatal("could not commit: %r\n");
--- a/sys/src/cmd/git/test/basic.rc
+++ b/sys/src/cmd/git/test/basic.rc
@@ -64,4 +64,3 @@
q diff -c scratch/upstream/file.txt scratch/downstream/file.txt || die mismatch
! test -e scratch/upstream/file2.txt || die mismatch
! test -e scratch/downstream/file2.txt || die mismatch
-
--- a/sys/src/cmd/git/test/util.rc
+++ b/sys/src/cmd/git/test/util.rc
@@ -1,5 +1,5 @@
fn q {
- {$* > /tmp/out.$pid && rm /tmp/out.$pid} || cat /tmp/out
+ {$* > /tmp/out.$pid && rm /tmp/out.$pid} || cat /tmp/out.$pid
}
fn qq {
$* >/dev/null >[2]/dev/null
--- a/sys/src/cmd/git/util.c
+++ b/sys/src/cmd/git/util.c
@@ -192,8 +192,10 @@
Qid q;
q = va_arg(fmt->args, Qid);
- return fmtprint(fmt, "Qid{path=0x%llx(dir:%d,obj:%lld), vers=%ld, type=%d}",
- q.path, QDIR(&q), (q.path >> 8), q.vers, q.type);
+ if(q.path == ~0ULL && q.vers == ~0UL && q.type == 0xff)
+ return fmtprint(fmt, "NOQID");
+ else
+ return fmtprint(fmt, "%llux.%lud.%hhx", q.path, q.vers, q.type);
}
void
@@ -294,7 +296,7 @@
/* Finds the directory containing the git repo. */
int
-findrepo(char *buf, int nbuf)
+findrepo(char *buf, int nbuf, int *nrel)
{
char *p, *suff;
@@ -302,6 +304,7 @@
if(getwd(buf, nbuf - strlen(suff) - 1) == nil)
return -1;
+ *nrel = 0;
for(p = buf + strlen(buf); p != nil; p = strrchr(buf, '/')){
strcpy(p, suff);
if(access(buf, AEXIST) == 0){
@@ -308,6 +311,7 @@
p[p == buf] = '\0';
return 0;
}
+ *nrel += 1;
*p = '\0';
}
werrstr("not a git repository");
@@ -441,4 +445,25 @@
h ^= h >> 15;
return h;
+}
+
+Qid
+parseqid(char *s)
+{
+ char *e;
+ Qid q;
+
+ if(strcmp(s, "NOQID") == 0)
+ return (Qid){-1, -1, -1};
+ e = s;
+ q.path = strtoull(e, &e, 16);
+ if(*e != '.')
+ sysfatal("corrupt qid: %s (%s)\n", s, e);
+ q.vers = strtoul(e+1, &e, 10);
+ if(*e != '.')
+ sysfatal("corrupt qid: %s (%s)\n", s, e);
+ q.type = strtoul(e+1, &e, 16);
+ if(*e != '\0')
+ sysfatal("corrupt qid: %s (%x)\n", s, *e);
+ return q;
}
--- a/sys/src/cmd/git/walk.c
+++ b/sys/src/cmd/git/walk.c
@@ -2,53 +2,72 @@
#include <libc.h>
#include "git.h"
+typedef struct Seen Seen;
+typedef struct Idxed Idxed;
+typedef struct Idxent Idxent;
+
#define NCACHE 4096
-#define TDIR ".git/index9/tracked"
-#define RDIR ".git/index9/removed"
-typedef struct Cache Cache;
-typedef struct Wres Wres;
-struct Cache {
+
+enum {
+ Rflg = 1 << 0,
+ Mflg = 1 << 1,
+ Aflg = 1 << 2,
+ Uflg = 1 << 3,
+ /* everything after this is not an error */
+ Tflg = 1 << 4,
+};
+
+struct Seen {
Dir* cache;
int n;
int max;
};
-struct Wres {
- char **path;
- int npath;
- int pathsz;
+struct Idxed {
+ char** cache;
+ int n;
+ int max;
};
-enum {
- Rflg = 1 << 0,
- Mflg = 1 << 1,
- Aflg = 1 << 2,
- Tflg = 1 << 3,
-};
+Seen seentab[NCACHE];
+Idxed idxtab[NCACHE];
+char repopath[1024];
+char wdirpath[1024];
+char *rstr = "R ";
+char *mstr = "M ";
+char *astr = "A ";
+char *ustr = "U ";
+char *tstr = "T ";
+char *bdir = ".git/fs/HEAD/tree";
+int useidx = 1;
+int nrel;
+int quiet;
+int dirty;
+int printflg;
-Cache seencache[NCACHE];
-int quiet;
-int useindex;
-int printflg;
-char *bdir = ".git/fs/HEAD/tree";
-char *rstr = "R ";
-char *tstr = "T ";
-char *mstr = "M ";
-char *astr = "A ";
+Idxent *idx;
+int idxsz;
+int nidx;
+int staleidx;
+Idxent *wdir;
+int wdirsz;
+int nwdir;
+int loadwdir(char*);
+
int
seen(Dir *dir)
{
+ Seen *c;
Dir *dp;
int i;
- Cache *c;
- c = &seencache[dir->qid.path&(NCACHE-1)];
+ c = &seentab[dir->qid.path&(NCACHE-1)];
dp = c->cache;
for(i=0; i<c->n; i++, dp++)
- if(dir->qid.path == dp->qid.path &&
- dir->type == dp->type &&
- dir->dev == dp->dev)
+ if(dir->qid.path == dp->qid.path
+ && dir->qid.type == dp->qid.type
+ && dir->dev == dp->dev)
return 1;
if(c->n == c->max){
if (c->max == 0)
@@ -63,129 +82,100 @@
return 0;
}
-void
-grow(Wres *r)
-{
- if(r->npath == r->pathsz){
- r->pathsz = 2*r->pathsz + 1;
- r->path = erealloc(r->path, r->pathsz * sizeof(char*));
- }
-}
-
int
-readpaths(Wres *r, char *pfx, char *dir)
+checkedin(Idxent *e, int change)
{
- char *f, *sub, *full, *sep;
- Dir *d;
- int fd, ret, i, n;
+ char *p;
+ int r;
- d = nil;
- ret = -1;
- sep = "";
- if(dir[0] != 0)
- sep = "/";
- if((full = smprint("%s/%s", pfx, dir)) == nil)
- sysfatal("smprint: %r");
- if((fd = open(full, OREAD)) < 0)
- goto error;
- while((n = dirread(fd, &d)) > 0){
- for(i = 0; i < n; i++){
- if(seen(&d[i]))
- continue;
- if(d[i].qid.type & QTDIR){
- if((sub = smprint("%s%s%s", dir, sep, d[i].name)) == nil)
- sysfatal("smprint: %r");
- if(readpaths(r, pfx, sub) == -1){
- free(sub);
- goto error;
- }
- free(sub);
- }else{
- grow(r);
- if((f = smprint("%s%s%s", dir, sep, d[i].name)) == nil)
- sysfatal("smprint: %r");
- r->path[r->npath++] = f;
- }
- }
- free(d);
+ p = smprint("%s/%s", bdir, e->path);
+ r = access(p, AEXIST);
+ if(r == 0 && change){
+ e->state = 'T';
+ staleidx = 1;
}
- ret = r->npath;
-error:
- close(fd);
- free(full);
- return ret;
+ free(p);
+ return r == 0;
}
int
-cmp(void *pa, void *pb)
+indexed(char *path, int isdir)
{
- return strcmp(*(char **)pa, *(char **)pb);
-}
+ int lo, hi, mid, n, r;
+ char *s;
-void
-dedup(Wres *r)
-{
- int i, o;
-
- if(r->npath <= 1)
- return;
- o = 0;
- qsort(r->path, r->npath, sizeof(r->path[0]), cmp);
- for(i = 1; i < r->npath; i++)
- if(strcmp(r->path[o], r->path[i]) != 0)
- r->path[++o] = r->path[i];
- r->npath = o + 1;
+ if(!useidx){
+ s = smprint("%s/%s", bdir, path);
+ r = access(s, AEXIST);
+ free(s);
+ return r == 0;
+ }
+ s = path;
+ if(isdir)
+ s = smprint("%s/", path);
+ r = -1;
+ lo = 0;
+ hi = nidx-1;
+ n = strlen(s);
+ while(lo <= hi){
+ mid = (hi + lo) / 2;
+ if(isdir)
+ r = strncmp(s, idx[mid].path, n);
+ else
+ r = strcmp(s, idx[mid].path);
+ if(r < 0)
+ hi = mid-1;
+ else if(r > 0)
+ lo = mid+1;
+ else
+ break;
+ }
+ if(isdir)
+ free(s);
+ return r == 0;
}
int
-sameqid(Dir *d, char *qf)
+idxcmp(void *pa, void *pb)
{
- char indexqid[64], fileqid[64], *p;
- int fd, n;
+ Idxent *a, *b;
+ int c;
- if(!d)
- return 0;
- if((fd = open(qf, OREAD)) == -1)
- return 0;
- if((n = readn(fd, indexqid, sizeof(indexqid) - 1)) == -1)
- return 0;
- indexqid[n] = 0;
- close(fd);
- if((p = strpbrk(indexqid, " \t\n\r")) != nil)
- *p = 0;
-
- snprint(fileqid, sizeof(fileqid), "%ullx.%uld.%.2uhhx",
- d->qid.path, d->qid.vers, d->qid.type);
-
- if(strcmp(indexqid, fileqid) == 0)
- return 1;
- return 0;
+ a = (Idxent*)pa;
+ b = (Idxent*)pb;
+ if((c = strcmp(a->path, b->path)) != 0)
+ return c;
+ assert(a->order != b->order);
+ return a-> order < b->order ? -1 : 1;
}
-void
-writeqid(Dir *d, char *qf)
-{
- int fd;
-
- if((fd = create(qf, OWRITE, 0666)) == -1)
- return;
- fprint(fd, "%ullx.%uld.%.2uhhx\n",
- d->qid.path, d->qid.vers, d->qid.type);
- close(fd);
-}
-
int
-samedata(char *pa, char *pb)
+samedata(Idxent *a, Idxent *b)
{
- char ba[32*1024], bb[32*1024];
+ char *gitpath, ba[IOUNIT], bb[IOUNIT];
int fa, fb, na, nb, same;
+ Dir *da, *db;
+ if(a != nil){
+ if(a->qid.path == b->qid.path
+ && a->qid.vers == b->qid.vers
+ && a->qid.type == b->qid.type
+ && a->mode == b->mode
+ && a->mode != 0)
+ return 1;
+ }
+
+ da = nil;
+ db = nil;
same = 0;
- fa = open(pa, OREAD);
- fb = open(pb, OREAD);
- if(fa == -1 || fb == -1){
+ if((gitpath = smprint("%s/%s", bdir, b->path)) == nil)
+ sysfatal("smprint: %r");
+ fa = open(b->path, OREAD);
+ fb = open(gitpath, OREAD);
+ if(fa == -1 || fb == -1)
goto mismatch;
- }
+ da = dirfstat(fa);
+ db = dirfstat(fb);
while(1){
if((na = readn(fa, ba, sizeof(ba))) == -1)
goto mismatch;
@@ -198,8 +188,19 @@
if(memcmp(ba, bb, na) != 0)
goto mismatch;
}
+ if(a != nil){
+ a->qid = b->qid;
+ if(da != nil)
+ a->mode = da->mode;
+ }
+ staleidx = 1;
+ if(da->mode != db->mode)
+ goto mismatch;
same = 1;
+
mismatch:
+ free(da);
+ free(db);
if(fa != -1)
close(fa);
if(fb != -1)
@@ -207,7 +208,129 @@
return same;
}
+int
+loadent(char *dir, Dir *d, int fullpath)
+{
+ char *path;
+ int ret, isdir;
+ Idxent *e;
+
+ if(fullpath)
+ path = strdup(dir);
+ else
+ path = smprint("%s/%s", dir, d->name);
+ if(path == nil)
+ sysfatal("smprint: %r");
+
+ ret = 0;
+ isdir = d->qid.type & QTDIR;
+ cleanname(path);
+ if((printflg & Uflg) == 0 && !indexed(path, isdir))
+ return 0;
+ if(isdir){
+ ret = loadwdir(path);
+ free(path);
+ }else{
+ if(nwdir == wdirsz){
+ wdirsz += wdirsz/2;
+ wdir = erealloc(wdir, wdirsz*sizeof(Idxent));
+ }
+ e = wdir + nwdir;
+ e->path = path;
+ e->qid = d->qid;
+ e->mode = d->mode;
+ e->order = nwdir;
+ e->state = 'T';
+ nwdir++;
+ }
+ return ret;
+}
+
+int
+loadwdir(char *path)
+{
+ int fd, ret, i, n;
+ Dir *d, *e;
+
+ d = nil;
+ e = nil;
+ ret = -1;
+
+ cleanname(path);
+ if(strncmp(path, ".git/", 5) == 0)
+ return 0;
+ if((fd = open(path, OREAD)) < 0)
+ goto error;
+ if((e = dirfstat(fd)) == nil)
+ sysfatal("fstat: %r");
+ if(e->qid.type & QTDIR)
+ while((n = dirread(fd, &d)) > 0){
+ for(i = 0; i < n; i++)
+ if(loadent(path, &d[i], 0) == -1)
+ goto error;
+ free(d);
+ }
+ else{
+ if(loadent(path, e, 1) == -1)
+ goto error;
+ }
+ ret = 0;
+error:
+ free(e);
+ if(fd != -1)
+ close(fd);
+ return ret;
+}
+
+int
+pfxmatch(char *p, char **pfx, int *pfxlen, int npfx)
+{
+ int i;
+
+ if(npfx == 0)
+ return 1;
+ for(i = 0; i < npfx; i++){
+ if(strncmp(p, pfx[i], pfxlen[i]) == 0
+ && (p[pfxlen[i]] == '/' || p[pfxlen[i]] == 0))
+ return 1;
+ if(strcmp(pfx[i], ".") == 0 || *pfx[i] == 0)
+ return 1;
+ }
+ return 0;
+}
+
+
+char*
+reporel(char *s)
+{
+ char *p;
+ int n;
+
+ if(*s == '/')
+ s = strdup(s);
+ else
+ s = smprint("%s/%s", wdirpath, s);
+ p = cleanname(s);
+ n = strlen(repopath);
+ if(strncmp(s, repopath, n) == 0)
+ p = s + n;
+ else
+ sysfatal("path outside repo: %s", s);
+ if(*p == '/')
+ p++;
+ memmove(s, p, strlen(p)+1);
+ return s;
+}
+
void
+show(Biobuf *o, int flg, char *str, char *path)
+{
+ dirty |= flg;
+ if(!quiet && (printflg & flg))
+ Bprint(o, "%s%s\n", str, path);
+}
+
+void
usage(void)
{
fprint(2, "usage: %s [-qbc] [-f filt] [-b base] [paths...]\n", argv0);
@@ -217,14 +340,21 @@
void
main(int argc, char **argv)
{
- char *rpath, *tpath, *bpath, *base, buf[8], repo[512];
- char *p, *e;
- int i, dirty;
- Wres r;
- Hash h;
- Dir *d;
+ char *p, *e, *ln, *base, **argrel, *parts[4], xbuf[8];
+ int i, j, c, line, wfd, *argn;
+ Biobuf *f, *o, *w;
+ Hash h, hd;
+ Dir rn;
gitinit();
+ if(access(".git/fs/ctl", AEXIST) != 0)
+ sysfatal("no running git/fs");
+ if(getwd(wdirpath, sizeof(wdirpath)) == nil)
+ sysfatal("getwd: %r");
+ if(findrepo(repopath, sizeof(repopath), &nrel) == -1)
+ sysfatal("find root: %r");
+ if(chdir(repopath) == -1)
+ sysfatal("chdir: %r");
ARGBEGIN{
case 'q':
@@ -235,6 +365,7 @@
tstr = "";
mstr = "";
astr = "";
+ ustr = "";
break;
case 'f':
for(p = EARGF(usage()); *p; p++)
@@ -243,109 +374,164 @@
case 'A': printflg |= Aflg; break;
case 'M': printflg |= Mflg; break;
case 'R': printflg |= Rflg; break;
+ case 'U': printflg |= Uflg; break;
default: usage(); break;
}
break;
case 'b':
- useindex = 0;
+ useidx = 0;
base = EARGF(usage());
if(resolveref(&h, base) == -1)
sysfatal("no such ref '%s'", base);
+ /* optimization: we're a lot faster when using the index */
+ if(resolveref(&hd, "HEAD") == 0 && hasheq(&h, &hd))
+ useidx = 1;
bdir = smprint(".git/fs/object/%H/tree", h);
break;
default:
usage();
- }ARGEND
+ }ARGEND;
- gitinit();
- if(findrepo(repo, sizeof(repo)) == -1)
- sysfatal("find root: %r");
- if(chdir(repo) == -1)
- sysfatal("chdir: %r");
- if(access(".git/fs/ctl", AEXIST) != 0)
- sysfatal("no running git/fs");
- dirty = 0;
- memset(&r, 0, sizeof(r));
if(printflg == 0)
printflg = Tflg | Aflg | Mflg | Rflg;
- if(argc == 0){
- if(access(TDIR, AEXIST) == 0 && readpaths(&r, TDIR, "") == -1)
- sysfatal("read tracked: %r");
- if(access(RDIR, AEXIST) == 0 && readpaths(&r, RDIR, "") == -1)
- sysfatal("read removed: %r");
- if(access(bdir, AEXIST) == 0 && readpaths(&r, bdir, "") == -1)
- sysfatal("read base: %r");
- }else{
- for(i = 0; i < argc; i++){
- tpath = smprint(TDIR"/%s", argv[i]);
- rpath = smprint(RDIR"/%s", argv[i]);
- bpath = smprint("%s/%s", bdir, argv[i]);
- if((d = dirstat(tpath)) == nil && (d = dirstat(rpath)) == nil && (d = dirstat(bpath)) == nil)
- goto nextarg;
- if(d->mode & DMDIR){
- readpaths(&r, TDIR, argv[i]);
- readpaths(&r, RDIR, argv[i]);
- readpaths(&r, bdir, argv[i]);
- }else{
- grow(&r);
- r.path[r.npath++] = estrdup(argv[i]);
+
+ nidx = 0;
+ idxsz = 32;
+ idx = emalloc(idxsz*sizeof(Idxent));
+ nwdir = 0;
+ wdirsz = 32;
+ wdir = emalloc(wdirsz*sizeof(Idxent));
+ argrel = emalloc(argc*sizeof(int));
+ argn = emalloc(argc*sizeof(int));
+ for(i = 0; i < argc; i++){
+ argrel[i] = reporel(argv[i]);
+ argn[i] = strlen(argrel[i]);
+ }
+ if((o = Bfdopen(1, OWRITE)) == nil)
+ sysfatal("open out: %r");
+ if(useidx){
+ if((f = Bopen(".git/INDEX9", OREAD)) == nil){
+ fprint(2, "open index: %r\n");
+ if(access(".git/index9", AEXIST) == 0){
+ fprint(2, "index format conversion needed:\n");
+ fprint(2, "\tcd %s && git/fs\n", repopath);
+ fprint(2, "\t@{cd .git/fs/HEAD/tree && walk -f | sed 's/^/T NOQID 0 /'} >> .git/INDEX9\n");
+ fprint(2, "\t@{cd .git/index9/removed && walk -f | sed 's/^/R NOQID 0 /'} >> .git/INDEX9\n");
}
-nextarg:
- free(bpath);
- free(tpath);
- free(rpath);
- free(d);
+ exits("noindex");
}
+ line = 0;
+ while((ln = Brdstr(f, '\n', 1)) != nil){
+ line++;
+ /* allow blank lines */
+ if(ln[0] == 0 || ln[0] == '\n')
+ continue;
+ if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts))
+ sysfatal(".git/INDEX9:%d: corrupt index", line);
+ if(nidx == idxsz){
+ idxsz += idxsz/2;
+ idx = realloc(idx, idxsz*sizeof(Idxent));
+ }
+ cleanname(parts[3]);
+ idx[nidx].state = *parts[0];
+ idx[nidx].qid = parseqid(parts[1]);
+ idx[nidx].mode = strtol(parts[2], nil, 8);
+ idx[nidx].path = strdup(parts[3]);
+ idx[nidx].order = nidx;
+ nidx++;
+ free(ln);
+ }
+ qsort(idx, nidx, sizeof(Idxent), idxcmp);
}
- dedup(&r);
- for(i = 0; i < r.npath; i++){
- p = r.path[i];
- d = dirstat(p);
- if(d && d->mode & DMDIR)
- goto next;
- rpath = smprint(RDIR"/%s", p);
- tpath = smprint(TDIR"/%s", p);
- bpath = smprint("%s/%s", bdir, p);
- /* Fast path: we don't want to force access to the rpath. */
- if(useindex && d && sameqid(d, tpath)) {
- if(!quiet && (printflg & Tflg))
- print("%s%s\n", tstr, p);
+ if(argc == 0)
+ loadwdir(".");
+ else for(i = 0; i < argc; i++)
+ loadwdir(argrel[i]);
+ qsort(wdir, nwdir, sizeof(Idxent), idxcmp);
+ i = 0;
+ j = 0;
+ while(i < nidx || j < nwdir){
+ /* find the last entry we tracked for a path */
+ while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0){
+ staleidx = 1;
+ i++;
+ }
+ while(j+1 < nwdir && strcmp(wdir[j].path, wdir[j+1].path) == 0)
+ j++;
+ if(i < nidx && !pfxmatch(idx[i].path, argrel, argn, argc)){
+ i++;
+ continue;
+ }
+ if(i >= nidx)
+ c = 1;
+ else if(j >= nwdir)
+ c = -1;
+ else
+ c = strcmp(idx[i].path, wdir[j].path);
+ /* exists in both index and on disk */
+ if(c == 0){
+ if(idx[i].state == 'R' && checkedin(&idx[i], 0))
+ show(o, Rflg, rstr, idx[i].path);
+ else if(idx[i].state == 'A' && !checkedin(&idx[i], 1))
+ show(o, Aflg, astr, idx[i].path);
+ else if(!samedata(&idx[i], &wdir[j]))
+ show(o, Mflg, mstr, idx[i].path);
+ else
+ show(o, Tflg, tstr, idx[i].path);
+ i++;
+ j++;
+ /* only exists in index */
+ }else if(c < 0){
+ if(checkedin(&idx[i], 0))
+ show(o, Rflg, rstr, idx[i].path);
+ i++;
+ /* only exists on disk */
}else{
- if(d == nil || (useindex && access(rpath, AEXIST) == 0)){
- if(access(bpath, AEXIST) == 0){
- dirty |= Rflg;
- if(!quiet && (printflg & Rflg))
- print("%s%s\n", rstr, p);
- }
- }else if(access(bpath, AEXIST) == -1) {
- dirty |= Aflg;
- if(!quiet && (printflg & Aflg))
- print("%s%s\n", astr, p);
- }else if(samedata(p, bpath)){
- if(!quiet && (printflg & Tflg))
- print("%s%s\n", tstr, p);
- if(useindex)
- writeqid(d, tpath);
- }else{
- dirty |= Mflg;
- if(!quiet && (printflg & Mflg))
- print("%s%s\n", mstr, p);
- }
+ if(!useidx && checkedin(&wdir[j], 0)){
+ if(samedata(nil, &wdir[j]))
+ show(o, Tflg, tstr, wdir[j].path);
+ else
+ show(o, Mflg, mstr, wdir[j].path);
+ }else if(printflg & Uflg)
+ show(o, Uflg, ustr, wdir[j].path);
+ j++;
}
- free(rpath);
- free(tpath);
- free(bpath);
-next:
- free(d);
}
+ Bterm(o);
+
+ if(useidx && staleidx)
+ if((wfd = create(".git/INDEX9.new", OWRITE, 0644)) != -1){
+ if((w = Bfdopen(wfd, OWRITE)) == nil){
+ close(wfd);
+ goto Nope;
+ }
+ for(i = 0; i < nidx; i++){
+ while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0)
+ i++;
+ Bprint(w, "%c %Q %o %s\n",
+ idx[i].state,
+ idx[i].qid,
+ idx[i].mode,
+ idx[i].path);
+ }
+ Bterm(w);
+ nulldir(&rn);
+ rn.name = "INDEX9";
+ if(remove(".git/INDEX9") == -1)
+ goto Nope;
+ if(dirwstat(".git/INDEX9.new", &rn) == -1)
+ sysfatal("rename: %r");
+ }
+
+Nope:
if(!dirty)
exits(nil);
- p = buf;
- e = buf + sizeof(buf);
+ p = xbuf;
+ e = p + sizeof(xbuf);
for(i = 0; (1 << i) != Tflg; i++)
if(dirty & (1 << i))
- p = seprint(p, e, "%c", "DMAT"[i]);
- exits(buf);
+ p = seprint(p, e, "%c", "RMAUT"[i]);
+ exits(xbuf);
}
prev parent reply other threads:[~2023-09-18 3:51 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-09-18 2:21 ori
2023-09-18 3:49 ` ori [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=91B9FC3B10A4C61A17F2EA04575388E7@eigenstate.org \
--to=ori@eigenstate.org \
--cc=9front@9front.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).