9front - general discussion about 9front
 help / color / mirror / Atom feed
From: ori@eigenstate.org
To: 9front@9front.org
Subject: [9front] merge3: a first draft
Date: Sun, 12 Feb 2023 18:17:59 -0500	[thread overview]
Message-ID: <C57DB22650EAFBEB21A061CB9A4B1A0A@eigenstate.org> (raw)

I've been annoyed that we have to use ape/diff3 to merge
files for a while, so I finally sat down this weekend and
hacked 3-way merge capabilities into diff(1).

This is a rough draft, but here's the current approach.

3 way merging operates as follows:

With 3 files 'left', 'base', and 'right', we start by
producing the diff hunks against 'base' to 'left', and
'base' to 'right.

Then, for each hunk, pick the earlier diff hunk between
b/l and b/r, as you would with an array merge.

If the hunks overlap, adjust the sizes so that the common
section is the same size in both hunks, and emit a conflict
marker; otherwise, just emit that hunk, as well as the
unmodified text before it.

The bulk of this diff is refactoring diff(1) to allow two
diffs to be done at the same time, without overwriting each
other's globals.

diff 7f071685e9b7d9e32c1a9675f7fca0407d18e6e1 uncommitted
--- /dev/null
+++ b/sys/src/cmd/diff/diff.c
@@ -1,0 +1,84 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "diff.h"
+
+void	
+done(int status)
+{
+	switch(status)
+	{
+	case 0:
+		exits("");
+	case 1:
+		exits("some");
+	default:
+		exits("error");
+	}
+}
+
+void
+usage(void)
+{
+	fprint(2, "usage: %s [-abcefmnrw] file1 ... file2\n", argv0);
+	exits("usage");
+}
+
+void
+main(int argc, char *argv[])
+{
+	int i;
+	Dir *fsb, *tsb;
+
+	Binit(&stdout, 1, OWRITE);
+	ARGBEGIN{
+	case 'e':
+	case 'f':
+	case 'n':
+	case 'c':
+	case 'a':
+	case 'u':
+		mode = ARGC();
+		break;
+	case 'w':
+		bflag = 2;
+		break;
+
+	case 'b':
+		bflag = 1;
+		break;
+
+	case 'r':
+		rflag = 1;
+		break;
+
+	case 'm':
+		mflag = 1;	
+		break;
+
+	case 'h':
+	default:
+		usage();
+	}ARGEND;
+	if (argc < 2)
+		usage();
+	if ((tsb = dirstat(argv[argc-1])) == nil)
+		sysfatal("can't stat %s", argv[argc-1]);
+	if (argc > 2) {
+		if (!DIRECTORY(tsb))
+			sysfatal("not directory: %s", argv[argc-1]);
+		mflag = 1;
+	} else {
+		if ((fsb = dirstat(argv[0])) == nil)
+			sysfatal("can't stat %s", argv[0]);
+		if (DIRECTORY(fsb) && DIRECTORY(tsb))
+			mflag = 1;
+		free(fsb);
+	}
+	free(tsb);
+	for (i = 0; i < argc-1; i++)
+		diff(argv[i], argv[argc-1], 0);
+		
+	done(anychange);
+	/*NOTREACHED*/
+}
--- a/sys/src/cmd/diff/diff.h
+++ b/sys/src/cmd/diff/diff.h
@@ -1,13 +1,53 @@
-typedef struct Line Line;
+typedef struct Line	Line;
+typedef struct Cand	Cand;
+typedef struct Diff	Diff;
+typedef struct Change	Change;
 
 struct Line {
 	int	serial;
 	int	value;
 };
-extern Line *file[2];
-extern int len[2];
-extern long *ixold, *ixnew;
-extern int *J;
+
+struct Cand {
+	int x;
+	int y;
+	int pred;
+};
+
+struct Change
+{
+	int a;
+	int b;
+	int c;
+	int d;
+};
+
+struct Diff {
+	Cand cand;
+	Line *file[2], line;
+	int len[2];
+	int binary;
+	Line *sfile[2];	/*shortened by pruning common prefix and suffix*/
+	int slen[2];
+	int pref, suff;	/*length of prefix and suffix*/
+	int *class;	/*will be overlaid on file[0]*/
+	int *member;	/*will be overlaid on file[1]*/
+	int *klist;	/*will be overlaid on file[0] after class*/
+	Cand *clist;	/* merely a free storage pot for candidates */
+	int clen;
+	int *J;		/*will be overlaid on class*/
+	long *ixold;	/*will be overlaid on klist*/
+	long *ixnew;	/*will be overlaid on file[1]*/
+	char *file1;
+	char *file2;
+	Biobuf *input[2];
+	Biobuf *b0;
+	Biobuf *b1;
+	int firstchange;
+	Change *changes;
+	int nchanges;
+};
+
 extern char mode;
 extern char bflag;
 extern char rflag;
@@ -14,19 +54,24 @@
 extern char mflag;
 extern int anychange;
 extern Biobuf	stdout;
-extern int	binary;
 
 #define MAXPATHLEN	1024
 
+#define	DIRECTORY(s)		((s)->qid.type&QTDIR)
+#define	REGULAR_FILE(s)		((s)->type == 'M' && !DIRECTORY(s))
+
 int mkpathname(char *, char *, char *);
+char *mktmpfile(int, Dir **);
+char *statfile(char *, Dir **);
 void *emalloc(unsigned);
 void *erealloc(void *, unsigned);
 void diff(char *, char *, int);
+void diffreg(char*, char*, char*, char*);
 void diffdir(char *, char *, int);
-void diffreg(char *, char *, char *, char *);
-Biobuf *prepare(int, char *, char *);
-void panic(int, char *, ...);
-void check(Biobuf *, Biobuf *);
-void change(int, int, int, int);
-void flushchanges(void);
-
+void calcdiff(Diff *, char *, char *, char *, char *);
+Biobuf *prepare(Diff*, int, char *, char *);
+void check(Diff *, Biobuf *, Biobuf *);
+void change(Diff *, int, int, int, int);
+void freediff(Diff *);
+void flushchanges(Diff *);
+void fetch(Diff *d, long *f, int a, int b, Biobuf *bp, char *s);
--- a/sys/src/cmd/diff/diffdir.c
+++ b/sys/src/cmd/diff/diffdir.c
@@ -111,3 +111,45 @@
 	free(dirf);
 	free(dirt);
 }
+
+void
+diff(char *f, char *t, int level)
+{
+	char *fp, *tp, *p, fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
+	Dir *fsb, *tsb;
+
+	fsb = nil;
+	tsb = nil;
+	if ((fp = statfile(f, &fsb)) == 0)
+		goto Return;
+	if ((tp = statfile(t, &tsb)) == 0)
+		goto Return;
+	if (DIRECTORY(fsb) && DIRECTORY(tsb)) {
+		if (rflag || level == 0)
+			diffdir(fp, tp, level);
+		else
+			Bprint(&stdout, "Common subdirectories: %s and %s\n", fp, tp);
+	}
+	else if (REGULAR_FILE(fsb) && REGULAR_FILE(tsb))
+		diffreg(fp, f, tp, t);
+	else {
+		if (REGULAR_FILE(fsb)) {
+			if ((p = utfrrune(f, '/')) == 0)
+				p = f;
+			else
+				p++;
+			if (mkpathname(tb, tp, p) == 0)
+				diffreg(fp, f, tb, t);
+		} else {
+			if ((p = utfrrune(t, '/')) == 0)
+				p = t;
+			else
+				p++;
+			if (mkpathname(fb, fp, p) == 0)
+				diffreg(fb, f, tp, t);
+		}
+	}
+Return:
+	free(fsb);
+	free(tsb);
+}
--- a/sys/src/cmd/diff/diffio.c
+++ b/sys/src/cmd/diff/diffio.c
@@ -4,10 +4,6 @@
 #include <ctype.h>
 #include "diff.h"
 
-static Biobuf *input[2];
-static char *file1, *file2;
-static int firstchange;
-
 #define MAXLINELEN	4096
 #define MIN(x, y)	((x) < (y) ? (x): (y))
 
@@ -104,7 +100,7 @@
 }
 
 Biobuf *
-prepare(int i, char *arg, char *orig)
+prepare(Diff *d, int i, char *arg, char *orig)
 {
 	Line *p;
 	int j, h;
@@ -115,10 +111,10 @@
 
 	bp = Bopen(arg, OREAD);
 	if (!bp) {
-		panic(mflag ? 0: 2, "cannot open %s: %r\n", arg);
+		sysfatal("cannot open %s: %r", arg);
 		return 0;
 	}
-	if (binary)
+	if (d->binary)
 		return bp;
 	nbytes = Bread(bp, buf, MIN(1024, MAXLINELEN));
 	if (nbytes > 0) {
@@ -130,7 +126,7 @@
 			 */
 			cp += chartorune(&r, cp);
 			if (r == 0 || (r > 0x7f && r <= 0xa0)) {
-				binary++;
+				d->binary++;
 				return bp;
 			}
 		}
@@ -139,14 +135,14 @@
 	p = emalloc(3*sizeof(Line));
 	for (j = 0; h = readhash(bp, buf); p[j].value = h)
 		p = erealloc(p, (++j+3)*sizeof(Line));
-	len[i] = j;
-	file[i] = p;
-	input[i] = bp;
+	d->len[i] = j;
+	d->file[i] = p;
+	d->input[i] = bp;
 	if (i == 0) {
-		file1 = orig;
-		firstchange = 0;
+		d->file1 = orig;
+		d->firstchange = 0;
 	} else
-		file2 = orig;
+		d->file2 = orig;
 	return bp;
 }
 
@@ -175,31 +171,32 @@
  * need to fix up for unexpected EOF's
  */
 void
-check(Biobuf *bf, Biobuf *bt)
+check(Diff *d, Biobuf *bf, Biobuf *bt)
 {
 	int f, t, flen, tlen;
 	char fbuf[MAXLINELEN], tbuf[MAXLINELEN];
 
-	ixold[0] = ixnew[0] = 0;
-	for (f = t = 1; f < len[0]; f++) {
+	d->ixold[0] = 0;
+	d->ixnew[0] = 0;
+	for (f = t = 1; f < d->len[0]; f++) {
 		flen = readline(bf, fbuf);
-		ixold[f] = ixold[f-1] + flen + 1;		/* ftell(bf) */
-		if (J[f] == 0)
+		d->ixold[f] = d->ixold[f-1] + flen + 1;		/* ftell(bf) */
+		if (d->J[f] == 0)
 			continue;
 		do {
 			tlen = readline(bt, tbuf);
-			ixnew[t] = ixnew[t-1] + tlen + 1;	/* ftell(bt) */
-		} while (t++ < J[f]);
+			d->ixnew[t] = d->ixnew[t-1] + tlen + 1;	/* ftell(bt) */
+		} while (t++ < d->J[f]);
 		if (bflag) {
 			flen = squishspace(fbuf);
 			tlen = squishspace(tbuf);
 		}
 		if (flen != tlen || strcmp(fbuf, tbuf))
-			J[f] = 0;
+			d->J[f] = 0;
 	}
-	while (t < len[1]) {
+	while (t < d->len[1]) {
 		tlen = readline(bt, tbuf);
-		ixnew[t] = ixnew[t-1] + tlen + 1;	/* fseek(bt) */
+		d->ixnew[t] = d->ixnew[t-1] + tlen + 1;	/* fseek(bt) */
 		t++;
 	}
 }
@@ -212,8 +209,8 @@
 		Bprint(&stdout, "%s%d", separator, b);
 }
 
-static void
-fetch(long *f, int a, int b, Biobuf *bp, char *s)
+void
+fetch(Diff *d, long *f, int a, int b, Biobuf *bp, char *s)
 {
 	char buf[MAXLINELEN];
 	int maxb;
@@ -220,10 +217,10 @@
 
 	if(a <= 1)
 		a = 1;
-	if(bp == input[0])
-		maxb = len[0];
+	if(bp == d->input[0])
+		maxb = d->len[0];
 	else
-		maxb = len[1];
+		maxb = d->len[1];
 	if(b > maxb)
 		b = maxb;
 	if(a > maxb)
@@ -232,23 +229,12 @@
 	while (a++ <= b) {
 		readline(bp, buf);
 		Bprint(&stdout, "%s%s\n", s, buf);
+		Bflush(&stdout);
 	}
 }
 
-typedef struct Change Change;
-struct Change
-{
-	int a;
-	int b;
-	int c;
-	int d;
-};
-
-Change *changes;
-int nchanges;
-
 void
-change(int a, int b, int c, int d)
+change(Diff *df, int a, int b, int c, int d)
 {
 	char verb;
 	char buf[4];
@@ -257,7 +243,7 @@
 	if (a > b && c > d)
 		return;
 	anychange = 1;
-	if (mflag && firstchange == 0) {
+	if (mflag && df->firstchange == 0) {
 		if(mode) {
 			buf[0] = '-';
 			buf[1] = mode;
@@ -266,8 +252,8 @@
 		} else {
 			buf[0] = '\0';
 		}
-		Bprint(&stdout, "diff %s%s %s\n", buf, file1, file2);
-		firstchange = 1;
+		Bprint(&stdout, "diff %s%s %s\n", buf, df->file1, df->file2);
+		df->firstchange = 1;
 	}
 	verb = a > b ? 'a': c > d ? 'd': 'c';
 	switch(mode) {
@@ -281,10 +267,10 @@
 		range(c, d, ",");
 		break;
 	case 'n':
-		Bprint(&stdout, "%s:", file1);
+		Bprint(&stdout, "%s:", df->file1);
 		range(a, b, ",");
 		Bprint(&stdout, " %c ", verb);
-		Bprint(&stdout, "%s:", file2);
+		Bprint(&stdout, "%s:", df->file2);
 		range(c, d, ",");
 		break;
 	case 'f':
@@ -294,9 +280,9 @@
 	case 'c':
 	case 'a':
 	case 'u':
-		if(nchanges%1024 == 0)
-			changes = erealloc(changes, (nchanges+1024)*sizeof(changes[0]));
-		ch = &changes[nchanges++];
+		if(df->nchanges%1024 == 0)
+			df->changes = erealloc(df->changes, (df->nchanges+1024)*sizeof(df->changes[0]));
+		ch = &df->changes[df->nchanges++];
 		ch->a = a;
 		ch->b = b;
 		ch->c = c;
@@ -305,11 +291,11 @@
 	}
 	Bputc(&stdout, '\n');
 	if (mode == 0 || mode == 'n') {
-		fetch(ixold, a, b, input[0], "< ");
+		fetch(df, df->ixold, a, b, df->input[0], "< ");
 		if (a <= b && c <= d)
 			Bprint(&stdout, "---\n");
 	}
-	fetch(ixnew, c, d, input[1], mode == 0 || mode == 'n' ? "> ": "");
+	fetch(df, df->ixnew, c, d, df->input[1], mode == 0 || mode == 'n' ? "> ": "");
 	if (mode != 0 && mode != 'n' && c <= d)
 		Bprint(&stdout, ".\n");
 }
@@ -320,69 +306,69 @@
 };
 
 int
-changeset(int i)
+changeset(Diff *d, int i)
 {
-	while(i<nchanges && changes[i].b+1+2*Lines > changes[i+1].a)
+	while(i < d->nchanges && d->changes[i].b + 1 + 2*Lines > d->changes[i+1].a)
 		i++;
-	if(i<nchanges)
+	if(i < d->nchanges)
 		return i+1;
-	return nchanges;
+	return d->nchanges;
 }
 
 void
-flushchanges(void)
+flushchanges(Diff *df)
 {
-	int a, b, c, d, at, hdr;
-	int i, j;
+	vlong a, b, c, d, at, hdr;
+	vlong i, j;
 
-	if(nchanges == 0)
+	if(df->nchanges == 0)
 		return;
 
 	hdr = 0;
-	for(i=0; i<nchanges; ){
-		j = changeset(i);
-		a = changes[i].a-Lines;
-		b = changes[j-1].b+Lines;
-		c = changes[i].c-Lines;
-		d = changes[j-1].d+Lines;
+	for(i=0; i < df->nchanges; ){
+		j = changeset(df, i);
+		a = df->changes[i].a - Lines;
+		b = df->changes[j-1].b + Lines;
+		c = df->changes[i].c - Lines;
+		d = df->changes[j-1].d + Lines;
 		if(a < 1)
 			a = 1;
 		if(c < 1)
 			c = 1;
-		if(b > len[0])
-			b = len[0];
-		if(d > len[1])
-			d = len[1];
+		if(b > df->len[0])
+			b = df->len[0];
+		if(d > df->len[1])
+			d = df->len[1];
 		if(mode == 'a'){
 			a = 1;
-			b = len[0];
+			b = df->len[0];
 			c = 1;
-			d = len[1];
-			j = nchanges;
+			d = df->len[1];
+			j = df->nchanges;
 		}
 		if(mode == 'u'){
 			if(!hdr){
-				Bprint(&stdout, "--- %s\n", file1);
-				Bprint(&stdout, "+++ %s\n", file2);
+				Bprint(&stdout, "--- %s\n", df->file1);
+				Bprint(&stdout, "+++ %s\n", df->file2);
 				hdr = 1;
 			}
-			Bprint(&stdout, "@@ -%d,%d +%d,%d @@\n", a, b-a+1, c, d-c+1);
+			Bprint(&stdout, "@@ -%lld,%lld +%lld,%lld @@\n", a, b-a+1, c, d-c+1);
 		}else{
-			Bprint(&stdout, "%s:", file1);
+			Bprint(&stdout, "%s:", df->file1);
 			range(a, b, ",");
 			Bprint(&stdout, " - ");
-			Bprint(&stdout, "%s:", file2);
+			Bprint(&stdout, "%s:", df->file2);
 			range(c, d, ",");
 			Bputc(&stdout, '\n');
 		}
 		at = a;
 		for(; i<j; i++){
-			fetch(ixold, at, changes[i].a-1, input[0], mode == 'u' ? " " : "  ");
-			fetch(ixold, changes[i].a, changes[i].b, input[0], mode == 'u' ? "-" : "- ");
-			fetch(ixnew, changes[i].c, changes[i].d, input[1], mode == 'u' ? "+" : "+ ");
-			at = changes[i].b+1;
+			fetch(df, df->ixold, at, df->changes[i].a-1, df->input[0], mode == 'u' ? " " : "  ");
+			fetch(df, df->ixold, df->changes[i].a, df->changes[i].b, df->input[0], mode == 'u' ? "-" : "- ");
+			fetch(df, df->ixnew, df->changes[i].c, df->changes[i].d, df->input[1], mode == 'u' ? "+" : "+ ");
+			at = df->changes[i].b+1;
 		}
-		fetch(ixold, at, b, input[0], mode == 'u' ? " " : "  ");
+		fetch(df, df->ixold, at, b, df->input[0], mode == 'u' ? " " : "  ");
 	}
-	nchanges = 0;
+	df->nchanges = 0;
 }
--- a/sys/src/cmd/diff/diffreg.c
+++ b/sys/src/cmd/diff/diffreg.c
@@ -66,30 +66,7 @@
 *	3*(number of k-candidates installed),  typically about
 *	6n words for files of length n. 
 */
-typedef struct Cand Cand;
 
-struct Cand {
-	int x;
-	int y;
-	int pred;
-};
-
-Cand cand;
-Line *file[2], line;
-int len[2];
-int binary;
-Line *sfile[2];	/*shortened by pruning common prefix and suffix*/
-int slen[2];
-int pref, suff;	/*length of prefix and suffix*/
-int *class;	/*will be overlaid on file[0]*/
-int *member;	/*will be overlaid on file[1]*/
-int *klist;	/*will be overlaid on file[0] after class*/
-Cand *clist;	/* merely a free storage pot for candidates */
-int clen;
-int *J;		/*will be overlaid on class*/
-long *ixold;	/*will be overlaid on klist*/
-long *ixnew;	/*will be overlaid on file[1]*/
-
 static void	
 sort(Line *a, int n)	/*shellsort CACM #201*/
 {
@@ -137,21 +114,21 @@
 }
 
 static void
-prune(void)
+prune(Diff *d)
 {
 	int i,j;
 
-	for(pref=0;pref<len[0]&&pref<len[1]&&
-		file[0][pref+1].value==file[1][pref+1].value;
-		pref++ ) ;
-	for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&&
-		file[0][len[0]-suff].value==file[1][len[1]-suff].value;
-		suff++) ;
+	for(d->pref = 0; d->pref < d->len[0] && d->pref < d->len[1] &&
+		d->file[0][d->pref+1].value == d->file[1][d->pref+1].value;
+		d->pref++) ;
+	for(d->suff=0; d->suff < d->len[0] - d->pref && d->suff < d->len[1] - d->pref &&
+		d->file[0][d->len[0] - d->suff].value == d->file[1][d->len[1] - d->suff].value;
+		d->suff++) ;
 	for(j=0;j<2;j++) {
-		sfile[j] = file[j]+pref;
-		slen[j] = len[j]-pref-suff;
-		for(i=0;i<=slen[j];i++)
-			sfile[j][i].serial = i;
+		d->sfile[j] = d->file[j]+d->pref;
+		d->slen[j] = d->len[j]-d->pref-d->suff;
+		for(i=0;i<=d->slen[j];i++)
+			d->sfile[j][i].serial = i;
 	}
 }
 
@@ -184,30 +161,30 @@
 }
 
 static int
-newcand(int x, int  y, int pred)
+newcand(Diff *d, int x, int  y, int pred)
 {
 	Cand *q;
 
-	clist = erealloc(clist, (clen+1)*sizeof(Cand));
-	q = clist + clen;
+	d->clist = erealloc(d->clist, (d->clen+1)*sizeof(Cand));
+	q = d->clist + d->clen;
 	q->x = x;
 	q->y = y;
 	q->pred = pred;
-	return clen++;
+	return d->clen++;
 }
 
 static int
-search(int *c, int k, int y)
+search(Diff *d, int *c, int k, int y)
 {
 	int i, j, l;
 	int t;
 
-	if(clist[c[k]].y < y)	/*quick look for typical case*/
+	if(d->clist[c[k]].y < y)	/*quick look for typical case*/
 		return k+1;
 	i = 0;
 	j = k+1;
 	while((l=(i+j)/2) > i) {
-		t = clist[c[l]].y;
+		t = d->clist[c[l]].y;
 		if(t > y)
 			j = l;
 		else if(t < y)
@@ -219,7 +196,7 @@
 }
 
 static int
-stone(int *a, int n, int *b, int *c)
+stone(Diff *d, int *a, int n, int *b, int *c)
 {
 	int i, k,y;
 	int j, l;
@@ -227,7 +204,7 @@
 	int oldl;
 
 	k = 0;
-	c[0] = newcand(0,0,0);
+	c[0] = newcand(d, 0, 0, 0);
 	for(i=1; i<=n; i++) {
 		j = a[i];
 		if(j==0)
@@ -236,20 +213,20 @@
 		oldl = 0;
 		oldc = c[0];
 		do {
-			if(y <= clist[oldc].y)
+			if(y <= d->clist[oldc].y)
 				continue;
-			l = search(c, k, y);
+			l = search(d, c, k, y);
 			if(l!=oldl+1)
 				oldc = c[l-1];
 			if(l<=k) {
-				if(clist[c[l]].y <= y)
+				if(d->clist[c[l]].y <= y)
 					continue;
 				tc = c[l];
-				c[l] = newcand(i,y,oldc);
+				c[l] = newcand(d, i, y, oldc);
 				oldc = tc;
 				oldl = l;
 			} else {
-				c[l] = newcand(i,y,oldc);
+				c[l] = newcand(d, i,y,oldc);
 				k++;
 				break;
 			}
@@ -259,61 +236,23 @@
 }
 
 static void
-unravel(int p)
+unravel(Diff *d, int p)
 {
 	int i;
 	Cand *q;
 
-	for(i=0; i<=len[0]; i++) {
-		if (i <= pref)
-			J[i] = i;
-		else if (i > len[0]-suff)
-			J[i] = i+len[1]-len[0];
+	for(i=0; i<=d->len[0]; i++) {
+		if (i <= d->pref)
+			d->J[i] = i;
+		else if (i > d->len[0]-d->suff)
+			d->J[i] = i+d->len[1] - d->len[0];
 		else
-			J[i] = 0;
+			d->J[i] = 0;
 	}
-	for(q=clist+p;q->y!=0;q=clist+q->pred)
-		J[q->x+pref] = q->y+pref;
+	for(q=d->clist+p; q->y != 0; q= d->clist + q->pred)
+		d->J[q->x+d->pref] = q->y+d->pref;
 }
 
-static void
-output(void)
-{
-	int m, i0, i1, j0, j1;
-
-	m = len[0];
-	J[0] = 0;
-	J[m+1] = len[1]+1;
-	if (mode != 'e') {
-		for (i0 = 1; i0 <= m; i0 = i1+1) {
-			while (i0 <= m && J[i0] == J[i0-1]+1)
-				i0++;
-			j0 = J[i0-1]+1;
-			i1 = i0-1;
-			while (i1 < m && J[i1+1] == 0)
-				i1++;
-			j1 = J[i1+1]-1;
-			J[i1] = j1;
-			change(i0, i1, j0, j1);
-		}
-	} else {
-		for (i0 = m; i0 >= 1; i0 = i1-1) {
-			while (i0 >= 1 && J[i0] == J[i0+1]-1 && J[i0])
-				i0--;
-			j0 = J[i0+1]-1;
-			i1 = i0+1;
-			while (i1 > 1 && J[i1-1] == 0)
-				i1--;
-			j1 = J[i1-1]+1;
-			J[i1] = j1;
-			change(i1 , i0, j1, j0);
-		}
-	}
-	if (m == 0)
-		change(1, 0, 1, len[1]);
-	flushchanges();
-}
-
 #define BUF 4096
 static int
 cmp(Biobuf* b1, Biobuf* b2)
@@ -361,21 +300,20 @@
 }
 
 void
-diffreg(char *f, char *fo, char *t, char *to)
+calcdiff(Diff *d, char *f, char *fo, char *t, char *to)
 {
 	Biobuf *b0, *b1;
 	int k;
 
-	binary = 0;
-	b0 = prepare(0, f, fo);
+	b0 = prepare(d, 0, f, fo);
 	if (!b0)
 		return;
-	b1 = prepare(1, t, to);
+	b1 = prepare(d, 1, t, to);
 	if (!b1) {
 		Bterm(b0);
 		return;
 	}
-	if (binary){
+	if (d->binary){
 		// could use b0 and b1 but this is simpler.
 		if (cmp(b0, b1))
 			print("binary files %s %s differ\n", f, t);
@@ -383,38 +321,91 @@
 		Bterm(b1);
 		return;
 	}
-	clen = 0;
-	prune();
-	sort(sfile[0], slen[0]);
-	sort(sfile[1], slen[1]);
+	d->clen = 0;
+	prune(d);
+	sort(d->sfile[0], d->slen[0]);
+	sort(d->sfile[1], d->slen[1]);
 
-	member = (int *)file[1];
-	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
-	member = erealloc(member, (slen[1]+2)*sizeof(int));
+	d->member = (int *)d->file[1];
+	equiv(d->sfile[0], d->slen[0], d->sfile[1], d->slen[1], d->member);
+	d->member = erealloc(d->member, (d->slen[1]+2)*sizeof(int));
 
-	class = (int *)file[0];
-	unsort(sfile[0], slen[0], class);
-	class = erealloc(class, (slen[0]+2)*sizeof(int));
+	d->class = (int *)d->file[0];
+	unsort(d->sfile[0], d->slen[0], d->class);
+	d->class = erealloc(d->class, (d->slen[0]+2)*sizeof(int));
 
-	klist = emalloc((slen[0]+2)*sizeof(int));
-	clist = emalloc(sizeof(Cand));
-	k = stone(class, slen[0], member, klist);
-	free(member);
-	free(class);
+	d->klist = emalloc((d->slen[0]+2)*sizeof(int));
+	d->clist = emalloc(sizeof(Cand));
+	k = stone(d, d->class, d->slen[0], d->member, d->klist);
+	free(d->member);
+	free(d->class);
 
-	J = emalloc((len[0]+2)*sizeof(int));
-	unravel(klist[k]);
-	free(clist);
-	free(klist);
+	d->J = emalloc((d->len[0]+2)*sizeof(int));
+	unravel(d, d->klist[k]);
+	free(d->clist);
+	free(d->klist);
 
-	ixold = emalloc((len[0]+2)*sizeof(long));
-	ixnew = emalloc((len[1]+2)*sizeof(long));
+	d->ixold = emalloc((d->len[0]+2)*sizeof(long));
+	d->ixnew = emalloc((d->len[1]+2)*sizeof(long));
 	Bseek(b0, 0, 0); Bseek(b1, 0, 0);
-	check(b0, b1);
-	output();
-	free(J);
-	free(ixold);
-	free(ixnew);
-	Bterm(b0);
-	Bterm(b1);
+	check(d, b0, b1);
+}
+
+static void
+output(Diff *d)
+{
+	int m, i0, i1, j0, j1;
+
+	m = d->len[0];
+	d->J[0] = 0;
+	d->J[m+1] = d->len[1]+1;
+	if (mode != 'e') {
+		for (i0 = 1; i0 <= m; i0 = i1+1) {
+			while (i0 <= m && d->J[i0] == d->J[i0-1]+1)
+				i0++;
+			j0 = d->J[i0-1]+1;
+			i1 = i0-1;
+			while (i1 < m && d->J[i1+1] == 0)
+				i1++;
+			j1 = d->J[i1+1]-1;
+			d->J[i1] = j1;
+			change(d, i0, i1, j0, j1);
+		}
+	} else {
+		for (i0 = m; i0 >= 1; i0 = i1-1) {
+			while (i0 >= 1 && d->J[i0] == d->J[i0+1]-1 && d->J[i0])
+				i0--;
+			j0 = d->J[i0+1]-1;
+			i1 = i0+1;
+			while (i1 > 1 && d->J[i1-1] == 0)
+				i1--;
+			j1 = d->J[i1-1]+1;
+			d->J[i1] = j1;
+			change(d, i1 , i0, j1, j0);
+		}
+	}
+	if (m == 0)
+		change(d, 1, 0, 1, d->len[1]);
+	flushchanges(d);
+}
+
+void
+diffreg(char *f, char *fo, char *t, char *to)
+{
+	Diff d;
+
+	memset(&d, 0, sizeof(d));
+	calcdiff(&d, f, fo, t, to);
+	output(&d);
+	freediff(&d);
+}
+
+void
+freediff(Diff *d)
+{
+	Bterm(d->input[0]);
+	Bterm(d->input[1]);
+	free(d->J);
+	free(d->ixold);
+	free(d->ixnew);
 }
--- /dev/null
+++ b/sys/src/cmd/diff/merge3.c
@@ -1,0 +1,169 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "diff.h"
+
+static int
+changecmp(void *a, void *b)
+{
+	return ((Change*)a)->a - ((Change*)b)->a;
+}
+
+static void
+addchange(Diff *df, int a, int b, int c, int d)
+{
+	Change *ch;
+
+	if (a > b && c > d)
+		return;
+	if(df->nchanges%1024 == 0)
+		df->changes = erealloc(df->changes, (df->nchanges+1024)*sizeof(df->changes[0]));
+	ch = &df->changes[df->nchanges++];
+	ch->a = a;
+	ch->b = b;
+	ch->c = c;
+	ch->d = d;
+}
+
+static void
+collect(Diff *d)
+{
+	int m, i0, i1, j0, j1;
+
+	m = d->len[0];
+	d->J[0] = 0;
+	d->J[m+1] = d->len[1]+1;
+	for (i0 = m; i0 >= 1; i0 = i1-1) {
+		while (i0 >= 1 && d->J[i0] == d->J[i0+1]-1 && d->J[i0])
+			i0--;
+		j0 = d->J[i0+1]-1;
+		i1 = i0+1;
+		while (i1 > 1 && d->J[i1-1] == 0)
+			i1--;
+		j1 = d->J[i1-1]+1;
+		d->J[i1] = j1;
+		addchange(d, i1 , i0, j1, j0);
+	}
+	if (m == 0)
+		change(d, 1, 0, 1, d->len[1]);
+	qsort(d->changes, d->nchanges, sizeof(Change), changecmp);
+}
+
+static int
+overlaps(Change *a, Change *b)
+{
+	if(a == nil || b == nil)
+		return 0;
+	if(a->a <= b->a)
+		return a->b >= b->a;
+	else
+		return b->b >= a->a;
+}
+
+char*
+merge(Diff *l, Diff *r)
+{
+	int il, ir, a, b, δ;
+	Change *lc, *rc;
+	char *status;
+	vlong ln;
+
+	il = 0;
+	ir = 0;
+	ln = 0;
+	status = nil;
+	collect(l);
+	collect(r);
+	while(il < l->nchanges || ir < r->nchanges){
+		lc = nil;
+		rc = nil;
+		if(il < l->nchanges)
+			lc = &l->changes[il];
+		if(ir < r->nchanges)
+			rc = &r->changes[ir];
+		if(overlaps(lc, rc)){
+			/*
+			 * align the edges of the chunks
+			 */
+			if(lc->a < rc->a){
+				a = lc->a;
+				δ = rc->a - lc->a;
+				rc->a -= δ;
+				rc->c -= δ;
+			}else{
+				a = rc->a;
+				δ = lc->a - rc->a;
+				lc->a -= δ;
+				lc->c -= δ;
+			}
+			if(lc->b > rc->b){
+				b = lc->b;
+				δ = lc->b - rc->b;
+				rc->b += δ;
+				rc->d += δ;
+			}else{
+				b = rc->b;
+				δ = rc->b - lc->b;
+				rc->b += δ;
+				rc->d += δ;
+			}
+			fetch(l, l->ixold, ln, a-1, l->input[0], "");
+			Bprint(&stdout, "<<<<<<<<<< %s\n", l->file2);
+			fetch(l, l->ixnew, lc->c, lc->d, l->input[1], "");
+			Bprint(&stdout, "========== common\n");
+			fetch(l, l->ixold, a, b, l->input[0], "");
+			Bprint(&stdout, "========== %s\n", r->file2);
+			fetch(r, r->ixnew, rc->c, rc->d, r->input[1], "");
+			Bprint(&stdout, ">>>>>>>>>>\n");
+			ln = b+1;
+			il++;
+			ir++;
+			status = "conflict";
+		}else if(rc == nil || lc->a < rc->a){
+			fetch(l, l->ixold, ln, lc->a-1, l->input[0], "");
+			fetch(l, l->ixnew, lc->c, lc->d, l->input[1], "");
+			ln = lc->b+1;
+			il++;
+		}else if(lc == nil || rc->a < lc->a){
+			fetch(l, l->ixold, ln, rc->a-1, l->input[0], "");
+			fetch(r, r->ixnew, rc->c, rc->d, r->input[1], "");
+			ln = lc->b+1;
+			ir++;
+		}else
+			abort();
+	}
+	if(ln < l->len[0])
+		fetch(l, l->ixold, ln, l->len[0], l->input[0], "");
+	return status;
+}
+
+void
+usage(void)
+{
+	fprint(2, "usage: %s theirs base ours\n", argv0);
+	exits("usage");
+}
+
+void
+main(int argc, char **argv)
+{
+	Diff l, r;
+	char *x;
+
+	ARGBEGIN{
+	default:
+		usage();
+	}ARGEND;
+
+	if(argc != 3)
+		usage();
+	Binit(&stdout, 1, OWRITE);
+	memset(&l, 0, sizeof(l));
+	memset(&r, 0, sizeof(r));
+	calcdiff(&l, argv[1], argv[1], argv[0], argv[0]);
+	calcdiff(&r, argv[1], argv[1], argv[2], argv[2]);
+	x = merge(&l, &r);
+	freediff(&l);
+	freediff(&r);
+	exits(x);
+}
--- a/sys/src/cmd/diff/mkfile
+++ b/sys/src/cmd/diff/mkfile
@@ -1,13 +1,13 @@
 < /$objtype/mkfile
 
-TARG=diff
+TARG=diff merge3
 OFILES=\
 	diffdir.$O\
 	diffio.$O\
 	diffreg.$O\
-	main.$O\
+	util.$O
 
 HFILES=diff.h
 
 BIN=/$objtype/bin
-</sys/src/cmd/mkone
+</sys/src/cmd/mkmany
--- /dev/null
+++ b/sys/src/cmd/diff/util.c
@@ -1,0 +1,107 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "diff.h"
+
+Biobuf	stdout;
+char	mode;			/* '\0', 'e', 'f', 'h' */
+char	bflag;			/* ignore multiple and trailing blanks */
+char	rflag;			/* recurse down directory trees */
+char	mflag;			/* pseudo flag: doing multiple files, one dir */
+int	anychange;
+
+static char *tmp[] = {"/tmp/diff1XXXXXXXXXXX", "/tmp/diff2XXXXXXXXXXX"};
+static int whichtmp;
+
+void *
+emalloc(unsigned n)
+{
+	register void *p;
+
+	if ((p = malloc(n)) == 0)
+		sysfatal("malloc: %r");
+	return p;
+}
+
+void *
+erealloc(void *p, unsigned n)
+{
+	void *rp;
+
+	if ((rp = realloc(p, n)) == 0)
+		sysfatal("realloc: %r");
+	return rp;
+}
+
+int
+mkpathname(char *pathname, char *path, char *name)
+{
+	if (strlen(path) + strlen(name) > MAXPATHLEN) {
+		sysfatal("pathname %s/%s too long", path, name);
+		return 1;
+	}
+	sprint(pathname, "%s/%s", path, name);
+	return 0;
+}
+
+char *
+mktmpfile(int input, Dir **sb)
+{
+	int fd, i;
+	char *p;
+	char buf[8192];
+
+	p = mktemp(tmp[whichtmp++]);
+	fd = create(p, OWRITE|ORCLOSE, 0600);
+	if (fd < 0) {
+		sysfatal("cannot create %s: %r", p);
+		return 0;
+	}
+	while ((i = read(input, buf, sizeof(buf))) > 0) {
+		if ((i = write(fd, buf, i)) < 0)
+			break;
+	}
+	*sb = dirfstat(fd);
+	close(fd);
+	if (i < 0) {
+		sysfatal("cannot read/write %s: %r", p);
+		return 0;
+	}
+	return p;
+}
+
+void
+rmtmpfiles(void)
+{
+	while (whichtmp > 0) {
+		whichtmp--;
+		remove(tmp[whichtmp]);
+	}
+}
+
+char *
+statfile(char *file, Dir **sb)
+{
+	Dir *dir;
+	int input;
+
+	dir = dirstat(file);
+	if(dir == nil) {
+		if (strcmp(file, "-") || (dir = dirfstat(0)) == nil) {
+			sysfatal("cannot stat %s: %r", file);
+			return 0;
+		}
+		free(dir);
+		return mktmpfile(0, sb);
+	} else if (!REGULAR_FILE(dir) && !DIRECTORY(dir)) {
+		free(dir);
+		if ((input = open(file, OREAD)) == -1) {
+			sysfatal("cannot open %s: %r", file);
+			return 0;
+		}
+		file = mktmpfile(input, sb);
+		close(input);
+	} else
+		*sb = dir;
+	return file;
+}


             reply	other threads:[~2023-02-12 23:19 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-12 23:17 ori [this message]
2023-02-25 16:56 ` ori
2023-03-04 20:06   ` ori

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=C57DB22650EAFBEB21A061CB9A4B1A0A@eigenstate.org \
    --to=ori@eigenstate.org \
    --cc=9front@9front.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).