9front - general discussion about 9front
 help / color / mirror / Atom feed
From: ori@eigenstate.org
To: 9front@9front.org
Subject: Re: [9front] patch: import replacement for ape/patch
Date: Mon, 23 May 2022 17:40:02 -0400	[thread overview]
Message-ID: <753EA3FDBC98DCDA7268E21AFF3AC230@eigenstate.org> (raw)
In-Reply-To: <20220522221820.4294230d@spruce.localdomain>

Quoth Amavect <amavect@gmail.com>:
> On Sun, 22 May 2022 19:09:40 -0400
> ori@eigenstate.org wrote:
> 
> > We've been dragging along an antique version of Gnu patch
> > as part of ape. It's a bulky program with built in remote
> > code execution as part of ed-style diffs.
> 
> This will conflict with /rc/bin/patch and /sys/man/1/patch,
> which I'm all for removing.

done -- a59e61a6a4e11e0256da0d209afa38ccacd460a2

> Attached is my edit of the man page:
> correct Synopsis patch italics
> clarify what patch does
> Addtions -> Additions
> fix table italics
> clarify -p
> in See Also, add comma
> in Bugs, use all bold for commands

Taken, thanks.

> I can't seem to apply your diff with your program:
> cpu% touch patch patch.c
> cpu% lc
> 6.out*	diff	patch	patch.c
> cpu% 6.out diff
> 6.out: diff:4: unable to find patch offset
> cpu% 6.out -p4 diff
> 6.out: open null: 'null' not found
> cpu% ape/patch -i diff
> patching file `patch'
> patching file `patch.c'
> cpu% 
> 
> I'm guessing /dev/null is special-cased in gnu patch.
> 

Yeah, and there were a couple of other bugs around that,
like handling paths starting with (or repeating) /.

I also fixed the '--' ambiguity: Hunk headers start
with '---', but if you have a line starting with '--',
then diff -u can generate an output line that also
starts with '---'; to disambiguate, you need to look
ahead two lines for a line starting with '@@'. The code
to handle it is an ugly goto-ridden mess :(

new revision:

diff 6fbb1acc8fa0b6655b14e8c46240a4a8d2d8c672 uncommitted
--- /dev/null
+++ b//sys/man/1/patch
@@ -1,0 +1,58 @@
+.TH PATCH 1
+.SH NAME
+patch \- apply diffs
+.SH SYNOPSIS
+.B patch
+[
+.B -R
+]
+[
+.B -p
+.I nstrip
+]
+[
+.B -f
+.I maxfuzz
+]
+[
+.B patch...
+]
+.SH DESCRIPTION
+.I Patch
+will take a patch file in unified diff format, and apply it,
+skipping leading and trailing junk.
+If the file is not an exact match, it will search forwards and
+backwards for the surrounding context up to
+.I maxfuzz
+lines.
+
+The following options are supported:
+.TP
+.I -R
+Reverse direction of the patch. Addtions become removals,
+and the new and old file names are swapped.
+.TP
+.I -p nstrip
+Remove
+.I nstrip
+elements from the paths to the files.
+.TP
+.I -f maxfuzz
+Controls how far
+.I patch
+searches for context when applying a patch.
+If not specified, this defaults to 250 lines.
+.SH SEE ALSO
+.IR diff (1)
+.IR git/export (1)
+
+.SH BUGS
+.PP
+The output of
+.IR diff -c
+is not handled.
+.PP
+Reject files and backups are not supported.
+.PP
+All files are processed in memory, which limits
+the files handled to 2 gigabytes.
--- /dev/null
+++ b//sys/src/cmd/patch.c
@@ -1,0 +1,617 @@
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+#include <bio.h>
+
+typedef struct Patch Patch;
+typedef struct Hunk Hunk;
+typedef struct Fbuf Fbuf;
+
+struct Patch {
+	char	*name;
+	Hunk	*hunk;
+	usize	nhunk;
+};
+
+struct Hunk {
+	int	lnum;
+
+	char	*oldpath;
+	int	oldln;
+	int	oldcnt;
+	int	oldlen;
+	int	oldsz;
+	char	*old;
+
+	char	*newpath;
+	int	newln;
+	int	newcnt;
+	int	newlen;
+	int	newsz;
+	char	*new;
+};
+
+struct Fbuf {
+	int	*lines;
+	int	nlines;
+	int	lastln;
+	char	*buf;
+	int	len;
+};
+
+int	strip;
+int	reverse;
+int	maxfuzz	= 250;
+int	listpatch;
+void	(*addnew)(Hunk*, char*);
+void	(*addold)(Hunk*, char*);
+
+char*
+readline(Biobuf *f, int *lnum)
+{
+	char *ln;
+
+	if((ln = Brdstr(f, '\n', 0)) == nil)
+		return nil;
+	*lnum += 1;
+	return ln;
+}
+
+void *
+emalloc(ulong n)
+{
+	void *v;
+	
+	v = mallocz(n, 1);
+	if(v == nil)
+		sysfatal("malloc: %r");
+	setmalloctag(v, getcallerpc(&n));
+	return v;
+}
+
+void *
+erealloc(void *v, ulong n)
+{
+	if(n == 0)
+		n++;
+	v = realloc(v, n);
+	if(v == nil)
+		sysfatal("malloc: %r");
+	setmalloctag(v, getcallerpc(&n));
+	return v;
+}
+
+int
+fileheader(char *s, char *pfx, char **name)
+{
+	int len, n, nnull;
+	char *e;
+
+	if((strncmp(s, pfx, strlen(pfx))) != 0)
+		return -1;
+	for(s += strlen(pfx); *s; s++)
+		if(!isspace(*s))
+			break;
+	for(e = s; *e; e++)
+		if(isspace(*e))
+			break;
+	if(s == e)
+		return -1;
+	nnull = strlen("/dev/null");
+	if((e - s) != nnull || strncmp(s, "/dev/null", nnull) != 0){
+		n = strip;
+		while(s != e && n > 0){
+			while(s != e && *s == '/')
+				s++;
+			while(s != e && *s != '/')
+				s++;
+			n--;
+		}
+		while(*s == '/')
+			s++;
+		if(*s == '\0')
+			sysfatal("too many components stripped");
+	}
+	len = (e - s) + 1;
+	*name = emalloc(len);
+	strecpy(*name, *name + len, s);
+	return 0;
+}
+
+int
+hunkheader(Hunk *h, char *s, char *oldpath, char *newpath, int lnum)
+{
+	char *e;
+
+	memset(h, 0, sizeof(*h));
+	h->lnum = lnum;
+	h->oldpath = strdup(oldpath);
+	h->newpath = strdup(newpath);
+	h->oldlen = 0;
+	h->oldsz = 32;
+	h->old = emalloc(h->oldsz);
+	h->newlen = 0;
+	h->newsz = 32;
+	h->new = emalloc(h->newsz);
+	if(strncmp(s, "@@ -", 4) != 0)
+		return -1;
+	e = s + 4;
+	h->oldln = strtol(e, &e, 10);
+	if(*e != ',')
+		return -1;
+	e++;
+	h->oldcnt = strtol(e, &e, 10);
+	while(*e == ' ' || *e == '\t')
+		e++;
+	if(*e != '+')
+		return -1;
+	e++;
+	h->newln = strtol(e, &e, 10);
+	if(e == s || *e != ',')
+		return -1;
+	e++;
+	h->newcnt = strtol(e, &e, 10);
+	if(e == s || *e != ' ')
+		return -1;
+	if(strncmp(e, " @@", 3) != 0)
+		return -1;
+	/*
+	 * empty files have line number 0: keep that,
+	 * otherwise adjust down.
+	 */
+	if(h->oldln > 0)
+		h->oldln--;
+	if(h->newln > 0)
+		h->newln--;
+	if(h->oldln < 0 || h->newln < 0 || h->oldcnt < 0 || h->newcnt < 0)
+		sysfatal("malformed hunk %s", s);
+	return 0;
+}
+
+void
+addnewfn(Hunk *h, char *ln)
+{
+	int n;
+
+	ln++;
+	n = strlen(ln);
+	while(h->newlen + n >= h->newsz){
+		h->newsz *= 2;
+		h->new = erealloc(h->new, h->newsz);
+	}
+	memcpy(h->new + h->newlen, ln, n);
+	h->newlen += n;
+}
+
+void
+addoldfn(Hunk *h, char *ln)
+{
+	int n;
+
+	ln++;
+	n = strlen(ln);
+	while(h->oldlen + n >= h->oldsz){
+		h->oldsz *= 2;
+		h->old = erealloc(h->old, h->oldsz);
+	}
+	memcpy(h->old + h->oldlen, ln, n);
+	h->oldlen += n;
+}
+
+int
+addmiss(Hunk *h, char *ln, int *nold, int *nnew)
+{
+	if(ln == nil)
+		return 1;
+	else if(ln[0] != '-' && ln[0] != '+')
+		return 0;
+	if(ln[0] == '-'){
+		addold(h, ln);
+		*nold += 1;
+	}else{
+		addnew(h, ln);
+		*nnew += 1;
+	}
+	return 1;
+}
+
+void
+addhunk(Patch *p, Hunk *h)
+{
+	p->hunk = erealloc(p->hunk, ++p->nhunk*sizeof(Hunk));
+	p->hunk[p->nhunk-1] = *h;
+}
+
+int
+hunkcmp(void *a, void *b)
+{
+	int c;
+
+	c = strcmp(((Hunk*)a)->oldpath, ((Hunk*)b)->oldpath);
+	if(c != 0)
+		return c;
+	return ((Hunk*)b)->oldln - ((Hunk*)a)->oldln;
+}
+
+Patch*
+parse(Biobuf *f, char *name)
+{
+	char *ln, *old, *new, *oldhdr, *newhdr, *hunkhdr, **oldp, **newp;
+	int inbody, oldcnt, newcnt, fromdash, lnum;
+	Patch *p;
+	Hunk h, hh;
+
+	ln = nil;
+	lnum = 0;
+	inbody = 0;
+	fromdash = 0;
+	p = emalloc(sizeof(Patch));
+	if(!reverse){
+		oldp = &old;
+		newp = &new;
+	}else{
+		oldp = &new;
+		newp = &old;
+	}
+comment:
+	free(ln);
+	while((ln = readline(f, &lnum)) != nil){
+		if(strncmp(ln, "--- ", 4) == 0)
+			goto patch;
+		free(ln);
+	}
+	sysfatal("%s: could not find start of patch", name);
+
+patch:
+	oldhdr = ln;
+	ln = nil;
+	newhdr = nil;
+	hunkhdr = nil;
+	if(fileheader(oldhdr, "--- ", oldp) == -1){
+		if(!inbody)
+			goto comment;
+		else if(fromdash)
+			goto mishunk;
+		else
+			goto out;
+	}
+	if((newhdr = readline(f, &lnum)) == nil)
+		goto out;
+	if(fileheader(newhdr, "+++ ", newp) == -1){
+		if(!inbody)
+			goto comment;
+		else if(fromdash)
+			goto mishunk;
+		else
+			goto out;
+	}
+	if((hunkhdr = readline(f, &lnum)) == nil)
+		goto out;
+hunk:
+	if(hunkheader(&hh, hunkhdr, old, new, lnum) == -1){
+		if(!inbody)
+			goto comment;
+		else if(!fromdash)
+			goto out;
+mishunk:
+		if(!addmiss(&h, oldhdr, &oldcnt, &newcnt))
+			goto out;
+		if(!addmiss(&h, newhdr, &oldcnt, &newcnt))
+			goto out;
+		if(!addmiss(&h, hunkhdr, &oldcnt, &newcnt))
+			goto out;
+		goto nextln;
+	}
+	if(inbody)
+		addhunk(p, &h);
+	h = hh;
+	inbody = 1;
+	oldcnt = 0;
+	newcnt = 0;
+	fromdash = 0;
+	free(ln);
+	free(oldhdr);
+	free(newhdr);
+	free(hunkhdr);
+	while(1){
+nextln:
+		if((ln = readline(f, &lnum)) == nil){
+			if(oldcnt != h.oldcnt || newcnt != h.newcnt)
+				sysfatal("%s:%d: malformed hunk", name, lnum);
+			addhunk(p, &h);
+			break;
+		}
+		switch(ln[0]){
+		default:
+			if(oldcnt != h.oldcnt || newcnt != h.newcnt)
+				sysfatal("%s:%d: malformed hunk", name, lnum);
+			addhunk(p, &h);
+			goto patch;
+		case '@':
+			addhunk(p, &h);
+			goto hunk;
+		case '-':
+			if(strncmp(ln, "--- ", 4) == 0){
+				fromdash = 1;
+				goto patch;
+			}
+			addold(&h, ln);
+			oldcnt++;
+			break;
+		case '+':
+			addnew(&h, ln);
+			newcnt++;
+			break;
+		case ' ':
+			addold(&h, ln);
+			addnew(&h, ln);
+			oldcnt++;
+			newcnt++;
+			break;
+		}
+	}
+
+out:
+	qsort(p->hunk, p->nhunk, sizeof(Hunk), hunkcmp);
+	free(old);
+	free(new);
+	free(ln);
+	return p;
+}
+
+int
+rename(int fd, char *name)
+{
+	Dir st;
+	char *p;
+
+	nulldir(&st);
+	if((p = strrchr(name, '/')) == nil)
+		st.name = name;
+	else
+		st.name = p + 1;
+	return dirfwstat(fd, &st);
+}
+
+void
+blat(char *old, char *new, char *o, usize len)
+{
+	char *tmp;
+	int fd;
+
+	if(strcmp(new, "/dev/null") == 0){
+		if(len != 0)
+			sysfatal("diff modifies removed file");
+		if(remove(old) == -1)
+			sysfatal("removeold %s: %r", old);
+		return;
+	}
+	if((tmp = smprint("%s.tmp%d", new, getpid())) == nil)
+		sysfatal("smprint: %r");
+	if((fd = create(tmp, OWRITE, 0666)) == -1)
+		sysfatal("open %s: %r", tmp);
+	if(write(fd, o, len) != len)
+		sysfatal("write %s: %r", tmp);
+	if(strcmp(old, "/dev/null") != 0 && remove(old) == -1)
+		sysfatal("remove %s: %r", old);
+	if(strcmp(new, old) == 0)
+		remove(new);
+	if(rename(fd, new) == -1)
+		sysfatal("create %s: %r", new);
+	if(close(fd) == -1)
+		sysfatal("close %s: %r", tmp);
+	free(tmp);
+}
+
+int
+slurp(Fbuf *f, char *path)
+{
+	int n, i, fd, sz, len, nlines, linesz;
+	char *buf;
+	int *lines;
+
+	if((fd = open(path, OREAD)) == -1)
+		sysfatal("open %s: %r", path);
+	sz = 8192;
+	len = 0;
+	buf = emalloc(sz);
+	while(1){
+		if(len == sz){
+			sz *= 2;
+			buf = erealloc(buf, sz);
+		}
+		n = read(fd, buf + len, sz - len);
+		if(n == 0)
+			break;
+		if(n == -1)
+			sysfatal("read %s: %r", path);
+		len += n;
+	}
+
+	nlines = 0;
+	linesz = 32;
+	lines = emalloc(linesz*sizeof(int));
+	lines[nlines++] = 0;
+	for(i = 0; i < len; i++){
+		if(buf[i] != '\n')
+			continue;
+		if(nlines+1 == linesz){
+			linesz *= 2;
+			lines = erealloc(lines, linesz*sizeof(int));
+		}
+		lines[nlines++] = i+1;
+	}
+	f->len = len;
+	f->buf = buf;
+	f->lines = lines;
+	f->nlines = nlines;
+	f->lastln = 0;
+	return 0;
+}
+
+char*
+search(Fbuf *f, Hunk *h, char *fname)
+{
+	int ln, len, off, fuzz, nfuzz, scanning;
+
+	scanning = 1;
+	len = h->oldlen;
+	nfuzz = (f->nlines < maxfuzz) ? f->nlines : maxfuzz;
+	for(fuzz = 0; scanning && fuzz <= nfuzz; fuzz++){
+		scanning = 0;
+		ln = h->oldln - fuzz;
+		if(ln >= f->lastln){
+			off = f->lines[ln];
+			if(off + len > f->len)
+				continue;
+			scanning = 1;
+			if(memcmp(f->buf + off, h->old, h->oldlen) == 0){
+				f->lastln = ln;
+				return f->buf + off;
+			}
+		}
+		ln = h->oldln + fuzz - 1;
+		if(ln <= f->nlines){
+			off = f->lines[ln];
+			if(off + len >= f->len)
+				continue;
+			scanning = 1;
+			if(memcmp(f->buf + off, h->old, h->oldlen) == 0){
+				f->lastln = ln;
+				return f->buf + off;
+			}
+		}
+	}
+	sysfatal("%s:%d: unable to find hunk offset in %s", fname, h->lnum, h->oldpath);
+	return nil;
+}
+
+char*
+append(char *o, int *sz, char *s, char *e)
+{
+	int n;
+
+	n = (e - s);
+	o = erealloc(o, *sz + n);
+	memcpy(o + *sz, s, n);
+	*sz += n;
+	return o;
+}
+
+int
+apply(Patch *p, char *fname)
+{
+	char *o, *s, *e, *curfile;
+	int i, osz;
+	Hunk *h;
+	Fbuf f;
+
+	e = nil;
+	o = nil;
+	osz = 0;
+	curfile = nil;
+	for(i = 0; i < p->nhunk; i++){
+		h = &p->hunk[i];
+		if(curfile == nil || strcmp(curfile, h->newpath) != 0){
+			if(slurp(&f, h->oldpath) == -1)
+				sysfatal("slurp %s: %r", h->oldpath);
+			curfile = h->newpath;
+			e = f.buf;
+		}
+		s = e;
+		e = search(&f, h, fname);
+		o = append(o, &osz, s, e);
+		o = append(o, &osz, h->new, h->new + h->newlen);
+		e += h->oldlen;
+		if(i+1 == p->nhunk || strcmp(curfile, p->hunk[i+1].newpath) != 0){
+			o = append(o, &osz, e, f.buf + f.len);
+			blat(h->oldpath, h->newpath, o, osz);
+			if(listpatch)
+				print("%s\n", h->newpath);
+			osz = 0;
+		}
+	}
+	free(o);
+	return 0;
+}
+
+void
+freepatch(Patch *p)
+{
+	Hunk *h;
+	int i;
+
+	for(i = 0; i < p->nhunk; i++){
+		h = &p->hunk[i];
+		free(h->oldpath);
+		free(h->newpath);
+		free(h->old);
+		free(h->new);
+	}
+	free(p->hunk);
+	free(p->name);
+	free(p);
+}
+
+void
+usage(void)
+{
+	fprint(2, "usage: %s [-R] [-p nstrip] [-f maxfuzz] [patch...]\n", argv0);
+	exits("usage");
+}
+
+void
+main(int argc, char **argv)
+{
+	Biobuf *f;
+	Patch *p;
+	int i;
+
+	ARGBEGIN{
+	case 'p':
+		strip = atoi(EARGF(usage()));
+		break;
+	case 'R':
+		reverse++;
+		break;
+	case 'f':
+		maxfuzz = atoi(EARGF(usage()));
+		break;
+	case 'l':
+		listpatch++;
+		break;
+	default:
+		usage();
+		break;
+	}ARGEND;
+
+	if(reverse){
+		addnew = addoldfn;
+		addold = addnewfn;
+	}else{
+		addnew = addnewfn;
+		addold = addoldfn;
+	}
+	if(argc == 0){
+		if((f = Bfdopen(0, OREAD)) == nil)
+			sysfatal("open stdin: %r");
+		if((p = parse(f, "stdin")) == nil)
+			sysfatal("parse patch: %r");
+		if(apply(p, "stdin") == -1)
+			sysfatal("apply stdin: %r");
+		freepatch(p);
+		Bterm(f);
+	}else{
+		for(i = 0; i < argc; i++){
+			if((f = Bopen(argv[i], OREAD)) == nil)
+				sysfatal("open %s: %r", argv[i]);
+			if((p = parse(f, argv[i])) == nil)
+				sysfatal("parse patch: %r");
+			if(apply(p, argv[i]) == -1)
+				sysfatal("apply %s: %r", argv[i]);
+			freepatch(p);
+			Bterm(f);
+		}
+	}
+	exits(nil);
+}


  parent reply	other threads:[~2022-05-23 21:44 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-22 22:57 ori
2022-05-22 23:09 ` ori
2022-05-23  3:18   ` Amavect
2022-05-23 12:57     ` Humm
2022-05-24  2:38       ` Amavect
2022-05-24 16:39         ` Humm
2022-05-24 18:44           ` umbraticus
2022-05-24 19:42           ` Lyndon Nerenberg (VE7TFX/VE6BBM)
2022-05-24 21:19           ` Amavect
2022-05-23 21:40     ` ori [this message]
2022-05-23 23:51       ` ori
2022-05-28 21:09         ` ori

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=753EA3FDBC98DCDA7268E21AFF3AC230@eigenstate.org \
    --to=ori@eigenstate.org \
    --cc=9front@9front.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).