9front - general discussion about 9front
 help / color / mirror / Atom feed
From: ori@eigenstate.org
To: ori@eigenstate.org, cinap_lenrek@felloff.net, 9front@9front.org
Subject: Re: [9front] Fix CPP Token Paste: Testing requested.
Date: Fri, 6 Mar 2020 16:47:33 -0800	[thread overview]
Message-ID: <94224BDA65B7C92201491757EC52C7F9@eigenstate.org> (raw)
In-Reply-To: <B825A72128C5A7DF8D6AE7070FEB4641@eigenstate.org>

>>> good work!
>>> 
>>> how about we run the old and new cpp binaries on every c file we get and
>>> check for differences in the output for testing?
>> 
>> Will do, though we don't have too much preprocessor abuse in our
>> code. (ape doesn't use '##' even once!)
>> 
>> Did find one bug I introduced when testing ocaml -- added the NCAT
>> case to test.c to cover it. Here's an updated patch:
>> 
> 
> Ok. Tested with:
> 
> 	fn t {
> 		ramfs -m /tmp/m
> 		for(f in `{walk -f /sys/src|grep '\.c$'}){
> 			mkdir -p /tmp/m/^$f;
> 			>/tmp/m/^$f^/old >[2]/dev/null cpp $f;
> 			>/tmp/m/^$f^/new >[2]/dev/null /usr/ori/src/cpp/6.out $f
> 		}
> 		for(f in `{walk -f /sys/src|grep '\.c$'})
> 			ape/diff -u /tmp/m/^$f^/old /tmp/m/^$f^/new
> 	}
> 
> With this diff, we have no changes in the cpp output.
> It preprocesses and builds ocaml, and doesn't seem to
> error on perl5 so far (though we have other issues:
> the bitfields thing is biting us again).
> 

I also found one more edge case: CAT(,) should produce an
empty token. That now works.

It looks like we mishandle empty vararg lists too, but
that's a separate issue, and probably a separate patch.

So:
	- No diffs on /sys/src
	- Looks like it works ok on ocaml, gmake and my wip perl5 fixes

Any other tests anyone thinks I should do?

diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.c
--- a/sys/src/cmd/cpp/cpp.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.c	Fri Mar 06 16:31:21 2020 -0800
@@ -68,7 +68,7 @@
 			trp->tp += 1;
 			control(trp);
 		} else if (!skipping && anymacros)
-			expandrow(trp, NULL, Notinmacro);
+			expandrow(trp, NULL);
 		if (skipping)
 			setempty(trp);
 		puttokens(trp);
@@ -217,7 +217,7 @@
 
 	case KLINE:
 		trp->tp = tp+1;
-		expandrow(trp, "<line>", Notinmacro);
+		expandrow(trp, "<line>");
 		tp = trp->bp+2;
 	kline:
 		if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp
diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.h
--- a/sys/src/cmd/cpp/cpp.h	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.h	Fri Mar 06 16:31:21 2020 -0800
@@ -107,11 +107,11 @@
 void	doadefine(Tokenrow *, int);
 void	doinclude(Tokenrow *);
 void	doif(Tokenrow *, enum kwtype);
-void	expand(Tokenrow *, Nlist *, int);
+void	expand(Tokenrow *, Nlist *);
 void	builtin(Tokenrow *, int);
 int	gatherargs(Tokenrow *, Tokenrow **, int, int *);
 void	substargs(Nlist *, Tokenrow *, Tokenrow **);
-void	expandrow(Tokenrow *, char *, int);
+void	expandrow(Tokenrow *, char *);
 void	maketokenrow(int, Tokenrow *);
 Tokenrow *copytokenrow(Tokenrow *, Tokenrow *);
 Token	*growtokenrow(Tokenrow *);
@@ -120,7 +120,7 @@
 void	movetokenrow(Tokenrow *, Tokenrow *);
 void	insertrow(Tokenrow *, int, Tokenrow *);
 void	peektokens(Tokenrow *, char *);
-void	doconcat(Tokenrow *);
+void	glue(Tokenrow *, Token *, Token *);
 Tokenrow *stringify(Tokenrow *);
 int	lookuparg(Nlist *, Token *);
 long	eval(Tokenrow *, int);
diff -r 0dd419f096e2 sys/src/cmd/cpp/eval.c
--- a/sys/src/cmd/cpp/eval.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/eval.c	Fri Mar 06 16:31:21 2020 -0800
@@ -116,7 +116,7 @@
 	}
 	ntok = trp->tp - trp->bp;
 	kwdefined->val = KDEFINED;	/* activate special meaning of defined */
-	expandrow(trp, "<if>", Notinmacro);
+	expandrow(trp, "<if>");
 	kwdefined->val = NAME;
 	vp = vals;
 	op = ops;
diff -r 0dd419f096e2 sys/src/cmd/cpp/include.c
--- a/sys/src/cmd/cpp/include.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/include.c	Fri Mar 06 16:31:21 2020 -0800
@@ -18,7 +18,7 @@
 		goto syntax;
 	if (trp->tp->type!=STRING && trp->tp->type!=LT) {
 		len = trp->tp - trp->bp;
-		expandrow(trp, "<include>", Notinmacro);
+		expandrow(trp, "<include>");
 		trp->tp = trp->bp+len;
 	}
 	if (trp->tp->type==STRING) {
diff -r 0dd419f096e2 sys/src/cmd/cpp/macro.c
--- a/sys/src/cmd/cpp/macro.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/macro.c	Fri Mar 06 16:31:21 2020 -0800
@@ -138,7 +138,7 @@
  * Flag is NULL if more input can be gathered.
  */
 void
-expandrow(Tokenrow *trp, char *flag, int inmacro)
+expandrow(Tokenrow *trp, char *flag)
 {
 	Token *tp;
 	Nlist *np;
@@ -170,7 +170,7 @@
 		if (np->flag&ISMAC)
 			builtin(trp, np->val);
 		else {
-			expand(trp, np, inmacro);
+			expand(trp, np);
 		}
 		tp = trp->tp;
 	}
@@ -184,7 +184,7 @@
  * (ordinarily the beginning of the expansion)
  */
 void
-expand(Tokenrow *trp, Nlist *np, int inmacro)
+expand(Tokenrow *trp, Nlist *np)
 {
 	Tokenrow ntr;
 	int ntokc, narg, i;
@@ -214,8 +214,6 @@
 			dofree(atr[i]);
 		}
 	}
-	if(!inmacro)
-		doconcat(&ntr);				/* execute ## operators */
 	hs = newhideset(trp->tp->hideset, np);
 	for (tp=ntr.bp; tp<ntr.lp; tp++) {	/* distribute hidesets */
 		if (tp->type==NAME) {
@@ -228,7 +226,7 @@
 	ntr.tp = ntr.bp;
 	insertrow(trp, ntokc, &ntr);
 	trp->tp -= rowlen(&ntr);
-	dofree(ntr.bp);
+	free(ntr.bp);
 	return;
 }	
 
@@ -326,7 +324,25 @@
 	}
 	return ntok;
 }
-
+	
+int
+ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok)
+{
+	*ap = nil;
+	*an = nil;
+	/* EMPTY ## tok */
+	if (rtr->tp->type == DSHARP && rtr->tp != rtr->bp)
+		rtr->tp--;
+	/* tok ## tok */
+	if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) {
+		*ap = rtr->tp;
+		if(rtr->tp + 2 != rtr->lp)
+			*an = rtr->tp + 2;
+		*ntok = 1 + (*ap != nil) + (*an != nil);
+		return 1;
+	}
+	return 0;
+}
 /*
  * substitute the argument list into the replacement string
  *  This would be simple except for ## and #
@@ -334,8 +350,8 @@
 void
 substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
 {
-	Tokenrow tatr;
-	Token *tp;
+	Tokenrow tatr, tcat;
+	Token *tp, *ap, *an, *pp, *pn;
 	int ntok, argno;
 
 	for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) {
@@ -350,19 +366,31 @@
 			rtr->tp = tp;
 			insertrow(rtr, ntok, stringify(atr[argno]));
 			continue;
-		}
-		if (rtr->tp->type==NAME
-		 && (argno = lookuparg(np, rtr->tp)) >= 0) {
+		} else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */
+			pp = ap;
+			pn = an;
+			if (ap && (argno = lookuparg(np, ap)) >= 0){
+				pp = nil;
+				if(atr[argno]->tp != atr[argno]->lp)
+					pp = atr[argno]->lp - 1;
+			}
+			if (an && (argno = lookuparg(np, an)) >= 0) {
+				pn = nil;
+				if(atr[argno]->tp != atr[argno]->lp)
+					pn = atr[argno]->lp - 1;
+			}
+			glue(&tcat, pp, pn);
+			insertrow(rtr, ntok, &tcat);
+			free(tcat.bp);
+			continue;
+		} else if (rtr->tp->type==NAME && (argno = lookuparg(np, rtr->tp)) >= 0) {
 			if (rtr->tp < rtr->bp)
 				error(ERROR, "access out of bounds");
-			if ((rtr->tp+1)->type==DSHARP
-			 || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)
-				insertrow(rtr, 1, atr[argno]);
 			else {
 				copytokenrow(&tatr, atr[argno]);
-				expandrow(&tatr, "<macro>", Inmacro);
+				expandrow(&tatr, "<macro>");
 				insertrow(rtr, 1, &tatr);
-				dofree(tatr.bp);
+				free(tatr.bp);
 			}
 			continue;
 		}
@@ -374,41 +402,32 @@
  * Evaluate the ## operators in a tokenrow
  */
 void
-doconcat(Tokenrow *trp)
+glue(Tokenrow *ntr, Token *tp, Token *tn)
 {
-	Token *ltp, *ntp;
-	Tokenrow ntr;
-	int len;
+	int np, nn;
+	char *tt;
 
-	for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) {
-		if (trp->tp->type==DSHARP1)
-			trp->tp->type = DSHARP;
-		else if (trp->tp->type==DSHARP) {
-			char tt[128];
-			ltp = trp->tp-1;
-			ntp = trp->tp+1;
-			if (ltp<trp->bp || ntp>=trp->lp) {
-				error(ERROR, "## occurs at border of replacement");
-				continue;
-			}
-			len = ltp->len + ntp->len;
-			strncpy((char*)tt, (char*)ltp->t, ltp->len);
-			strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len);
-			tt[len] = '\0';
-			setsource("<##>", -1, tt);
-			maketokenrow(3, &ntr);
-			gettokens(&ntr, 1);
-			unsetsource();
-			if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS)
-				error(WARNING, "Bad token %r produced by ##", &ntr);
-			ntr.lp = ntr.bp+1;
-			trp->tp = ltp;
-			makespace(&ntr);
-			insertrow(trp, (ntp-ltp)+1, &ntr);
-			dofree(ntr.bp);
-			trp->tp--;
-		}
+	np = tp ? tp->len : 0;
+	nn = tn ? tn->len : 0;
+	tt = domalloc(np + nn + 1);
+	if(tp)
+		memcpy(tt, tp->t, tp->len);
+	if(tn)
+		memcpy(tt+np, tn->t, tn->len);
+	tt[np+nn] = '\0';
+	setsource("<##>", -1, tt);
+	maketokenrow(3, ntr);
+	gettokens(ntr, 1);
+	unsetsource();
+	dofree(tt);
+	if (np + nn == 0) {
+		ntr->lp = ntr->bp;
+	} else {
+		if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS)
+			error(WARNING, "Bad token %r produced by ##", &ntr);
+		ntr->lp = ntr->bp+1;
 	}
+	makespace(ntr);
 }
 
 /*
diff -r 0dd419f096e2 sys/src/cmd/cpp/test.c
--- a/sys/src/cmd/cpp/test.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/test.c	Fri Mar 06 16:31:21 2020 -0800
@@ -1,4 +1,18 @@
-#define M1()
-#define M2(A1) A1()
-M2(M1)
-M2(P1)
+#define NOP(x) x
+#define CAT(a, b) a ## b
+#define EOF	(-1)
+x CAT(foo, EOF) y
+x CAT(EOF, foo) y
+x CAT(, EOF) y
+y CAT(foo,) x
+x CAT(,foo) y
+X CAT(,) y
+
+#define NCAT(a)	foo ## a
+NCAT(bar)
+
+#define XCAT(a)	## a
+foo XCAT(bar)
+
+#define CAT3(foo)	a##foo##b
+CAT3(blah)



  reply	other threads:[~2020-03-07  0:47 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-05 17:26 ori
2020-03-05 19:21 ` [9front] " cinap_lenrek
2020-03-05 21:42   ` ori
2020-03-06  7:01     ` ori
2020-03-07  0:47       ` ori [this message]
2020-03-08 17:42         ` ori
2020-03-16  2:55           ` ori

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=94224BDA65B7C92201491757EC52C7F9@eigenstate.org \
    --to=ori@eigenstate.org \
    --cc=9front@9front.org \
    --cc=cinap_lenrek@felloff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).