9front - general discussion about 9front
 help / color / mirror / Atom feed
* Fix CPP Token Paste: Testing requested.
@ 2020-03-05 17:26 ori
  2020-03-05 19:21 ` [9front] " cinap_lenrek
  0 siblings, 1 reply; 7+ messages in thread
From: ori @ 2020-03-05 17:26 UTC (permalink / raw)
  To: 9front

** This change is relatively subtle, and I'd like
some more testing before committing **

Perl does a bunch of macro stuff that breaks on our
preprocessor. So does TCC. Specifically, when token
pasting in a function like macro, we should paste
the unexpanded arguments to the function. We do this,
but only when the function like macro is not expanded
inside another macro parameter. For example, in:

	#define NOP(x) x
	#define CAT(a, b) a ## b
	#define EOF	(-1)
	CAT(foo, EOF)
	NOP(CAT(foo, EOF))

the current version of cpp expands the first to:

	fooEOF

but the second is expanded to

	foo(

and then, because that's not a valid token, we error.

This happens because we're fully substituting
parameters, and then doing the token pasting in a
separate pass, when we should be doing it as part
of substituting a function like macro.


The algorithm that cpp uses is documented here:

	https://www.spinellis.gr/blog/20060626/

However, reading it, it's clear that we deviated
when doing the concatenation.

This change to cpp fixes the expansion, and makes
us fully follow Prosser's algorithm.

With this change, both of the examples above are
expanded correctly.

diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.c
--- a/sys/src/cmd/cpp/cpp.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.c	Thu Mar 05 09:13:58 2020 -0800
@@ -68,7 +68,7 @@
 			trp->tp += 1;
 			control(trp);
 		} else if (!skipping && anymacros)
-			expandrow(trp, NULL, Notinmacro);
+			expandrow(trp, NULL);
 		if (skipping)
 			setempty(trp);
 		puttokens(trp);
@@ -217,7 +217,7 @@
 
 	case KLINE:
 		trp->tp = tp+1;
-		expandrow(trp, "<line>", Notinmacro);
+		expandrow(trp, "<line>");
 		tp = trp->bp+2;
 	kline:
 		if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp
diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.h
--- a/sys/src/cmd/cpp/cpp.h	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.h	Thu Mar 05 09:13:58 2020 -0800
@@ -107,11 +107,11 @@
 void	doadefine(Tokenrow *, int);
 void	doinclude(Tokenrow *);
 void	doif(Tokenrow *, enum kwtype);
-void	expand(Tokenrow *, Nlist *, int);
+void	expand(Tokenrow *, Nlist *);
 void	builtin(Tokenrow *, int);
 int	gatherargs(Tokenrow *, Tokenrow **, int, int *);
 void	substargs(Nlist *, Tokenrow *, Tokenrow **);
-void	expandrow(Tokenrow *, char *, int);
+void	expandrow(Tokenrow *, char *);
 void	maketokenrow(int, Tokenrow *);
 Tokenrow *copytokenrow(Tokenrow *, Tokenrow *);
 Token	*growtokenrow(Tokenrow *);
@@ -120,7 +120,7 @@
 void	movetokenrow(Tokenrow *, Tokenrow *);
 void	insertrow(Tokenrow *, int, Tokenrow *);
 void	peektokens(Tokenrow *, char *);
-void	doconcat(Tokenrow *);
+int	doconcat(Tokenrow *, Token *, Token *);
 Tokenrow *stringify(Tokenrow *);
 int	lookuparg(Nlist *, Token *);
 long	eval(Tokenrow *, int);
diff -r 0dd419f096e2 sys/src/cmd/cpp/eval.c
--- a/sys/src/cmd/cpp/eval.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/eval.c	Thu Mar 05 09:13:58 2020 -0800
@@ -116,7 +116,7 @@
 	}
 	ntok = trp->tp - trp->bp;
 	kwdefined->val = KDEFINED;	/* activate special meaning of defined */
-	expandrow(trp, "<if>", Notinmacro);
+	expandrow(trp, "<if>");
 	kwdefined->val = NAME;
 	vp = vals;
 	op = ops;
diff -r 0dd419f096e2 sys/src/cmd/cpp/include.c
--- a/sys/src/cmd/cpp/include.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/include.c	Thu Mar 05 09:13:58 2020 -0800
@@ -18,7 +18,7 @@
 		goto syntax;
 	if (trp->tp->type!=STRING && trp->tp->type!=LT) {
 		len = trp->tp - trp->bp;
-		expandrow(trp, "<include>", Notinmacro);
+		expandrow(trp, "<include>");
 		trp->tp = trp->bp+len;
 	}
 	if (trp->tp->type==STRING) {
diff -r 0dd419f096e2 sys/src/cmd/cpp/macro.c
--- a/sys/src/cmd/cpp/macro.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/macro.c	Thu Mar 05 09:13:58 2020 -0800
@@ -138,7 +138,7 @@
  * Flag is NULL if more input can be gathered.
  */
 void
-expandrow(Tokenrow *trp, char *flag, int inmacro)
+expandrow(Tokenrow *trp, char *flag)
 {
 	Token *tp;
 	Nlist *np;
@@ -170,7 +170,7 @@
 		if (np->flag&ISMAC)
 			builtin(trp, np->val);
 		else {
-			expand(trp, np, inmacro);
+			expand(trp, np);
 		}
 		tp = trp->tp;
 	}
@@ -184,7 +184,7 @@
  * (ordinarily the beginning of the expansion)
  */
 void
-expand(Tokenrow *trp, Nlist *np, int inmacro)
+expand(Tokenrow *trp, Nlist *np)
 {
 	Tokenrow ntr;
 	int ntokc, narg, i;
@@ -214,8 +214,6 @@
 			dofree(atr[i]);
 		}
 	}
-	if(!inmacro)
-		doconcat(&ntr);				/* execute ## operators */
 	hs = newhideset(trp->tp->hideset, np);
 	for (tp=ntr.bp; tp<ntr.lp; tp++) {	/* distribute hidesets */
 		if (tp->type==NAME) {
@@ -326,7 +324,30 @@
 	}
 	return ntok;
 }
-
+	
+int
+ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok)
+{
+	/* EMPTY ## tok */
+	*ap = nil;
+	*an = nil;
+	if (rtr->tp->type == DSHARP) {
+		if (rtr->tp + 1 != rtr->lp)
+			*an = rtr->tp + 1;
+		goto is;
+	}
+	/* tok ## tok */
+	if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) {
+		*ap = rtr->tp;
+		if(rtr->tp + 2 != rtr->lp)
+			*an = rtr->tp + 2;
+		goto is;
+	}
+	return 0;
+is:
+	*ntok = 1 + (*ap != nil) + (*an != nil);
+	return 1;
+}
 /*
  * substitute the argument list into the replacement string
  *  This would be simple except for ## and #
@@ -334,8 +355,8 @@
 void
 substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
 {
-	Tokenrow tatr;
-	Token *tp;
+	Tokenrow tatr, tcat;
+	Token *tp, *ap, *an, *pp, *pn;
 	int ntok, argno;
 
 	for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) {
@@ -350,17 +371,24 @@
 			rtr->tp = tp;
 			insertrow(rtr, ntok, stringify(atr[argno]));
 			continue;
-		}
-		if (rtr->tp->type==NAME
-		 && (argno = lookuparg(np, rtr->tp)) >= 0) {
+		} else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */
+			pp = nil;
+			pn = nil;
+			if(ap && (argno = lookuparg(np, ap)) >= 0)
+				if(atr[argno]->tp != atr[argno]->lp)
+					pp = atr[argno]->lp - 1;
+			if(an && (argno = lookuparg(np, an)) >= 0)
+				if(atr[argno]->tp != atr[argno]->lp)
+					pn = atr[argno]->lp - 1;
+			doconcat(&tcat, pp, pn);
+			insertrow(rtr, ntok, &tcat);
+			continue;
+		} else if (rtr->tp->type==NAME && (argno = lookuparg(np, rtr->tp)) >= 0) {
 			if (rtr->tp < rtr->bp)
 				error(ERROR, "access out of bounds");
-			if ((rtr->tp+1)->type==DSHARP
-			 || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)
-				insertrow(rtr, 1, atr[argno]);
 			else {
 				copytokenrow(&tatr, atr[argno]);
-				expandrow(&tatr, "<macro>", Inmacro);
+				expandrow(&tatr, "<macro>");
 				insertrow(rtr, 1, &tatr);
 				dofree(tatr.bp);
 			}
@@ -373,42 +401,32 @@
 /*
  * Evaluate the ## operators in a tokenrow
  */
-void
-doconcat(Tokenrow *trp)
+int
+doconcat(Tokenrow *ntr, Token *tp, Token *tn)
 {
-	Token *ltp, *ntp;
-	Tokenrow ntr;
-	int len;
+	int np, nn;
+	char *tt;
 
-	for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) {
-		if (trp->tp->type==DSHARP1)
-			trp->tp->type = DSHARP;
-		else if (trp->tp->type==DSHARP) {
-			char tt[128];
-			ltp = trp->tp-1;
-			ntp = trp->tp+1;
-			if (ltp<trp->bp || ntp>=trp->lp) {
-				error(ERROR, "## occurs at border of replacement");
-				continue;
-			}
-			len = ltp->len + ntp->len;
-			strncpy((char*)tt, (char*)ltp->t, ltp->len);
-			strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len);
-			tt[len] = '\0';
-			setsource("<##>", -1, tt);
-			maketokenrow(3, &ntr);
-			gettokens(&ntr, 1);
-			unsetsource();
-			if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS)
-				error(WARNING, "Bad token %r produced by ##", &ntr);
-			ntr.lp = ntr.bp+1;
-			trp->tp = ltp;
-			makespace(&ntr);
-			insertrow(trp, (ntp-ltp)+1, &ntr);
-			dofree(ntr.bp);
-			trp->tp--;
-		}
-	}
+	if(tp == nil && tn == nil)
+		return 0;
+	np = tp ? tp->len : 0;
+	nn = tn ? tn->len : 0;
+	tt = domalloc(np + nn + 1);
+	if(tp)
+		memcpy(tt, tp->t, tp->len);
+	if(tn)
+		memcpy(tt+np, tn->t, tn->len);
+	tt[np+nn] = '\0';
+	setsource("<##>", -1, tt);
+	maketokenrow(3, ntr);
+	gettokens(ntr, 1);
+	unsetsource();
+	free(tt);
+	if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS)
+		error(WARNING, "Bad token %r produced by ##", &ntr);
+	ntr->lp = ntr->bp+1;
+	makespace(ntr);
+	return 1;
 }
 
 /*
diff -r 0dd419f096e2 sys/src/cmd/cpp/test.c
--- a/sys/src/cmd/cpp/test.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/test.c	Thu Mar 05 09:13:58 2020 -0800
@@ -1,4 +1,23 @@
+/* Should generate P1() */
 #define M1()
 #define M2(A1) A1()
 M2(M1)
 M2(P1)
+
+/*
+ * should generate:
+ *	x fooEOF y
+ *	x EOFfoo y
+ *	x(-1) y
+ *	y foo x
+ *	x foo y
+*/
+
+#define NOP(x) x
+#define CAT(a, b) a ## b
+#define EOF	(-1)
+x CAT(foo, EOF) y
+x CAT(EOF, foo) y
+x CAT(, EOF) y
+y CAT(foo,) x
+x CAT(,foo) y



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [9front] Fix CPP Token Paste: Testing requested.
  2020-03-05 17:26 Fix CPP Token Paste: Testing requested ori
@ 2020-03-05 19:21 ` cinap_lenrek
  2020-03-05 21:42   ` ori
  0 siblings, 1 reply; 7+ messages in thread
From: cinap_lenrek @ 2020-03-05 19:21 UTC (permalink / raw)
  To: 9front

good work!

how about we run the old and new cpp binaries on every c file we get and
check for differences in the output for testing?

--
cinap


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [9front] Fix CPP Token Paste: Testing requested.
  2020-03-05 19:21 ` [9front] " cinap_lenrek
@ 2020-03-05 21:42   ` ori
  2020-03-06  7:01     ` ori
  0 siblings, 1 reply; 7+ messages in thread
From: ori @ 2020-03-05 21:42 UTC (permalink / raw)
  To: cinap_lenrek, 9front

> good work!
> 
> how about we run the old and new cpp binaries on every c file we get and
> check for differences in the output for testing?

Will do, though we don't have too much preprocessor abuse in our
code. (ape doesn't use '##' even once!)

Did find one bug I introduced when testing ocaml -- added the NCAT
case to test.c to cover it. Here's an updated patch:

diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.c
--- a/sys/src/cmd/cpp/cpp.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.c	Thu Mar 05 13:41:46 2020 -0800
@@ -68,7 +68,7 @@
 			trp->tp += 1;
 			control(trp);
 		} else if (!skipping && anymacros)
-			expandrow(trp, NULL, Notinmacro);
+			expandrow(trp, NULL);
 		if (skipping)
 			setempty(trp);
 		puttokens(trp);
@@ -217,7 +217,7 @@
 
 	case KLINE:
 		trp->tp = tp+1;
-		expandrow(trp, "<line>", Notinmacro);
+		expandrow(trp, "<line>");
 		tp = trp->bp+2;
 	kline:
 		if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp
diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.h
--- a/sys/src/cmd/cpp/cpp.h	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.h	Thu Mar 05 13:41:46 2020 -0800
@@ -107,11 +107,11 @@
 void	doadefine(Tokenrow *, int);
 void	doinclude(Tokenrow *);
 void	doif(Tokenrow *, enum kwtype);
-void	expand(Tokenrow *, Nlist *, int);
+void	expand(Tokenrow *, Nlist *);
 void	builtin(Tokenrow *, int);
 int	gatherargs(Tokenrow *, Tokenrow **, int, int *);
 void	substargs(Nlist *, Tokenrow *, Tokenrow **);
-void	expandrow(Tokenrow *, char *, int);
+void	expandrow(Tokenrow *, char *);
 void	maketokenrow(int, Tokenrow *);
 Tokenrow *copytokenrow(Tokenrow *, Tokenrow *);
 Token	*growtokenrow(Tokenrow *);
@@ -120,7 +120,7 @@
 void	movetokenrow(Tokenrow *, Tokenrow *);
 void	insertrow(Tokenrow *, int, Tokenrow *);
 void	peektokens(Tokenrow *, char *);
-void	doconcat(Tokenrow *);
+int	glue(Tokenrow *, Token *, Token *);
 Tokenrow *stringify(Tokenrow *);
 int	lookuparg(Nlist *, Token *);
 long	eval(Tokenrow *, int);
diff -r 0dd419f096e2 sys/src/cmd/cpp/eval.c
--- a/sys/src/cmd/cpp/eval.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/eval.c	Thu Mar 05 13:41:46 2020 -0800
@@ -116,7 +116,7 @@
 	}
 	ntok = trp->tp - trp->bp;
 	kwdefined->val = KDEFINED;	/* activate special meaning of defined */
-	expandrow(trp, "<if>", Notinmacro);
+	expandrow(trp, "<if>");
 	kwdefined->val = NAME;
 	vp = vals;
 	op = ops;
diff -r 0dd419f096e2 sys/src/cmd/cpp/include.c
--- a/sys/src/cmd/cpp/include.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/include.c	Thu Mar 05 13:41:46 2020 -0800
@@ -18,7 +18,7 @@
 		goto syntax;
 	if (trp->tp->type!=STRING && trp->tp->type!=LT) {
 		len = trp->tp - trp->bp;
-		expandrow(trp, "<include>", Notinmacro);
+		expandrow(trp, "<include>");
 		trp->tp = trp->bp+len;
 	}
 	if (trp->tp->type==STRING) {
diff -r 0dd419f096e2 sys/src/cmd/cpp/macro.c
--- a/sys/src/cmd/cpp/macro.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/macro.c	Thu Mar 05 13:41:46 2020 -0800
@@ -138,7 +138,7 @@
  * Flag is NULL if more input can be gathered.
  */
 void
-expandrow(Tokenrow *trp, char *flag, int inmacro)
+expandrow(Tokenrow *trp, char *flag)
 {
 	Token *tp;
 	Nlist *np;
@@ -170,7 +170,7 @@
 		if (np->flag&ISMAC)
 			builtin(trp, np->val);
 		else {
-			expand(trp, np, inmacro);
+			expand(trp, np);
 		}
 		tp = trp->tp;
 	}
@@ -184,7 +184,7 @@
  * (ordinarily the beginning of the expansion)
  */
 void
-expand(Tokenrow *trp, Nlist *np, int inmacro)
+expand(Tokenrow *trp, Nlist *np)
 {
 	Tokenrow ntr;
 	int ntokc, narg, i;
@@ -214,8 +214,6 @@
 			dofree(atr[i]);
 		}
 	}
-	if(!inmacro)
-		doconcat(&ntr);				/* execute ## operators */
 	hs = newhideset(trp->tp->hideset, np);
 	for (tp=ntr.bp; tp<ntr.lp; tp++) {	/* distribute hidesets */
 		if (tp->type==NAME) {
@@ -228,7 +226,7 @@
 	ntr.tp = ntr.bp;
 	insertrow(trp, ntokc, &ntr);
 	trp->tp -= rowlen(&ntr);
-	dofree(ntr.bp);
+	free(ntr.bp);
 	return;
 }	
 
@@ -326,7 +324,30 @@
 	}
 	return ntok;
 }
-
+	
+int
+ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok)
+{
+	/* EMPTY ## tok */
+	*ap = nil;
+	*an = nil;
+	if (rtr->tp->type == DSHARP) {
+		if (rtr->tp + 1 != rtr->lp)
+			*an = rtr->tp + 1;
+		goto is;
+	}
+	/* tok ## tok */
+	if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) {
+		*ap = rtr->tp;
+		if(rtr->tp + 2 != rtr->lp)
+			*an = rtr->tp + 2;
+		goto is;
+	}
+	return 0;
+is:
+	*ntok = 1 + (*ap != nil) + (*an != nil);
+	return 1;
+}
 /*
  * substitute the argument list into the replacement string
  *  This would be simple except for ## and #
@@ -334,8 +355,8 @@
 void
 substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
 {
-	Tokenrow tatr;
-	Token *tp;
+	Tokenrow tatr, tcat;
+	Token *tp, *ap, *an, *pp, *pn;
 	int ntok, argno;
 
 	for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) {
@@ -350,19 +371,31 @@
 			rtr->tp = tp;
 			insertrow(rtr, ntok, stringify(atr[argno]));
 			continue;
-		}
-		if (rtr->tp->type==NAME
-		 && (argno = lookuparg(np, rtr->tp)) >= 0) {
+		} else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */
+			pp = nil;
+			pn = nil;
+			if (ap && (argno = lookuparg(np, ap)) >= 0) {
+				if(atr[argno]->tp != atr[argno]->lp)
+					pp = atr[argno]->lp - 1;
+			} else
+				pp = ap;
+			if (an && (argno = lookuparg(np, an)) >= 0) {
+				if(atr[argno]->tp != atr[argno]->lp)
+					pn = atr[argno]->lp - 1;
+			} else
+				pn = an;
+			glue(&tcat, pp, pn);
+			insertrow(rtr, ntok, &tcat);
+			free(tcat.bp);
+			continue;
+		} else if (rtr->tp->type==NAME && (argno = lookuparg(np, rtr->tp)) >= 0) {
 			if (rtr->tp < rtr->bp)
 				error(ERROR, "access out of bounds");
-			if ((rtr->tp+1)->type==DSHARP
-			 || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)
-				insertrow(rtr, 1, atr[argno]);
 			else {
 				copytokenrow(&tatr, atr[argno]);
-				expandrow(&tatr, "<macro>", Inmacro);
+				expandrow(&tatr, "<macro>");
 				insertrow(rtr, 1, &tatr);
-				dofree(tatr.bp);
+				free(tatr.bp);
 			}
 			continue;
 		}
@@ -373,42 +406,32 @@
 /*
  * Evaluate the ## operators in a tokenrow
  */
-void
-doconcat(Tokenrow *trp)
+int
+glue(Tokenrow *ntr, Token *tp, Token *tn)
 {
-	Token *ltp, *ntp;
-	Tokenrow ntr;
-	int len;
+	int np, nn;
+	char *tt;
 
-	for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) {
-		if (trp->tp->type==DSHARP1)
-			trp->tp->type = DSHARP;
-		else if (trp->tp->type==DSHARP) {
-			char tt[128];
-			ltp = trp->tp-1;
-			ntp = trp->tp+1;
-			if (ltp<trp->bp || ntp>=trp->lp) {
-				error(ERROR, "## occurs at border of replacement");
-				continue;
-			}
-			len = ltp->len + ntp->len;
-			strncpy((char*)tt, (char*)ltp->t, ltp->len);
-			strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len);
-			tt[len] = '\0';
-			setsource("<##>", -1, tt);
-			maketokenrow(3, &ntr);
-			gettokens(&ntr, 1);
-			unsetsource();
-			if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS)
-				error(WARNING, "Bad token %r produced by ##", &ntr);
-			ntr.lp = ntr.bp+1;
-			trp->tp = ltp;
-			makespace(&ntr);
-			insertrow(trp, (ntp-ltp)+1, &ntr);
-			dofree(ntr.bp);
-			trp->tp--;
-		}
-	}
+	if(tp == nil && tn == nil)
+		return 0;
+	np = tp ? tp->len : 0;
+	nn = tn ? tn->len : 0;
+	tt = domalloc(np + nn + 1);
+	if(tp)
+		memcpy(tt, tp->t, tp->len);
+	if(tn)
+		memcpy(tt+np, tn->t, tn->len);
+	tt[np+nn] = '\0';
+	setsource("<##>", -1, tt);
+	maketokenrow(3, ntr);
+	gettokens(ntr, 1);
+	unsetsource();
+	dofree(tt);
+	if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS)
+		error(WARNING, "Bad token %r produced by ##", &ntr);
+	ntr->lp = ntr->bp+1;
+	makespace(ntr);
+	return 1;
 }
 
 /*
diff -r 0dd419f096e2 sys/src/cmd/cpp/test.c
--- a/sys/src/cmd/cpp/test.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/test.c	Thu Mar 05 13:41:46 2020 -0800
@@ -1,4 +1,14 @@
-#define M1()
-#define M2(A1) A1()
-M2(M1)
-M2(P1)
+#define NOP(x) x
+#define CAT(a, b) a ## b
+#define EOF	(-1)
+x CAT(foo, EOF) y
+x CAT(EOF, foo) y
+x CAT(, EOF) y
+y CAT(foo,) x
+x CAT(,foo) y
+
+#define NCAT(a)	foo ## a
+NCAT(bar)
+
+#define XCAT(a)	## a
+foo XCAT(bar)



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [9front] Fix CPP Token Paste: Testing requested.
  2020-03-05 21:42   ` ori
@ 2020-03-06  7:01     ` ori
  2020-03-07  0:47       ` ori
  0 siblings, 1 reply; 7+ messages in thread
From: ori @ 2020-03-06  7:01 UTC (permalink / raw)
  To: ori, cinap_lenrek, 9front

>> good work!
>> 
>> how about we run the old and new cpp binaries on every c file we get and
>> check for differences in the output for testing?
> 
> Will do, though we don't have too much preprocessor abuse in our
> code. (ape doesn't use '##' even once!)
> 
> Did find one bug I introduced when testing ocaml -- added the NCAT
> case to test.c to cover it. Here's an updated patch:
> 

Ok. Tested with:

	fn t {
		ramfs -m /tmp/m
		for(f in `{walk -f /sys/src|grep '\.c$'}){
			mkdir -p /tmp/m/^$f;
			>/tmp/m/^$f^/old >[2]/dev/null cpp $f;
			>/tmp/m/^$f^/new >[2]/dev/null /usr/ori/src/cpp/6.out $f
		}
		for(f in `{walk -f /sys/src|grep '\.c$'})
			ape/diff -u /tmp/m/^$f^/old /tmp/m/^$f^/new
	}

With this diff, we have no changes in the cpp output.
It preprocesses and builds ocaml, and doesn't seem to
error on perl5, so far (Though, we have other issues.
The bitfields thing is biting us again.)

diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.c
--- a/sys/src/cmd/cpp/cpp.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.c	Thu Mar 05 22:58:34 2020 -0800
@@ -68,7 +68,7 @@
 			trp->tp += 1;
 			control(trp);
 		} else if (!skipping && anymacros)
-			expandrow(trp, NULL, Notinmacro);
+			expandrow(trp, NULL);
 		if (skipping)
 			setempty(trp);
 		puttokens(trp);
@@ -217,7 +217,7 @@
 
 	case KLINE:
 		trp->tp = tp+1;
-		expandrow(trp, "<line>", Notinmacro);
+		expandrow(trp, "<line>");
 		tp = trp->bp+2;
 	kline:
 		if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp
diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.h
--- a/sys/src/cmd/cpp/cpp.h	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.h	Thu Mar 05 22:58:34 2020 -0800
@@ -107,11 +107,11 @@
 void	doadefine(Tokenrow *, int);
 void	doinclude(Tokenrow *);
 void	doif(Tokenrow *, enum kwtype);
-void	expand(Tokenrow *, Nlist *, int);
+void	expand(Tokenrow *, Nlist *);
 void	builtin(Tokenrow *, int);
 int	gatherargs(Tokenrow *, Tokenrow **, int, int *);
 void	substargs(Nlist *, Tokenrow *, Tokenrow **);
-void	expandrow(Tokenrow *, char *, int);
+void	expandrow(Tokenrow *, char *);
 void	maketokenrow(int, Tokenrow *);
 Tokenrow *copytokenrow(Tokenrow *, Tokenrow *);
 Token	*growtokenrow(Tokenrow *);
@@ -120,7 +120,7 @@
 void	movetokenrow(Tokenrow *, Tokenrow *);
 void	insertrow(Tokenrow *, int, Tokenrow *);
 void	peektokens(Tokenrow *, char *);
-void	doconcat(Tokenrow *);
+int	glue(Tokenrow *, Token *, Token *);
 Tokenrow *stringify(Tokenrow *);
 int	lookuparg(Nlist *, Token *);
 long	eval(Tokenrow *, int);
diff -r 0dd419f096e2 sys/src/cmd/cpp/eval.c
--- a/sys/src/cmd/cpp/eval.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/eval.c	Thu Mar 05 22:58:34 2020 -0800
@@ -116,7 +116,7 @@
 	}
 	ntok = trp->tp - trp->bp;
 	kwdefined->val = KDEFINED;	/* activate special meaning of defined */
-	expandrow(trp, "<if>", Notinmacro);
+	expandrow(trp, "<if>");
 	kwdefined->val = NAME;
 	vp = vals;
 	op = ops;
diff -r 0dd419f096e2 sys/src/cmd/cpp/include.c
--- a/sys/src/cmd/cpp/include.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/include.c	Thu Mar 05 22:58:34 2020 -0800
@@ -18,7 +18,7 @@
 		goto syntax;
 	if (trp->tp->type!=STRING && trp->tp->type!=LT) {
 		len = trp->tp - trp->bp;
-		expandrow(trp, "<include>", Notinmacro);
+		expandrow(trp, "<include>");
 		trp->tp = trp->bp+len;
 	}
 	if (trp->tp->type==STRING) {
diff -r 0dd419f096e2 sys/src/cmd/cpp/macro.c
--- a/sys/src/cmd/cpp/macro.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/macro.c	Thu Mar 05 22:58:34 2020 -0800
@@ -138,7 +138,7 @@
  * Flag is NULL if more input can be gathered.
  */
 void
-expandrow(Tokenrow *trp, char *flag, int inmacro)
+expandrow(Tokenrow *trp, char *flag)
 {
 	Token *tp;
 	Nlist *np;
@@ -170,7 +170,7 @@
 		if (np->flag&ISMAC)
 			builtin(trp, np->val);
 		else {
-			expand(trp, np, inmacro);
+			expand(trp, np);
 		}
 		tp = trp->tp;
 	}
@@ -184,7 +184,7 @@
  * (ordinarily the beginning of the expansion)
  */
 void
-expand(Tokenrow *trp, Nlist *np, int inmacro)
+expand(Tokenrow *trp, Nlist *np)
 {
 	Tokenrow ntr;
 	int ntokc, narg, i;
@@ -214,8 +214,6 @@
 			dofree(atr[i]);
 		}
 	}
-	if(!inmacro)
-		doconcat(&ntr);				/* execute ## operators */
 	hs = newhideset(trp->tp->hideset, np);
 	for (tp=ntr.bp; tp<ntr.lp; tp++) {	/* distribute hidesets */
 		if (tp->type==NAME) {
@@ -228,7 +226,7 @@
 	ntr.tp = ntr.bp;
 	insertrow(trp, ntokc, &ntr);
 	trp->tp -= rowlen(&ntr);
-	dofree(ntr.bp);
+	free(ntr.bp);
 	return;
 }	
 
@@ -326,7 +324,25 @@
 	}
 	return ntok;
 }
-
+	
+int
+ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok)
+{
+	*ap = nil;
+	*an = nil;
+	/* EMPTY ## tok */
+	if (rtr->tp->type == DSHARP && rtr->tp != rtr->bp)
+		rtr->tp--;
+	/* tok ## tok */
+	if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) {
+		*ap = rtr->tp;
+		if(rtr->tp + 2 != rtr->lp)
+			*an = rtr->tp + 2;
+		*ntok = 1 + (*ap != nil) + (*an != nil);
+		return 1;
+	}
+	return 0;
+}
 /*
  * substitute the argument list into the replacement string
  *  This would be simple except for ## and #
@@ -334,8 +350,8 @@
 void
 substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
 {
-	Tokenrow tatr;
-	Token *tp;
+	Tokenrow tatr, tcat;
+	Token *tp, *ap, *an, *pp, *pn;
 	int ntok, argno;
 
 	for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) {
@@ -350,19 +366,27 @@
 			rtr->tp = tp;
 			insertrow(rtr, ntok, stringify(atr[argno]));
 			continue;
-		}
-		if (rtr->tp->type==NAME
-		 && (argno = lookuparg(np, rtr->tp)) >= 0) {
+		} else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */
+			pp = ap;
+			pn = an;
+			if (ap && (argno = lookuparg(np, ap)) >= 0)
+				if(atr[argno]->tp != atr[argno]->lp)
+					pp = atr[argno]->lp - 1;
+			if (an && (argno = lookuparg(np, an)) >= 0)
+				if(atr[argno]->tp != atr[argno]->lp)
+					pn = atr[argno]->lp - 1;
+			glue(&tcat, pp, pn);
+			insertrow(rtr, ntok, &tcat);
+			free(tcat.bp);
+			continue;
+		} else if (rtr->tp->type==NAME && (argno = lookuparg(np, rtr->tp)) >= 0) {
 			if (rtr->tp < rtr->bp)
 				error(ERROR, "access out of bounds");
-			if ((rtr->tp+1)->type==DSHARP
-			 || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)
-				insertrow(rtr, 1, atr[argno]);
 			else {
 				copytokenrow(&tatr, atr[argno]);
-				expandrow(&tatr, "<macro>", Inmacro);
+				expandrow(&tatr, "<macro>");
 				insertrow(rtr, 1, &tatr);
-				dofree(tatr.bp);
+				free(tatr.bp);
 			}
 			continue;
 		}
@@ -373,42 +397,32 @@
 /*
  * Evaluate the ## operators in a tokenrow
  */
-void
-doconcat(Tokenrow *trp)
+int
+glue(Tokenrow *ntr, Token *tp, Token *tn)
 {
-	Token *ltp, *ntp;
-	Tokenrow ntr;
-	int len;
+	int np, nn;
+	char *tt;
 
-	for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) {
-		if (trp->tp->type==DSHARP1)
-			trp->tp->type = DSHARP;
-		else if (trp->tp->type==DSHARP) {
-			char tt[128];
-			ltp = trp->tp-1;
-			ntp = trp->tp+1;
-			if (ltp<trp->bp || ntp>=trp->lp) {
-				error(ERROR, "## occurs at border of replacement");
-				continue;
-			}
-			len = ltp->len + ntp->len;
-			strncpy((char*)tt, (char*)ltp->t, ltp->len);
-			strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len);
-			tt[len] = '\0';
-			setsource("<##>", -1, tt);
-			maketokenrow(3, &ntr);
-			gettokens(&ntr, 1);
-			unsetsource();
-			if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS)
-				error(WARNING, "Bad token %r produced by ##", &ntr);
-			ntr.lp = ntr.bp+1;
-			trp->tp = ltp;
-			makespace(&ntr);
-			insertrow(trp, (ntp-ltp)+1, &ntr);
-			dofree(ntr.bp);
-			trp->tp--;
-		}
-	}
+	if(tp == nil && tn == nil)
+		return 0;
+	np = tp ? tp->len : 0;
+	nn = tn ? tn->len : 0;
+	tt = domalloc(np + nn + 1);
+	if(tp)
+		memcpy(tt, tp->t, tp->len);
+	if(tn)
+		memcpy(tt+np, tn->t, tn->len);
+	tt[np+nn] = '\0';
+	setsource("<##>", -1, tt);
+	maketokenrow(3, ntr);
+	gettokens(ntr, 1);
+	unsetsource();
+	dofree(tt);
+	if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS)
+		error(WARNING, "Bad token %r produced by ##", &ntr);
+	ntr->lp = ntr->bp+1;
+	makespace(ntr);
+	return 1;
 }
 
 /*
diff -r 0dd419f096e2 sys/src/cmd/cpp/test.c
--- a/sys/src/cmd/cpp/test.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/test.c	Thu Mar 05 22:58:34 2020 -0800
@@ -1,4 +1,17 @@
-#define M1()
-#define M2(A1) A1()
-M2(M1)
-M2(P1)
+#define NOP(x) x
+#define CAT(a, b) a ## b
+#define EOF	(-1)
+x CAT(foo, EOF) y
+x CAT(EOF, foo) y
+x CAT(, EOF) y
+y CAT(foo,) x
+x CAT(,foo) y
+
+#define NCAT(a)	foo ## a
+NCAT(bar)
+
+#define XCAT(a)	## a
+foo XCAT(bar)
+
+#define CAT3(foo)	a##foo##b
+CAT3(blah)



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [9front] Fix CPP Token Paste: Testing requested.
  2020-03-06  7:01     ` ori
@ 2020-03-07  0:47       ` ori
  2020-03-08 17:42         ` ori
  0 siblings, 1 reply; 7+ messages in thread
From: ori @ 2020-03-07  0:47 UTC (permalink / raw)
  To: ori, cinap_lenrek, 9front

>>> good work!
>>> 
>>> how about we run the old and new cpp binaries on every c file we get and
>>> check for differences in the output for testing?
>> 
>> Will do, though we don't have too much preprocessor abuse in our
>> code. (ape doesn't use '##' even once!)
>> 
>> Did find one bug I introduced when testing ocaml -- added the NCAT
>> case to test.c to cover it. Here's an updated patch:
>> 
> 
> Ok. Tested with:
> 
> 	fn t {
> 		ramfs -m /tmp/m
> 		for(f in `{walk -f /sys/src|grep '\.c$'}){
> 			mkdir -p /tmp/m/^$f;
> 			>/tmp/m/^$f^/old >[2]/dev/null cpp $f;
> 			>/tmp/m/^$f^/new >[2]/dev/null /usr/ori/src/cpp/6.out $f
> 		}
> 		for(f in `{walk -f /sys/src|grep '\.c$'})
> 			ape/diff -u /tmp/m/^$f^/old /tmp/m/^$f^/new
> 	}
> 
> With this diff, we have no changes in the cpp output.
> It preprocesses and builds ocaml, and doesn't seem to
> error on perl5, so far (Though, we have other issues.
> The bitfields thing is biting us again.)
> 

And, found one more edge case: CAT(,) should produce an
empty token. Now working.

It looks like we mishandle empty vararg lists too, but
that's a separate issue, and probably a separate patch.

So:
	- No diffs on /sys/src
	- Looks like it works ok on ocaml, gmake and my wip perl5 fixes

Any other tests anyone thinks I should do?

diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.c
--- a/sys/src/cmd/cpp/cpp.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.c	Fri Mar 06 16:31:21 2020 -0800
@@ -68,7 +68,7 @@
 			trp->tp += 1;
 			control(trp);
 		} else if (!skipping && anymacros)
-			expandrow(trp, NULL, Notinmacro);
+			expandrow(trp, NULL);
 		if (skipping)
 			setempty(trp);
 		puttokens(trp);
@@ -217,7 +217,7 @@
 
 	case KLINE:
 		trp->tp = tp+1;
-		expandrow(trp, "<line>", Notinmacro);
+		expandrow(trp, "<line>");
 		tp = trp->bp+2;
 	kline:
 		if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp
diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.h
--- a/sys/src/cmd/cpp/cpp.h	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.h	Fri Mar 06 16:31:21 2020 -0800
@@ -107,11 +107,11 @@
 void	doadefine(Tokenrow *, int);
 void	doinclude(Tokenrow *);
 void	doif(Tokenrow *, enum kwtype);
-void	expand(Tokenrow *, Nlist *, int);
+void	expand(Tokenrow *, Nlist *);
 void	builtin(Tokenrow *, int);
 int	gatherargs(Tokenrow *, Tokenrow **, int, int *);
 void	substargs(Nlist *, Tokenrow *, Tokenrow **);
-void	expandrow(Tokenrow *, char *, int);
+void	expandrow(Tokenrow *, char *);
 void	maketokenrow(int, Tokenrow *);
 Tokenrow *copytokenrow(Tokenrow *, Tokenrow *);
 Token	*growtokenrow(Tokenrow *);
@@ -120,7 +120,7 @@
 void	movetokenrow(Tokenrow *, Tokenrow *);
 void	insertrow(Tokenrow *, int, Tokenrow *);
 void	peektokens(Tokenrow *, char *);
-void	doconcat(Tokenrow *);
+void	glue(Tokenrow *, Token *, Token *);
 Tokenrow *stringify(Tokenrow *);
 int	lookuparg(Nlist *, Token *);
 long	eval(Tokenrow *, int);
diff -r 0dd419f096e2 sys/src/cmd/cpp/eval.c
--- a/sys/src/cmd/cpp/eval.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/eval.c	Fri Mar 06 16:31:21 2020 -0800
@@ -116,7 +116,7 @@
 	}
 	ntok = trp->tp - trp->bp;
 	kwdefined->val = KDEFINED;	/* activate special meaning of defined */
-	expandrow(trp, "<if>", Notinmacro);
+	expandrow(trp, "<if>");
 	kwdefined->val = NAME;
 	vp = vals;
 	op = ops;
diff -r 0dd419f096e2 sys/src/cmd/cpp/include.c
--- a/sys/src/cmd/cpp/include.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/include.c	Fri Mar 06 16:31:21 2020 -0800
@@ -18,7 +18,7 @@
 		goto syntax;
 	if (trp->tp->type!=STRING && trp->tp->type!=LT) {
 		len = trp->tp - trp->bp;
-		expandrow(trp, "<include>", Notinmacro);
+		expandrow(trp, "<include>");
 		trp->tp = trp->bp+len;
 	}
 	if (trp->tp->type==STRING) {
diff -r 0dd419f096e2 sys/src/cmd/cpp/macro.c
--- a/sys/src/cmd/cpp/macro.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/macro.c	Fri Mar 06 16:31:21 2020 -0800
@@ -138,7 +138,7 @@
  * Flag is NULL if more input can be gathered.
  */
 void
-expandrow(Tokenrow *trp, char *flag, int inmacro)
+expandrow(Tokenrow *trp, char *flag)
 {
 	Token *tp;
 	Nlist *np;
@@ -170,7 +170,7 @@
 		if (np->flag&ISMAC)
 			builtin(trp, np->val);
 		else {
-			expand(trp, np, inmacro);
+			expand(trp, np);
 		}
 		tp = trp->tp;
 	}
@@ -184,7 +184,7 @@
  * (ordinarily the beginning of the expansion)
  */
 void
-expand(Tokenrow *trp, Nlist *np, int inmacro)
+expand(Tokenrow *trp, Nlist *np)
 {
 	Tokenrow ntr;
 	int ntokc, narg, i;
@@ -214,8 +214,6 @@
 			dofree(atr[i]);
 		}
 	}
-	if(!inmacro)
-		doconcat(&ntr);				/* execute ## operators */
 	hs = newhideset(trp->tp->hideset, np);
 	for (tp=ntr.bp; tp<ntr.lp; tp++) {	/* distribute hidesets */
 		if (tp->type==NAME) {
@@ -228,7 +226,7 @@
 	ntr.tp = ntr.bp;
 	insertrow(trp, ntokc, &ntr);
 	trp->tp -= rowlen(&ntr);
-	dofree(ntr.bp);
+	free(ntr.bp);
 	return;
 }	
 
@@ -326,7 +324,25 @@
 	}
 	return ntok;
 }
-
+	
+int
+ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok)
+{
+	*ap = nil;
+	*an = nil;
+	/* EMPTY ## tok */
+	if (rtr->tp->type == DSHARP && rtr->tp != rtr->bp)
+		rtr->tp--;
+	/* tok ## tok */
+	if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) {
+		*ap = rtr->tp;
+		if(rtr->tp + 2 != rtr->lp)
+			*an = rtr->tp + 2;
+		*ntok = 1 + (*ap != nil) + (*an != nil);
+		return 1;
+	}
+	return 0;
+}
 /*
  * substitute the argument list into the replacement string
  *  This would be simple except for ## and #
@@ -334,8 +350,8 @@
 void
 substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
 {
-	Tokenrow tatr;
-	Token *tp;
+	Tokenrow tatr, tcat;
+	Token *tp, *ap, *an, *pp, *pn;
 	int ntok, argno;
 
 	for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) {
@@ -350,19 +366,31 @@
 			rtr->tp = tp;
 			insertrow(rtr, ntok, stringify(atr[argno]));
 			continue;
-		}
-		if (rtr->tp->type==NAME
-		 && (argno = lookuparg(np, rtr->tp)) >= 0) {
+		} else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */
+			pp = ap;
+			pn = an;
+			if (ap && (argno = lookuparg(np, ap)) >= 0){
+				pp = nil;
+				if(atr[argno]->tp != atr[argno]->lp)
+					pp = atr[argno]->lp - 1;
+			}
+			if (an && (argno = lookuparg(np, an)) >= 0) {
+				pn = nil;
+				if(atr[argno]->tp != atr[argno]->lp)
+					pn = atr[argno]->lp - 1;
+			}
+			glue(&tcat, pp, pn);
+			insertrow(rtr, ntok, &tcat);
+			free(tcat.bp);
+			continue;
+		} else if (rtr->tp->type==NAME && (argno = lookuparg(np, rtr->tp)) >= 0) {
 			if (rtr->tp < rtr->bp)
 				error(ERROR, "access out of bounds");
-			if ((rtr->tp+1)->type==DSHARP
-			 || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)
-				insertrow(rtr, 1, atr[argno]);
 			else {
 				copytokenrow(&tatr, atr[argno]);
-				expandrow(&tatr, "<macro>", Inmacro);
+				expandrow(&tatr, "<macro>");
 				insertrow(rtr, 1, &tatr);
-				dofree(tatr.bp);
+				free(tatr.bp);
 			}
 			continue;
 		}
@@ -374,41 +402,32 @@
  * Evaluate the ## operators in a tokenrow
  */
 void
-doconcat(Tokenrow *trp)
+glue(Tokenrow *ntr, Token *tp, Token *tn)
 {
-	Token *ltp, *ntp;
-	Tokenrow ntr;
-	int len;
+	int np, nn;
+	char *tt;
 
-	for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) {
-		if (trp->tp->type==DSHARP1)
-			trp->tp->type = DSHARP;
-		else if (trp->tp->type==DSHARP) {
-			char tt[128];
-			ltp = trp->tp-1;
-			ntp = trp->tp+1;
-			if (ltp<trp->bp || ntp>=trp->lp) {
-				error(ERROR, "## occurs at border of replacement");
-				continue;
-			}
-			len = ltp->len + ntp->len;
-			strncpy((char*)tt, (char*)ltp->t, ltp->len);
-			strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len);
-			tt[len] = '\0';
-			setsource("<##>", -1, tt);
-			maketokenrow(3, &ntr);
-			gettokens(&ntr, 1);
-			unsetsource();
-			if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS)
-				error(WARNING, "Bad token %r produced by ##", &ntr);
-			ntr.lp = ntr.bp+1;
-			trp->tp = ltp;
-			makespace(&ntr);
-			insertrow(trp, (ntp-ltp)+1, &ntr);
-			dofree(ntr.bp);
-			trp->tp--;
-		}
+	np = tp ? tp->len : 0;
+	nn = tn ? tn->len : 0;
+	tt = domalloc(np + nn + 1);
+	if(tp)
+		memcpy(tt, tp->t, tp->len);
+	if(tn)
+		memcpy(tt+np, tn->t, tn->len);
+	tt[np+nn] = '\0';
+	setsource("<##>", -1, tt);
+	maketokenrow(3, ntr);
+	gettokens(ntr, 1);
+	unsetsource();
+	dofree(tt);
+	if (np + nn == 0) {
+		ntr->lp = ntr->bp;
+	} else {
+		if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS)
+			error(WARNING, "Bad token %r produced by ##", &ntr);
+		ntr->lp = ntr->bp+1;
 	}
+	makespace(ntr);
 }
 
 /*
diff -r 0dd419f096e2 sys/src/cmd/cpp/test.c
--- a/sys/src/cmd/cpp/test.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/test.c	Fri Mar 06 16:31:21 2020 -0800
@@ -1,4 +1,18 @@
-#define M1()
-#define M2(A1) A1()
-M2(M1)
-M2(P1)
+#define NOP(x) x
+#define CAT(a, b) a ## b
+#define EOF	(-1)
+x CAT(foo, EOF) y
+x CAT(EOF, foo) y
+x CAT(, EOF) y
+y CAT(foo,) x
+x CAT(,foo) y
+X CAT(,) y
+
+#define NCAT(a)	foo ## a
+NCAT(bar)
+
+#define XCAT(a)	## a
+foo XCAT(bar)
+
+#define CAT3(foo)	a##foo##b
+CAT3(blah)



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [9front] Fix CPP Token Paste: Testing requested.
  2020-03-07  0:47       ` ori
@ 2020-03-08 17:42         ` ori
  2020-03-16  2:55           ` ori
  0 siblings, 1 reply; 7+ messages in thread
From: ori @ 2020-03-08 17:42 UTC (permalink / raw)
  To: ori, cinap_lenrek, 9front


>> 
>> Ok. Tested with:
>> 
>> 	fn t {
>> 		ramfs -m /tmp/m
>> 		for(f in `{walk -f /sys/src|grep '\.c$'}){
>> 			mkdir -p /tmp/m/^$f;
>> 			>/tmp/m/^$f^/old >[2]/dev/null cpp $f;
>> 			>/tmp/m/^$f^/new >[2]/dev/null /usr/ori/src/cpp/6.out $f
>> 		}
>> 		for(f in `{walk -f /sys/src|grep '\.c$'})
>> 			ape/diff -u /tmp/m/^$f^/old /tmp/m/^$f^/new
>> 	}
>> 
>> With this diff, we have no changes in the cpp output.
>> It preprocesses and builds ocaml, and doesn't seem to
>> error on perl5, so far (Though, we have other issues.
>> The bitfields thing is biting us again.)
>> 
> 
> And, found one more edge case: CAT(,) should produce an
> empty token. Now working.
> 
> It looks like we mishandle empty vararg lists too, but
> that's a separate issue, and probably a separate patch.
> 
> So:
> 	- No diffs on /sys/src
> 	- Looks like it works ok on ocaml, gmake and my wip perl5 fixes
> 
> Any other tests anyone thinks I should do?

More progress on perl caught another edge case:

	#define V 42
	#define X CAT(V, .0)

should give the result 42.0.

This version fixes that issue. It does introduce changes
when diffing against the system, but that seems to be just
whitespace, which comes from insertrow(). This is expected.

The system builds, as do a selection of ports (ocaml, gmake,
netsurf).

Good to commit?

diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.c
--- a/sys/src/cmd/cpp/cpp.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.c	Sun Mar 08 10:34:29 2020 -0700
@@ -68,7 +68,7 @@
 			trp->tp += 1;
 			control(trp);
 		} else if (!skipping && anymacros)
-			expandrow(trp, NULL, Notinmacro);
+			expandrow(trp, NULL);
 		if (skipping)
 			setempty(trp);
 		puttokens(trp);
@@ -217,7 +217,7 @@
 
 	case KLINE:
 		trp->tp = tp+1;
-		expandrow(trp, "<line>", Notinmacro);
+		expandrow(trp, "<line>");
 		tp = trp->bp+2;
 	kline:
 		if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp
diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.h
--- a/sys/src/cmd/cpp/cpp.h	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.h	Sun Mar 08 10:34:29 2020 -0700
@@ -107,11 +107,11 @@
 void	doadefine(Tokenrow *, int);
 void	doinclude(Tokenrow *);
 void	doif(Tokenrow *, enum kwtype);
-void	expand(Tokenrow *, Nlist *, int);
+void	expand(Tokenrow *, Nlist *);
 void	builtin(Tokenrow *, int);
 int	gatherargs(Tokenrow *, Tokenrow **, int, int *);
 void	substargs(Nlist *, Tokenrow *, Tokenrow **);
-void	expandrow(Tokenrow *, char *, int);
+void	expandrow(Tokenrow *, char *);
 void	maketokenrow(int, Tokenrow *);
 Tokenrow *copytokenrow(Tokenrow *, Tokenrow *);
 Token	*growtokenrow(Tokenrow *);
@@ -120,7 +120,7 @@
 void	movetokenrow(Tokenrow *, Tokenrow *);
 void	insertrow(Tokenrow *, int, Tokenrow *);
 void	peektokens(Tokenrow *, char *);
-void	doconcat(Tokenrow *);
+void	glue(Tokenrow *, Token *, Token *);
 Tokenrow *stringify(Tokenrow *);
 int	lookuparg(Nlist *, Token *);
 long	eval(Tokenrow *, int);
diff -r 0dd419f096e2 sys/src/cmd/cpp/eval.c
--- a/sys/src/cmd/cpp/eval.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/eval.c	Sun Mar 08 10:34:29 2020 -0700
@@ -116,7 +116,7 @@
 	}
 	ntok = trp->tp - trp->bp;
 	kwdefined->val = KDEFINED;	/* activate special meaning of defined */
-	expandrow(trp, "<if>", Notinmacro);
+	expandrow(trp, "<if>");
 	kwdefined->val = NAME;
 	vp = vals;
 	op = ops;
diff -r 0dd419f096e2 sys/src/cmd/cpp/include.c
--- a/sys/src/cmd/cpp/include.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/include.c	Sun Mar 08 10:34:29 2020 -0700
@@ -18,7 +18,7 @@
 		goto syntax;
 	if (trp->tp->type!=STRING && trp->tp->type!=LT) {
 		len = trp->tp - trp->bp;
-		expandrow(trp, "<include>", Notinmacro);
+		expandrow(trp, "<include>");
 		trp->tp = trp->bp+len;
 	}
 	if (trp->tp->type==STRING) {
diff -r 0dd419f096e2 sys/src/cmd/cpp/macro.c
--- a/sys/src/cmd/cpp/macro.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/macro.c	Sun Mar 08 10:34:29 2020 -0700
@@ -138,7 +138,7 @@
  * Flag is NULL if more input can be gathered.
  */
 void
-expandrow(Tokenrow *trp, char *flag, int inmacro)
+expandrow(Tokenrow *trp, char *flag)
 {
 	Token *tp;
 	Nlist *np;
@@ -170,7 +170,7 @@
 		if (np->flag&ISMAC)
 			builtin(trp, np->val);
 		else {
-			expand(trp, np, inmacro);
+			expand(trp, np);
 		}
 		tp = trp->tp;
 	}
@@ -184,7 +184,7 @@
  * (ordinarily the beginning of the expansion)
  */
 void
-expand(Tokenrow *trp, Nlist *np, int inmacro)
+expand(Tokenrow *trp, Nlist *np)
 {
 	Tokenrow ntr;
 	int ntokc, narg, i;
@@ -193,12 +193,14 @@
 	int hs;
 
 	copytokenrow(&ntr, np->vp);		/* copy macro value */
-	if (np->ap==NULL)			/* parameterless */
+	if (np->ap==NULL) {			/* parameterless */
 		ntokc = 1;
-	else {
+		/* substargs for handling # and ## */
+		atr[0] = nil;
+		substargs(np, &ntr, atr);
+	} else {
 		ntokc = gatherargs(trp, atr, (np->flag&ISVARMAC) ? rowlen(np->ap) : 0, &narg);
 		if (narg<0) {			/* not actually a call (no '(') */
-/* error(WARNING, "%d %r\n", narg, trp); */
 			/* gatherargs has already pushed trp->tr to the next token */
 			return;
 		}
@@ -214,8 +216,6 @@
 			dofree(atr[i]);
 		}
 	}
-	if(!inmacro)
-		doconcat(&ntr);				/* execute ## operators */
 	hs = newhideset(trp->tp->hideset, np);
 	for (tp=ntr.bp; tp<ntr.lp; tp++) {	/* distribute hidesets */
 		if (tp->type==NAME) {
@@ -228,7 +228,7 @@
 	ntr.tp = ntr.bp;
 	insertrow(trp, ntokc, &ntr);
 	trp->tp -= rowlen(&ntr);
-	dofree(ntr.bp);
+	free(ntr.bp);
 	return;
 }	
 
@@ -255,7 +255,6 @@
 		if (trp->tp >= trp->lp) {
 			gettokens(trp, 0);
 			if ((trp->lp-1)->type==END) {
-/* error(WARNING, "reach END\n"); */
 				trp->lp -= 1;
 				if (*narg>=0)
 					trp->tp -= ntok;
@@ -326,7 +325,25 @@
 	}
 	return ntok;
 }
-
+	
+int
+ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok)
+{
+	*ap = nil;
+	*an = nil;
+	/* EMPTY ## tok */
+	if (rtr->tp->type == DSHARP && rtr->tp != rtr->bp)
+		rtr->tp--;
+	/* tok ## tok */
+	if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) {
+		*ap = rtr->tp;
+		if(rtr->tp + 2 != rtr->lp)
+			*an = rtr->tp + 2;
+		*ntok = 1 + (*ap != nil) + (*an != nil);
+		return 1;
+	}
+	return 0;
+}
 /*
  * substitute the argument list into the replacement string
  *  This would be simple except for ## and #
@@ -334,12 +351,14 @@
 void
 substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
 {
-	Tokenrow tatr;
-	Token *tp;
-	int ntok, argno;
+	Tokenrow ttr;
+	Token *tp, *ap, *an, *pp, *pn;
+	int ntok, argno, hs;
 
 	for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) {
-		if (rtr->tp->type==SHARP) {	/* string operator */
+		if(rtr->tp->hideset && checkhideset(rtr->tp->hideset, np)) {
+			rtr->tp++;
+		} else if (rtr->tp->type==SHARP) {	/* string operator */
 			tp = rtr->tp;
 			rtr->tp += 1;
 			if ((argno = lookuparg(np, rtr->tp))<0) {
@@ -349,24 +368,49 @@
 			ntok = 1 + (rtr->tp - tp);
 			rtr->tp = tp;
 			insertrow(rtr, ntok, stringify(atr[argno]));
-			continue;
+		} else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */
+			pp = ap;
+			pn = an;
+			if (ap && (argno = lookuparg(np, ap)) >= 0){
+				pp = nil;
+				if(atr[argno]->tp != atr[argno]->lp)
+					pp = atr[argno]->lp - 1;
+			}
+			if (an && (argno = lookuparg(np, an)) >= 0) {
+				pn = nil;
+				if(atr[argno]->tp != atr[argno]->lp)
+					pn = atr[argno]->lp - 1;
+			}
+			glue(&ttr, pp, pn);
+			insertrow(rtr, ntok, &ttr);
+			free(ttr.bp);
+		} else if (rtr->tp->type==NAME) {
+			if((argno = lookuparg(np, rtr->tp)) >= 0) {
+				if (rtr->tp < rtr->bp) {
+					error(ERROR, "access out of bounds");
+					continue;
+				}
+				copytokenrow(&ttr, atr[argno]);
+				expandrow(&ttr, "<macro>");
+				insertrow(rtr, 1, &ttr);
+				free(ttr.bp);
+			} else {
+				maketokenrow(1, &ttr);
+				ttr.lp = ttr.tp + 1;
+				*ttr.tp = *rtr->tp;
+
+				hs = newhideset(rtr->tp->hideset, np);
+				if(ttr.tp->hideset == 0)
+					ttr.tp->hideset = hs;
+				else
+					ttr.tp->hideset = unionhideset(ttr.tp->hideset, hs);
+				expandrow(&ttr, (char*)np->name);
+				insertrow(rtr, 1, &ttr);
+				dofree(ttr.bp);
+			}
+		} else {
+			rtr->tp++;
 		}
-		if (rtr->tp->type==NAME
-		 && (argno = lookuparg(np, rtr->tp)) >= 0) {
-			if (rtr->tp < rtr->bp)
-				error(ERROR, "access out of bounds");
-			if ((rtr->tp+1)->type==DSHARP
-			 || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)
-				insertrow(rtr, 1, atr[argno]);
-			else {
-				copytokenrow(&tatr, atr[argno]);
-				expandrow(&tatr, "<macro>", Inmacro);
-				insertrow(rtr, 1, &tatr);
-				dofree(tatr.bp);
-			}
-			continue;
-		}
-		rtr->tp++;
 	}
 }
 
@@ -374,41 +418,35 @@
  * Evaluate the ## operators in a tokenrow
  */
 void
-doconcat(Tokenrow *trp)
+glue(Tokenrow *ntr, Token *tp, Token *tn)
 {
-	Token *ltp, *ntp;
-	Tokenrow ntr;
-	int len;
+	int np, nn;
+	char *tt, *p, *n;
 
-	for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) {
-		if (trp->tp->type==DSHARP1)
-			trp->tp->type = DSHARP;
-		else if (trp->tp->type==DSHARP) {
-			char tt[128];
-			ltp = trp->tp-1;
-			ntp = trp->tp+1;
-			if (ltp<trp->bp || ntp>=trp->lp) {
-				error(ERROR, "## occurs at border of replacement");
-				continue;
-			}
-			len = ltp->len + ntp->len;
-			strncpy((char*)tt, (char*)ltp->t, ltp->len);
-			strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len);
-			tt[len] = '\0';
-			setsource("<##>", -1, tt);
-			maketokenrow(3, &ntr);
-			gettokens(&ntr, 1);
-			unsetsource();
-			if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS)
-				error(WARNING, "Bad token %r produced by ##", &ntr);
-			ntr.lp = ntr.bp+1;
-			trp->tp = ltp;
-			makespace(&ntr);
-			insertrow(trp, (ntp-ltp)+1, &ntr);
-			dofree(ntr.bp);
-			trp->tp--;
+	np = tp ? tp->len : 0;
+	nn = tn ? tn->len : 0;
+	tt = domalloc(np + nn + 1);
+	if(tp)
+		memcpy(tt, tp->t, tp->len);
+	if(tn)
+		memcpy(tt+np, tn->t, tn->len);
+	tt[np+nn] = '\0';
+	setsource("<##>", -1, tt);
+	maketokenrow(3, ntr);
+	gettokens(ntr, 1);
+	unsetsource();
+	dofree(tt);
+	if (np + nn == 0) {
+		ntr->lp = ntr->bp;
+	} else {
+		if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS) {
+			p = tp ? (char*)tp->t : "<empty>";
+			n = tn ? (char*)tn->t : "<empty>";
+			error(WARNING, "Bad token %r produced by %s ## %s", &ntr, p, n);
 		}
+		ntr->lp = ntr->bp+1;
 	}
+	makespace(ntr);
 }
 
 /*
diff -r 0dd419f096e2 sys/src/cmd/cpp/test.c
--- a/sys/src/cmd/cpp/test.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/test.c	Sun Mar 08 10:34:29 2020 -0700
@@ -1,4 +1,28 @@
-#define M1()
-#define M2(A1) A1()
-M2(M1)
-M2(P1)
+#define NOP(x) x
+#define CAT(a, b) a ## b
+#define EOF	(-1)
+x NOP(CAT(foo, EOF)) y
+x NOP(CAT(EOF, foo)) y
+x CAT(, EOF) y
+y CAT(foo,) x
+x CAT(,foo) y
+X NOP(CAT(,)) y
+
+#define NCAT(a)	foo ## a
+NCAT(bar)
+
+#define XCAT(a)	## a
+foo XCAT(bar)
+
+#define CAT3(foo)	a##foo##b
+CAT3(blah)
+
+#define BAR	3
+#define FOO	CAT(BAR, 3)
+FOO
+
+/*
+#define xprint(a, ...)	print(a, __VA_ARGS__)
+xprint("hi", "there")
+xprint("hi")
+*/
diff -r 0dd419f096e2 sys/src/cmd/cpp/tokens.c
--- a/sys/src/cmd/cpp/tokens.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/tokens.c	Sun Mar 08 10:34:29 2020 -0700
@@ -136,7 +136,6 @@
 	movetokenrow(dtr, str);
 	makespace(dtr);
 	dtr->tp += nrtok;
-	makespace(dtr);
 }
 
 /*



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [9front] Fix CPP Token Paste: Testing requested.
  2020-03-08 17:42         ` ori
@ 2020-03-16  2:55           ` ori
  0 siblings, 0 replies; 7+ messages in thread
From: ori @ 2020-03-16  2:55 UTC (permalink / raw)
  To: ori, cinap_lenrek, 9front

>>> 
>>> Ok. Tested with:
>>> 
>>> 	fn t {
>>> 		ramfs -m /tmp/m
>>> 		for(f in `{walk -f /sys/src|grep '\.c$'}){
>>> 			mkdir -p /tmp/m/^$f;
>>> 			>/tmp/m/^$f^/old >[2]/dev/null cpp $f;
>>> 			>/tmp/m/^$f^/new >[2]/dev/null /usr/ori/src/cpp/6.out $f
>>> 		}
>>> 		for(f in `{walk -f /sys/src|grep '\.c$'})
>>> 			ape/diff -u /tmp/m/^$f^/old /tmp/m/^$f^/new
>>> 	}
>>> 
>>> With this diff, we have no changes in the cpp output.
>>> It preprocesses and builds ocaml, and doesn't seem to
>>> error on perl5, so far (Though, we have other issues.
>>> The bitfields thing is biting us again.)
>>> 
>> 
>> And, found one more edge case: CAT(,) should produce an
>> empty token. Now working.
>> 
>> It looks like we mishandle empty vararg lists too, but
>> that's a separate issue, and probably a separate patch.
>> 
>> So:
>> 	- No diffs on /sys/src
>> 	- Looks like it works ok on ocaml, gmake and my wip perl5 fixes
>> 
>> Any other tests anyone thinks I should do?
> 
> More progress on perl caught another edge case:
> 
> 	#define V 42
> 	#define X CAT(V, .0)
> 
> should give the result 42.0.
> 
> This version fixes that issue. It does introduce changes
> when diffing against the system, but that seems to be just
> whitespace, which comes from insertrow(). This is expected.
> 
> The system builds, as do a selection of ports (ocaml, gmake,
> netsurf).
> 

...And, of course, this had a regression, where

	#define Y(x)   x
	#define X      a,b
	#define Z      Y(X)
	Z

would expand X to 'a,b' before expanding Y, which
would lead to us complaining about invalid ','s.

We needed to replace the ',' tokens produced by that expansion
with XCOMMA tokens, which are not treated as parameter separators.

There are still some edge cases around counting arguments:
specifically, if we #define MAC(x), we should expand MAC()
as though it had a single empty argument. This is not a
regression, so I'll fix it separately.

Most recent patch here:

diff -r e87c04248053 sys/src/cmd/cpp/cpp.c
--- a/sys/src/cmd/cpp/cpp.c	Sun Mar 15 15:08:04 2020 -0700
+++ b/sys/src/cmd/cpp/cpp.c	Sun Mar 15 19:54:26 2020 -0700
@@ -68,7 +68,7 @@
 			trp->tp += 1;
 			control(trp);
 		} else if (!skipping && anymacros)
-			expandrow(trp, NULL, Notinmacro);
+			expandrow(trp, NULL);
 		if (skipping)
 			setempty(trp);
 		puttokens(trp);
@@ -217,7 +217,7 @@
 
 	case KLINE:
 		trp->tp = tp+1;
-		expandrow(trp, "<line>", Notinmacro);
+		expandrow(trp, "<line>");
 		tp = trp->bp+2;
 	kline:
 		if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp
diff -r e87c04248053 sys/src/cmd/cpp/cpp.h
--- a/sys/src/cmd/cpp/cpp.h	Sun Mar 15 15:08:04 2020 -0700
+++ b/sys/src/cmd/cpp/cpp.h	Sun Mar 15 19:54:26 2020 -0700
@@ -14,7 +14,7 @@
 		EQ, NEQ, LEQ, GEQ, LSH, RSH, LAND, LOR, PPLUS, MMINUS,
 		ARROW, SBRA, SKET, LP, RP, DOT, AND, STAR, PLUS, MINUS,
 		TILDE, NOT, SLASH, PCT, LT, GT, CIRC, OR, QUEST,
-		COLON, ASGN, COMMA, SHARP, SEMIC, CBRA, CKET,
+		COLON, ASGN, COMMA, XCOMMA, SHARP, SEMIC, CBRA, CKET,
 		ASPLUS, ASMINUS, ASSTAR, ASSLASH, ASPCT, ASCIRC, ASLSH,
 		ASRSH, ASOR, ASAND, ELLIPS,
 		DSHARP1, NAME1, DEFINED, UMINUS };
@@ -107,11 +107,11 @@
 void	doadefine(Tokenrow *, int);
 void	doinclude(Tokenrow *);
 void	doif(Tokenrow *, enum kwtype);
-void	expand(Tokenrow *, Nlist *, int);
+void	expand(Tokenrow *, Nlist *);
 void	builtin(Tokenrow *, int);
 int	gatherargs(Tokenrow *, Tokenrow **, int, int *);
 void	substargs(Nlist *, Tokenrow *, Tokenrow **);
-void	expandrow(Tokenrow *, char *, int);
+void	expandrow(Tokenrow *, char *);
 void	maketokenrow(int, Tokenrow *);
 Tokenrow *copytokenrow(Tokenrow *, Tokenrow *);
 Token	*growtokenrow(Tokenrow *);
@@ -120,7 +120,7 @@
 void	movetokenrow(Tokenrow *, Tokenrow *);
 void	insertrow(Tokenrow *, int, Tokenrow *);
 void	peektokens(Tokenrow *, char *);
-void	doconcat(Tokenrow *);
+void	glue(Tokenrow *, Token *, Token *);
 Tokenrow *stringify(Tokenrow *);
 int	lookuparg(Nlist *, Token *);
 long	eval(Tokenrow *, int);
diff -r e87c04248053 sys/src/cmd/cpp/eval.c
--- a/sys/src/cmd/cpp/eval.c	Sun Mar 15 15:08:04 2020 -0700
+++ b/sys/src/cmd/cpp/eval.c	Sun Mar 15 19:54:26 2020 -0700
@@ -28,66 +28,67 @@
 	char	arity;
 	char	ctype;
 } priority[] = {
-	{ 0, 0, 0 },		/* END */
-	{ 0, 0, 0 },		/* UNCLASS */
-	{ 0, 0, 0 },		/* NAME */
-	{ 0, 0, 0 },		/* NUMBER */
-	{ 0, 0, 0 },		/* STRING */
-	{ 0, 0, 0 },		/* CCON */
-	{ 0, 0, 0 },		/* NL */
-	{ 0, 0, 0 },		/* WS */
-	{ 0, 0, 0 },		/* DSHARP */
-	{ 11, 2, RELAT },	/* EQ */
-	{ 11, 2, RELAT },	/* NEQ */
-	{ 12, 2, RELAT },	/* LEQ */
-	{ 12, 2, RELAT },	/* GEQ */
-	{ 13, 2, SHIFT },	/* LSH */
-	{ 13, 2, SHIFT },	/* RSH */
-	{ 7, 2, LOGIC },	/* LAND */
-	{ 6, 2, LOGIC },	/* LOR */
-	{ 0, 0, 0 },		/* PPLUS */
-	{ 0, 0, 0 },		/* MMINUS */
-	{ 0, 0, 0 },		/* ARROW */
-	{ 0, 0, 0 },		/* SBRA */
-	{ 0, 0, 0 },		/* SKET */
-	{ 3, 0, 0 },		/* LP */
-	{ 3, 0, 0 },		/* RP */
-	{ 0, 0, 0 },		/* DOT */
-	{ 10, 2, ARITH },	/* AND */
-	{ 15, 2, ARITH },	/* STAR */
-	{ 14, 2, ARITH },	/* PLUS */
-	{ 14, 2, ARITH },	/* MINUS */
-	{ 16, 1, UNARY },	/* TILDE */
-	{ 16, 1, UNARY },	/* NOT */
-	{ 15, 2, ARITH },	/* SLASH */
-	{ 15, 2, ARITH },	/* PCT */
-	{ 12, 2, RELAT },	/* LT */
-	{ 12, 2, RELAT },	/* GT */
-	{ 9, 2, ARITH },	/* CIRC */
-	{ 8, 2, ARITH },	/* OR */
-	{ 5, 2, SPCL },		/* QUEST */
-	{ 5, 2, SPCL },		/* COLON */
-	{ 0, 0, 0 },		/* ASGN */
-	{ 4, 2, 0 },		/* COMMA */
-	{ 0, 0, 0 },		/* SHARP */
-	{ 0, 0, 0 },		/* SEMIC */
-	{ 0, 0, 0 },		/* CBRA */
-	{ 0, 0, 0 },		/* CKET */
-	{ 0, 0, 0 },		/* ASPLUS */
- 	{ 0, 0, 0 },		/* ASMINUS */
- 	{ 0, 0, 0 },		/* ASSTAR */
- 	{ 0, 0, 0 },		/* ASSLASH */
- 	{ 0, 0, 0 },		/* ASPCT */
- 	{ 0, 0, 0 },		/* ASCIRC */
- 	{ 0, 0, 0 },		/* ASLSH */
-	{ 0, 0, 0 },		/* ASRSH */
- 	{ 0, 0, 0 },		/* ASOR */
- 	{ 0, 0, 0 },		/* ASAND */
-	{ 0, 0, 0 },		/* ELLIPS */
-	{ 0, 0, 0 },		/* DSHARP1 */
-	{ 0, 0, 0 },		/* NAME1 */
-	{ 16, 1, UNARY },	/* DEFINED */
-	{ 16, 0, UNARY },	/* UMINUS */
+	[END]		{ 0, 0, 0 },
+	[UNCLASS]	{ 0, 0, 0 },
+	[NAME]		{ 0, 0, 0 },
+	[NUMBER]	{ 0, 0, 0 },
+	[STRING]	{ 0, 0, 0 },
+	[CCON]		{ 0, 0, 0 },
+	[NL]		{ 0, 0, 0 },
+	[WS]		{ 0, 0, 0 },
+	[DSHARP]	{ 0, 0, 0 },
+	[EQ]		{ 11, 2, RELAT },
+	[NEQ]		{ 11, 2, RELAT },
+	[LEQ]		{ 12, 2, RELAT },
+	[GEQ]		{ 12, 2, RELAT },
+	[LSH]		{ 13, 2, SHIFT },
+	[RSH]		{ 13, 2, SHIFT },
+	[LAND]		{ 7, 2, LOGIC },
+	[LOR]		{ 6, 2, LOGIC },
+	[PPLUS]		{ 0, 0, 0 },
+	[MMINUS]	{ 0, 0, 0 },
+	[ARROW]		{ 0, 0, 0 },
+	[SBRA]		{ 0, 0, 0 },
+	[SKET]		{ 0, 0, 0 },
+	[LP]		{ 3, 0, 0 },
+	[RP]		{ 3, 0, 0 },
+	[DOT]		{ 0, 0, 0 },
+	[AND]		{ 10, 2, ARITH },
+	[STAR]		{ 15, 2, ARITH },
+	[PLUS]		{ 14, 2, ARITH },
+	[MINUS]		{ 14, 2, ARITH },
+	[TILDE]		{ 16, 1, UNARY },
+	[NOT]		{ 16, 1, UNARY },
+	[SLASH]		{ 15, 2, ARITH },
+	[PCT]		{ 15, 2, ARITH },
+	[LT]		{ 12, 2, RELAT },
+	[GT]		{ 12, 2, RELAT },
+	[CIRC]		{ 9, 2, ARITH },
+	[OR]		{ 8, 2, ARITH },
+	[QUEST]		{ 5, 2, SPCL },
+	[COLON]		{ 5, 2, SPCL },
+	[ASGN]		{ 0, 0, 0 },
+	[COMMA]		{ 4, 2, 0 },
+	[XCOMMA]	{ 4, 2, 0 },
+	[SHARP]		{ 0, 0, 0 },
+	[SEMIC]		{ 0, 0, 0 },
+	[CBRA]		{ 0, 0, 0 },
+	[CKET]		{ 0, 0, 0 },
+	[ASPLUS]	{ 0, 0, 0 },
+ 	[ASMINUS]	{ 0, 0, 0 },
+ 	[ASSTAR]	{ 0, 0, 0 },
+ 	[ASSLASH]	{ 0, 0, 0 },
+ 	[ASPCT]		{ 0, 0, 0 },
+ 	[ASCIRC]	{ 0, 0, 0 },
+ 	[ASLSH]		{ 0, 0, 0 },
+	[ASRSH]		{ 0, 0, 0 },
+ 	[ASOR]		{ 0, 0, 0 },
+ 	[ASAND]		{ 0, 0, 0 },
+	[ELLIPS]	{ 0, 0, 0 },
+	[DSHARP1]	{ 0, 0, 0 },
+	[NAME1]		{ 0, 0, 0 },
+	[DEFINED]	{ 16, 1, UNARY },
+	[UMINUS]	{ 16, 0, UNARY },
 };
 
 int	evalop(struct pri);
@@ -116,7 +117,7 @@
 	}
 	ntok = trp->tp - trp->bp;
 	kwdefined->val = KDEFINED;	/* activate special meaning of defined */
-	expandrow(trp, "<if>", Notinmacro);
+	expandrow(trp, "<if>");
 	kwdefined->val = NAME;
 	vp = vals;
 	op = ops;
@@ -165,7 +166,7 @@
 		case EQ: case NEQ: case LEQ: case GEQ: case LSH: case RSH:
 		case LAND: case LOR: case SLASH: case PCT:
 		case LT: case GT: case CIRC: case OR: case QUEST:
-		case COLON: case COMMA:
+		case COLON: case COMMA: case XCOMMA:
 			if (rand==0)
 				goto syntax;
 			if (evalop(priority[tp->type])!=0)
diff -r e87c04248053 sys/src/cmd/cpp/include.c
--- a/sys/src/cmd/cpp/include.c	Sun Mar 15 15:08:04 2020 -0700
+++ b/sys/src/cmd/cpp/include.c	Sun Mar 15 19:54:26 2020 -0700
@@ -18,7 +18,7 @@
 		goto syntax;
 	if (trp->tp->type!=STRING && trp->tp->type!=LT) {
 		len = trp->tp - trp->bp;
-		expandrow(trp, "<include>", Notinmacro);
+		expandrow(trp, "<include>");
 		trp->tp = trp->bp+len;
 	}
 	if (trp->tp->type==STRING) {
diff -r e87c04248053 sys/src/cmd/cpp/macro.c
--- a/sys/src/cmd/cpp/macro.c	Sun Mar 15 15:08:04 2020 -0700
+++ b/sys/src/cmd/cpp/macro.c	Sun Mar 15 19:54:26 2020 -0700
@@ -138,7 +138,7 @@
  * Flag is NULL if more input can be gathered.
  */
 void
-expandrow(Tokenrow *trp, char *flag, int inmacro)
+expandrow(Tokenrow *trp, char *flag)
 {
 	Token *tp;
 	Nlist *np;
@@ -170,7 +170,7 @@
 		if (np->flag&ISMAC)
 			builtin(trp, np->val);
 		else {
-			expand(trp, np, inmacro);
+			expand(trp, np);
 		}
 		tp = trp->tp;
 	}
@@ -184,7 +184,7 @@
  * (ordinarily the beginning of the expansion)
  */
 void
-expand(Tokenrow *trp, Nlist *np, int inmacro)
+expand(Tokenrow *trp, Nlist *np)
 {
 	Tokenrow ntr;
 	int ntokc, narg, i;
@@ -193,12 +193,14 @@
 	int hs;
 
 	copytokenrow(&ntr, np->vp);		/* copy macro value */
-	if (np->ap==NULL)			/* parameterless */
+	if (np->ap==NULL) {			/* parameterless */
 		ntokc = 1;
-	else {
+		/* substargs for handling # and ## */
+		atr[0] = nil;
+		substargs(np, &ntr, atr);
+	} else {
 		ntokc = gatherargs(trp, atr, (np->flag&ISVARMAC) ? rowlen(np->ap) : 0, &narg);
 		if (narg<0) {			/* not actually a call (no '(') */
-/* error(WARNING, "%d %r\n", narg, trp); */
 			/* gatherargs has already pushed trp->tr to the next token */
 			return;
 		}
@@ -214,8 +216,6 @@
 			dofree(atr[i]);
 		}
 	}
-	if(!inmacro)
-		doconcat(&ntr);				/* execute ## operators */
 	hs = newhideset(trp->tp->hideset, np);
 	for (tp=ntr.bp; tp<ntr.lp; tp++) {	/* distribute hidesets */
 		if (tp->type==NAME) {
@@ -228,8 +228,7 @@
 	ntr.tp = ntr.bp;
 	insertrow(trp, ntokc, &ntr);
 	trp->tp -= rowlen(&ntr);
-	dofree(ntr.bp);
-	return;
+	free(ntr.bp);
 }	
 
 /*
@@ -255,7 +254,6 @@
 		if (trp->tp >= trp->lp) {
 			gettokens(trp, 0);
 			if ((trp->lp-1)->type==END) {
-/* error(WARNING, "reach END\n"); */
 				trp->lp -= 1;
 				if (*narg>=0)
 					trp->tp -= ntok;
@@ -326,7 +324,25 @@
 	}
 	return ntok;
 }
-
+	
+int
+ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok)
+{
+	*ap = nil;
+	*an = nil;
+	/* EMPTY ## tok */
+	if (rtr->tp->type == DSHARP && rtr->tp != rtr->bp)
+		rtr->tp--;
+	/* tok ## tok */
+	if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) {
+		*ap = rtr->tp;
+		if(rtr->tp + 2 != rtr->lp)
+			*an = rtr->tp + 2;
+		*ntok = 1 + (*ap != nil) + (*an != nil);
+		return 1;
+	}
+	return 0;
+}
 /*
  * substitute the argument list into the replacement string
  *  This would be simple except for ## and #
@@ -334,12 +350,14 @@
 void
 substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
 {
-	Tokenrow tatr;
-	Token *tp;
-	int ntok, argno;
+	Tokenrow ttr;
+	Token *tp, *ap, *an, *pp, *pn;
+	int ntok, argno, hs;
 
 	for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) {
-		if (rtr->tp->type==SHARP) {	/* string operator */
+		if(rtr->tp->hideset && checkhideset(rtr->tp->hideset, np)) {
+			rtr->tp++;
+		} else if (rtr->tp->type==SHARP) {	/* string operator */
 			tp = rtr->tp;
 			rtr->tp += 1;
 			if ((argno = lookuparg(np, rtr->tp))<0) {
@@ -349,24 +367,52 @@
 			ntok = 1 + (rtr->tp - tp);
 			rtr->tp = tp;
 			insertrow(rtr, ntok, stringify(atr[argno]));
-			continue;
+		} else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */
+			pp = ap;
+			pn = an;
+			if (ap && (argno = lookuparg(np, ap)) >= 0){
+				pp = nil;
+				if(atr[argno]->tp != atr[argno]->lp)
+					pp = atr[argno]->lp - 1;
+			}
+			if (an && (argno = lookuparg(np, an)) >= 0) {
+				pn = nil;
+				if(atr[argno]->tp != atr[argno]->lp)
+					pn = atr[argno]->lp - 1;
+			}
+			glue(&ttr, pp, pn);
+			insertrow(rtr, ntok, &ttr);
+			free(ttr.bp);
+		} else if (rtr->tp->type==NAME) {
+			if((argno = lookuparg(np, rtr->tp)) >= 0) {
+				if (rtr->tp < rtr->bp) {
+					error(ERROR, "access out of bounds");
+					continue;
+				}
+				copytokenrow(&ttr, atr[argno]);
+				expandrow(&ttr, "<macro>");
+				insertrow(rtr, 1, &ttr);
+				free(ttr.bp);
+			} else {
+				maketokenrow(1, &ttr);
+				ttr.lp = ttr.tp + 1;
+				*ttr.tp = *rtr->tp;
+
+				hs = newhideset(rtr->tp->hideset, np);
+				if(ttr.tp->hideset == 0)
+					ttr.tp->hideset = hs;
+				else
+					ttr.tp->hideset = unionhideset(ttr.tp->hideset, hs);
+				expandrow(&ttr, (char*)np->name);
+				for(tp = ttr.bp; tp != ttr.lp; tp++)
+					if(tp->type == COMMA)
+						tp->type = XCOMMA;
+				insertrow(rtr, 1, &ttr);
+				dofree(ttr.bp);
+			}
+		} else {
+			rtr->tp++;
 		}
-		if (rtr->tp->type==NAME
-		 && (argno = lookuparg(np, rtr->tp)) >= 0) {
-			if (rtr->tp < rtr->bp)
-				error(ERROR, "access out of bounds");
-			if ((rtr->tp+1)->type==DSHARP
-			 || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)
-				insertrow(rtr, 1, atr[argno]);
-			else {
-				copytokenrow(&tatr, atr[argno]);
-				expandrow(&tatr, "<macro>", Inmacro);
-				insertrow(rtr, 1, &tatr);
-				dofree(tatr.bp);
-			}
-			continue;
-		}
-		rtr->tp++;
 	}
 }
 
@@ -374,41 +420,35 @@
  * Evaluate the ## operators in a tokenrow
  */
 void
-doconcat(Tokenrow *trp)
+glue(Tokenrow *ntr, Token *tp, Token *tn)
 {
-	Token *ltp, *ntp;
-	Tokenrow ntr;
-	int len;
+	int np, nn;
+	char *tt, *p, *n;
 
-	for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) {
-		if (trp->tp->type==DSHARP1)
-			trp->tp->type = DSHARP;
-		else if (trp->tp->type==DSHARP) {
-			char tt[128];
-			ltp = trp->tp-1;
-			ntp = trp->tp+1;
-			if (ltp<trp->bp || ntp>=trp->lp) {
-				error(ERROR, "## occurs at border of replacement");
-				continue;
-			}
-			len = ltp->len + ntp->len;
-			strncpy((char*)tt, (char*)ltp->t, ltp->len);
-			strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len);
-			tt[len] = '\0';
-			setsource("<##>", -1, tt);
-			maketokenrow(3, &ntr);
-			gettokens(&ntr, 1);
-			unsetsource();
-			if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS)
-				error(WARNING, "Bad token %r produced by ##", &ntr);
-			ntr.lp = ntr.bp+1;
-			trp->tp = ltp;
-			makespace(&ntr);
-			insertrow(trp, (ntp-ltp)+1, &ntr);
-			dofree(ntr.bp);
-			trp->tp--;
+	np = tp ? tp->len : 0;
+	nn = tn ? tn->len : 0;
+	tt = domalloc(np + nn + 1);
+	if(tp)
+		memcpy(tt, tp->t, tp->len);
+	if(tn)
+		memcpy(tt+np, tn->t, tn->len);
+	tt[np+nn] = '\0';
+	setsource("<##>", -1, tt);
+	maketokenrow(3, ntr);
+	gettokens(ntr, 1);
+	unsetsource();
+	dofree(tt);
+	if (np + nn == 0) {
+		ntr->lp = ntr->bp;
+	} else {
+		if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS) {
+			p = tp ? (char*)tp->t : "<empty>";
+			n = tn ? (char*)tn->t : "<empty>";
+			error(WARNING, "Bad token %r produced by %s ## %s", &ntr, p, n);
 		}
+		ntr->lp = ntr->bp+1;
 	}
+	makespace(ntr);
 }
 
 /*
diff -r e87c04248053 sys/src/cmd/cpp/test.c
--- a/sys/src/cmd/cpp/test.c	Sun Mar 15 15:08:04 2020 -0700
+++ b/sys/src/cmd/cpp/test.c	Sun Mar 15 19:54:26 2020 -0700
@@ -1,4 +1,61 @@
-#define M1()
-#define M2(A1) A1()
-M2(M1)
-M2(P1)
+#define NOP(x) x
+#define CAT(a, b) a ## b
+#define EOF	(-1)
+x NOP(CAT(foo, EOF)) y
+x NOP(CAT(EOF, foo)) y
+x CAT(, EOF) y
+y CAT(foo,) x
+x CAT(,foo) y
+X NOP(CAT(,)) y
+
+#define NCAT(a)	foo ## a
+NCAT(bar)
+
+#define XCAT(a)	## a
+foo XCAT(bar)
+
+#define CAT3(foo)	a##foo##b
+CAT3(blah)
+
+#define BAR	3
+#define FOO	CAT(BAR, 3)
+FOO
+
+/*
+ * CURRENTLY BROKEN:
+ *     __VA_ARGS__ requires at least one item.
+ *     It should accept an empty list.
+#define xprint(a, ...)	print(a, __VA_ARGS__)
+xprint("hi", "there")
+xprint("hi")
+*/
+
+#define C	a,b
+#define X(a)	a
+#define Y	X(C)
+Y
+
+#define    x          3
+#define    f(a)       f(x * (a))
+#undef     x
+#define    x          2
+#define    g          f
+#define    z          z[0]
+#define    h          g(~
+#define    m(a)       a(w)
+#define    w          0,1
+#define    t(a)       a
+#define    p()        int
+#define    q(x)       x
+#define    r(x,y)     x ## y
+#define    str(x)     # x
+f(y+1) + f(f(z)) % t(t(g)(0) + t)(1);
+g(x+(3,4)-w) | h 5) & m
+(f)^m(m);
+/*
+ * CURRENTLY BROKEN:
+ *     mac() needs at least one argument.
+ *     It should treat no args as a single empty arg list.
+p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) };
+char c[2][6] = { str(hello), str() };
+*/
\ No newline at end of file
diff -r e87c04248053 sys/src/cmd/cpp/test.expected
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/src/cmd/cpp/test.expected	Sun Mar 15 19:54:26 2020 -0700
@@ -0,0 +1,49 @@
+#line 1 "/usr/ori/src/cpp/test.c"
+
+
+
+x fooEOF y
+x EOFfoo y
+x(-1) y
+y foo x
+x foo y
+X y
+
+
+ foobar
+
+
+foo ## bar
+
+
+ ablahb
+
+
+
+ 33
+
+
+#line 32 "/usr/ori/src/cpp/test.c"
+
+
+
+
+ a,b
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);
+ f(2 * (2+(3,4)- 0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^ m(0,1);
+#line 55 "/usr/ori/src/cpp/test.c"



^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-03-16  2:55 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-05 17:26 Fix CPP Token Paste: Testing requested ori
2020-03-05 19:21 ` [9front] " cinap_lenrek
2020-03-05 21:42   ` ori
2020-03-06  7:01     ` ori
2020-03-07  0:47       ` ori
2020-03-08 17:42         ` ori
2020-03-16  2:55           ` ori

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).