9front - general discussion about 9front
 help / color / mirror / Atom feed
From: ori@eigenstate.org
To: 9front@9front.org
Subject: Fix CPP Token Paste: Testing requested.
Date: Thu, 5 Mar 2020 09:26:08 -0800	[thread overview]
Message-ID: <5999B9A7D842E3DD4153EEE1186C63DB@eigenstate.org> (raw)

** This change is relatively subtle, and I'd like
some more testing before committing **

Perl does a bunch of macro stuff that breaks on our
preprocessor. So does TCC. Specifically, when token
pasting in a function-like macro, we should paste the
arguments as they were passed to the macro, without
macro-expanding them first. We do this, but only when
the function-like macro is not expanded inside another
macro's argument. For example, in:

	#define NOP(x) x
	#define CAT(a, b) a ## b
	#define EOF	(-1)
	CAT(foo, EOF)
	NOP(CAT(foo, EOF))

the current version of cpp expands the first to:

	fooEOF

but the second is expanded to

	foo(

and then, because that is not a single valid token, we error.

This happens because we're fully substituting
parameters, and then doing the token pasting in a
separate pass, when we should be doing it as part
of substituting a function like macro.


The algorithm that cpp uses is documented here:

	https://www.spinellis.gr/blog/20060626/

However, reading it, it's clear that we deviated
from it when doing the concatenation.

This change to cpp fixes the expansion, and makes
us fully follow Prosser's algorithm.

With this change, both of the example invocations
above are expanded correctly.

diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.c
--- a/sys/src/cmd/cpp/cpp.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.c	Thu Mar 05 09:13:58 2020 -0800
@@ -68,7 +68,7 @@
 			trp->tp += 1;
 			control(trp);
 		} else if (!skipping && anymacros)
-			expandrow(trp, NULL, Notinmacro);
+			expandrow(trp, NULL);
 		if (skipping)
 			setempty(trp);
 		puttokens(trp);
@@ -217,7 +217,7 @@
 
 	case KLINE:
 		trp->tp = tp+1;
-		expandrow(trp, "<line>", Notinmacro);
+		expandrow(trp, "<line>");
 		tp = trp->bp+2;
 	kline:
 		if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp
diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.h
--- a/sys/src/cmd/cpp/cpp.h	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.h	Thu Mar 05 09:13:58 2020 -0800
@@ -107,11 +107,11 @@
 void	doadefine(Tokenrow *, int);
 void	doinclude(Tokenrow *);
 void	doif(Tokenrow *, enum kwtype);
-void	expand(Tokenrow *, Nlist *, int);
+void	expand(Tokenrow *, Nlist *);
 void	builtin(Tokenrow *, int);
 int	gatherargs(Tokenrow *, Tokenrow **, int, int *);
 void	substargs(Nlist *, Tokenrow *, Tokenrow **);
-void	expandrow(Tokenrow *, char *, int);
+void	expandrow(Tokenrow *, char *);
 void	maketokenrow(int, Tokenrow *);
 Tokenrow *copytokenrow(Tokenrow *, Tokenrow *);
 Token	*growtokenrow(Tokenrow *);
@@ -120,7 +120,7 @@
 void	movetokenrow(Tokenrow *, Tokenrow *);
 void	insertrow(Tokenrow *, int, Tokenrow *);
 void	peektokens(Tokenrow *, char *);
-void	doconcat(Tokenrow *);
+int	doconcat(Tokenrow *, Token *, Token *);
 Tokenrow *stringify(Tokenrow *);
 int	lookuparg(Nlist *, Token *);
 long	eval(Tokenrow *, int);
diff -r 0dd419f096e2 sys/src/cmd/cpp/eval.c
--- a/sys/src/cmd/cpp/eval.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/eval.c	Thu Mar 05 09:13:58 2020 -0800
@@ -116,7 +116,7 @@
 	}
 	ntok = trp->tp - trp->bp;
 	kwdefined->val = KDEFINED;	/* activate special meaning of defined */
-	expandrow(trp, "<if>", Notinmacro);
+	expandrow(trp, "<if>");
 	kwdefined->val = NAME;
 	vp = vals;
 	op = ops;
diff -r 0dd419f096e2 sys/src/cmd/cpp/include.c
--- a/sys/src/cmd/cpp/include.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/include.c	Thu Mar 05 09:13:58 2020 -0800
@@ -18,7 +18,7 @@
 		goto syntax;
 	if (trp->tp->type!=STRING && trp->tp->type!=LT) {
 		len = trp->tp - trp->bp;
-		expandrow(trp, "<include>", Notinmacro);
+		expandrow(trp, "<include>");
 		trp->tp = trp->bp+len;
 	}
 	if (trp->tp->type==STRING) {
diff -r 0dd419f096e2 sys/src/cmd/cpp/macro.c
--- a/sys/src/cmd/cpp/macro.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/macro.c	Thu Mar 05 09:13:58 2020 -0800
@@ -138,7 +138,7 @@
  * Flag is NULL if more input can be gathered.
  */
 void
-expandrow(Tokenrow *trp, char *flag, int inmacro)
+expandrow(Tokenrow *trp, char *flag)
 {
 	Token *tp;
 	Nlist *np;
@@ -170,7 +170,7 @@
 		if (np->flag&ISMAC)
 			builtin(trp, np->val);
 		else {
-			expand(trp, np, inmacro);
+			expand(trp, np);
 		}
 		tp = trp->tp;
 	}
@@ -184,7 +184,7 @@
  * (ordinarily the beginning of the expansion)
  */
 void
-expand(Tokenrow *trp, Nlist *np, int inmacro)
+expand(Tokenrow *trp, Nlist *np)
 {
 	Tokenrow ntr;
 	int ntokc, narg, i;
@@ -214,8 +214,6 @@
 			dofree(atr[i]);
 		}
 	}
-	if(!inmacro)
-		doconcat(&ntr);				/* execute ## operators */
 	hs = newhideset(trp->tp->hideset, np);
 	for (tp=ntr.bp; tp<ntr.lp; tp++) {	/* distribute hidesets */
 		if (tp->type==NAME) {
@@ -326,7 +324,30 @@
 	}
 	return ntok;
 }
-
+	
+int
+ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok)
+{
+	/* EMPTY ## tok */
+	*ap = nil;
+	*an = nil;
+	if (rtr->tp->type == DSHARP) {
+		if (rtr->tp + 1 != rtr->lp)
+			*an = rtr->tp + 1;
+		goto is;
+	}
+	/* tok ## tok */
+	if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) {
+		*ap = rtr->tp;
+		if(rtr->tp + 2 != rtr->lp)
+			*an = rtr->tp + 2;
+		goto is;
+	}
+	return 0;
+is:
+	*ntok = 1 + (*ap != nil) + (*an != nil);
+	return 1;
+}
 /*
  * substitute the argument list into the replacement string
  *  This would be simple except for ## and #
@@ -334,8 +355,8 @@
 void
 substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
 {
-	Tokenrow tatr;
-	Token *tp;
+	Tokenrow tatr, tcat;
+	Token *tp, *ap, *an, *pp, *pn;
 	int ntok, argno;
 
 	for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) {
@@ -350,17 +371,24 @@
 			rtr->tp = tp;
 			insertrow(rtr, ntok, stringify(atr[argno]));
 			continue;
-		}
-		if (rtr->tp->type==NAME
-		 && (argno = lookuparg(np, rtr->tp)) >= 0) {
+		} else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */
+			pp = nil;
+			pn = nil;
+			if(ap && (argno = lookuparg(np, ap)) >= 0)
+				if(atr[argno]->tp != atr[argno]->lp)
+					pp = atr[argno]->lp - 1;
+			if(an && (argno = lookuparg(np, an)) >= 0)
+				if(atr[argno]->tp != atr[argno]->lp)
+					pn = atr[argno]->lp - 1;
+			doconcat(&tcat, pp, pn);
+			insertrow(rtr, ntok, &tcat);
+			continue;
+		} else if (rtr->tp->type==NAME && (argno = lookuparg(np, rtr->tp)) >= 0) {
 			if (rtr->tp < rtr->bp)
 				error(ERROR, "access out of bounds");
-			if ((rtr->tp+1)->type==DSHARP
-			 || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)
-				insertrow(rtr, 1, atr[argno]);
 			else {
 				copytokenrow(&tatr, atr[argno]);
-				expandrow(&tatr, "<macro>", Inmacro);
+				expandrow(&tatr, "<macro>");
 				insertrow(rtr, 1, &tatr);
 				dofree(tatr.bp);
 			}
@@ -373,42 +401,32 @@
 /*
  * Evaluate the ## operators in a tokenrow
  */
-void
-doconcat(Tokenrow *trp)
+int
+doconcat(Tokenrow *ntr, Token *tp, Token *tn)
 {
-	Token *ltp, *ntp;
-	Tokenrow ntr;
-	int len;
+	int np, nn;
+	char *tt;
 
-	for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) {
-		if (trp->tp->type==DSHARP1)
-			trp->tp->type = DSHARP;
-		else if (trp->tp->type==DSHARP) {
-			char tt[128];
-			ltp = trp->tp-1;
-			ntp = trp->tp+1;
-			if (ltp<trp->bp || ntp>=trp->lp) {
-				error(ERROR, "## occurs at border of replacement");
-				continue;
-			}
-			len = ltp->len + ntp->len;
-			strncpy((char*)tt, (char*)ltp->t, ltp->len);
-			strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len);
-			tt[len] = '\0';
-			setsource("<##>", -1, tt);
-			maketokenrow(3, &ntr);
-			gettokens(&ntr, 1);
-			unsetsource();
-			if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS)
-				error(WARNING, "Bad token %r produced by ##", &ntr);
-			ntr.lp = ntr.bp+1;
-			trp->tp = ltp;
-			makespace(&ntr);
-			insertrow(trp, (ntp-ltp)+1, &ntr);
-			dofree(ntr.bp);
-			trp->tp--;
-		}
-	}
+	if(tp == nil && tn == nil)
+		return 0;
+	np = tp ? tp->len : 0;
+	nn = tn ? tn->len : 0;
+	tt = domalloc(np + nn + 1);
+	if(tp)
+		memcpy(tt, tp->t, tp->len);
+	if(tn)
+		memcpy(tt+np, tn->t, tn->len);
+	tt[np+nn] = '\0';
+	setsource("<##>", -1, tt);
+	maketokenrow(3, ntr);
+	gettokens(ntr, 1);
+	unsetsource();
+	free(tt);
+	if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS)
+		error(WARNING, "Bad token %r produced by ##", &ntr);
+	ntr->lp = ntr->bp+1;
+	makespace(ntr);
+	return 1;
 }
 
 /*
diff -r 0dd419f096e2 sys/src/cmd/cpp/test.c
--- a/sys/src/cmd/cpp/test.c	Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/test.c	Thu Mar 05 09:13:58 2020 -0800
@@ -1,4 +1,23 @@
+/* Should generate P1() */
 #define M1()
 #define M2(A1) A1()
 M2(M1)
 M2(P1)
+
+/*
+ * should generate:
+ *	x fooEOF y
+ *	x EOFfoo y
+ *	x(-1) y
+ *	y foo x
+ *	x foo y
+*/
+
+#define NOP(x) x
+#define CAT(a, b) a ## b
+#define EOF	(-1)
+x CAT(foo, EOF) y
+x CAT(EOF, foo) y
+x CAT(, EOF) y
+y CAT(foo,) x
+x CAT(,foo) y



             reply	other threads:[~2020-03-05 17:26 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-05 17:26 ori [this message]
2020-03-05 19:21 ` [9front] " cinap_lenrek
2020-03-05 21:42   ` ori
2020-03-06  7:01     ` ori
2020-03-07  0:47       ` ori
2020-03-08 17:42         ` ori
2020-03-16  2:55           ` ori

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5999B9A7D842E3DD4153EEE1186C63DB@eigenstate.org \
    --to=ori@eigenstate.org \
    --cc=9front@9front.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).