From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mimir.eigenstate.org ([206.124.132.107]) by ewsd; Thu Mar 5 12:26:18 EST 2020 Received: from stockyard.bk.recurse-network.net (gateway.bk.recurse-network.net [185.230.222.2]) by mimir.eigenstate.org (OpenSMTPD) with ESMTPSA id f21eb920 (TLSv1.2:ECDHE-RSA-AES256-SHA:256:NO) for <9front@9front.org>; Thu, 5 Mar 2020 09:26:09 -0800 (PST) Message-ID: <5999B9A7D842E3DD4153EEE1186C63DB@eigenstate.org> To: 9front@9front.org Subject: Fix CPP Token Paste: Testing requested. Date: Thu, 5 Mar 2020 09:26:08 -0800 From: ori@eigenstate.org MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit List-ID: <9front.9front.org> List-Help: X-Glyph: ➈ X-Bullshit: realtime-scale replication XMPP deep-learning hardware standard blockchain ** This change is relatively subtle, and I'd like some more testing before committing ** Perl does a bunch of macro stuff that breaks on our preprocessor. So does TCC. Specifically, when token pasting in a function like macro, we should expand the to the parameters of the function. We do this, but only when the function like macro is not expanded inside another macro parameter. For example, in: #define NOP(x) x #define CAT(a, b) a ## b #define EOF (-1) CAT(foo, EOF) NOP(CAT(foo, EOF)) the current version of cpp expands the first to: fooEOF but the second is expanded to foo( and, then because it's not a token, we error. This happens because we're fully substituting parameters, and then doing the token pasting in a separate pass, when we should be doing it as part of substituting a function like macro. The algorithm that cpp uses is documented here: https://www.spinellis.gr/blog/20060626/ However, reading it, it's clear that we deviated when doing the concatenation. This change to cpp fixes the expansion, and makes us fully follow Prosser's algorithm. This change fixes things so that we expand both correctly. diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.c --- a/sys/src/cmd/cpp/cpp.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/cpp.c Thu Mar 05 09:13:58 2020 -0800 @@ -68,7 +68,7 @@ trp->tp += 1; control(trp); } else if (!skipping && anymacros) - expandrow(trp, NULL, Notinmacro); + expandrow(trp, NULL); if (skipping) setempty(trp); puttokens(trp); @@ -217,7 +217,7 @@ case KLINE: trp->tp = tp+1; - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); tp = trp->bp+2; kline: if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3lp diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.h --- a/sys/src/cmd/cpp/cpp.h Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/cpp.h Thu Mar 05 09:13:58 2020 -0800 @@ -107,11 +107,11 @@ void doadefine(Tokenrow *, int); void doinclude(Tokenrow *); void doif(Tokenrow *, enum kwtype); -void expand(Tokenrow *, Nlist *, int); +void expand(Tokenrow *, Nlist *); void builtin(Tokenrow *, int); int gatherargs(Tokenrow *, Tokenrow **, int, int *); void substargs(Nlist *, Tokenrow *, Tokenrow **); -void expandrow(Tokenrow *, char *, int); +void expandrow(Tokenrow *, char *); void maketokenrow(int, Tokenrow *); Tokenrow *copytokenrow(Tokenrow *, Tokenrow *); Token *growtokenrow(Tokenrow *); @@ -120,7 +120,7 @@ void movetokenrow(Tokenrow *, Tokenrow *); void insertrow(Tokenrow *, int, Tokenrow *); void peektokens(Tokenrow *, char *); -void doconcat(Tokenrow *); +int doconcat(Tokenrow *, Token *, Token *); Tokenrow *stringify(Tokenrow *); int lookuparg(Nlist *, Token *); long eval(Tokenrow *, int); diff -r 0dd419f096e2 sys/src/cmd/cpp/eval.c --- a/sys/src/cmd/cpp/eval.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/eval.c Thu Mar 05 09:13:58 2020 -0800 @@ -116,7 +116,7 @@ } ntok = trp->tp - trp->bp; kwdefined->val = KDEFINED; /* activate special meaning of defined */ - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); kwdefined->val = NAME; vp = vals; op = ops; diff -r 0dd419f096e2 sys/src/cmd/cpp/include.c --- a/sys/src/cmd/cpp/include.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/include.c Thu Mar 05 09:13:58 2020 -0800 @@ -18,7 +18,7 @@ goto syntax; if (trp->tp->type!=STRING && trp->tp->type!=LT) { len = trp->tp - trp->bp; - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); trp->tp = trp->bp+len; } if (trp->tp->type==STRING) { diff -r 0dd419f096e2 sys/src/cmd/cpp/macro.c --- a/sys/src/cmd/cpp/macro.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/macro.c Thu Mar 05 09:13:58 2020 -0800 @@ -138,7 +138,7 @@ * Flag is NULL if more input can be gathered. */ void -expandrow(Tokenrow *trp, char *flag, int inmacro) +expandrow(Tokenrow *trp, char *flag) { Token *tp; Nlist *np; @@ -170,7 +170,7 @@ if (np->flag&ISMAC) builtin(trp, np->val); else { - expand(trp, np, inmacro); + expand(trp, np); } tp = trp->tp; } @@ -184,7 +184,7 @@ * (ordinarily the beginning of the expansion) */ void -expand(Tokenrow *trp, Nlist *np, int inmacro) +expand(Tokenrow *trp, Nlist *np) { Tokenrow ntr; int ntokc, narg, i; @@ -214,8 +214,6 @@ dofree(atr[i]); } } - if(!inmacro) - doconcat(&ntr); /* execute ## operators */ hs = newhideset(trp->tp->hideset, np); for (tp=ntr.bp; tptype==NAME) { @@ -326,7 +324,30 @@ } return ntok; } - + +int +ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok) +{ + /* EMPTY ## tok */ + *ap = nil; + *an = nil; + if (rtr->tp->type == DSHARP) { + if (rtr->tp + 1 != rtr->lp) + *an = rtr->tp + 1; + goto is; + } + /* tok ## tok */ + if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) { + *ap = rtr->tp; + if(rtr->tp + 2 != rtr->lp) + *an = rtr->tp + 2; + goto is; + } + return 0; +is: + *ntok = 1 + (*ap != nil) + (*an != nil); + return 1; +} /* * substitute the argument list into the replacement string * This would be simple except for ## and # @@ -334,8 +355,8 @@ void substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr) { - Tokenrow tatr; - Token *tp; + Tokenrow tatr, tcat; + Token *tp, *ap, *an, *pp, *pn; int ntok, argno; for (rtr->tp=rtr->bp; rtr->tplp; ) { @@ -350,17 +371,24 @@ rtr->tp = tp; insertrow(rtr, ntok, stringify(atr[argno])); continue; - } - if (rtr->tp->type==NAME - && (argno = lookuparg(np, rtr->tp)) >= 0) { + } else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */ + pp = nil; + pn = nil; + if(ap && (argno = lookuparg(np, ap)) >= 0) + if(atr[argno]->tp != atr[argno]->lp) + pp = atr[argno]->lp - 1; + if(an && (argno = lookuparg(np, an)) >= 0) + if(atr[argno]->tp != atr[argno]->lp) + pn = atr[argno]->lp - 1; + doconcat(&tcat, pp, pn); + insertrow(rtr, ntok, &tcat); + continue; + } else if (rtr->tp->type==NAME && (argno = lookuparg(np, rtr->tp)) >= 0) { if (rtr->tp < rtr->bp) error(ERROR, "access out of bounds"); - if ((rtr->tp+1)->type==DSHARP - || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP) - insertrow(rtr, 1, atr[argno]); else { copytokenrow(&tatr, atr[argno]); - expandrow(&tatr, "", Inmacro); + expandrow(&tatr, ""); insertrow(rtr, 1, &tatr); dofree(tatr.bp); } @@ -373,42 +401,32 @@ /* * Evaluate the ## operators in a tokenrow */ -void -doconcat(Tokenrow *trp) +int +doconcat(Tokenrow *ntr, Token *tp, Token *tn) { - Token *ltp, *ntp; - Tokenrow ntr; - int len; + int np, nn; + char *tt; - for (trp->tp=trp->bp; trp->tplp; trp->tp++) { - if (trp->tp->type==DSHARP1) - trp->tp->type = DSHARP; - else if (trp->tp->type==DSHARP) { - char tt[128]; - ltp = trp->tp-1; - ntp = trp->tp+1; - if (ltpbp || ntp>=trp->lp) { - error(ERROR, "## occurs at border of replacement"); - continue; - } - len = ltp->len + ntp->len; - strncpy((char*)tt, (char*)ltp->t, ltp->len); - strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len); - tt[len] = '\0'; - setsource("<##>", -1, tt); - maketokenrow(3, &ntr); - gettokens(&ntr, 1); - unsetsource(); - if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS) - error(WARNING, "Bad token %r produced by ##", &ntr); - ntr.lp = ntr.bp+1; - trp->tp = ltp; - makespace(&ntr); - insertrow(trp, (ntp-ltp)+1, &ntr); - dofree(ntr.bp); - trp->tp--; - } - } + if(tp == nil && tn == nil) + return 0; + np = tp ? tp->len : 0; + nn = tn ? tn->len : 0; + tt = domalloc(np + nn + 1); + if(tp) + memcpy(tt, tp->t, tp->len); + if(tn) + memcpy(tt+np, tn->t, tn->len); + tt[np+nn] = '\0'; + setsource("<##>", -1, tt); + maketokenrow(3, ntr); + gettokens(ntr, 1); + unsetsource(); + free(tt); + if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS) + error(WARNING, "Bad token %r produced by ##", &ntr); + ntr->lp = ntr->bp+1; + makespace(ntr); + return 1; } /* diff -r 0dd419f096e2 sys/src/cmd/cpp/test.c --- a/sys/src/cmd/cpp/test.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/test.c Thu Mar 05 09:13:58 2020 -0800 @@ -1,4 +1,23 @@ +/* Should generate P1() */ #define M1() #define M2(A1) A1() M2(M1) M2(P1) + +/* + * should generate: + * x fooEOF y + * x EOFfoo y + * x(-1) y + * y foo x + * x foo y +*/ + +#define NOP(x) x +#define CAT(a, b) a ## b +#define EOF (-1) +x CAT(foo, EOF) y +x CAT(EOF, foo) y +x CAT(, EOF) y +y CAT(foo,) x +x CAT(,foo) y