From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mimir.eigenstate.org ([206.124.132.107]) by ewsd; Fri Mar 6 02:01:42 EST 2020 Received: from abbatoir.fios-router.home (pool-162-83-132-245.nycmny.fios.verizon.net [162.83.132.245]) by mimir.eigenstate.org (OpenSMTPD) with ESMTPSA id 066e5e57 (TLSv1.2:ECDHE-RSA-AES256-SHA:256:NO); Thu, 5 Mar 2020 23:01:34 -0800 (PST) Message-ID: To: ori@eigenstate.org, cinap_lenrek@felloff.net, 9front@9front.org Subject: Re: [9front] Fix CPP Token Paste: Testing requested. Date: Thu, 5 Mar 2020 23:01:32 -0800 From: ori@eigenstate.org In-Reply-To: <5394930B85C57CE4C95F0BB41E4DD899@eigenstate.org> MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit List-ID: <9front.9front.org> List-Help: X-Glyph: ➈ X-Bullshit: pipelining callback table-aware solution >> good work! >> >> how about we run the old and new cpp binaries on every c file we get and >> check for differences in the output for testing? > > Will do, though we don't have too much preprocessor abuse in our > code. (ape doesn't use '##' even once!) > > Did find one bug I introduced when testing ocaml -- added the NCAT > case to test.c to cover it. Here's an updated patch: > Ok. Tested with: fn t { ramfs -m /tmp/m for(f in `{walk -f /sys/src|grep '\.c$'}){ mkdir -p /tmp/m/^$f; >/tmp/m/^$f^/old >[2]/dev/null cpp $f; >/tmp/m/^$f^/new >[2]/dev/null /usr/ori/src/cpp/6.out $f } for(f in `{walk -f /sys/src|grep '\.c$'}) ape/diff -u /tmp/m/^$f^/old /tmp/m/^$f^/new } With this diff, we have no changes in the cpp output. It preprocesses and builds ocaml, and doesn't seem to error on perl5, so far (Though, we have other issues. The bitfields thing is biting us again.) 
diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.c --- a/sys/src/cmd/cpp/cpp.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/cpp.c Thu Mar 05 22:58:34 2020 -0800 @@ -68,7 +68,7 @@ trp->tp += 1; control(trp); } else if (!skipping && anymacros) - expandrow(trp, NULL, Notinmacro); + expandrow(trp, NULL); if (skipping) setempty(trp); puttokens(trp); @@ -217,7 +217,7 @@ case KLINE: trp->tp = tp+1; - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); tp = trp->bp+2; kline: if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.h --- a/sys/src/cmd/cpp/cpp.h Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/cpp.h Thu Mar 05 22:58:34 2020 -0800 @@ -107,11 +107,11 @@ void doadefine(Tokenrow *, int); void doinclude(Tokenrow *); void doif(Tokenrow *, enum kwtype); -void expand(Tokenrow *, Nlist *, int); +void expand(Tokenrow *, Nlist *); void builtin(Tokenrow *, int); int gatherargs(Tokenrow *, Tokenrow **, int, int *); void substargs(Nlist *, Tokenrow *, Tokenrow **); -void expandrow(Tokenrow *, char *, int); +void expandrow(Tokenrow *, char *); void maketokenrow(int, Tokenrow *); Tokenrow *copytokenrow(Tokenrow *, Tokenrow *); Token *growtokenrow(Tokenrow *); @@ -120,7 +120,7 @@ void movetokenrow(Tokenrow *, Tokenrow *); void insertrow(Tokenrow *, int, Tokenrow *); void peektokens(Tokenrow *, char *); -void doconcat(Tokenrow *); +int glue(Tokenrow *, Token *, Token *); Tokenrow *stringify(Tokenrow *); int lookuparg(Nlist *, Token *); long eval(Tokenrow *, int); diff -r 0dd419f096e2 sys/src/cmd/cpp/eval.c --- a/sys/src/cmd/cpp/eval.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/eval.c Thu Mar 05 22:58:34 2020 -0800 @@ -116,7 +116,7 @@ } ntok = trp->tp - trp->bp; kwdefined->val = KDEFINED; /* activate special meaning of defined */ - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); kwdefined->val = NAME; vp = vals; op = ops; diff -r 0dd419f096e2 sys/src/cmd/cpp/include.c --- a/sys/src/cmd/cpp/include.c Sun Mar 01 23:23:01 2020 
+0100 +++ b/sys/src/cmd/cpp/include.c Thu Mar 05 22:58:34 2020 -0800 @@ -18,7 +18,7 @@ goto syntax; if (trp->tp->type!=STRING && trp->tp->type!=LT) { len = trp->tp - trp->bp; - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); trp->tp = trp->bp+len; } if (trp->tp->type==STRING) { diff -r 0dd419f096e2 sys/src/cmd/cpp/macro.c --- a/sys/src/cmd/cpp/macro.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/macro.c Thu Mar 05 22:58:34 2020 -0800 @@ -138,7 +138,7 @@ * Flag is NULL if more input can be gathered. */ void -expandrow(Tokenrow *trp, char *flag, int inmacro) +expandrow(Tokenrow *trp, char *flag) { Token *tp; Nlist *np; @@ -170,7 +170,7 @@ if (np->flag&ISMAC) builtin(trp, np->val); else { - expand(trp, np, inmacro); + expand(trp, np); } tp = trp->tp; } @@ -184,7 +184,7 @@ * (ordinarily the beginning of the expansion) */ void -expand(Tokenrow *trp, Nlist *np, int inmacro) +expand(Tokenrow *trp, Nlist *np) { Tokenrow ntr; int ntokc, narg, i; @@ -214,8 +214,6 @@ dofree(atr[i]); } } - if(!inmacro) - doconcat(&ntr); /* execute ## operators */ hs = newhideset(trp->tp->hideset, np); for (tp=ntr.bp; tp<ntr.lp; tp++) { /* distribute hidesets */ if (tp->type==NAME) { @@ -228,7 +226,7 @@ ntr.tp = ntr.bp; insertrow(trp, ntokc, &ntr); trp->tp -= rowlen(&ntr); - dofree(ntr.bp); + free(ntr.bp); return; } @@ -326,7 +324,25 @@ } return ntok; } - + +int +ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok) +{ + *ap = nil; + *an = nil; + /* EMPTY ## tok */ + if (rtr->tp->type == DSHARP && rtr->tp != rtr->bp) + rtr->tp--; + /* tok ## tok */ + if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) { + *ap = rtr->tp; + if(rtr->tp + 2 != rtr->lp) + *an = rtr->tp + 2; + *ntok = 1 + (*ap != nil) + (*an != nil); + return 1; + } + return 0; +} /* * substitute the argument list into the replacement string * This would be simple except for ## and # @@ -334,8 +350,8 @@ void substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr) { - Tokenrow tatr; - Token *tp; + Tokenrow tatr, tcat; + Token *tp, *ap, *an, *pp, *pn; int ntok, argno; 
for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) { @@ -350,19 +366,27 @@ rtr->tp = tp; insertrow(rtr, ntok, stringify(atr[argno])); continue; - } - if (rtr->tp->type==NAME - && (argno = lookuparg(np, rtr->tp)) >= 0) { + } else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */ + pp = ap; + pn = an; + if (ap && (argno = lookuparg(np, ap)) >= 0) + if(atr[argno]->tp != atr[argno]->lp) + pp = atr[argno]->lp - 1; + if (an && (argno = lookuparg(np, an)) >= 0) + if(atr[argno]->tp != atr[argno]->lp) + pn = atr[argno]->lp - 1; + glue(&tcat, pp, pn); + insertrow(rtr, ntok, &tcat); + free(tcat.bp); + continue; + } else if (rtr->tp->type==NAME && (argno = lookuparg(np, rtr->tp)) >= 0) { if (rtr->tp < rtr->bp) error(ERROR, "access out of bounds"); - if ((rtr->tp+1)->type==DSHARP - || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP) - insertrow(rtr, 1, atr[argno]); else { copytokenrow(&tatr, atr[argno]); - expandrow(&tatr, "", Inmacro); + expandrow(&tatr, ""); insertrow(rtr, 1, &tatr); - dofree(tatr.bp); + free(tatr.bp); } continue; } @@ -373,42 +397,32 @@ /* * Evaluate the ## operators in a tokenrow */ -void -doconcat(Tokenrow *trp) +int +glue(Tokenrow *ntr, Token *tp, Token *tn) { - Token *ltp, *ntp; - Tokenrow ntr; - int len; + int np, nn; + char *tt; - for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) { - if (trp->tp->type==DSHARP1) - trp->tp->type = DSHARP; - else if (trp->tp->type==DSHARP) { - char tt[128]; - ltp = trp->tp-1; - ntp = trp->tp+1; - if (ltp<trp->bp || ntp>=trp->lp) { - error(ERROR, "## occurs at border of replacement"); - continue; - } - len = ltp->len + ntp->len; - strncpy((char*)tt, (char*)ltp->t, ltp->len); - strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len); - tt[len] = '\0'; - setsource("<##>", -1, tt); - maketokenrow(3, &ntr); - gettokens(&ntr, 1); - unsetsource(); - if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS) - error(WARNING, "Bad token %r produced by ##", &ntr); - ntr.lp = ntr.bp+1; - trp->tp = ltp; - makespace(&ntr); - insertrow(trp, (ntp-ltp)+1, 
&ntr); - dofree(ntr.bp); - trp->tp--; - } - } + if(tp == nil && tn == nil) + return 0; + np = tp ? tp->len : 0; + nn = tn ? tn->len : 0; + tt = domalloc(np + nn + 1); + if(tp) + memcpy(tt, tp->t, tp->len); + if(tn) + memcpy(tt+np, tn->t, tn->len); + tt[np+nn] = '\0'; + setsource("<##>", -1, tt); + maketokenrow(3, ntr); + gettokens(ntr, 1); + unsetsource(); + dofree(tt); + if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS) + error(WARNING, "Bad token %r produced by ##", &ntr); + ntr->lp = ntr->bp+1; + makespace(ntr); + return 1; } /* diff -r 0dd419f096e2 sys/src/cmd/cpp/test.c --- a/sys/src/cmd/cpp/test.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/test.c Thu Mar 05 22:58:34 2020 -0800 @@ -1,4 +1,17 @@ -#define M1() -#define M2(A1) A1() -M2(M1) -M2(P1) +#define NOP(x) x +#define CAT(a, b) a ## b +#define EOF (-1) +x CAT(foo, EOF) y +x CAT(EOF, foo) y +x CAT(, EOF) y +y CAT(foo,) x +x CAT(,foo) y + +#define NCAT(a) foo ## a +NCAT(bar) + +#define XCAT(a) ## a +foo XCAT(bar) + +#define CAT3(foo) a##foo##b +CAT3(blah)