From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mimir.eigenstate.org ([206.124.132.107]) by ewsd; Fri Mar 6 19:47:42 EST 2020 Received: from abbatoir.fios-router.home (pool-162-83-132-245.nycmny.fios.verizon.net [162.83.132.245]) by mimir.eigenstate.org (OpenSMTPD) with ESMTPSA id b0cf5016 (TLSv1.2:ECDHE-RSA-AES256-SHA:256:NO); Fri, 6 Mar 2020 16:47:34 -0800 (PST) Message-ID: <94224BDA65B7C92201491757EC52C7F9@eigenstate.org> To: ori@eigenstate.org, cinap_lenrek@felloff.net, 9front@9front.org Subject: Re: [9front] Fix CPP Token Paste: Testing requested. Date: Fri, 6 Mar 2020 16:47:33 -0800 From: ori@eigenstate.org In-Reply-To: MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit List-ID: <9front.9front.org> List-Help: X-Glyph: ➈ X-Bullshit: lossless patented persistence proxy-oriented interface >>> good work! >>> >>> how about we run the old and new cpp binaries on every c file we get and >>> check for differences in the output for testing? >> >> Will do, though we don't have too much preprocessor abuse in our >> code. (ape doesn't use '##' even once!) >> >> Did find one bug I introduced when testing ocaml -- added the NCAT >> case to test.c to cover it. Here's an updated patch: >> > > Ok. Tested with: > > fn t { > ramfs -m /tmp/m > for(f in `{walk -f /sys/src|grep '\.c$'}){ > mkdir -p /tmp/m/^$f; > >/tmp/m/^$f^/old >[2]/dev/null cpp $f; > >/tmp/m/^$f^/new >[2]/dev/null /usr/ori/src/cpp/6.out $f > } > for(f in `{walk -f /sys/src|grep '\.c$'}) > ape/diff -u /tmp/m/^$f^/old /tmp/m/^$f^/new > } > > With this diff, we have no changes in the cpp output. > It preprocesses and builds ocaml, and doesn't seem to > error on perl5, so far (Though, we have other issues. > The bitfields thing is biting us again.) > And, found one more edge case: CAT(,) should produce an empty token. Now working. It looks like we mishandle empty vararg lists too, but that's a separate issue, and probably a separate patch. 
So: - No diffs on /sys/src - Looks like it works ok on ocaml, gmake and my wip perl5 fixes Any other tests anyone thinks I should do? diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.c --- a/sys/src/cmd/cpp/cpp.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/cpp.c Fri Mar 06 16:31:21 2020 -0800 @@ -68,7 +68,7 @@ trp->tp += 1; control(trp); } else if (!skipping && anymacros) - expandrow(trp, NULL, Notinmacro); + expandrow(trp, NULL); if (skipping) setempty(trp); puttokens(trp); @@ -217,7 +217,7 @@ case KLINE: trp->tp = tp+1; - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); tp = trp->bp+2; kline: if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.h --- a/sys/src/cmd/cpp/cpp.h Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/cpp.h Fri Mar 06 16:31:21 2020 -0800 @@ -107,11 +107,11 @@ void doadefine(Tokenrow *, int); void doinclude(Tokenrow *); void doif(Tokenrow *, enum kwtype); -void expand(Tokenrow *, Nlist *, int); +void expand(Tokenrow *, Nlist *); void builtin(Tokenrow *, int); int gatherargs(Tokenrow *, Tokenrow **, int, int *); void substargs(Nlist *, Tokenrow *, Tokenrow **); -void expandrow(Tokenrow *, char *, int); +void expandrow(Tokenrow *, char *); void maketokenrow(int, Tokenrow *); Tokenrow *copytokenrow(Tokenrow *, Tokenrow *); Token *growtokenrow(Tokenrow *); @@ -120,7 +120,7 @@ void movetokenrow(Tokenrow *, Tokenrow *); void insertrow(Tokenrow *, int, Tokenrow *); void peektokens(Tokenrow *, char *); -void doconcat(Tokenrow *); +void glue(Tokenrow *, Token *, Token *); Tokenrow *stringify(Tokenrow *); int lookuparg(Nlist *, Token *); long eval(Tokenrow *, int); diff -r 0dd419f096e2 sys/src/cmd/cpp/eval.c --- a/sys/src/cmd/cpp/eval.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/eval.c Fri Mar 06 16:31:21 2020 -0800 @@ -116,7 +116,7 @@ } ntok = trp->tp - trp->bp; kwdefined->val = KDEFINED; /* activate special meaning of defined */ - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); 
kwdefined->val = NAME; vp = vals; op = ops; diff -r 0dd419f096e2 sys/src/cmd/cpp/include.c --- a/sys/src/cmd/cpp/include.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/include.c Fri Mar 06 16:31:21 2020 -0800 @@ -18,7 +18,7 @@ goto syntax; if (trp->tp->type!=STRING && trp->tp->type!=LT) { len = trp->tp - trp->bp; - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); trp->tp = trp->bp+len; } if (trp->tp->type==STRING) { diff -r 0dd419f096e2 sys/src/cmd/cpp/macro.c --- a/sys/src/cmd/cpp/macro.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/macro.c Fri Mar 06 16:31:21 2020 -0800 @@ -138,7 +138,7 @@ * Flag is NULL if more input can be gathered. */ void -expandrow(Tokenrow *trp, char *flag, int inmacro) +expandrow(Tokenrow *trp, char *flag) { Token *tp; Nlist *np; @@ -170,7 +170,7 @@ if (np->flag&ISMAC) builtin(trp, np->val); else { - expand(trp, np, inmacro); + expand(trp, np); } tp = trp->tp; } @@ -184,7 +184,7 @@ * (ordinarily the beginning of the expansion) */ void -expand(Tokenrow *trp, Nlist *np, int inmacro) +expand(Tokenrow *trp, Nlist *np) { Tokenrow ntr; int ntokc, narg, i; @@ -214,8 +214,6 @@ dofree(atr[i]); } } - if(!inmacro) - doconcat(&ntr); /* execute ## operators */ hs = newhideset(trp->tp->hideset, np); for (tp=ntr.bp; tp<ntr.lp; tp++) { /* distribute hidesets */ if (tp->type==NAME) { @@ -228,7 +226,7 @@ ntr.tp = ntr.bp; insertrow(trp, ntokc, &ntr); trp->tp -= rowlen(&ntr); - dofree(ntr.bp); + free(ntr.bp); return; } @@ -326,7 +324,25 @@ } return ntok; } - + +int +ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok) +{ + *ap = nil; + *an = nil; + /* EMPTY ## tok */ + if (rtr->tp->type == DSHARP && rtr->tp != rtr->bp) + rtr->tp--; + /* tok ## tok */ + if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) { + *ap = rtr->tp; + if(rtr->tp + 2 != rtr->lp) + *an = rtr->tp + 2; + *ntok = 1 + (*ap != nil) + (*an != nil); + return 1; + } + return 0; +} /* * substitute the argument list into the replacement string * This would be simple except for ## and # @@ -334,8 +350,8 @@ void 
substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr) { - Tokenrow tatr; - Token *tp; + Tokenrow tatr, tcat; + Token *tp, *ap, *an, *pp, *pn; int ntok, argno; for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) { @@ -350,19 +366,31 @@ rtr->tp = tp; insertrow(rtr, ntok, stringify(atr[argno])); continue; - } - if (rtr->tp->type==NAME - && (argno = lookuparg(np, rtr->tp)) >= 0) { + } else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */ + pp = ap; + pn = an; + if (ap && (argno = lookuparg(np, ap)) >= 0){ + pp = nil; + if(atr[argno]->tp != atr[argno]->lp) + pp = atr[argno]->lp - 1; + } + if (an && (argno = lookuparg(np, an)) >= 0) { + pn = nil; + if(atr[argno]->tp != atr[argno]->lp) + pn = atr[argno]->lp - 1; + } + glue(&tcat, pp, pn); + insertrow(rtr, ntok, &tcat); + free(tcat.bp); + continue; + } else if (rtr->tp->type==NAME && (argno = lookuparg(np, rtr->tp)) >= 0) { if (rtr->tp < rtr->bp) error(ERROR, "access out of bounds"); - if ((rtr->tp+1)->type==DSHARP - || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP) - insertrow(rtr, 1, atr[argno]); else { copytokenrow(&tatr, atr[argno]); - expandrow(&tatr, "", Inmacro); + expandrow(&tatr, ""); insertrow(rtr, 1, &tatr); - dofree(tatr.bp); + free(tatr.bp); } continue; } @@ -374,41 +402,32 @@ * Evaluate the ## operators in a tokenrow */ void -doconcat(Tokenrow *trp) +glue(Tokenrow *ntr, Token *tp, Token *tn) { - Token *ltp, *ntp; - Tokenrow ntr; - int len; + int np, nn; + char *tt; - for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) { - if (trp->tp->type==DSHARP1) - trp->tp->type = DSHARP; - else if (trp->tp->type==DSHARP) { - char tt[128]; - ltp = trp->tp-1; - ntp = trp->tp+1; - if (ltp<trp->bp || ntp>=trp->lp) { - error(ERROR, "## occurs at border of replacement"); - continue; - } - len = ltp->len + ntp->len; - strncpy((char*)tt, (char*)ltp->t, ltp->len); - strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len); - tt[len] = '\0'; - setsource("<##>", -1, tt); - maketokenrow(3, &ntr); - gettokens(&ntr, 1); - unsetsource(); - if 
(ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS) - error(WARNING, "Bad token %r produced by ##", &ntr); - ntr.lp = ntr.bp+1; - trp->tp = ltp; - makespace(&ntr); - insertrow(trp, (ntp-ltp)+1, &ntr); - dofree(ntr.bp); - trp->tp--; - } + np = tp ? tp->len : 0; + nn = tn ? tn->len : 0; + tt = domalloc(np + nn + 1); + if(tp) + memcpy(tt, tp->t, tp->len); + if(tn) + memcpy(tt+np, tn->t, tn->len); + tt[np+nn] = '\0'; + setsource("<##>", -1, tt); + maketokenrow(3, ntr); + gettokens(ntr, 1); + unsetsource(); + dofree(tt); + if (np + nn == 0) { + ntr->lp = ntr->bp; + } else { + if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS) + error(WARNING, "Bad token %r produced by ##", &ntr); + ntr->lp = ntr->bp+1; } + makespace(ntr); } /* diff -r 0dd419f096e2 sys/src/cmd/cpp/test.c --- a/sys/src/cmd/cpp/test.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/test.c Fri Mar 06 16:31:21 2020 -0800 @@ -1,4 +1,18 @@ -#define M1() -#define M2(A1) A1() -M2(M1) -M2(P1) +#define NOP(x) x +#define CAT(a, b) a ## b +#define EOF (-1) +x CAT(foo, EOF) y +x CAT(EOF, foo) y +x CAT(, EOF) y +y CAT(foo,) x +x CAT(,foo) y +X CAT(,) y + +#define NCAT(a) foo ## a +NCAT(bar) + +#define XCAT(a) ## a +foo XCAT(bar) + +#define CAT3(foo) a##foo##b +CAT3(blah)