From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mimir.eigenstate.org ([206.124.132.107]) by ewsd; Thu Mar 5 16:43:04 EST 2020 Received: from stockyard.bk.recurse-network.net (gateway.bk.recurse-network.net [185.230.222.2]) by mimir.eigenstate.org (OpenSMTPD) with ESMTPSA id c16a44d7 (TLSv1.2:ECDHE-RSA-AES256-SHA:256:NO); Thu, 5 Mar 2020 13:42:51 -0800 (PST) Message-ID: <5394930B85C57CE4C95F0BB41E4DD899@eigenstate.org> To: cinap_lenrek@felloff.net, 9front@9front.org Subject: Re: [9front] Fix CPP Token Paste: Testing requested. Date: Thu, 5 Mar 2020 13:42:49 -0800 From: ori@eigenstate.org In-Reply-To: <2C15ED71920E8BE2C12AB49FA7402EBF@felloff.net> MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit List-ID: <9front.9front.org> List-Help: X-Glyph: ➈ X-Bullshit: self-healing core persistence-oriented cloud general-purpose frontend > good work! > > how about we run the old and new cpp binaries on every c file we get and > check for differences in the output for testing? Will do, though we don't have too much preprocessor abuse in our code. (ape doesn't use '##' even once!) Did find one bug I introduced when testing ocaml -- added the NCAT case to test.c to cover it. 
Here's an updated patch: diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.c --- a/sys/src/cmd/cpp/cpp.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/cpp.c Thu Mar 05 13:41:46 2020 -0800 @@ -68,7 +68,7 @@ trp->tp += 1; control(trp); } else if (!skipping && anymacros) - expandrow(trp, NULL, Notinmacro); + expandrow(trp, NULL); if (skipping) setempty(trp); puttokens(trp); @@ -217,7 +217,7 @@ case KLINE: trp->tp = tp+1; - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); tp = trp->bp+2; kline: if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.h --- a/sys/src/cmd/cpp/cpp.h Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/cpp.h Thu Mar 05 13:41:46 2020 -0800 @@ -107,11 +107,11 @@ void doadefine(Tokenrow *, int); void doinclude(Tokenrow *); void doif(Tokenrow *, enum kwtype); -void expand(Tokenrow *, Nlist *, int); +void expand(Tokenrow *, Nlist *); void builtin(Tokenrow *, int); int gatherargs(Tokenrow *, Tokenrow **, int, int *); void substargs(Nlist *, Tokenrow *, Tokenrow **); -void expandrow(Tokenrow *, char *, int); +void expandrow(Tokenrow *, char *); void maketokenrow(int, Tokenrow *); Tokenrow *copytokenrow(Tokenrow *, Tokenrow *); Token *growtokenrow(Tokenrow *); @@ -120,7 +120,7 @@ void movetokenrow(Tokenrow *, Tokenrow *); void insertrow(Tokenrow *, int, Tokenrow *); void peektokens(Tokenrow *, char *); -void doconcat(Tokenrow *); +int glue(Tokenrow *, Token *, Token *); Tokenrow *stringify(Tokenrow *); int lookuparg(Nlist *, Token *); long eval(Tokenrow *, int); diff -r 0dd419f096e2 sys/src/cmd/cpp/eval.c --- a/sys/src/cmd/cpp/eval.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/eval.c Thu Mar 05 13:41:46 2020 -0800 @@ -116,7 +116,7 @@ } ntok = trp->tp - trp->bp; kwdefined->val = KDEFINED; /* activate special meaning of defined */ - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); kwdefined->val = NAME; vp = vals; op = ops; diff -r 0dd419f096e2 sys/src/cmd/cpp/include.c --- a/sys/src/cmd/cpp/include.c 
Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/include.c Thu Mar 05 13:41:46 2020 -0800 @@ -18,7 +18,7 @@ goto syntax; if (trp->tp->type!=STRING && trp->tp->type!=LT) { len = trp->tp - trp->bp; - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); trp->tp = trp->bp+len; } if (trp->tp->type==STRING) { diff -r 0dd419f096e2 sys/src/cmd/cpp/macro.c --- a/sys/src/cmd/cpp/macro.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/macro.c Thu Mar 05 13:41:46 2020 -0800 @@ -138,7 +138,7 @@ * Flag is NULL if more input can be gathered. */ void -expandrow(Tokenrow *trp, char *flag, int inmacro) +expandrow(Tokenrow *trp, char *flag) { Token *tp; Nlist *np; @@ -170,7 +170,7 @@ if (np->flag&ISMAC) builtin(trp, np->val); else { - expand(trp, np, inmacro); + expand(trp, np); } tp = trp->tp; } @@ -184,7 +184,7 @@ * (ordinarily the beginning of the expansion) */ void -expand(Tokenrow *trp, Nlist *np, int inmacro) +expand(Tokenrow *trp, Nlist *np) { Tokenrow ntr; int ntokc, narg, i; @@ -214,8 +214,6 @@ dofree(atr[i]); } } - if(!inmacro) - doconcat(&ntr); /* execute ## operators */ hs = newhideset(trp->tp->hideset, np); for (tp=ntr.bp; tp<ntr.lp; tp++) { /* distribute hidesets */ if (tp->type==NAME) { @@ -228,7 +226,7 @@ ntr.tp = ntr.bp; insertrow(trp, ntokc, &ntr); trp->tp -= rowlen(&ntr); - dofree(ntr.bp); + free(ntr.bp); return; } @@ -326,7 +324,30 @@ } return ntok; } - + +int +ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok) +{ + /* EMPTY ## tok */ + *ap = nil; + *an = nil; + if (rtr->tp->type == DSHARP) { + if (rtr->tp + 1 != rtr->lp) + *an = rtr->tp + 1; + goto is; + } + /* tok ## tok */ + if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) { + *ap = rtr->tp; + if(rtr->tp + 2 != rtr->lp) + *an = rtr->tp + 2; + goto is; + } + return 0; +is: + *ntok = 1 + (*ap != nil) + (*an != nil); + return 1; +} /* * substitute the argument list into the replacement string * This would be simple except for ## and # @@ -334,8 +355,8 @@ void substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr) { - Tokenrow tatr; - Token 
*tp; + Tokenrow tatr, tcat; + Token *tp, *ap, *an, *pp, *pn; int ntok, argno; for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) { @@ -350,19 +371,31 @@ rtr->tp = tp; insertrow(rtr, ntok, stringify(atr[argno])); continue; - } - if (rtr->tp->type==NAME - && (argno = lookuparg(np, rtr->tp)) >= 0) { + } else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */ + pp = nil; + pn = nil; + if (ap && (argno = lookuparg(np, ap)) >= 0) { + if(atr[argno]->tp != atr[argno]->lp) + pp = atr[argno]->lp - 1; + } else + pp = ap; + if (an && (argno = lookuparg(np, an)) >= 0) { + if(atr[argno]->tp != atr[argno]->lp) + pn = atr[argno]->lp - 1; + } else + pn = an; + glue(&tcat, pp, pn); + insertrow(rtr, ntok, &tcat); + free(tcat.bp); + continue; + } else if (rtr->tp->type==NAME && (argno = lookuparg(np, rtr->tp)) >= 0) { if (rtr->tp < rtr->bp) error(ERROR, "access out of bounds"); - if ((rtr->tp+1)->type==DSHARP - || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP) - insertrow(rtr, 1, atr[argno]); else { copytokenrow(&tatr, atr[argno]); - expandrow(&tatr, "", Inmacro); + expandrow(&tatr, ""); insertrow(rtr, 1, &tatr); - dofree(tatr.bp); + free(tatr.bp); } continue; } @@ -373,42 +406,32 @@ /* * Evaluate the ## operators in a tokenrow */ -void -doconcat(Tokenrow *trp) +int +glue(Tokenrow *ntr, Token *tp, Token *tn) { - Token *ltp, *ntp; - Tokenrow ntr; - int len; + int np, nn; + char *tt; - for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) { - if (trp->tp->type==DSHARP1) - trp->tp->type = DSHARP; - else if (trp->tp->type==DSHARP) { - char tt[128]; - ltp = trp->tp-1; - ntp = trp->tp+1; - if (ltp<trp->bp || ntp>=trp->lp) { - error(ERROR, "## occurs at border of replacement"); - continue; - } - len = ltp->len + ntp->len; - strncpy((char*)tt, (char*)ltp->t, ltp->len); - strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len); - tt[len] = '\0'; - setsource("<##>", -1, tt); - maketokenrow(3, &ntr); - gettokens(&ntr, 1); - unsetsource(); - if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS) - error(WARNING, 
"Bad token %r produced by ##", &ntr); - ntr.lp = ntr.bp+1; - trp->tp = ltp; - makespace(&ntr); - insertrow(trp, (ntp-ltp)+1, &ntr); - dofree(ntr.bp); - trp->tp--; - } - } + if(tp == nil && tn == nil) + return 0; + np = tp ? tp->len : 0; + nn = tn ? tn->len : 0; + tt = domalloc(np + nn + 1); + if(tp) + memcpy(tt, tp->t, tp->len); + if(tn) + memcpy(tt+np, tn->t, tn->len); + tt[np+nn] = '\0'; + setsource("<##>", -1, tt); + maketokenrow(3, ntr); + gettokens(ntr, 1); + unsetsource(); + dofree(tt); + if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS) + error(WARNING, "Bad token %r produced by ##", &ntr); + ntr->lp = ntr->bp+1; + makespace(ntr); + return 1; } /* diff -r 0dd419f096e2 sys/src/cmd/cpp/test.c --- a/sys/src/cmd/cpp/test.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/test.c Thu Mar 05 13:41:46 2020 -0800 @@ -1,4 +1,14 @@ -#define M1() -#define M2(A1) A1() -M2(M1) -M2(P1) +#define NOP(x) x +#define CAT(a, b) a ## b +#define EOF (-1) +x CAT(foo, EOF) y +x CAT(EOF, foo) y +x CAT(, EOF) y +y CAT(foo,) x +x CAT(,foo) y + +#define NCAT(a) foo ## a +NCAT(bar) + +#define XCAT(a) ## a +foo XCAT(bar)