From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mimir.eigenstate.org ([206.124.132.107]) by ewsd; Sun Mar 8 13:42:32 EDT 2020 Received: from abbatoir.fios-router.home (pool-162-83-132-245.nycmny.fios.verizon.net [162.83.132.245]) by mimir.eigenstate.org (OpenSMTPD) with ESMTPSA id ba28d7a7 (TLSv1.2:ECDHE-RSA-AES256-SHA:256:NO); Sun, 8 Mar 2020 10:42:24 -0700 (PDT) Message-ID: <8D3E6BE787FA52FB0F1C16D77A77EE75@eigenstate.org> To: ori@eigenstate.org, cinap_lenrek@felloff.net, 9front@9front.org Subject: Re: [9front] Fix CPP Token Paste: Testing requested. Date: Sun, 8 Mar 2020 10:42:22 -0700 From: ori@eigenstate.org In-Reply-To: <94224BDA65B7C92201491757EC52C7F9@eigenstate.org> MIME-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit List-ID: <9front.9front.org> List-Help: X-Glyph: ➈ X-Bullshit: transactional strategy-scale self-signing backend >> >> Ok. Tested with: >> >> fn t { >> ramfs -m /tmp/m >> for(f in `{walk -f /sys/src|grep '\.c$'}){ >> mkdir -p /tmp/m/^$f; >> >/tmp/m/^$f^/old >[2]/dev/null cpp $f; >> >/tmp/m/^$f^/new >[2]/dev/null /usr/ori/src/cpp/6.out $f >> } >> for(f in `{walk -f /sys/src|grep '\.c$'}) >> ape/diff -u /tmp/m/^$f^/old /tmp/m/^$f^/new >> } >> >> With this diff, we have no changes in the cpp output. >> It preprocesses and builds ocaml, and doesn't seem to >> error on perl5, so far (Though, we have other issues. >> The bitfields thing is biting us again.) >> > > And, found one more edge case: CAT(,) should produce an > empty token. Now working. > > It looks like we mishandle empty vararg lists too, but > that's a separate issue, and probably a separate patch. > > So: > - No diffs on /sys/src > - Looks like it works ok on ocaml, gmake and my wip perl5 fixes > > Any other tests anyone thinks I should do? More progress on perl caught another edge case: #define V 42 #define X CAT(V, .0) should give the result 42.0. This version fixes that issue. 
It does introduce changes when diffing against the system, but that seems to be just whitespace, which comes from insertrow(). This is expected. The system builds, as do a selection of ports (ocaml, gmake, netsurf). Good to commit? diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.c --- a/sys/src/cmd/cpp/cpp.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/cpp.c Sun Mar 08 10:34:29 2020 -0700 @@ -68,7 +68,7 @@ trp->tp += 1; control(trp); } else if (!skipping && anymacros) - expandrow(trp, NULL, Notinmacro); + expandrow(trp, NULL); if (skipping) setempty(trp); puttokens(trp); @@ -217,7 +217,7 @@ case KLINE: trp->tp = tp+1; - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); tp = trp->bp+2; kline: if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.h --- a/sys/src/cmd/cpp/cpp.h Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/cpp.h Sun Mar 08 10:34:29 2020 -0700 @@ -107,11 +107,11 @@ void doadefine(Tokenrow *, int); void doinclude(Tokenrow *); void doif(Tokenrow *, enum kwtype); -void expand(Tokenrow *, Nlist *, int); +void expand(Tokenrow *, Nlist *); void builtin(Tokenrow *, int); int gatherargs(Tokenrow *, Tokenrow **, int, int *); void substargs(Nlist *, Tokenrow *, Tokenrow **); -void expandrow(Tokenrow *, char *, int); +void expandrow(Tokenrow *, char *); void maketokenrow(int, Tokenrow *); Tokenrow *copytokenrow(Tokenrow *, Tokenrow *); Token *growtokenrow(Tokenrow *); @@ -120,7 +120,7 @@ void movetokenrow(Tokenrow *, Tokenrow *); void insertrow(Tokenrow *, int, Tokenrow *); void peektokens(Tokenrow *, char *); -void doconcat(Tokenrow *); +void glue(Tokenrow *, Token *, Token *); Tokenrow *stringify(Tokenrow *); int lookuparg(Nlist *, Token *); long eval(Tokenrow *, int); diff -r 0dd419f096e2 sys/src/cmd/cpp/eval.c --- a/sys/src/cmd/cpp/eval.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/eval.c Sun Mar 08 10:34:29 2020 -0700 @@ -116,7 +116,7 @@ } ntok = trp->tp - trp->bp; kwdefined->val = KDEFINED; /* activate 
special meaning of defined */ - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); kwdefined->val = NAME; vp = vals; op = ops; diff -r 0dd419f096e2 sys/src/cmd/cpp/include.c --- a/sys/src/cmd/cpp/include.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/include.c Sun Mar 08 10:34:29 2020 -0700 @@ -18,7 +18,7 @@ goto syntax; if (trp->tp->type!=STRING && trp->tp->type!=LT) { len = trp->tp - trp->bp; - expandrow(trp, "", Notinmacro); + expandrow(trp, ""); trp->tp = trp->bp+len; } if (trp->tp->type==STRING) { diff -r 0dd419f096e2 sys/src/cmd/cpp/macro.c --- a/sys/src/cmd/cpp/macro.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/macro.c Sun Mar 08 10:34:29 2020 -0700 @@ -138,7 +138,7 @@ * Flag is NULL if more input can be gathered. */ void -expandrow(Tokenrow *trp, char *flag, int inmacro) +expandrow(Tokenrow *trp, char *flag) { Token *tp; Nlist *np; @@ -170,7 +170,7 @@ if (np->flag&ISMAC) builtin(trp, np->val); else { - expand(trp, np, inmacro); + expand(trp, np); } tp = trp->tp; } @@ -184,7 +184,7 @@ * (ordinarily the beginning of the expansion) */ void -expand(Tokenrow *trp, Nlist *np, int inmacro) +expand(Tokenrow *trp, Nlist *np) { Tokenrow ntr; int ntokc, narg, i; @@ -193,12 +193,14 @@ int hs; copytokenrow(&ntr, np->vp); /* copy macro value */ - if (np->ap==NULL) /* parameterless */ + if (np->ap==NULL) { /* parameterless */ ntokc = 1; - else { + /* substargs for handling # and ## */ + atr[0] = nil; + substargs(np, &ntr, atr); + } else { ntokc = gatherargs(trp, atr, (np->flag&ISVARMAC) ? 
rowlen(np->ap) : 0, &narg); if (narg<0) { /* not actually a call (no '(') */ -/* error(WARNING, "%d %r\n", narg, trp); */ /* gatherargs has already pushed trp->tr to the next token */ return; } @@ -214,8 +216,6 @@ dofree(atr[i]); } } - if(!inmacro) - doconcat(&ntr); /* execute ## operators */ hs = newhideset(trp->tp->hideset, np); for (tp=ntr.bp; tp<ntr.lp; tp++) { if (tp->type==NAME) { @@ -228,7 +228,7 @@ ntr.tp = ntr.bp; insertrow(trp, ntokc, &ntr); trp->tp -= rowlen(&ntr); - dofree(ntr.bp); + free(ntr.bp); return; } @@ -255,7 +255,6 @@ if (trp->tp >= trp->lp) { gettokens(trp, 0); if ((trp->lp-1)->type==END) { -/* error(WARNING, "reach END\n"); */ trp->lp -= 1; if (*narg>=0) trp->tp -= ntok; @@ -326,7 +325,25 @@ } return ntok; } - + +int +ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok) +{ + *ap = nil; + *an = nil; + /* EMPTY ## tok */ + if (rtr->tp->type == DSHARP && rtr->tp != rtr->bp) + rtr->tp--; + /* tok ## tok */ + if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) { + *ap = rtr->tp; + if(rtr->tp + 2 != rtr->lp) + *an = rtr->tp + 2; + *ntok = 1 + (*ap != nil) + (*an != nil); + return 1; + } + return 0; +} /* * substitute the argument list into the replacement string * This would be simple except for ## and # @@ -334,12 +351,14 @@ void substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr) { - Tokenrow tatr; - Token *tp; - int ntok, argno; + Tokenrow ttr; + Token *tp, *ap, *an, *pp, *pn; + int ntok, argno, hs; for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) { - if (rtr->tp->type==SHARP) { /* string operator */ + if(rtr->tp->hideset && checkhideset(rtr->tp->hideset, np)) { + rtr->tp++; + } else if (rtr->tp->type==SHARP) { /* string operator */ tp = rtr->tp; rtr->tp += 1; if ((argno = lookuparg(np, rtr->tp))<0) { @@ -349,24 +368,49 @@ ntok = 1 + (rtr->tp - tp); rtr->tp = tp; insertrow(rtr, ntok, stringify(atr[argno])); - continue; + } else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */ + pp = ap; + pn = an; + if (ap && (argno = lookuparg(np, ap)) >= 0){ + 
pp = nil; + if(atr[argno]->tp != atr[argno]->lp) + pp = atr[argno]->lp - 1; + } + if (an && (argno = lookuparg(np, an)) >= 0) { + pn = nil; + if(atr[argno]->tp != atr[argno]->lp) + pn = atr[argno]->lp - 1; + } + glue(&ttr, pp, pn); + insertrow(rtr, ntok, &ttr); + free(ttr.bp); + } else if (rtr->tp->type==NAME) { + if((argno = lookuparg(np, rtr->tp)) >= 0) { + if (rtr->tp < rtr->bp) { + error(ERROR, "access out of bounds"); + continue; + } + copytokenrow(&ttr, atr[argno]); + expandrow(&ttr, ""); + insertrow(rtr, 1, &ttr); + free(ttr.bp); + } else { + maketokenrow(1, &ttr); + ttr.lp = ttr.tp + 1; + *ttr.tp = *rtr->tp; + + hs = newhideset(rtr->tp->hideset, np); + if(ttr.tp->hideset == 0) + ttr.tp->hideset = hs; + else + ttr.tp->hideset = unionhideset(ttr.tp->hideset, hs); + expandrow(&ttr, (char*)np->name); + insertrow(rtr, 1, &ttr); + dofree(ttr.bp); + } + } else { + rtr->tp++; } - if (rtr->tp->type==NAME - && (argno = lookuparg(np, rtr->tp)) >= 0) { - if (rtr->tp < rtr->bp) - error(ERROR, "access out of bounds"); - if ((rtr->tp+1)->type==DSHARP - || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP) - insertrow(rtr, 1, atr[argno]); - else { - copytokenrow(&tatr, atr[argno]); - expandrow(&tatr, "", Inmacro); - insertrow(rtr, 1, &tatr); - dofree(tatr.bp); - } - continue; - } - rtr->tp++; } } @@ -374,41 +418,35 @@ * Evaluate the ## operators in a tokenrow */ void -doconcat(Tokenrow *trp) +glue(Tokenrow *ntr, Token *tp, Token *tn) { - Token *ltp, *ntp; - Tokenrow ntr; - int len; + int np, nn; + char *tt, *p, *n; - for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) { - if (trp->tp->type==DSHARP1) - trp->tp->type = DSHARP; - else if (trp->tp->type==DSHARP) { - char tt[128]; - ltp = trp->tp-1; - ntp = trp->tp+1; - if (ltp<trp->bp || ntp>=trp->lp) { - error(ERROR, "## occurs at border of replacement"); - continue; - } - len = ltp->len + ntp->len; - strncpy((char*)tt, (char*)ltp->t, ltp->len); - strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len); - tt[len] = '\0'; - setsource("<##>", -1, 
tt); - maketokenrow(3, &ntr); - gettokens(&ntr, 1); - unsetsource(); - if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS) - error(WARNING, "Bad token %r produced by ##", &ntr); - ntr.lp = ntr.bp+1; - trp->tp = ltp; - makespace(&ntr); - insertrow(trp, (ntp-ltp)+1, &ntr); - dofree(ntr.bp); - trp->tp--; + np = tp ? tp->len : 0; + nn = tn ? tn->len : 0; + tt = domalloc(np + nn + 1); + if(tp) + memcpy(tt, tp->t, tp->len); + if(tn) + memcpy(tt+np, tn->t, tn->len); + tt[np+nn] = '\0'; + setsource("<##>", -1, tt); + maketokenrow(3, ntr); + gettokens(ntr, 1); + unsetsource(); + dofree(tt); + if (np + nn == 0) { + ntr->lp = ntr->bp; + } else { + if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS) { + p = tp ? (char*)tp->t : ""; + n = tn ? (char*)tn->t : ""; + error(WARNING, "Bad token %r produced by %s ## %s", &ntr, p, n); } + ntr->lp = ntr->bp+1; } + makespace(ntr); } /* diff -r 0dd419f096e2 sys/src/cmd/cpp/test.c --- a/sys/src/cmd/cpp/test.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/test.c Sun Mar 08 10:34:29 2020 -0700 @@ -1,4 +1,28 @@ -#define M1() -#define M2(A1) A1() -M2(M1) -M2(P1) +#define NOP(x) x +#define CAT(a, b) a ## b +#define EOF (-1) +x NOP(CAT(foo, EOF)) y +x NOP(CAT(EOF, foo)) y +x CAT(, EOF) y +y CAT(foo,) x +x CAT(,foo) y +X NOP(CAT(,)) y + +#define NCAT(a) foo ## a +NCAT(bar) + +#define XCAT(a) ## a +foo XCAT(bar) + +#define CAT3(foo) a##foo##b +CAT3(blah) + +#define BAR 3 +#define FOO CAT(BAR, 3) +FOO + +/* +#define xprint(a, ...) print(a, __VA_ARGS__) +xprint("hi", "there") +xprint("hi") +*/ diff -r 0dd419f096e2 sys/src/cmd/cpp/tokens.c --- a/sys/src/cmd/cpp/tokens.c Sun Mar 01 23:23:01 2020 +0100 +++ b/sys/src/cmd/cpp/tokens.c Sun Mar 08 10:34:29 2020 -0700 @@ -136,7 +136,6 @@ movetokenrow(dtr, str); makespace(dtr); dtr->tp += nrtok; - makespace(dtr); } /*