From: ori@eigenstate.org
To: ori@eigenstate.org, cinap_lenrek@felloff.net, 9front@9front.org
Subject: Re: [9front] Fix CPP Token Paste: Testing requested.
Date: Fri, 6 Mar 2020 16:47:33 -0800 [thread overview]
Message-ID: <94224BDA65B7C92201491757EC52C7F9@eigenstate.org> (raw)
In-Reply-To: <B825A72128C5A7DF8D6AE7070FEB4641@eigenstate.org>
>>> good work!
>>>
>>> how about we run the old and new cpp binaries on every c file we get and
>>> check for differences in the output for testing?
>>
>> Will do, though we don't have too much preprocessor abuse in our
>> code. (ape doesn't use '##' even once!)
>>
>> Did find one bug I introduced when testing ocaml -- added the NCAT
>> case to test.c to cover it. Here's an updated patch:
>>
>
> Ok. Tested with:
>
> fn t {
> ramfs -m /tmp/m
> for(f in `{walk -f /sys/src|grep '\.c$'}){
> mkdir -p /tmp/m/^$f;
> >/tmp/m/^$f^/old >[2]/dev/null cpp $f;
> >/tmp/m/^$f^/new >[2]/dev/null /usr/ori/src/cpp/6.out $f
> }
> for(f in `{walk -f /sys/src|grep '\.c$'})
> ape/diff -u /tmp/m/^$f^/old /tmp/m/^$f^/new
> }
>
> With this diff, we have no changes in the cpp output.
> It preprocesses and builds ocaml, and doesn't seem to
> error on perl5, so far (Though, we have other issues.
> The bitfields thing is biting us again.)
>
And I found one more edge case: CAT(,) should produce an
empty token. That now works.
It looks like we mishandle empty vararg lists too, but
that's a separate issue, and probably a separate patch.
So:
- No diffs on /sys/src
- Looks like it works ok on ocaml, gmake, and my wip perl5 fixes
Are there any other tests anyone thinks I should run?
diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.c
--- a/sys/src/cmd/cpp/cpp.c Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.c Fri Mar 06 16:31:21 2020 -0800
@@ -68,7 +68,7 @@
trp->tp += 1;
control(trp);
} else if (!skipping && anymacros)
- expandrow(trp, NULL, Notinmacro);
+ expandrow(trp, NULL);
if (skipping)
setempty(trp);
puttokens(trp);
@@ -217,7 +217,7 @@
case KLINE:
trp->tp = tp+1;
- expandrow(trp, "<line>", Notinmacro);
+ expandrow(trp, "<line>");
tp = trp->bp+2;
kline:
if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp
diff -r 0dd419f096e2 sys/src/cmd/cpp/cpp.h
--- a/sys/src/cmd/cpp/cpp.h Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/cpp.h Fri Mar 06 16:31:21 2020 -0800
@@ -107,11 +107,11 @@
void doadefine(Tokenrow *, int);
void doinclude(Tokenrow *);
void doif(Tokenrow *, enum kwtype);
-void expand(Tokenrow *, Nlist *, int);
+void expand(Tokenrow *, Nlist *);
void builtin(Tokenrow *, int);
int gatherargs(Tokenrow *, Tokenrow **, int, int *);
void substargs(Nlist *, Tokenrow *, Tokenrow **);
-void expandrow(Tokenrow *, char *, int);
+void expandrow(Tokenrow *, char *);
void maketokenrow(int, Tokenrow *);
Tokenrow *copytokenrow(Tokenrow *, Tokenrow *);
Token *growtokenrow(Tokenrow *);
@@ -120,7 +120,7 @@
void movetokenrow(Tokenrow *, Tokenrow *);
void insertrow(Tokenrow *, int, Tokenrow *);
void peektokens(Tokenrow *, char *);
-void doconcat(Tokenrow *);
+void glue(Tokenrow *, Token *, Token *);
Tokenrow *stringify(Tokenrow *);
int lookuparg(Nlist *, Token *);
long eval(Tokenrow *, int);
diff -r 0dd419f096e2 sys/src/cmd/cpp/eval.c
--- a/sys/src/cmd/cpp/eval.c Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/eval.c Fri Mar 06 16:31:21 2020 -0800
@@ -116,7 +116,7 @@
}
ntok = trp->tp - trp->bp;
kwdefined->val = KDEFINED; /* activate special meaning of defined */
- expandrow(trp, "<if>", Notinmacro);
+ expandrow(trp, "<if>");
kwdefined->val = NAME;
vp = vals;
op = ops;
diff -r 0dd419f096e2 sys/src/cmd/cpp/include.c
--- a/sys/src/cmd/cpp/include.c Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/include.c Fri Mar 06 16:31:21 2020 -0800
@@ -18,7 +18,7 @@
goto syntax;
if (trp->tp->type!=STRING && trp->tp->type!=LT) {
len = trp->tp - trp->bp;
- expandrow(trp, "<include>", Notinmacro);
+ expandrow(trp, "<include>");
trp->tp = trp->bp+len;
}
if (trp->tp->type==STRING) {
diff -r 0dd419f096e2 sys/src/cmd/cpp/macro.c
--- a/sys/src/cmd/cpp/macro.c Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/macro.c Fri Mar 06 16:31:21 2020 -0800
@@ -138,7 +138,7 @@
* Flag is NULL if more input can be gathered.
*/
void
-expandrow(Tokenrow *trp, char *flag, int inmacro)
+expandrow(Tokenrow *trp, char *flag)
{
Token *tp;
Nlist *np;
@@ -170,7 +170,7 @@
if (np->flag&ISMAC)
builtin(trp, np->val);
else {
- expand(trp, np, inmacro);
+ expand(trp, np);
}
tp = trp->tp;
}
@@ -184,7 +184,7 @@
* (ordinarily the beginning of the expansion)
*/
void
-expand(Tokenrow *trp, Nlist *np, int inmacro)
+expand(Tokenrow *trp, Nlist *np)
{
Tokenrow ntr;
int ntokc, narg, i;
@@ -214,8 +214,6 @@
dofree(atr[i]);
}
}
- if(!inmacro)
- doconcat(&ntr); /* execute ## operators */
hs = newhideset(trp->tp->hideset, np);
for (tp=ntr.bp; tp<ntr.lp; tp++) { /* distribute hidesets */
if (tp->type==NAME) {
@@ -228,7 +226,7 @@
ntr.tp = ntr.bp;
insertrow(trp, ntokc, &ntr);
trp->tp -= rowlen(&ntr);
- dofree(ntr.bp);
+ free(ntr.bp);
return;
}
@@ -326,7 +324,25 @@
}
return ntok;
}
-
+
+int
+ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok)
+{
+ *ap = nil;
+ *an = nil;
+ /* EMPTY ## tok */
+ if (rtr->tp->type == DSHARP && rtr->tp != rtr->bp)
+ rtr->tp--;
+ /* tok ## tok */
+ if(rtr->tp + 1 != rtr->lp && rtr->tp[1].type == DSHARP) {
+ *ap = rtr->tp;
+ if(rtr->tp + 2 != rtr->lp)
+ *an = rtr->tp + 2;
+ *ntok = 1 + (*ap != nil) + (*an != nil);
+ return 1;
+ }
+ return 0;
+}
/*
* substitute the argument list into the replacement string
* This would be simple except for ## and #
@@ -334,8 +350,8 @@
void
substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
{
- Tokenrow tatr;
- Token *tp;
+ Tokenrow tatr, tcat;
+ Token *tp, *ap, *an, *pp, *pn;
int ntok, argno;
for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) {
@@ -350,19 +366,31 @@
rtr->tp = tp;
insertrow(rtr, ntok, stringify(atr[argno]));
continue;
- }
- if (rtr->tp->type==NAME
- && (argno = lookuparg(np, rtr->tp)) >= 0) {
+ } else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */
+ pp = ap;
+ pn = an;
+ if (ap && (argno = lookuparg(np, ap)) >= 0){
+ pp = nil;
+ if(atr[argno]->tp != atr[argno]->lp)
+ pp = atr[argno]->lp - 1;
+ }
+ if (an && (argno = lookuparg(np, an)) >= 0) {
+ pn = nil;
+ if(atr[argno]->tp != atr[argno]->lp)
+ pn = atr[argno]->lp - 1;
+ }
+ glue(&tcat, pp, pn);
+ insertrow(rtr, ntok, &tcat);
+ free(tcat.bp);
+ continue;
+ } else if (rtr->tp->type==NAME && (argno = lookuparg(np, rtr->tp)) >= 0) {
if (rtr->tp < rtr->bp)
error(ERROR, "access out of bounds");
- if ((rtr->tp+1)->type==DSHARP
- || rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)
- insertrow(rtr, 1, atr[argno]);
else {
copytokenrow(&tatr, atr[argno]);
- expandrow(&tatr, "<macro>", Inmacro);
+ expandrow(&tatr, "<macro>");
insertrow(rtr, 1, &tatr);
- dofree(tatr.bp);
+ free(tatr.bp);
}
continue;
}
@@ -374,41 +402,32 @@
* Evaluate the ## operators in a tokenrow
*/
void
-doconcat(Tokenrow *trp)
+glue(Tokenrow *ntr, Token *tp, Token *tn)
{
- Token *ltp, *ntp;
- Tokenrow ntr;
- int len;
+ int np, nn;
+ char *tt;
- for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) {
- if (trp->tp->type==DSHARP1)
- trp->tp->type = DSHARP;
- else if (trp->tp->type==DSHARP) {
- char tt[128];
- ltp = trp->tp-1;
- ntp = trp->tp+1;
- if (ltp<trp->bp || ntp>=trp->lp) {
- error(ERROR, "## occurs at border of replacement");
- continue;
- }
- len = ltp->len + ntp->len;
- strncpy((char*)tt, (char*)ltp->t, ltp->len);
- strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len);
- tt[len] = '\0';
- setsource("<##>", -1, tt);
- maketokenrow(3, &ntr);
- gettokens(&ntr, 1);
- unsetsource();
- if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS)
- error(WARNING, "Bad token %r produced by ##", &ntr);
- ntr.lp = ntr.bp+1;
- trp->tp = ltp;
- makespace(&ntr);
- insertrow(trp, (ntp-ltp)+1, &ntr);
- dofree(ntr.bp);
- trp->tp--;
- }
+ np = tp ? tp->len : 0;
+ nn = tn ? tn->len : 0;
+ tt = domalloc(np + nn + 1);
+ if(tp)
+ memcpy(tt, tp->t, tp->len);
+ if(tn)
+ memcpy(tt+np, tn->t, tn->len);
+ tt[np+nn] = '\0';
+ setsource("<##>", -1, tt);
+ maketokenrow(3, ntr);
+ gettokens(ntr, 1);
+ unsetsource();
+ dofree(tt);
+ if (np + nn == 0) {
+ ntr->lp = ntr->bp;
+ } else {
+ if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS)
+ error(WARNING, "Bad token %r produced by ##", &ntr);
+ ntr->lp = ntr->bp+1;
}
+ makespace(ntr);
}
/*
diff -r 0dd419f096e2 sys/src/cmd/cpp/test.c
--- a/sys/src/cmd/cpp/test.c Sun Mar 01 23:23:01 2020 +0100
+++ b/sys/src/cmd/cpp/test.c Fri Mar 06 16:31:21 2020 -0800
@@ -1,4 +1,18 @@
-#define M1()
-#define M2(A1) A1()
-M2(M1)
-M2(P1)
+#define NOP(x) x
+#define CAT(a, b) a ## b
+#define EOF (-1)
+x CAT(foo, EOF) y
+x CAT(EOF, foo) y
+x CAT(, EOF) y
+y CAT(foo,) x
+x CAT(,foo) y
+X CAT(,) y
+
+#define NCAT(a) foo ## a
+NCAT(bar)
+
+#define XCAT(a) ## a
+foo XCAT(bar)
+
+#define CAT3(foo) a##foo##b
+CAT3(blah)
next prev parent reply other threads:[~2020-03-07 0:47 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-03-05 17:26 ori
2020-03-05 19:21 ` [9front] " cinap_lenrek
2020-03-05 21:42 ` ori
2020-03-06 7:01 ` ori
2020-03-07 0:47 ` ori [this message]
2020-03-08 17:42 ` ori
2020-03-16 2:55 ` ori
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=94224BDA65B7C92201491757EC52C7F9@eigenstate.org \
--to=ori@eigenstate.org \
--cc=9front@9front.org \
--cc=cinap_lenrek@felloff.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).