From mboxrd@z Thu Jan 1 00:00:00 1970 Date: Tue, 15 Nov 2011 13:10:35 -0800 From: Anthony Martin To: Fans of the OS Plan 9 from Bell Labs <9fans@9fans.net> Message-ID: <20111115211034.GA6135@dinah> References: MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="VbJkn9YxBvnuCH5J" Content-Disposition: inline In-Reply-To: Subject: Re: [9fans] all you yacc experts Topicbox-Message-UUID: 409d4c0a-ead7-11e9-9d60-3106f5b1d025 --VbJkn9YxBvnuCH5J Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Attached is a modified version of p9p yacc that supports the Go grammar. I'll be sending a version of Plan 9 yacc later today. The following is a description of the changes. 1. The %error-verbose directive is ignored. 2. A description of the final grammar is printed before the state descriptions in y.output. 3. The 'x' format for character literals is now used instead of prefixing with a space. 4. The YYEMPTY define is now used to clear the lookahead token (instead of an explicit negative one). 5. Make yychar and yystate globals so they can be inspected by external code. 6. Support C++-style // comments in actions. 7. Add a usage message. 8. Replace a few unbounded uses of sprint and strcpy with snprint and strecpy. I've also sent out a changeset to the Go development list which adds support for using Plan 9 yacc to generate the special errors. One tiny nit is that Plan 9 uses the name yytoknames for debugging where Bison uses yytname. I've just used sed for this. Any questions? 
Anthony --VbJkn9YxBvnuCH5J Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="yacc.diff" diff -r 44a7194d00cf lib/yaccpar --- a/lib/yaccpar Sat Nov 12 11:52:10 2011 -0800 +++ b/lib/yaccpar Tue Nov 15 13:10:13 2011 -0800 @@ -2,7 +2,7 @@ #define YYERROR goto yyerrlab #define YYACCEPT return(0) #define YYABORT return(1) -#define yyclearin yychar = -1 +#define yyclearin yychar = YYEMPTY #define yyerrok yyerrflag = 0 #ifdef yydebug @@ -51,6 +51,8 @@ return x; } +long yychar; + static long #ifdef YYARG yylex1(struct Yyarg *yyarg) @@ -58,7 +60,6 @@ yylex1(void) #endif { - long yychar; const long *t3p; int c; @@ -68,6 +69,7 @@ yychar = yylex(); #endif if(yychar <= 0) { + yychar = 0; c = yytok1[0]; goto out; } @@ -99,6 +101,8 @@ return c; } +int yystate; + int #ifdef YYARG yyparse(struct Yyarg *yyarg) @@ -112,8 +116,8 @@ int yys; } yys[YYMAXDEPTH], *yyp, *yypt; const short *yyxi; - int yyj, yym, yystate, yyn, yyg; - long yychar; + int yyj, yym, yyn, yyg; + long yyc; #ifndef YYARG YYSTYPE save1, save2; int save3, save4; @@ -125,7 +129,8 @@ #endif yystate = 0; - yychar = -1; + yychar = YYEMPTY; + yyc = YYEMPTY; yynerrs = 0; yyerrflag = 0; yyp = &yys[-1]; @@ -151,7 +156,7 @@ yystack: /* put a state and value onto the stack */ if(yydebug >= 4) - fprint(2, "char %s in %s", yytokname(yychar), yystatname(yystate)); + fprint(2, "char %s in %s", yytokname(yyc), yystatname(yystate)); yyp++; if(yyp >= &yys[YYMAXDEPTH]) { @@ -165,18 +170,19 @@ yyn = yypact[yystate]; if(yyn <= YYFLAG) goto yydefault; /* simple state */ - if(yychar < 0) + if(yyc < 0) #ifdef YYARG - yychar = yylex1(yyarg); + yyc = yylex1(yyarg); #else - yychar = yylex1(); + yyc = yylex1(); #endif - yyn += yychar; + yyn += yyc; if(yyn < 0 || yyn >= YYLAST) goto yydefault; yyn = yyact[yyn]; - if(yychk[yyn] == yychar) { /* valid shift */ - yychar = -1; + if(yychk[yyn] == yyc) { /* valid shift */ + yyc = YYEMPTY; + yychar = YYEMPTY; yyval = yylval; yystate = yyn; if(yyerrflag > 0) @@ -188,11 
+194,11 @@ /* default state action */ yyn = yydef[yystate]; if(yyn == -2) { - if(yychar < 0) + if(yyc < 0) #ifdef YYARG - yychar = yylex1(yyarg); + yyc = yylex1(yyarg); #else - yychar = yylex1(); + yyc = yylex1(); #endif /* look through exception table */ @@ -201,21 +207,24 @@ break; for(yyxi += 2;; yyxi += 2) { yyn = yyxi[0]; - if(yyn < 0 || yyn == yychar) + if(yyn < 0 || yyn == yyc) break; } yyn = yyxi[1]; - if(yyn < 0) + if(yyn < 0) { + yyc = YYEMPTY; + yychar = YYEMPTY; goto ret0; + } } if(yyn == 0) { /* error ... attempt to resume parsing */ switch(yyerrflag) { case 0: /* brand new error */ yyerror("syntax error"); - if(yydebug >= 1) { + if(yydebug >= 2) { fprint(2, "%s", yystatname(yystate)); - fprint(2, "saw %s\n", yytokname(yychar)); + fprint(2, "saw %s\n", yytokname(yyc)); } goto yyerrlab; yyerrlab: @@ -245,10 +254,11 @@ case 3: /* no shift yet; clobber input char */ if(yydebug >= 2) - fprint(2, "error recovery discards %s\n", yytokname(yychar)); - if(yychar == YYEOFCODE) + fprint(2, "error recovery discards %s\n", yytokname(yyc)); + if(yyc == YYEOFCODE) goto ret1; - yychar = -1; + yyc = YYEMPTY; + yychar = YYEMPTY; goto yynewstate; /* try again in the same state */ } } diff -r 44a7194d00cf src/cmd/yacc.c --- a/src/cmd/yacc.c Sat Nov 12 11:52:10 2011 -0800 +++ b/src/cmd/yacc.c Tue Nov 15 13:10:13 2011 -0800 @@ -92,6 +92,7 @@ TYPEDEF, TYPENAME, UNION, + IGNORE, ENDFILE = 0, @@ -319,6 +320,9 @@ "token", TERM, "type", TYPEDEF, "union", UNION, + + /* ignored bison directives */ + "error-verbose", IGNORE, 0, }; @@ -330,6 +334,7 @@ char* writem(int*); char* symnam(int); void summary(void); +void grammar(void); void error(char*, ...); void aryfil(int*, int, int); int setunion(int*, int*); @@ -388,6 +393,7 @@ cempty(); /* make a table of which nonterminals can match the empty string */ cpfir(); /* make a table of firsts of nonterminals */ stagen(); /* generate the states */ + grammar(); output(); /* write the states and the tables */ go2out(); hideprod(); @@ 
-531,14 +537,14 @@ ; p = prdptr[-*p]; q = chcopy(sarr, nontrst[*p-NTBASE].name); - q = chcopy(q, ": "); + q = chcopy(q, ":"); for(;;) { *q = ' '; p++; - if(p == pp) - *q = '.'; q++; *q = '\0'; + if(p == pp) + q = chcopy(q, ". "); i = *p; if(i <= 0) break; @@ -550,7 +556,7 @@ /* an item calling for a reduction */ i = *pp; if(i < 0 ) { - q = chcopy(q, " ("); + q = chcopy(q, " ("); sprint(q, "%d)", -i); } return sarr; @@ -562,12 +568,41 @@ char* symnam(int i) { - char* cp; + return (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name; +} - cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name; - if(*cp == ' ') - cp++; - return cp; +/* + * output the grammar rules on y.output + */ +void +grammar(void) +{ + int i, j, n, prev; + int *p; + + if(foutput == 0) + return; + + Bprint(foutput, "\nGrammar\n"); + prev = 0; + n = 0; + PLOOP(0, i) { + p = prdptr[i]; + j = 0; + if(p[0] == prev) + Bprint(foutput, "\t%d \t|", n++); + else { + Bprint(foutput, "\n\t%d %s:", n++, symnam(p[0])); + if(p[1] <= 0) + Bprint(foutput, " /* empty */"); + } + for(j = 1; p[j] > 0; j++) { + Bprint(foutput, " %s", symnam(p[j])); + } + Bputc(foutput, '\n'); + prev = p[0]; + } + Bprint(foutput, "\n"); } /* @@ -1173,10 +1208,17 @@ } void +usage(void) +{ + fprint(2, "usage: yacc [-Dn] [-vdS] [-o outputfile] [-s stem] grammar\n"); + exits("usage"); +} + +void setup(int argc, char *argv[]) { long c, t; - int i, j, fd, lev, ty, ytab, *p; + int i, j, lev, ty, ytab, *p; int vflag, dflag, stem; char actnm[8], *stemc, *s, dirbuf[128]; Biobuf *fout; @@ -1195,7 +1237,7 @@ vflag++; break; case 'D': - yydebug = ARGF(); + yydebug = EARGF(usage()); break; case 'a': yyarg = 1; @@ -1208,7 +1250,7 @@ break; case 'o': ytab++; - ytabc = ARGF(); + ytabc = EARGF(usage()); break; case 's': stem++; @@ -1221,18 +1263,11 @@ error("illegal option: %c", ARGC()); }ARGEND openup(stemc, dflag, vflag, ytab, ytabc); - fout = dflag?fdefine:ftable; - if(yyarg){ + if(yyarg) Bprint(ftable, "#define\tYYARG\t1\n\n"); - } - if((fd = 
mkstemp(ttempname)) >= 0){ - tempname = ttempname; - ftemp = Bfdopen(fd, OWRITE); - } - if((fd = mkstemp(tactname)) >= 0){ - actname = tactname; - faction = Bfdopen(fd, OWRITE); - } + + ftemp = Bopen(tempname = mktemp(ttempname), OWRITE); + faction = Bopen(actname = mktemp(tactname), OWRITE); if(ftemp == 0 || faction == 0) error("cannot open temp file"); if(argc < 1) @@ -1375,6 +1410,10 @@ t = gettok(); continue; + case IGNORE: + t = gettok(); + continue; + default: error("syntax error"); } @@ -1396,8 +1435,11 @@ Bprint(ftable, "YYSTYPE yylval;\n"); Bprint(ftable, "YYSTYPE yyval;\n"); }else{ - if(dflag) + fout = ftable; + if(dflag){ + fout = fdefine; Bprint(ftable, "#include \"%s.%s\"\n\n", stemc, FILED); + } Bprint(fout, "struct Yyarg {\n"); Bprint(fout, "\tint\tyynerrs;\n"); Bprint(fout, "\tint\tyyerrflag;\n"); @@ -1406,6 +1448,7 @@ Bprint(fout, "\tYYSTYPE\tyylval;\n"); Bprint(fout, "};\n\n"); } + prdptr[0] = mem; /* added production */ @@ -1552,6 +1595,7 @@ Bterm(faction); Bprint(ftable, "#define YYEOFCODE %d\n", 1); Bprint(ftable, "#define YYERRCODE %d\n", 2); + Bprint(ftable, "#define YYEMPTY (%d)\n", -2); } /* @@ -1581,17 +1625,17 @@ /* establish value for token */ /* single character literal */ - if(s[0] == ' ') { + if(s[0] == '\'') { val = chartorune(&rune, &s[1]); - if(s[val+1] == 0) { + if(s[val+1] == '\'') { val = rune; goto out; } } /* escape sequence */ - if(s[0] == ' ' && s[1] == '\\') { - if(s[3] == 0) { + if(s[0] == '\'' && s[1] == '\\') { + if(s[3] == '\'') { /* single character escape sequence */ switch(s[2]) { case 'n': val = '\n'; break; @@ -1625,6 +1669,7 @@ val = extval++; out: + //print("%s = %d\n", s, val); tokset[ntokens].value = val; toklev[ntokens] = 0; return ntokens; @@ -1642,7 +1687,7 @@ for(i=ndefout; i<=ntokens; i++) { /* non-literals */ c = tokset[i].name[0]; - if(c != ' ' && c != '$') { + if(c != '\'' && c != '$') { Bprint(ftable, "#define %s %d\n", tokset[i].name, tokset[i].value); if(fdefine) @@ -1737,7 +1782,7 @@ case '"': case 
'\'': match = c; - tokname[0] = ' '; + tokname[0] = '\''; i = 1; for(;;) { c = Bgetrune(finput); @@ -1756,6 +1801,8 @@ if(i < NAMESIZE) i += c; } + tokname[i] = '\''; + i++; break; case '%': @@ -1847,7 +1894,7 @@ { int i; - if(s[0] == ' ') + if(s[0] == '\'') t = 0; TLOOP(i) if(!strcmp(s, tokset[i].name)) @@ -1915,6 +1962,7 @@ void cpycode(void) { + long c; c = Bgetrune(finput); @@ -1956,17 +2004,22 @@ /* i is the number of lines skipped */ i = 0; - if(Bgetrune(finput) != '*') + c = Bgetrune(finput); + if(c == '/'){ /* C++ //: skip to end of line */ + while((c = Bgetrune(finput)) != Beof) + if(c == '\n') + return 1; + }else if(c == '*'){ /* normal C comment */ + while((c = Bgetrune(finput)) != Beof) { + while(c == '*') + if((c = Bgetrune(finput)) == '/') + return i; + if(c == '\n') + i++; + } + }else error("illegal comment"); - c = Bgetrune(finput); - while(c != Beof) { - while(c == '*') - if((c=Bgetrune(finput)) == '/') - return i; - if(c == '\n') - i++; - c = Bgetrune(finput); - } + error("EOF inside comment"); return 0; } @@ -2094,22 +2147,30 @@ /* look for comments */ Bputrune(faction, c); c = Bgetrune(finput); - if(c != '*') + switch(c) { + case '/': + while(c != Beof) { + if(c == '\n') + goto swt; + Bputrune(faction, c); + c = Bgetrune(finput); + } + break; + case '*': + while(c != Beof) { + while(c == '*') { + Bputrune(faction, c); + if((c = Bgetrune(finput)) == '/') + goto lcopy; + } + Bputrune(faction, c); + if(c == '\n') + lineno++; + c = Bgetrune(faction); + } + break; + default: goto swt; - - /* it really is a comment */ - Bputrune(faction, c); - c = Bgetrune(finput); - while(c >= 0) { - while(c == '*') { - Bputrune(faction, c); - if((c=Bgetrune(finput)) == '/') - goto lcopy; - } - Bputrune(faction, c); - if(c == '\n') - lineno++; - c = Bgetrune(finput); } error("EOF inside comment"); @@ -2158,26 +2219,26 @@ char buf[256]; if(vflag) { - sprint(buf, "%s.%s", stem, FILEU); + snprint(buf, sizeof buf, "%s.%s", stem, FILEU); foutput = Bopen(buf, OWRITE); 
if(foutput == 0) error("cannot open %s", buf); } if(yydebug) { - sprint(buf, "%s.%s", stem, FILEDEBUG); + snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG); if((fdebug = Bopen(buf, OWRITE)) == 0) error("can't open %s", buf); } if(dflag) { - sprint(buf, "%s.%s", stem, FILED); + snprint(buf, sizeof buf, "%s.%s", stem, FILED); fdefine = Bopen(buf, OWRITE); if(fdefine == 0) error("can't create %s", buf); } if(ytab == 0) - sprint(buf, "%s.%s", stem, OFILE); + snprint(buf, sizeof buf, "%s.%s", stem, OFILE); else - strcpy(buf, ytabc); + strecpy(buf, buf+sizeof buf, ytabc); ftable = Bopen(buf, OWRITE); if(ftable == 0) error("cannot open table file %s", buf); --VbJkn9YxBvnuCH5J--