9fans - fans of the OS Plan 9 from Bell Labs
 help / color / mirror / Atom feed
From: Anthony Martin <ality@pbrane.org>
To: Fans of the OS Plan 9 from Bell Labs <9fans@9fans.net>
Subject: Re: [9fans] all you yacc experts
Date: Tue, 15 Nov 2011 13:10:35 -0800	[thread overview]
Message-ID: <20111115211034.GA6135@dinah> (raw)
In-Reply-To: <CAP6exY+7R9z3uusW683gFCCjvt+t6cZVpq-0TWZL2SiDBbMw3g@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1060 bytes --]

Attached is a modified version of p9p yacc that
supports the Go grammar.  I'll be sending a
version of Plan 9 yacc later today.

The following is a description of the changes.

  1. The %error-verbose directive is ignored.

  2. A description of the final grammar is
     printed before the state descriptions
     in y.output.

  3. The 'x' format for character literals is
     now used instead of prefixing with a space.

  4. The YYEMPTY define is now used to clear
     the lookahead token (instead of an explicit
     -1).

  5. Make yychar and yystate globals so they
     can be inspected by external code.

  6. Support C++ style // comments in actions.

  7. Add a usage message.

  8. Replace a few unbounded sprint and strcpy
     calls with snprint and strecpy.


I've also sent out a changeset to the Go
development list which adds support for
using Plan 9 yacc to generate the special
errors.

One tiny nit is that Plan 9 uses the name
yytoknames for debugging where Bison uses
yytname.  I've just used sed for this.

Any questions?
  Anthony

[-- Attachment #2: yacc.diff --]
[-- Type: text/plain, Size: 11953 bytes --]

diff -r 44a7194d00cf lib/yaccpar
--- a/lib/yaccpar	Sat Nov 12 11:52:10 2011 -0800
+++ b/lib/yaccpar	Tue Nov 15 13:10:13 2011 -0800
@@ -2,7 +2,7 @@
 #define YYERROR		goto yyerrlab
 #define YYACCEPT	return(0)
 #define YYABORT		return(1)
-#define	yyclearin	yychar = -1
+#define	yyclearin	yychar = YYEMPTY
 #define	yyerrok		yyerrflag = 0

 #ifdef	yydebug
@@ -51,6 +51,8 @@
 	return x;
 }

+long yychar;
+
 static long
 #ifdef YYARG
 yylex1(struct Yyarg *yyarg)
@@ -58,7 +60,6 @@
 yylex1(void)
 #endif
 {
-	long yychar;
 	const long *t3p;
 	int c;

@@ -68,6 +69,7 @@
 	yychar = yylex();
 #endif
 	if(yychar <= 0) {
+		yychar = 0;
 		c = yytok1[0];
 		goto out;
 	}
@@ -99,6 +101,8 @@
 	return c;
 }

+int yystate;
+
 int
 #ifdef YYARG
 yyparse(struct Yyarg *yyarg)
@@ -112,8 +116,8 @@
 		int	yys;
 	} yys[YYMAXDEPTH], *yyp, *yypt;
 	const short *yyxi;
-	int yyj, yym, yystate, yyn, yyg;
-	long yychar;
+	int yyj, yym, yyn, yyg;
+	long yyc;
 #ifndef YYARG
 	YYSTYPE save1, save2;
 	int save3, save4;
@@ -125,7 +129,8 @@
 #endif

 	yystate = 0;
-	yychar = -1;
+	yychar = YYEMPTY;
+	yyc = YYEMPTY;
 	yynerrs = 0;
 	yyerrflag = 0;
 	yyp = &yys[-1];
@@ -151,7 +156,7 @@
 yystack:
 	/* put a state and value onto the stack */
 	if(yydebug >= 4)
-		fprint(2, "char %s in %s", yytokname(yychar), yystatname(yystate));
+		fprint(2, "char %s in %s", yytokname(yyc), yystatname(yystate));

 	yyp++;
 	if(yyp >= &yys[YYMAXDEPTH]) {
@@ -165,18 +170,19 @@
 	yyn = yypact[yystate];
 	if(yyn <= YYFLAG)
 		goto yydefault; /* simple state */
-	if(yychar < 0)
+	if(yyc < 0)
 #ifdef YYARG
-		yychar = yylex1(yyarg);
+		yyc = yylex1(yyarg);
 #else
-		yychar = yylex1();
+		yyc = yylex1();
 #endif
-	yyn += yychar;
+	yyn += yyc;
 	if(yyn < 0 || yyn >= YYLAST)
 		goto yydefault;
 	yyn = yyact[yyn];
-	if(yychk[yyn] == yychar) { /* valid shift */
-		yychar = -1;
+	if(yychk[yyn] == yyc) { /* valid shift */
+		yyc = YYEMPTY;
+		yychar = YYEMPTY;
 		yyval = yylval;
 		yystate = yyn;
 		if(yyerrflag > 0)
@@ -188,11 +194,11 @@
 	/* default state action */
 	yyn = yydef[yystate];
 	if(yyn == -2) {
-		if(yychar < 0)
+		if(yyc < 0)
 #ifdef YYARG
-		yychar = yylex1(yyarg);
+			yyc = yylex1(yyarg);
 #else
-		yychar = yylex1();
+			yyc = yylex1();
 #endif

 		/* look through exception table */
@@ -201,21 +207,24 @@
 				break;
 		for(yyxi += 2;; yyxi += 2) {
 			yyn = yyxi[0];
-			if(yyn < 0 || yyn == yychar)
+			if(yyn < 0 || yyn == yyc)
 				break;
 		}
 		yyn = yyxi[1];
-		if(yyn < 0)
+		if(yyn < 0) {
+			yyc = YYEMPTY;
+			yychar = YYEMPTY;
 			goto ret0;
+		}
 	}
 	if(yyn == 0) {
 		/* error ... attempt to resume parsing */
 		switch(yyerrflag) {
 		case 0:   /* brand new error */
 			yyerror("syntax error");
-			if(yydebug >= 1) {
+			if(yydebug >= 2) {
 				fprint(2, "%s", yystatname(yystate));
-				fprint(2, "saw %s\n", yytokname(yychar));
+				fprint(2, "saw %s\n", yytokname(yyc));
 			}
 			goto yyerrlab;
 		yyerrlab:
@@ -245,10 +254,11 @@

 		case 3:  /* no shift yet; clobber input char */
 			if(yydebug >= 2)
-				fprint(2, "error recovery discards %s\n", yytokname(yychar));
-			if(yychar == YYEOFCODE)
+				fprint(2, "error recovery discards %s\n", yytokname(yyc));
+			if(yyc == YYEOFCODE)
 				goto ret1;
-			yychar = -1;
+			yyc = YYEMPTY;
+			yychar = YYEMPTY;
 			goto yynewstate;   /* try again in the same state */
 		}
 	}
diff -r 44a7194d00cf src/cmd/yacc.c
--- a/src/cmd/yacc.c	Sat Nov 12 11:52:10 2011 -0800
+++ b/src/cmd/yacc.c	Tue Nov 15 13:10:13 2011 -0800
@@ -92,6 +92,7 @@
 	TYPEDEF,
 	TYPENAME,
 	UNION,
+	IGNORE,

 	ENDFILE		= 0,

@@ -319,6 +320,9 @@
 	"token",	TERM,
 	"type",		TYPEDEF,
 	"union",	UNION,
+
+	/* ignored bison directives */
+	"error-verbose",	IGNORE,
 	0,
 };

@@ -330,6 +334,7 @@
 char*	writem(int*);
 char*	symnam(int);
 void	summary(void);
+void	grammar(void);
 void	error(char*, ...);
 void	aryfil(int*, int, int);
 int	setunion(int*, int*);
@@ -388,6 +393,7 @@
 	cempty();		/* make a table of which nonterminals can match the empty string */
 	cpfir();		/* make a table of firsts of nonterminals */
 	stagen();		/* generate the states */
+	grammar();
 	output();		/* write the states and the tables */
 	go2out();
 	hideprod();
@@ -531,14 +537,14 @@
 		;
 	p = prdptr[-*p];
 	q = chcopy(sarr, nontrst[*p-NTBASE].name);
-	q = chcopy(q, ": ");
+	q = chcopy(q, ":");
 	for(;;) {
 		*q = ' ';
 		p++;
-		if(p == pp)
-			*q = '.';
 		q++;
 		*q = '\0';
+		if(p == pp)
+			q = chcopy(q, ". ");
 		i = *p;
 		if(i <= 0)
 			break;
@@ -550,7 +556,7 @@
 	/* an item calling for a reduction */
 	i = *pp;
 	if(i < 0 ) {
-		q = chcopy(q, "    (");
+		q = chcopy(q, "   (");
 		sprint(q, "%d)", -i);
 	}
 	return sarr;
@@ -562,12 +568,41 @@
 char*
 symnam(int i)
 {
-	char* cp;
+	return (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
+}

-	cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
-	if(*cp == ' ')
-		cp++;
-	return cp;
+/*
+ * output the grammar rules on y.output
+ */
+void
+grammar(void)
+{
+	int i, j, n, prev;
+	int *p;
+
+	if(foutput == 0)
+		return;
+
+	Bprint(foutput, "\nGrammar\n");
+	prev = 0;
+	n = 0;
+	PLOOP(0, i) {
+		p = prdptr[i];
+		j = 0;
+		if(p[0] == prev)
+			Bprint(foutput, "\t%d \t|", n++);
+		else {
+			Bprint(foutput, "\n\t%d %s:", n++, symnam(p[0]));
+			if(p[1] <= 0)
+				Bprint(foutput, " /* empty */");
+		}
+		for(j = 1; p[j] > 0; j++) {
+			Bprint(foutput, " %s", symnam(p[j]));
+		}
+		Bputc(foutput, '\n');
+		prev = p[0];
+	}
+	Bprint(foutput, "\n");
 }

 /*
@@ -1173,10 +1208,17 @@
 }

 void
+usage(void)
+{
+	fprint(2, "usage: yacc [-Dn] [-vdS] [-o outputfile] [-s stem] grammar\n");
+	exits("usage");
+}
+
+void
 setup(int argc, char *argv[])
 {
 	long c, t;
-	int i, j, fd, lev, ty, ytab, *p;
+	int i, j, lev, ty, ytab, *p;
 	int vflag, dflag, stem;
 	char actnm[8], *stemc, *s, dirbuf[128];
 	Biobuf *fout;
@@ -1195,7 +1237,7 @@
 		vflag++;
 		break;
 	case 'D':
-		yydebug = ARGF();
+		yydebug = EARGF(usage());
 		break;
 	case 'a':
 		yyarg = 1;
@@ -1208,7 +1250,7 @@
 		break;
 	case 'o':
 		ytab++;
-		ytabc = ARGF();
+		ytabc = EARGF(usage());
 		break;
 	case 's':
 		stem++;
@@ -1221,18 +1263,11 @@
 		error("illegal option: %c", ARGC());
 	}ARGEND
 	openup(stemc, dflag, vflag, ytab, ytabc);
-	fout = dflag?fdefine:ftable;
-	if(yyarg){
+	if(yyarg)
 		Bprint(ftable, "#define\tYYARG\t1\n\n");
-	}
-	if((fd = mkstemp(ttempname)) >= 0){
-		tempname = ttempname;
-		ftemp = Bfdopen(fd, OWRITE);
-	}
-	if((fd = mkstemp(tactname)) >= 0){
-		actname = tactname;
-		faction = Bfdopen(fd, OWRITE);
-	}
+
+	ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
+	faction = Bopen(actname = mktemp(tactname), OWRITE);
 	if(ftemp == 0 || faction == 0)
 		error("cannot open temp file");
 	if(argc < 1)
@@ -1375,6 +1410,10 @@
 		t = gettok();
 		continue;

+	case IGNORE:
+		t = gettok();
+		continue;
+
 	default:
 		error("syntax error");
 	}
@@ -1396,8 +1435,11 @@
 		Bprint(ftable, "YYSTYPE	yylval;\n");
 		Bprint(ftable, "YYSTYPE	yyval;\n");
 	}else{
-		if(dflag)
+		fout = ftable;
+		if(dflag){
+			fout = fdefine;
 			Bprint(ftable, "#include \"%s.%s\"\n\n", stemc, FILED);
+		}
 		Bprint(fout, "struct Yyarg {\n");
 		Bprint(fout, "\tint\tyynerrs;\n");
 		Bprint(fout, "\tint\tyyerrflag;\n");
@@ -1406,6 +1448,7 @@
 		Bprint(fout, "\tYYSTYPE\tyylval;\n");
 		Bprint(fout, "};\n\n");
 	}
+
 	prdptr[0] = mem;

 	/* added production */
@@ -1552,6 +1595,7 @@
 	Bterm(faction);
 	Bprint(ftable, "#define YYEOFCODE %d\n", 1);
 	Bprint(ftable, "#define YYERRCODE %d\n", 2);
+	Bprint(ftable, "#define YYEMPTY (%d)\n", -2);
 }

 /*
@@ -1581,17 +1625,17 @@

 	/* establish value for token */
 	/* single character literal */
-	if(s[0] == ' ') {
+	if(s[0] == '\'') {
 		val = chartorune(&rune, &s[1]);
-		if(s[val+1] == 0) {
+		if(s[val+1] == '\'') {
 			val = rune;
 			goto out;
 		}
 	}

 	/* escape sequence */
-	if(s[0] == ' ' && s[1] == '\\') {
-		if(s[3] == 0) {
+	if(s[0] == '\'' && s[1] == '\\') {
+		if(s[3] == '\'') {
 			/* single character escape sequence */
 			switch(s[2]) {
 			case 'n':	val = '\n'; break;
@@ -1625,6 +1669,7 @@
 	val = extval++;

 out:
+	//print("%s = %d\n", s, val);
 	tokset[ntokens].value = val;
 	toklev[ntokens] = 0;
 	return ntokens;
@@ -1642,7 +1687,7 @@
 	for(i=ndefout; i<=ntokens; i++) {
 		/* non-literals */
 		c = tokset[i].name[0];
-		if(c != ' ' && c != '$') {
+		if(c != '\'' && c != '$') {
 			Bprint(ftable, "#define	%s	%d\n",
 				tokset[i].name, tokset[i].value);
 			if(fdefine)
@@ -1737,7 +1782,7 @@
 	case '"':
 	case '\'':
 		match = c;
-		tokname[0] = ' ';
+		tokname[0] = '\'';
 		i = 1;
 		for(;;) {
 			c = Bgetrune(finput);
@@ -1756,6 +1801,8 @@
 			if(i < NAMESIZE)
 				i += c;
 		}
+		tokname[i] = '\'';
+		i++;
 		break;

 	case '%':
@@ -1847,7 +1894,7 @@
 {
 	int i;

-	if(s[0] == ' ')
+	if(s[0] == '\'')
 		t = 0;
 	TLOOP(i)
 		if(!strcmp(s, tokset[i].name))
@@ -1915,6 +1962,7 @@
 void
 cpycode(void)
 {
+
 	long c;

 	c = Bgetrune(finput);
@@ -1956,17 +2004,22 @@

 	/* i is the number of lines skipped */
 	i = 0;
-	if(Bgetrune(finput) != '*')
+	c = Bgetrune(finput);
+	if(c == '/'){			/* C++ //: skip to end of line */
+		while((c = Bgetrune(finput)) != Beof)
+			if(c == '\n')
+				return 1;
+	}else if(c == '*'){		/* normal C comment */
+		while((c = Bgetrune(finput)) != Beof) {
+			while(c == '*')
+				if((c = Bgetrune(finput)) == '/')
+					return i;
+			if(c == '\n')
+				i++;
+		}
+	}else
 		error("illegal comment");
-	c = Bgetrune(finput);
-	while(c != Beof) {
-		while(c == '*')
-			if((c=Bgetrune(finput)) == '/')
-				return i;
-		if(c == '\n')
-			i++;
-		c = Bgetrune(finput);
-	}
+
 	error("EOF inside comment");
 	return 0;
 }
@@ -2094,22 +2147,30 @@
 		/* look for comments */
 		Bputrune(faction, c);
 		c = Bgetrune(finput);
-		if(c != '*')
+		switch(c) {
+		case '/':
+			while(c != Beof) {
+				if(c == '\n')
+					goto swt;
+				Bputrune(faction, c);
+				c = Bgetrune(finput);
+			}
+			break;
+		case '*':
+			while(c != Beof) {
+				while(c == '*') {
+					Bputrune(faction, c);
+					if((c = Bgetrune(finput)) == '/')
+						goto lcopy;
+				}
+				Bputrune(faction, c);
+				if(c == '\n')
+					lineno++;
+				c = Bgetrune(faction);
+			}
+			break;
+		default:
 			goto swt;
-
-		/* it really is a comment */
-		Bputrune(faction, c);
-		c = Bgetrune(finput);
-		while(c >= 0) {
-			while(c == '*') {
-				Bputrune(faction, c);
-				if((c=Bgetrune(finput)) == '/')
-					goto lcopy;
-			}
-			Bputrune(faction, c);
-			if(c == '\n')
-				lineno++;
-			c = Bgetrune(finput);
 		}
 		error("EOF inside comment");

@@ -2158,26 +2219,26 @@
 	char buf[256];

 	if(vflag) {
-		sprint(buf, "%s.%s", stem, FILEU);
+		snprint(buf, sizeof buf, "%s.%s", stem, FILEU);
 		foutput = Bopen(buf, OWRITE);
 		if(foutput == 0)
 			error("cannot open %s", buf);
 	}
 	if(yydebug) {
-		sprint(buf, "%s.%s", stem, FILEDEBUG);
+		snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG);
 		if((fdebug = Bopen(buf, OWRITE)) == 0)
 			error("can't open %s", buf);
 	}
 	if(dflag) {
-		sprint(buf, "%s.%s", stem, FILED);
+		snprint(buf, sizeof buf, "%s.%s", stem, FILED);
 		fdefine = Bopen(buf, OWRITE);
 		if(fdefine == 0)
 			error("can't create %s", buf);
 	}
 	if(ytab == 0)
-		sprint(buf, "%s.%s", stem, OFILE);
+		snprint(buf, sizeof buf, "%s.%s", stem, OFILE);
 	else
-		strcpy(buf, ytabc);
+		strecpy(buf, buf+sizeof buf, ytabc);
 	ftable = Bopen(buf, OWRITE);
 	if(ftable == 0)
 		error("cannot open table file %s", buf);

  parent reply	other threads:[~2011-11-15 21:10 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-11 16:07 ron minnich
2011-11-11 17:16 ` Steve Simon
2011-11-11 20:54 ` Bakul Shah
2011-11-11 20:58   ` ron minnich
2011-11-11 21:30     ` Bakul Shah
2011-11-11 23:18       ` Steve Simon
2011-11-12  4:51       ` Lucio De Re
2011-11-12  5:15         ` Bruce Ellis
2011-11-12  5:36           ` Lucio De Re
2011-11-12  4:49     ` Lucio De Re
2011-11-11 23:38   ` Iruatã Souza
2011-11-12  9:23   ` Bakul Shah
2011-11-13  0:53     ` Bruce Ellis
2011-11-13  4:48       ` Lucio De Re
2011-11-13  5:55         ` Bruce Ellis
2011-11-13 10:08         ` Steve Simon
2011-11-13 10:30           ` Bruce Ellis
2011-11-13 17:08             ` ron minnich
2011-11-15 21:10 ` Anthony Martin [this message]
2011-11-15 21:49   ` David Leimbach
     [not found] <CAP6exY+7R9z3uusW683gFCCjvt+t6cZVpq-0TWZL2SiDBbMw3g@mail.gmail.c>
2011-11-11 16:09 ` erik quanstrom
2011-11-11 17:27   ` Lucio De Re
2011-11-11 17:25     ` ron minnich
2011-11-11 18:00       ` Lucio De Re
     [not found] <CABT0RTRAXEL454dP+gpC7gYtV9hT7mk+1znEVU0NP4CuKcW=vg@mail.gmail.com>
2011-11-13 22:30 ` Scato Logic
2011-11-13 23:22   ` Bruce Ellis

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20111115211034.GA6135@dinah \
    --to=ality@pbrane.org \
    --cc=9fans@9fans.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).