source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: Improve (or rather, rewrite) tbl(7) option parsing.
@ 2015-01-26  0:57 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2015-01-26  0:57 UTC (permalink / raw)
  To: source

Log Message:
-----------
Improve (or rather, rewrite) tbl(7) option parsing.
* Allow the layout to start after the semicolon on the options line.
* Ignore leading commas.
* Option arguments cannot contain closing parentheses.
* Avoid needless UNSUPP messages.
* Better ERROR reporting.
* Delete unused "linesize" field in struct tbl_opts.
* No need for static buffers.
* Garbage collect one almost empty wrapper function.
Improved functionality, but minus 40 lines of code.

Modified Files:
--------------
    mdocml:
        mandoc.1
        mandoc.h
        read.c
        tbl.c
        tbl_opts.c

Revision Data
-------------
Index: read.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/read.c,v
retrieving revision 1.115
retrieving revision 1.116
diff -Lread.c -Lread.c -u -p -r1.115 -r1.116
--- read.c
+++ read.c
@@ -180,6 +180,10 @@ static	const char * const	mandocerrs[MAN
 	"unexpected end of equation",
 
 	/* related to tables */
+	"non-alphabetic character in tbl options",
+	"skipping unknown tbl option",
+	"missing tbl option argument",
+	"wrong tbl option argument size",
 	"no table layout cells specified",
 	"no table data cells specified",
 	"ignore data in cell",
@@ -218,8 +222,6 @@ static	const char * const	mandocerrs[MAN
 	"input too large",
 	"unsupported control character",
 	"unsupported roff request",
-	"unsupported table syntax",
-	"unsupported table option",
 	"unsupported table layout",
 	"ignoring macro in table",
 };
Index: tbl_opts.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/tbl_opts.c,v
retrieving revision 1.16
retrieving revision 1.17
diff -Ltbl_opts.c -Ltbl_opts.c -u -p -r1.16 -r1.17
--- tbl_opts.c
+++ tbl_opts.c
@@ -1,6 +1,7 @@
 /*	$Id$ */
 /*
  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -53,12 +54,6 @@ struct	tbl_phrase {
 /* Handle Commonwealth/American spellings. */
 #define	KEY_MAXKEYS	 14
 
-/* Maximum length of key name string. */
-#define	KEY_MAXNAME	 13
-
-/* Maximum length of key number size. */
-#define	KEY_MAXNUMSZ	 10
-
 static	const struct tbl_phrase keys[KEY_MAXKEYS] = {
 	{ "center",	 TBL_OPT_CENTRE,	KEY_CENTRE},
 	{ "centre",	 TBL_OPT_CENTRE,	KEY_CENTRE},
@@ -76,193 +71,119 @@ static	const struct tbl_phrase keys[KEY_
 	{ "nospaces",	 TBL_OPT_NOSPACE,	KEY_NOSPACE},
 };
 
-static	int		 arg(struct tbl_node *, int,
+static	void		 arg(struct tbl_node *, int,
 				const char *, int *, enum tbl_ident);
-static	void		 opt(struct tbl_node *, int,
-				const char *, int *);
 
 
-static int
+static void
 arg(struct tbl_node *tbl, int ln, const char *p, int *pos, enum tbl_ident key)
 {
-	int		 i;
-	char		 buf[KEY_MAXNUMSZ];
+	const char	*optname;
+	int		 len, want;
 
 	while (isspace((unsigned char)p[*pos]))
 		(*pos)++;
 
-	/* Arguments always begin with a parenthesis. */
+	/* Arguments are enclosed in parentheses. */
 
-	if ('(' != p[*pos]) {
-		mandoc_msg(MANDOCERR_TBL, tbl->parse,
-		    ln, *pos, NULL);
-		return(0);
+	len = 0;
+	if (p[*pos] == '(') {
+		(*pos)++;
+		while (p[*pos + len] != ')')
+			len++;
 	}
 
-	(*pos)++;
-
-	/*
-	 * The arguments can be ANY value, so we can't just stop at the
-	 * next close parenthesis (the argument can be a closed
-	 * parenthesis itself).
-	 */
-
 	switch (key) {
 	case KEY_DELIM:
-		if ('\0' == p[(*pos)++]) {
-			mandoc_msg(MANDOCERR_TBL, tbl->parse,
-			    ln, *pos - 1, NULL);
-			return(0);
-		}
-
-		if ('\0' == p[(*pos)++]) {
-			mandoc_msg(MANDOCERR_TBL, tbl->parse,
-			    ln, *pos - 1, NULL);
-			return(0);
-		}
+		optname = "delim";
+		want = 2;
 		break;
 	case KEY_TAB:
-		if ('\0' != (tbl->opts.tab = p[(*pos)++]))
-			break;
-
-		mandoc_msg(MANDOCERR_TBL, tbl->parse,
-		    ln, *pos - 1, NULL);
-		return(0);
+		optname = "tab";
+		want = 1;
+		if (len == want)
+			tbl->opts.tab = p[*pos];
+		break;
 	case KEY_LINESIZE:
-		for (i = 0; i < KEY_MAXNUMSZ && p[*pos]; i++, (*pos)++) {
-			buf[i] = p[*pos];
-			if ( ! isdigit((unsigned char)buf[i]))
-				break;
-		}
-
-		if (i < KEY_MAXNUMSZ) {
-			buf[i] = '\0';
-			tbl->opts.linesize = atoi(buf);
-			break;
-		}
-
-		mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos, NULL);
-		return(0);
+		optname = "linesize";
+		want = 0;
+		break;
 	case KEY_DPOINT:
-		if ('\0' != (tbl->opts.decimal = p[(*pos)++]))
-			break;
-
-		mandoc_msg(MANDOCERR_TBL, tbl->parse,
-		    ln, *pos - 1, NULL);
-		return(0);
+		optname = "decimalpoint";
+		want = 1;
+		if (len == want)
+			tbl->opts.decimal = p[*pos];
+		break;
 	default:
 		abort();
 		/* NOTREACHED */
 	}
 
-	/* End with a close parenthesis. */
+	if (len == 0)
+		mandoc_msg(MANDOCERR_TBLOPT_NOARG,
+		    tbl->parse, ln, *pos, optname);
+	else if (want && len != want)
+		mandoc_vmsg(MANDOCERR_TBLOPT_ARGSZ,
+		    tbl->parse, ln, *pos,
+		    "%s want %d have %d", optname, want, len);
 
-	if (')' == p[(*pos)++])
-		return(1);
-
-	mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos - 1, NULL);
-	return(0);
+	*pos += len;
+	if (p[*pos] == ')')
+		(*pos)++;
 }
 
-static void
-opt(struct tbl_node *tbl, int ln, const char *p, int *pos)
+/*
+ * Parse one line of options up to the semicolon.
+ * Each option can be preceded by blanks and/or commas,
+ * and some options are followed by arguments.
+ */
+void
+tbl_option(struct tbl_node *tbl, int ln, const char *p)
 {
-	int		 i, sv;
-	char		 buf[KEY_MAXNAME];
-
-	/*
-	 * Parse individual options from the stream as surrounded by
-	 * this goto.  Each pass through the routine parses out a single
-	 * option and registers it.  Option arguments are processed in
-	 * the arg() function.
-	 */
-
-again:	/*
-	 * EBNF describing this section:
-	 *
-	 * options	::= option_list [:space:]* [;][\n]
-	 * option_list	::= option option_tail
-	 * option_tail	::= [,:space:]+ option_list |
-	 *		::= epsilon
-	 * option	::= [:alpha:]+ args
-	 * args		::= [:space:]* [(] [:alpha:]+ [)]
-	 */
-
-	while (isspace((unsigned char)p[*pos]))
-		(*pos)++;
-
-	/* Safe exit point. */
-
-	if (';' == p[*pos])
-		return;
-
-	/* Copy up to first non-alpha character. */
+	int		 i, pos, len;
 
-	for (sv = *pos, i = 0; i < KEY_MAXNAME; i++, (*pos)++) {
-		buf[i] = (char)tolower((unsigned char)p[*pos]);
-		if ( ! isalpha((unsigned char)buf[i]))
-			break;
-	}
-
-	/* Exit if buffer is empty (or overrun). */
+	pos = 0;
+	for (;;) {
+		while (isspace((unsigned char)p[pos]) || p[pos] == ',')
+			pos++;
 
-	if (KEY_MAXNAME == i || 0 == i) {
-		mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos, NULL);
-		return;
-	}
+		if (p[pos] == ';')
+			return;
 
-	buf[i] = '\0';
+		/* Parse one option name. */
 
-	while (isspace((unsigned char)p[*pos]) || p[*pos] == ',')
-		(*pos)++;
+		len = 0;
+		while (isalpha((unsigned char)p[pos + len]))
+			len++;
+
+		if (len == 0) {
+			mandoc_vmsg(MANDOCERR_TBLOPT_ALPHA,
+			    tbl->parse, ln, pos, "%c", p[pos]);
+			pos++;
+			continue;
+		}
 
-	/*
-	 * Look through all of the available keys to find one that
-	 * matches the input.  FIXME: hashtable this.
-	 */
+		/* Look up the option name. */
 
-	for (i = 0; i < KEY_MAXKEYS; i++) {
-		if (strcmp(buf, keys[i].name))
+		i = 0;
+		while (i < KEY_MAXKEYS &&
+		    (strncasecmp(p + pos, keys[i].name, len) ||
+		     keys[i].name[len] != '\0'))
+			i++;
+
+		if (i == KEY_MAXKEYS) {
+			mandoc_vmsg(MANDOCERR_TBLOPT_BAD, tbl->parse,
+			    ln, pos, "%.*s", len, p + pos);
+			pos += len;
 			continue;
+		}
 
-		/*
-		 * Note: this is more difficult to recover from, as we
-		 * can be anywhere in the option sequence and it's
-		 * harder to jump to the next.  Meanwhile, just bail out
-		 * of the sequence altogether.
-		 */
+		/* Handle the option. */
 
+		pos += len;
 		if (keys[i].key)
 			tbl->opts.opts |= keys[i].key;
-		else if ( ! arg(tbl, ln, p, pos, keys[i].ident))
-			return;
-
-		break;
+		else
+			arg(tbl, ln, p, &pos, keys[i].ident);
 	}
-
-	/*
-	 * Allow us to recover from bad options by continuing to another
-	 * parse sequence.
-	 */
-
-	if (KEY_MAXKEYS == i)
-		mandoc_msg(MANDOCERR_TBLOPT, tbl->parse, ln, sv, NULL);
-
-	goto again;
-	/* NOTREACHED */
-}
-
-void
-tbl_option(struct tbl_node *tbl, int ln, const char *p)
-{
-	int		 pos;
-
-	/*
-	 * Table options are always on just one line, so automatically
-	 * switch into the next input mode here.
-	 */
-	tbl->part = TBL_PART_LAYOUT;
-
-	pos = 0;
-	opt(tbl, ln, p, &pos);
 }
Index: mandoc.h
===================================================================
RCS file: /home/cvs/mdocml/mdocml/mandoc.h,v
retrieving revision 1.186
retrieving revision 1.187
diff -Lmandoc.h -Lmandoc.h -u -p -r1.186 -r1.187
--- mandoc.h
+++ mandoc.h
@@ -136,6 +136,10 @@ enum	mandocerr {
 	MANDOCERR_EQNEOF, /* unexpected end of equation */
 
 	/* related to tables */
+	MANDOCERR_TBLOPT_ALPHA, /* non-alphabetic character in tbl options */
+	MANDOCERR_TBLOPT_BAD, /* skipping unknown tbl option: option */
+	MANDOCERR_TBLOPT_NOARG, /* missing tbl option argument */
+	MANDOCERR_TBLOPT_ARGSZ, /* wrong tbl option argument size */
 	MANDOCERR_TBLNOLAYOUT, /* no table layout cells specified */
 	MANDOCERR_TBLNODATA, /* no table data cells specified */
 	MANDOCERR_TBLIGNDATA, /* ignore data in cell */
@@ -175,8 +179,6 @@ enum	mandocerr {
 	MANDOCERR_TOOLARGE, /* input too large */
 	MANDOCERR_CHAR_UNSUPP, /* unsupported control character: number */
 	MANDOCERR_REQ_UNSUPP, /* unsupported roff request: request */
-	MANDOCERR_TBL, /* unsupported table syntax */
-	MANDOCERR_TBLOPT, /* unsupported table option */
 	MANDOCERR_TBLLAYOUT, /* unsupported table layout */
 	MANDOCERR_TBLMACRO, /* ignoring macro in table: macro */
 
@@ -186,7 +188,6 @@ enum	mandocerr {
 struct	tbl_opts {
 	char		  tab; /* cell-separator */
 	char		  decimal; /* decimal point */
-	int		  linesize;
 	int		  opts;
 #define	TBL_OPT_CENTRE	 (1 << 0)
 #define	TBL_OPT_EXPAND	 (1 << 1)
Index: tbl.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/tbl.c,v
retrieving revision 1.32
retrieving revision 1.33
diff -Ltbl.c -Ltbl.c -u -p -r1.32 -r1.33
--- tbl.c
+++ tbl.c
@@ -1,7 +1,7 @@
 /*	$Id$ */
 /*
  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011, 2015 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -34,29 +34,45 @@
 enum rofferr
 tbl_read(struct tbl_node *tbl, int ln, const char *p, int offs)
 {
-	int		 len;
 	const char	*cp;
-
-	cp = &p[offs];
-	len = (int)strlen(cp);
+	int		 active;
 
 	/*
-	 * If we're in the options section and we don't have a
-	 * terminating semicolon, assume we've moved directly into the
-	 * layout section.  No need to report a warning: this is,
-	 * apparently, standard behaviour.
+	 * In the options section, proceed to the layout section
+	 * after a semicolon, or right away if there is no semicolon.
+	 * Ignore semicolons in arguments.
 	 */
 
-	if (TBL_PART_OPTS == tbl->part && len)
-		if (';' != cp[len - 1])
-			tbl->part = TBL_PART_LAYOUT;
+	if (tbl->part == TBL_PART_OPTS) {
+		tbl->part = TBL_PART_LAYOUT;
+		active = 1;
+		for (cp = p; *cp != '\0'; cp++) {
+			switch (*cp) {
+			case '(':
+				active = 0;
+				continue;
+			case ')':
+				active = 1;
+				continue;
+			case ';':
+				if (active)
+					break;
+				continue;
+			default:
+				continue;
+			}
+			break;
+		}
+		if (*cp == ';') {
+			tbl_option(tbl, ln, p);
+			if (*(p = cp + 1) == '\0')
+				return(ROFF_IGN);
+		}
+	}
 
-	/* Now process each logical section of the table.  */
+	/* Process the other section types.  */
 
 	switch (tbl->part) {
-	case TBL_PART_OPTS:
-		tbl_option(tbl, ln, p);
-		return(ROFF_IGN);
 	case TBL_PART_LAYOUT:
 		tbl_layout(tbl, ln, p);
 		return(ROFF_IGN);
@@ -81,7 +97,6 @@ tbl_alloc(int pos, int line, struct mpar
 	tbl->parse = parse;
 	tbl->part = TBL_PART_OPTS;
 	tbl->opts.tab = '\t';
-	tbl->opts.linesize = 12;
 	tbl->opts.decimal = '.';
 	return(tbl);
 }
Index: mandoc.1
===================================================================
RCS file: /home/cvs/mdocml/mdocml/mandoc.1,v
retrieving revision 1.135
retrieving revision 1.136
diff -Lmandoc.1 -Lmandoc.1 -u -p -r1.135 -r1.136
--- mandoc.1
+++ mandoc.1
@@ -1281,12 +1281,35 @@ keeps the code more readable.
 .It "unexpected end of equation"
 .El
 .Ss "Errors related to tables"
+.Bl -ohang
+.It Sy "non-alphabetic character in tbl options"
+.Pq tbl
+The table options line contains a character other than a letter,
+blank, or comma where the beginning of an option name is expected.
+The character is ignored.
+.It Sy "skipping unknown tbl option"
+.Pq tbl
+The table options line contains a string of letters that does not
+match any known option name.
+The word is ignored.
+.It Sy "missing tbl option argument"
+.Pq tbl
+A table option that requires an argument is not followed by an
+opening parenthesis, or the opening parenthesis is immediately
+followed by a closing parenthesis.
+The option is ignored.
+.It Sy "wrong tbl option argument size"
+.Pq tbl
+A table option argument contains an invalid number of characters.
+Both the option and the argument are ignored.
+.El
+.Pp
 .Bl -inset -compact
-.It "no table layout cells specified"
-.It "no table data cells specified"
-.It "ignore data in cell"
-.It "data block still open"
-.It "ignoring extra data cells"
+.It Sy "no table layout cells specified"
+.It Sy "no table data cells specified"
+.It Sy "ignore data in cell"
+.It Sy "data block still open"
+.It Sy "ignoring extra data cells"
 .El
 .Ss "Errors related to roff, mdoc, and man code"
 .Bl -ohang
@@ -1568,6 +1591,14 @@ cannot handle input files larger than it
 of 2^31 bytes (2 Gigabytes).
 Since useful manuals are always small, this is not a problem in practice.
 Parsing is aborted as soon as the condition is detected.
+.It Sy "unsupported control character"
+.Pq roff
+An ASCII control character supported by other
+.Xr roff 7
+implementations but not by
+.Nm
+was found in an input file.
+It is replaced by a question mark.
 .It Sy "unsupported roff request"
 .Pq roff
 An input file contains a
@@ -1576,9 +1607,7 @@ request supported by GNU troff or Heirlo
 .Nm ,
 and it is likely that this will cause information loss
 or considerable misformatting.
-.It Sy "bad table syntax"
-.It Sy "bad table option"
-.It Sy "bad table layout"
+.It Sy "unsupported table layout"
 .It Sy "ignoring macro in table"
 .El
 .Sh COMPATIBILITY
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2015-01-26  0:57 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-26  0:57 mdocml: Improve (or rather, rewrite) tbl(7) option parsing schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).