source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: Rework tbl(7) layout parsing: * Continue parsing even if part of
@ 2015-01-26 18:43 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2015-01-26 18:43 UTC (permalink / raw)
  To: source

Log Message:
-----------
Rework tbl(7) layout parsing:
* Continue parsing even if part of the input is invalid.
* Do not require whitespace between cell specifications.
* Allow tabs as well as blanks between modifiers.
* Mark the 'm' modifier as unsupported.
* Parse and ignore the 'p' and 'v' modifiers.
* Better warning and error messages.
* Get rid of a static buffer.
Improved functionality but minus 50 lines of code.

Modified Files:
--------------
    mdocml:
        mandoc.1
        mandoc.h
        read.c
        tbl.7
        tbl_layout.c

Revision Data
-------------
Index: tbl_layout.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/tbl_layout.c,v
retrieving revision 1.31
retrieving revision 1.32
diff -Ltbl_layout.c -Ltbl_layout.c -u -p -r1.31 -r1.32
--- tbl_layout.c
+++ tbl_layout.c
@@ -1,7 +1,7 @@
 /*	$Id$ */
 /*
  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2012, 2014 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -34,15 +34,7 @@ struct	tbl_phrase {
 	enum tbl_cellt	 key;
 };
 
-/*
- * FIXME: we can make this parse a lot nicer by, when an error is
- * encountered in a layout key, bailing to the next key (i.e. to the
- * next whitespace then continuing).
- */
-
-#define	KEYS_MAX	 11
-
-static	const struct tbl_phrase keys[KEYS_MAX] = {
+static	const struct tbl_phrase keys[] = {
 	{ 'c',		 TBL_CELL_CENTRE },
 	{ 'r',		 TBL_CELL_RIGHT },
 	{ 'l',		 TBL_CELL_LEFT },
@@ -55,57 +47,30 @@ static	const struct tbl_phrase keys[KEYS
 	{ '=',		 TBL_CELL_DHORIZ }
 };
 
-static	int		 mods(struct tbl_node *, struct tbl_cell *,
+#define KEYS_MAX ((int)(sizeof(keys)/sizeof(keys[0])))
+
+static	void		 mods(struct tbl_node *, struct tbl_cell *,
 				int, const char *, int *);
-static	int		 cell(struct tbl_node *, struct tbl_row *,
+static	void		 cell(struct tbl_node *, struct tbl_row *,
 				int, const char *, int *);
 static	struct tbl_cell *cell_alloc(struct tbl_node *, struct tbl_row *,
 				enum tbl_cellt, int vert);
 
 
-static int
+static void
 mods(struct tbl_node *tbl, struct tbl_cell *cp,
 		int ln, const char *p, int *pos)
 {
-	char		 buf[5];
-	int		 i;
+	char		*endptr;
 
-	/* Not all types accept modifiers. */
+mod:
+	while (p[*pos] == ' ' || p[*pos] == '\t')
+		(*pos)++;
 
-	switch (cp->pos) {
-	case TBL_CELL_DOWN:
-		/* FALLTHROUGH */
-	case TBL_CELL_HORIZ:
-		/* FALLTHROUGH */
-	case TBL_CELL_DHORIZ:
-		return(1);
-	default:
-		break;
-	}
+	/* Row delimiters and cell specifiers end modifier lists. */
 
-mod:
-	/*
-	 * XXX: since, at least for now, modifiers are non-conflicting
-	 * (are separable by value, regardless of position), we let
-	 * modifiers come in any order.  The existing tbl doesn't let
-	 * this happen.
-	 */
-	switch (p[*pos]) {
-	case '\0':
-		/* FALLTHROUGH */
-	case ' ':
-		/* FALLTHROUGH */
-	case '\t':
-		/* FALLTHROUGH */
-	case ',':
-		/* FALLTHROUGH */
-	case '.':
-		/* FALLTHROUGH */
-	case '|':
-		return(1);
-	default:
-		break;
-	}
+	if (strchr(".,-=^_ACLNRSaclnrs|", p[*pos]) != NULL)
+		return;
 
 	/* Throw away parenthesised expression. */
 
@@ -117,72 +82,65 @@ mod:
 			(*pos)++;
 			goto mod;
 		}
-		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
+		mandoc_msg(MANDOCERR_TBLLAYOUT_PAR, tbl->parse,
 		    ln, *pos, NULL);
-		return(0);
+		return;
 	}
 
 	/* Parse numerical spacing from modifier string. */
 
 	if (isdigit((unsigned char)p[*pos])) {
-		for (i = 0; i < 4; i++) {
-			if ( ! isdigit((unsigned char)p[*pos + i]))
-				break;
-			buf[i] = p[*pos + i];
-		}
-		buf[i] = '\0';
-
-		/* No greater than 4 digits. */
-
-		if (4 == i) {
-			mandoc_msg(MANDOCERR_TBLLAYOUT,
-			    tbl->parse, ln, *pos, NULL);
-			return(0);
-		}
-
-		*pos += i;
-		cp->spacing = (size_t)atoi(buf);
-
+		cp->spacing = strtoull(p + *pos, &endptr, 10);
+		*pos = endptr - p;
 		goto mod;
-		/* NOTREACHED */
 	}
 
-	/* TODO: GNU has many more extensions. */
-
 	switch (tolower((unsigned char)p[(*pos)++])) {
-	case 'z':
-		cp->flags |= TBL_CELL_WIGN;
-		goto mod;
-	case 'u':
-		cp->flags |= TBL_CELL_UP;
+	case 'b':
+		/* FALLTHROUGH */
+	case 'i':
+		/* FALLTHROUGH */
+	case 'r':
+		(*pos)--;
+		break;
+	case 'd':
+		cp->flags |= TBL_CELL_BALIGN;
 		goto mod;
 	case 'e':
 		cp->flags |= TBL_CELL_EQUAL;
 		goto mod;
+	case 'f':
+		break;
+	case 'm':
+		mandoc_msg(MANDOCERR_TBLLAYOUT_MOD, tbl->parse,
+		    ln, *pos, "m");
+		goto mod;
+	case 'p':
+		/* FALLTHROUGH */
+	case 'v':
+		if (p[*pos] == '-' || p[*pos] == '+')
+			(*pos)++;
+		while (isdigit((unsigned char)p[*pos]))
+			(*pos)++;
+		goto mod;
 	case 't':
 		cp->flags |= TBL_CELL_TALIGN;
 		goto mod;
-	case 'd':
-		cp->flags |= TBL_CELL_BALIGN;
+	case 'u':
+		cp->flags |= TBL_CELL_UP;
 		goto mod;
 	case 'w':  /* XXX for now, ignore minimal column width */
 		goto mod;
 	case 'x':
 		cp->flags |= TBL_CELL_WMAX;
 		goto mod;
-	case 'f':
-		break;
-	case 'r':
-		/* FALLTHROUGH */
-	case 'b':
-		/* FALLTHROUGH */
-	case 'i':
-		(*pos)--;
-		break;
+	case 'z':
+		cp->flags |= TBL_CELL_WIGN;
+		goto mod;
 	default:
-		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
-		    ln, *pos - 1, NULL);
-		return(0);
+		mandoc_vmsg(MANDOCERR_TBLLAYOUT_CHAR, tbl->parse,
+		    ln, *pos - 1, "%c", p[*pos - 1]);
+		goto mod;
 	}
 
 	switch (tolower((unsigned char)p[(*pos)++])) {
@@ -201,20 +159,13 @@ mod:
 	case 'r':
 		goto mod;
 	default:
-		break;
-	}
-	if (isalnum((unsigned char)p[*pos - 1])) {
 		mandoc_vmsg(MANDOCERR_FT_BAD, tbl->parse,
 		    ln, *pos - 1, "TS f%c", p[*pos - 1]);
 		goto mod;
 	}
-
-	mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
-	    ln, *pos - 1, NULL);
-	return(0);
 }
 
-static int
+static void
 cell(struct tbl_node *tbl, struct tbl_row *rp,
 		int ln, const char *p, int *pos)
 {
@@ -223,16 +174,24 @@ cell(struct tbl_node *tbl, struct tbl_ro
 
 	/* Handle vertical lines. */
 
-	for (vert = 0; '|' == p[*pos]; ++*pos)
-		vert++;
-	while (' ' == p[*pos])
+	vert = 0;
+again:
+	while (p[*pos] == ' ' || p[*pos] == '\t' || p[*pos] == '|') {
+		if (p[*pos] == '|') {
+			if (vert < 2)
+				vert++;
+			else
+				mandoc_msg(MANDOCERR_TBLLAYOUT_VERT,
+				    tbl->parse, ln, *pos, NULL);
+		}
 		(*pos)++;
+	}
 
 	/* Handle trailing vertical lines */
 
 	if ('.' == p[*pos] || '\0' == p[*pos]) {
 		rp->vert = vert;
-		return(1);
+		return;
 	}
 
 	/* Parse the column position (`c', `l', `r', ...). */
@@ -241,62 +200,32 @@ cell(struct tbl_node *tbl, struct tbl_ro
 		if (tolower((unsigned char)p[*pos]) == keys[i].name)
 			break;
 
-	if (KEYS_MAX == i) {
-		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
-		    ln, *pos, NULL);
-		return(0);
+	if (i == KEYS_MAX) {
+		mandoc_vmsg(MANDOCERR_TBLLAYOUT_CHAR, tbl->parse,
+		    ln, *pos, "%c", p[*pos]);
+		(*pos)++;
+		goto again;
 	}
-
 	c = keys[i].key;
 
-	/*
-	 * If a span cell is found first, raise a warning and abort the
-	 * parse.  If a span cell is found and the last layout element
-	 * isn't a "normal" layout, bail.
-	 *
-	 * FIXME: recover from this somehow?
-	 */
-
-	if (TBL_CELL_SPAN == c) {
-		if (NULL == rp->first) {
-			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
-			    ln, *pos, NULL);
-			return(0);
-		} else if (rp->last)
-			switch (rp->last->pos) {
-			case TBL_CELL_HORIZ:
-				/* FALLTHROUGH */
-			case TBL_CELL_DHORIZ:
-				mandoc_msg(MANDOCERR_TBLLAYOUT,
-				    tbl->parse, ln, *pos, NULL);
-				return(0);
-			default:
-				break;
-			}
-	}
+	/* Special cases of spanners. */
 
-	/*
-	 * If a vertical spanner is found, we may not be in the first
-	 * row.
-	 */
-
-	if (TBL_CELL_DOWN == c && rp == tbl->first_row) {
-		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL);
-		return(0);
-	}
+	if (c == TBL_CELL_SPAN) {
+		if (rp->last == NULL)
+			mandoc_msg(MANDOCERR_TBLLAYOUT_SPAN,
+			    tbl->parse, ln, *pos, NULL);
+		else if (rp->last->pos == TBL_CELL_HORIZ ||
+		    rp->last->pos == TBL_CELL_DHORIZ)
+			c = rp->last->pos;
+	} else if (c == TBL_CELL_DOWN && rp == tbl->first_row)
+		mandoc_msg(MANDOCERR_TBLLAYOUT_DOWN,
+		    tbl->parse, ln, *pos, NULL);
 
 	(*pos)++;
 
-	/* Disallow adjacent spacers. */
-
-	if (vert > 2) {
-		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL);
-		return(0);
-	}
-
 	/* Allocate cell then parse its modifiers. */
 
-	return(mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos));
+	mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos);
 }
 
 void
@@ -311,7 +240,7 @@ tbl_layout(struct tbl_node *tbl, int ln,
 	for (;;) {
 		/* Skip whitespace before and after each cell. */
 
-		while (isspace((unsigned char)p[pos]))
+		while (p[pos] == ' ' || p[pos] == '\t')
 			pos++;
 
 		switch (p[pos]) {
@@ -326,7 +255,7 @@ tbl_layout(struct tbl_node *tbl, int ln,
 			tbl->part = TBL_PART_DATA;
 			if (tbl->first_row != NULL)
 				return;
-			mandoc_msg(MANDOCERR_TBLNOLAYOUT,
+			mandoc_msg(MANDOCERR_TBLLAYOUT_NONE,
 			    tbl->parse, ln, pos, NULL);
 			rp = mandoc_calloc(1, sizeof(*rp));
 			cell_alloc(tbl, rp, TBL_CELL_LEFT, 0);
@@ -344,8 +273,7 @@ tbl_layout(struct tbl_node *tbl, int ln,
 				tbl->first_row = rp;
 			tbl->last_row = rp;
 		}
-		if ( ! cell(tbl, rp, ln, p, &pos))
-			return;
+		cell(tbl, rp, ln, p, &pos);
 	}
 }
 
Index: mandoc.h
===================================================================
RCS file: /home/cvs/mdocml/mdocml/mandoc.h,v
retrieving revision 1.188
retrieving revision 1.189
diff -Lmandoc.h -Lmandoc.h -u -p -r1.188 -r1.189
--- mandoc.h
+++ mandoc.h
@@ -127,6 +127,11 @@ enum	mandocerr {
 	MANDOCERR_ESC_BAD, /* invalid escape sequence: esc */
 	MANDOCERR_STR_UNDEF, /* undefined string, using "": name */
 
+	/* related to tables */
+	MANDOCERR_TBLLAYOUT_SPAN, /* tbl line starts with span */
+	MANDOCERR_TBLLAYOUT_DOWN, /* tbl column starts with span */
+	MANDOCERR_TBLLAYOUT_VERT, /* skipping vertical bar in tbl layout */
+
 	MANDOCERR_ERROR, /* ===== start of errors ===== */
 
 	/* related to equations */
@@ -140,7 +145,9 @@ enum	mandocerr {
 	MANDOCERR_TBLOPT_BAD, /* skipping unknown tbl option: option */
 	MANDOCERR_TBLOPT_NOARG, /* missing tbl option argument */
 	MANDOCERR_TBLOPT_ARGSZ, /* wrong tbl option argument size */
-	MANDOCERR_TBLNOLAYOUT, /* no table layout cells specified */
+	MANDOCERR_TBLLAYOUT_NONE, /* empty tbl layout */
+	MANDOCERR_TBLLAYOUT_CHAR, /* invalid character in tbl layout: char */
+	MANDOCERR_TBLLAYOUT_PAR, /* unmatched parenthesis in tbl layout */
 	MANDOCERR_TBLNODATA, /* no table data cells specified */
 	MANDOCERR_TBLIGNDATA, /* ignore data in cell */
 	MANDOCERR_TBLBLOCK, /* data block still open */
@@ -179,7 +186,7 @@ enum	mandocerr {
 	MANDOCERR_TOOLARGE, /* input too large */
 	MANDOCERR_CHAR_UNSUPP, /* unsupported control character: number */
 	MANDOCERR_REQ_UNSUPP, /* unsupported roff request: request */
-	MANDOCERR_TBLLAYOUT, /* unsupported table layout */
+	MANDOCERR_TBLLAYOUT_MOD, /* unsupported tbl layout modifier: m */
 	MANDOCERR_TBLMACRO, /* ignoring macro in table: macro */
 	MANDOCERR_TBLEQN, /* eqn in tbl */
 
Index: read.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/read.c,v
retrieving revision 1.117
retrieving revision 1.118
diff -Lread.c -Lread.c -u -p -r1.117 -r1.118
--- read.c
+++ read.c
@@ -171,6 +171,11 @@ static	const char * const	mandocerrs[MAN
 	"invalid escape sequence",
 	"undefined string, using \"\"",
 
+	/* related to tables */
+	"tbl line starts with span",
+	"tbl column starts with span",
+	"skipping vertical bar in tbl layout",
+
 	"generic error",
 
 	/* related to equations */
@@ -184,7 +189,9 @@ static	const char * const	mandocerrs[MAN
 	"skipping unknown tbl option",
 	"missing tbl option argument",
 	"wrong tbl option argument size",
-	"no table layout cells specified",
+	"empty tbl layout",
+	"invalid character in tbl layout",
+	"unmatched parenthesis in tbl layout",
 	"no table data cells specified",
 	"ignore data in cell",
 	"data block still open",
@@ -222,7 +229,7 @@ static	const char * const	mandocerrs[MAN
 	"input too large",
 	"unsupported control character",
 	"unsupported roff request",
-	"unsupported table layout",
+	"unsupported tbl layout modifier",
 	"ignoring macro in table",
 	"eqn in tbl",
 };
Index: tbl.7
===================================================================
RCS file: /home/cvs/mdocml/mdocml/tbl.7,v
retrieving revision 1.23
retrieving revision 1.24
diff -Ltbl.7 -Ltbl.7 -u -p -r1.23 -r1.24
--- tbl.7
+++ tbl.7
@@ -251,6 +251,9 @@ The following case-insensitive modifier 
 .Bl -tag -width 2n
 .It Cm b
 Use a bold font for the contents of this column.
+.It Cm d
+Move cell content down to the last cell of a vertical span.
+Currently ignored.
 .It Cm e
 Make this column wider to match the maximum width
 of any other column also having the
@@ -263,6 +266,27 @@ See the
 manual for supported one-character font names.
 .It Cm i
 Use an italic font for the contents of this column.
+.It Cm m
+Specify a cell start macro.
+This is a GNU extension and currently unsupported.
+.It Cm p
+Set the point size to the following unsigned argument,
+or change it by the following signed argument.
+Currently ignored.
+.It Cm v
+Set the vertical line spacing to the following unsigned argument,
+or change it by the following signed argument.
+Currently ignored.
+.It Cm t
+Do not vertically center cell content in the vertical span,
+leave it at the top.
+Currently ignored.
+.It Cm u
+Move cell content up by half a table line.
+Currently ignored.
+.It Cm w
+Specify minimum column width.
+Currently ignored.
 .It Cm x
 After determining the width of all other columns, distribute the
 rest of the line length among all columns having the
@@ -271,15 +295,6 @@ modifier.
 .It Cm z
 Do not use this cell for determining the width of this column.
 .El
-.Pp
-The modifiers
-.Cm d ,
-.Cm t ,
-.Cm u ,
-and
-.Cm w
-are ignored by
-.Xr mandoc 1 .
 .Pp
 For example, the following layout specifies a center-justified column of
 minimum width 10, followed by vertical bar, followed by a left-justified
Index: mandoc.1
===================================================================
RCS file: /home/cvs/mdocml/mdocml/mandoc.1,v
retrieving revision 1.137
retrieving revision 1.138
diff -Lmandoc.1 -Lmandoc.1 -u -p -r1.137 -r1.138
--- mandoc.1
+++ mandoc.1
@@ -1273,6 +1273,24 @@ its value is implicitly set to the empty
 However, defining strings explicitly before use
 keeps the code more readable.
 .El
+.Ss "Warnings related to tables"
+.Bl -ohang
+.It Sy "tbl line starts with span"
+.Pq tbl
+The first cell in a table layout line is a horizontal span
+.Pq Sq Cm s .
+Data provided for this cell is ignored, and nothing is printed in the cell.
+.It Sy "tbl column starts with span"
+.Pq tbl
+The first line of a table layout specification
+requests a vertical span
+.Pq Sq Cm ^ .
+Data provided for this cell is ignored, and nothing is printed in the cell.
+.It Sy "skipping vertical bar in tbl layout"
+.Pq tbl
+A table layout specification contains more than two consecutive vertical bars.
+A double bar is printed, all additional bars are discarded.
+.El
 .Ss "Errors related to equations"
 .Bl -inset -compact
 .It "unexpected equation scope closure"
@@ -1302,10 +1320,25 @@ The option is ignored.
 .Pq tbl
 A table option argument contains an invalid number of characters.
 Both the option and the argument are ignored.
+.It Sy "empty tbl layout"
+.Pq tbl
+A table layout specification is completely empty,
+specifying zero lines and zero columns.
+As a fallback, a single left-justified column is used.
+.It Sy "invalid character in tbl layout"
+.Pq tbl
+A table layout specification contains a character that can neither
+be interpreted as a layout key character nor as a layout modifier,
+or a modifier precedes the first key.
+The invalid character is discarded.
+.It Sy "unmatched parenthesis in tbl layout"
+.Pq tbl
+A table layout specification contains an opening parenthesis,
+but no matching closing parenthesis.
+The rest of the input line, starting from the parenthesis, has no effect.
 .El
 .Pp
 .Bl -inset -compact
-.It Sy "no table layout cells specified"
 .It Sy "no table data cells specified"
 .It Sy "ignore data in cell"
 .It Sy "data block still open"
@@ -1607,7 +1640,12 @@ request supported by GNU troff or Heirlo
 .Nm ,
 and it is likely that this will cause information loss
 or considerable misformatting.
-.It Sy "unsupported table layout"
+.It Sy "unsupported tbl layout modifier"
+.Pq tbl
+A table layout specification contains an
+.Sq Cm m
+modifier.
+The modifier is discarded.
 .It Sy "ignoring macro in table"
 .It Sy "eqn in tbl"
 .Pq eqn , tbl
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2015-01-26 18:43 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-26 18:43 mdocml: Rework tbl(7) layout parsing: * Continue parsing even if part of schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).