From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from localhost (fantadrom.bsd.lv [local]); by fantadrom.bsd.lv (OpenSMTPD) with ESMTPA id 08b05682; for ; Sun, 25 Jan 2015 19:57:52 -0500 (EST) Date: Sun, 25 Jan 2015 19:57:52 -0500 (EST) Message-Id: <9846480894202101677.enqueue@fantadrom.bsd.lv> X-Mailinglist: mdocml-source Reply-To: source@mdocml.bsd.lv MIME-Version: 1.0 From: schwarze@mdocml.bsd.lv To: source@mdocml.bsd.lv Subject: mdocml: Improve (or rather, rewrite) tbl(7) option parsing. X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Log Message: ----------- Improve (or rather, rewrite) tbl(7) option parsing. * Allow the layout to start after the semicolon on the options line. * Ignore leading commas. * Option arguments cannot contain closing parentheses. * Avoid needless UNSUPP messages. * Better ERROR reporting. * Delete unused "linesize" field in struct tbl_opts. * No need for static buffers. * Garbage collect one almost empty wrapper function. Improved functionality, but minus 40 lines of code. Modified Files: -------------- mdocml: mandoc.1 mandoc.h read.c tbl.c tbl_opts.c Revision Data ------------- Index: read.c =================================================================== RCS file: /home/cvs/mdocml/mdocml/read.c,v retrieving revision 1.115 retrieving revision 1.116 diff -Lread.c -Lread.c -u -p -r1.115 -r1.116 --- read.c +++ read.c @@ -180,6 +180,10 @@ static const char * const mandocerrs[MAN "unexpected end of equation", /* related to tables */ + "non-alphabetic character in tbl options", + "skipping unknown tbl option", + "missing tbl option argument", + "wrong tbl option argument size", "no table layout cells specified", "no table data cells specified", "ignore data in cell", @@ -218,8 +222,6 @@ static const char * const mandocerrs[MAN "input too large", "unsupported control character", "unsupported roff request", - "unsupported table syntax", - "unsupported table option", "unsupported table layout", "ignoring macro in table", }; Index: tbl_opts.c =================================================================== RCS file: /home/cvs/mdocml/mdocml/tbl_opts.c,v retrieving revision 1.16 retrieving revision 1.17 diff -Ltbl_opts.c -Ltbl_opts.c -u -p -r1.16 -r1.17 --- tbl_opts.c +++ tbl_opts.c @@ -1,6 +1,7 @@ /* $Id$ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons + * Copyright (c) 2015 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -53,12 +54,6 @@ struct tbl_phrase { /* Handle Commonwealth/American spellings. */ #define KEY_MAXKEYS 14 -/* Maximum length of key name string. */ -#define KEY_MAXNAME 13 - -/* Maximum length of key number size. */ -#define KEY_MAXNUMSZ 10 - static const struct tbl_phrase keys[KEY_MAXKEYS] = { { "center", TBL_OPT_CENTRE, KEY_CENTRE}, { "centre", TBL_OPT_CENTRE, KEY_CENTRE}, @@ -76,193 +71,119 @@ static const struct tbl_phrase keys[KEY_ { "nospaces", TBL_OPT_NOSPACE, KEY_NOSPACE}, }; -static int arg(struct tbl_node *, int, +static void arg(struct tbl_node *, int, const char *, int *, enum tbl_ident); -static void opt(struct tbl_node *, int, - const char *, int *); -static int +static void arg(struct tbl_node *tbl, int ln, const char *p, int *pos, enum tbl_ident key) { - int i; - char buf[KEY_MAXNUMSZ]; + const char *optname; + int len, want; while (isspace((unsigned char)p[*pos])) (*pos)++; - /* Arguments always begin with a parenthesis. */ + /* Arguments are enclosed in parentheses. */ - if ('(' != p[*pos]) { - mandoc_msg(MANDOCERR_TBL, tbl->parse, - ln, *pos, NULL); - return(0); + len = 0; + if (p[*pos] == '(') { + (*pos)++; + while (p[*pos + len] != ')') + len++; } - (*pos)++; - - /* - * The arguments can be ANY value, so we can't just stop at the - * next close parenthesis (the argument can be a closed - * parenthesis itself). - */ - switch (key) { case KEY_DELIM: - if ('\0' == p[(*pos)++]) { - mandoc_msg(MANDOCERR_TBL, tbl->parse, - ln, *pos - 1, NULL); - return(0); - } - - if ('\0' == p[(*pos)++]) { - mandoc_msg(MANDOCERR_TBL, tbl->parse, - ln, *pos - 1, NULL); - return(0); - } + optname = "delim"; + want = 2; break; case KEY_TAB: - if ('\0' != (tbl->opts.tab = p[(*pos)++])) - break; - - mandoc_msg(MANDOCERR_TBL, tbl->parse, - ln, *pos - 1, NULL); - return(0); + optname = "tab"; + want = 1; + if (len == want) + tbl->opts.tab = p[*pos]; + break; case KEY_LINESIZE: - for (i = 0; i < KEY_MAXNUMSZ && p[*pos]; i++, (*pos)++) { - buf[i] = p[*pos]; - if ( ! isdigit((unsigned char)buf[i])) - break; - } - - if (i < KEY_MAXNUMSZ) { - buf[i] = '\0'; - tbl->opts.linesize = atoi(buf); - break; - } - - mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos, NULL); - return(0); + optname = "linesize"; + want = 0; + break; case KEY_DPOINT: - if ('\0' != (tbl->opts.decimal = p[(*pos)++])) - break; - - mandoc_msg(MANDOCERR_TBL, tbl->parse, - ln, *pos - 1, NULL); - return(0); + optname = "decimalpoint"; + want = 1; + if (len == want) + tbl->opts.decimal = p[*pos]; + break; default: abort(); /* NOTREACHED */ } - /* End with a close parenthesis. */ + if (len == 0) + mandoc_msg(MANDOCERR_TBLOPT_NOARG, + tbl->parse, ln, *pos, optname); + else if (want && len != want) + mandoc_vmsg(MANDOCERR_TBLOPT_ARGSZ, + tbl->parse, ln, *pos, + "%s want %d have %d", optname, want, len); - if (')' == p[(*pos)++]) - return(1); - - mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos - 1, NULL); - return(0); + *pos += len; + if (p[*pos] == ')') + (*pos)++; } -static void -opt(struct tbl_node *tbl, int ln, const char *p, int *pos) +/* + * Parse one line of options up to the semicolon. + * Each option can be preceded by blanks and/or commas, + * and some options are followed by arguments. + */ +void +tbl_option(struct tbl_node *tbl, int ln, const char *p) { - int i, sv; - char buf[KEY_MAXNAME]; - - /* - * Parse individual options from the stream as surrounded by - * this goto. Each pass through the routine parses out a single - * option and registers it. Option arguments are processed in - * the arg() function. - */ - -again: /* - * EBNF describing this section: - * - * options ::= option_list [:space:]* [;][\n] - * option_list ::= option option_tail - * option_tail ::= [,:space:]+ option_list | - * ::= epsilon - * option ::= [:alpha:]+ args - * args ::= [:space:]* [(] [:alpha:]+ [)] - */ - - while (isspace((unsigned char)p[*pos])) - (*pos)++; - - /* Safe exit point. */ - - if (';' == p[*pos]) - return; - - /* Copy up to first non-alpha character. */ + int i, pos, len; - for (sv = *pos, i = 0; i < KEY_MAXNAME; i++, (*pos)++) { - buf[i] = (char)tolower((unsigned char)p[*pos]); - if ( ! isalpha((unsigned char)buf[i])) - break; - } - - /* Exit if buffer is empty (or overrun). */ + pos = 0; + for (;;) { + while (isspace((unsigned char)p[pos]) || p[pos] == ',') + pos++; - if (KEY_MAXNAME == i || 0 == i) { - mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos, NULL); - return; - } + if (p[pos] == ';') + return; - buf[i] = '\0'; + /* Parse one option name. */ - while (isspace((unsigned char)p[*pos]) || p[*pos] == ',') - (*pos)++; + len = 0; + while (isalpha((unsigned char)p[pos + len])) + len++; + + if (len == 0) { + mandoc_vmsg(MANDOCERR_TBLOPT_ALPHA, + tbl->parse, ln, pos, "%c", p[pos]); + pos++; + continue; + } - /* - * Look through all of the available keys to find one that - * matches the input. FIXME: hashtable this. - */ + /* Look up the option name. */ - for (i = 0; i < KEY_MAXKEYS; i++) { - if (strcmp(buf, keys[i].name)) + i = 0; + while (i < KEY_MAXKEYS && + (strncasecmp(p + pos, keys[i].name, len) || + keys[i].name[len] != '\0')) + i++; + + if (i == KEY_MAXKEYS) { + mandoc_vmsg(MANDOCERR_TBLOPT_BAD, tbl->parse, + ln, pos, "%.*s", len, p + pos); + pos += len; continue; + } - /* - * Note: this is more difficult to recover from, as we - * can be anywhere in the option sequence and it's - * harder to jump to the next. Meanwhile, just bail out - * of the sequence altogether. - */ + /* Handle the option. */ + pos += len; if (keys[i].key) tbl->opts.opts |= keys[i].key; - else if ( ! arg(tbl, ln, p, pos, keys[i].ident)) - return; - - break; + else + arg(tbl, ln, p, &pos, keys[i].ident); } - - /* - * Allow us to recover from bad options by continuing to another - * parse sequence. - */ - - if (KEY_MAXKEYS == i) - mandoc_msg(MANDOCERR_TBLOPT, tbl->parse, ln, sv, NULL); - - goto again; - /* NOTREACHED */ -} - -void -tbl_option(struct tbl_node *tbl, int ln, const char *p) -{ - int pos; - - /* - * Table options are always on just one line, so automatically - * switch into the next input mode here. - */ - tbl->part = TBL_PART_LAYOUT; - - pos = 0; - opt(tbl, ln, p, &pos); } Index: mandoc.h =================================================================== RCS file: /home/cvs/mdocml/mdocml/mandoc.h,v retrieving revision 1.186 retrieving revision 1.187 diff -Lmandoc.h -Lmandoc.h -u -p -r1.186 -r1.187 --- mandoc.h +++ mandoc.h @@ -136,6 +136,10 @@ enum mandocerr { MANDOCERR_EQNEOF, /* unexpected end of equation */ /* related to tables */ + MANDOCERR_TBLOPT_ALPHA, /* non-alphabetic character in tbl options */ + MANDOCERR_TBLOPT_BAD, /* skipping unknown tbl option: option */ + MANDOCERR_TBLOPT_NOARG, /* missing tbl option argument */ + MANDOCERR_TBLOPT_ARGSZ, /* wrong tbl option argument size */ MANDOCERR_TBLNOLAYOUT, /* no table layout cells specified */ MANDOCERR_TBLNODATA, /* no table data cells specified */ MANDOCERR_TBLIGNDATA, /* ignore data in cell */ @@ -175,8 +179,6 @@ enum mandocerr { MANDOCERR_TOOLARGE, /* input too large */ MANDOCERR_CHAR_UNSUPP, /* unsupported control character: number */ MANDOCERR_REQ_UNSUPP, /* unsupported roff request: request */ - MANDOCERR_TBL, /* unsupported table syntax */ - MANDOCERR_TBLOPT, /* unsupported table option */ MANDOCERR_TBLLAYOUT, /* unsupported table layout */ MANDOCERR_TBLMACRO, /* ignoring macro in table: macro */ @@ -186,7 +188,6 @@ enum mandocerr { struct tbl_opts { char tab; /* cell-separator */ char decimal; /* decimal point */ - int linesize; int opts; #define TBL_OPT_CENTRE (1 << 0) #define TBL_OPT_EXPAND (1 << 1) Index: tbl.c =================================================================== RCS file: /home/cvs/mdocml/mdocml/tbl.c,v retrieving revision 1.32 retrieving revision 1.33 diff -Ltbl.c -Ltbl.c -u -p -r1.32 -r1.33 --- tbl.c +++ tbl.c @@ -1,7 +1,7 @@ /* $Id$ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons - * Copyright (c) 2011 Ingo Schwarze + * Copyright (c) 2011, 2015 Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -34,29 +34,45 @@ enum rofferr tbl_read(struct tbl_node *tbl, int ln, const char *p, int offs) { - int len; const char *cp; - - cp = &p[offs]; - len = (int)strlen(cp); + int active; /* - * If we're in the options section and we don't have a - * terminating semicolon, assume we've moved directly into the - * layout section. No need to report a warning: this is, - * apparently, standard behaviour. + * In the options section, proceed to the layout section + * after a semicolon, or right away if there is no semicolon. + * Ignore semicolons in arguments. */ - if (TBL_PART_OPTS == tbl->part && len) - if (';' != cp[len - 1]) - tbl->part = TBL_PART_LAYOUT; + if (tbl->part == TBL_PART_OPTS) { + tbl->part = TBL_PART_LAYOUT; + active = 1; + for (cp = p; *cp != '\0'; cp++) { + switch (*cp) { + case '(': + active = 0; + continue; + case ')': + active = 1; + continue; + case ';': + if (active) + break; + continue; + default: + continue; + } + break; + } + if (*cp == ';') { + tbl_option(tbl, ln, p); + if (*(p = cp + 1) == '\0') + return(ROFF_IGN); + } + } - /* Now process each logical section of the table. */ + /* Process the other section types. */ switch (tbl->part) { - case TBL_PART_OPTS: - tbl_option(tbl, ln, p); - return(ROFF_IGN); case TBL_PART_LAYOUT: tbl_layout(tbl, ln, p); return(ROFF_IGN); @@ -81,7 +97,6 @@ tbl_alloc(int pos, int line, struct mpar tbl->parse = parse; tbl->part = TBL_PART_OPTS; tbl->opts.tab = '\t'; - tbl->opts.linesize = 12; tbl->opts.decimal = '.'; return(tbl); } Index: mandoc.1 =================================================================== RCS file: /home/cvs/mdocml/mdocml/mandoc.1,v retrieving revision 1.135 retrieving revision 1.136 diff -Lmandoc.1 -Lmandoc.1 -u -p -r1.135 -r1.136 --- mandoc.1 +++ mandoc.1 @@ -1281,12 +1281,35 @@ keeps the code more readable. .It "unexpected end of equation" .El .Ss "Errors related to tables" +.Bl -ohang +.It Sy "non-alphabetic character in tbl options" +.Pq tbl +The table options line contains a character other than a letter, +blank, or comma where the beginning of an option name is expected. +The character is ignored. +.It Sy "skipping unknown tbl option" +.Pq tbl +The table options line contains a string of letters that does not +match any known option name. +The word is ignored. +.It Sy "missing tbl option argument" +.Pq tbl +A table option that requires an argument is not followed by an +opening parenthesis, or the opening parenthesis is immediately +followed by a closing parenthesis. +The option is ignored. +.It Sy "wrong tbl option argument size" +.Pq tbl +A table option argument contains an invalid number of characters. +Both the option and the argument are ignored. +.El +.Pp .Bl -inset -compact -.It "no table layout cells specified" -.It "no table data cells specified" -.It "ignore data in cell" -.It "data block still open" -.It "ignoring extra data cells" +.It Sy "no table layout cells specified" +.It Sy "no table data cells specified" +.It Sy "ignore data in cell" +.It Sy "data block still open" +.It Sy "ignoring extra data cells" .El .Ss "Errors related to roff, mdoc, and man code" .Bl -ohang @@ -1568,6 +1591,14 @@ cannot handle input files larger than it of 2^31 bytes (2 Gigabytes). Since useful manuals are always small, this is not a problem in practice. Parsing is aborted as soon as the condition is detected. +.It Sy "unsupported control character" +.Pq roff +An ASCII control character supported by other +.Xr roff 7 +implementations but not by +.Nm +was found in an input file. +It is replaced by a question mark. .It Sy "unsupported roff request" .Pq roff An input file contains a @@ -1576,9 +1607,7 @@ request supported by GNU troff or Heirlo .Nm , and it is likely that this will cause information loss or considerable misformatting. -.It Sy "bad table syntax" -.It Sy "bad table option" -.It Sy "bad table layout" +.It Sy "unsupported table layout" .It Sy "ignoring macro in table" .El .Sh COMPATIBILITY -- To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv