From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from smtp-1.sys.kth.se (smtp-1.sys.kth.se [130.237.32.175]) by krisdoz.my.domain (8.14.3/8.14.3) with ESMTP id p38DFpWB020843 for ; Fri, 8 Apr 2011 09:15:51 -0400 (EDT) Received: from mailscan-1.sys.kth.se (mailscan-1.sys.kth.se [130.237.32.91]) by smtp-1.sys.kth.se (Postfix) with ESMTP id D84CC156402 for ; Fri, 8 Apr 2011 15:15:45 +0200 (CEST) X-Virus-Scanned: by amavisd-new at kth.se Received: from smtp-1.sys.kth.se ([130.237.32.175]) by mailscan-1.sys.kth.se (mailscan-1.sys.kth.se [130.237.32.91]) (amavisd-new, port 10024) with LMTP id lWVvPbxhf34e for ; Fri, 8 Apr 2011 15:15:41 +0200 (CEST) X-KTH-Auth: kristaps [193.10.49.5] X-KTH-mail-from: kristaps@bsd.lv X-KTH-rcpt-to: tech@mdocml.bsd.lv Received: from [172.16.18.84] (unknown [193.10.49.5]) by smtp-1.sys.kth.se (Postfix) with ESMTP id DC7B1156B42 for ; Fri, 8 Apr 2011 15:15:41 +0200 (CEST) Message-ID: <4D9F0A78.9040800@bsd.lv> Date: Fri, 08 Apr 2011 15:15:36 +0200 From: Kristaps Dzonsons User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.16) Gecko/20110303 Icedove/3.0.11 X-Mailinglist: mdocml-tech Reply-To: tech@mdocml.bsd.lv MIME-Version: 1.0 To: tech@mdocml.bsd.lv Subject: Re: Unifying the escape-sequence parser. References: <4D9DC396.9010504@bsd.lv> <4D9EEF2F.2030307@bsd.lv> <4D9EFC85.3040301@bsd.lv> <4D9F0478.40006@bsd.lv> In-Reply-To: <4D9F0478.40006@bsd.lv> Content-Type: multipart/mixed; boundary="------------080509010808000109040504" This is a multi-part message in MIME format. --------------080509010808000109040504 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit > Step 3: fixed where I forgot to for->while in mdoc_validate.c's > check_text() loop; removed DECO_SSPECIAL; cleaned up DECO_NOSPACE; > cleaned up print_encode() in html.c. The next will be removing the DECO > stuff entirely. Ok (step 4?), one last patch to show full fidelity between DECO and ESCAPE before I rip out DECO stuff. 
This also comes with a fix of a yet-unnoticed bug: \F escapes were being treated as \f escapes in terms of recognising styles (bold, italic, etc.). However, in the groff manual, \F accepts families (Times, Helvetica, etc.) while \f accepts styles (duh). I've actually completely removed the FONTFAMILY notion, as we don't really support it (it can be put in later, if necessary, but it's unlikely as it can't be reliably displayed across output media). --------------080509010808000109040504 Content-Type: text/plain; name="patch.escapes.txt" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="patch.escapes.txt" ? chat.8 ? config.h ? config.log ? foo.1 ? foo.1.html ? foo.3 ? foo.ps ? ksh.1 ? man.txt ? mandoc ? mandoc-db.bak.c ? mandoc.db ? mandoc.index ? manuals.txt ? patch.escapes.txt ? patch.mdoc.txt ? patch.roff.txt ? patch.txt Index: html.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/html.c,v retrieving revision 1.131 diff -u -r1.131 html.c --- html.c 22 Mar 2011 14:05:45 -0000 1.131 +++ html.c 8 Apr 2011 13:14:21 -0000 @@ -230,7 +230,7 @@ if ((cp = chars_spec2cp(h->symtab, p, len)) > 0) { printf("&#%d;", cp); return; - } else if (-1 == cp && DECO_SSPECIAL == d) { + } else if (-1 == cp && 1 == len) { fwrite(p, 1, len, stdout); return; } else if (-1 == cp) @@ -304,40 +304,41 @@ int len, nospace; const char *seq; enum roffdeco deco; - static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' }; + static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH }; nospace = 0; - for (; *p; p++) { + while ('\0' != *p) { sz = strcspn(p, rejs); fwrite(p, 1, sz, stdout); - p += /* LINTED */ - sz; + p += (int)sz; - if ('<' == *p) { + if ('\0' == *p) + break; + + switch (*p++) { + case ('<'): printf("<"); continue; - } else if ('>' == *p) { + case ('>'): printf(">"); continue; - } else if ('&' == *p) { + case ('&'): printf("&"); continue; - } else if (ASCII_HYPH == *p) { - /* 
- * Note: "soft hyphens" aren't graphically - * displayed when not breaking the text; we want - * them to be displayed. - */ - /*printf("­");*/ + case (ASCII_HYPH): putchar('-'); continue; - } else if ('\0' == *p) + default: + break; + } + + seq = p; + if (0 == (len = a2roffdeco(&deco, &seq, &sz))) break; - seq = ++p; - len = a2roffdeco(&deco, &seq, &sz); + p += len; switch (deco) { case (DECO_NUMBERED): @@ -346,8 +347,6 @@ case (DECO_RESERVED): print_res(h, seq, sz); break; - case (DECO_SSPECIAL): - /* FALLTHROUGH */ case (DECO_SPECIAL): print_spec(h, deco, seq, sz); break; @@ -362,14 +361,13 @@ break; print_metaf(h, deco); break; + case (DECO_NOSPACE): + if ('\0' == *p) + nospace = 1; + break; default: break; } - - p += len - 1; - - if (DECO_NOSPACE == deco && '\0' == *(p + 1)) - nospace = 1; } return(nospace); Index: libmandoc.h =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/libmandoc.h,v retrieving revision 1.17 diff -u -r1.17 libmandoc.h --- libmandoc.h 28 Mar 2011 23:52:13 -0000 1.17 +++ libmandoc.h 8 Apr 2011 13:14:21 -0000 @@ -73,7 +73,6 @@ int, int, const char *); void mandoc_vmsg(enum mandocerr, struct mparse *, int, int, const char *, ...); -int mandoc_special(char *); char *mandoc_strdup(const char *); char *mandoc_getarg(struct mparse *, char **, int, int *); char *mandoc_normdate(struct mparse *, char *, int, int); Index: man_validate.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/man_validate.c,v retrieving revision 1.67 diff -u -r1.67 man_validate.c --- man_validate.c 22 Mar 2011 15:30:30 -0000 1.67 +++ man_validate.c 8 Apr 2011 13:14:21 -0000 @@ -54,7 +54,7 @@ static int check_part(CHKARGS); static int check_root(CHKARGS); static int check_sec(CHKARGS); -static int check_text(CHKARGS); +static void check_text(CHKARGS); static int post_AT(CHKARGS); static int post_fi(CHKARGS); @@ -151,7 +151,8 @@ switch 
(m->last->type) { case (MAN_TEXT): - return(check_text(m, m->last)); + check_text(m, m->last); + return(1); case (MAN_ROOT): return(check_root(m, m->last)); case (MAN_EQN): @@ -204,43 +205,48 @@ return(1); } - -static int +static void check_text(CHKARGS) { - char *p; - int pos, c; + char *p, *pp, *cpp; + int pos; size_t sz; - for (p = n->string, pos = n->pos + 1; *p; p++, pos++) { - sz = strcspn(p, "\t\\"); - p += (int)sz; + p = n->string; + pos = n->pos + 1; - if ('\0' == *p) - break; + while ('\0' != *p) { + sz = strcspn(p, "\t\\"); + p += (int)sz; pos += (int)sz; if ('\t' == *p) { - if (MAN_LITERAL & m->flags) - continue; - man_pmsg(m, n->line, pos, MANDOCERR_BADTAB); + if ( ! (MAN_LITERAL & m->flags)) + man_pmsg(m, n->line, pos, MANDOCERR_BADTAB); + p++; + pos++; continue; - } + } else if ('\0' == *p) + break; - /* Check the special character. */ + pos++; + pp = ++p; - c = mandoc_special(p); - if (c) { - p += c - 1; - pos += c - 1; - } else + if (ESCAPE_ERROR == mandoc_escape + ((const char **)&pp, NULL, NULL)) { man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE); - } + break; + } - return(1); -} + cpp = p; + while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp))) + *cpp = '-'; + pos += pp - p; + p = pp; + } +} #define INEQ_DEFINE(x, ineq, name) \ static int \ Index: mandoc.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.c,v retrieving revision 1.44 diff -u -r1.44 mandoc.c --- mandoc.c 28 Mar 2011 23:52:13 -0000 1.44 +++ mandoc.c 8 Apr 2011 13:14:22 -0000 @@ -35,198 +35,362 @@ static int a2time(time_t *, const char *, const char *); static char *time2a(time_t); +static int numescape(const char *); -int -mandoc_special(char *p) +/* + * Pass over recursive numerical expressions. 
The context of this + * function is important: it's only called within character-terminating + * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial + * recursion: we don't care about what's in these blocks. + * This returns the number of characters skipped or -1 if an error + * occurs (the caller should bail). + */ +static int +numescape(const char *start) { - int len, i; - char term; - char *sv; - - len = 0; + int i; + size_t sz; + const char *cp; + + i = 0; + + /* The expression consists of a subexpression. */ + + if ('\\' == start[i]) { + cp = &start[++i]; + /* + * Read past the end of the subexpression. + * Bail immediately on errors. + */ + if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) + return(-1); + return(i + cp - &start[i]); + } + + if ('(' != start[i++]) + return(0); + + /* + * A parenthesised subexpression. Read until the closing + * parenthesis, making sure to handle any nested subexpressions + * that might ruin our parse. + */ + + while (')' != start[i]) { + sz = strcspn(&start[i], ")\\"); + i += (int)sz; + + if ('\0' == start[i]) + return(-1); + else if ('\\' != start[i]) + continue; + + cp = &start[++i]; + if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) + return(-1); + i += cp - &start[i]; + } + + /* Read past the terminating ')'. */ + return(++i); +} + +/* + * Handle an escaped sequence. This should be called with any + * string subsequent to a `\'. Pass a pointer to this substring as "end"; + * it will be set to the supremum of the parsed escape sequence. If + * this returns ESCAPE_ERROR, the string is bogus and should be thrown + * away. If not ESCAPE_ERROR or ESCAPE_IGNORE, "start" is set to the + * first relevant character of the substring (font, glyph, whatever) of + * length sz. Both "start" and "sz" may be NULL. 
+ */ +enum mandoc_esc +mandoc_escape(const char **end, const char **start, int *sz) +{ + char c, term, numeric; + int i, lim, ssz, rlim; + const char *cp, *rstart; + enum mandoc_esc gly; + + cp = *end; + rstart = cp; + if (start) + *start = rstart; + i = 0; + gly = ESCAPE_ERROR; term = '\0'; - sv = p; + numeric = 0; - assert('\\' == *p); - p++; + switch ((c = cp[i++])) { + /* + * First the glyphs. There are several different forms of + * these, but each eventually returns a substring of the glyph + * name. + */ + case ('('): + gly = ESCAPE_SPECIAL; + lim = 2; + break; + case ('['): + gly = ESCAPE_SPECIAL; + term = ']'; + break; + case ('C'): + if ('\'' != cp[i]) + return(ESCAPE_ERROR); + gly = ESCAPE_SPECIAL; + term = '\''; + break; - switch (*p++) { -#if 0 - case ('Z'): - /* FALLTHROUGH */ - case ('X'): - /* FALLTHROUGH */ - case ('x'): - /* FALLTHROUGH */ - case ('S'): - /* FALLTHROUGH */ - case ('R'): - /* FALLTHROUGH */ - case ('N'): - /* FALLTHROUGH */ - case ('l'): - /* FALLTHROUGH */ - case ('L'): - /* FALLTHROUGH */ - case ('H'): + /* + * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where + * 'X' is the trigger. These have opaque sub-strings. 
+ */ + case ('F'): /* FALLTHROUGH */ - case ('h'): + case ('g'): /* FALLTHROUGH */ - case ('D'): + case ('k'): /* FALLTHROUGH */ - case ('C'): + case ('M'): /* FALLTHROUGH */ - case ('b'): + case ('m'): /* FALLTHROUGH */ - case ('B'): + case ('n'): /* FALLTHROUGH */ - case ('a'): + case ('V'): /* FALLTHROUGH */ - case ('A'): - if (*p++ != '\'') - return(0); - term = '\''; - break; -#endif - case ('h'): + case ('Y'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_IGNORE; /* FALLTHROUGH */ - case ('v'): + case ('*'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_PREDEF; /* FALLTHROUGH */ - case ('s'): - if (ASCII_HYPH == *p) - *p = '-'; + case ('f'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_FONT; - i = 0; - if ('+' == *p || '-' == *p) { - p++; - i = 1; - } + rstart= &cp[i]; + if (start) + *start = rstart; - switch (*p++) { + switch (cp[i++]) { case ('('): - len = 2; + lim = 2; break; case ('['): term = ']'; break; - case ('\''): - term = '\''; - break; - case ('0'): - i = 1; - /* FALLTHROUGH */ default: - len = 1; - p--; + lim = 1; + i--; break; } + break; - if (ASCII_HYPH == *p) - *p = '-'; - if ('+' == *p || '-' == *p) { - if (i) - return(0); - p++; - } - - /* Handle embedded numerical subexp or escape. */ - - if ('(' == *p) { - while (*p && ')' != *p) - if ('\\' == *p++) { - i = mandoc_special(--p); - if (0 == i) - return(0); - p += i; - } - - if (')' == *p++) - break; - - return(0); - } else if ('\\' == *p) { - if (0 == (i = mandoc_special(p))) - return(0); - p += i; - } - + /* + * These escapes are of the form \X'Y', where 'X' is the trigger + * and 'Y' is any string. These have opaque sub-strings. 
+ */ + case ('A'): + /* FALLTHROUGH */ + case ('b'): + /* FALLTHROUGH */ + case ('D'): + /* FALLTHROUGH */ + case ('o'): + /* FALLTHROUGH */ + case ('R'): + /* FALLTHROUGH */ + case ('X'): + /* FALLTHROUGH */ + case ('Z'): + if ('\'' != cp[i++]) + return(ESCAPE_ERROR); + gly = ESCAPE_IGNORE; + term = '\''; break; -#if 0 - case ('Y'): + + /* + * These escapes are of the form \X'N', where 'X' is the trigger + * and 'N' resolves to a numerical expression. + */ + case ('B'): /* FALLTHROUGH */ - case ('V'): + case ('h'): /* FALLTHROUGH */ - case ('$'): + case ('H'): /* FALLTHROUGH */ - case ('n'): + case ('L'): /* FALLTHROUGH */ -#endif - case ('k'): + case ('l'): /* FALLTHROUGH */ - case ('M'): + case ('N'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_NUMBERED; /* FALLTHROUGH */ - case ('m'): + case ('S'): /* FALLTHROUGH */ - case ('f'): + case ('v'): /* FALLTHROUGH */ - case ('F'): + case ('w'): /* FALLTHROUGH */ - case ('*'): - switch (*p++) { + case ('x'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_IGNORE; + if ('\'' != cp[i++]) + return(ESCAPE_ERROR); + term = numeric = '\''; + break; + + /* + * Sizes get a special category of their own. + */ + case ('s'): + gly = ESCAPE_IGNORE; + + rstart = &cp[i]; + if (start) + *start = rstart; + + /* See +/- counts as a sign. */ + c = cp[i]; + if ('+' == c || '-' == c || ASCII_HYPH == c) + ++i; + + switch (cp[i++]) { case ('('): - len = 2; + lim = 2; break; case ('['): - term = ']'; + term = numeric = ']'; + break; + case ('\''): + term = numeric = '\''; break; default: - len = 1; - p--; + lim = 1; + i--; break; } + + /* See +/- counts as a sign. */ + c = cp[i]; + if ('+' == c || '-' == c || ASCII_HYPH == c) + ++i; + break; - case ('('): - len = 2; - break; - case ('['): - term = ']'; - break; - case ('z'): - len = 1; - if ('\\' == *p) { - if (0 == (i = mandoc_special(p))) - return(0); - p += i; - return(*p ? (int)(p - sv) : 0); - } + + /* + * Anything else is assumed to be a glyph. 
+ */ + default: + gly = ESCAPE_SPECIAL; + lim = 1; + i--; break; - case ('o'): - /* FALLTHROUGH */ - case ('w'): - if ('\'' == *p++) { - term = '\''; + } + + assert(ESCAPE_ERROR != gly); + + rstart = &cp[i]; + if (start) + *start = rstart; + + /* + * If a terminating block has been specified, we need to + * handle the case of recursion, which could have their + * own terminating blocks that mess up our parse. This, by the + * way, means that the "start" and "size" values will be + * effectively meaningless. + */ + + ssz = 0; + if (numeric && -1 == (ssz = numescape(&cp[i]))) + return(ESCAPE_ERROR); + + i += ssz; + rlim = -1; + + /* + * We have a character terminator. Try to read up to that + * character. If we can't (i.e., we hit the nil), then return + * an error; if we can, calculate our length, read past the + * terminating character, and exit. + */ + + if ('\0' != term) { + *end = strchr(&cp[i], term); + if ('\0' == *end) + return(ESCAPE_ERROR); + + rlim = *end - &cp[i]; + if (sz) + *sz = rlim; + (*end)++; + goto out; + } + + assert(lim > 0); + + /* + * We have a numeric limit. If the string is shorter than that, + * stop and return an error. Else adjust our endpoint, length, + * and return the current glyph. + */ + + if ((size_t)lim > strlen(&cp[i])) + return(ESCAPE_ERROR); + + rlim = lim; + if (sz) + *sz = rlim; + + *end = &cp[i] + lim; + +out: + assert(rlim >= 0 && rstart); + + /* Run post-processors. 
*/ + + switch (gly) { + case (ESCAPE_FONT): + if (1 != rlim) + break; + switch (*rstart) { + case ('3'): + /* FALLTHROUGH */ + case ('B'): + gly = ESCAPE_FONTBOLD; + break; + case ('2'): + /* FALLTHROUGH */ + case ('I'): + gly = ESCAPE_FONTITALIC; + break; + case ('P'): + gly = ESCAPE_FONTPREV; + break; + case ('1'): + /* FALLTHROUGH */ + case ('R'): + gly = ESCAPE_FONTROMAN; break; } - /* FALLTHROUGH */ + case (ESCAPE_SPECIAL): + if (1 != rlim) + break; + if ('c' == *rstart) + gly = ESCAPE_NOSPACE; + break; default: - len = 1; - p--; break; } - if (term) { - for ( ; *p && term != *p; p++) - if (ASCII_HYPH == *p) - *p = '-'; - return(*p ? (int)(p - sv) : 0); - } - - for (i = 0; *p && i < len; i++, p++) - if (ASCII_HYPH == *p) - *p = '-'; - return(i == len ? (int)(p - sv) : 0); + return(gly); } - void * mandoc_calloc(size_t num, size_t size) Index: mandoc.h =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.h,v retrieving revision 1.69 diff -u -r1.69 mandoc.h --- mandoc.h 28 Mar 2011 21:49:42 -0000 1.69 +++ mandoc.h 8 Apr 2011 13:14:22 -0000 @@ -288,6 +288,20 @@ MPARSE_MAN /* assume -man */ }; +enum mandoc_esc { + ESCAPE_ERROR = 0, + ESCAPE_IGNORE, /* escape to be ignored */ + ESCAPE_SPECIAL, /* a regular special character */ + ESCAPE_PREDEF, /* a predefined special character */ + ESCAPE_FONT, /* a font mode */ + ESCAPE_FONTBOLD, + ESCAPE_FONTITALIC, + ESCAPE_FONTROMAN, + ESCAPE_FONTPREV, + ESCAPE_NUMBERED, /* a numbered glyph */ + ESCAPE_NOSPACE +}; + typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel, const char *, int, int, const char *); @@ -309,6 +323,8 @@ void *mandoc_calloc(size_t, size_t); void *mandoc_malloc(size_t); void *mandoc_realloc(void *, size_t); + +enum mandoc_esc mandoc_escape(const char **, const char **, int *); __END_DECLS Index: mdoc_validate.c =================================================================== RCS file: 
/usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mdoc_validate.c,v retrieving revision 1.166 diff -u -r1.166 mdoc_validate.c --- mdoc_validate.c 3 Apr 2011 09:53:50 -0000 1.166 +++ mdoc_validate.c 8 Apr 2011 13:14:22 -0000 @@ -545,31 +545,39 @@ static void check_text(struct mdoc *m, int ln, int pos, char *p) { - int c; + char *cpp, *pp; size_t sz; - for ( ; *p; p++, pos++) { + while ('\0' != *p) { sz = strcspn(p, "\t\\"); - p += (int)sz; - - if ('\0' == *p) - break; + p += (int)sz; pos += (int)sz; if ('\t' == *p) { if ( ! (MDOC_LITERAL & m->flags)) mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB); + p++; + pos++; continue; - } + } else if ('\0' == *p) + break; + + pos++; + pp = ++p; - if (0 == (c = mandoc_special(p))) { + if (ESCAPE_ERROR == mandoc_escape + ((const char **)&pp, NULL, NULL)) { mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE); - continue; + break; } - p += c - 1; - pos += c - 1; + cpp = p; + while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp))) + *cpp = '-'; + + pos += pp - p; + p = pp; } } Index: out.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/out.c,v retrieving revision 1.39 diff -u -r1.39 out.c --- out.c 17 Mar 2011 08:49:34 -0000 1.39 +++ out.c 8 Apr 2011 13:14:22 -0000 @@ -178,237 +178,55 @@ int a2roffdeco(enum roffdeco *d, const char **word, size_t *sz) { - int i, j, lim; - char term, c; - const char *wp; - enum roffdeco dd; + const char *cp, *start; + int ssz; + enum mandoc_esc esc; *d = DECO_NONE; - lim = i = 0; - term = '\0'; - wp = *word; - switch ((c = wp[i++])) { - case ('('): - *d = DECO_SPECIAL; - lim = 2; - break; - case ('F'): - /* FALLTHROUGH */ - case ('f'): - *d = 'F' == c ? 
DECO_FFONT : DECO_FONT; - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; - break; - case ('3'): - /* FALLTHROUGH */ - case ('B'): - *d = DECO_BOLD; - return(i); - case ('2'): - /* FALLTHROUGH */ - case ('I'): - *d = DECO_ITALIC; - return(i); - case ('P'): - *d = DECO_PREVIOUS; - return(i); - case ('1'): - /* FALLTHROUGH */ - case ('R'): - *d = DECO_ROMAN; - return(i); - default: - i--; - lim = 1; - break; - } - break; - case ('k'): - /* FALLTHROUGH */ - case ('M'): - /* FALLTHROUGH */ - case ('m'): - /* FALLTHROUGH */ - case ('*'): - if ('*' == c) - *d = DECO_RESERVED; - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; - break; - default: - i--; - lim = 1; - break; - } - break; - - case ('N'): - - /* - * Sequence of characters: backslash, 'N' (i = 0), - * starting delimiter (i = 1), character number (i = 2). - */ - - *word = wp + 2; - *sz = 0; - - /* - * Cannot use a digit as a starting delimiter; - * but skip the digit anyway. - */ - - if (isdigit((int)wp[1])) - return(2); - - /* - * Any non-digit terminates the character number. - * That is, the terminating delimiter need not - * match the starting delimiter. - */ - - for (i = 2; isdigit((int)wp[i]); i++) - (*sz)++; - - /* - * This is only a numbered character - * if the character number has at least one digit. - */ - - if (*sz) - *d = DECO_NUMBERED; - - /* - * Skip the terminating delimiter, even if it does not - * match, and even if there is no character number. 
- */ - - return(++i); - - case ('h'): - /* FALLTHROUGH */ - case ('v'): - /* FALLTHROUGH */ - case ('s'): - j = 0; - if ('+' == wp[i] || '-' == wp[i]) { - i++; - j = 1; - } - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; - break; - case ('\''): - term = '\''; - break; - case ('0'): - j = 1; - /* FALLTHROUGH */ - default: - i--; - lim = 1; - break; - } - - if ('+' == wp[i] || '-' == wp[i]) { - if (j) - return(i); - i++; - } - - /* Handle embedded numerical subexp or escape. */ - - if ('(' == wp[i]) { - while (wp[i] && ')' != wp[i]) - if ('\\' == wp[i++]) { - /* Handle embedded escape. */ - *word = &wp[i]; - i += a2roffdeco(&dd, word, sz); - } - - if (')' == wp[i++]) - break; - - *d = DECO_NONE; - return(i - 1); - } else if ('\\' == wp[i]) { - *word = &wp[++i]; - i += a2roffdeco(&dd, word, sz); - } + cp = start = *word; + esc = mandoc_escape(&cp, word, &ssz); + + switch (esc) { + case (ESCAPE_ERROR): + return(0); + case (ESCAPE_IGNORE): + break; + case (ESCAPE_NUMBERED): + *d = DECO_NUMBERED; break; - case ('['): + case (ESCAPE_FONT): + *d = DECO_FONT; + break; + case (ESCAPE_SPECIAL): *d = DECO_SPECIAL; - term = ']'; break; - case ('c'): + case (ESCAPE_PREDEF): + *d = DECO_RESERVED; + break; + case (ESCAPE_FONTBOLD): + *d = DECO_BOLD; + break; + case (ESCAPE_FONTITALIC): + *d = DECO_ITALIC; + break; + case (ESCAPE_FONTROMAN): + *d = DECO_ROMAN; + break; + case (ESCAPE_FONTPREV): + *d = DECO_PREVIOUS; + break; + case (ESCAPE_NOSPACE): *d = DECO_NOSPACE; - return(i); - case ('z'): - *d = DECO_NONE; - if ('\\' == wp[i]) { - *word = &wp[++i]; - return(i + a2roffdeco(&dd, word, sz)); - } else - lim = 1; - break; - case ('o'): - /* FALLTHROUGH */ - case ('w'): - if ('\'' == wp[i++]) { - term = '\''; - break; - } - /* FALLTHROUGH */ - default: - *d = DECO_SSPECIAL; - i--; - lim = 1; break; } - assert(term || lim); - *word = &wp[i]; - - if (term) { - j = i; - while (wp[i] && wp[i] != term) - i++; - if ('\0' == wp[i]) { - *d = DECO_NONE; 
- return(i); - } - - assert(i >= j); - *sz = (size_t)(i - j); - - return(i + 1); - } - - assert(lim > 0); - *sz = (size_t)lim; - - for (j = 0; wp[i] && j < lim; j++) - i++; - if (j < lim) - *d = DECO_NONE; + assert(ssz >= 0); + *sz = (size_t)ssz; + ssz = cp - start; - return(i); + return(ssz); } /* Index: out.h =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/out.h,v retrieving revision 1.18 diff -u -r1.18 out.h --- out.h 22 Mar 2011 10:13:01 -0000 1.18 +++ out.h 8 Apr 2011 13:14:22 -0000 @@ -35,7 +35,6 @@ DECO_NONE, DECO_NUMBERED, /* numbered character */ DECO_SPECIAL, /* special character */ - DECO_SSPECIAL, /* single-char special */ DECO_RESERVED, /* reserved word */ DECO_BOLD, /* bold font */ DECO_ITALIC, /* italic font */ @@ -43,7 +42,6 @@ DECO_PREVIOUS, /* revert to previous font */ DECO_NOSPACE, /* suppress spacing */ DECO_FONT, /* font */ - DECO_FFONT, /* font family */ DECO_MAX }; Index: read.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/read.c,v retrieving revision 1.11 diff -u -r1.11 read.c --- read.c 4 Apr 2011 23:04:38 -0000 1.11 +++ read.c 8 Apr 2011 13:14:22 -0000 @@ -142,7 +142,7 @@ "tab in non-literal context", "end of line whitespace", "bad comment style", - "unknown escape sequence", + "bad escape sequence", "unterminated quoted string", "generic error", Index: term.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/term.c,v retrieving revision 1.183 diff -u -r1.183 term.c --- term.c 4 Apr 2011 21:14:12 -0000 1.183 +++ term.c 8 Apr 2011 13:14:22 -0000 @@ -366,7 +366,7 @@ rhs = chars_spec2str(p->symtab, word, len, &sz); if (rhs) encode(p, rhs, sz); - else if (DECO_SSPECIAL == d) + else if (1 == len) encode(p, word, len); } @@ -457,6 +457,7 @@ term_word(struct termp *p, const char *word) { const char *seq; + int sz; size_t ssz; enum roffdeco deco; 
@@ -487,7 +488,9 @@ continue; seq = ++word; - word += a2roffdeco(&deco, &seq, &ssz); + if (0 == (sz = a2roffdeco(&deco, &seq, &ssz))) + break; + word += sz; switch (deco) { case (DECO_NUMBERED): @@ -497,8 +500,6 @@ res(p, seq, ssz); break; case (DECO_SPECIAL): - /* FALLTHROUGH */ - case (DECO_SSPECIAL): spec(p, deco, seq, ssz); break; case (DECO_BOLD): @@ -513,12 +514,13 @@ case (DECO_PREVIOUS): term_fontlast(p); break; + case (DECO_NOSPACE): + if ('\0' == *word) + p->flags |= TERMP_NOSPACE; + break; default: break; } - - if (DECO_NOSPACE == deco && '\0' == *word) - p->flags |= TERMP_NOSPACE; } } @@ -620,13 +622,11 @@ (p->symtab, seq, ssz, &rsz); break; case (DECO_SPECIAL): - /* FALLTHROUGH */ - case (DECO_SSPECIAL): rhs = chars_spec2str (p->symtab, seq, ssz, &rsz); /* Allow for one-char escapes. */ - if (DECO_SSPECIAL != d || rhs) + if (ssz != 1 || rhs) break; rhs = seq; --------------080509010808000109040504-- -- To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv