From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from smtp-1.sys.kth.se (smtp-1.sys.kth.se [130.237.32.175]) by krisdoz.my.domain (8.14.3/8.14.3) with ESMTP id p38CGCEt023041 for ; Fri, 8 Apr 2011 08:16:13 -0400 (EDT) Received: from mailscan-1.sys.kth.se (mailscan-1.sys.kth.se [130.237.32.91]) by smtp-1.sys.kth.se (Postfix) with ESMTP id 2199C154137 for ; Fri, 8 Apr 2011 14:16:07 +0200 (CEST) X-Virus-Scanned: by amavisd-new at kth.se Received: from smtp-1.sys.kth.se ([130.237.32.175]) by mailscan-1.sys.kth.se (mailscan-1.sys.kth.se [130.237.32.91]) (amavisd-new, port 10024) with LMTP id ONfdyHw6gWc2 for ; Fri, 8 Apr 2011 14:16:05 +0200 (CEST) X-KTH-Auth: kristaps [193.10.49.5] X-KTH-mail-from: kristaps@bsd.lv X-KTH-rcpt-to: tech@mdocml.bsd.lv Received: from [172.16.18.84] (unknown [193.10.49.5]) by smtp-1.sys.kth.se (Postfix) with ESMTP id 7228A156373 for ; Fri, 8 Apr 2011 14:16:05 +0200 (CEST) Message-ID: <4D9EFC85.3040301@bsd.lv> Date: Fri, 08 Apr 2011 14:16:05 +0200 From: Kristaps Dzonsons User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.16) Gecko/20110303 Icedove/3.0.11 X-Mailinglist: mdocml-tech Reply-To: tech@mdocml.bsd.lv MIME-Version: 1.0 To: tech@mdocml.bsd.lv Subject: Re: Unifying the escape-sequence parser. References: <4D9DC396.9010504@bsd.lv> <4D9EEF2F.2030307@bsd.lv> In-Reply-To: <4D9EEF2F.2030307@bsd.lv> Content-Type: multipart/mixed; boundary="------------030305040500060106000000" This is a multi-part message in MIME format. --------------030305040500060106000000 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit > Step 2. > > This finishes off the new escape-sequence parser and puts it into > mandoc_escape (mandoc.c, mandoc.h), then makes it the underlying engine > for a2roffdeco (out.c) (requiring a tiny change to term.c and html.c for > bailing out on bad sequences) and roff_res (roff.c). > > Now all escape-sequences are being parsed with the same engine! 
This > logic was being repeated in THREE different places, earlier (mandoc.c > for validation, out.c for output, and roff.c for predefined escapes). > > I've run this over all manuals I know of without problems, but it can > really use a close look-over with border cases. > > The next step is to clean out the out.c code, completely removing enum > roffdeco (putting that logic into mandoc.c, perhaps). Step 2b. I rolled back the roff part: the search/replace of predefined strings must happen prior to escape processing. I'd also forgotten to include mdoc_validate.c in the patch. --------------030305040500060106000000 Content-Type: text/plain; name="patch.escapes.txt" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="patch.escapes.txt" Index: html.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/html.c,v retrieving revision 1.131 diff -u -r1.131 html.c --- html.c 22 Mar 2011 14:05:45 -0000 1.131 +++ html.c 8 Apr 2011 12:15:23 -0000 @@ -337,7 +337,8 @@ break; seq = ++p; - len = a2roffdeco(&deco, &seq, &sz); + if (0 == (len = a2roffdeco(&deco, &seq, &sz))) + break; switch (deco) { case (DECO_NUMBERED): Index: libmandoc.h =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/libmandoc.h,v retrieving revision 1.17 diff -u -r1.17 libmandoc.h --- libmandoc.h 28 Mar 2011 23:52:13 -0000 1.17 +++ libmandoc.h 8 Apr 2011 12:15:23 -0000 @@ -73,7 +73,6 @@ int, int, const char *); void mandoc_vmsg(enum mandocerr, struct mparse *, int, int, const char *, ...); -int mandoc_special(char *); char *mandoc_strdup(const char *); char *mandoc_getarg(struct mparse *, char **, int, int *); char *mandoc_normdate(struct mparse *, char *, int, int); Index: man_validate.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/man_validate.c,v retrieving revision 1.67 diff -u -r1.67 
man_validate.c --- man_validate.c 22 Mar 2011 15:30:30 -0000 1.67 +++ man_validate.c 8 Apr 2011 12:15:23 -0000 @@ -54,7 +54,7 @@ static int check_part(CHKARGS); static int check_root(CHKARGS); static int check_sec(CHKARGS); -static int check_text(CHKARGS); +static void check_text(CHKARGS); static int post_AT(CHKARGS); static int post_fi(CHKARGS); @@ -151,7 +151,8 @@ switch (m->last->type) { case (MAN_TEXT): - return(check_text(m, m->last)); + check_text(m, m->last); + return(1); case (MAN_ROOT): return(check_root(m, m->last)); case (MAN_EQN): @@ -204,43 +205,48 @@ return(1); } - -static int +static void check_text(CHKARGS) { - char *p; - int pos, c; + char *p, *pp, *cpp; + int pos; size_t sz; - for (p = n->string, pos = n->pos + 1; *p; p++, pos++) { - sz = strcspn(p, "\t\\"); - p += (int)sz; + p = n->string; + pos = n->pos + 1; - if ('\0' == *p) - break; + while ('\0' != *p) { + sz = strcspn(p, "\t\\"); + p += (int)sz; pos += (int)sz; if ('\t' == *p) { - if (MAN_LITERAL & m->flags) - continue; - man_pmsg(m, n->line, pos, MANDOCERR_BADTAB); + if ( ! (MAN_LITERAL & m->flags)) + man_pmsg(m, n->line, pos, MANDOCERR_BADTAB); + p++; + pos++; continue; - } + } else if ('\0' == *p) + break; - /* Check the special character. 
*/ + pos++; + pp = ++p; - c = mandoc_special(p); - if (c) { - p += c - 1; - pos += c - 1; - } else + if (ESCAPE_ERROR == mandoc_escape + ((const char **)&pp, NULL, NULL)) { man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE); - } + break; + } - return(1); -} + cpp = p; + while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp))) + *cpp = '-'; + pos += pp - p; + p = pp; + } +} #define INEQ_DEFINE(x, ineq, name) \ static int \ Index: mandoc.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.c,v retrieving revision 1.44 diff -u -r1.44 mandoc.c --- mandoc.c 28 Mar 2011 23:52:13 -0000 1.44 +++ mandoc.c 8 Apr 2011 12:15:23 -0000 @@ -35,198 +35,315 @@ static int a2time(time_t *, const char *, const char *); static char *time2a(time_t); +static int numescape(const char *); -int -mandoc_special(char *p) +/* + * Pass over recursive numerical expressions. The context of this + * function is important: it's only called within character-terminating + * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial + * recursion: we don't care about what's in these blocks. + * This returns the number of characters skipped or -1 if an error + * occurs (the caller should bail). + */ +static int +numescape(const char *start) { - int len, i; - char term; - char *sv; - - len = 0; + int i; + size_t sz; + const char *cp; + + i = 0; + + /* The expression consists of a subexpression. */ + + if ('\\' == start[i]) { + cp = &start[++i]; + /* + * Read past the end of the subexpression. + * Bail immediately on errors. + */ + if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) + return(-1); + return(i + cp - &start[i]); + } + + if ('(' != start[i++]) + return(0); + + /* + * A parenthesised subexpression. Read until the closing + * parenthesis, making sure to handle any nested subexpressions + * that might ruin our parse. 
+ */ + + while (')' != start[i]) { + sz = strcspn(&start[i], ")\\"); + i += (int)sz; + + if ('\0' == start[i]) + return(-1); + else if ('\\' != start[i]) + continue; + + cp = &start[++i]; + if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) + return(-1); + i += cp - &start[i]; + } + + /* Read past the terminating ')'. */ + return(++i); +} + +/* + * Handle an escaped sequence. This should be called with any + * string subsequent to a `\'. Pass a pointer to this substring as "end"; + * it will be set to the supremum of the parsed escape sequence. If + * this returns ESCAPE_ERROR, the string is bogus and should be thrown + * away. If not ESCAPE_ERROR or ESCAPE_IGNORE, "start" is set to the + * first relevant character of the substring (font, glyph, whatever) of + * length sz. Both "start" and "sz" may be NULL. + */ +enum mandoc_esc +mandoc_escape(const char **end, const char **start, int *sz) +{ + char c, term, numeric; + int i, lim, ssz; + const char *cp; + enum mandoc_esc gly; + + cp = *end; + if (start) + *start = cp; + i = 0; + gly = ESCAPE_ERROR; term = '\0'; - sv = p; + numeric = 0; - assert('\\' == *p); - p++; + switch ((c = cp[i++])) { + /* + * First the glyphs. There are several different forms of + * these, but each eventually returns a substring of the glyph + * name. + */ + case ('('): + gly = ESCAPE_SPECIAL; + lim = 2; + break; + case ('['): + gly = ESCAPE_SPECIAL; + term = ']'; + break; + case ('C'): + if ('\'' != cp[i]) + return(ESCAPE_ERROR); + gly = ESCAPE_SPECIAL; + term = '\''; + break; - switch (*p++) { -#if 0 - case ('Z'): - /* FALLTHROUGH */ - case ('X'): - /* FALLTHROUGH */ - case ('x'): - /* FALLTHROUGH */ - case ('S'): - /* FALLTHROUGH */ - case ('R'): - /* FALLTHROUGH */ - case ('N'): - /* FALLTHROUGH */ - case ('l'): - /* FALLTHROUGH */ - case ('L'): - /* FALLTHROUGH */ - case ('H'): + /* + * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where + * 'X' is the trigger. These have opaque sub-strings. 
+ */ + case ('g'): /* FALLTHROUGH */ - case ('h'): + case ('k'): /* FALLTHROUGH */ - case ('D'): + case ('M'): /* FALLTHROUGH */ - case ('C'): + case ('m'): /* FALLTHROUGH */ - case ('b'): + case ('n'): /* FALLTHROUGH */ - case ('B'): + case ('V'): /* FALLTHROUGH */ - case ('a'): + case ('Y'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_IGNORE; /* FALLTHROUGH */ - case ('A'): - if (*p++ != '\'') - return(0); - term = '\''; - break; -#endif - case ('h'): + case ('*'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_PREDEF; /* FALLTHROUGH */ - case ('v'): + case ('F'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_FONTFAM; /* FALLTHROUGH */ - case ('s'): - if (ASCII_HYPH == *p) - *p = '-'; + case ('f'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_FONT; - i = 0; - if ('+' == *p || '-' == *p) { - p++; - i = 1; - } + if (start) + *start = &cp[i]; - switch (*p++) { + switch (cp[i++]) { case ('('): - len = 2; + lim = 2; break; case ('['): term = ']'; break; - case ('\''): - term = '\''; - break; - case ('0'): - i = 1; - /* FALLTHROUGH */ default: - len = 1; - p--; + lim = 1; + i--; break; } + break; - if (ASCII_HYPH == *p) - *p = '-'; - if ('+' == *p || '-' == *p) { - if (i) - return(0); - p++; - } - - /* Handle embedded numerical subexp or escape. */ - - if ('(' == *p) { - while (*p && ')' != *p) - if ('\\' == *p++) { - i = mandoc_special(--p); - if (0 == i) - return(0); - p += i; - } - - if (')' == *p++) - break; - - return(0); - } else if ('\\' == *p) { - if (0 == (i = mandoc_special(p))) - return(0); - p += i; - } - + /* + * These escapes are of the form \X'Y', where 'X' is the trigger + * and 'Y' is any string. These have opaque sub-strings. 
+ */ + case ('A'): + /* FALLTHROUGH */ + case ('b'): + /* FALLTHROUGH */ + case ('D'): + /* FALLTHROUGH */ + case ('o'): + /* FALLTHROUGH */ + case ('R'): + /* FALLTHROUGH */ + case ('X'): + /* FALLTHROUGH */ + case ('Z'): + if ('\'' != cp[i++]) + return(ESCAPE_ERROR); + gly = ESCAPE_IGNORE; + term = '\''; break; -#if 0 - case ('Y'): + + /* + * These escapes are of the form \X'N', where 'X' is the trigger + * and 'N' resolves to a numerical expression. + */ + case ('B'): /* FALLTHROUGH */ - case ('V'): + case ('h'): /* FALLTHROUGH */ - case ('$'): + case ('H'): /* FALLTHROUGH */ - case ('n'): + case ('L'): /* FALLTHROUGH */ -#endif - case ('k'): + case ('l'): /* FALLTHROUGH */ - case ('M'): + case ('N'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_NUMBERED; /* FALLTHROUGH */ - case ('m'): + case ('S'): /* FALLTHROUGH */ - case ('f'): + case ('v'): /* FALLTHROUGH */ - case ('F'): + case ('w'): /* FALLTHROUGH */ - case ('*'): - switch (*p++) { + case ('x'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_IGNORE; + if ('\'' != cp[i++]) + return(ESCAPE_ERROR); + term = numeric = '\''; + break; + + /* + * Sizes get a special category of their own. + */ + case ('s'): + gly = ESCAPE_IGNORE; + + if (start) + *start = &cp[i]; + + /* See +/- counts as a sign. */ + c = cp[i]; + if ('+' == c || '-' == c || ASCII_HYPH == c) + ++i; + + switch (cp[i++]) { case ('('): - len = 2; + lim = 2; break; case ('['): - term = ']'; + term = numeric = ']'; + break; + case ('\''): + term = numeric = '\''; break; default: - len = 1; - p--; + lim = 1; + i--; break; } + + /* See +/- counts as a sign. */ + c = cp[i]; + if ('+' == c || '-' == c || ASCII_HYPH == c) + ++i; + break; - case ('('): - len = 2; - break; - case ('['): - term = ']'; - break; - case ('z'): - len = 1; - if ('\\' == *p) { - if (0 == (i = mandoc_special(p))) - return(0); - p += i; - return(*p ? 
(int)(p - sv) : 0); - } - break; - case ('o'): - /* FALLTHROUGH */ - case ('w'): - if ('\'' == *p++) { - term = '\''; - break; - } - /* FALLTHROUGH */ + + /* + * Anything else is assumed to be a glyph. + */ default: - len = 1; - p--; + gly = ESCAPE_SPECIAL; + lim = 1; + i--; break; } - if (term) { - for ( ; *p && term != *p; p++) - if (ASCII_HYPH == *p) - *p = '-'; - return(*p ? (int)(p - sv) : 0); + assert(ESCAPE_ERROR != gly); + + if (start) + *start = &cp[i]; + + /* + * If a terminating block has been specified, we need to + * handle the case of recursion, which could have their + * own terminating blocks that mess up our parse. This, by the + * way, means that the "start" and "size" values will be + * effectively meaningless. + */ + + ssz = 0; + if (numeric && -1 == (ssz = numescape(&cp[i]))) + return(ESCAPE_ERROR); + + i += ssz; + + /* + * We have a character terminator. Try to read up to that + * character. If we can't (i.e., we hit the nil), then return + * an error; if we can, calculate our length, read past the + * terminating character, and exit. + */ + + if ('\0' != term) { + *end = strchr(&cp[i], term); + if ('\0' == *end) + return(ESCAPE_ERROR); + if (sz) + *sz = *end - &cp[i]; + (*end)++; + return(gly); } - for (i = 0; *p && i < len; i++, p++) - if (ASCII_HYPH == *p) - *p = '-'; - return(i == len ? (int)(p - sv) : 0); -} + assert(lim > 0); + /* + * We have a numeric limit. If the string is shorter than that, + * stop and return an error. Else adjust our endpoint, length, + * and return the current glyph. 
+ */ + + if ((size_t)lim > strlen(&cp[i])) + return(ESCAPE_ERROR); + + if (sz) + *sz = lim; + *end = &cp[i] + lim; + return(gly); +} void * mandoc_calloc(size_t num, size_t size) Index: mandoc.h =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.h,v retrieving revision 1.69 diff -u -r1.69 mandoc.h --- mandoc.h 28 Mar 2011 21:49:42 -0000 1.69 +++ mandoc.h 8 Apr 2011 12:15:23 -0000 @@ -288,6 +288,16 @@ MPARSE_MAN /* assume -man */ }; +enum mandoc_esc { + ESCAPE_ERROR = 0, + ESCAPE_IGNORE, /* escape to be ignored */ + ESCAPE_SPECIAL, /* a regular special character */ + ESCAPE_PREDEF, /* a predefined special character */ + ESCAPE_FONT, /* a font mode */ + ESCAPE_FONTFAM, /* a font family */ + ESCAPE_NUMBERED /* a numbered glyph */ +}; + typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel, const char *, int, int, const char *); @@ -309,6 +319,8 @@ void *mandoc_calloc(size_t, size_t); void *mandoc_malloc(size_t); void *mandoc_realloc(void *, size_t); + +enum mandoc_esc mandoc_escape(const char **, const char **, int *); __END_DECLS Index: mdoc_validate.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mdoc_validate.c,v retrieving revision 1.166 diff -u -r1.166 mdoc_validate.c --- mdoc_validate.c 3 Apr 2011 09:53:50 -0000 1.166 +++ mdoc_validate.c 8 Apr 2011 12:15:24 -0000 @@ -545,31 +545,39 @@ static void check_text(struct mdoc *m, int ln, int pos, char *p) { - int c; + char *cpp, *pp; size_t sz; for ( ; *p; p++, pos++) { sz = strcspn(p, "\t\\"); - p += (int)sz; - - if ('\0' == *p) - break; + p += (int)sz; pos += (int)sz; if ('\t' == *p) { if ( ! 
(MDOC_LITERAL & m->flags)) mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB); + p++; + pos++; continue; - } + } else if ('\0' == *p) + break; + + pos++; + pp = ++p; - if (0 == (c = mandoc_special(p))) { + if (ESCAPE_ERROR == mandoc_escape + ((const char **)&pp, NULL, NULL)) { mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE); - continue; + break; } - p += c - 1; - pos += c - 1; + cpp = p; + while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp))) + *cpp = '-'; + + pos += pp - p; + p = pp; } } Index: out.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/out.c,v retrieving revision 1.39 diff -u -r1.39 out.c --- out.c 17 Mar 2011 08:49:34 -0000 1.39 +++ out.c 8 Apr 2011 12:15:24 -0000 @@ -178,237 +178,70 @@ int a2roffdeco(enum roffdeco *d, const char **word, size_t *sz) { - int i, j, lim; - char term, c; - const char *wp; - enum roffdeco dd; + const char *cp, *start; + int ssz; + enum mandoc_esc esc; *d = DECO_NONE; - lim = i = 0; - term = '\0'; - wp = *word; - switch ((c = wp[i++])) { - case ('('): + cp = start = *word; + + esc = mandoc_escape(&cp, word, &ssz); + + switch (esc) { + case (ESCAPE_ERROR): + return(0); + case (ESCAPE_IGNORE): + break; + case (ESCAPE_NUMBERED): + *d = DECO_NUMBERED; + break; + case (ESCAPE_FONT): + *d = DECO_FONT; + break; + case (ESCAPE_FONTFAM): + *d = DECO_FFONT; + break; + case (ESCAPE_SPECIAL): *d = DECO_SPECIAL; - lim = 2; break; - case ('F'): - /* FALLTHROUGH */ - case ('f'): - *d = 'F' == c ? 
DECO_FFONT : DECO_FONT; - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; - break; + case (ESCAPE_PREDEF): + *d = DECO_RESERVED; + break; + } + + assert(ssz >= 0); + *sz = (size_t)ssz; + ssz = cp - start; + + if (1 == *sz && (DECO_FONT == *d || DECO_FFONT == *d)) + switch (**word) { case ('3'): /* FALLTHROUGH */ case ('B'): *d = DECO_BOLD; - return(i); + break; case ('2'): /* FALLTHROUGH */ case ('I'): *d = DECO_ITALIC; - return(i); + break; case ('P'): *d = DECO_PREVIOUS; - return(i); + break; case ('1'): /* FALLTHROUGH */ case ('R'): *d = DECO_ROMAN; - return(i); - default: - i--; - lim = 1; - break; - } - break; - case ('k'): - /* FALLTHROUGH */ - case ('M'): - /* FALLTHROUGH */ - case ('m'): - /* FALLTHROUGH */ - case ('*'): - if ('*' == c) - *d = DECO_RESERVED; - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; - break; - default: - i--; - lim = 1; - break; - } - break; - - case ('N'): - - /* - * Sequence of characters: backslash, 'N' (i = 0), - * starting delimiter (i = 1), character number (i = 2). - */ - - *word = wp + 2; - *sz = 0; - - /* - * Cannot use a digit as a starting delimiter; - * but skip the digit anyway. - */ - - if (isdigit((int)wp[1])) - return(2); - - /* - * Any non-digit terminates the character number. - * That is, the terminating delimiter need not - * match the starting delimiter. - */ - - for (i = 2; isdigit((int)wp[i]); i++) - (*sz)++; - - /* - * This is only a numbered character - * if the character number has at least one digit. - */ - - if (*sz) - *d = DECO_NUMBERED; - - /* - * Skip the terminating delimiter, even if it does not - * match, and even if there is no character number. 
- */ - - return(++i); - - case ('h'): - /* FALLTHROUGH */ - case ('v'): - /* FALLTHROUGH */ - case ('s'): - j = 0; - if ('+' == wp[i] || '-' == wp[i]) { - i++; - j = 1; - } - - switch (wp[i++]) { - case ('('): - lim = 2; - break; - case ('['): - term = ']'; break; - case ('\''): - term = '\''; - break; - case ('0'): - j = 1; - /* FALLTHROUGH */ default: - i--; - lim = 1; - break; - } - - if ('+' == wp[i] || '-' == wp[i]) { - if (j) - return(i); - i++; - } - - /* Handle embedded numerical subexp or escape. */ - - if ('(' == wp[i]) { - while (wp[i] && ')' != wp[i]) - if ('\\' == wp[i++]) { - /* Handle embedded escape. */ - *word = &wp[i]; - i += a2roffdeco(&dd, word, sz); - } - - if (')' == wp[i++]) - break; - - *d = DECO_NONE; - return(i - 1); - } else if ('\\' == wp[i]) { - *word = &wp[++i]; - i += a2roffdeco(&dd, word, sz); - } - - break; - case ('['): - *d = DECO_SPECIAL; - term = ']'; - break; - case ('c'): - *d = DECO_NOSPACE; - return(i); - case ('z'): - *d = DECO_NONE; - if ('\\' == wp[i]) { - *word = &wp[++i]; - return(i + a2roffdeco(&dd, word, sz)); - } else - lim = 1; - break; - case ('o'): - /* FALLTHROUGH */ - case ('w'): - if ('\'' == wp[i++]) { - term = '\''; break; - } - /* FALLTHROUGH */ - default: - *d = DECO_SSPECIAL; - i--; - lim = 1; - break; - } - - assert(term || lim); - *word = &wp[i]; - - if (term) { - j = i; - while (wp[i] && wp[i] != term) - i++; - if ('\0' == wp[i]) { - *d = DECO_NONE; - return(i); } - assert(i >= j); - *sz = (size_t)(i - j); - - return(i + 1); - } - - assert(lim > 0); - *sz = (size_t)lim; - - for (j = 0; wp[i] && j < lim; j++) - i++; - if (j < lim) - *d = DECO_NONE; + if (1 == *sz && DECO_SPECIAL == *d) + *d = 'c' == **word ? 
DECO_NOSPACE : DECO_SSPECIAL; - return(i); + return(ssz); } /* Index: read.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/read.c,v retrieving revision 1.11 diff -u -r1.11 read.c --- read.c 4 Apr 2011 23:04:38 -0000 1.11 +++ read.c 8 Apr 2011 12:15:24 -0000 @@ -142,7 +142,7 @@ "tab in non-literal context", "end of line whitespace", "bad comment style", - "unknown escape sequence", + "bad escape sequence", "unterminated quoted string", "generic error", Index: term.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/term.c,v retrieving revision 1.183 diff -u -r1.183 term.c --- term.c 4 Apr 2011 21:14:12 -0000 1.183 +++ term.c 8 Apr 2011 12:15:24 -0000 @@ -457,6 +457,7 @@ term_word(struct termp *p, const char *word) { const char *seq; + int sz; size_t ssz; enum roffdeco deco; @@ -487,7 +488,9 @@ continue; seq = ++word; - word += a2roffdeco(&deco, &seq, &ssz); + if (0 == (sz = a2roffdeco(&deco, &seq, &ssz))) + break; + word += sz; switch (deco) { case (DECO_NUMBERED): --------------030305040500060106000000-- -- To unsubscribe send an email to tech+unsubscribe@mdocml.bsd.lv