From: Kristaps Dzonsons <kristaps@bsd.lv>
To: tech@mdocml.bsd.lv
Subject: Re: Unifying the escape-sequence parser.
Date: Fri, 08 Apr 2011 14:16:05 +0200 [thread overview]
Message-ID: <4D9EFC85.3040301@bsd.lv> (raw)
In-Reply-To: <4D9EEF2F.2030307@bsd.lv>
[-- Attachment #1: Type: text/plain, Size: 940 bytes --]
> Step 2.
>
> This finishes off the new escape-sequence parser and puts it into
> mandoc_escape (mandoc.c, mandoc.h), then makes it the underlying engine
> for a2roffdeco (out.c) (requiring a tiny change to term.c and html.c for
> bailing out on bad sequences) and roff_res (roff.c).
>
> Now all escape-sequences are being parsed with the same engine! This
> logic was being repeated in THREE different places, earlier (mandoc.c
> for validation, out.c for output, and roff.c for predefined escapes).
>
> I've run this over all manuals I know of without problems, but it can
> really use a close look-over with border cases.
>
> The next step is to clean out the out.c code, completely removing enum
> roffdeco (putting that logic into mandoc.c, perhaps).
Step 2b. I rolled back the roff part: the search/replace of predefined
strings must happen prior to escape processing. I'd also forgotten to
include mdoc_validate.c in the patch.
[-- Attachment #2: patch.escapes.txt --]
[-- Type: text/plain, Size: 20299 bytes --]
Index: html.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/html.c,v
retrieving revision 1.131
diff -u -r1.131 html.c
--- html.c 22 Mar 2011 14:05:45 -0000 1.131
+++ html.c 8 Apr 2011 12:15:23 -0000
@@ -337,7 +337,8 @@
break;
seq = ++p;
- len = a2roffdeco(&deco, &seq, &sz);
+ if (0 == (len = a2roffdeco(&deco, &seq, &sz)))
+ break;
switch (deco) {
case (DECO_NUMBERED):
Index: libmandoc.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/libmandoc.h,v
retrieving revision 1.17
diff -u -r1.17 libmandoc.h
--- libmandoc.h 28 Mar 2011 23:52:13 -0000 1.17
+++ libmandoc.h 8 Apr 2011 12:15:23 -0000
@@ -73,7 +73,6 @@
int, int, const char *);
void mandoc_vmsg(enum mandocerr, struct mparse *,
int, int, const char *, ...);
-int mandoc_special(char *);
char *mandoc_strdup(const char *);
char *mandoc_getarg(struct mparse *, char **, int, int *);
char *mandoc_normdate(struct mparse *, char *, int, int);
Index: man_validate.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/man_validate.c,v
retrieving revision 1.67
diff -u -r1.67 man_validate.c
--- man_validate.c 22 Mar 2011 15:30:30 -0000 1.67
+++ man_validate.c 8 Apr 2011 12:15:23 -0000
@@ -54,7 +54,7 @@
static int check_part(CHKARGS);
static int check_root(CHKARGS);
static int check_sec(CHKARGS);
-static int check_text(CHKARGS);
+static void check_text(CHKARGS);
static int post_AT(CHKARGS);
static int post_fi(CHKARGS);
@@ -151,7 +151,8 @@
switch (m->last->type) {
case (MAN_TEXT):
- return(check_text(m, m->last));
+ check_text(m, m->last);
+ return(1);
case (MAN_ROOT):
return(check_root(m, m->last));
case (MAN_EQN):
@@ -204,43 +205,48 @@
return(1);
}
-
-static int
+static void
check_text(CHKARGS)
{
- char *p;
- int pos, c;
+ char *p, *pp, *cpp;
+ int pos;
size_t sz;
- for (p = n->string, pos = n->pos + 1; *p; p++, pos++) {
- sz = strcspn(p, "\t\\");
- p += (int)sz;
+ p = n->string;
+ pos = n->pos + 1;
- if ('\0' == *p)
- break;
+ while ('\0' != *p) {
+ sz = strcspn(p, "\t\\");
+ p += (int)sz;
pos += (int)sz;
if ('\t' == *p) {
- if (MAN_LITERAL & m->flags)
- continue;
- man_pmsg(m, n->line, pos, MANDOCERR_BADTAB);
+ if ( ! (MAN_LITERAL & m->flags))
+ man_pmsg(m, n->line, pos, MANDOCERR_BADTAB);
+ p++;
+ pos++;
continue;
- }
+ } else if ('\0' == *p)
+ break;
- /* Check the special character. */
+ pos++;
+ pp = ++p;
- c = mandoc_special(p);
- if (c) {
- p += c - 1;
- pos += c - 1;
- } else
+ if (ESCAPE_ERROR == mandoc_escape
+ ((const char **)&pp, NULL, NULL)) {
man_pmsg(m, n->line, pos, MANDOCERR_BADESCAPE);
- }
+ break;
+ }
- return(1);
-}
+ cpp = p;
+ while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp)))
+ *cpp = '-';
+ pos += pp - p;
+ p = pp;
+ }
+}
#define INEQ_DEFINE(x, ineq, name) \
static int \
Index: mandoc.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.c,v
retrieving revision 1.44
diff -u -r1.44 mandoc.c
--- mandoc.c 28 Mar 2011 23:52:13 -0000 1.44
+++ mandoc.c 8 Apr 2011 12:15:23 -0000
@@ -35,198 +35,315 @@
static int a2time(time_t *, const char *, const char *);
static char *time2a(time_t);
+static int numescape(const char *);
-int
-mandoc_special(char *p)
+/*
+ * Pass over recursive numerical expressions. The context of this
+ * function is important: it's only called within character-terminating
+ * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial
+ * recursion: we don't care about what's in these blocks.
+ * This returns the number of characters skipped or -1 if an error
+ * occurs (the caller should bail).
+ */
+static int
+numescape(const char *start)
{
- int len, i;
- char term;
- char *sv;
-
- len = 0;
+ int i;
+ size_t sz;
+ const char *cp;
+
+ i = 0;
+
+ /* The expression consists of a subexpression. */
+
+ if ('\\' == start[i]) {
+ cp = &start[++i];
+ /*
+ * Read past the end of the subexpression.
+ * Bail immediately on errors.
+ */
+ if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
+ return(-1);
+ return(i + cp - &start[i]);
+ }
+
+ if ('(' != start[i++])
+ return(0);
+
+ /*
+ * A parenthesised subexpression. Read until the closing
+ * parenthesis, making sure to handle any nested subexpressions
+ * that might ruin our parse.
+ */
+
+ while (')' != start[i]) {
+ sz = strcspn(&start[i], ")\\");
+ i += (int)sz;
+
+ if ('\0' == start[i])
+ return(-1);
+ else if ('\\' != start[i])
+ continue;
+
+ cp = &start[++i];
+ if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
+ return(-1);
+ i += cp - &start[i];
+ }
+
+ /* Read past the terminating ')'. */
+ return(++i);
+}
+
+/*
+ * Handle an escaped sequence. This should be called with any
+ * string subsequent to a `\'. Pass a pointer to this substring as "end";
+ * it will be set to the supremum of the parsed escape sequence. If
+ * this returns ESCAPE_ERROR, the string is bogus and should be thrown
+ * away. If not ESCAPE_ERROR or ESCAPE_IGNORE, "start" is set to the
+ * first relevant character of the substring (font, glyph, whatever) of
+ * length sz. Both "start" and "sz" may be NULL.
+ */
+enum mandoc_esc
+mandoc_escape(const char **end, const char **start, int *sz)
+{
+ char c, term, numeric;
+ int i, lim, ssz;
+ const char *cp;
+ enum mandoc_esc gly;
+
+ cp = *end;
+ if (start)
+ *start = cp;
+ i = 0;
+ gly = ESCAPE_ERROR;
term = '\0';
- sv = p;
+ numeric = 0;
- assert('\\' == *p);
- p++;
+ switch ((c = cp[i++])) {
+ /*
+ * First the glyphs. There are several different forms of
+ * these, but each eventually returns a substring of the glyph
+ * name.
+ */
+ case ('('):
+ gly = ESCAPE_SPECIAL;
+ lim = 2;
+ break;
+ case ('['):
+ gly = ESCAPE_SPECIAL;
+ term = ']';
+ break;
+ case ('C'):
+ if ('\'' != cp[i])
+ return(ESCAPE_ERROR);
+ gly = ESCAPE_SPECIAL;
+ term = '\'';
+ break;
- switch (*p++) {
-#if 0
- case ('Z'):
- /* FALLTHROUGH */
- case ('X'):
- /* FALLTHROUGH */
- case ('x'):
- /* FALLTHROUGH */
- case ('S'):
- /* FALLTHROUGH */
- case ('R'):
- /* FALLTHROUGH */
- case ('N'):
- /* FALLTHROUGH */
- case ('l'):
- /* FALLTHROUGH */
- case ('L'):
- /* FALLTHROUGH */
- case ('H'):
+ /*
+ * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
+ * 'X' is the trigger. These have opaque sub-strings.
+ */
+ case ('g'):
/* FALLTHROUGH */
- case ('h'):
+ case ('k'):
/* FALLTHROUGH */
- case ('D'):
+ case ('M'):
/* FALLTHROUGH */
- case ('C'):
+ case ('m'):
/* FALLTHROUGH */
- case ('b'):
+ case ('n'):
/* FALLTHROUGH */
- case ('B'):
+ case ('V'):
/* FALLTHROUGH */
- case ('a'):
+ case ('Y'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_IGNORE;
/* FALLTHROUGH */
- case ('A'):
- if (*p++ != '\'')
- return(0);
- term = '\'';
- break;
-#endif
- case ('h'):
+ case ('*'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_PREDEF;
/* FALLTHROUGH */
- case ('v'):
+ case ('F'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_FONTFAM;
/* FALLTHROUGH */
- case ('s'):
- if (ASCII_HYPH == *p)
- *p = '-';
+ case ('f'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_FONT;
- i = 0;
- if ('+' == *p || '-' == *p) {
- p++;
- i = 1;
- }
+ if (start)
+ *start = &cp[i];
- switch (*p++) {
+ switch (cp[i++]) {
case ('('):
- len = 2;
+ lim = 2;
break;
case ('['):
term = ']';
break;
- case ('\''):
- term = '\'';
- break;
- case ('0'):
- i = 1;
- /* FALLTHROUGH */
default:
- len = 1;
- p--;
+ lim = 1;
+ i--;
break;
}
+ break;
- if (ASCII_HYPH == *p)
- *p = '-';
- if ('+' == *p || '-' == *p) {
- if (i)
- return(0);
- p++;
- }
-
- /* Handle embedded numerical subexp or escape. */
-
- if ('(' == *p) {
- while (*p && ')' != *p)
- if ('\\' == *p++) {
- i = mandoc_special(--p);
- if (0 == i)
- return(0);
- p += i;
- }
-
- if (')' == *p++)
- break;
-
- return(0);
- } else if ('\\' == *p) {
- if (0 == (i = mandoc_special(p)))
- return(0);
- p += i;
- }
-
+ /*
+ * These escapes are of the form \X'Y', where 'X' is the trigger
+ * and 'Y' is any string. These have opaque sub-strings.
+ */
+ case ('A'):
+ /* FALLTHROUGH */
+ case ('b'):
+ /* FALLTHROUGH */
+ case ('D'):
+ /* FALLTHROUGH */
+ case ('o'):
+ /* FALLTHROUGH */
+ case ('R'):
+ /* FALLTHROUGH */
+ case ('X'):
+ /* FALLTHROUGH */
+ case ('Z'):
+ if ('\'' != cp[i++])
+ return(ESCAPE_ERROR);
+ gly = ESCAPE_IGNORE;
+ term = '\'';
break;
-#if 0
- case ('Y'):
+
+ /*
+ * These escapes are of the form \X'N', where 'X' is the trigger
+ * and 'N' resolves to a numerical expression.
+ */
+ case ('B'):
/* FALLTHROUGH */
- case ('V'):
+ case ('h'):
/* FALLTHROUGH */
- case ('$'):
+ case ('H'):
/* FALLTHROUGH */
- case ('n'):
+ case ('L'):
/* FALLTHROUGH */
-#endif
- case ('k'):
+ case ('l'):
/* FALLTHROUGH */
- case ('M'):
+ case ('N'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_NUMBERED;
/* FALLTHROUGH */
- case ('m'):
+ case ('S'):
/* FALLTHROUGH */
- case ('f'):
+ case ('v'):
/* FALLTHROUGH */
- case ('F'):
+ case ('w'):
/* FALLTHROUGH */
- case ('*'):
- switch (*p++) {
+ case ('x'):
+ if (ESCAPE_ERROR == gly)
+ gly = ESCAPE_IGNORE;
+ if ('\'' != cp[i++])
+ return(ESCAPE_ERROR);
+ term = numeric = '\'';
+ break;
+
+ /*
+ * Sizes get a special category of their own.
+ */
+ case ('s'):
+ gly = ESCAPE_IGNORE;
+
+ if (start)
+ *start = &cp[i];
+
+ /* See +/- counts as a sign. */
+ c = cp[i];
+ if ('+' == c || '-' == c || ASCII_HYPH == c)
+ ++i;
+
+ switch (cp[i++]) {
case ('('):
- len = 2;
+ lim = 2;
break;
case ('['):
- term = ']';
+ term = numeric = ']';
+ break;
+ case ('\''):
+ term = numeric = '\'';
break;
default:
- len = 1;
- p--;
+ lim = 1;
+ i--;
break;
}
+
+ /* See +/- counts as a sign. */
+ c = cp[i];
+ if ('+' == c || '-' == c || ASCII_HYPH == c)
+ ++i;
+
break;
- case ('('):
- len = 2;
- break;
- case ('['):
- term = ']';
- break;
- case ('z'):
- len = 1;
- if ('\\' == *p) {
- if (0 == (i = mandoc_special(p)))
- return(0);
- p += i;
- return(*p ? (int)(p - sv) : 0);
- }
- break;
- case ('o'):
- /* FALLTHROUGH */
- case ('w'):
- if ('\'' == *p++) {
- term = '\'';
- break;
- }
- /* FALLTHROUGH */
+
+ /*
+ * Anything else is assumed to be a glyph.
+ */
default:
- len = 1;
- p--;
+ gly = ESCAPE_SPECIAL;
+ lim = 1;
+ i--;
break;
}
- if (term) {
- for ( ; *p && term != *p; p++)
- if (ASCII_HYPH == *p)
- *p = '-';
- return(*p ? (int)(p - sv) : 0);
+ assert(ESCAPE_ERROR != gly);
+
+ if (start)
+ *start = &cp[i];
+
+ /*
+ * If a terminating block has been specified, we need to
+ * handle the case of recursion, which could have their
+ * own terminating blocks that mess up our parse. This, by the
+ * way, means that the "start" and "size" values will be
+ * effectively meaningless.
+ */
+
+ ssz = 0;
+ if (numeric && -1 == (ssz = numescape(&cp[i])))
+ return(ESCAPE_ERROR);
+
+ i += ssz;
+
+ /*
+ * We have a character terminator. Try to read up to that
+ * character. If we can't (i.e., we hit the nil), then return
+ * an error; if we can, calculate our length, read past the
+ * terminating character, and exit.
+ */
+
+ if ('\0' != term) {
+ *end = strchr(&cp[i], term);
+ if ('\0' == *end)
+ return(ESCAPE_ERROR);
+ if (sz)
+ *sz = *end - &cp[i];
+ (*end)++;
+ return(gly);
}
- for (i = 0; *p && i < len; i++, p++)
- if (ASCII_HYPH == *p)
- *p = '-';
- return(i == len ? (int)(p - sv) : 0);
-}
+ assert(lim > 0);
+ /*
+ * We have a numeric limit. If the string is shorter than that,
+ * stop and return an error. Else adjust our endpoint, length,
+ * and return the current glyph.
+ */
+
+ if ((size_t)lim > strlen(&cp[i]))
+ return(ESCAPE_ERROR);
+
+ if (sz)
+ *sz = lim;
+ *end = &cp[i] + lim;
+ return(gly);
+}
void *
mandoc_calloc(size_t num, size_t size)
Index: mandoc.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.h,v
retrieving revision 1.69
diff -u -r1.69 mandoc.h
--- mandoc.h 28 Mar 2011 21:49:42 -0000 1.69
+++ mandoc.h 8 Apr 2011 12:15:23 -0000
@@ -288,6 +288,16 @@
MPARSE_MAN /* assume -man */
};
+enum mandoc_esc {
+ ESCAPE_ERROR = 0,
+ ESCAPE_IGNORE, /* escape to be ignored */
+ ESCAPE_SPECIAL, /* a regular special character */
+ ESCAPE_PREDEF, /* a predefined special character */
+ ESCAPE_FONT, /* a font mode */
+ ESCAPE_FONTFAM, /* a font family */
+ ESCAPE_NUMBERED /* a numbered glyph */
+};
+
typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel,
const char *, int, int, const char *);
@@ -309,6 +319,8 @@
void *mandoc_calloc(size_t, size_t);
void *mandoc_malloc(size_t);
void *mandoc_realloc(void *, size_t);
+
+enum mandoc_esc mandoc_escape(const char **, const char **, int *);
__END_DECLS
Index: mdoc_validate.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mdoc_validate.c,v
retrieving revision 1.166
diff -u -r1.166 mdoc_validate.c
--- mdoc_validate.c 3 Apr 2011 09:53:50 -0000 1.166
+++ mdoc_validate.c 8 Apr 2011 12:15:24 -0000
@@ -545,31 +545,39 @@
static void
check_text(struct mdoc *m, int ln, int pos, char *p)
{
- int c;
+ char *cpp, *pp;
size_t sz;
for ( ; *p; p++, pos++) {
sz = strcspn(p, "\t\\");
- p += (int)sz;
-
- if ('\0' == *p)
- break;
+ p += (int)sz;
pos += (int)sz;
if ('\t' == *p) {
if ( ! (MDOC_LITERAL & m->flags))
mdoc_pmsg(m, ln, pos, MANDOCERR_BADTAB);
+ p++;
+ pos++;
continue;
- }
+ } else if ('\0' == *p)
+ break;
+
+ pos++;
+ pp = ++p;
- if (0 == (c = mandoc_special(p))) {
+ if (ESCAPE_ERROR == mandoc_escape
+ ((const char **)&pp, NULL, NULL)) {
mdoc_pmsg(m, ln, pos, MANDOCERR_BADESCAPE);
- continue;
+ break;
}
- p += c - 1;
- pos += c - 1;
+ cpp = p;
+ while (NULL != (cpp = memchr(cpp, ASCII_HYPH, pp - cpp)))
+ *cpp = '-';
+
+ pos += pp - p;
+ p = pp;
}
}
Index: out.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/out.c,v
retrieving revision 1.39
diff -u -r1.39 out.c
--- out.c 17 Mar 2011 08:49:34 -0000 1.39
+++ out.c 8 Apr 2011 12:15:24 -0000
@@ -178,237 +178,70 @@
int
a2roffdeco(enum roffdeco *d, const char **word, size_t *sz)
{
- int i, j, lim;
- char term, c;
- const char *wp;
- enum roffdeco dd;
+ const char *cp, *start;
+ int ssz;
+ enum mandoc_esc esc;
*d = DECO_NONE;
- lim = i = 0;
- term = '\0';
- wp = *word;
- switch ((c = wp[i++])) {
- case ('('):
+ cp = start = *word;
+
+ esc = mandoc_escape(&cp, word, &ssz);
+
+ switch (esc) {
+ case (ESCAPE_ERROR):
+ return(0);
+ case (ESCAPE_IGNORE):
+ break;
+ case (ESCAPE_NUMBERED):
+ *d = DECO_NUMBERED;
+ break;
+ case (ESCAPE_FONT):
+ *d = DECO_FONT;
+ break;
+ case (ESCAPE_FONTFAM):
+ *d = DECO_FFONT;
+ break;
+ case (ESCAPE_SPECIAL):
*d = DECO_SPECIAL;
- lim = 2;
break;
- case ('F'):
- /* FALLTHROUGH */
- case ('f'):
- *d = 'F' == c ? DECO_FFONT : DECO_FONT;
-
- switch (wp[i++]) {
- case ('('):
- lim = 2;
- break;
- case ('['):
- term = ']';
- break;
+ case (ESCAPE_PREDEF):
+ *d = DECO_RESERVED;
+ break;
+ }
+
+ assert(ssz >= 0);
+ *sz = (size_t)ssz;
+ ssz = cp - start;
+
+ if (1 == *sz && (DECO_FONT == *d || DECO_FFONT == *d))
+ switch (**word) {
case ('3'):
/* FALLTHROUGH */
case ('B'):
*d = DECO_BOLD;
- return(i);
+ break;
case ('2'):
/* FALLTHROUGH */
case ('I'):
*d = DECO_ITALIC;
- return(i);
+ break;
case ('P'):
*d = DECO_PREVIOUS;
- return(i);
+ break;
case ('1'):
/* FALLTHROUGH */
case ('R'):
*d = DECO_ROMAN;
- return(i);
- default:
- i--;
- lim = 1;
- break;
- }
- break;
- case ('k'):
- /* FALLTHROUGH */
- case ('M'):
- /* FALLTHROUGH */
- case ('m'):
- /* FALLTHROUGH */
- case ('*'):
- if ('*' == c)
- *d = DECO_RESERVED;
-
- switch (wp[i++]) {
- case ('('):
- lim = 2;
- break;
- case ('['):
- term = ']';
- break;
- default:
- i--;
- lim = 1;
- break;
- }
- break;
-
- case ('N'):
-
- /*
- * Sequence of characters: backslash, 'N' (i = 0),
- * starting delimiter (i = 1), character number (i = 2).
- */
-
- *word = wp + 2;
- *sz = 0;
-
- /*
- * Cannot use a digit as a starting delimiter;
- * but skip the digit anyway.
- */
-
- if (isdigit((int)wp[1]))
- return(2);
-
- /*
- * Any non-digit terminates the character number.
- * That is, the terminating delimiter need not
- * match the starting delimiter.
- */
-
- for (i = 2; isdigit((int)wp[i]); i++)
- (*sz)++;
-
- /*
- * This is only a numbered character
- * if the character number has at least one digit.
- */
-
- if (*sz)
- *d = DECO_NUMBERED;
-
- /*
- * Skip the terminating delimiter, even if it does not
- * match, and even if there is no character number.
- */
-
- return(++i);
-
- case ('h'):
- /* FALLTHROUGH */
- case ('v'):
- /* FALLTHROUGH */
- case ('s'):
- j = 0;
- if ('+' == wp[i] || '-' == wp[i]) {
- i++;
- j = 1;
- }
-
- switch (wp[i++]) {
- case ('('):
- lim = 2;
- break;
- case ('['):
- term = ']';
break;
- case ('\''):
- term = '\'';
- break;
- case ('0'):
- j = 1;
- /* FALLTHROUGH */
default:
- i--;
- lim = 1;
- break;
- }
-
- if ('+' == wp[i] || '-' == wp[i]) {
- if (j)
- return(i);
- i++;
- }
-
- /* Handle embedded numerical subexp or escape. */
-
- if ('(' == wp[i]) {
- while (wp[i] && ')' != wp[i])
- if ('\\' == wp[i++]) {
- /* Handle embedded escape. */
- *word = &wp[i];
- i += a2roffdeco(&dd, word, sz);
- }
-
- if (')' == wp[i++])
- break;
-
- *d = DECO_NONE;
- return(i - 1);
- } else if ('\\' == wp[i]) {
- *word = &wp[++i];
- i += a2roffdeco(&dd, word, sz);
- }
-
- break;
- case ('['):
- *d = DECO_SPECIAL;
- term = ']';
- break;
- case ('c'):
- *d = DECO_NOSPACE;
- return(i);
- case ('z'):
- *d = DECO_NONE;
- if ('\\' == wp[i]) {
- *word = &wp[++i];
- return(i + a2roffdeco(&dd, word, sz));
- } else
- lim = 1;
- break;
- case ('o'):
- /* FALLTHROUGH */
- case ('w'):
- if ('\'' == wp[i++]) {
- term = '\'';
break;
- }
- /* FALLTHROUGH */
- default:
- *d = DECO_SSPECIAL;
- i--;
- lim = 1;
- break;
- }
-
- assert(term || lim);
- *word = &wp[i];
-
- if (term) {
- j = i;
- while (wp[i] && wp[i] != term)
- i++;
- if ('\0' == wp[i]) {
- *d = DECO_NONE;
- return(i);
}
- assert(i >= j);
- *sz = (size_t)(i - j);
-
- return(i + 1);
- }
-
- assert(lim > 0);
- *sz = (size_t)lim;
-
- for (j = 0; wp[i] && j < lim; j++)
- i++;
- if (j < lim)
- *d = DECO_NONE;
+ if (1 == *sz && DECO_SPECIAL == *d)
+ *d = 'c' == **word ? DECO_NOSPACE : DECO_SSPECIAL;
- return(i);
+ return(ssz);
}
/*
Index: read.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/read.c,v
retrieving revision 1.11
diff -u -r1.11 read.c
--- read.c 4 Apr 2011 23:04:38 -0000 1.11
+++ read.c 8 Apr 2011 12:15:24 -0000
@@ -142,7 +142,7 @@
"tab in non-literal context",
"end of line whitespace",
"bad comment style",
- "unknown escape sequence",
+ "bad escape sequence",
"unterminated quoted string",
"generic error",
Index: term.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/term.c,v
retrieving revision 1.183
diff -u -r1.183 term.c
--- term.c 4 Apr 2011 21:14:12 -0000 1.183
+++ term.c 8 Apr 2011 12:15:24 -0000
@@ -457,6 +457,7 @@
term_word(struct termp *p, const char *word)
{
const char *seq;
+ int sz;
size_t ssz;
enum roffdeco deco;
@@ -487,7 +488,9 @@
continue;
seq = ++word;
- word += a2roffdeco(&deco, &seq, &ssz);
+ if (0 == (sz = a2roffdeco(&deco, &seq, &ssz)))
+ break;
+ word += sz;
switch (deco) {
case (DECO_NUMBERED):
next prev parent reply other threads:[~2011-04-08 12:16 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-04-07 14:00 Kristaps Dzonsons
2011-04-08 11:19 ` Kristaps Dzonsons
2011-04-08 12:16 ` Kristaps Dzonsons [this message]
2011-04-08 12:50 ` Kristaps Dzonsons
2011-04-08 13:15 ` Kristaps Dzonsons
2011-04-08 13:56 ` Finished: unifying " Kristaps Dzonsons
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4D9EFC85.3040301@bsd.lv \
--to=kristaps@bsd.lv \
--cc=tech@mdocml.bsd.lv \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).