From: schwarze@mdocml.bsd.lv
To: source@mdocml.bsd.lv
Subject: mdocml: Very tricky diff to fix macro interpretation and spacing around tabs in .Bl -column
Date: Fri, 16 Oct 2015 19:21:37 -0500 (EST) [thread overview]
Message-ID: <11486075074665028854.enqueue@fantadrom.bsd.lv> (raw)
Log Message:
-----------
Very tricky diff to fix macro interpretation and spacing around tabs
in .Bl -column; it took me more than a day to get this right.
Triggered by a loosely related bug report from tim@.
The lesson for you is: use .Ta macros in .Bl -column and avoid tabs,
or you are in for surprises. The last word before a tab is not
interpreted as a macro (unless there is a blank in between), the
first word after a tab is not interpreted either (unless there is a
blank in between), and a blank after a tab causes a leading blank in
the respective output cell. Yes, "blank", "tab", "blank tab", and
"tab blank" all have different semantics; if you write code relying
on that, good luck maintaining it afterwards...
Modified Files:
--------------
mdocml:
libmdoc.h
mdoc_argv.c
mdoc_macro.c
roff.h
Revision Data
-------------
Index: libmdoc.h
===================================================================
RCS file: /home/cvs/mdocml/mdocml/libmdoc.h,v
retrieving revision 1.105
retrieving revision 1.106
diff -Llibmdoc.h -Llibmdoc.h -u -p -r1.105 -r1.106
--- libmdoc.h
+++ libmdoc.h
@@ -40,9 +40,7 @@ enum margserr {
ARGS_WORD, /* normal word */
ARGS_PUNCT, /* series of punctuation */
ARGS_QWORD, /* quoted word */
- ARGS_PHRASE, /* Ta'd phrase (-column) */
- ARGS_PPHRASE, /* tabbed phrase (-column) */
- ARGS_PEND /* last phrase (-column) */
+ ARGS_PHRASE /* Bl -column phrase */
};
/*
Index: mdoc_macro.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/mdoc_macro.c,v
retrieving revision 1.204
retrieving revision 1.205
diff -Lmdoc_macro.c -Lmdoc_macro.c -u -p -r1.204 -r1.205
--- mdoc_macro.c
+++ mdoc_macro.c
@@ -239,6 +239,10 @@ lookup(struct roff_man *mdoc, int from,
{
int res;
+ if (mdoc->flags & MDOC_PHRASEQF) {
+ mdoc->flags &= ~MDOC_PHRASEQF;
+ return TOKEN_NONE;
+ }
if (from == TOKEN_NONE || mdoc_macros[from].flags & MDOC_PARSED) {
res = mdoc_hash_find(p);
if (res != TOKEN_NONE) {
@@ -1030,26 +1034,39 @@ blk_full(MACRO_PROT_ARGS)
if (tok == MDOC_Bk)
mdoc->flags |= MDOC_KEEP;
- ac = ARGS_PEND;
+ ac = ARGS_EOLN;
for (;;) {
+
+ /*
+ * If we are right after a tab character,
+ * do not parse the first word for macros.
+ */
+
+ if (mdoc->flags & MDOC_PHRASEQN) {
+ mdoc->flags &= ~MDOC_PHRASEQN;
+ mdoc->flags |= MDOC_PHRASEQF;
+ }
+
la = *pos;
lac = ac;
ac = mdoc_args(mdoc, line, pos, buf, tok, &p);
if (ac == ARGS_EOLN) {
- if (lac != ARGS_PPHRASE && lac != ARGS_PHRASE)
+ if (lac != ARGS_PHRASE ||
+ ! (mdoc->flags & MDOC_PHRASEQF))
break;
+
/*
- * This is necessary: if the last token on a
- * line is a `Ta' or tab, then we'll get
- * ARGS_EOLN, so we must be smart enough to
- * reopen our scope if the last parse was a
- * phrase or partial phrase.
+ * This line ends in a tab; start the next
+ * column now, with a leading blank.
*/
+
if (body != NULL)
rew_last(mdoc, body);
body = roff_body_alloc(mdoc, line, ppos, tok);
+ roff_word_alloc(mdoc, line, ppos, "\\&");
break;
}
+
if (tok == MDOC_Bd || tok == MDOC_Bk) {
mandoc_vmsg(MANDOCERR_ARG_EXCESS,
mdoc->parse, line, la, "%s ... %s",
@@ -1070,9 +1087,7 @@ blk_full(MACRO_PROT_ARGS)
*/
if (head == NULL &&
- ac != ARGS_PEND &&
ac != ARGS_PHRASE &&
- ac != ARGS_PPHRASE &&
ac != ARGS_QWORD &&
mdoc_isdelim(p) == DELIM_OPEN) {
dword(mdoc, line, la, p, DELIM_OPEN, 0);
@@ -1084,9 +1099,7 @@ blk_full(MACRO_PROT_ARGS)
if (head == NULL)
head = roff_head_alloc(mdoc, line, ppos, tok);
- if (ac == ARGS_PHRASE ||
- ac == ARGS_PEND ||
- ac == ARGS_PPHRASE) {
+ if (ac == ARGS_PHRASE) {
/*
* If we haven't opened a body yet, rewind the
@@ -1096,18 +1109,11 @@ blk_full(MACRO_PROT_ARGS)
rew_last(mdoc, body == NULL ? head : body);
body = roff_body_alloc(mdoc, line, ppos, tok);
- /*
- * Process phrases: set whether we're in a
- * partial-phrase (this effects line handling)
- * then call down into the phrase parser.
- */
+ /* Process to the tab or to the end of the line. */
- if (ac == ARGS_PPHRASE)
- mdoc->flags |= MDOC_PPHRASE;
- if (ac == ARGS_PEND && lac == ARGS_PPHRASE)
- mdoc->flags |= MDOC_PPHRASE;
+ mdoc->flags |= MDOC_PHRASE;
parse_rest(mdoc, TOKEN_NONE, line, &la, buf);
- mdoc->flags &= ~MDOC_PPHRASE;
+ mdoc->flags &= ~MDOC_PHRASE;
/* There may have been `Ta' macros. */
Index: roff.h
===================================================================
RCS file: /home/cvs/mdocml/mdocml/roff.h,v
retrieving revision 1.34
retrieving revision 1.35
diff -Lroff.h -Lroff.h -u -p -r1.34 -r1.35
--- roff.h
+++ roff.h
@@ -141,8 +141,8 @@ struct roff_man {
#define MDOC_LITERAL (1 << 1) /* In a literal scope. */
#define MDOC_PBODY (1 << 2) /* In the document body. */
#define MDOC_NEWLINE (1 << 3) /* First macro/text in a line. */
-#define MDOC_PHRASELIT (1 << 4) /* Literal within a partial phrase. */
-#define MDOC_PPHRASE (1 << 5) /* Within a partial phrase. */
+#define MDOC_PHRASE (1 << 4) /* In a Bl -column phrase. */
+#define MDOC_PHRASELIT (1 << 5) /* Literal within a phrase. */
#define MDOC_FREECOL (1 << 6) /* `It' invocation should close. */
#define MDOC_SYNOPSIS (1 << 7) /* SYNOPSIS-style formatting. */
#define MDOC_KEEP (1 << 8) /* In a word keep. */
@@ -150,6 +150,9 @@ struct roff_man {
#define MDOC_NODELIMC (1 << 10) /* Disable closing delimiter handling. */
#define MAN_ELINE (1 << 11) /* Next-line element scope. */
#define MAN_BLINE (1 << 12) /* Next-line block scope. */
+#define MDOC_PHRASEQF (1 << 13) /* Quote first word encountered. */
+#define MDOC_PHRASEQL (1 << 14) /* Quote last word of this phrase. */
+#define MDOC_PHRASEQN (1 << 15) /* Quote first word of the next phrase. */
#define MAN_LITERAL MDOC_LITERAL
#define MAN_NEWLINE MDOC_NEWLINE
enum roff_macroset macroset; /* Kind of high-level macros used. */
Index: mdoc_argv.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/mdoc_argv.c,v
retrieving revision 1.106
retrieving revision 1.107
diff -Lmdoc_argv.c -Lmdoc_argv.c -u -p -r1.106 -r1.107
--- mdoc_argv.c
+++ mdoc_argv.c
@@ -449,11 +449,10 @@ args(struct roff_man *mdoc, int line, in
{
char *p;
int pairs;
- enum margserr rc;
if (buf[*pos] == '\0') {
if (mdoc->flags & MDOC_PHRASELIT &&
- ! (mdoc->flags & MDOC_PPHRASE)) {
+ ! (mdoc->flags & MDOC_PHRASE)) {
mandoc_msg(MANDOCERR_ARG_QUOTE,
mdoc->parse, line, *pos, NULL);
mdoc->flags &= ~MDOC_PHRASELIT;
@@ -473,18 +472,41 @@ args(struct roff_man *mdoc, int line, in
if (fl == ARGSFL_TABSEP) {
if ((p = strchr(*v, '\t')) != NULL) {
- /* Skip any blank characters after the tab. */
+
+ /*
+ * Words right before and right after
+ * tab characters are not parsed,
+ * unless there is a blank in between.
+ */
+
+ if (p[-1] != ' ')
+ mdoc->flags |= MDOC_PHRASEQL;
+ if (p[1] != ' ')
+ mdoc->flags |= MDOC_PHRASEQN;
+
+ /*
+ * One or more blanks after a tab cause
+ * one leading blank in the next column.
+ * So skip all but one of them.
+ */
+
*pos += (int)(p - *v) + 1;
- while (buf[*pos] == ' ')
+ while (buf[*pos] == ' ' && buf[*pos + 1] == ' ')
(*pos)++;
- rc = ARGS_PPHRASE;
+
+ /*
+ * A tab at the end of an input line
+ * switches to the next column.
+ */
+
+ if (buf[*pos] == '\0' || buf[*pos + 1] == '\0')
+ mdoc->flags |= MDOC_PHRASEQN;
} else {
p = strchr(*v, '\0');
if (p[-1] == ' ')
mandoc_msg(MANDOCERR_SPACE_EOL,
mdoc->parse, line, *pos, NULL);
*pos += (int)(p - *v);
- rc = ARGS_PEND;
}
/* Skip any trailing blank characters. */
@@ -493,7 +515,7 @@ args(struct roff_man *mdoc, int line, in
p--;
*p = '\0';
- return rc;
+ return ARGS_PHRASE;
}
/*
@@ -504,11 +526,11 @@ args(struct roff_man *mdoc, int line, in
* Whitespace is NOT involved in literal termination.
*/
- if (MDOC_PHRASELIT & mdoc->flags || '\"' == buf[*pos]) {
- if ( ! (MDOC_PHRASELIT & mdoc->flags))
+ if (mdoc->flags & MDOC_PHRASELIT || buf[*pos] == '\"') {
+ if ( ! (mdoc->flags & MDOC_PHRASELIT))
*v = &buf[++(*pos)];
- if (MDOC_PPHRASE & mdoc->flags)
+ if (mdoc->flags & MDOC_PHRASE)
mdoc->flags |= MDOC_PHRASELIT;
pairs = 0;
@@ -528,11 +550,10 @@ args(struct roff_man *mdoc, int line, in
if (pairs)
buf[*pos - pairs] = '\0';
- if ('\0' == buf[*pos]) {
- if (MDOC_PPHRASE & mdoc->flags)
- return ARGS_QWORD;
- mandoc_msg(MANDOCERR_ARG_QUOTE,
- mdoc->parse, line, *pos, NULL);
+ if (buf[*pos] == '\0') {
+ if ( ! (mdoc->flags & MDOC_PHRASE))
+ mandoc_msg(MANDOCERR_ARG_QUOTE,
+ mdoc->parse, line, *pos, NULL);
return ARGS_QWORD;
}
@@ -555,6 +576,15 @@ args(struct roff_man *mdoc, int line, in
p = &buf[*pos];
*v = mandoc_getarg(mdoc->parse, &p, line, pos);
+ /*
+ * After parsing the last word in this phrase,
+ * tell lookup() whether or not to interpret it.
+ */
+
+ if (*p == '\0' && mdoc->flags & MDOC_PHRASEQL) {
+ mdoc->flags &= ~MDOC_PHRASEQL;
+ mdoc->flags |= MDOC_PHRASEQF;
+ }
return ARGS_WORD;
}
--
To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv
reply other threads:[~2015-10-17 0:21 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=11486075074665028854.enqueue@fantadrom.bsd.lv \
--to=schwarze@mdocml.bsd.lv \
--cc=source@mdocml.bsd.lv \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).