source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* mdocml: Very tricky diff to fix macro interpretation and spacing around tabs in .Bl -column
@ 2015-10-17  0:21 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2015-10-17  0:21 UTC (permalink / raw)
  To: source

Log Message:
-----------
Very tricky diff to fix macro interpretation and spacing around tabs
in .Bl -column; it took me more than a day to get this right.
Triggered by a loosely related bug report from tim@.

The lesson for you is:  Use .Ta macros in .Bl -column and avoid tabs,
or you are in for surprises.  The last word before a tab is not
interpreted as a macro (unless there is a blank in between), and
neither is the first word after a tab (unless there is a blank in
between); in addition, a blank after a tab causes a leading blank in
the respective output cell.  Yes, "blank", "tab", "blank tab" and "tab
blank" all have different semantics; if you write code relying on
that, good luck maintaining it afterwards...

Modified Files:
--------------
    mdocml:
        libmdoc.h
        mdoc_argv.c
        mdoc_macro.c
        roff.h

Revision Data
-------------
Index: libmdoc.h
===================================================================
RCS file: /home/cvs/mdocml/mdocml/libmdoc.h,v
retrieving revision 1.105
retrieving revision 1.106
diff -Llibmdoc.h -Llibmdoc.h -u -p -r1.105 -r1.106
--- libmdoc.h
+++ libmdoc.h
@@ -40,9 +40,7 @@ enum	margserr {
 	ARGS_WORD, /* normal word */
 	ARGS_PUNCT, /* series of punctuation */
 	ARGS_QWORD, /* quoted word */
-	ARGS_PHRASE, /* Ta'd phrase (-column) */
-	ARGS_PPHRASE, /* tabbed phrase (-column) */
-	ARGS_PEND /* last phrase (-column) */
+	ARGS_PHRASE /* Bl -column phrase */
 };
 
 /*
Index: mdoc_macro.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/mdoc_macro.c,v
retrieving revision 1.204
retrieving revision 1.205
diff -Lmdoc_macro.c -Lmdoc_macro.c -u -p -r1.204 -r1.205
--- mdoc_macro.c
+++ mdoc_macro.c
@@ -239,6 +239,10 @@ lookup(struct roff_man *mdoc, int from, 
 {
 	int	 res;
 
+	if (mdoc->flags & MDOC_PHRASEQF) {
+		mdoc->flags &= ~MDOC_PHRASEQF;
+		return TOKEN_NONE;
+	}
 	if (from == TOKEN_NONE || mdoc_macros[from].flags & MDOC_PARSED) {
 		res = mdoc_hash_find(p);
 		if (res != TOKEN_NONE) {
@@ -1030,26 +1034,39 @@ blk_full(MACRO_PROT_ARGS)
 	if (tok == MDOC_Bk)
 		mdoc->flags |= MDOC_KEEP;
 
-	ac = ARGS_PEND;
+	ac = ARGS_EOLN;
 	for (;;) {
+
+		/*
+		 * If we are right after a tab character,
+		 * do not parse the first word for macros.
+		 */
+
+		if (mdoc->flags & MDOC_PHRASEQN) {
+			mdoc->flags &= ~MDOC_PHRASEQN;
+			mdoc->flags |= MDOC_PHRASEQF;
+		}
+
 		la = *pos;
 		lac = ac;
 		ac = mdoc_args(mdoc, line, pos, buf, tok, &p);
 		if (ac == ARGS_EOLN) {
-			if (lac != ARGS_PPHRASE && lac != ARGS_PHRASE)
+			if (lac != ARGS_PHRASE ||
+			    ! (mdoc->flags & MDOC_PHRASEQF))
 				break;
+
 			/*
-			 * This is necessary: if the last token on a
-			 * line is a `Ta' or tab, then we'll get
-			 * ARGS_EOLN, so we must be smart enough to
-			 * reopen our scope if the last parse was a
-			 * phrase or partial phrase.
+			 * This line ends in a tab; start the next
+			 * column now, with a leading blank.
 			 */
+
 			if (body != NULL)
 				rew_last(mdoc, body);
 			body = roff_body_alloc(mdoc, line, ppos, tok);
+			roff_word_alloc(mdoc, line, ppos, "\\&");
 			break;
 		}
+
 		if (tok == MDOC_Bd || tok == MDOC_Bk) {
 			mandoc_vmsg(MANDOCERR_ARG_EXCESS,
 			    mdoc->parse, line, la, "%s ... %s",
@@ -1070,9 +1087,7 @@ blk_full(MACRO_PROT_ARGS)
 		 */
 
 		if (head == NULL &&
-		    ac != ARGS_PEND &&
 		    ac != ARGS_PHRASE &&
-		    ac != ARGS_PPHRASE &&
 		    ac != ARGS_QWORD &&
 		    mdoc_isdelim(p) == DELIM_OPEN) {
 			dword(mdoc, line, la, p, DELIM_OPEN, 0);
@@ -1084,9 +1099,7 @@ blk_full(MACRO_PROT_ARGS)
 		if (head == NULL)
 			head = roff_head_alloc(mdoc, line, ppos, tok);
 
-		if (ac == ARGS_PHRASE ||
-		    ac == ARGS_PEND ||
-		    ac == ARGS_PPHRASE) {
+		if (ac == ARGS_PHRASE) {
 
 			/*
 			 * If we haven't opened a body yet, rewind the
@@ -1096,18 +1109,11 @@ blk_full(MACRO_PROT_ARGS)
 			rew_last(mdoc, body == NULL ? head : body);
 			body = roff_body_alloc(mdoc, line, ppos, tok);
 
-			/*
-			 * Process phrases: set whether we're in a
-			 * partial-phrase (this effects line handling)
-			 * then call down into the phrase parser.
-			 */
+			/* Process to the tab or to the end of the line. */
 
-			if (ac == ARGS_PPHRASE)
-				mdoc->flags |= MDOC_PPHRASE;
-			if (ac == ARGS_PEND && lac == ARGS_PPHRASE)
-				mdoc->flags |= MDOC_PPHRASE;
+			mdoc->flags |= MDOC_PHRASE;
 			parse_rest(mdoc, TOKEN_NONE, line, &la, buf);
-			mdoc->flags &= ~MDOC_PPHRASE;
+			mdoc->flags &= ~MDOC_PHRASE;
 
 			/* There may have been `Ta' macros. */
 
Index: roff.h
===================================================================
RCS file: /home/cvs/mdocml/mdocml/roff.h,v
retrieving revision 1.34
retrieving revision 1.35
diff -Lroff.h -Lroff.h -u -p -r1.34 -r1.35
--- roff.h
+++ roff.h
@@ -141,8 +141,8 @@ struct	roff_man {
 #define	MDOC_LITERAL	 (1 << 1)  /* In a literal scope. */
 #define	MDOC_PBODY	 (1 << 2)  /* In the document body. */
 #define	MDOC_NEWLINE	 (1 << 3)  /* First macro/text in a line. */
-#define	MDOC_PHRASELIT	 (1 << 4)  /* Literal within a partial phrase. */
-#define	MDOC_PPHRASE	 (1 << 5)  /* Within a partial phrase. */
+#define	MDOC_PHRASE	 (1 << 4)  /* In a Bl -column phrase. */
+#define	MDOC_PHRASELIT	 (1 << 5)  /* Literal within a phrase. */
 #define	MDOC_FREECOL	 (1 << 6)  /* `It' invocation should close. */
 #define	MDOC_SYNOPSIS	 (1 << 7)  /* SYNOPSIS-style formatting. */
 #define	MDOC_KEEP	 (1 << 8)  /* In a word keep. */
@@ -150,6 +150,9 @@ struct	roff_man {
 #define	MDOC_NODELIMC	 (1 << 10) /* Disable closing delimiter handling. */
 #define	MAN_ELINE	 (1 << 11) /* Next-line element scope. */
 #define	MAN_BLINE	 (1 << 12) /* Next-line block scope. */
+#define	MDOC_PHRASEQF	 (1 << 13) /* Quote first word encountered. */
+#define	MDOC_PHRASEQL	 (1 << 14) /* Quote last word of this phrase. */
+#define	MDOC_PHRASEQN	 (1 << 15) /* Quote first word of the next phrase. */
 #define	MAN_LITERAL	  MDOC_LITERAL
 #define	MAN_NEWLINE	  MDOC_NEWLINE
 	enum roff_macroset macroset; /* Kind of high-level macros used. */
Index: mdoc_argv.c
===================================================================
RCS file: /home/cvs/mdocml/mdocml/mdoc_argv.c,v
retrieving revision 1.106
retrieving revision 1.107
diff -Lmdoc_argv.c -Lmdoc_argv.c -u -p -r1.106 -r1.107
--- mdoc_argv.c
+++ mdoc_argv.c
@@ -449,11 +449,10 @@ args(struct roff_man *mdoc, int line, in
 {
 	char		*p;
 	int		 pairs;
-	enum margserr	 rc;
 
 	if (buf[*pos] == '\0') {
 		if (mdoc->flags & MDOC_PHRASELIT &&
-		    ! (mdoc->flags & MDOC_PPHRASE)) {
+		    ! (mdoc->flags & MDOC_PHRASE)) {
 			mandoc_msg(MANDOCERR_ARG_QUOTE,
 			    mdoc->parse, line, *pos, NULL);
 			mdoc->flags &= ~MDOC_PHRASELIT;
@@ -473,18 +472,41 @@ args(struct roff_man *mdoc, int line, in
 
 	if (fl == ARGSFL_TABSEP) {
 		if ((p = strchr(*v, '\t')) != NULL) {
-			/* Skip any blank characters after the tab. */
+
+			/*
+			 * Words right before and right after
+			 * tab characters are not parsed,
+			 * unless there is a blank in between.
+			 */
+
+			if (p[-1] != ' ')
+				mdoc->flags |= MDOC_PHRASEQL;
+			if (p[1] != ' ')
+				mdoc->flags |= MDOC_PHRASEQN;
+
+			/*
+			 * One or more blanks after a tab cause
+			 * one leading blank in the next column.
+			 * So skip all but one of them.
+			 */
+
 			*pos += (int)(p - *v) + 1;
-			while (buf[*pos] == ' ')
+			while (buf[*pos] == ' ' && buf[*pos + 1] == ' ')
 				(*pos)++;
-			rc = ARGS_PPHRASE;
+
+			/*
+			 * A tab at the end of an input line
+			 * switches to the next column.
+			 */
+
+			if (buf[*pos] == '\0' || buf[*pos + 1] == '\0')
+				mdoc->flags |= MDOC_PHRASEQN;
 		} else {
 			p = strchr(*v, '\0');
 			if (p[-1] == ' ')
 				mandoc_msg(MANDOCERR_SPACE_EOL,
 				    mdoc->parse, line, *pos, NULL);
 			*pos += (int)(p - *v);
-			rc = ARGS_PEND;
 		}
 
 		/* Skip any trailing blank characters. */
@@ -493,7 +515,7 @@ args(struct roff_man *mdoc, int line, in
 			p--;
 		*p = '\0';
 
-		return rc;
+		return ARGS_PHRASE;
 	}
 
 	/*
@@ -504,11 +526,11 @@ args(struct roff_man *mdoc, int line, in
 	 * Whitespace is NOT involved in literal termination.
 	 */
 
-	if (MDOC_PHRASELIT & mdoc->flags || '\"' == buf[*pos]) {
-		if ( ! (MDOC_PHRASELIT & mdoc->flags))
+	if (mdoc->flags & MDOC_PHRASELIT || buf[*pos] == '\"') {
+		if ( ! (mdoc->flags & MDOC_PHRASELIT))
 			*v = &buf[++(*pos)];
 
-		if (MDOC_PPHRASE & mdoc->flags)
+		if (mdoc->flags & MDOC_PHRASE)
 			mdoc->flags |= MDOC_PHRASELIT;
 
 		pairs = 0;
@@ -528,11 +550,10 @@ args(struct roff_man *mdoc, int line, in
 		if (pairs)
 			buf[*pos - pairs] = '\0';
 
-		if ('\0' == buf[*pos]) {
-			if (MDOC_PPHRASE & mdoc->flags)
-				return ARGS_QWORD;
-			mandoc_msg(MANDOCERR_ARG_QUOTE,
-			    mdoc->parse, line, *pos, NULL);
+		if (buf[*pos] == '\0') {
+			if ( ! (mdoc->flags & MDOC_PHRASE))
+				mandoc_msg(MANDOCERR_ARG_QUOTE,
+				    mdoc->parse, line, *pos, NULL);
 			return ARGS_QWORD;
 		}
 
@@ -555,6 +576,15 @@ args(struct roff_man *mdoc, int line, in
 	p = &buf[*pos];
 	*v = mandoc_getarg(mdoc->parse, &p, line, pos);
 
+	/*
+	 * After parsing the last word in this phrase,
+	 * tell lookup() whether or not to interpret it.
+	 */
+
+	if (*p == '\0' && mdoc->flags & MDOC_PHRASEQL) {
+		mdoc->flags &= ~MDOC_PHRASEQL;
+		mdoc->flags |= MDOC_PHRASEQF;
+	}
 	return ARGS_WORD;
 }
 
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2015-10-17  0:21 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-17  0:21 mdocml: Very tricky diff to fix macro interpretation and spacing around schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).