source@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: schwarze@mdocml.bsd.lv
To: source@mdocml.bsd.lv
Subject: pod2mdoc: Stop bypassing the text output buffer when printing macro
Date: Fri, 20 Feb 2015 08:34:22 -0500 (EST)	[thread overview]
Message-ID: <9952616508070897905.enqueue@fantadrom.bsd.lv> (raw)

Log Message:
-----------
Stop bypassing the text output buffer when printing macro arguments,
stop ignoring the wantws flag for macro argument spacing, and stop
attempting artificial argument spacing by manually printing blank
characters at more than twenty distinct places in the code. 

This fixes a wide range of formatting issues:

* Print named escapes inside format codes at the correct position.
* Escape quotes at the beginning of command arguments.
* Correct spacing in words like B<x>yB<z>.
* Escape "Xx" only if it's the whole word, not when it merely ends a longer word.
* Avoid an additional blank when a command line starts with a format code.
* Avoid an additional blank after Qc.

I tried to do this in smaller, incremental steps, but it did not
work out.  All these aspects are related to each other, so I'm
committing it as one big diff, thoroughly tested for regressions.

Modified Files:
--------------
    pod2mdoc:
        pod2mdoc.c

Revision Data
-------------
Index: pod2mdoc.c
===================================================================
RCS file: /home/cvs/mdocml/pod2mdoc/pod2mdoc.c,v
retrieving revision 1.55
retrieving revision 1.56
diff -Lpod2mdoc.c -Lpod2mdoc.c -u -p -r1.55 -r1.56
--- pod2mdoc.c
+++ pod2mdoc.c
@@ -208,7 +208,7 @@ outbuf_flush(struct state *st)
 	if (0 == st->outbuflen)
 		return;
 
-	if (OUST_TXT == st->oust && st->wantws)
+	if (OUST_NL != st->oust && st->wantws)
 		putchar(' ');
 
 	if (OUST_MAC == st->oust && '"' == *st->outbuf)
@@ -441,7 +441,7 @@ again:
 		'?' != buf[*start + 1] &&
 		'-' != buf[*start + 1]) {
 		(*start)--;
-		fputs("Ar ", stdout);
+		fputs("Ar", stdout);
 		return;
 	}
 
@@ -461,7 +461,7 @@ again:
 	assert(i < end);
 
 	if ( ! (' ' == buf[i] || '>' == buf[i])) {
-		printf("Ar ");
+		fputs("Ar", stdout);
 		return;
 	}
 
@@ -472,7 +472,7 @@ again:
 		(end - *start == 2 ||
 		 ' ' == buf[*start + 2]))
 		printf("\\&");
-	printf("%.*s ", (int)(i - *start), &buf[*start]);
+	printf("%.*s", (int)(i - *start), &buf[*start]);
 	*start = i;
 
 	if (' ' == buf[i]) {
@@ -483,7 +483,7 @@ again:
 			*start = i;
 			goto again;
 		}
-		printf("Ar ");
+		fputs("Ar", stdout);
 		*start = i;
 	}
 }
@@ -511,6 +511,7 @@ formatcode(struct state *st, const char 
 	size_t		 i, j, dsz;
 	enum fmt	 fmt;
 	unsigned char	 uc;
+	int		 gotmacro, wantws;
 
 	assert(*start + 1 < end);
 	assert('<' == buf[*start + 1]);
@@ -588,31 +589,35 @@ formatcode(struct state *st, const char 
 	if (FMT__MAX != fmt && !nomacro) {
 
 		/*
+		 * Do we need spacing before the upcoming macro,
+		 * after any pending text already in the outbuf?
+		 * We may already have wantws if there was whitespace
+		 * before the code ("text B<text"), or there may be
+		 * whitespace inside our scope ("textB< text").
+		 */
+
+		wantws = ' ' == buf[*start] ||
+		    (st->wantws && ! st->outbuflen);
+
+		/*
 		 * If we are on a text line and there is no
 		 * whitespace before our content, we have to make
 		 * the previous word a prefix to the macro line.
 		 */
 
-		if (OUST_MAC != st->oust && ' ' != buf[*start] &&
-		    st->outbuflen) {
+		if (OUST_MAC != st->oust && ! wantws) {
 			if (OUST_NL != st->oust)
 				mdoc_newln(st);
-			printf(".Pf ");
+			fputs(".Pf", stdout);
 			st->oust = OUST_MAC;
-			st->wantws = 1;
+			st->wantws = wantws = 1;
 		}
 
 		outbuf_flush(st);
 
-		/*
-		 * Whitespace is easier to suppress on macro lines.
-		 * We may already have wantws if there was whitespace
-		 * before the code ("text B<text"), or there may be
-		 * whitespace inside our scope ("textB< text").
-		 */
+		/* Whitespace is easier to suppress on macro lines. */
 
-		if (OUST_MAC == st->oust && ' ' != buf[*start] &&
-		    ! st->wantws)
+		if (OUST_MAC == st->oust && ! wantws)
 			printf(" Ns");
 
 		/* Unless we are on a macro line, start one. */
@@ -633,16 +638,16 @@ formatcode(struct state *st, const char 
 
 		switch (fmt) {
 		case (FMT_ITALIC):
-			printf("Em ");
+			fputs("Em", stdout);
 			break;
 		case (FMT_BOLD):
 			if (SECT_SYNOPSIS == st->sect) { 
 				if (1 == dsz && '-' == buf[*start])
 					dosynopsisfl(buf, start, end);
 				else if (0 == pos)
-					printf("Nm ");
+					fputs("Nm", stdout);
 				else
-					printf("Ar ");
+					fputs("Ar", stdout);
 				break;
 			}
 			i = 0;
@@ -652,40 +657,42 @@ formatcode(struct state *st, const char 
 			if ('=' != uc && '>' != uc)
 				i = 0;
 			if (4 == i && ! strncmp(buf + *start, "NULL", 4)) {
-				printf("Dv ");
+				fputs("Dv", stdout);
 				break;
 			}
 			switch (i ? dict_get(buf + *start, i) : MDOC_MAX) {
 			case MDOC_Fa:
-				printf("Fa ");
+				fputs("Fa", stdout);
 				break;
 			case MDOC_Vt:
-				printf("Vt ");
+				fputs("Vt", stdout);
 				break;
 			default:
-				printf("Sy ");
+				fputs("Sy", stdout);
 				break;
 			}
 			break;
 		case (FMT_CODE):
-			printf("Qo Li ");
+			fputs("Qo Li", stdout);
 			break;
 		case (FMT_LINK):
 			/* Try to link; use "No" if it's empty. */
 			if ( ! trylink(buf, start, end, dsz))
-				printf("No ");
+				fputs("No", stdout);
 			break;
 		case (FMT_FILE):
-			printf("Pa ");
+			fputs("Pa", stdout);
 			break;
 		case (FMT_NBSP):
-			printf("No ");
+			fputs("No", stdout);
 			break;
 		default:
 			abort();
 		}
-	} else
+	} else {
 		outbuf_flush(st);
+		st->wantws = 0;
+	}
 
 	/*
 	 * Process until we reach the end marker (e.g., '>') or until we
@@ -693,6 +700,8 @@ formatcode(struct state *st, const char 
 	 * Don't emit any newlines: since we're on a macro line, we
 	 * don't want to break the line.
 	 */
+
+	gotmacro = 0;
 	while (*start < end) {
 		if ('>' == buf[*start] && 1 == dsz) {
 			(*start)++;
@@ -716,54 +725,58 @@ formatcode(struct state *st, const char 
 		}
 		if (*start + 1 < end && '<' == buf[*start + 1] &&
 		    'A' <= buf[*start] && 'Z' >= buf[*start]) {
-			if ( ! formatcode(st, buf, start, end, nomacro, 1))
-				st->wantws = 1;
+			gotmacro = formatcode(st, buf,
+			    start, end, nomacro, 1);
 			continue;
 		}
 
 		/* Suppress newlines and multiple spaces. */
 
 		last = buf[(*start)++];
-		if (' ' == last || '\n' == last) {
-			putchar(' ');
-			while (*start < end && ' ' == buf[*start])
+		if (isspace(last)) {
+			outbuf_flush(st);
+			st->wantws = 1;
+			gotmacro = 0;
+			while (*start < end &&
+			    isspace((unsigned char)buf[*start]))
 				(*start)++;
 			continue;
 		}
 
 		if (OUST_MAC == st->oust && FMT__MAX != fmt) {
-			if ( ! st->wantws) {
-				printf(" Ns ");
+			if (gotmacro && ! st->wantws) {
+				printf(" Ns");
 				st->wantws = 1;
 			}
+			gotmacro = 0;
 
 			/*
 			 * Escape macro-like words.
 			 * This matches "Xx " and "XxEOLN".
 			 */
 
-			if (end - *start > 0 &&
-			    isupper((unsigned char)last) &&
+			if (*start < end && ! st->outbuflen &&
+			    isupper(last) &&
 			    islower((unsigned char)buf[*start]) &&
 			    (end - *start == 1 ||
 			     ' ' == buf[*start + 1] ||
 			     '>' == buf[*start + 1]))
-				printf("\\&");
+				outbuf_addstr(st, "\\&");
+			last = buf[*start - 1];
 		}
+		outbuf_addchar(st);
+	}
 
-		putchar(last);
+	if (FMT__MAX == fmt)
+		return(0);
 
-		/* Protect against character escapes. */
-
-		if ('\\' == last)
-			putchar('e');
-	}
+	outbuf_flush(st);
 
 	if ( ! nomacro && FMT_CODE == fmt)
-		printf(" Qc ");
+		fputs(" Qc", stdout);
 
 	st->wantws = ' ' == last;
-	return(FMT__MAX != fmt);
+	return(1);
 }
 
 /*
@@ -776,51 +789,44 @@ static void
 formatcodeln(struct state *st, const char *linemac,
 	const char *buf, size_t *start, size_t end, int nomacro)
 {
-	int	 gotmacro, wantws;
+	int	 gotmacro;
 
 	assert(OUST_NL == st->oust);
 	assert(st->wantws);
-	printf(".%s ", linemac);
+	printf(".%s", linemac);
 	st->oust = OUST_MAC;
 
 	gotmacro = 0;
 	while (*start < end)  {
-		wantws = ' ' == buf[*start] || '\n' == buf[*start];
-		if (wantws) {
-			last = ' ';
-			do {
-				(*start)++;
-			} while (*start < end && ' ' == buf[*start]);
-		}
-
 		if (*start + 1 < end && '<' == buf[*start + 1] &&
 		    'A' <= buf[*start] && 'Z' >= buf[*start]) {
-			st->wantws |= wantws;
 			gotmacro = formatcode(st, buf,
 			    start, end, nomacro, 1);
 			continue;
 		}
 
+		/* Suppress newlines and multiple spaces. */
+
+		last = buf[(*start)++];
+		if (isspace(last)) {
+			outbuf_flush(st);
+			st->wantws = 1;
+			while (*start < end &&
+			    isspace((unsigned char)buf[*start]))
+				(*start)++;
+			continue;
+		}
+
 		if (gotmacro) {
-			if (*start < end || st->outbuflen) {
-				if (st->wantws ||
-				    (wantws && !st->outbuflen))
-					printf(" No ");
+			if (*start < end) {
+				if (st->wantws)
+					printf(" No");
 				else
-					printf(" Ns ");
+					printf(" Ns");
 			}
+			st->wantws = 1;
 			gotmacro = 0;
 		}
-		outbuf_flush(st);
-		st->wantws = wantws;
-
-		if (*start >= end)
-			break;
-
-		if (st->wantws) {
-			putchar(' ');
-			st->wantws = 0;
-		}
 
 		/*
 		 * Since we're already on a macro line, we want to make
@@ -829,21 +835,16 @@ formatcodeln(struct state *st, const cha
 		 * are used in troff and we don't want to escape
 		 * something that needn't be escaped.
 		 */
-		if (' ' == last && end - *start > 1 &&
-		    isupper((unsigned char)buf[*start]) &&
-		    islower((unsigned char)buf[*start + 1]) &&
-		    (end - *start == 2 || ' ' == buf[*start + 2]))
-			printf("\\&");
-
-		putchar(last = buf[*start]);
-
-		/* Protect against character escapes. */
-
-		if ('\\' == last)
-			putchar('e');
-
-		(*start)++;
+		if (*start < end && ! st->outbuflen && isupper(last) &&
+		    islower((unsigned char)buf[*start]) &&
+		    (end - *start == 1 || ' ' == buf[*start + 1])) {
+			outbuf_addstr(st, "\\&");
+			last = buf[*start - 1];
+		}
+		outbuf_addchar(st);
 	}
+	outbuf_flush(st);
+	st->wantws = 1;
 }
 
 /*
@@ -1529,9 +1530,9 @@ ordinary(struct state *st, const char *b
 					st->outbuf[wend] = '\0';
 					mdoc_newln(st);
 					if (MDOC_Fo == mtype)
-						fputs(".Fn ", stdout);
+						fputs(".Fn", stdout);
 					else
-						fputs(".Xr ", stdout);
+						fputs(".Xr", stdout);
 					st->oust = OUST_MAC;
 				}
 			} else {
@@ -1540,7 +1541,7 @@ ordinary(struct state *st, const char *b
 					savechar = st->outbuf[wend];
 					st->outbuf[wend] = '\0';
 					mdoc_newln(st);
-					fputs(".Dv ", stdout);
+					fputs(".Dv", stdout);
 					st->oust = OUST_MAC;
 				} else
 					mtype = MDOC_Fa;
@@ -1623,7 +1624,7 @@ ordinary(struct state *st, const char *b
 				    ('<' != buf[start + 1] ||
 				     'A' > buf[start] ||
 				     'Z' < buf[start])) {
-					printf(" Ns ");
+					fputs(" Ns", stdout);
 					st->wantws = 1;
 				}
 			}
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

                 reply	other threads:[~2015-02-20 13:34 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9952616508070897905.enqueue@fantadrom.bsd.lv \
    --to=schwarze@mdocml.bsd.lv \
    --cc=source@mdocml.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).