source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* pod2mdoc: Stop bypassing the text output buffer when printing macro
@ 2015-02-20 13:34 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2015-02-20 13:34 UTC (permalink / raw)
  To: source

Log Message:
-----------
Stop bypassing the text output buffer when printing macro arguments,
stop ignoring the wantws flag for macro argument spacing, and stop
attempting artificial argument spacing by manually printing blank
characters at more than twenty distinct places in the code. 

This fixes a wide range of formatting issues:

* Print named escapes inside format codes at the correct position.
* Escape quotes at the beginning of command arguments.
* Correct spacing in words like B<x>yB<z>.
* Escape "Xx" only when it is the whole word, not when it merely ends a longer word.
* Avoid an additional blank when a command line starts with a format code.
* Avoid an additional blank after Qc.

I tried to do this in smaller, incremental steps, but it did not
work out.  All these aspects are related to each other, so I'm
committing it as one big diff, thoroughly tested for regressions.

Modified Files:
--------------
    pod2mdoc:
        pod2mdoc.c

Revision Data
-------------
Index: pod2mdoc.c
===================================================================
RCS file: /home/cvs/mdocml/pod2mdoc/pod2mdoc.c,v
retrieving revision 1.55
retrieving revision 1.56
diff -Lpod2mdoc.c -Lpod2mdoc.c -u -p -r1.55 -r1.56
--- pod2mdoc.c
+++ pod2mdoc.c
@@ -208,7 +208,7 @@ outbuf_flush(struct state *st)
 	if (0 == st->outbuflen)
 		return;
 
-	if (OUST_TXT == st->oust && st->wantws)
+	if (OUST_NL != st->oust && st->wantws)
 		putchar(' ');
 
 	if (OUST_MAC == st->oust && '"' == *st->outbuf)
@@ -441,7 +441,7 @@ again:
 		'?' != buf[*start + 1] &&
 		'-' != buf[*start + 1]) {
 		(*start)--;
-		fputs("Ar ", stdout);
+		fputs("Ar", stdout);
 		return;
 	}
 
@@ -461,7 +461,7 @@ again:
 	assert(i < end);
 
 	if ( ! (' ' == buf[i] || '>' == buf[i])) {
-		printf("Ar ");
+		fputs("Ar", stdout);
 		return;
 	}
 
@@ -472,7 +472,7 @@ again:
 		(end - *start == 2 ||
 		 ' ' == buf[*start + 2]))
 		printf("\\&");
-	printf("%.*s ", (int)(i - *start), &buf[*start]);
+	printf("%.*s", (int)(i - *start), &buf[*start]);
 	*start = i;
 
 	if (' ' == buf[i]) {
@@ -483,7 +483,7 @@ again:
 			*start = i;
 			goto again;
 		}
-		printf("Ar ");
+		fputs("Ar", stdout);
 		*start = i;
 	}
 }
@@ -511,6 +511,7 @@ formatcode(struct state *st, const char 
 	size_t		 i, j, dsz;
 	enum fmt	 fmt;
 	unsigned char	 uc;
+	int		 gotmacro, wantws;
 
 	assert(*start + 1 < end);
 	assert('<' == buf[*start + 1]);
@@ -588,31 +589,35 @@ formatcode(struct state *st, const char 
 	if (FMT__MAX != fmt && !nomacro) {
 
 		/*
+		 * Do we need spacing before the upcoming macro,
+		 * after any pending text already in the outbuf?
+		 * We may already have wantws if there was whitespace
+		 * before the code ("text B<text"), or there may be
+		 * whitespace inside our scope ("textB< text").
+		 */
+
+		wantws = ' ' == buf[*start] ||
+		    (st->wantws && ! st->outbuflen);
+
+		/*
 		 * If we are on a text line and there is no
 		 * whitespace before our content, we have to make
 		 * the previous word a prefix to the macro line.
 		 */
 
-		if (OUST_MAC != st->oust && ' ' != buf[*start] &&
-		    st->outbuflen) {
+		if (OUST_MAC != st->oust && ! wantws) {
 			if (OUST_NL != st->oust)
 				mdoc_newln(st);
-			printf(".Pf ");
+			fputs(".Pf", stdout);
 			st->oust = OUST_MAC;
-			st->wantws = 1;
+			st->wantws = wantws = 1;
 		}
 
 		outbuf_flush(st);
 
-		/*
-		 * Whitespace is easier to suppress on macro lines.
-		 * We may already have wantws if there was whitespace
-		 * before the code ("text B<text"), or there may be
-		 * whitespace inside our scope ("textB< text").
-		 */
+		/* Whitespace is easier to suppress on macro lines. */
 
-		if (OUST_MAC == st->oust && ' ' != buf[*start] &&
-		    ! st->wantws)
+		if (OUST_MAC == st->oust && ! wantws)
 			printf(" Ns");
 
 		/* Unless we are on a macro line, start one. */
@@ -633,16 +638,16 @@ formatcode(struct state *st, const char 
 
 		switch (fmt) {
 		case (FMT_ITALIC):
-			printf("Em ");
+			fputs("Em", stdout);
 			break;
 		case (FMT_BOLD):
 			if (SECT_SYNOPSIS == st->sect) { 
 				if (1 == dsz && '-' == buf[*start])
 					dosynopsisfl(buf, start, end);
 				else if (0 == pos)
-					printf("Nm ");
+					fputs("Nm", stdout);
 				else
-					printf("Ar ");
+					fputs("Ar", stdout);
 				break;
 			}
 			i = 0;
@@ -652,40 +657,42 @@ formatcode(struct state *st, const char 
 			if ('=' != uc && '>' != uc)
 				i = 0;
 			if (4 == i && ! strncmp(buf + *start, "NULL", 4)) {
-				printf("Dv ");
+				fputs("Dv", stdout);
 				break;
 			}
 			switch (i ? dict_get(buf + *start, i) : MDOC_MAX) {
 			case MDOC_Fa:
-				printf("Fa ");
+				fputs("Fa", stdout);
 				break;
 			case MDOC_Vt:
-				printf("Vt ");
+				fputs("Vt", stdout);
 				break;
 			default:
-				printf("Sy ");
+				fputs("Sy", stdout);
 				break;
 			}
 			break;
 		case (FMT_CODE):
-			printf("Qo Li ");
+			fputs("Qo Li", stdout);
 			break;
 		case (FMT_LINK):
 			/* Try to link; use "No" if it's empty. */
 			if ( ! trylink(buf, start, end, dsz))
-				printf("No ");
+				fputs("No", stdout);
 			break;
 		case (FMT_FILE):
-			printf("Pa ");
+			fputs("Pa", stdout);
 			break;
 		case (FMT_NBSP):
-			printf("No ");
+			fputs("No", stdout);
 			break;
 		default:
 			abort();
 		}
-	} else
+	} else {
 		outbuf_flush(st);
+		st->wantws = 0;
+	}
 
 	/*
 	 * Process until we reach the end marker (e.g., '>') or until we
@@ -693,6 +700,8 @@ formatcode(struct state *st, const char 
 	 * Don't emit any newlines: since we're on a macro line, we
 	 * don't want to break the line.
 	 */
+
+	gotmacro = 0;
 	while (*start < end) {
 		if ('>' == buf[*start] && 1 == dsz) {
 			(*start)++;
@@ -716,54 +725,58 @@ formatcode(struct state *st, const char 
 		}
 		if (*start + 1 < end && '<' == buf[*start + 1] &&
 		    'A' <= buf[*start] && 'Z' >= buf[*start]) {
-			if ( ! formatcode(st, buf, start, end, nomacro, 1))
-				st->wantws = 1;
+			gotmacro = formatcode(st, buf,
+			    start, end, nomacro, 1);
 			continue;
 		}
 
 		/* Suppress newlines and multiple spaces. */
 
 		last = buf[(*start)++];
-		if (' ' == last || '\n' == last) {
-			putchar(' ');
-			while (*start < end && ' ' == buf[*start])
+		if (isspace(last)) {
+			outbuf_flush(st);
+			st->wantws = 1;
+			gotmacro = 0;
+			while (*start < end &&
+			    isspace((unsigned char)buf[*start]))
 				(*start)++;
 			continue;
 		}
 
 		if (OUST_MAC == st->oust && FMT__MAX != fmt) {
-			if ( ! st->wantws) {
-				printf(" Ns ");
+			if (gotmacro && ! st->wantws) {
+				printf(" Ns");
 				st->wantws = 1;
 			}
+			gotmacro = 0;
 
 			/*
 			 * Escape macro-like words.
 			 * This matches "Xx " and "XxEOLN".
 			 */
 
-			if (end - *start > 0 &&
-			    isupper((unsigned char)last) &&
+			if (*start < end && ! st->outbuflen &&
+			    isupper(last) &&
 			    islower((unsigned char)buf[*start]) &&
 			    (end - *start == 1 ||
 			     ' ' == buf[*start + 1] ||
 			     '>' == buf[*start + 1]))
-				printf("\\&");
+				outbuf_addstr(st, "\\&");
+			last = buf[*start - 1];
 		}
+		outbuf_addchar(st);
+	}
 
-		putchar(last);
+	if (FMT__MAX == fmt)
+		return(0);
 
-		/* Protect against character escapes. */
-
-		if ('\\' == last)
-			putchar('e');
-	}
+	outbuf_flush(st);
 
 	if ( ! nomacro && FMT_CODE == fmt)
-		printf(" Qc ");
+		fputs(" Qc", stdout);
 
 	st->wantws = ' ' == last;
-	return(FMT__MAX != fmt);
+	return(1);
 }
 
 /*
@@ -776,51 +789,44 @@ static void
 formatcodeln(struct state *st, const char *linemac,
 	const char *buf, size_t *start, size_t end, int nomacro)
 {
-	int	 gotmacro, wantws;
+	int	 gotmacro;
 
 	assert(OUST_NL == st->oust);
 	assert(st->wantws);
-	printf(".%s ", linemac);
+	printf(".%s", linemac);
 	st->oust = OUST_MAC;
 
 	gotmacro = 0;
 	while (*start < end)  {
-		wantws = ' ' == buf[*start] || '\n' == buf[*start];
-		if (wantws) {
-			last = ' ';
-			do {
-				(*start)++;
-			} while (*start < end && ' ' == buf[*start]);
-		}
-
 		if (*start + 1 < end && '<' == buf[*start + 1] &&
 		    'A' <= buf[*start] && 'Z' >= buf[*start]) {
-			st->wantws |= wantws;
 			gotmacro = formatcode(st, buf,
 			    start, end, nomacro, 1);
 			continue;
 		}
 
+		/* Suppress newlines and multiple spaces. */
+
+		last = buf[(*start)++];
+		if (isspace(last)) {
+			outbuf_flush(st);
+			st->wantws = 1;
+			while (*start < end &&
+			    isspace((unsigned char)buf[*start]))
+				(*start)++;
+			continue;
+		}
+
 		if (gotmacro) {
-			if (*start < end || st->outbuflen) {
-				if (st->wantws ||
-				    (wantws && !st->outbuflen))
-					printf(" No ");
+			if (*start < end) {
+				if (st->wantws)
+					printf(" No");
 				else
-					printf(" Ns ");
+					printf(" Ns");
 			}
+			st->wantws = 1;
 			gotmacro = 0;
 		}
-		outbuf_flush(st);
-		st->wantws = wantws;
-
-		if (*start >= end)
-			break;
-
-		if (st->wantws) {
-			putchar(' ');
-			st->wantws = 0;
-		}
 
 		/*
 		 * Since we're already on a macro line, we want to make
@@ -829,21 +835,16 @@ formatcodeln(struct state *st, const cha
 		 * are used in troff and we don't want to escape
 		 * something that needn't be escaped.
 		 */
-		if (' ' == last && end - *start > 1 &&
-		    isupper((unsigned char)buf[*start]) &&
-		    islower((unsigned char)buf[*start + 1]) &&
-		    (end - *start == 2 || ' ' == buf[*start + 2]))
-			printf("\\&");
-
-		putchar(last = buf[*start]);
-
-		/* Protect against character escapes. */
-
-		if ('\\' == last)
-			putchar('e');
-
-		(*start)++;
+		if (*start < end && ! st->outbuflen && isupper(last) &&
+		    islower((unsigned char)buf[*start]) &&
+		    (end - *start == 1 || ' ' == buf[*start + 1])) {
+			outbuf_addstr(st, "\\&");
+			last = buf[*start - 1];
+		}
+		outbuf_addchar(st);
 	}
+	outbuf_flush(st);
+	st->wantws = 1;
 }
 
 /*
@@ -1529,9 +1530,9 @@ ordinary(struct state *st, const char *b
 					st->outbuf[wend] = '\0';
 					mdoc_newln(st);
 					if (MDOC_Fo == mtype)
-						fputs(".Fn ", stdout);
+						fputs(".Fn", stdout);
 					else
-						fputs(".Xr ", stdout);
+						fputs(".Xr", stdout);
 					st->oust = OUST_MAC;
 				}
 			} else {
@@ -1540,7 +1541,7 @@ ordinary(struct state *st, const char *b
 					savechar = st->outbuf[wend];
 					st->outbuf[wend] = '\0';
 					mdoc_newln(st);
-					fputs(".Dv ", stdout);
+					fputs(".Dv", stdout);
 					st->oust = OUST_MAC;
 				} else
 					mtype = MDOC_Fa;
@@ -1623,7 +1624,7 @@ ordinary(struct state *st, const char *b
 				    ('<' != buf[start + 1] ||
 				     'A' > buf[start] ||
 				     'Z' < buf[start])) {
-					printf(" Ns ");
+					fputs(" Ns", stdout);
 					st->wantws = 1;
 				}
 			}
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2015-02-20 13:34 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-20 13:34 pod2mdoc: Stop bypassing the text output buffer when printing macro schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).