source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* pod2mdoc: When formatting codes follow text without intervening
@ 2014-07-15 19:03 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2014-07-15 19:03 UTC (permalink / raw)
  To: source

Log Message:
-----------
When formatting codes follow text without intervening whitespace,
a .Pf macro was issued without arguments.  While this (accidentally)
works in mandoc(1), groff(1) does not support it.  Introduce an output
buffer such that the previous word of the text can be given as an
argument to .Pf.
    
In some situations, for example right after a section header, the
newline printed before the .Pf could result in an empty line.
Introduce the "hasnl" state variable to avoid this.

Modified Files:
--------------
    pod2mdoc:
        pod2mdoc.c

Revision Data
-------------
Index: pod2mdoc.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/pod2mdoc/pod2mdoc.c,v
retrieving revision 1.30
retrieving revision 1.31
diff -Lpod2mdoc.c -Lpod2mdoc.c -u -p -r1.30 -r1.31
--- pod2mdoc.c
+++ pod2mdoc.c
@@ -53,14 +53,18 @@ enum	sect {
 };
 
 struct	state {
+	const char	*fname; /* file being parsed */
 	int		 parsing; /* after =cut of before command */
 	int		 paused; /* in =begin and before =end */
-	int		 haspar; /* in paragraph: do we need Pp? */
 	enum sect	 sect; /* which section are we in? */
-	const char	*fname; /* file being parsed */
 #define	LIST_STACKSZ	 128
 	enum list	 lstack[LIST_STACKSZ]; /* open lists */
 	size_t		 lpos; /* where in list stack */
+	int		 haspar; /* in paragraph: do we need Pp? */
+	int		 hasnl; /* in text: just started a new line */
+	char		*outbuf; /* text buffered for output */
+	size_t		 outbufsz; /* allocated size of outbuf */
+	size_t		 outbuflen; /* current length of outbuf */
 };
 
 enum	fmt {
@@ -123,13 +127,75 @@ static	const char fmts[FMT__MAX] = {
 
 static	int 	last;
 
+
+static void
+outbuf_grow(struct state *st, size_t by)
+{
+
+	st->outbufsz += (by / 128 + 1) * 128;
+	st->outbuf = realloc(st->outbuf, st->outbufsz);
+	if (NULL == st->outbuf) {
+		perror(NULL);
+		exit(EXIT_FAILURE);
+	}
+}
+
+static void
+outbuf_addchar(struct state *st)
+{
+
+	if (st->outbuflen + 2 >= st->outbufsz)
+		outbuf_grow(st, 1);
+	st->outbuf[st->outbuflen++] = last;
+	if ('\\' == last)
+		st->outbuf[st->outbuflen++] = 'e';
+	st->outbuf[st->outbuflen] = '\0';
+}
+
+static void
+outbuf_addstr(struct state *st, const char *str)
+{
+	size_t	 slen;
+
+	slen = strlen(str);
+	if (st->outbuflen + slen >= st->outbufsz)
+		outbuf_grow(st, slen);
+	memcpy(st->outbuf + st->outbuflen, str, slen+1);
+	last = str[slen - 1];
+}
+
+static void
+outbuf_flush(struct state *st)
+{
+
+	if (0 == st->outbuflen)
+		return;
+
+	fputs(st->outbuf, stdout);
+	*st->outbuf = '\0';
+	st->outbuflen = 0;
+	st->hasnl = 0;
+}
+
+static void
+outbuf_newln(struct state *st)
+{
+
+	if ('\n' == last)
+		return;
+	outbuf_flush(st);
+	putchar('\n');
+	last = '\n';
+	st->hasnl = 1;
+}
+
 /*
  * Given buf[*start] is at the start of an escape name, read til the end
  * of the escape ('>') then try to do something with it.
  * Sets start to be one after the '>'.
  */
 static void
-formatescape(const char *buf, size_t *start, size_t end)
+formatescape(struct state *st, const char *buf, size_t *start, size_t end)
 {
 	char		 esc[16]; /* no more needed */
 	size_t		 i, max;
@@ -157,17 +223,13 @@ formatescape(const char *buf, size_t *st
 	 * Just let the rest of them go. 
 	 */
 	if (0 == strcmp(esc, "lt")) 
-		printf("\\(la");
+		outbuf_addstr(st, "\\(la");
 	else if (0 == strcmp(esc, "gt"))
-		printf("\\(ra");
+		outbuf_addstr(st, "\\(ra");
 	else if (0 == strcmp(esc, "vb"))
-		printf("\\(ba");
+		outbuf_addstr(st, "\\(ba");
 	else if (0 == strcmp(esc, "sol"))
-		printf("\\(sl");
-	else
-		return;
-
-	last = 'a';
+		outbuf_addstr(st, "\\(sl");
 }
 
 /*
@@ -391,6 +453,7 @@ formatcode(struct state *st, const char 
 {
 	enum fmt	 fmt;
 	size_t		 i, j, dsz;
+	int		 white;
 
 	assert(*start + 1 < end);
 	assert('<' == buf[*start + 1]);
@@ -427,7 +490,7 @@ formatcode(struct state *st, const char 
 	 * processing for real macros.
 	 */
 	if (FMT_ESCAPE == fmt) {
-		formatescape(buf, start, end);
+		formatescape(st, buf, start, end);
 		return(0);
 	} else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
 		/* 
@@ -466,27 +529,38 @@ formatcode(struct state *st, const char 
 	 * suppressed in, e.g., "Nm" and "Sh" macros).
 	 */
 	if (FMT__MAX != fmt && !nomacro) {
+		white = ' ' == last || '\n' == last ||
+			' ' == buf[*start];
+
 		/*
-		 * Print out the macro describing this format code.
-		 * If we're not "reentrant" (not yet on a macro line)
-		 * then print a newline, if necessary, and the macro
-		 * indicator.
-		 * Otherwise, offset us with a space.
+		 * If we are on a text line and there is no
+		 * whitespace before our content, we have to make
+		 * the previous word a prefix to the macro line.
 		 */
-		if ( ! reentrant) {
+
+		if ( ! white && ! reentrant) {
+			if ( ! st->hasnl)
+				putchar('\n');
+			printf(".Pf ");
+		}
+
+		outbuf_flush(st);
+
+		/* Whitespace is easier to suppress on macro lines. */
+
+		if ( ! white && reentrant)
+			printf(" Ns");
+
+		/* Unless we are on a macro line, start one. */
+
+		if (white && ! reentrant) {
 			if (last != '\n')
 				putchar('\n');
 			putchar('.');
-		} else 
+		} else
 			putchar(' ');
-		
-		/*
-		 * If we don't have whitespace before us (and none after
-		 * the opening delimiter), then suppress macro
-		 * whitespace with Pf.
-		 */
-		if (' ' != last && '\n' != last && ' ' != buf[*start])
-			printf("Pf ");
+
+		/* Print the macro corresponding to this format code. */
 
 		switch (fmt) {
 		case (FMT_ITALIC):
@@ -526,7 +600,8 @@ formatcode(struct state *st, const char 
 		default:
 			abort();
 		}
-	}
+	} else
+		outbuf_flush(st);
 
 	/*
 	 * Process until we reach the end marker (e.g., '>') or until we
@@ -730,20 +805,20 @@ command(struct state *st, const char *bu
 				st->sect = SECT_SYNOPSIS;
 		} 
 		formatcodeln(st, buf, &start, end, 1);
-		putchar('\n');
+		putchar(last = '\n');
 		st->haspar = 1;
 		break;
 	case (CMD_HEAD2):
 		printf(".Ss ");
 		formatcodeln(st, buf, &start, end, 1);
-		putchar('\n');
+		putchar(last = '\n');
 		st->haspar = 1;
 		break;
 	case (CMD_HEAD3):
 		puts(".Pp");
 		printf(".Em ");
 		formatcodeln(st, buf, &start, end, 0);
-		putchar('\n');
+		putchar(last = '\n');
 		puts(".Pp");
 		st->haspar = 1;
 		break;
@@ -751,7 +826,7 @@ command(struct state *st, const char *bu
 		puts(".Pp");
 		printf(".No ");
 		formatcodeln(st, buf, &start, end, 0);
-		putchar('\n');
+		putchar(last = '\n');
 		puts(".Pp");
 		st->haspar = 1;
 		break;
@@ -805,7 +880,7 @@ command(struct state *st, const char *bu
 		case (LIST_TAG):
 			printf(".It ");
 			formatcodeln(st, buf, &start, end, 0);
-			putchar('\n');
+			putchar(last = '\n');
 			break;
 		case (LIST_ENUM):
 			/* FALLTHROUGH */
@@ -861,7 +936,6 @@ command(struct state *st, const char *bu
 static void
 verbatim(struct state *st, const char *buf, size_t start, size_t end)
 {
-	int		 last;
 	size_t		 i;
 
 	if ( ! st->parsing || st->paused)
@@ -916,7 +990,7 @@ again:
 		if ('\\' == buf[start])
 			printf("e");
 	}
-	putchar('\n');
+	putchar(last = '\n');
 	puts(".Ed");
 }
 
@@ -946,14 +1020,13 @@ hasmatch(const char *buf, size_t start, 
  * If we're an ending bracket, see if we have a stack already.
  */
 static int
-dosynopsisop(const char *buf, int *last,
-	size_t *start, size_t end, size_t *opstack)
+dosynopsisop(const char *buf, size_t *start, size_t end, size_t *opstack)
 {
 
 	assert('[' == buf[*start] || ']' == buf[*start]);
 
 	if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
-		if ('\n' != *last)
+		if ('\n' != last)
 			putchar('\n');
 		puts(".Oo");
 		(*opstack)++;
@@ -961,7 +1034,7 @@ dosynopsisop(const char *buf, int *last,
 		return(0);
 
 	if (']' == buf[*start] && *opstack > 0) {
-		if ('\n' != *last)
+		if ('\n' != last)
 			putchar('\n');
 		puts(".Oc");
 		(*opstack)--;
@@ -969,7 +1042,7 @@ dosynopsisop(const char *buf, int *last,
 		return(0);
 
 	(*start)++;
-	*last = '\n';
+	last = '\n';
 	while (' ' == buf[*start])
 		(*start)++;
 	return(1);
@@ -998,7 +1071,7 @@ donamenm(struct state *st, const char *b
 				break;
 		formatcodeln(st, buf, start, word, 1);
 		if (*start == end) {
-			putchar('\n');
+			putchar(last = '\n');
 			continue;
 		}
 		assert(',' == buf[*start]);
@@ -1048,7 +1121,7 @@ ordinary(struct state *st, const char *b
 				start++;
 			fputs(".Nd ", stdout);
 			formatcodeln(st, buf, &start, end, 1);
-			putchar('\n');
+			putchar(last = '\n');
 			return;
 		}
 	}
@@ -1057,6 +1130,7 @@ ordinary(struct state *st, const char *b
 		puts(".Pp");
 
 	st->haspar = 0;
+	st->hasnl = 1;
 	last = '\n';
 	opstack = 0;
 
@@ -1071,9 +1145,9 @@ ordinary(struct state *st, const char *b
 			else if ('\n' == buf[start])
 				break;
 			else if ('\n' == last && '.' == buf[start])
-				printf("\\&");
+				outbuf_addstr(st, "\\&");
 			else if ('\n' == last && '\'' == buf[start])
-				printf("\\&");
+				outbuf_addstr(st, "\\&");
 			/*
 			 * If we're in the SYNOPSIS, have square
 			 * brackets indicate that we're opening and
@@ -1082,12 +1156,14 @@ ordinary(struct state *st, const char *b
 			if (SECT_SYNOPSIS == st->sect &&
 				('[' == buf[start] || 
 				 ']' == buf[start]) &&
-				dosynopsisop(buf, &last, 
-					&start, end, &opstack))
+				dosynopsisop(buf, &start, end, &opstack))
 				continue;
-			putchar(last = buf[start++]);
-			if ('\\' == last)
-				putchar('e');
+			last = buf[start++];
+			if (' ' == last) {
+				outbuf_flush(st);
+				putchar(' ');
+			} else
+				outbuf_addchar(st);
 		}
 
 		if (start < end - 1 && '<' == buf[start + 1]) {
@@ -1108,6 +1184,7 @@ ordinary(struct state *st, const char *b
 				}
 				/* End the macro line. */
 				putchar(last = '\n');
+				st->hasnl = 1;
 				/*
 				 * Consume all whitespace
 				 * so we don't accidentally start
@@ -1117,12 +1194,7 @@ ordinary(struct state *st, const char *b
 					start++;
 			}
 		} else if (start < end && '\n' == buf[start]) {
-			/*
-			 * Print the newline only if we haven't already
-			 * printed a newline.
-			 */
-			if (last != '\n')
-				putchar(last = buf[start]);
+			outbuf_newln(st);
 			if (++start >= end)
 				continue;
 			/*
@@ -1133,18 +1205,14 @@ ordinary(struct state *st, const char *b
 			 * have a macro subsequent it, which may be
 			 * possible if we have an escape next.
 			 */
-			if (' ' == buf[start] || '\t' == buf[start]) {
+			if (' ' == buf[start] || '\t' == buf[start])
 				puts(".br");
-				last = '\n';
-			}
 			for ( ; start < end; start++)
 				if (' ' != buf[start] && '\t' != buf[start])
 					break;
 		} 
 	}
-
-	if (last != '\n')
-		putchar('\n');
+	outbuf_newln(st);
 }
 
 /*
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2014-07-15 19:03 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-15 19:03 pod2mdoc: When formatting codes follow text without intervening schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).