source@mandoc.bsd.lv
 help / color / mirror / Atom feed
From: schwarze@mdocml.bsd.lv
To: source@mdocml.bsd.lv
Subject: pod2mdoc: When formatting codes follow text without intervening
Date: Tue, 15 Jul 2014 15:03:07 -0400 (EDT)	[thread overview]
Message-ID: <201407151903.s6FJ37TW030865@krisdoz.my.domain> (raw)

Log Message:
-----------
When formatting codes follow text without intervening whitespace,
a .Pf macro was issued without arguments.  While this (accidentally)
works in mandoc(1), groff(1) does not support it.  Introduce an output
buffer such that the previous word of the text can be given as an
argument to .Pf.
    
In some situations, for example right after a section header, the
newline printed before the .Pf could result in an empty line.
Introduce the "hasnl" state variable to avoid this.

Modified Files:
--------------
    pod2mdoc:
        pod2mdoc.c

Revision Data
-------------
Index: pod2mdoc.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/pod2mdoc/pod2mdoc.c,v
retrieving revision 1.30
retrieving revision 1.31
diff -Lpod2mdoc.c -Lpod2mdoc.c -u -p -r1.30 -r1.31
--- pod2mdoc.c
+++ pod2mdoc.c
@@ -53,14 +53,18 @@ enum	sect {
 };
 
 struct	state {
+	const char	*fname; /* file being parsed */
 	int		 parsing; /* after =cut of before command */
 	int		 paused; /* in =begin and before =end */
-	int		 haspar; /* in paragraph: do we need Pp? */
 	enum sect	 sect; /* which section are we in? */
-	const char	*fname; /* file being parsed */
 #define	LIST_STACKSZ	 128
 	enum list	 lstack[LIST_STACKSZ]; /* open lists */
 	size_t		 lpos; /* where in list stack */
+	int		 haspar; /* in paragraph: do we need Pp? */
+	int		 hasnl; /* in text: just started a new line */
+	char		*outbuf; /* text buffered for output */
+	size_t		 outbufsz; /* allocated size of outbuf */
+	size_t		 outbuflen; /* current length of outbuf */
 };
 
 enum	fmt {
@@ -123,13 +127,75 @@ static	const char fmts[FMT__MAX] = {
 
 static	int 	last;
 
+
+static void
+outbuf_grow(struct state *st, size_t by)
+{
+
+	st->outbufsz += (by / 128 + 1) * 128;
+	st->outbuf = realloc(st->outbuf, st->outbufsz);
+	if (NULL == st->outbuf) {
+		perror(NULL);
+		exit(EXIT_FAILURE);
+	}
+}
+
+static void
+outbuf_addchar(struct state *st)
+{
+
+	if (st->outbuflen + 2 >= st->outbufsz)
+		outbuf_grow(st, 1);
+	st->outbuf[st->outbuflen++] = last;
+	if ('\\' == last)
+		st->outbuf[st->outbuflen++] = 'e';
+	st->outbuf[st->outbuflen] = '\0';
+}
+
+static void
+outbuf_addstr(struct state *st, const char *str)
+{
+	size_t	 slen;
+
+	slen = strlen(str);
+	if (st->outbuflen + slen >= st->outbufsz)
+		outbuf_grow(st, slen);
+	memcpy(st->outbuf + st->outbuflen, str, slen+1);
+	last = str[slen - 1];
+}
+
+static void
+outbuf_flush(struct state *st)
+{
+
+	if (0 == st->outbuflen)
+		return;
+
+	fputs(st->outbuf, stdout);
+	*st->outbuf = '\0';
+	st->outbuflen = 0;
+	st->hasnl = 0;
+}
+
+static void
+outbuf_newln(struct state *st)
+{
+
+	if ('\n' == last)
+		return;
+	outbuf_flush(st);
+	putchar('\n');
+	last = '\n';
+	st->hasnl = 1;
+}
+
 /*
  * Given buf[*start] is at the start of an escape name, read til the end
  * of the escape ('>') then try to do something with it.
  * Sets start to be one after the '>'.
  */
 static void
-formatescape(const char *buf, size_t *start, size_t end)
+formatescape(struct state *st, const char *buf, size_t *start, size_t end)
 {
 	char		 esc[16]; /* no more needed */
 	size_t		 i, max;
@@ -157,17 +223,13 @@ formatescape(const char *buf, size_t *st
 	 * Just let the rest of them go. 
 	 */
 	if (0 == strcmp(esc, "lt")) 
-		printf("\\(la");
+		outbuf_addstr(st, "\\(la");
 	else if (0 == strcmp(esc, "gt"))
-		printf("\\(ra");
+		outbuf_addstr(st, "\\(ra");
 	else if (0 == strcmp(esc, "vb"))
-		printf("\\(ba");
+		outbuf_addstr(st, "\\(ba");
 	else if (0 == strcmp(esc, "sol"))
-		printf("\\(sl");
-	else
-		return;
-
-	last = 'a';
+		outbuf_addstr(st, "\\(sl");
 }
 
 /*
@@ -391,6 +453,7 @@ formatcode(struct state *st, const char 
 {
 	enum fmt	 fmt;
 	size_t		 i, j, dsz;
+	int		 white;
 
 	assert(*start + 1 < end);
 	assert('<' == buf[*start + 1]);
@@ -427,7 +490,7 @@ formatcode(struct state *st, const char 
 	 * processing for real macros.
 	 */
 	if (FMT_ESCAPE == fmt) {
-		formatescape(buf, start, end);
+		formatescape(st, buf, start, end);
 		return(0);
 	} else if (FMT_NULL == fmt || FMT_INDEX == fmt) {
 		/* 
@@ -466,27 +529,38 @@ formatcode(struct state *st, const char 
 	 * suppressed in, e.g., "Nm" and "Sh" macros).
 	 */
 	if (FMT__MAX != fmt && !nomacro) {
+		white = ' ' == last || '\n' == last ||
+			' ' == buf[*start];
+
 		/*
-		 * Print out the macro describing this format code.
-		 * If we're not "reentrant" (not yet on a macro line)
-		 * then print a newline, if necessary, and the macro
-		 * indicator.
-		 * Otherwise, offset us with a space.
+		 * If we are on a text line and there is no
+		 * whitespace before our content, we have to make
+		 * the previous word a prefix to the macro line.
 		 */
-		if ( ! reentrant) {
+
+		if ( ! white && ! reentrant) {
+			if ( ! st->hasnl)
+				putchar('\n');
+			printf(".Pf ");
+		}
+
+		outbuf_flush(st);
+
+		/* Whitespace is easier to suppress on macro lines. */
+
+		if ( ! white && reentrant)
+			printf(" Ns");
+
+		/* Unless we are on a macro line, start one. */
+
+		if (white && ! reentrant) {
 			if (last != '\n')
 				putchar('\n');
 			putchar('.');
-		} else 
+		} else
 			putchar(' ');
-		
-		/*
-		 * If we don't have whitespace before us (and none after
-		 * the opening delimiter), then suppress macro
-		 * whitespace with Pf.
-		 */
-		if (' ' != last && '\n' != last && ' ' != buf[*start])
-			printf("Pf ");
+
+		/* Print the macro corresponding to this format code. */
 
 		switch (fmt) {
 		case (FMT_ITALIC):
@@ -526,7 +600,8 @@ formatcode(struct state *st, const char 
 		default:
 			abort();
 		}
-	}
+	} else
+		outbuf_flush(st);
 
 	/*
 	 * Process until we reach the end marker (e.g., '>') or until we
@@ -730,20 +805,20 @@ command(struct state *st, const char *bu
 				st->sect = SECT_SYNOPSIS;
 		} 
 		formatcodeln(st, buf, &start, end, 1);
-		putchar('\n');
+		putchar(last = '\n');
 		st->haspar = 1;
 		break;
 	case (CMD_HEAD2):
 		printf(".Ss ");
 		formatcodeln(st, buf, &start, end, 1);
-		putchar('\n');
+		putchar(last = '\n');
 		st->haspar = 1;
 		break;
 	case (CMD_HEAD3):
 		puts(".Pp");
 		printf(".Em ");
 		formatcodeln(st, buf, &start, end, 0);
-		putchar('\n');
+		putchar(last = '\n');
 		puts(".Pp");
 		st->haspar = 1;
 		break;
@@ -751,7 +826,7 @@ command(struct state *st, const char *bu
 		puts(".Pp");
 		printf(".No ");
 		formatcodeln(st, buf, &start, end, 0);
-		putchar('\n');
+		putchar(last = '\n');
 		puts(".Pp");
 		st->haspar = 1;
 		break;
@@ -805,7 +880,7 @@ command(struct state *st, const char *bu
 		case (LIST_TAG):
 			printf(".It ");
 			formatcodeln(st, buf, &start, end, 0);
-			putchar('\n');
+			putchar(last = '\n');
 			break;
 		case (LIST_ENUM):
 			/* FALLTHROUGH */
@@ -861,7 +936,6 @@ command(struct state *st, const char *bu
 static void
 verbatim(struct state *st, const char *buf, size_t start, size_t end)
 {
-	int		 last;
 	size_t		 i;
 
 	if ( ! st->parsing || st->paused)
@@ -916,7 +990,7 @@ again:
 		if ('\\' == buf[start])
 			printf("e");
 	}
-	putchar('\n');
+	putchar(last = '\n');
 	puts(".Ed");
 }
 
@@ -946,14 +1020,13 @@ hasmatch(const char *buf, size_t start, 
  * If we're an ending bracket, see if we have a stack already.
  */
 static int
-dosynopsisop(const char *buf, int *last,
-	size_t *start, size_t end, size_t *opstack)
+dosynopsisop(const char *buf, size_t *start, size_t end, size_t *opstack)
 {
 
 	assert('[' == buf[*start] || ']' == buf[*start]);
 
 	if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) {
-		if ('\n' != *last)
+		if ('\n' != last)
 			putchar('\n');
 		puts(".Oo");
 		(*opstack)++;
@@ -961,7 +1034,7 @@ dosynopsisop(const char *buf, int *last,
 		return(0);
 
 	if (']' == buf[*start] && *opstack > 0) {
-		if ('\n' != *last)
+		if ('\n' != last)
 			putchar('\n');
 		puts(".Oc");
 		(*opstack)--;
@@ -969,7 +1042,7 @@ dosynopsisop(const char *buf, int *last,
 		return(0);
 
 	(*start)++;
-	*last = '\n';
+	last = '\n';
 	while (' ' == buf[*start])
 		(*start)++;
 	return(1);
@@ -998,7 +1071,7 @@ donamenm(struct state *st, const char *b
 				break;
 		formatcodeln(st, buf, start, word, 1);
 		if (*start == end) {
-			putchar('\n');
+			putchar(last = '\n');
 			continue;
 		}
 		assert(',' == buf[*start]);
@@ -1048,7 +1121,7 @@ ordinary(struct state *st, const char *b
 				start++;
 			fputs(".Nd ", stdout);
 			formatcodeln(st, buf, &start, end, 1);
-			putchar('\n');
+			putchar(last = '\n');
 			return;
 		}
 	}
@@ -1057,6 +1130,7 @@ ordinary(struct state *st, const char *b
 		puts(".Pp");
 
 	st->haspar = 0;
+	st->hasnl = 1;
 	last = '\n';
 	opstack = 0;
 
@@ -1071,9 +1145,9 @@ ordinary(struct state *st, const char *b
 			else if ('\n' == buf[start])
 				break;
 			else if ('\n' == last && '.' == buf[start])
-				printf("\\&");
+				outbuf_addstr(st, "\\&");
 			else if ('\n' == last && '\'' == buf[start])
-				printf("\\&");
+				outbuf_addstr(st, "\\&");
 			/*
 			 * If we're in the SYNOPSIS, have square
 			 * brackets indicate that we're opening and
@@ -1082,12 +1156,14 @@ ordinary(struct state *st, const char *b
 			if (SECT_SYNOPSIS == st->sect &&
 				('[' == buf[start] || 
 				 ']' == buf[start]) &&
-				dosynopsisop(buf, &last, 
-					&start, end, &opstack))
+				dosynopsisop(buf, &start, end, &opstack))
 				continue;
-			putchar(last = buf[start++]);
-			if ('\\' == last)
-				putchar('e');
+			last = buf[start++];
+			if (' ' == last) {
+				outbuf_flush(st);
+				putchar(' ');
+			} else
+				outbuf_addchar(st);
 		}
 
 		if (start < end - 1 && '<' == buf[start + 1]) {
@@ -1108,6 +1184,7 @@ ordinary(struct state *st, const char *b
 				}
 				/* End the macro line. */
 				putchar(last = '\n');
+				st->hasnl = 1;
 				/*
 				 * Consume all whitespace
 				 * so we don't accidentally start
@@ -1117,12 +1194,7 @@ ordinary(struct state *st, const char *b
 					start++;
 			}
 		} else if (start < end && '\n' == buf[start]) {
-			/*
-			 * Print the newline only if we haven't already
-			 * printed a newline.
-			 */
-			if (last != '\n')
-				putchar(last = buf[start]);
+			outbuf_newln(st);
 			if (++start >= end)
 				continue;
 			/*
@@ -1133,18 +1205,14 @@ ordinary(struct state *st, const char *b
 			 * have a macro subsequent it, which may be
 			 * possible if we have an escape next.
 			 */
-			if (' ' == buf[start] || '\t' == buf[start]) {
+			if (' ' == buf[start] || '\t' == buf[start])
 				puts(".br");
-				last = '\n';
-			}
 			for ( ; start < end; start++)
 				if (' ' != buf[start] && '\t' != buf[start])
 					break;
 		} 
 	}
-
-	if (last != '\n')
-		putchar('\n');
+	outbuf_newln(st);
 }
 
 /*
--
 To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv

                 reply	other threads:[~2014-07-15 19:03 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=201407151903.s6FJ37TW030865@krisdoz.my.domain \
    --to=schwarze@mdocml.bsd.lv \
    --cc=source@mdocml.bsd.lv \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for the URLs of the NNTP newsgroup(s).