source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* docbook2mdoc: Store the information whether a node is preceded by whitespace
@ 2019-04-06 22:38 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2019-04-06 22:38 UTC (permalink / raw)
  To: source

Log Message:
-----------
Record in the node tree whether each node is preceded by whitespace.
Use that information in the formatter
to suppress the insertion of whitespace
in text-text, macro-text, and macro-macro node sequences.

Text-macro sequences are not yet handled.  They are more complicated
because they require emitting a .Pf macro as part of a text node
depending on the spacing properties of the *following* macro node.

Modified Files:
--------------
    docbook2mdoc:
        docbook2mdoc.c
        macro.c
        macro.h
        node.h
        parse.c

Revision Data
-------------
Index: docbook2mdoc.c
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/docbook2mdoc.c,v
retrieving revision 1.91
retrieving revision 1.92
diff -Ldocbook2mdoc.c -Ldocbook2mdoc.c -u -p -r1.91 -r1.92
--- docbook2mdoc.c
+++ docbook2mdoc.c
@@ -642,6 +642,7 @@ pnode_print(struct format *p, struct pno
 	if (pn == NULL)
 		return;
 
+	p->spc = pn->spc;
 	sv = p->linestate;
 
 	switch (pn->node) {
@@ -674,6 +675,7 @@ pnode_print(struct format *p, struct pno
 		break;
 	case NODE_EDITOR:
 		print_text(p, "editor:", ARG_SPACE);
+		sv = LINE_TEXT;
 		macro_open(p, "An");
 		break;
 	case NODE_EMAIL:
@@ -692,7 +694,7 @@ pnode_print(struct format *p, struct pno
 	case NODE_ESCAPE:
 		if (p->linestate == LINE_NEW)
 			p->linestate = LINE_TEXT;
-		else
+		else if (pn->spc || p->linestate == LINE_MACRO)
 			putchar(' ');
 		fputs(pn->b, stdout);
 		break;
@@ -822,7 +824,7 @@ pnode_print(struct format *p, struct pno
 		}
 		if (p->linestate == LINE_NEW)
 			p->linestate = LINE_TEXT;
-		else
+		else if (pn->spc || p->linestate == LINE_MACRO)
 			putchar(' ');
 
 		/*
Index: macro.c
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/macro.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -Lmacro.c -Lmacro.c -u -p -r1.5 -r1.6
--- macro.c
+++ macro.c
@@ -17,6 +17,7 @@
 #include <assert.h>
 #include <ctype.h>
 #include <stdio.h>
+#include <string.h>
 
 #include "node.h"
 #include "macro.h"
@@ -39,6 +40,8 @@ macro_open(struct format *f, const char 
 		break;
 	case LINE_MACRO:
 		putchar(' ');
+		if (f->spc == 0)
+			fputs("Ns ", stdout);
 		break;
 	}
 	fputs(name, stdout);
@@ -61,20 +64,68 @@ macro_line(struct format *f, const char 
 }
 
 /*
- * If the next node is a text node starting with closing punctuation,
- * emit the closing punctuation as a trailing macro argument.
+ * At the end of a macro, decide whether the line needs to remain open
+ * because the next node follows without intervening whitespace;
+ * otherwise, close the line.
  */
 void
 macro_closepunct(struct format *f, struct pnode *pn)
 {
-	if ((pn = TAILQ_NEXT(pn, child)) != NULL &&
-	    pn->node == NODE_TEXT && pn->bsz > 0 &&
-	    (pn->b[0] == ',' || pn->b[0] == '.') &&
-	    (pn->bsz == 1 || isspace((unsigned char)pn->b[1]))) {
-		putchar(' ');
-		putchar(pn->b[0]);
-		pn->b++;
-		pn->bsz--;
+	char		*cp;
+
+	if ((pn = TAILQ_NEXT(pn, child)) != NULL && pn->spc == 0) {
+
+		/*
+		 * If a non-text node follows without intervening
+		 * whitespace, the handler of that node will decide
+		 * whether and how to suppress whitespace.  To allow
+		 * that, the macro line needs to remain open.
+		 */
+
+		if (pn->node != NODE_TEXT && pn->node != NODE_ESCAPE)
+			return;
+
+		/*
+		 * Give closing punctuation
+		 * in the form of trailing macro arguments.
+		 */
+
+		while (*pn->b != '\0' &&
+		    strchr("!),.:;?]", *pn->b) != NULL) {
+			putchar(' ');
+			putchar(*pn->b);
+			pn->b++;
+			pn->bsz--;
+		}
+
+		/*
+		 * Text follows without intervening whitespace.
+		 * Append the first word with .Ns.
+		 */
+
+		if (*pn->b != '\0' && isspace((unsigned char)*pn->b) == 0) {
+			fputs(" Ns", stdout);
+			for (cp = pn->b; *cp != '\0'; cp++)
+				if (isspace((unsigned char)*cp))
+					break;
+			*cp = '\0';
+			macro_addarg(f, pn->b, ARG_SPACE);
+			pn->bsz -= cp - pn->b;
+			pn->b = cp;
+			if (pn->bsz > 0) {
+				pn->b++;
+				pn->bsz--;
+				pn->spc = 1;
+			}
+		}
+
+		/* Skip whitespace after the first word. */
+
+		while (isspace((unsigned char)*pn->b)) {
+			pn->b++;
+			pn->bsz--;
+			pn->spc = 1;
+		}
 	}
 	macro_close(f);
 }
Index: macro.h
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/macro.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -Lmacro.h -Lmacro.h -u -p -r1.2 -r1.3
--- macro.h
+++ macro.h
@@ -28,6 +28,7 @@ enum	linestate {
 
 struct	format {
 	int		 level;      /* Header level, starting at 1. */
+	int		 spc;	     /* Whitespace before next macro. */
 	enum linestate	 linestate;
 };
 
Index: parse.c
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/parse.c,v
retrieving revision 1.15
retrieving revision 1.16
diff -Lparse.c -Lparse.c -u -p -r1.15 -r1.16
--- parse.c
+++ parse.c
@@ -52,6 +52,7 @@ struct	parse {
 	int		 nline;  /* Line number of next token. */
 	int		 ncol;   /* Column number of next token. */
 	int		 del;    /* Levels of nested nodes being deleted. */
+	int		 spc;	 /* Whitespace before the next element. */
 	int		 attr;   /* The most recent attribute is valid. */
 	int		 warn;
 };
@@ -294,6 +295,7 @@ static void
 xml_char(struct parse *ps, const char *p, int sz)
 {
 	struct pnode	*dat;
+	size_t		 newsz;
 
 	if (ps->del > 0)
 		return;
@@ -309,6 +311,7 @@ xml_char(struct parse *ps, const char *p
 			exit(1);
 		}
 		dat->node = NODE_TEXT;
+		dat->spc = ps->spc;
 		dat->parent = ps->cur;
 		TAILQ_INIT(&dat->childq);
 		TAILQ_INIT(&dat->attrq);
@@ -323,24 +326,35 @@ xml_char(struct parse *ps, const char *p
 	/* Append to the current text node. */
 
 	assert(sz >= 0);
-	ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1);
+	newsz = ps->cur->bsz + (ps->cur->bsz && ps->spc) + sz;
+	ps->cur->b = realloc(ps->cur->b, newsz + 1);
 	if (ps->cur->b == NULL) {
 		perror(NULL);
 		exit(1);
 	}
+	if (ps->cur->bsz && ps->spc)
+		ps->cur->b[ps->cur->bsz++] = ' ';
 	memcpy(ps->cur->b + ps->cur->bsz, p, sz);
-	ps->cur->bsz += sz;
-	ps->cur->b[ps->cur->bsz] = '\0';
+	ps->cur->b[ps->cur->bsz = newsz] = '\0';
 	ps->cur->real = ps->cur->b;
+	ps->spc = 0;
 }
 
+/*
+ * Close out the text node and strip trailing whitespace, if one is open.
+ */
 static void
-pnode_trim(struct pnode *pn)
+pnode_closetext(struct parse *p)
 {
-	assert(pn->node == NODE_TEXT);
-	for (; pn->bsz > 0; pn->b[--pn->bsz] = '\0')
-		if (isspace((unsigned char)pn->b[pn->bsz - 1]) == 0)
-			break;
+	struct pnode	*n;
+
+	if ((n = p->cur) == NULL || n->node != NODE_TEXT)
+		return;
+	p->cur = n->parent;
+	while (n->bsz > 0 && isspace((unsigned char)n->b[n->bsz - 1])) {
+		n->b[--n->bsz] = '\0';
+		p->spc = 1;
+	}
 }
 
 static void
@@ -357,11 +371,7 @@ xml_entity(struct parse *p, const char *
 		return;
 	}
 
-	/* Close out the text node, if there is one. */
-	if (p->cur->node == NODE_TEXT) {
-		pnode_trim(p->cur);
-		p->cur = p->cur->parent;
-	}
+	pnode_closetext(p);
 
 	if (p->tree->flags & TREE_CLOSED && p->cur == p->tree->root)
 		warn_msg(p, "entity after end of document: &%s;", name);
@@ -383,10 +393,12 @@ xml_entity(struct parse *p, const char *
 	}
 	dat->node = NODE_ESCAPE;
 	dat->bsz = strlen(dat->b);
+	dat->spc = p->spc;
 	dat->parent = p->cur;
 	TAILQ_INIT(&dat->childq);
 	TAILQ_INIT(&dat->attrq);
 	TAILQ_INSERT_TAIL(&p->cur->childq, dat, child);
+	p->spc = 0;
 }
 
 /*
@@ -410,11 +422,7 @@ xml_elem_start(struct parse *ps, const c
 		return;
 	}
 
-	/* Close out the text node, if there is one. */
-	if (ps->cur != NULL && ps->cur->node == NODE_TEXT) {
-		pnode_trim(ps->cur);
-		ps->cur = ps->cur->parent;
-	}
+	pnode_closetext(ps);
 
 	for (elem = elements; elem->name != NULL; elem++)
 		if (strcmp(elem->name, name) == 0)
@@ -449,6 +457,7 @@ xml_elem_start(struct parse *ps, const c
 		exit(1);
 	}
 	dat->node = elem->node;
+	dat->spc = ps->spc;
 	dat->parent = ps->cur;
 	TAILQ_INIT(&dat->childq);
 	TAILQ_INIT(&dat->attrq);
@@ -519,11 +528,8 @@ xml_elem_end(struct parse *ps, const cha
 		return;
 	}
 
-	/* Close out the text node, if there is one. */
-	if (ps->del == 0 && ps->cur != NULL && ps->cur->node == NODE_TEXT) {
-		pnode_trim(ps->cur);
-		ps->cur = ps->cur->parent;
-	}
+	if (ps->del == 0)
+		pnode_closetext(ps);
 
 	if (name != NULL) {
 		for (elem = elements; elem->name != NULL; elem++)
@@ -558,6 +564,7 @@ xml_elem_end(struct parse *ps, const cha
 			ps->tree->flags |= TREE_CLOSED;
 		else
 			ps->cur = ps->cur->parent;
+		ps->spc = 0;
 		break;
 	}
 	assert(ps->del == 0);
@@ -651,6 +658,7 @@ parse_string(struct parse *p, char *b, s
 	size_t		 pend;  /* Offset of the end of the current word. */
 	int		 elem_end;
 
+	p->spc = 0;
 	pend = 0;
 	for (;;) {
 
@@ -663,6 +671,7 @@ parse_string(struct parse *p, char *b, s
 		if ((poff = pend) == rlen)
 			break;
 		if (isspace((unsigned char)b[pend])) {
+			p->spc = 1;
 			increment(p, b, &pend, refill);
 			continue;
 		}
@@ -836,10 +845,7 @@ parse_file(struct parse *p, int fd, cons
 		perror(fname);
 		p->tree->flags |= TREE_FAIL;
 	}
-	if (p->cur != NULL && p->cur->node == NODE_TEXT) {
-		pnode_trim(p->cur);
-		p->cur = p->cur->parent;
-	}
+	pnode_closetext(p);
 	if ((p->tree->flags & TREE_CLOSED) == 0)
 		warn_msg(p, "document not closed");
 	return p->tree;
Index: node.h
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/node.h,v
retrieving revision 1.9
retrieving revision 1.10
diff -Lnode.h -Lnode.h -u -p -r1.9 -r1.10
--- node.h
+++ node.h
@@ -198,6 +198,7 @@ struct	pnode {
 	char		*b;        /* String value. */
 	char		*real;     /* Storage for "b". */
 	size_t		 bsz;      /* strlen(b) */
+	int		 spc;      /* Whitespace before this node. */
 	struct pnode	*parent;   /* Parent node or NULL. */
 	struct pnodeq	 childq;   /* Queue of children. */
 	struct pattrq	 attrq;    /* Attributes of the node. */
--
 To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2019-04-06 22:38 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-06 22:38 docbook2mdoc: Store the information whether a node is preceded by schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).