source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* docbook2mdoc: For macro_addarg(), macro_addnode(), and macro_nodeline(),
@ 2019-03-25 17:29 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2019-03-25 17:29 UTC (permalink / raw)
  To: source

Log Message:
-----------
For macro_addarg(), macro_addnode(), and macro_nodeline(), provide 
a flag ARG_SINGLE to request quoting of strings containing whitespace.
Use it for .Dt, .Fa, .Fo, .Nm, and .Xr.

Do not \&-escape macros in quoted strings.
Escape quote characters in macro arguments.
NUL-terminate the content of text nodes.
Eliminate the awkward "bufappend()" global output buffer.
Simplify and improve handling of <paramdef>.

Substantial functional improvements, easier to read, yet minus 20 LOC.

Modified Files:
--------------
    docbook2mdoc:
        docbook2mdoc.c

Revision Data
-------------
Index: docbook2mdoc.c
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/docbook2mdoc.c,v
retrieving revision 1.71
retrieving revision 1.72
diff -Ldocbook2mdoc.c -Ldocbook2mdoc.c -u -p -r1.71 -r1.72
--- docbook2mdoc.c
+++ docbook2mdoc.c
@@ -48,9 +48,6 @@ struct	parse {
 	unsigned int	 flags; /* document-wide flags */
 	struct pnode	*root; /* root of parse tree */
 	struct pnode	*cur; /* current node in tree */
-	char		*b; /* NUL-terminated buffer for pre-print */
-	size_t		 bsz; /* current length of b */
-	size_t		 mbsz; /* max bsz allocation */
 	int		 level; /* header level, starting at 1 */
 	enum linestate	 linestate;
 };
@@ -273,7 +270,7 @@ xml_char(void *arg, const XML_Char *p, i
 			return;
 		p += i;
 		sz -= i;
-		dat = calloc(1, sizeof(struct pnode));
+		dat = calloc(1, sizeof(*dat));
 		if (dat == NULL) {
 			perror(NULL);
 			exit(1);
@@ -290,14 +287,14 @@ xml_char(void *arg, const XML_Char *p, i
 
 	/* Append to current buffer. */
 	assert(sz >= 0);
-	ps->cur->b = realloc(ps->cur->b,
-		ps->cur->bsz + (size_t)sz);
+	ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1);
 	if (ps->cur->b == NULL) {
 		perror(NULL);
 		exit(1);
 	}
 	memcpy(ps->cur->b + ps->cur->bsz, p, sz);
-	ps->cur->bsz += (size_t)sz;
+	ps->cur->bsz += sz;
+	ps->cur->b[ps->cur->bsz] = '\0';
 	ps->cur->real = ps->cur->b;
 }
 
@@ -305,7 +302,7 @@ static void
 pnode_trim(struct pnode *pn)
 {
 	assert(pn->node == NODE_TEXT);
-	for ( ; pn->bsz > 0; pn->bsz--)
+	for (; pn->bsz > 0; pn->b[--pn->bsz] = '\0')
 		if ( ! isspace((unsigned char)pn->b[pn->bsz - 1]))
 			break;
 }
@@ -369,7 +366,7 @@ xml_elem_start(void *arg, const XML_Char
 	if (node->node == NODE_INLINEEQUATION)
 		ps->flags |= PARSE_EQN;
 
-	if ((dat = calloc(1, sizeof(struct pnode))) == NULL) {
+	if ((dat = calloc(1, sizeof(*dat))) == NULL) {
 		perror(NULL);
 		exit(1);
 	}
@@ -406,7 +403,7 @@ xml_elem_start(void *arg, const XML_Char
 		for (val = 0; val < ATTRVAL__MAX; val++)
 			if (strcmp(att[1], attrvals[val]) == 0)
 				break;
-		pattr = calloc(1, sizeof(struct pattr));
+		pattr = calloc(1, sizeof(*pattr));
 		pattr->key = key;
 		pattr->val = val;
 		if (val == ATTRVAL__MAX)
@@ -526,55 +523,6 @@ pnode_getattr_raw(struct pnode *pn, enum
 }
 
 /*
- * Reset the lookaside buffer.
- */
-static void
-bufclear(struct parse *p)
-{
-
-	p->b[p->bsz = 0] = '\0';
-}
-
-/*
- * Append NODE_TEXT contents to the current buffer, reallocating its
- * size if necessary.
- * The buffer is ALWAYS NUL-terminated.
- */
-static void
-bufappend(struct parse *p, struct pnode *pn)
-{
-
-	assert(pn->node == NODE_TEXT);
-	if (p->bsz + pn->bsz + 1 > p->mbsz) {
-		p->mbsz = p->bsz + pn->bsz + 1;
-		if ((p->b = realloc(p->b, p->mbsz)) == NULL) {
-			perror(NULL);
-			exit(1);
-		}
-	}
-	memcpy(p->b + p->bsz, pn->b, pn->bsz);
-	p->bsz += pn->bsz;
-	p->b[p->bsz] = '\0';
-}
-
-/*
- * Recursively append all NODE_TEXT nodes to the buffer.
- * This descends into non-text nodes, but doesn't do anything beyond
- * them.
- * In other words, this is a recursive text grok.
- */
-static void
-bufappend_r(struct parse *p, struct pnode *pn)
-{
-	struct pnode	*pp;
-
-	if (pn->node == NODE_TEXT)
-		bufappend(p, pn);
-	TAILQ_FOREACH(pp, &pn->childq, child)
-		bufappend_r(p, pp);
-}
-
-/*
  * Recursively search and return the first instance of "node".
  */
 static struct pnode *
@@ -626,36 +574,62 @@ macro_line(struct parse *p, const char *
 	macro_close(p);
 }
 
-#define	MACROLINE_UPPER	1
-#define	MACROLINE_NOWS	2
+#define	ARG_SPACE	1  /* Insert whitespace before this argument. */
+#define	ARG_SINGLE	2  /* Quote argument if it contains whitespace. */
+#define	ARG_QUOTED	4  /* We are already in a quoted argument. */
+#define	ARG_UPPER	8  /* Convert argument to upper case. */
 /*
  * Print an argument string on a macro line, collapsing whitespace.
  */
 static void
-macro_addarg(struct parse *p, const char *arg, int fl)
+macro_addarg(struct parse *p, const char *arg, int flags)
 {
 	const char	*cp;
-	int		 wantspace;
 
 	assert(p->linestate == LINE_MACRO);
-	wantspace = !(fl & MACROLINE_NOWS);
+
+	/* Quote if requested and necessary. */
+
+	if ((flags & (ARG_SINGLE | ARG_QUOTED)) == ARG_SINGLE) {
+		for (cp = arg; *cp != '\0'; cp++)
+			if (isspace((unsigned char)*cp))
+				break;
+		if (*cp != '\0') {
+			if (flags & ARG_SPACE) {
+				putchar(' ');
+				flags &= ~ ARG_SPACE;
+			}
+			putchar('"');
+			flags = ARG_QUOTED;
+		}
+	}
+
 	for (cp = arg; *cp != '\0'; cp++) {
+
+		/* Collapse whitespace. */
+
 		if (isspace((unsigned char)*cp)) {
-			wantspace = 1;
+			flags |= ARG_SPACE;
 			continue;
-		} else if (wantspace) {
+		} else if (flags & ARG_SPACE) {
 			putchar(' ');
-			wantspace = 0;
+			flags &= ~ ARG_SPACE;
 		}
+
 		/* Escape us if we look like a macro. */
-		if ((cp == arg || cp[-1] == ' ') &&
+
+		if ((flags & ARG_QUOTED) == 0 &&
+		    (cp == arg || isspace((unsigned char)cp[-1])) &&
 		    isupper((unsigned char)cp[0]) &&
 		    islower((unsigned char)cp[1]) &&
 		    (cp[2] == '\0' || cp[2] == ' ' ||
 		     (islower((unsigned char)cp[2]) &&
 		      (cp[3] == '\0' || cp[3] == ' '))))
 			fputs("\\&", stdout);
-		if (fl & MACROLINE_UPPER)
+
+		if (*cp == '"')
+			fputs("\\(dq", stdout);
+		else if (flags & ARG_UPPER)
 			putchar(toupper((unsigned char)*cp));
 		else
 			putchar(*cp);
@@ -668,26 +642,73 @@ static void
 macro_argline(struct parse *p, const char *name, const char *arg)
 {
 	macro_open(p, name);
-	macro_addarg(p, arg, 0);
+	macro_addarg(p, arg, ARG_SPACE);
 	macro_close(p);
 }
 
 /*
- * Recurse nodes to print arguments on a macro line.
+ * Recursively append text from the children of a node to a macro line.
  */
 static void
-macro_addnode(struct parse *p, struct pnode *pn, int fl)
+macro_addnode(struct parse *p, struct pnode *pn, int flags)
 {
-	bufclear(p);
-	bufappend_r(p, pn);
-	macro_addarg(p, p->b, fl);
+	int		 quote_now;
+
+	assert(p->linestate == LINE_MACRO);
+
+	/*
+	 * If the only child is a text node, just add that text,
+	 * letting macro_addarg() decide about quoting.
+	 */
+
+	pn = TAILQ_FIRST(&pn->childq);
+	if (pn != NULL && pn->node == NODE_TEXT &&
+	    TAILQ_NEXT(pn, child) == NULL) {
+		macro_addarg(p, pn->b, flags);
+		return;
+	}
+
+	/*
+	 * If we want the argument quoted and are not already
+	 * in a quoted context, quote now.
+	 */
+
+	quote_now = 0;
+	if (flags & ARG_SINGLE) {
+		if ((flags & ARG_QUOTED) == 0) {
+			if (flags & ARG_SPACE) {
+				putchar(' ');
+				flags &= ~ARG_SPACE;
+			}
+			putchar('"');
+			flags |= ARG_QUOTED;
+			quote_now = 1;
+		}
+		flags &= ~ARG_SINGLE;
+	}
+
+	/*
+	 * Iterate to child and sibling nodes,
+	 * inserting whitespace between nodes.
+	 */
+
+	while (pn != NULL) {
+		if (pn->node == NODE_TEXT)
+			macro_addarg(p, pn->b, flags);
+		else
+			macro_addnode(p, pn, flags);
+		pn = TAILQ_NEXT(pn, child);
+		flags |= ARG_SPACE;
+	}
+	if (quote_now)
+		putchar('"');
 }
 
 static void
-macro_nodeline(struct parse *p, const char *name, struct pnode *pn)
+macro_nodeline(struct parse *p, const char *name, struct pnode *pn, int flags)
 {
 	macro_open(p, name);
-	macro_addnode(p, pn, 0);
+	macro_addnode(p, pn, ARG_SPACE | flags);
 	macro_close(p);
 }
 
@@ -780,7 +801,9 @@ pnode_printrefsect(struct parse *p, stru
 		return;
 
 	level = ++p->level;
-	flags = level == 1 ? MACROLINE_UPPER : 0;
+	flags = ARG_SPACE;
+	if (level == 1)
+		flags |= ARG_UPPER;
 	if (level < 3) {
 		switch (pn->node) {
 		case NODE_CAUTION:
@@ -838,7 +861,7 @@ pnode_printrefsect(struct parse *p, stru
 		macro_addnode(p, pp, flags);
 		pnode_unlink(pp);
 	} else
-		macro_addarg(p, title, 0);
+		macro_addarg(p, title, ARG_SPACE | ARG_QUOTED);
 	macro_close(p);
 }
 
@@ -859,13 +882,13 @@ pnode_printciterefentry(struct parse *p,
 	}
 	macro_open(p, "Xr");
 	if (title == NULL)
-		macro_addarg(p, "unknown", 0);
+		macro_addarg(p, "unknown", ARG_SPACE);
 	else
-		macro_addnode(p, title, 0);
+		macro_addnode(p, title, ARG_SPACE | ARG_SINGLE);
 	if (manvol == NULL)
-		macro_addarg(p, "1", 0);
+		macro_addarg(p, "1", ARG_SPACE);
 	else
-		macro_addnode(p, manvol, 0);
+		macro_addnode(p, manvol, ARG_SPACE | ARG_SINGLE);
 	macro_close(p);
 	pnode_unlinksub(pn);
 }
@@ -884,13 +907,13 @@ pnode_printrefmeta(struct parse *p, stru
 	}
 	macro_open(p, "Dt");
 	if (title == NULL)
-		macro_addarg(p, "UNKNOWN", 0);
+		macro_addarg(p, "UNKNOWN", ARG_SPACE);
 	else
-		macro_addnode(p, title, MACROLINE_UPPER);
+		macro_addnode(p, title, ARG_SPACE | ARG_SINGLE | ARG_UPPER);
 	if (manvol == NULL)
-		macro_addarg(p, "1", 0);
+		macro_addarg(p, "1", ARG_SPACE);
 	else
-		macro_addnode(p, manvol, 0);
+		macro_addnode(p, manvol, ARG_SPACE | ARG_SINGLE);
 	macro_close(p);
 	pnode_unlink(pn);
 }
@@ -908,38 +931,12 @@ pnode_printfuncdef(struct parse *p, stru
 			func = pp;
 	}
 	if (ftype != NULL)
-		macro_nodeline(p, "Ft", ftype);
+		macro_argline(p, "Ft", ftype->b);
 	macro_open(p, "Fo");
 	if (func == NULL)
-		macro_addarg(p, "UNKNOWN", 0);
+		macro_addarg(p, "UNKNOWN", ARG_SPACE);
 	else
-		macro_addnode(p, func, 0);
-	macro_close(p);
-}
-
-static void
-pnode_printparamdef(struct parse *p, struct pnode *pn)
-{
-	struct pnode	*pp, *ptype, *param;
-	int		 flags;
-
-	ptype = param = NULL;
-	TAILQ_FOREACH(pp, &pn->childq, child) {
-		if (pp->node == NODE_TEXT)
-			ptype = pp;
-		else if (pp->node == NODE_PARAMETER)
-			param = pp;
-	}
-	macro_open(p, "Fa \"");
-	flags = MACROLINE_NOWS;
-	if (ptype != NULL) {
-		macro_addnode(p, ptype, flags);
-		flags = 0;
-	}
-	if (param != NULL)
-		macro_addnode(p, param, flags);
-	flags = MACROLINE_NOWS;
-	macro_addarg(p, "\"", flags);
+		macro_addnode(p, func, ARG_SPACE | ARG_SINGLE);
 	macro_close(p);
 }
 
@@ -1016,7 +1013,7 @@ pnode_printfuncprototype(struct parse *p
 
 	TAILQ_FOREACH(pp, &pn->childq, child)
 		if (pp->node == NODE_PARAMDEF)
-			pnode_printparamdef(p, pp);
+			macro_nodeline(p, "Fa", pp, ARG_SINGLE);
 
 	macro_line(p, "Fc");
 	pnode_unlinksub(pn);
@@ -1053,7 +1050,7 @@ pnode_printarg(struct parse *p, struct p
 			macro_open(p, "Ar");
 		pnode_print(p, pp);
 		if (isrep && pp->node == NODE_TEXT)
-			macro_addarg(p, "...", 0);
+			macro_addarg(p, "...", ARG_SPACE);
 	}
 	pnode_unlinksub(pn);
 }
@@ -1098,8 +1095,8 @@ pnode_printgroup(struct parse *p, struct
 		while (np != NULL) {
 			if (pp->node != np->node)
 				break;
-			macro_addarg(p, "|", 0);
-			macro_addnode(p, np, 0);
+			macro_addarg(p, "|", ARG_SPACE);
+			macro_addnode(p, np, ARG_SPACE);
 			pp = np;
 			np = TAILQ_NEXT(np, child);
 		}
@@ -1123,8 +1120,9 @@ pnode_printprologue(struct parse *p, str
 	else {
 		macro_open(p, "Dt");
 		macro_addarg(p,
-		    pnode_getattr_raw(p->root, ATTRKEY_ID, "UNKNOWN"), 0);
-		macro_addarg(p, "1", 0);
+		    pnode_getattr_raw(p->root, ATTRKEY_ID, "UNKNOWN"),
+		    ARG_SPACE | ARG_SINGLE | ARG_UPPER);
+		macro_addarg(p, "1", ARG_SPACE);
 		macro_close(p);
 	}
 	macro_line(p, "Os");
@@ -1151,7 +1149,7 @@ pnode_printvarlistentry(struct parse *p,
 		if (pp->node != NODE_TERM)
 			continue;
 		if ( ! first)
-			macro_addarg(p, ",", MACROLINE_NOWS);
+			macro_addarg(p, ",", 0);
 		pnode_print(p, pp);
 		first = 0;
 	}
@@ -1232,7 +1230,7 @@ pnode_printvariablelist(struct parse *p,
 		if (pp->node == NODE_VARLISTENTRY)
 			pnode_print(p, pp);
 		else
-			macro_nodeline(p, "It", pp);
+			macro_nodeline(p, "It", pp, 0);
 	}
 	macro_line(p, "El");
 	pnode_unlinksub(pn);
@@ -1379,11 +1377,7 @@ pnode_print(struct parse *p, struct pnod
 		pnode_printpara(p, pn);
 		break;
 	case NODE_PARAMETER:
-		/* Suppress non-text children... */
-		macro_open(p, "Fa \"");
-		macro_addnode(p, pn, MACROLINE_NOWS);
-		macro_addarg(p, "\"", MACROLINE_NOWS);
-		macro_close(p);
+		macro_nodeline(p, "Fa", pn, ARG_SINGLE);
 		pnode_unlinksub(pn);
 		break;
 	case NODE_QUOTE:
@@ -1403,7 +1397,7 @@ pnode_print(struct parse *p, struct pnod
 	case NODE_REFNAME:
 		/* Suppress non-text children... */
 		macro_open(p, "Nm");
-		macro_addnode(p, pn, 0);
+		macro_addnode(p, pn, ARG_SPACE | ARG_SINGLE);
 		pnode_unlinksub(pn);
 		break;
 	case NODE_REFNAMEDIV:
@@ -1440,9 +1434,7 @@ pnode_print(struct parse *p, struct pnod
 		pnode_printtable(p, pn);
 		break;
 	case NODE_TEXT:
-		bufclear(p);
-		bufappend(p, pn);
-		if (p->bsz == 0) {
+		if (pn->bsz == 0) {
 			assert(pn->real != pn->b);
 			break;
 		}
@@ -1457,7 +1449,7 @@ pnode_print(struct parse *p, struct pnod
 		 * XXX: all whitespace, including tabs (?).
 		 * Remember to escape control characters and escapes.
 		 */
-		cp = p->b;
+		cp = pn->b;
 
 		/*
 		 * There's often a superfluous "-" in its <option> tags
@@ -1574,7 +1566,7 @@ pnode_print(struct parse *p, struct pnod
 		    pn->parent->node == NODE_REFNAMEDIV &&
 		    TAILQ_NEXT(pn, child) != NULL &&
 		    TAILQ_NEXT(pn, child)->node == NODE_REFNAME)
-			macro_addarg(p, ",", 0);
+			macro_addarg(p, ",", ARG_SPACE);
 		if (sv == LINE_NEW)
 			macro_close(p);
 		break;
@@ -1614,7 +1606,6 @@ readfile(XML_Parser xp, int fd,
 
 	memset(&p, 0, sizeof(struct parse));
 
-	p.b = malloc(p.bsz = p.mbsz = 1024);
 	p.fname = fn;
 	p.xml = xp;
 
@@ -1641,14 +1632,12 @@ readfile(XML_Parser xp, int fd,
 		if (p.linestate != LINE_NEW)
 			putchar('\n');
 		pnode_free(p.root);
-		free(p.b);
 		return rc != 0 && p.stop == 0;
 	}
 
 	/* Read error has occured. */
 	perror(fn);
 	pnode_free(p.root);
-	free(p.b);
 	return 0;
 }
 
--
 To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2019-03-25 17:29 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-25 17:29 docbook2mdoc: For macro_addarg(), macro_addnode(), and macro_nodeline(), schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).