source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* docbook2mdoc: The program docbook2mdoc(1) has become large enough that
@ 2019-03-26 18:32 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2019-03-26 18:32 UTC (permalink / raw)
  To: source

Log Message:
-----------
The program docbook2mdoc(1) has become large enough that splitting 
it into a number of logical components makes sense: node tree,
parser, formatter, each with interface and implementation, and the
main program.  That way, it becomes easier to see what interacts
with what, and what is independent of what.

Modified Files:
--------------
    docbook2mdoc:
        Makefile
        docbook2mdoc.c

Added Files:
-----------
    docbook2mdoc:
        format.h
        main.c
        node.c
        node.h
        parse.c
        parse.h

Removed Files:
-------------
    docbook2mdoc:
        extern.h

Revision Data
-------------
--- extern.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/* $Id: extern.h,v 1.33 2019/03/25 23:14:44 schwarze Exp $ */
-/*
- * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2019 Ingo Schwarze <schwarze@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/*
- * All recognised node types.
- */
-enum	nodeid {
-	NODE_NONE = 0,  /* Must come first. */
-	/* Alpha-ordered hereafter. */
-	NODE_ACRONYM,
-	NODE_AFFILIATION,
-	NODE_ANCHOR,
-	NODE_APPLICATION,
-	NODE_ARG,
-	NODE_AUTHOR,
-	NODE_AUTHORGROUP,
-	NODE_BLOCKQUOTE,
-	NODE_BOOK,
-	NODE_BOOKINFO,
-	NODE_CAUTION,
-	NODE_CITEREFENTRY,
-	NODE_CITETITLE,
-	NODE_CMDSYNOPSIS,
-	NODE_CODE,
-	NODE_COLSPEC,
-	NODE_COMMAND,
-	NODE_CONSTANT,
-	NODE_COPYRIGHT,
-	NODE_DATE,
-	NODE_EDITOR,
-	NODE_EMAIL,
-	NODE_EMPHASIS,
-	NODE_ENTRY,
-	NODE_ENVAR,
-	NODE_FIELDSYNOPSIS,
-	NODE_FILENAME,
-	NODE_FIRSTNAME,
-	NODE_FIRSTTERM,
-	NODE_FOOTNOTE,
-	NODE_FUNCDEF,
-	NODE_FUNCPROTOTYPE,
-	NODE_FUNCSYNOPSIS,
-	NODE_FUNCSYNOPSISINFO,
-	NODE_FUNCTION,
-	NODE_GLOSSTERM,
-	NODE_GROUP,
-	NODE_HOLDER,
-	NODE_INDEX,
-	NODE_INDEXTERM,
-	NODE_INFO,
-	NODE_INFORMALEQUATION,
-	NODE_INFORMALTABLE,
-	NODE_INLINEEQUATION,
-	NODE_ITEMIZEDLIST,
-	NODE_KEYSYM,
-	NODE_LEGALNOTICE,
-	NODE_LINK,
-	NODE_LISTITEM,
-	NODE_LITERAL,
-	NODE_LITERALLAYOUT,
-	NODE_MANVOLNUM,
-	NODE_MEMBER,
-	NODE_MML_MATH,
-	NODE_MML_MFENCED,
-	NODE_MML_MFRAC,
-	NODE_MML_MI,
-	NODE_MML_MN,
-	NODE_MML_MO,
-	NODE_MML_MROW,
-	NODE_MML_MSUB,
-	NODE_MML_MSUP,
-	NODE_MODIFIER,
-	NODE_NOTE,
-	NODE_OPTION,
-	NODE_ORDEREDLIST,
-	NODE_ORGNAME,
-	NODE_OTHERNAME,
-	NODE_PARA,
-	NODE_PARAMDEF,
-	NODE_PARAMETER,
-	NODE_PERSONNAME,
-	NODE_PHRASE,
-	NODE_PREFACE,
-	NODE_PRIMARY,
-	NODE_PROGRAMLISTING,
-	NODE_PROMPT,
-	NODE_QUOTE,
-	NODE_REFCLASS,
-	NODE_REFDESCRIPTOR,
-	NODE_REFENTRY,
-	NODE_REFENTRYINFO,
-	NODE_REFENTRYTITLE,
-	NODE_REFMETA,
-	NODE_REFMETAINFO,
-	NODE_REFMISCINFO,
-	NODE_REFNAME,
-	NODE_REFNAMEDIV,
-	NODE_REFPURPOSE,
-	NODE_REFSYNOPSISDIV,
-	NODE_RELEASEINFO,
-	NODE_REPLACEABLE,
-	NODE_ROW,
-	NODE_SBR,
-	NODE_SCREEN,
-	NODE_SECONDARY,
-	NODE_SECTION,
-	NODE_SGMLTAG,
-	NODE_SIMPLELIST,
-	NODE_SPANSPEC,
-	NODE_STRUCTNAME,
-	NODE_SUBTITLE,
-	NODE_SURNAME,
-	NODE_SYNOPSIS,
-	NODE_TABLE,
-	NODE_TBODY,
-	NODE_TERM,
-	NODE_TEXT,
-	NODE_TFOOT,
-	NODE_TGROUP,
-	NODE_THEAD,
-	NODE_TIP,
-	NODE_TITLE,
-	NODE_TRADEMARK,
-	NODE_TYPE,
-	NODE_ULINK,
-	NODE_USERINPUT,
-	NODE_VARIABLELIST,
-	NODE_VARLISTENTRY,
-	NODE_VARNAME,
-	NODE_WARNING,
-	NODE_WORDASWORD,
-	NODE_YEAR,
-	NODE__MAX
-};
-
-/*
- * All recognised attribute keys.
- */
-enum	attrkey {
-	/* Alpha-order... */
-	ATTRKEY_CHOICE = 0,
-	ATTRKEY_CLASS,
-	ATTRKEY_CLOSE,
-	ATTRKEY_ID,
-	ATTRKEY_LINKEND,
-	ATTRKEY_OPEN,
-	ATTRKEY_REP,
-	ATTRKEY__MAX
-};
-
-/*
- * All [explicitly] recognised attribute values.
- * If an attribute has ATTRVAL__MAX, it could be a free-form.
- */
-enum	attrval {
-	/* Alpha-order... */
-	ATTRVAL_MONOSPACED,
-	ATTRVAL_NOREPEAT,
-	ATTRVAL_OPT,
-	ATTRVAL_PLAIN,
-	ATTRVAL_REPEAT,
-	ATTRVAL_REQ,
-	ATTRVAL__MAX
-};
--- /dev/null
+++ parse.c
@@ -0,0 +1,415 @@
+/* $Id: parse.c,v 1.1 2019/03/26 18:32:07 schwarze Exp $ */
+/*
+ * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2019 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <ctype.h>
+#include <expat.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "node.h"
+#include "parse.h"
+
+/*
+ * The implementation of the DocBook parser.
+ */
+
+/*
+ * Global parse state.
+ * Keep this as simple and small as possible.
+ */
+struct	parse {
+	XML_Parser	 xml;	 /* Expat parser handle. */
+	const char	*fname;  /* Name of the input file. */
+	struct ptree	*tree;   /* Complete parse result. */
+	struct pnode	*cur;	 /* Current node in the tree. */
+	int		 warn;	 /* Nonzero: warn about unknown attributes. */
+};
+
+struct	element {
+	const char	*name;   /* DocBook element name; NULL ends the table. */
+	enum nodeid	 node;   /* Node type to generate for that name. */
+};
+
+static	const struct element elements[] = {	/* Searched by name. */
+	{ "acronym",		NODE_ACRONYM },
+	{ "affiliation",	NODE_AFFILIATION },
+	{ "anchor",		NODE_ANCHOR },
+	{ "application",	NODE_APPLICATION },
+	{ "arg",		NODE_ARG },
+	{ "author",		NODE_AUTHOR },
+	{ "authorgroup",	NODE_AUTHORGROUP },
+	{ "blockquote",		NODE_BLOCKQUOTE },
+	{ "book",		NODE_BOOK },
+	{ "bookinfo",		NODE_BOOKINFO },
+	{ "caution",		NODE_CAUTION },
+	{ "chapter",		NODE_SECTION },	/* Several names share this type. */
+	{ "citerefentry",	NODE_CITEREFENTRY },
+	{ "citetitle",		NODE_CITETITLE },
+	{ "cmdsynopsis",	NODE_CMDSYNOPSIS },
+	{ "code",		NODE_CODE },
+	{ "colspec",		NODE_COLSPEC },
+	{ "command",		NODE_COMMAND },
+	{ "constant",		NODE_CONSTANT },
+	{ "copyright",		NODE_COPYRIGHT },
+	{ "date",		NODE_DATE },
+	{ "editor",		NODE_EDITOR },
+	{ "email",		NODE_EMAIL },
+	{ "emphasis",		NODE_EMPHASIS },
+	{ "entry",		NODE_ENTRY },
+	{ "envar",		NODE_ENVAR },
+	{ "fieldsynopsis",	NODE_FIELDSYNOPSIS },
+	{ "filename",		NODE_FILENAME },
+	{ "firstname",		NODE_FIRSTNAME },
+	{ "firstterm",		NODE_FIRSTTERM },
+	{ "footnote",		NODE_FOOTNOTE },
+	{ "funcdef",		NODE_FUNCDEF },
+	{ "funcprototype",	NODE_FUNCPROTOTYPE },
+	{ "funcsynopsis",	NODE_FUNCSYNOPSIS },
+	{ "funcsynopsisinfo",	NODE_FUNCSYNOPSISINFO },
+	{ "function",		NODE_FUNCTION },
+	{ "glossterm",		NODE_GLOSSTERM },
+	{ "group",		NODE_GROUP },
+	{ "holder",		NODE_HOLDER },
+	{ "index",		NODE_INDEX },
+	{ "indexterm",		NODE_INDEXTERM },
+	{ "info",		NODE_INFO },
+	{ "informalequation",	NODE_INFORMALEQUATION },
+	{ "informaltable",	NODE_INFORMALTABLE },
+	{ "inlineequation",	NODE_INLINEEQUATION },
+	{ "itemizedlist",	NODE_ITEMIZEDLIST },
+	{ "keysym",		NODE_KEYSYM },
+	{ "legalnotice",	NODE_LEGALNOTICE },
+	{ "link",		NODE_LINK },
+	{ "listitem",		NODE_LISTITEM },
+	{ "literal",		NODE_LITERAL },
+	{ "literallayout",	NODE_LITERALLAYOUT },
+	{ "manvolnum",		NODE_MANVOLNUM },
+	{ "member",		NODE_MEMBER },
+	{ "mml:math",		NODE_MML_MATH },
+	{ "mml:mfenced",	NODE_MML_MFENCED },
+	{ "mml:mfrac",		NODE_MML_MFRAC },
+	{ "mml:mi",		NODE_MML_MI },
+	{ "mml:mn",		NODE_MML_MN },
+	{ "mml:mo",		NODE_MML_MO },
+	{ "mml:mrow",		NODE_MML_MROW },
+	{ "mml:msub",		NODE_MML_MSUB },
+	{ "mml:msup",		NODE_MML_MSUP },
+	{ "modifier",		NODE_MODIFIER },
+	{ "note",		NODE_NOTE },
+	{ "option",		NODE_OPTION },
+	{ "orderedlist",	NODE_ORDEREDLIST },
+	{ "orgname",		NODE_ORGNAME },
+	{ "othername",		NODE_OTHERNAME },
+	{ "para",		NODE_PARA },
+	{ "paramdef",		NODE_PARAMDEF },
+	{ "parameter",		NODE_PARAMETER },
+	{ "part",		NODE_SECTION },
+	{ "personname",		NODE_PERSONNAME },
+	{ "phrase",		NODE_PHRASE },
+	{ "preface",		NODE_PREFACE },
+	{ "primary",		NODE_PRIMARY },
+	{ "programlisting",	NODE_PROGRAMLISTING },
+	{ "prompt",		NODE_PROMPT },
+	{ "quote",		NODE_QUOTE },
+	{ "refclass",		NODE_REFCLASS },
+	{ "refdescriptor",	NODE_REFDESCRIPTOR },
+	{ "refentry",		NODE_REFENTRY },
+	{ "refentryinfo",	NODE_REFENTRYINFO },
+	{ "refentrytitle",	NODE_REFENTRYTITLE },
+	{ "refmeta",		NODE_REFMETA },
+	{ "refmetainfo",	NODE_REFMETAINFO },
+	{ "refmiscinfo",	NODE_REFMISCINFO },
+	{ "refname",		NODE_REFNAME },
+	{ "refnamediv",		NODE_REFNAMEDIV },
+	{ "refpurpose",		NODE_REFPURPOSE },
+	{ "refsect1",		NODE_SECTION },
+	{ "refsect2",		NODE_SECTION },
+	{ "refsect3",		NODE_SECTION },
+	{ "refsection",		NODE_SECTION },
+	{ "refsynopsisdiv",	NODE_REFSYNOPSISDIV },
+	{ "releaseinfo",	NODE_RELEASEINFO },
+	{ "replaceable",	NODE_REPLACEABLE },
+	{ "row",		NODE_ROW },
+	{ "sbr",		NODE_SBR },
+	{ "screen",		NODE_SCREEN },
+	{ "secondary",		NODE_SECONDARY },
+	{ "sect1",		NODE_SECTION },
+	{ "sect2",		NODE_SECTION },
+	{ "section",		NODE_SECTION },
+	{ "sgmltag",		NODE_SGMLTAG },
+	{ "simplelist",		NODE_SIMPLELIST },
+	{ "spanspec",		NODE_SPANSPEC },
+	{ "structname",		NODE_STRUCTNAME },
+	{ "subtitle",		NODE_SUBTITLE },
+	{ "surname",		NODE_SURNAME },
+	{ "synopsis",		NODE_SYNOPSIS },
+	{ "table",		NODE_TABLE },
+	{ "tbody",		NODE_TBODY },
+	{ "term",		NODE_TERM },
+	{ "tfoot",		NODE_TFOOT },
+	{ "tgroup",		NODE_TGROUP },
+	{ "thead",		NODE_THEAD },
+	{ "tip",		NODE_TIP },
+	{ "title",		NODE_TITLE },
+	{ "trademark",		NODE_TRADEMARK },
+	{ "type",		NODE_TYPE },
+	{ "ulink",		NODE_ULINK },
+	{ "userinput",		NODE_USERINPUT },
+	{ "variablelist",	NODE_VARIABLELIST },
+	{ "varlistentry",	NODE_VARLISTENTRY },
+	{ "varname",		NODE_VARNAME },
+	{ "warning",		NODE_WARNING },
+	{ "wordasword",		NODE_WORDASWORD },
+	{ "year",		NODE_YEAR },
+	{ NULL,			NODE__MAX }	/* Sentinel: name lookup stops here. */
+};
+
+/*
+ * Expat character data handler: arg is the struct parse, p holds sz
+ * bytes that are not NUL-terminated.  Text is appended to an open text
+ * node or starts a new child of the current node; no-op after failure.
+ */
+static void
+xml_char(void *arg, const XML_Char *p, int sz)
+{
+	struct parse	*ps;
+	struct pnode	*dat;
+	int		 i;
+
+	ps = arg;
+	/* Bitwise test: other flags like TREE_EQN must not stop parsing. */
+	if (ps->tree->flags & TREE_FAIL)
+		return;
+
+	/*
+	 * Only create a new node if there is non-whitespace text.
+	 * Strip all leading whitespace.
+	 */
+	if (ps->cur->node != NODE_TEXT) {
+		for (i = 0; i < sz; i++)
+			if (isspace((unsigned char)p[i]) == 0)
+				break;
+		if (i == sz)
+			return;
+		p += i;
+		sz -= i;
+
+		if ((dat = calloc(1, sizeof(*dat))) == NULL) {
+			perror(NULL);
+			exit(1);
+		}
+		dat->node = NODE_TEXT;
+		dat->parent = ps->cur;
+		TAILQ_INIT(&dat->childq);
+		TAILQ_INIT(&dat->attrq);
+		TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
+		ps->cur = dat;
+	}
+
+	/* Append to the current text node, keeping it NUL-terminated. */
+	assert(sz >= 0);
+	ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1);
+	if (ps->cur->b == NULL) {
+		perror(NULL);
+		exit(1);
+	}
+	memcpy(ps->cur->b + ps->cur->bsz, p, sz);
+	ps->cur->bsz += sz;
+	ps->cur->b[ps->cur->bsz] = '\0';
+	ps->cur->real = ps->cur->b;
+}
+
+static void
+pnode_trim(struct pnode *pn)
+{
+	/* Cut trailing whitespace by moving the NUL terminator backwards. */
+	assert(pn->node == NODE_TEXT);
+	while (pn->bsz > 0 && isspace((unsigned char)pn->b[pn->bsz - 1]))
+		pn->b[--pn->bsz] = '\0';
+}
+
+/*
+ * Expat start-tag handler.  Append a node for the element name below the
+ * current node and record its known attributes; an unknown name fails.
+ */
+static void
+xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
+{
+	struct parse	 *ps;
+	const struct element *elem;
+	enum attrkey	  key;
+	struct pnode	 *dat;
+	struct pattr	 *pattr;
+	const XML_Char	**att;
+
+	ps = arg;
+	/* Bitwise test: other flags like TREE_EQN must not stop parsing. */
+	if (ps->tree->flags & TREE_FAIL)
+		return;
+
+	/* FIXME: find a better way to ditch other namespaces. */
+	if (strcmp(name, "xi:include") == 0)
+		return;
+
+	/* Close out the text node, if there is one. */
+	if (ps->cur != NULL && ps->cur->node == NODE_TEXT) {
+		pnode_trim(ps->cur);
+		ps->cur = ps->cur->parent;
+	}
+
+	for (elem = elements; elem->name != NULL; elem++)
+		if (strcmp(elem->name, name) == 0)
+			break;
+
+	if (elem->name == NULL) {
+		fprintf(stderr, "%s:%zu:%zu: unknown element \"%s\"\n",
+			ps->fname, XML_GetCurrentLineNumber(ps->xml),
+			XML_GetCurrentColumnNumber(ps->xml), name);
+		ps->tree->flags |= TREE_FAIL;
+		return;
+	}
+
+	if (elem->node == NODE_INLINEEQUATION)
+		ps->tree->flags |= TREE_EQN;
+
+	if ((dat = calloc(1, sizeof(*dat))) == NULL) {
+		perror(NULL);
+		exit(1);
+	}
+	dat->node = elem->node;
+	dat->parent = ps->cur;
+	TAILQ_INIT(&dat->childq);
+	TAILQ_INIT(&dat->attrq);
+
+	if (ps->cur != NULL)
+		TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
+
+	ps->cur = dat;
+	if (ps->tree->root == NULL)
+		ps->tree->root = dat;
+
+	/* Process attributes; unknown keys are skipped, optionally warned. */
+	for (att = atts; *att != NULL; att += 2) {
+		if ((key = attrkey_parse(*att)) == ATTRKEY__MAX) {
+			if (ps->warn)
+				fprintf(stderr, "%s:%zu:%zu: warning: "
+				    "unknown attribute \"%s\"\n", ps->fname,
+				    XML_GetCurrentLineNumber(ps->xml),
+				    XML_GetCurrentColumnNumber(ps->xml), *att);
+			continue;
+		}
+		if ((pattr = calloc(1, sizeof(*pattr))) == NULL) {
+			perror(NULL);
+			exit(1);
+		}
+		pattr->key = key;
+		if ((pattr->val = attrval_parse(att[1])) == ATTRVAL__MAX)
+			pattr->rawval = strdup(att[1]);
+		TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
+	}
+}
+
+/*
+ * Expat end-tag handler; arg is the struct parse.  Close a pending text
+ * node, then step up to the parent.  No-op once the TREE_FAIL bit is set.
+ */
+static void
+xml_elem_end(void *arg, const XML_Char *name)
+{
+	struct parse	*ps;
+
+	ps = arg;
+	if (ps->tree->flags & TREE_FAIL)
+		return;
+
+	/* FIXME: find a better way to ditch other namespaces. */
+	if (strcmp(name, "xi:include") == 0)
+		return;
+
+	/* Close out the text node, if there is one. */
+	if (ps->cur->node == NODE_TEXT) {
+		pnode_trim(ps->cur);
+		ps->cur = ps->cur->parent;
+	}
+	ps->cur = ps->cur->parent;
+}
+
+struct parse *
+parse_alloc(int warn)
+{
+	struct parse	*ps;
+
+	/* Allocate state, result tree and Expat parser; undo on failure. */
+	ps = calloc(1, sizeof(*ps));
+	if (ps == NULL)
+		return NULL;
+	ps->tree = calloc(1, sizeof(*ps->tree));
+	if (ps->tree != NULL)
+		ps->xml = XML_ParserCreate(NULL);
+	if (ps->xml == NULL) {
+		free(ps->tree);
+		free(ps);
+		return NULL;
+	}
+	/* Route the Expat callbacks to this file, with ps as user data. */
+	ps->warn = warn;
+	XML_SetUserData(ps->xml, ps);
+	XML_SetElementHandler(ps->xml, xml_elem_start, xml_elem_end);
+	XML_SetCharacterDataHandler(ps->xml, xml_char);
+	return ps;
+}
+
+void
+parse_free(struct parse *p)
+{
+	/* Release the Expat parser, the node tree, and the state itself. */
+	if (p != NULL) {
+		XML_ParserFree(p->xml);
+		if (p->tree != NULL)
+			pnode_unlink(p->tree->root);
+		free(p->tree);
+		free(p);
+	}
+}
+
+struct ptree *
+parse_file(struct parse *p, int fd, const char *fname)
+{
+	char		 b[4096];	/* One chunk of input from fd. */
+	ssize_t		 ssz;		/* Bytes read; 0 ends the input. */
+
+	p->fname = fname;		/* Shown in diagnostics. */
+	do {
+		if ((ssz = read(fd, b, sizeof(b))) < 0) {
+			perror(fname);
+			pnode_unlink(p->tree->root);
+			p->tree->root = p->cur = NULL;
+			p->tree->flags |= TREE_FAIL;
+			return NULL;	/* Read error: tree was dropped. */
+		}
+		if (XML_Parse(p->xml, b, ssz, ssz == 0) == 0) {
+			fprintf(stderr, "%s:%zu:%zu: %s\n", fname,
+			    XML_GetCurrentLineNumber(p->xml),
+			    XML_GetCurrentColumnNumber(p->xml),
+			    XML_ErrorString(XML_GetErrorCode(p->xml)));
+			p->tree->flags |= TREE_FAIL;
+		}
+	} while (ssz > 0 && (p->tree->flags & TREE_FAIL) == 0);
+	return p->tree;		/* TREE_FAIL is set after an XML error. */
+}
Index: Makefile
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/Makefile,v
retrieving revision 1.17
retrieving revision 1.18
diff -LMakefile -LMakefile -u -p -r1.17 -r1.18
--- Makefile
+++ Makefile
@@ -3,10 +3,14 @@ CFLAGS += -g -W -Wall -Wstrict-prototype
 WWWPREFIX = /usr/vhosts/mdocml.bsd.lv/www/htdocs/docbook2mdoc
 PREFIX = /usr/local
 
+HEADS =	node.h parse.h format.h
+SRCS =	node.c parse.c docbook2mdoc.c main.c
+OBJS =	node.o parse.o docbook2mdoc.o main.o
+
 all: docbook2mdoc
 
-docbook2mdoc: docbook2mdoc.o
-	$(CC) -o $@ docbook2mdoc.o -lexpat
+docbook2mdoc: $(OBJS)
+	$(CC) -o $@ $(OBJS) -lexpat
 
 www: index.html docbook2mdoc.1.html docbook2mdoc-$(VERSION).tgz README.txt
 
@@ -26,11 +30,15 @@ dist: docbook2mdoc-$(VERSION).tgz
 
 docbook2mdoc-$(VERSION).tgz:
 	mkdir -p .dist/docbook2mdoc-$(VERSION)
-	install -m 0444 docbook2mdoc.c extern.h Makefile docbook2mdoc.1 .dist/docbook2mdoc-$(VERSION)
+	install -m 0444 $(HEADS) $(SRCS) Makefile docbook2mdoc.1 \
+	    .dist/docbook2mdoc-$(VERSION)
 	(cd .dist && tar zcf ../$@ docbook2mdoc-$(VERSION))
 	rm -rf .dist
 
-docbook2mdoc.o: extern.h
+node.o: node.h
+parse.o: node.h parse.h
+docbook2mdoc.o: node.h format.h
+main.o: node.h parse.h format.h
 
 index.html: index.xml
 	sed "s!@VERSION@!$(VERSION)!g" index.xml >$@
@@ -42,6 +50,7 @@ README.txt: README
 	cp README $@
 
 clean:
-	rm -f docbook2mdoc docbook2mdoc.o
+	rm -f docbook2mdoc $(OBJS) docbook2mdoc.core
 	rm -rf docbook2mdoc.dSYM
-	rm -f index.html docbook2mdoc.1.html docbook2mdoc-$(VERSION).tgz README.txt
+	rm -f index.html docbook2mdoc.1.html README.txt
+	rm -f docbook2mdoc-$(VERSION).tgz
Index: docbook2mdoc.c
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/docbook2mdoc.c,v
retrieving revision 1.73
retrieving revision 1.74
diff -Ldocbook2mdoc.c -Ldocbook2mdoc.c -u -p -r1.73 -r1.74
--- docbook2mdoc.c
+++ docbook2mdoc.c
@@ -15,19 +15,17 @@
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
-#include <sys/queue.h>
-
 #include <assert.h>
 #include <ctype.h>
-#include <expat.h>
-#include <fcntl.h>
-#include <getopt.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
 
-#include "extern.h"
+#include "node.h"
+#include "format.h"
+
+/*
+ * The implementation of the mdoc(7) formatter.
+ */
 
 enum	linestate {
 	LINE_NEW = 0,
@@ -35,499 +33,16 @@ enum	linestate {
 	LINE_MACRO
 };
 
-/*
- * Global parse state.
- * Keep this as simple and small as possible.
- */
-struct	parse {
-	XML_Parser	 xml;
-	const char	*fname; /* filename */
-	int		 stop; /* should we stop now? */
-#define	PARSE_EQN	 1
-	unsigned int	 flags; /* document-wide flags */
-	struct pnode	*root; /* root of parse tree */
-	struct pnode	*cur; /* current node in tree */
-	int		 level; /* header level, starting at 1 */
+struct	format {
+	int		 level;      /* Header level, starting at 1. */
 	enum linestate	 linestate;
 };
 
-struct	node {
-	const char	*name; /* docbook element name */
-	enum nodeid	 node; /* docbook element to generate */
-};
-
-TAILQ_HEAD(pnodeq, pnode);
-TAILQ_HEAD(pattrq, pattr);
-
-struct	pattr {
-	enum attrkey	 key;
-	enum attrval	 val;
-	char		*rawval;
-	TAILQ_ENTRY(pattr) child;
-};
-
-struct	pnode {
-	enum nodeid	 node; /* node type */
-	char		*b; /* binary data buffer */
-	char		*real; /* store for "b" */
-	size_t		 bsz; /* data buffer size */
-	struct pnode	*parent; /* parent (or NULL if top) */
-	struct pnodeq	 childq; /* queue of children */
-	struct pattrq	 attrq; /* attributes of node */
-	TAILQ_ENTRY(pnode) child;
-};
-
-static	const char *attrkeys[ATTRKEY__MAX] = {
-	"choice",
-	"class",
-	"close",
-	"id",
-	"linkend",
-	"open",
-	"rep"
-};
-
-static	const char *attrvals[ATTRVAL__MAX] = {
-	"monospaced",
-	"norepeat",
-	"opt",
-	"plain",
-	"repeat",
-	"req"
-};
-
-static	const struct node nodes[] = {
-	{ "acronym",		NODE_ACRONYM },
-	{ "affiliation",	NODE_AFFILIATION },
-	{ "anchor",		NODE_ANCHOR },
-	{ "application",	NODE_APPLICATION },
-	{ "arg",		NODE_ARG },
-	{ "author",		NODE_AUTHOR },
-	{ "authorgroup",	NODE_AUTHORGROUP },
-	{ "blockquote",		NODE_BLOCKQUOTE },
-	{ "book",		NODE_BOOK },
-	{ "bookinfo",		NODE_BOOKINFO },
-	{ "caution",		NODE_CAUTION },
-	{ "chapter",		NODE_SECTION },
-	{ "citerefentry",	NODE_CITEREFENTRY },
-	{ "citetitle",		NODE_CITETITLE },
-	{ "cmdsynopsis",	NODE_CMDSYNOPSIS },
-	{ "code",		NODE_CODE },
-	{ "colspec",		NODE_COLSPEC },
-	{ "command",		NODE_COMMAND },
-	{ "constant",		NODE_CONSTANT },
-	{ "copyright",		NODE_COPYRIGHT },
-	{ "date",		NODE_DATE },
-	{ "editor",		NODE_EDITOR },
-	{ "email",		NODE_EMAIL },
-	{ "emphasis",		NODE_EMPHASIS },
-	{ "entry",		NODE_ENTRY },
-	{ "envar",		NODE_ENVAR },
-	{ "fieldsynopsis",	NODE_FIELDSYNOPSIS },
-	{ "filename",		NODE_FILENAME },
-	{ "firstname",		NODE_FIRSTNAME },
-	{ "firstterm",		NODE_FIRSTTERM },
-	{ "footnote",		NODE_FOOTNOTE },
-	{ "funcdef",		NODE_FUNCDEF },
-	{ "funcprototype",	NODE_FUNCPROTOTYPE },
-	{ "funcsynopsis",	NODE_FUNCSYNOPSIS },
-	{ "funcsynopsisinfo",	NODE_FUNCSYNOPSISINFO },
-	{ "function",		NODE_FUNCTION },
-	{ "glossterm",		NODE_GLOSSTERM },
-	{ "group",		NODE_GROUP },
-	{ "holder",		NODE_HOLDER },
-	{ "index",		NODE_INDEX },
-	{ "indexterm",		NODE_INDEXTERM },
-	{ "info",		NODE_INFO },
-	{ "informalequation",	NODE_INFORMALEQUATION },
-	{ "informaltable",	NODE_INFORMALTABLE },
-	{ "inlineequation",	NODE_INLINEEQUATION },
-	{ "itemizedlist",	NODE_ITEMIZEDLIST },
-	{ "keysym",		NODE_KEYSYM },
-	{ "legalnotice",	NODE_LEGALNOTICE },
-	{ "link",		NODE_LINK },
-	{ "listitem",		NODE_LISTITEM },
-	{ "literal",		NODE_LITERAL },
-	{ "literallayout",	NODE_LITERALLAYOUT },
-	{ "manvolnum",		NODE_MANVOLNUM },
-	{ "member",		NODE_MEMBER },
-	{ "mml:math",		NODE_MML_MATH },
-	{ "mml:mfenced",	NODE_MML_MFENCED },
-	{ "mml:mfrac",		NODE_MML_MFRAC },
-	{ "mml:mi",		NODE_MML_MI },
-	{ "mml:mn",		NODE_MML_MN },
-	{ "mml:mo",		NODE_MML_MO },
-	{ "mml:mrow",		NODE_MML_MROW },
-	{ "mml:msub",		NODE_MML_MSUB },
-	{ "mml:msup",		NODE_MML_MSUP },
-	{ "modifier",		NODE_MODIFIER },
-	{ "note",		NODE_NOTE },
-	{ "option",		NODE_OPTION },
-	{ "orderedlist",	NODE_ORDEREDLIST },
-	{ "orgname",		NODE_ORGNAME },
-	{ "othername",		NODE_OTHERNAME },
-	{ "para",		NODE_PARA },
-	{ "paramdef",		NODE_PARAMDEF },
-	{ "parameter",		NODE_PARAMETER },
-	{ "part",		NODE_SECTION },
-	{ "personname",		NODE_PERSONNAME },
-	{ "phrase",		NODE_PHRASE },
-	{ "preface",		NODE_PREFACE },
-	{ "primary",		NODE_PRIMARY },
-	{ "programlisting",	NODE_PROGRAMLISTING },
-	{ "prompt",		NODE_PROMPT },
-	{ "quote",		NODE_QUOTE },
-	{ "refclass",		NODE_REFCLASS },
-	{ "refdescriptor",	NODE_REFDESCRIPTOR },
-	{ "refentry",		NODE_REFENTRY },
-	{ "refentryinfo",	NODE_REFENTRYINFO },
-	{ "refentrytitle",	NODE_REFENTRYTITLE },
-	{ "refmeta",		NODE_REFMETA },
-	{ "refmetainfo",	NODE_REFMETAINFO },
-	{ "refmiscinfo",	NODE_REFMISCINFO },
-	{ "refname",		NODE_REFNAME },
-	{ "refnamediv",		NODE_REFNAMEDIV },
-	{ "refpurpose",		NODE_REFPURPOSE },
-	{ "refsect1",		NODE_SECTION },
-	{ "refsect2",		NODE_SECTION },
-	{ "refsect3",		NODE_SECTION },
-	{ "refsection",		NODE_SECTION },
-	{ "refsynopsisdiv",	NODE_REFSYNOPSISDIV },
-	{ "releaseinfo",	NODE_RELEASEINFO },
-	{ "replaceable",	NODE_REPLACEABLE },
-	{ "row",		NODE_ROW },
-	{ "sbr",		NODE_SBR },
-	{ "screen",		NODE_SCREEN },
-	{ "secondary",		NODE_SECONDARY },
-	{ "sect1",		NODE_SECTION },
-	{ "sect2",		NODE_SECTION },
-	{ "section",		NODE_SECTION },
-	{ "sgmltag",		NODE_SGMLTAG },
-	{ "simplelist",		NODE_SIMPLELIST },
-	{ "spanspec",		NODE_SPANSPEC },
-	{ "structname",		NODE_STRUCTNAME },
-	{ "subtitle",		NODE_SUBTITLE },
-	{ "surname",		NODE_SURNAME },
-	{ "synopsis",		NODE_SYNOPSIS },
-	{ "table",		NODE_TABLE },
-	{ "tbody",		NODE_TBODY },
-	{ "term",		NODE_TERM },
-	{ "tfoot",		NODE_TFOOT },
-	{ "tgroup",		NODE_TGROUP },
-	{ "thead",		NODE_THEAD },
-	{ "tip",		NODE_TIP },
-	{ "title",		NODE_TITLE },
-	{ "trademark",		NODE_TRADEMARK },
-	{ "type",		NODE_TYPE },
-	{ "ulink",		NODE_ULINK },
-	{ "userinput",		NODE_USERINPUT },
-	{ "variablelist",	NODE_VARIABLELIST },
-	{ "varlistentry",	NODE_VARLISTENTRY },
-	{ "varname",		NODE_VARNAME },
-	{ "warning",		NODE_WARNING },
-	{ "wordasword",		NODE_WORDASWORD },
-	{ "year",		NODE_YEAR },
-	{ NULL,			NODE__MAX }
-};
-
-static	int warn = 0;
-
-static void
-pnode_print(struct parse *p, struct pnode *pn);
-
-/*
- * Process a stream of characters.
- * We store text as nodes in and of themselves.
- * If a text node is already open, append to it.
- * If it's not open, open one under the current context.
- */
-static void
-xml_char(void *arg, const XML_Char *p, int sz)
-{
-	struct parse	*ps = arg;
-	struct pnode	*dat;
-	int		 i;
-
-	if (ps->stop)
-		return;
-
-	/*
-	 * Are we in the midst of processing text?
-	 * If we're not processing text right now, then create a text
-	 * node for doing so.
-	 * However, don't do so unless we have some non-whitespace to
-	 * process: strip out all leading whitespace to be sure.
-	 */
-	if (ps->cur->node != NODE_TEXT) {
-		for (i = 0; i < sz; i++)
-			if ( ! isspace((unsigned char)p[i]))
-				break;
-		if (i == sz)
-			return;
-		p += i;
-		sz -= i;
-		dat = calloc(1, sizeof(*dat));
-		if (dat == NULL) {
-			perror(NULL);
-			exit(1);
-		}
-
-		dat->node = NODE_TEXT;
-		dat->parent = ps->cur;
-		TAILQ_INIT(&dat->childq);
-		TAILQ_INIT(&dat->attrq);
-		TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
-		ps->cur = dat;
-		assert(ps->root != NULL);
-	}
-
-	/* Append to current buffer. */
-	assert(sz >= 0);
-	ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1);
-	if (ps->cur->b == NULL) {
-		perror(NULL);
-		exit(1);
-	}
-	memcpy(ps->cur->b + ps->cur->bsz, p, sz);
-	ps->cur->bsz += sz;
-	ps->cur->b[ps->cur->bsz] = '\0';
-	ps->cur->real = ps->cur->b;
-}
-
-static void
-pnode_trim(struct pnode *pn)
-{
-	assert(pn->node == NODE_TEXT);
-	for (; pn->bsz > 0; pn->b[--pn->bsz] = '\0')
-		if ( ! isspace((unsigned char)pn->b[pn->bsz - 1]))
-			break;
-}
-
-/*
- * Begin an element.
- * First, look for the element.
- * If we don't find it and we're not parsing, keep going.
- * If we don't find it and we're parsing, puke and exit.
- * If we find it but we're not parsing yet (i.e., it's not a refentry
- * and thus out of context), keep going.
- * If we find it and we're at the root and already have a tree, puke and
- * exit (FIXME: I don't think this is right?).
- * If we find it but we're parsing a text node, close out the text node,
- * return to its parent, and keep going.
- * Make sure that the element is in the right context.
- * Lastly, put the node onto our parse tree and continue.
- */
-static void
-xml_elem_start(void *arg, const XML_Char *name, const XML_Char **atts)
-{
-	struct parse	 *ps = arg;
-	const struct node *node;
-	enum attrkey	  key;
-	enum attrval	  val;
-	struct pnode	 *dat;
-	struct pattr	 *pattr;
-	const XML_Char	**att;
-
-	/* FIXME: find a better way to ditch other namespaces. */
-	if (ps->stop || strcmp(name, "xi:include") == 0)
-		return;
-
-	/* Close out text node, if applicable... */
-	if (ps->cur != NULL && ps->cur->node == NODE_TEXT) {
-		pnode_trim(ps->cur);
-		ps->cur = ps->cur->parent;
-	}
-
-	for (node = nodes; node->name != NULL; node++)
-		if (strcmp(node->name, name) == 0)
-			break;
-
-	if (node->name == NULL) {
-		fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
-			ps->fname, XML_GetCurrentLineNumber(ps->xml),
-			XML_GetCurrentColumnNumber(ps->xml), name);
-		ps->stop = 1;
-		return;
-	}
-
-	if (node->node == NODE_INLINEEQUATION)
-		ps->flags |= PARSE_EQN;
-
-	if ((dat = calloc(1, sizeof(*dat))) == NULL) {
-		perror(NULL);
-		exit(1);
-	}
-
-	dat->node = node->node;
-	dat->parent = ps->cur;
-	TAILQ_INIT(&dat->childq);
-	TAILQ_INIT(&dat->attrq);
-
-	if (ps->cur != NULL)
-		TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
-
-	ps->cur = dat;
-	if (ps->root == NULL)
-		ps->root = dat;
-
-	/*
-	 * Process attributes.
-	 */
-	for (att = atts; *att != NULL; att += 2) {
-		for (key = 0; key < ATTRKEY__MAX; key++)
-			if (strcmp(*att, attrkeys[key]) == 0)
-				break;
-		if (key == ATTRKEY__MAX) {
-			if (warn)
-				fprintf(stderr, "%s:%zu:%zu: warning: "
-					"unknown attribute \"%s\"\n",
-					ps->fname,
-					XML_GetCurrentLineNumber(ps->xml),
-					XML_GetCurrentColumnNumber(ps->xml),
-					*att);
-			continue;
-		}
-		for (val = 0; val < ATTRVAL__MAX; val++)
-			if (strcmp(att[1], attrvals[val]) == 0)
-				break;
-		pattr = calloc(1, sizeof(*pattr));
-		pattr->key = key;
-		pattr->val = val;
-		if (val == ATTRVAL__MAX)
-			pattr->rawval = strdup(att[1]);
-		TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
-	}
-}
-
-/*
- * Roll up the parse tree.
- * If we're at a text node, roll that one up first.
- */
-static void
-xml_elem_end(void *arg, const XML_Char *name)
-{
-	struct parse	*ps = arg;
-
-	if (ps->stop)
-		return;
-
-	/* FIXME: find a better way to ditch other namespaces. */
-	if (strcmp(name, "xi:include") == 0)
-		return;
-
-	/* Close out text node, if applicable... */
-	if (ps->cur->node == NODE_TEXT) {
-		pnode_trim(ps->cur);
-		ps->cur = ps->cur->parent;
-	}
-	ps->cur = ps->cur->parent;
-}
-
-/*
- * Recursively free a node (NULL is ok).
- */
-static void
-pnode_free(struct pnode *pn)
-{
-	struct pnode	*pp;
-	struct pattr	*ap;
-
-	if (pn == NULL)
-		return;
-
-	while ((pp = TAILQ_FIRST(&pn->childq)) != NULL) {
-		TAILQ_REMOVE(&pn->childq, pp, child);
-		pnode_free(pp);
-	}
-
-	while ((ap = TAILQ_FIRST(&pn->attrq)) != NULL) {
-		TAILQ_REMOVE(&pn->attrq, ap, child);
-		free(ap->rawval);
-		free(ap);
-	}
-
-	free(pn->real);
-	free(pn);
-}
-
-/*
- * Unlink a node from its parent and pnode_free() it.
- */
-static void
-pnode_unlink(struct pnode *pn)
-{
-	if (pn->parent != NULL)
-		TAILQ_REMOVE(&pn->parent->childq, pn, child);
-	pnode_free(pn);
-}
-
-/*
- * Unlink all children of a node and pnode_free() them.
- */
-static void
-pnode_unlinksub(struct pnode *pn)
-{
-
-	while ( ! TAILQ_EMPTY(&pn->childq))
-		pnode_unlink(TAILQ_FIRST(&pn->childq));
-}
-
-/*
- * Retrieve an enumeration attribute from a node.
- * Return ATTRVAL__MAX if the node has no such attribute.
- */
-enum attrval
-pnode_getattr(struct pnode *pn, enum attrkey key)
-{
-	struct pattr	*ap;
-
-	if (pn == NULL)
-		return ATTRVAL__MAX;
-	TAILQ_FOREACH(ap, &pn->attrq, child)
-		if (ap->key == key)
-			return ap->val;
-	return ATTRVAL__MAX;
-}
-
-/*
- * Retrieve an attribute string from a node.
- * Return defval if the node has no such attribute.
- */
-const char *
-pnode_getattr_raw(struct pnode *pn, enum attrkey key, const char *defval)
-{
-	struct pattr	*ap;
-
-	if (pn == NULL)
-		return defval;
-	TAILQ_FOREACH(ap, &pn->attrq, child)
-		if (ap->key == key)
-			return ap->val == ATTRVAL__MAX ? ap->rawval :
-			    attrvals[ap->val];
-	return defval;
-}
-
-/*
- * Recursively search and return the first instance of "node".
- */
-static struct pnode *
-pnode_findfirst(struct pnode *pn, enum nodeid node)
-{
-	struct pnode	*pp, *res;
-
-	res = NULL;
-	TAILQ_FOREACH(pp, &pn->childq, child) {
-		res = pp->node == node ? pp :
-			pnode_findfirst(pp, node);
-		if (res != NULL)
-			break;
-	}
+static void	 pnode_print(struct format *, struct pnode *);
 
-	return res;
-}
 
 static void
-macro_open(struct parse *p, const char *name)
+macro_open(struct format *p, const char *name)
 {
 	switch (p->linestate) {
 	case LINE_TEXT:
@@ -545,7 +60,7 @@ macro_open(struct parse *p, const char *
 }
 
 static void
-macro_close(struct parse *p)
+macro_close(struct format *p)
 {
 	assert(p->linestate == LINE_MACRO);
 	putchar('\n');
@@ -553,7 +68,7 @@ macro_close(struct parse *p)
 }
 
 static void
-macro_line(struct parse *p, const char *name)
+macro_line(struct format *p, const char *name)
 {
 	macro_open(p, name);
 	macro_close(p);
@@ -567,7 +82,7 @@ macro_line(struct parse *p, const char *
  * Print an argument string on a macro line, collapsing whitespace.
  */
 static void
-macro_addarg(struct parse *p, const char *arg, int flags)
+macro_addarg(struct format *p, const char *arg, int flags)
 {
 	const char	*cp;
 
@@ -624,7 +139,7 @@ macro_addarg(struct parse *p, const char
 }
 
 static void
-macro_argline(struct parse *p, const char *name, const char *arg)
+macro_argline(struct format *p, const char *name, const char *arg)
 {
 	macro_open(p, name);
 	macro_addarg(p, arg, ARG_SPACE);
@@ -635,7 +150,7 @@ macro_argline(struct parse *p, const cha
  * Recursively append text from the children of a node to a macro line.
  */
 static void
-macro_addnode(struct parse *p, struct pnode *pn, int flags)
+macro_addnode(struct format *p, struct pnode *pn, int flags)
 {
 	int		 quote_now;
 
@@ -690,7 +205,7 @@ macro_addnode(struct parse *p, struct pn
 }
 
 static void
-macro_nodeline(struct parse *p, const char *name, struct pnode *pn, int flags)
+macro_nodeline(struct format *p, const char *name, struct pnode *pn, int flags)
 {
 	macro_open(p, name);
 	macro_addnode(p, pn, ARG_SPACE | flags);
@@ -702,7 +217,7 @@ macro_nodeline(struct parse *p, const ch
  * emit the closing punctuation as a trailing macro argument.
  */
 static void
-macro_closepunct(struct parse *p, struct pnode *pn)
+macro_closepunct(struct format *p, struct pnode *pn)
 {
 	if ((pn = TAILQ_NEXT(pn, child)) != NULL &&
 	    pn->node == NODE_TEXT && pn->bsz > 0 &&
@@ -717,7 +232,7 @@ macro_closepunct(struct parse *p, struct
 }
 
 static void
-print_text(struct parse *p, const char *word)
+print_text(struct format *p, const char *word)
 {
 	switch (p->linestate) {
 	case LINE_NEW:
@@ -734,7 +249,7 @@ print_text(struct parse *p, const char *
 }
 
 static void
-pnode_printpara(struct parse *p, struct pnode *pn)
+pnode_printpara(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp;
 
@@ -761,7 +276,7 @@ pnode_printpara(struct parse *p, struct 
  * If the SYNOPSIS macro has a superfluous title, kill it.
  */
 static void
-pnode_printrefsynopsisdiv(struct parse *p, struct pnode *pn)
+pnode_printrefsynopsisdiv(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp, *pq;
 
@@ -776,7 +291,7 @@ pnode_printrefsynopsisdiv(struct parse *
  * Start a hopefully-named `Sh' section.
  */
 static void
-pnode_printrefsect(struct parse *p, struct pnode *pn)
+pnode_printrefsect(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp;
 	const char	*title;
@@ -854,7 +369,7 @@ pnode_printrefsect(struct parse *p, stru
  * Start a reference, extracting the title and volume.
  */
 static void
-pnode_printciterefentry(struct parse *p, struct pnode *pn)
+pnode_printciterefentry(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp, *title, *manvol;
 
@@ -879,7 +394,7 @@ pnode_printciterefentry(struct parse *p,
 }
 
 static void
-pnode_printrefmeta(struct parse *p, struct pnode *pn)
+pnode_printrefmeta(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp, *title, *manvol;
 
@@ -904,7 +419,7 @@ pnode_printrefmeta(struct parse *p, stru
 }
 
 static void
-pnode_printfuncdef(struct parse *p, struct pnode *pn)
+pnode_printfuncdef(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp, *ftype, *func;
 
@@ -932,7 +447,7 @@ pnode_printfuncdef(struct parse *p, stru
  * Second, >1 arguments are separated by commas.
  */
 static void
-pnode_printmathfenced(struct parse *p, struct pnode *pn)
+pnode_printmathfenced(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp;
 
@@ -956,7 +471,7 @@ pnode_printmathfenced(struct parse *p, s
  * particular eqn(7) word.
  */
 static void
-pnode_printmath(struct parse *p, struct pnode *pn)
+pnode_printmath(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp;
 
@@ -983,7 +498,7 @@ pnode_printmath(struct parse *p, struct 
 }
 
 static void
-pnode_printfuncprototype(struct parse *p, struct pnode *pn)
+pnode_printfuncprototype(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp, *fdef;
 
@@ -1012,7 +527,7 @@ pnode_printfuncprototype(struct parse *p
  * ellipsis following an argument) and optionality.
  */
 static void
-pnode_printarg(struct parse *p, struct pnode *pn)
+pnode_printarg(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp;
 	struct pattr	*ap;
@@ -1041,7 +556,7 @@ pnode_printarg(struct parse *p, struct p
 }
 
 static void
-pnode_printgroup(struct parse *p, struct pnode *pn)
+pnode_printgroup(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp, *np;
 	struct pattr	*ap;
@@ -1092,27 +607,26 @@ pnode_printgroup(struct parse *p, struct
 }
 
 static void
-pnode_printprologue(struct parse *p, struct pnode *pn)
+pnode_printprologue(struct format *p, struct ptree *tree)
 {
-	struct pnode	*pp;
+	struct pnode	*refmeta;
 
-	pp = p->root == NULL ? NULL :
-		pnode_findfirst(p->root, NODE_REFMETA);
+	refmeta = tree->root == NULL ? NULL :
+	    pnode_findfirst(tree->root, NODE_REFMETA);
 
 	macro_line(p, "Dd $Mdocdate" "$");
-	if (pp != NULL)
-		pnode_printrefmeta(p, pp);
-	else {
+	if (refmeta == NULL) {
 		macro_open(p, "Dt");
 		macro_addarg(p,
-		    pnode_getattr_raw(p->root, ATTRKEY_ID, "UNKNOWN"),
+		    pnode_getattr_raw(tree->root, ATTRKEY_ID, "UNKNOWN"),
 		    ARG_SPACE | ARG_SINGLE | ARG_UPPER);
 		macro_addarg(p, "1", ARG_SPACE);
 		macro_close(p);
-	}
+	} else
+		pnode_printrefmeta(p, refmeta);
 	macro_line(p, "Os");
 
-	if (p->flags & PARSE_EQN) {
+	if (tree->flags & TREE_EQN) {
 		macro_line(p, "EQ");
 		print_text(p, "delim $$");
 		macro_line(p, "EN");
@@ -1124,7 +638,7 @@ pnode_printprologue(struct parse *p, str
  * we should comma-separate as list headers.
  */
 static void
-pnode_printvarlistentry(struct parse *p, struct pnode *pn)
+pnode_printvarlistentry(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp;
 	int		 first = 1;
@@ -1146,7 +660,7 @@ pnode_printvarlistentry(struct parse *p,
 }
 
 static void
-pnode_printtitle(struct parse *p, struct pnode *pn)
+pnode_printtitle(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp, *pq;
 
@@ -1160,7 +674,7 @@ pnode_printtitle(struct parse *p, struct
 }
 
 static void
-pnode_printrow(struct parse *p, struct pnode *pn)
+pnode_printrow(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp;
 
@@ -1174,7 +688,7 @@ pnode_printrow(struct parse *p, struct p
 }
 
 static void
-pnode_printtable(struct parse *p, struct pnode *pn)
+pnode_printtable(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp;
 
@@ -1189,7 +703,7 @@ pnode_printtable(struct parse *p, struct
 }
 
 static void
-pnode_printlist(struct parse *p, struct pnode *pn)
+pnode_printlist(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp;
 
@@ -1205,7 +719,7 @@ pnode_printlist(struct parse *p, struct 
 }
 
 static void
-pnode_printvariablelist(struct parse *p, struct pnode *pn)
+pnode_printvariablelist(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp;
 
@@ -1228,7 +742,7 @@ pnode_printvariablelist(struct parse *p,
  * whatever), don't print inline macros.
  */
 static void
-pnode_print(struct parse *p, struct pnode *pn)
+pnode_print(struct format *p, struct pnode *pn)
 {
 	struct pnode	*pp;
 	const char	*ccp;
@@ -1577,118 +1091,15 @@ pnode_print(struct parse *p, struct pnod
 	}
 }
 
-/*
- * Loop around the read buffer until we've drained it of all data.
- * Invoke the parser context with each buffer fill.
- */
-static int
-readfile(XML_Parser xp, int fd,
-	char *b, size_t bsz, const char *fn)
-{
-	struct parse	 p;
-	int		 rc;
-	ssize_t		 ssz;
-
-	memset(&p, 0, sizeof(p));
-	p.fname = fn;
-	p.xml = xp;
-
-	XML_SetCharacterDataHandler(xp, xml_char);
-	XML_SetElementHandler(xp, xml_elem_start, xml_elem_end);
-	XML_SetUserData(xp, &p);
-
-	while ((ssz = read(fd, b, bsz)) >= 0) {
-		if ((rc = XML_Parse(xp, b, ssz, 0 == ssz)) == 0)
-			fprintf(stderr, "%s:%zu:%zu: %s\n", fn,
-				XML_GetCurrentLineNumber(xp),
-				XML_GetCurrentColumnNumber(xp),
-				XML_ErrorString
-				(XML_GetErrorCode(xp)));
-		else if ( ! p.stop && ssz > 0)
-			continue;
-		/*
-		 * Exit when we've read all or errors have occured
-		 * during the parse sequence.
-		 */
-		p.linestate = LINE_NEW;
-		pnode_printprologue(&p, p.root);
-		pnode_print(&p, p.root);
-		if (p.linestate != LINE_NEW)
-			putchar('\n');
-		pnode_free(p.root);
-		return rc != 0 && p.stop == 0;
-	}
-
-	/* Read error has occured. */
-	perror(fn);
-	pnode_free(p.root);
-	return 0;
-}
-
-int
-main(int argc, char *argv[])
-{
-	XML_Parser	 xp;
-	const char	*fname;
-	char		*buf;
-	int		 fd, rc, ch;
-	const char	*progname;
-
-	progname = strrchr(argv[0], '/');
-	if (progname == NULL)
-		progname = argv[0];
-	else
-		++progname;
-
-	fname = "-";
-	xp = NULL;
-	buf = NULL;
-	rc = 1;
-
-	while ((ch = getopt(argc, argv, "W")) != -1)
-		switch (ch) {
-		case 'W':
-			warn = 1;
-			break;
-		default:
-			goto usage;
-		}
-
-	argc -= optind;
-	argv += optind;
-
-	if (argc > 1) {
-		fprintf(stderr, "%s: Too many arguments\n", argv[1]);
-		goto usage;
-	} else if (argc > 0)
-		fname = argv[0];
-
-	/* Read from stdin or a file. */
-	fd = strcmp(fname, "-") == 0 ?
-		STDIN_FILENO : open(fname, O_RDONLY, 0);
+void
+ptree_print(struct ptree *tree)
+{
+	struct format	 formatter;
 
-	/*
-	 * Open file for reading.
-	 * Allocate a read buffer.
-	 * Create the parser context.
-	 * Dive directly into the parse.
-	 */
-	if (fd == -1)
-		perror(fname);
-	else if ((buf = malloc(4096)) == NULL)
-		perror(NULL);
-	else if ((xp = XML_ParserCreate(NULL)) == NULL)
-		perror(NULL);
-	else if (readfile(xp, fd, buf, 4096, fname))
-		rc = 0;
-
-	XML_ParserFree(xp);
-	free(buf);
-	if (fd != STDIN_FILENO)
-		close(fd);
-	return rc;
-
-usage:
-	fprintf(stderr, "usage: %s [-W] [input_filename]\n", progname);
-	return 1;
+	formatter.level = 0;
+	formatter.linestate = LINE_NEW;
+	pnode_printprologue(&formatter, tree);
+	pnode_print(&formatter, tree->root);
+	if (formatter.linestate != LINE_NEW)
+		putchar('\n');
 }
--- /dev/null
+++ format.h
@@ -0,0 +1,22 @@
+/* $Id: format.h,v 1.1 2019/03/26 18:32:07 schwarze Exp $ */
+/*
+ * Copyright (c) 2019 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * The interface of the mdoc(7) formatter.
+ */
+
+void	 ptree_print(struct ptree *);
--- /dev/null
+++ main.c
@@ -0,0 +1,100 @@
+/* $Id: main.c,v 1.1 2019/03/26 18:32:07 schwarze Exp $ */
+/*
+ * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2019 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "node.h"
+#include "parse.h"
+#include "format.h"
+
+/*
+ * The steering function of the docbook2mdoc(1) program.
+ */
+
+int
+main(int argc, char *argv[])
+{
+	struct parse	*parser;
+	struct ptree	*tree;
+	const char	*progname;
+	const char	*fname;
+	int		 ch, fd, rc, warn;
+
+	if ((progname = strrchr(argv[0], '/')) == NULL)
+		progname = argv[0];
+	else
+		progname++;
+
+	warn = 0;
+	while ((ch = getopt(argc, argv, "W")) != -1) {
+		switch (ch) {
+		case 'W':
+			warn = 1;
+			break;
+		default:
+			goto usage;
+		}
+	}
+	argc -= optind;
+	argv += optind;
+
+	/*
+	 * Argument processing:
+	 * Open file or use standard input.
+	 */
+
+	if (argc > 1) {
+		fprintf(stderr, "%s: Too many arguments\n", argv[1]);
+		goto usage;
+	} else
+		fname = argc > 0 ? argv[0] : "-";
+
+	fd = strcmp(fname, "-") == 0 ?
+		STDIN_FILENO : open(fname, O_RDONLY, 0);
+
+	if (fd == -1) {
+		perror(fname);
+		return 1;
+	}
+
+	/* Parse and format. */
+
+	rc = 1;
+	if ((parser = parse_alloc(warn)) != NULL) {
+		if ((tree = parse_file(parser, fd, fname)) != NULL) {
+			if ((tree->flags & TREE_FAIL) == 0)
+				rc = 0;
+			ptree_print(tree);
+			pnode_unlink(tree->root);
+			tree->root = NULL;
+		}
+		parse_free(parser);
+	} else
+		perror(NULL);
+
+	if (fd != STDIN_FILENO)
+		close(fd);
+	return rc;
+
+usage:
+	fprintf(stderr, "usage: %s [-W] [input_filename]\n", progname);
+	return 1;
+}
--- /dev/null
+++ node.h
@@ -0,0 +1,232 @@
+/* $Id: node.h,v 1.1 2019/03/26 18:32:07 schwarze Exp $ */
+/*
+ * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2019 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/queue.h>
+
+/*
+ * The interface of the DocBook syntax tree.
+ */
+
+/*
+ * All node types used for formatting purposes.
+ * More DocBook XML elements are recognized, but remapped or discarded.
+ */
+enum	nodeid {
+	NODE_NONE = 0,  /* Must come first. */
+	/* Alpha-ordered hereafter. */
+	NODE_ACRONYM,
+	NODE_AFFILIATION,
+	NODE_ANCHOR,
+	NODE_APPLICATION,
+	NODE_ARG,
+	NODE_AUTHOR,
+	NODE_AUTHORGROUP,
+	NODE_BLOCKQUOTE,
+	NODE_BOOK,
+	NODE_BOOKINFO,
+	NODE_CAUTION,
+	NODE_CITEREFENTRY,
+	NODE_CITETITLE,
+	NODE_CMDSYNOPSIS,
+	NODE_CODE,
+	NODE_COLSPEC,
+	NODE_COMMAND,
+	NODE_CONSTANT,
+	NODE_COPYRIGHT,
+	NODE_DATE,
+	NODE_EDITOR,
+	NODE_EMAIL,
+	NODE_EMPHASIS,
+	NODE_ENTRY,
+	NODE_ENVAR,
+	NODE_FIELDSYNOPSIS,
+	NODE_FILENAME,
+	NODE_FIRSTNAME,
+	NODE_FIRSTTERM,
+	NODE_FOOTNOTE,
+	NODE_FUNCDEF,
+	NODE_FUNCPROTOTYPE,
+	NODE_FUNCSYNOPSIS,
+	NODE_FUNCSYNOPSISINFO,
+	NODE_FUNCTION,
+	NODE_GLOSSTERM,
+	NODE_GROUP,
+	NODE_HOLDER,
+	NODE_INDEX,
+	NODE_INDEXTERM,
+	NODE_INFO,
+	NODE_INFORMALEQUATION,
+	NODE_INFORMALTABLE,
+	NODE_INLINEEQUATION,
+	NODE_ITEMIZEDLIST,
+	NODE_KEYSYM,
+	NODE_LEGALNOTICE,
+	NODE_LINK,
+	NODE_LISTITEM,
+	NODE_LITERAL,
+	NODE_LITERALLAYOUT,
+	NODE_MANVOLNUM,
+	NODE_MEMBER,
+	NODE_MML_MATH,
+	NODE_MML_MFENCED,
+	NODE_MML_MFRAC,
+	NODE_MML_MI,
+	NODE_MML_MN,
+	NODE_MML_MO,
+	NODE_MML_MROW,
+	NODE_MML_MSUB,
+	NODE_MML_MSUP,
+	NODE_MODIFIER,
+	NODE_NOTE,
+	NODE_OPTION,
+	NODE_ORDEREDLIST,
+	NODE_ORGNAME,
+	NODE_OTHERNAME,
+	NODE_PARA,
+	NODE_PARAMDEF,
+	NODE_PARAMETER,
+	NODE_PERSONNAME,
+	NODE_PHRASE,
+	NODE_PREFACE,
+	NODE_PRIMARY,
+	NODE_PROGRAMLISTING,
+	NODE_PROMPT,
+	NODE_QUOTE,
+	NODE_REFCLASS,
+	NODE_REFDESCRIPTOR,
+	NODE_REFENTRY,
+	NODE_REFENTRYINFO,
+	NODE_REFENTRYTITLE,
+	NODE_REFMETA,
+	NODE_REFMETAINFO,
+	NODE_REFMISCINFO,
+	NODE_REFNAME,
+	NODE_REFNAMEDIV,
+	NODE_REFPURPOSE,
+	NODE_REFSYNOPSISDIV,
+	NODE_RELEASEINFO,
+	NODE_REPLACEABLE,
+	NODE_ROW,
+	NODE_SBR,
+	NODE_SCREEN,
+	NODE_SECONDARY,
+	NODE_SECTION,
+	NODE_SGMLTAG,
+	NODE_SIMPLELIST,
+	NODE_SPANSPEC,
+	NODE_STRUCTNAME,
+	NODE_SUBTITLE,
+	NODE_SURNAME,
+	NODE_SYNOPSIS,
+	NODE_TABLE,
+	NODE_TBODY,
+	NODE_TERM,
+	NODE_TEXT,
+	NODE_TFOOT,
+	NODE_TGROUP,
+	NODE_THEAD,
+	NODE_TIP,
+	NODE_TITLE,
+	NODE_TRADEMARK,
+	NODE_TYPE,
+	NODE_ULINK,
+	NODE_USERINPUT,
+	NODE_VARIABLELIST,
+	NODE_VARLISTENTRY,
+	NODE_VARNAME,
+	NODE_WARNING,
+	NODE_WORDASWORD,
+	NODE_YEAR,
+	NODE__MAX
+};
+
+/*
+ * All recognised attribute keys.
+ * Other attributes are discarded.
+ */
+enum	attrkey {
+	/* Alpha-order... */
+	ATTRKEY_CHOICE = 0,
+	ATTRKEY_CLASS,
+	ATTRKEY_CLOSE,
+	ATTRKEY_ID,
+	ATTRKEY_LINKEND,
+	ATTRKEY_OPEN,
+	ATTRKEY_REP,
+	ATTRKEY__MAX
+};
+
+/*
+ * All explicitly recognised attribute values.
+ * If an attribute has ATTRVAL__MAX, it is treated as free-form.
+ */
+enum	attrval {
+	/* Alpha-order... */
+	ATTRVAL_MONOSPACED,
+	ATTRVAL_NOREPEAT,
+	ATTRVAL_OPT,
+	ATTRVAL_PLAIN,
+	ATTRVAL_REPEAT,
+	ATTRVAL_REQ,
+	ATTRVAL__MAX
+};
+
+TAILQ_HEAD(pnodeq, pnode);
+TAILQ_HEAD(pattrq, pattr);
+
+/*
+ * One DocBook XML element attribute.
+ */
+struct	pattr {
+	enum attrkey	 key;
+	enum attrval	 val;
+	char		*rawval;
+	TAILQ_ENTRY(pattr) child;
+};
+
+/*
+ * One DocBook XML element.
+ */
+struct	pnode {
+	enum nodeid	 node;     /* Node type. */
+	char		*b;        /* String value. */
+	char		*real;     /* Storage for "b". */
+	size_t		 bsz;      /* strlen(b) */
+	struct pnode	*parent;   /* Parent node or NULL. */
+	struct pnodeq	 childq;   /* Queue of children. */
+	struct pattrq	 attrq;    /* Attributes of the node. */
+	TAILQ_ENTRY(pnode) child;
+};
+
+/*
+ * The parse result for one complete DocBook XML document.
+ */
+struct	ptree {
+	struct pnode	*root;     /* The document element. */
+	int		 flags;
+#define	TREE_FAIL	 (1 << 0)  /* A fatal parse error occurred. */
+#define	TREE_EQN	 (1 << 1)  /* The document needs inline eqn(7). */
+};
+
+
+enum attrkey	 attrkey_parse(const char *);
+enum attrval	 attrval_parse(const char *);
+void		 pnode_unlink(struct pnode *);
+void		 pnode_unlinksub(struct pnode *);
+enum attrval	 pnode_getattr(struct pnode *, enum attrkey);
+const char	*pnode_getattr_raw(struct pnode *, enum attrkey, const char *);
+struct pnode	*pnode_findfirst(struct pnode *, enum nodeid);
--- /dev/null
+++ node.c
@@ -0,0 +1,165 @@
+/* $Id: node.c,v 1.1 2019/03/26 18:32:07 schwarze Exp $ */
+/*
+ * Copyright (c) 2014 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2019 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <stdlib.h>
+#include <string.h>
+
+#include "node.h"
+
+/*
+ * The implementation of the DocBook syntax tree.
+ */
+
+static	const char *const attrkeys[ATTRKEY__MAX] = {
+	"choice",
+	"class",
+	"close",
+	"id",
+	"linkend",
+	"open",
+	"rep"
+};
+
+static	const char *const attrvals[ATTRVAL__MAX] = {
+	"monospaced",
+	"norepeat",
+	"opt",
+	"plain",
+	"repeat",
+	"req"
+};
+
+enum attrkey
+attrkey_parse(const char *name)
+{
+	enum attrkey	 key;
+
+	for (key = 0; key < ATTRKEY__MAX; key++)
+		if (strcmp(name, attrkeys[key]) == 0)
+			break;
+	return key;
+}
+
+enum attrval
+attrval_parse(const char *name)
+{
+	enum attrval	 val;
+
+	for (val = 0; val < ATTRVAL__MAX; val++)
+		if (strcmp(name, attrvals[val]) == 0)
+			break;
+	return val;
+}
+
+/*
+ * Recursively free a node (NULL is ok).
+ */
+static void
+pnode_free(struct pnode *pn)
+{
+	struct pnode	*pch;
+	struct pattr	*ap;
+
+	if (pn == NULL)
+		return;
+
+	while ((pch = TAILQ_FIRST(&pn->childq)) != NULL) {
+		TAILQ_REMOVE(&pn->childq, pch, child);
+		pnode_free(pch);
+	}
+	while ((ap = TAILQ_FIRST(&pn->attrq)) != NULL) {
+		TAILQ_REMOVE(&pn->attrq, ap, child);
+		free(ap->rawval);
+		free(ap);
+	}
+	free(pn->real);
+	free(pn);
+}
+
+/*
+ * Unlink a node from its parent and pnode_free() it.
+ */
+void
+pnode_unlink(struct pnode *pn)
+{
+	if (pn == NULL)
+		return;
+	if (pn->parent != NULL)
+		TAILQ_REMOVE(&pn->parent->childq, pn, child);
+	pnode_free(pn);
+}
+
+/*
+ * Unlink all children of a node and pnode_free() them.
+ */
+void
+pnode_unlinksub(struct pnode *pn)
+{
+	while (TAILQ_EMPTY(&pn->childq) == 0)
+		pnode_unlink(TAILQ_FIRST(&pn->childq));
+}
+
+/*
+ * Retrieve an enumeration attribute from a node.
+ * Return ATTRVAL__MAX if the node has no such attribute.
+ */
+enum attrval
+pnode_getattr(struct pnode *pn, enum attrkey key)
+{
+	struct pattr	*ap;
+
+	if (pn == NULL)
+		return ATTRVAL__MAX;
+	TAILQ_FOREACH(ap, &pn->attrq, child)
+		if (ap->key == key)
+			return ap->val;
+	return ATTRVAL__MAX;
+}
+
+/*
+ * Retrieve an attribute string from a node.
+ * Return defval if the node has no such attribute.
+ */
+const char *
+pnode_getattr_raw(struct pnode *pn, enum attrkey key, const char *defval)
+{
+	struct pattr	*ap;
+
+	if (pn == NULL)
+		return defval;
+	TAILQ_FOREACH(ap, &pn->attrq, child)
+		if (ap->key == key)
+			return ap->val == ATTRVAL__MAX ? ap->rawval :
+			    attrvals[ap->val];
+	return defval;
+}
+
+/*
+ * Recursively search and return the first instance of "node".
+ */
+struct pnode *
+pnode_findfirst(struct pnode *pn, enum nodeid node)
+{
+	struct pnode	*pch, *res;
+
+	if (pn->node == node)
+		return pn;
+	TAILQ_FOREACH(pch, &pn->childq, child)
+		if ((res = pnode_findfirst(pch, node)) != NULL)
+			return res;
+	return NULL;
+}
--- /dev/null
+++ parse.h
@@ -0,0 +1,26 @@
+/* $Id: parse.h,v 1.1 2019/03/26 18:32:07 schwarze Exp $ */
+/*
+ * Copyright (c) 2019 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * The interface of the DocBook parser.
+ */
+
+struct parse;	 /* Opaque object; used only in parse.c. */
+
+struct parse	*parse_alloc(int warn);
+void		 parse_free(struct parse *);
+struct ptree	*parse_file(struct parse *, int, const char *);
--
 To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2019-03-26 18:32 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-26 18:32 docbook2mdoc: The program docbook2mdoc(1) has become large enough that schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).