source@mandoc.bsd.lv
 help / color / mirror / Atom feed
* docbook2mdoc: Various parser simplifications and improvements.
@ 2019-03-25 23:15 schwarze
  0 siblings, 0 replies; only message in thread
From: schwarze @ 2019-03-25 23:15 UTC (permalink / raw)
  To: source

Log Message:
-----------
Various parser simplifications and improvements.

Delete the redundant member "node" of struct parse.
The same is already available from cur->node.

No need to check in xml_char() whether an element is open.
Text outside the document element results in expat errors "not
well-formed (invalid token)" or "junk after document element" 
and the function xml_char() is not called.

No need to check in xml_elem_end() whether an element is open.
Bogus closing tags result in expat errors "not well-formed (invalid 
token)" or "mismatched tag" and the function xml_elem_end() is not
called.

In xml_elem_start(): no point in skipping the element name check 
for the document element; and the error "multiple refentries"
can no longer happen due to earlier cleanups.

Modified Files:
--------------
    docbook2mdoc:
        docbook2mdoc.c
        extern.h

Revision Data
-------------
Index: docbook2mdoc.c
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/docbook2mdoc.c,v
retrieving revision 1.72
retrieving revision 1.73
diff -Ldocbook2mdoc.c -Ldocbook2mdoc.c -u -p -r1.72 -r1.73
--- docbook2mdoc.c
+++ docbook2mdoc.c
@@ -41,7 +41,6 @@ enum	linestate {
  */
 struct	parse {
 	XML_Parser	 xml;
-	enum nodeid	 node; /* current (NODE_ROOT if pre-tree) */
 	const char	*fname; /* filename */
 	int		 stop; /* should we stop now? */
 #define	PARSE_EQN	 1
@@ -249,12 +248,9 @@ xml_char(void *arg, const XML_Char *p, i
 	struct pnode	*dat;
 	int		 i;
 
-	/* Stopped or no tree yet. */
-	if (ps->stop || ps->node == NODE_ROOT)
+	if (ps->stop)
 		return;
 
-	assert(ps->cur != NULL);
-
 	/*
 	 * Are we in the midst of processing text?
 	 * If we're not processing text right now, then create a text
@@ -262,7 +258,7 @@ xml_char(void *arg, const XML_Char *p, i
 	 * However, don't do so unless we have some non-whitespace to
 	 * process: strip out all leading whitespace to be sure.
 	 */
-	if (ps->node != NODE_TEXT) {
+	if (ps->cur->node != NODE_TEXT) {
 		for (i = 0; i < sz; i++)
 			if ( ! isspace((unsigned char)p[i]))
 				break;
@@ -276,7 +272,7 @@ xml_char(void *arg, const XML_Char *p, i
 			exit(1);
 		}
 
-		dat->node = ps->node = NODE_TEXT;
+		dat->node = NODE_TEXT;
 		dat->parent = ps->cur;
 		TAILQ_INIT(&dat->childq);
 		TAILQ_INIT(&dat->attrq);
@@ -337,10 +333,9 @@ xml_elem_start(void *arg, const XML_Char
 		return;
 
 	/* Close out text node, if applicable... */
-	if (ps->node == NODE_TEXT) {
+	if (ps->cur != NULL && ps->cur->node == NODE_TEXT) {
 		pnode_trim(ps->cur);
 		ps->cur = ps->cur->parent;
-		ps->node = ps->cur->node;
 	}
 
 	for (node = nodes; node->name != NULL; node++)
@@ -348,19 +343,11 @@ xml_elem_start(void *arg, const XML_Char
 			break;
 
 	if (node->name == NULL) {
-		if (ps->node == NODE_ROOT)
-			return;
 		fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
 			ps->fname, XML_GetCurrentLineNumber(ps->xml),
 			XML_GetCurrentColumnNumber(ps->xml), name);
 		ps->stop = 1;
 		return;
-	} else if (ps->node == NODE_ROOT && ps->root != NULL) {
-		fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
-			ps->fname, XML_GetCurrentLineNumber(ps->xml),
-			XML_GetCurrentColumnNumber(ps->xml));
-		ps->stop = 1;
-		return;
 	}
 
 	if (node->node == NODE_INLINEEQUATION)
@@ -371,7 +358,7 @@ xml_elem_start(void *arg, const XML_Char
 		exit(1);
 	}
 
-	dat->node = ps->node = node->node;
+	dat->node = node->node;
 	dat->parent = ps->cur;
 	TAILQ_INIT(&dat->childq);
 	TAILQ_INIT(&dat->attrq);
@@ -410,36 +397,30 @@ xml_elem_start(void *arg, const XML_Char
 			pattr->rawval = strdup(att[1]);
 		TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
 	}
-
 }
 
 /*
  * Roll up the parse tree.
  * If we're at a text node, roll that one up first.
- * If we hit the root, then assign ourselves as the NODE_ROOT.
  */
 static void
 xml_elem_end(void *arg, const XML_Char *name)
 {
 	struct parse	*ps = arg;
 
-	/* FIXME: find a better way to ditch other namespaces. */
-	if (ps->stop || ps->node == NODE_ROOT)
+	if (ps->stop)
 		return;
-	else if (strcmp(name, "xi:include") == 0)
+
+	/* FIXME: find a better way to ditch other namespaces. */
+	if (strcmp(name, "xi:include") == 0)
 		return;
 
 	/* Close out text node, if applicable... */
-	if (ps->node == NODE_TEXT) {
+	if (ps->cur->node == NODE_TEXT) {
 		pnode_trim(ps->cur);
 		ps->cur = ps->cur->parent;
-		ps->node = ps->cur->node;
 	}
-
-	if ((ps->cur = ps->cur->parent) == NULL)
-		ps->node = NODE_ROOT;
-	else
-		ps->node = ps->cur->node;
+	ps->cur = ps->cur->parent;
 }
 
 /*
@@ -500,6 +481,8 @@ pnode_getattr(struct pnode *pn, enum att
 {
 	struct pattr	*ap;
 
+	if (pn == NULL)
+		return ATTRVAL__MAX;
 	TAILQ_FOREACH(ap, &pn->attrq, child)
 		if (ap->key == key)
 			return ap->val;
@@ -515,6 +498,8 @@ pnode_getattr_raw(struct pnode *pn, enum
 {
 	struct pattr	*ap;
 
+	if (pn == NULL)
+		return defval;
 	TAILQ_FOREACH(ap, &pn->attrq, child)
 		if (ap->key == key)
 			return ap->val == ATTRVAL__MAX ? ap->rawval :
@@ -1604,8 +1589,7 @@ readfile(XML_Parser xp, int fd,
 	int		 rc;
 	ssize_t		 ssz;
 
-	memset(&p, 0, sizeof(struct parse));
-
+	memset(&p, 0, sizeof(p));
 	p.fname = fn;
 	p.xml = xp;
 
Index: extern.h
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/extern.h,v
retrieving revision 1.32
retrieving revision 1.33
diff -Lextern.h -Lextern.h -u -p -r1.32 -r1.33
--- extern.h
+++ extern.h
@@ -20,7 +20,7 @@
  * All recognised node types.
  */
 enum	nodeid {
-	NODE_ROOT = 0, /* Must comes first. */
+	NODE_NONE = 0,  /* Must come first. */
 	/* Alpha-ordered hereafter. */
 	NODE_ACRONYM,
 	NODE_AFFILIATION,
--
 To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2019-03-25 23:15 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-25 23:15 docbook2mdoc: Various parser simplifications and improvements schwarze

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).