From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from localhost (fantadrom.bsd.lv [local]) by fantadrom.bsd.lv (OpenSMTPD) with ESMTPA id 5013e063 for ; Mon, 25 Mar 2019 18:15:14 -0500 (EST) Date: Mon, 25 Mar 2019 18:15:14 -0500 (EST) X-Mailinglist: mandoc-source Reply-To: source@mandoc.bsd.lv MIME-Version: 1.0 From: schwarze@mandoc.bsd.lv To: source@mandoc.bsd.lv Subject: docbook2mdoc: Various parser simplifications and improvements. X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Message-ID: Log Message: ----------- Various parser simplifications and improvements. Delete the redundant member "node" of struct parse. The same is already available from cur->node. No need to check in xml_char() whether an element is open. Text outside the document element results in expat errors "not well-formed (invalid token)" or "junk after document element" and the function xml_char() is not called. No need to check in xml_elem_end() whether an element is open. Bogus closing tags result in expat errors "not well-formed (invalid token)" or "mismatched tag" and the function xml_elem_end() is not called. In xml_elem_start(): no point in skipping the element name check for the document element; and the error "multiple refentries" can no longer happen due to earlier cleanups. Modified Files: -------------- docbook2mdoc: docbook2mdoc.c extern.h Revision Data ------------- Index: docbook2mdoc.c =================================================================== RCS file: /home/cvs/mdocml/docbook2mdoc/docbook2mdoc.c,v retrieving revision 1.72 retrieving revision 1.73 diff -Ldocbook2mdoc.c -Ldocbook2mdoc.c -u -p -r1.72 -r1.73 --- docbook2mdoc.c +++ docbook2mdoc.c @@ -41,7 +41,6 @@ enum linestate { */ struct parse { XML_Parser xml; - enum nodeid node; /* current (NODE_ROOT if pre-tree) */ const char *fname; /* filename */ int stop; /* should we stop now? 
*/ #define PARSE_EQN 1 @@ -249,12 +248,9 @@ xml_char(void *arg, const XML_Char *p, i struct pnode *dat; int i; - /* Stopped or no tree yet. */ - if (ps->stop || ps->node == NODE_ROOT) + if (ps->stop) return; - assert(ps->cur != NULL); - /* * Are we in the midst of processing text? * If we're not processing text right now, then create a text @@ -262,7 +258,7 @@ xml_char(void *arg, const XML_Char *p, i * However, don't do so unless we have some non-whitespace to * process: strip out all leading whitespace to be sure. */ - if (ps->node != NODE_TEXT) { + if (ps->cur->node != NODE_TEXT) { for (i = 0; i < sz; i++) if ( ! isspace((unsigned char)p[i])) break; @@ -276,7 +272,7 @@ xml_char(void *arg, const XML_Char *p, i exit(1); } - dat->node = ps->node = NODE_TEXT; + dat->node = NODE_TEXT; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); @@ -337,10 +333,9 @@ xml_elem_start(void *arg, const XML_Char return; /* Close out text node, if applicable... */ - if (ps->node == NODE_TEXT) { + if (ps->cur != NULL && ps->cur->node == NODE_TEXT) { pnode_trim(ps->cur); ps->cur = ps->cur->parent; - ps->node = ps->cur->node; } for (node = nodes; node->name != NULL; node++) @@ -348,19 +343,11 @@ xml_elem_start(void *arg, const XML_Char break; if (node->name == NULL) { - if (ps->node == NODE_ROOT) - return; fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n", ps->fname, XML_GetCurrentLineNumber(ps->xml), XML_GetCurrentColumnNumber(ps->xml), name); ps->stop = 1; return; - } else if (ps->node == NODE_ROOT && ps->root != NULL) { - fprintf(stderr, "%s:%zu:%zu: multiple refentries\n", - ps->fname, XML_GetCurrentLineNumber(ps->xml), - XML_GetCurrentColumnNumber(ps->xml)); - ps->stop = 1; - return; } if (node->node == NODE_INLINEEQUATION) @@ -371,7 +358,7 @@ xml_elem_start(void *arg, const XML_Char exit(1); } - dat->node = ps->node = node->node; + dat->node = node->node; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); @@ -410,36 +397,30 @@ 
xml_elem_start(void *arg, const XML_Char pattr->rawval = strdup(att[1]); TAILQ_INSERT_TAIL(&dat->attrq, pattr, child); } - } /* * Roll up the parse tree. * If we're at a text node, roll that one up first. - * If we hit the root, then assign ourselves as the NODE_ROOT. */ static void xml_elem_end(void *arg, const XML_Char *name) { struct parse *ps = arg; - /* FIXME: find a better way to ditch other namespaces. */ - if (ps->stop || ps->node == NODE_ROOT) + if (ps->stop) return; - else if (strcmp(name, "xi:include") == 0) + + /* FIXME: find a better way to ditch other namespaces. */ + if (strcmp(name, "xi:include") == 0) return; /* Close out text node, if applicable... */ - if (ps->node == NODE_TEXT) { + if (ps->cur->node == NODE_TEXT) { pnode_trim(ps->cur); ps->cur = ps->cur->parent; - ps->node = ps->cur->node; } - - if ((ps->cur = ps->cur->parent) == NULL) - ps->node = NODE_ROOT; - else - ps->node = ps->cur->node; + ps->cur = ps->cur->parent; } /* @@ -500,6 +481,8 @@ pnode_getattr(struct pnode *pn, enum att { struct pattr *ap; + if (pn == NULL) + return ATTRVAL__MAX; TAILQ_FOREACH(ap, &pn->attrq, child) if (ap->key == key) return ap->val; @@ -515,6 +498,8 @@ pnode_getattr_raw(struct pnode *pn, enum { struct pattr *ap; + if (pn == NULL) + return defval; TAILQ_FOREACH(ap, &pn->attrq, child) if (ap->key == key) return ap->val == ATTRVAL__MAX ? ap->rawval : @@ -1604,8 +1589,7 @@ readfile(XML_Parser xp, int fd, int rc; ssize_t ssz; - memset(&p, 0, sizeof(struct parse)); - + memset(&p, 0, sizeof(p)); p.fname = fn; p.xml = xp; Index: extern.h =================================================================== RCS file: /home/cvs/mdocml/docbook2mdoc/extern.h,v retrieving revision 1.32 retrieving revision 1.33 diff -Lextern.h -Lextern.h -u -p -r1.32 -r1.33 --- extern.h +++ extern.h @@ -20,7 +20,7 @@ * All recognised node types. */ enum nodeid { - NODE_ROOT = 0, /* Must comes first. */ + NODE_NONE = 0, /* Must come first. */ /* Alpha-ordered hereafter. 
*/ NODE_ACRONYM, NODE_AFFILIATION, -- To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv