* docbook2mdoc: Various parser simplifications and improvements.
@ 2019-03-25 23:15 schwarze
0 siblings, 0 replies; only message in thread
From: schwarze @ 2019-03-25 23:15 UTC (permalink / raw)
To: source
Log Message:
-----------
Various parser simplifications and improvements.
Delete the redundant member "node" of struct parse.
The same is already available from cur->node.
No need to check in xml_char() whether an element is open.
Text outside the document element results in expat errors "not
well-formed (invalid token)" or "junk after document element"
and the function xml_char() is not called.
No need to check in xml_elem_end() whether an element is open.
Bogus closing tags result in expat errors "not well-formed (invalid
token)" or "mismatched tag" and the function xml_elem_end() is not
called.
In xml_elem_start(): no point in skipping the element name check
for the document element; and the error "multiple refentries"
can no longer happen due to earlier cleanups.
Modified Files:
--------------
docbook2mdoc:
docbook2mdoc.c
extern.h
Revision Data
-------------
Index: docbook2mdoc.c
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/docbook2mdoc.c,v
retrieving revision 1.72
retrieving revision 1.73
diff -Ldocbook2mdoc.c -Ldocbook2mdoc.c -u -p -r1.72 -r1.73
--- docbook2mdoc.c
+++ docbook2mdoc.c
@@ -41,7 +41,6 @@ enum linestate {
*/
struct parse {
XML_Parser xml;
- enum nodeid node; /* current (NODE_ROOT if pre-tree) */
const char *fname; /* filename */
int stop; /* should we stop now? */
#define PARSE_EQN 1
@@ -249,12 +248,9 @@ xml_char(void *arg, const XML_Char *p, i
struct pnode *dat;
int i;
- /* Stopped or no tree yet. */
- if (ps->stop || ps->node == NODE_ROOT)
+ if (ps->stop)
return;
- assert(ps->cur != NULL);
-
/*
* Are we in the midst of processing text?
* If we're not processing text right now, then create a text
@@ -262,7 +258,7 @@ xml_char(void *arg, const XML_Char *p, i
* However, don't do so unless we have some non-whitespace to
* process: strip out all leading whitespace to be sure.
*/
- if (ps->node != NODE_TEXT) {
+ if (ps->cur->node != NODE_TEXT) {
for (i = 0; i < sz; i++)
if ( ! isspace((unsigned char)p[i]))
break;
@@ -276,7 +272,7 @@ xml_char(void *arg, const XML_Char *p, i
exit(1);
}
- dat->node = ps->node = NODE_TEXT;
+ dat->node = NODE_TEXT;
dat->parent = ps->cur;
TAILQ_INIT(&dat->childq);
TAILQ_INIT(&dat->attrq);
@@ -337,10 +333,9 @@ xml_elem_start(void *arg, const XML_Char
return;
/* Close out text node, if applicable... */
- if (ps->node == NODE_TEXT) {
+ if (ps->cur != NULL && ps->cur->node == NODE_TEXT) {
pnode_trim(ps->cur);
ps->cur = ps->cur->parent;
- ps->node = ps->cur->node;
}
for (node = nodes; node->name != NULL; node++)
@@ -348,19 +343,11 @@ xml_elem_start(void *arg, const XML_Char
break;
if (node->name == NULL) {
- if (ps->node == NODE_ROOT)
- return;
fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
ps->fname, XML_GetCurrentLineNumber(ps->xml),
XML_GetCurrentColumnNumber(ps->xml), name);
ps->stop = 1;
return;
- } else if (ps->node == NODE_ROOT && ps->root != NULL) {
- fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
- ps->fname, XML_GetCurrentLineNumber(ps->xml),
- XML_GetCurrentColumnNumber(ps->xml));
- ps->stop = 1;
- return;
}
if (node->node == NODE_INLINEEQUATION)
@@ -371,7 +358,7 @@ xml_elem_start(void *arg, const XML_Char
exit(1);
}
- dat->node = ps->node = node->node;
+ dat->node = node->node;
dat->parent = ps->cur;
TAILQ_INIT(&dat->childq);
TAILQ_INIT(&dat->attrq);
@@ -410,36 +397,30 @@ xml_elem_start(void *arg, const XML_Char
pattr->rawval = strdup(att[1]);
TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
}
-
}
/*
* Roll up the parse tree.
* If we're at a text node, roll that one up first.
- * If we hit the root, then assign ourselves as the NODE_ROOT.
*/
static void
xml_elem_end(void *arg, const XML_Char *name)
{
struct parse *ps = arg;
- /* FIXME: find a better way to ditch other namespaces. */
- if (ps->stop || ps->node == NODE_ROOT)
+ if (ps->stop)
return;
- else if (strcmp(name, "xi:include") == 0)
+
+ /* FIXME: find a better way to ditch other namespaces. */
+ if (strcmp(name, "xi:include") == 0)
return;
/* Close out text node, if applicable... */
- if (ps->node == NODE_TEXT) {
+ if (ps->cur->node == NODE_TEXT) {
pnode_trim(ps->cur);
ps->cur = ps->cur->parent;
- ps->node = ps->cur->node;
}
-
- if ((ps->cur = ps->cur->parent) == NULL)
- ps->node = NODE_ROOT;
- else
- ps->node = ps->cur->node;
+ ps->cur = ps->cur->parent;
}
/*
@@ -500,6 +481,8 @@ pnode_getattr(struct pnode *pn, enum att
{
struct pattr *ap;
+ if (pn == NULL)
+ return ATTRVAL__MAX;
TAILQ_FOREACH(ap, &pn->attrq, child)
if (ap->key == key)
return ap->val;
@@ -515,6 +498,8 @@ pnode_getattr_raw(struct pnode *pn, enum
{
struct pattr *ap;
+ if (pn == NULL)
+ return defval;
TAILQ_FOREACH(ap, &pn->attrq, child)
if (ap->key == key)
return ap->val == ATTRVAL__MAX ? ap->rawval :
@@ -1604,8 +1589,7 @@ readfile(XML_Parser xp, int fd,
int rc;
ssize_t ssz;
- memset(&p, 0, sizeof(struct parse));
-
+ memset(&p, 0, sizeof(p));
p.fname = fn;
p.xml = xp;
Index: extern.h
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/extern.h,v
retrieving revision 1.32
retrieving revision 1.33
diff -Lextern.h -Lextern.h -u -p -r1.32 -r1.33
--- extern.h
+++ extern.h
@@ -20,7 +20,7 @@
* All recognised node types.
*/
enum nodeid {
- NODE_ROOT = 0, /* Must comes first. */
+ NODE_NONE = 0, /* Must come first. */
/* Alpha-ordered hereafter. */
NODE_ACRONYM,
NODE_AFFILIATION,
--
To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2019-03-25 23:15 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-25 23:15 docbook2mdoc: Various parser simplifications and improvements schwarze
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).