From: schwarze@mandoc.bsd.lv
To: source@mandoc.bsd.lv
Subject: docbook2mdoc: Various parser simplifications and improvements.
Date: Mon, 25 Mar 2019 18:15:14 -0500 (EST) [thread overview]
Message-ID: <e3fcb72f77ff9a79@fantadrom.bsd.lv> (raw)
Log Message:
-----------
Various parser simplifications and improvements.
Delete the redundant member "node" of struct parse.
The same is already available from cur->node.
No need to check in xml_char() whether an element is open.
Text outside the document element results in expat errors "not
well-formed (invalid token)" or "junk after document element"
and the function xml_char() is not called.
No need to check in xml_elem_end() whether an element is open.
Bogus closing tags result in expat errors "not well-formed (invalid
token)" or "mismatched tag" and the function xml_elem_end() is not
called.
In xml_elem_start(): no point in skipping the element name check
for the document element; and the error "multiple refentries"
can no longer happen due to earlier cleanups.
Modified Files:
--------------
docbook2mdoc:
docbook2mdoc.c
extern.h
Revision Data
-------------
Index: docbook2mdoc.c
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/docbook2mdoc.c,v
retrieving revision 1.72
retrieving revision 1.73
diff -Ldocbook2mdoc.c -Ldocbook2mdoc.c -u -p -r1.72 -r1.73
--- docbook2mdoc.c
+++ docbook2mdoc.c
@@ -41,7 +41,6 @@ enum linestate {
*/
struct parse {
XML_Parser xml;
- enum nodeid node; /* current (NODE_ROOT if pre-tree) */
const char *fname; /* filename */
int stop; /* should we stop now? */
#define PARSE_EQN 1
@@ -249,12 +248,9 @@ xml_char(void *arg, const XML_Char *p, i
struct pnode *dat;
int i;
- /* Stopped or no tree yet. */
- if (ps->stop || ps->node == NODE_ROOT)
+ if (ps->stop)
return;
- assert(ps->cur != NULL);
-
/*
* Are we in the midst of processing text?
* If we're not processing text right now, then create a text
@@ -262,7 +258,7 @@ xml_char(void *arg, const XML_Char *p, i
* However, don't do so unless we have some non-whitespace to
* process: strip out all leading whitespace to be sure.
*/
- if (ps->node != NODE_TEXT) {
+ if (ps->cur->node != NODE_TEXT) {
for (i = 0; i < sz; i++)
if ( ! isspace((unsigned char)p[i]))
break;
@@ -276,7 +272,7 @@ xml_char(void *arg, const XML_Char *p, i
exit(1);
}
- dat->node = ps->node = NODE_TEXT;
+ dat->node = NODE_TEXT;
dat->parent = ps->cur;
TAILQ_INIT(&dat->childq);
TAILQ_INIT(&dat->attrq);
@@ -337,10 +333,9 @@ xml_elem_start(void *arg, const XML_Char
return;
/* Close out text node, if applicable... */
- if (ps->node == NODE_TEXT) {
+ if (ps->cur != NULL && ps->cur->node == NODE_TEXT) {
pnode_trim(ps->cur);
ps->cur = ps->cur->parent;
- ps->node = ps->cur->node;
}
for (node = nodes; node->name != NULL; node++)
@@ -348,19 +343,11 @@ xml_elem_start(void *arg, const XML_Char
break;
if (node->name == NULL) {
- if (ps->node == NODE_ROOT)
- return;
fprintf(stderr, "%s:%zu:%zu: unknown node \"%s\"\n",
ps->fname, XML_GetCurrentLineNumber(ps->xml),
XML_GetCurrentColumnNumber(ps->xml), name);
ps->stop = 1;
return;
- } else if (ps->node == NODE_ROOT && ps->root != NULL) {
- fprintf(stderr, "%s:%zu:%zu: multiple refentries\n",
- ps->fname, XML_GetCurrentLineNumber(ps->xml),
- XML_GetCurrentColumnNumber(ps->xml));
- ps->stop = 1;
- return;
}
if (node->node == NODE_INLINEEQUATION)
@@ -371,7 +358,7 @@ xml_elem_start(void *arg, const XML_Char
exit(1);
}
- dat->node = ps->node = node->node;
+ dat->node = node->node;
dat->parent = ps->cur;
TAILQ_INIT(&dat->childq);
TAILQ_INIT(&dat->attrq);
@@ -410,36 +397,30 @@ xml_elem_start(void *arg, const XML_Char
pattr->rawval = strdup(att[1]);
TAILQ_INSERT_TAIL(&dat->attrq, pattr, child);
}
-
}
/*
* Roll up the parse tree.
* If we're at a text node, roll that one up first.
- * If we hit the root, then assign ourselves as the NODE_ROOT.
*/
static void
xml_elem_end(void *arg, const XML_Char *name)
{
struct parse *ps = arg;
- /* FIXME: find a better way to ditch other namespaces. */
- if (ps->stop || ps->node == NODE_ROOT)
+ if (ps->stop)
return;
- else if (strcmp(name, "xi:include") == 0)
+
+ /* FIXME: find a better way to ditch other namespaces. */
+ if (strcmp(name, "xi:include") == 0)
return;
/* Close out text node, if applicable... */
- if (ps->node == NODE_TEXT) {
+ if (ps->cur->node == NODE_TEXT) {
pnode_trim(ps->cur);
ps->cur = ps->cur->parent;
- ps->node = ps->cur->node;
}
-
- if ((ps->cur = ps->cur->parent) == NULL)
- ps->node = NODE_ROOT;
- else
- ps->node = ps->cur->node;
+ ps->cur = ps->cur->parent;
}
/*
@@ -500,6 +481,8 @@ pnode_getattr(struct pnode *pn, enum att
{
struct pattr *ap;
+ if (pn == NULL)
+ return ATTRVAL__MAX;
TAILQ_FOREACH(ap, &pn->attrq, child)
if (ap->key == key)
return ap->val;
@@ -515,6 +498,8 @@ pnode_getattr_raw(struct pnode *pn, enum
{
struct pattr *ap;
+ if (pn == NULL)
+ return defval;
TAILQ_FOREACH(ap, &pn->attrq, child)
if (ap->key == key)
return ap->val == ATTRVAL__MAX ? ap->rawval :
@@ -1604,8 +1589,7 @@ readfile(XML_Parser xp, int fd,
int rc;
ssize_t ssz;
- memset(&p, 0, sizeof(struct parse));
-
+ memset(&p, 0, sizeof(p));
p.fname = fn;
p.xml = xp;
Index: extern.h
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/extern.h,v
retrieving revision 1.32
retrieving revision 1.33
diff -Lextern.h -Lextern.h -u -p -r1.32 -r1.33
--- extern.h
+++ extern.h
@@ -20,7 +20,7 @@
* All recognised node types.
*/
enum nodeid {
- NODE_ROOT = 0, /* Must comes first. */
+ NODE_NONE = 0, /* Must come first. */
/* Alpha-ordered hereafter. */
NODE_ACRONYM,
NODE_AFFILIATION,
--
To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv
reply other threads:[~2019-03-25 23:15 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=e3fcb72f77ff9a79@fantadrom.bsd.lv \
--to=schwarze@mandoc.bsd.lv \
--cc=source@mandoc.bsd.lv \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).