* docbook2mdoc: Handle DOCTYPE declarations containing ENTITY definitions.
@ 2019-04-08 14:38 schwarze
0 siblings, 0 replies; only message in thread
From: schwarze @ 2019-04-08 14:38 UTC (permalink / raw)
To: source
Log Message:
-----------
Handle DOCTYPE declarations containing ENTITY definitions.
Also make <sbr> self-closing even without a trailing slash.
Modified Files:
--------------
docbook2mdoc:
node.c
node.h
parse.c
Revision Data
-------------
Index: node.c
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/node.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -Lnode.c -Lnode.c -u -p -r1.4 -r1.5
--- node.c
+++ node.c
@@ -29,11 +29,15 @@ static const char *const attrkeys[ATTRKE
"class",
"close",
"cols",
+ "DEFINITION",
"endterm",
"id",
"linkend",
+ "NAME",
"open",
+ "PUBLIC",
"rep",
+ "SYSTEM",
"url",
"xlink:href"
};
Index: node.h
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/node.h,v
retrieving revision 1.12
retrieving revision 1.13
diff -Lnode.h -Lnode.h -u -p -r1.12 -r1.13
--- node.h
+++ node.h
@@ -48,9 +48,11 @@ enum nodeid {
NODE_CONTRIB,
NODE_COPYRIGHT,
NODE_DATE,
+ NODE_DOCTYPE,
NODE_EDITOR,
NODE_EMAIL,
NODE_EMPHASIS,
+ NODE_ENTITY,
NODE_ENTRY,
NODE_ENVAR,
NODE_ERRORNAME,
@@ -155,11 +157,15 @@ enum attrkey {
ATTRKEY_CLASS,
ATTRKEY_CLOSE,
ATTRKEY_COLS,
+ ATTRKEY_DEFINITION,
ATTRKEY_ENDTERM,
ATTRKEY_ID,
ATTRKEY_LINKEND,
+ ATTRKEY_NAME,
ATTRKEY_OPEN,
+ ATTRKEY_PUBLIC,
ATTRKEY_REP,
+ ATTRKEY_SYSTEM,
ATTRKEY_URL,
ATTRKEY_XLINK_HREF,
ATTRKEY__MAX
Index: parse.c
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/parse.c,v
retrieving revision 1.22
retrieving revision 1.23
diff -Lparse.c -Lparse.c -u -p -r1.22 -r1.23
--- parse.c
+++ parse.c
@@ -45,6 +45,7 @@ enum pstate {
struct parse {
const char *fname; /* Name of the input file. */
struct ptree *tree; /* Complete parse result. */
+ struct pnode *doctype;
struct pnode *cur; /* Current node in the tree. */
enum nodeid ncur; /* Type of the current node. */
int line; /* Line number in the input file. */
@@ -52,9 +53,11 @@ struct parse {
int nline; /* Line number of next token. */
int ncol; /* Column number of next token. */
int del; /* Levels of nested nodes being deleted. */
- int spc; /* Whitespace before the next element. */
- int attr; /* The most recent attribute is valid. */
- int warn;
+ int flags;
+#define PFLAG_WARN (1 << 0) /* Print warning messages. */
+#define PFLAG_SPC (1 << 1) /* Whitespace before the next element. */
+#define PFLAG_ATTR (1 << 2) /* The most recent attribute is valid. */
+#define PFLAG_EEND (1 << 3) /* This element is self-closing. */
};
struct element {
@@ -87,9 +90,12 @@ static const struct element elements[] =
{ "contrib", NODE_CONTRIB },
{ "copyright", NODE_COPYRIGHT },
{ "date", NODE_DATE },
+ { "!doctype", NODE_DOCTYPE },
+ { "!DOCTYPE", NODE_DOCTYPE },
{ "editor", NODE_EDITOR },
{ "email", NODE_EMAIL },
{ "emphasis", NODE_EMPHASIS },
+ { "!ENTITY", NODE_ENTITY },
{ "entry", NODE_ENTRY },
{ "envar", NODE_ENVAR },
{ "errorname", NODE_ERRORNAME },
@@ -265,6 +271,10 @@ static const struct entity entities[] =
{ NULL, NULL }
};
+static size_t parse_string(struct parse *, char *, size_t,
+ enum pstate *, int);
+
+
static void
error_msg(struct parse *p, const char *fmt, ...)
{
@@ -283,7 +293,7 @@ warn_msg(struct parse *p, const char *fm
{
va_list ap;
- if (p->warn == 0)
+ if ((p->flags & PFLAG_WARN) == 0)
return;
fprintf(stderr, "%s:%d:%d: warning: ", p->fname, p->line, p->col);
@@ -318,7 +328,7 @@ xml_char(struct parse *ps, const char *p
exit(1);
}
dat->node = NODE_TEXT;
- dat->spc = ps->spc;
+ dat->spc = (ps->flags & PFLAG_SPC) != 0;
dat->parent = ps->cur;
TAILQ_INIT(&dat->childq);
TAILQ_INIT(&dat->attrq);
@@ -333,18 +343,18 @@ xml_char(struct parse *ps, const char *p
/* Append to the current text node. */
assert(sz >= 0);
- newsz = ps->cur->bsz + (ps->cur->bsz && ps->spc) + sz;
+ newsz = ps->cur->bsz + (ps->cur->bsz && (ps->flags & PFLAG_SPC)) + sz;
ps->cur->b = realloc(ps->cur->b, newsz + 1);
if (ps->cur->b == NULL) {
perror(NULL);
exit(1);
}
- if (ps->cur->bsz && ps->spc)
+ if (ps->cur->bsz && (ps->flags & PFLAG_SPC))
ps->cur->b[ps->cur->bsz++] = ' ';
memcpy(ps->cur->b + ps->cur->bsz, p, sz);
ps->cur->b[ps->cur->bsz = newsz] = '\0';
ps->cur->real = ps->cur->b;
- ps->spc = 0;
+ ps->flags &= ~PFLAG_SPC;
}
/*
@@ -360,7 +370,7 @@ pnode_closetext(struct parse *p)
p->cur = n->parent;
while (n->bsz > 0 && isspace((unsigned char)n->b[n->bsz - 1])) {
n->b[--n->bsz] = '\0';
- p->spc = 1;
+ p->flags |= PFLAG_SPC;
}
}
@@ -369,6 +379,9 @@ xml_entity(struct parse *p, const char *
{
const struct entity *entity;
struct pnode *dat;
+ const char *ccp;
+ char *cp;
+ enum pstate pstate;
if (p->del > 0)
return;
@@ -388,6 +401,25 @@ xml_entity(struct parse *p, const char *
break;
if (entity->roff == NULL) {
+ if (p->doctype != NULL) {
+ TAILQ_FOREACH(dat, &p->doctype->childq, child) {
+ if ((ccp = pnode_getattr_raw(dat,
+ ATTRKEY_NAME, NULL)) == NULL ||
+ strcmp(ccp, name) != 0 ||
+ (ccp = pnode_getattr_raw(dat,
+ ATTRKEY_DEFINITION, NULL)) == NULL)
+ continue;
+ if ((cp = strdup(ccp)) == NULL) {
+ perror(NULL);
+ exit(1);
+ }
+ pstate = PARSE_ELEM;
+ parse_string(p, cp, strlen(cp), &pstate, 0);
+ p->flags &= ~PFLAG_SPC;
+ free(cp);
+ return;
+ }
+ }
error_msg(p, "unknown entity &%s;", name);
return;
}
@@ -400,12 +432,12 @@ xml_entity(struct parse *p, const char *
}
dat->node = NODE_ESCAPE;
dat->bsz = strlen(dat->b);
- dat->spc = p->spc;
+ dat->spc = (p->flags & PFLAG_SPC) != 0;
dat->parent = p->cur;
TAILQ_INIT(&dat->childq);
TAILQ_INIT(&dat->attrq);
TAILQ_INSERT_TAIL(&p->cur->childq, dat, child);
- p->spc = 0;
+ p->flags &= ~PFLAG_SPC;
}
/*
@@ -417,15 +449,13 @@ xml_elem_start(struct parse *ps, const c
const struct element *elem;
struct pnode *dat;
- if (*name == '!' || *name == '?')
- return;
-
/*
* An ancestor is excluded from the tree;
* keep track of the number of levels excluded.
*/
if (ps->del > 0) {
- ps->del++;
+ if (*name != '!' && *name != '?')
+ ps->del++;
return;
}
@@ -435,8 +465,11 @@ xml_elem_start(struct parse *ps, const c
if (strcmp(elem->name, name) == 0)
break;
- if (elem->name == NULL)
+ if (elem->name == NULL) {
+ if (*name == '!' || *name == '?')
+ return;
error_msg(ps, "unknown element <%s>", name);
+ }
ps->ncur = elem->node;
@@ -470,6 +503,11 @@ xml_elem_start(struct parse *ps, const c
*/
switch (dat->node = elem->node) {
+ case NODE_DOCTYPE:
+ case NODE_ENTITY:
+ case NODE_SBR:
+ ps->flags |= PFLAG_EEND;
+ /* FALLTHROUGH */
case NODE_APPENDIX:
case NODE_AUTHORGROUP:
case NODE_BLOCKQUOTE:
@@ -493,7 +531,6 @@ xml_elem_start(struct parse *ps, const c
case NODE_REFNAMEDIV:
case NODE_REFSYNOPSISDIV:
case NODE_ROW:
- case NODE_SBR:
case NODE_SCREEN:
case NODE_SECTION:
case NODE_SYNOPSIS:
@@ -506,7 +543,7 @@ xml_elem_start(struct parse *ps, const c
dat->spc = 1;
break;
default:
- dat->spc = ps->spc;
+ dat->spc = (ps->flags & PFLAG_SPC) != 0;
break;
}
dat->parent = ps->cur;
@@ -517,7 +554,12 @@ xml_elem_start(struct parse *ps, const c
TAILQ_INSERT_TAIL(&ps->cur->childq, dat, child);
ps->cur = dat;
- if (ps->tree->root == NULL)
+ if (dat->node == NODE_DOCTYPE) {
+ if (ps->doctype == NULL)
+ ps->doctype = dat;
+ else
+ error_msg(ps, "duplicate doctype");
+ } else if (dat->parent == NULL && ps->tree->root == NULL)
ps->tree->root = dat;
}
@@ -525,12 +567,21 @@ static void
xml_attrkey(struct parse *ps, const char *name)
{
struct pattr *attr;
+ const char *value;
enum attrkey key;
if (ps->del > 0 || ps->ncur == NODE_IGNORE || *name == '\0')
return;
+
+ if ((ps->ncur == NODE_DOCTYPE || ps->ncur == NODE_ENTITY) &&
+ TAILQ_FIRST(&ps->cur->attrq) == NULL) {
+ value = name;
+ name = "NAME";
+ } else
+ value = NULL;
+
if ((key = attrkey_parse(name)) == ATTRKEY__MAX) {
- ps->attr = 0;
+ ps->flags &= ~PFLAG_ATTR;
return;
}
if ((attr = calloc(1, sizeof(*attr))) == NULL) {
@@ -539,9 +590,19 @@ xml_attrkey(struct parse *ps, const char
}
attr->key = key;
attr->val = ATTRVAL__MAX;
- attr->rawval = NULL;
+ if (value == NULL) {
+ attr->rawval = NULL;
+ ps->flags |= PFLAG_ATTR;
+ } else {
+ if ((attr->rawval = strdup(value)) == NULL) {
+ perror(NULL);
+ exit(1);
+ }
+ ps->flags &= ~PFLAG_ATTR;
+ }
TAILQ_INSERT_TAIL(&ps->cur->attrq, attr, child);
- ps->attr = 1;
+ if (ps->ncur == NODE_ENTITY && key == ATTRKEY_NAME)
+ xml_attrkey(ps, "DEFINITION");
}
static void
@@ -549,7 +610,8 @@ xml_attrval(struct parse *ps, const char
{
struct pattr *attr;
- if (ps->del > 0 || ps->ncur == NODE_IGNORE || ps->attr == 0)
+ if (ps->del > 0 || ps->ncur == NODE_IGNORE ||
+ (ps->flags & PFLAG_ATTR) == 0)
return;
if ((attr = TAILQ_LAST(&ps->cur->attrq, pattrq)) == NULL)
return;
@@ -598,6 +660,9 @@ xml_elem_end(struct parse *ps, const cha
break;
case NODE_IGNORE:
break;
+ case NODE_DOCTYPE:
+ ps->flags &= ~PFLAG_EEND;
+ /* FALLTHROUGH */
default:
if (ps->cur == NULL || node != ps->cur->node) {
warn_msg(ps, "element not open: </%s>", name);
@@ -611,11 +676,13 @@ xml_elem_end(struct parse *ps, const cha
* obviously better than discarding it or crashing.
*/
- if (ps->cur->parent == NULL)
- ps->tree->flags |= TREE_CLOSED;
- else
+ if (ps->cur->parent != NULL || node == NODE_DOCTYPE) {
ps->cur = ps->cur->parent;
- ps->spc = 0;
+ if (ps->cur != NULL)
+ ps->ncur = ps->cur->node;
+ } else
+ ps->tree->flags |= TREE_CLOSED;
+ ps->flags &= ~PFLAG_SPC;
break;
}
assert(ps->del == 0);
@@ -633,7 +700,10 @@ parse_alloc(int warn)
free(p);
return NULL;
}
- p->warn = warn;
+ if (warn)
+ p->flags |= PFLAG_WARN;
+ else
+ p->flags &= ~PFLAG_WARN;
return p;
}
@@ -709,7 +779,6 @@ parse_string(struct parse *p, char *b, s
size_t pend; /* Offset of the end of the current word. */
int elem_end;
- p->spc = 0;
pend = 0;
for (;;) {
@@ -722,7 +791,7 @@ parse_string(struct parse *p, char *b, s
if ((poff = pend) == rlen)
break;
if (isspace((unsigned char)b[pend])) {
- p->spc = 1;
+ p->flags |= PFLAG_SPC;
increment(p, b, &pend, refill);
continue;
}
@@ -763,6 +832,8 @@ parse_string(struct parse *p, char *b, s
b[pend - 1] = '\0';
elem_end = 1;
}
+ if (p->flags & PFLAG_EEND)
+ elem_end = 1;
}
b[pend] = '\0';
if (pend < rlen)
@@ -774,6 +845,23 @@ parse_string(struct parse *p, char *b, s
/* Look for an attribute name. */
} else if (*pstate == PARSE_TAG) {
+ switch (p->ncur) {
+ case NODE_DOCTYPE:
+ if (b[pend] == '[') {
+ *pstate = PARSE_ELEM;
+ increment(p, b, &pend, refill);
+ continue;
+ }
+ /* FALLTHROUGH */
+ case NODE_ENTITY:
+ if (b[pend] == '"' || b[pend] == '\'') {
+ *pstate = PARSE_ARG;
+ continue;
+ }
+ break;
+ default:
+ break;
+ }
if (advance(p, b, rlen, &pend, " =>", refill))
break;
elem_end = 0;
@@ -784,6 +872,8 @@ parse_string(struct parse *p, char *b, s
b[pend - 1] = '\0';
elem_end = 1;
}
+ if (p->flags & PFLAG_EEND)
+ elem_end = 1;
break;
case '=':
*pstate = PARSE_ARG;
@@ -832,11 +922,21 @@ parse_string(struct parse *p, char *b, s
if (b[++poff] == '/') {
elem_end = 1;
poff++;
- } else
+ } else {
xml_elem_start(p, b + poff);
+ if (*pstate == PARSE_ELEM &&
+ p->flags & PFLAG_EEND)
+ elem_end = 1;
+ }
if (elem_end)
xml_elem_end(p, b + poff);
+ /* Close a doctype. */
+
+ } else if (p->ncur == NODE_DOCTYPE && b[poff] == ']') {
+ *pstate = PARSE_TAG;
+ increment(p, b, &pend, refill);
+
/* Process an entity. */
} else if (b[poff] == '&') {
@@ -899,5 +999,6 @@ parse_file(struct parse *p, int fd, cons
pnode_closetext(p);
if ((p->tree->flags & TREE_CLOSED) == 0)
warn_msg(p, "document not closed");
+ pnode_unlink(p->doctype);
return p->tree;
}
--
To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2019-04-08 14:38 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-08 14:38 docbook2mdoc: Handle DOCTYPE declarations containing ENTITY definitions schwarze
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).