* docbook2mdoc: For macro_addarg(), macro_addnode(), and macro_nodeline(),
@ 2019-03-25 17:29 schwarze
0 siblings, 0 replies; only message in thread
From: schwarze @ 2019-03-25 17:29 UTC (permalink / raw)
To: source
Log Message:
-----------
For macro_addarg(), macro_addnode(), and macro_nodeline(), provide
a flag ARG_SINGLE to request quoting of strings containing whitespace.
Use it for .Dt, .Fa, .Fo, .Nm, and .Xr.
Do not \&-escape macros in quoted strings.
Escape quote characters in macro arguments.
NUL-terminate the content of text nodes.
Eliminate the awkward "bufappend()" global output buffer.
Simplify and improve handling of <paramdef>.
Substantial functional improvements, easier to read, yet minus 20 LOC.
Modified Files:
--------------
docbook2mdoc:
docbook2mdoc.c
Revision Data
-------------
Index: docbook2mdoc.c
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/docbook2mdoc.c,v
retrieving revision 1.71
retrieving revision 1.72
diff -Ldocbook2mdoc.c -Ldocbook2mdoc.c -u -p -r1.71 -r1.72
--- docbook2mdoc.c
+++ docbook2mdoc.c
@@ -48,9 +48,6 @@ struct parse {
unsigned int flags; /* document-wide flags */
struct pnode *root; /* root of parse tree */
struct pnode *cur; /* current node in tree */
- char *b; /* NUL-terminated buffer for pre-print */
- size_t bsz; /* current length of b */
- size_t mbsz; /* max bsz allocation */
int level; /* header level, starting at 1 */
enum linestate linestate;
};
@@ -273,7 +270,7 @@ xml_char(void *arg, const XML_Char *p, i
return;
p += i;
sz -= i;
- dat = calloc(1, sizeof(struct pnode));
+ dat = calloc(1, sizeof(*dat));
if (dat == NULL) {
perror(NULL);
exit(1);
@@ -290,14 +287,14 @@ xml_char(void *arg, const XML_Char *p, i
/* Append to current buffer. */
assert(sz >= 0);
- ps->cur->b = realloc(ps->cur->b,
- ps->cur->bsz + (size_t)sz);
+ ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1);
if (ps->cur->b == NULL) {
perror(NULL);
exit(1);
}
memcpy(ps->cur->b + ps->cur->bsz, p, sz);
- ps->cur->bsz += (size_t)sz;
+ ps->cur->bsz += sz;
+ ps->cur->b[ps->cur->bsz] = '\0';
ps->cur->real = ps->cur->b;
}
@@ -305,7 +302,7 @@ static void
pnode_trim(struct pnode *pn)
{
assert(pn->node == NODE_TEXT);
- for ( ; pn->bsz > 0; pn->bsz--)
+ for (; pn->bsz > 0; pn->b[--pn->bsz] = '\0')
if ( ! isspace((unsigned char)pn->b[pn->bsz - 1]))
break;
}
@@ -369,7 +366,7 @@ xml_elem_start(void *arg, const XML_Char
if (node->node == NODE_INLINEEQUATION)
ps->flags |= PARSE_EQN;
- if ((dat = calloc(1, sizeof(struct pnode))) == NULL) {
+ if ((dat = calloc(1, sizeof(*dat))) == NULL) {
perror(NULL);
exit(1);
}
@@ -406,7 +403,7 @@ xml_elem_start(void *arg, const XML_Char
for (val = 0; val < ATTRVAL__MAX; val++)
if (strcmp(att[1], attrvals[val]) == 0)
break;
- pattr = calloc(1, sizeof(struct pattr));
+ pattr = calloc(1, sizeof(*pattr));
pattr->key = key;
pattr->val = val;
if (val == ATTRVAL__MAX)
@@ -526,55 +523,6 @@ pnode_getattr_raw(struct pnode *pn, enum
}
/*
- * Reset the lookaside buffer.
- */
-static void
-bufclear(struct parse *p)
-{
-
- p->b[p->bsz = 0] = '\0';
-}
-
-/*
- * Append NODE_TEXT contents to the current buffer, reallocating its
- * size if necessary.
- * The buffer is ALWAYS NUL-terminated.
- */
-static void
-bufappend(struct parse *p, struct pnode *pn)
-{
-
- assert(pn->node == NODE_TEXT);
- if (p->bsz + pn->bsz + 1 > p->mbsz) {
- p->mbsz = p->bsz + pn->bsz + 1;
- if ((p->b = realloc(p->b, p->mbsz)) == NULL) {
- perror(NULL);
- exit(1);
- }
- }
- memcpy(p->b + p->bsz, pn->b, pn->bsz);
- p->bsz += pn->bsz;
- p->b[p->bsz] = '\0';
-}
-
-/*
- * Recursively append all NODE_TEXT nodes to the buffer.
- * This descends into non-text nodes, but doesn't do anything beyond
- * them.
- * In other words, this is a recursive text grok.
- */
-static void
-bufappend_r(struct parse *p, struct pnode *pn)
-{
- struct pnode *pp;
-
- if (pn->node == NODE_TEXT)
- bufappend(p, pn);
- TAILQ_FOREACH(pp, &pn->childq, child)
- bufappend_r(p, pp);
-}
-
-/*
* Recursively search and return the first instance of "node".
*/
static struct pnode *
@@ -626,36 +574,62 @@ macro_line(struct parse *p, const char *
macro_close(p);
}
-#define MACROLINE_UPPER 1
-#define MACROLINE_NOWS 2
+#define ARG_SPACE 1 /* Insert whitespace before this argument. */
+#define ARG_SINGLE 2 /* Quote argument if it contains whitespace. */
+#define ARG_QUOTED 4 /* We are already in a quoted argument. */
+#define ARG_UPPER 8 /* Convert argument to upper case. */
/*
* Print an argument string on a macro line, collapsing whitespace.
*/
static void
-macro_addarg(struct parse *p, const char *arg, int fl)
+macro_addarg(struct parse *p, const char *arg, int flags)
{
const char *cp;
- int wantspace;
assert(p->linestate == LINE_MACRO);
- wantspace = !(fl & MACROLINE_NOWS);
+
+ /* Quote if requested and necessary. */
+
+ if ((flags & (ARG_SINGLE | ARG_QUOTED)) == ARG_SINGLE) {
+ for (cp = arg; *cp != '\0'; cp++)
+ if (isspace((unsigned char)*cp))
+ break;
+ if (*cp != '\0') {
+ if (flags & ARG_SPACE) {
+ putchar(' ');
+ flags &= ~ ARG_SPACE;
+ }
+ putchar('"');
+ flags = ARG_QUOTED;
+ }
+ }
+
for (cp = arg; *cp != '\0'; cp++) {
+
+ /* Collapse whitespace. */
+
if (isspace((unsigned char)*cp)) {
- wantspace = 1;
+ flags |= ARG_SPACE;
continue;
- } else if (wantspace) {
+ } else if (flags & ARG_SPACE) {
putchar(' ');
- wantspace = 0;
+ flags &= ~ ARG_SPACE;
}
+
/* Escape us if we look like a macro. */
- if ((cp == arg || cp[-1] == ' ') &&
+
+ if ((flags & ARG_QUOTED) == 0 &&
+ (cp == arg || isspace((unsigned char)cp[-1])) &&
isupper((unsigned char)cp[0]) &&
islower((unsigned char)cp[1]) &&
(cp[2] == '\0' || cp[2] == ' ' ||
(islower((unsigned char)cp[2]) &&
(cp[3] == '\0' || cp[3] == ' '))))
fputs("\\&", stdout);
- if (fl & MACROLINE_UPPER)
+
+ if (*cp == '"')
+ fputs("\\(dq", stdout);
+ else if (flags & ARG_UPPER)
putchar(toupper((unsigned char)*cp));
else
putchar(*cp);
@@ -668,26 +642,73 @@ static void
macro_argline(struct parse *p, const char *name, const char *arg)
{
macro_open(p, name);
- macro_addarg(p, arg, 0);
+ macro_addarg(p, arg, ARG_SPACE);
macro_close(p);
}
/*
- * Recurse nodes to print arguments on a macro line.
+ * Recursively append text from the children of a node to a macro line.
*/
static void
-macro_addnode(struct parse *p, struct pnode *pn, int fl)
+macro_addnode(struct parse *p, struct pnode *pn, int flags)
{
- bufclear(p);
- bufappend_r(p, pn);
- macro_addarg(p, p->b, fl);
+ int quote_now;
+
+ assert(p->linestate == LINE_MACRO);
+
+ /*
+ * If the only child is a text node, just add that text,
+ * letting macro_addarg() decide about quoting.
+ */
+
+ pn = TAILQ_FIRST(&pn->childq);
+ if (pn != NULL && pn->node == NODE_TEXT &&
+ TAILQ_NEXT(pn, child) == NULL) {
+ macro_addarg(p, pn->b, flags);
+ return;
+ }
+
+ /*
+ * If we want the argument quoted and are not already
+ * in a quoted context, quote now.
+ */
+
+ quote_now = 0;
+ if (flags & ARG_SINGLE) {
+ if ((flags & ARG_QUOTED) == 0) {
+ if (flags & ARG_SPACE) {
+ putchar(' ');
+ flags &= ~ARG_SPACE;
+ }
+ putchar('"');
+ flags |= ARG_QUOTED;
+ quote_now = 1;
+ }
+ flags &= ~ARG_SINGLE;
+ }
+
+ /*
+ * Iterate to child and sibling nodes,
+ * inserting whitespace between nodes.
+ */
+
+ while (pn != NULL) {
+ if (pn->node == NODE_TEXT)
+ macro_addarg(p, pn->b, flags);
+ else
+ macro_addnode(p, pn, flags);
+ pn = TAILQ_NEXT(pn, child);
+ flags |= ARG_SPACE;
+ }
+ if (quote_now)
+ putchar('"');
}
static void
-macro_nodeline(struct parse *p, const char *name, struct pnode *pn)
+macro_nodeline(struct parse *p, const char *name, struct pnode *pn, int flags)
{
macro_open(p, name);
- macro_addnode(p, pn, 0);
+ macro_addnode(p, pn, ARG_SPACE | flags);
macro_close(p);
}
@@ -780,7 +801,9 @@ pnode_printrefsect(struct parse *p, stru
return;
level = ++p->level;
- flags = level == 1 ? MACROLINE_UPPER : 0;
+ flags = ARG_SPACE;
+ if (level == 1)
+ flags |= ARG_UPPER;
if (level < 3) {
switch (pn->node) {
case NODE_CAUTION:
@@ -838,7 +861,7 @@ pnode_printrefsect(struct parse *p, stru
macro_addnode(p, pp, flags);
pnode_unlink(pp);
} else
- macro_addarg(p, title, 0);
+ macro_addarg(p, title, ARG_SPACE | ARG_QUOTED);
macro_close(p);
}
@@ -859,13 +882,13 @@ pnode_printciterefentry(struct parse *p,
}
macro_open(p, "Xr");
if (title == NULL)
- macro_addarg(p, "unknown", 0);
+ macro_addarg(p, "unknown", ARG_SPACE);
else
- macro_addnode(p, title, 0);
+ macro_addnode(p, title, ARG_SPACE | ARG_SINGLE);
if (manvol == NULL)
- macro_addarg(p, "1", 0);
+ macro_addarg(p, "1", ARG_SPACE);
else
- macro_addnode(p, manvol, 0);
+ macro_addnode(p, manvol, ARG_SPACE | ARG_SINGLE);
macro_close(p);
pnode_unlinksub(pn);
}
@@ -884,13 +907,13 @@ pnode_printrefmeta(struct parse *p, stru
}
macro_open(p, "Dt");
if (title == NULL)
- macro_addarg(p, "UNKNOWN", 0);
+ macro_addarg(p, "UNKNOWN", ARG_SPACE);
else
- macro_addnode(p, title, MACROLINE_UPPER);
+ macro_addnode(p, title, ARG_SPACE | ARG_SINGLE | ARG_UPPER);
if (manvol == NULL)
- macro_addarg(p, "1", 0);
+ macro_addarg(p, "1", ARG_SPACE);
else
- macro_addnode(p, manvol, 0);
+ macro_addnode(p, manvol, ARG_SPACE | ARG_SINGLE);
macro_close(p);
pnode_unlink(pn);
}
@@ -908,38 +931,12 @@ pnode_printfuncdef(struct parse *p, stru
func = pp;
}
if (ftype != NULL)
- macro_nodeline(p, "Ft", ftype);
+ macro_argline(p, "Ft", ftype->b);
macro_open(p, "Fo");
if (func == NULL)
- macro_addarg(p, "UNKNOWN", 0);
+ macro_addarg(p, "UNKNOWN", ARG_SPACE);
else
- macro_addnode(p, func, 0);
- macro_close(p);
-}
-
-static void
-pnode_printparamdef(struct parse *p, struct pnode *pn)
-{
- struct pnode *pp, *ptype, *param;
- int flags;
-
- ptype = param = NULL;
- TAILQ_FOREACH(pp, &pn->childq, child) {
- if (pp->node == NODE_TEXT)
- ptype = pp;
- else if (pp->node == NODE_PARAMETER)
- param = pp;
- }
- macro_open(p, "Fa \"");
- flags = MACROLINE_NOWS;
- if (ptype != NULL) {
- macro_addnode(p, ptype, flags);
- flags = 0;
- }
- if (param != NULL)
- macro_addnode(p, param, flags);
- flags = MACROLINE_NOWS;
- macro_addarg(p, "\"", flags);
+ macro_addnode(p, func, ARG_SPACE | ARG_SINGLE);
macro_close(p);
}
@@ -1016,7 +1013,7 @@ pnode_printfuncprototype(struct parse *p
TAILQ_FOREACH(pp, &pn->childq, child)
if (pp->node == NODE_PARAMDEF)
- pnode_printparamdef(p, pp);
+ macro_nodeline(p, "Fa", pp, ARG_SINGLE);
macro_line(p, "Fc");
pnode_unlinksub(pn);
@@ -1053,7 +1050,7 @@ pnode_printarg(struct parse *p, struct p
macro_open(p, "Ar");
pnode_print(p, pp);
if (isrep && pp->node == NODE_TEXT)
- macro_addarg(p, "...", 0);
+ macro_addarg(p, "...", ARG_SPACE);
}
pnode_unlinksub(pn);
}
@@ -1098,8 +1095,8 @@ pnode_printgroup(struct parse *p, struct
while (np != NULL) {
if (pp->node != np->node)
break;
- macro_addarg(p, "|", 0);
- macro_addnode(p, np, 0);
+ macro_addarg(p, "|", ARG_SPACE);
+ macro_addnode(p, np, ARG_SPACE);
pp = np;
np = TAILQ_NEXT(np, child);
}
@@ -1123,8 +1120,9 @@ pnode_printprologue(struct parse *p, str
else {
macro_open(p, "Dt");
macro_addarg(p,
- pnode_getattr_raw(p->root, ATTRKEY_ID, "UNKNOWN"), 0);
- macro_addarg(p, "1", 0);
+ pnode_getattr_raw(p->root, ATTRKEY_ID, "UNKNOWN"),
+ ARG_SPACE | ARG_SINGLE | ARG_UPPER);
+ macro_addarg(p, "1", ARG_SPACE);
macro_close(p);
}
macro_line(p, "Os");
@@ -1151,7 +1149,7 @@ pnode_printvarlistentry(struct parse *p,
if (pp->node != NODE_TERM)
continue;
if ( ! first)
- macro_addarg(p, ",", MACROLINE_NOWS);
+ macro_addarg(p, ",", 0);
pnode_print(p, pp);
first = 0;
}
@@ -1232,7 +1230,7 @@ pnode_printvariablelist(struct parse *p,
if (pp->node == NODE_VARLISTENTRY)
pnode_print(p, pp);
else
- macro_nodeline(p, "It", pp);
+ macro_nodeline(p, "It", pp, 0);
}
macro_line(p, "El");
pnode_unlinksub(pn);
@@ -1379,11 +1377,7 @@ pnode_print(struct parse *p, struct pnod
pnode_printpara(p, pn);
break;
case NODE_PARAMETER:
- /* Suppress non-text children... */
- macro_open(p, "Fa \"");
- macro_addnode(p, pn, MACROLINE_NOWS);
- macro_addarg(p, "\"", MACROLINE_NOWS);
- macro_close(p);
+ macro_nodeline(p, "Fa", pn, ARG_SINGLE);
pnode_unlinksub(pn);
break;
case NODE_QUOTE:
@@ -1403,7 +1397,7 @@ pnode_print(struct parse *p, struct pnod
case NODE_REFNAME:
/* Suppress non-text children... */
macro_open(p, "Nm");
- macro_addnode(p, pn, 0);
+ macro_addnode(p, pn, ARG_SPACE | ARG_SINGLE);
pnode_unlinksub(pn);
break;
case NODE_REFNAMEDIV:
@@ -1440,9 +1434,7 @@ pnode_print(struct parse *p, struct pnod
pnode_printtable(p, pn);
break;
case NODE_TEXT:
- bufclear(p);
- bufappend(p, pn);
- if (p->bsz == 0) {
+ if (pn->bsz == 0) {
assert(pn->real != pn->b);
break;
}
@@ -1457,7 +1449,7 @@ pnode_print(struct parse *p, struct pnod
* XXX: all whitespace, including tabs (?).
* Remember to escape control characters and escapes.
*/
- cp = p->b;
+ cp = pn->b;
/*
* There's often a superfluous "-" in its <option> tags
@@ -1574,7 +1566,7 @@ pnode_print(struct parse *p, struct pnod
pn->parent->node == NODE_REFNAMEDIV &&
TAILQ_NEXT(pn, child) != NULL &&
TAILQ_NEXT(pn, child)->node == NODE_REFNAME)
- macro_addarg(p, ",", 0);
+ macro_addarg(p, ",", ARG_SPACE);
if (sv == LINE_NEW)
macro_close(p);
break;
@@ -1614,7 +1606,6 @@ readfile(XML_Parser xp, int fd,
memset(&p, 0, sizeof(struct parse));
- p.b = malloc(p.bsz = p.mbsz = 1024);
p.fname = fn;
p.xml = xp;
@@ -1641,14 +1632,12 @@ readfile(XML_Parser xp, int fd,
if (p.linestate != LINE_NEW)
putchar('\n');
pnode_free(p.root);
- free(p.b);
return rc != 0 && p.stop == 0;
}
/* Read error has occured. */
perror(fn);
pnode_free(p.root);
- free(p.b);
return 0;
}
--
To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2019-03-25 17:29 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-25 17:29 docbook2mdoc: For macro_addarg(), macro_addnode(), and macro_nodeline(), schwarze
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).