* docbook2mdoc: Store the information whether a node is preceded by
@ 2019-04-06 22:38 schwarze
0 siblings, 0 replies; only message in thread
From: schwarze @ 2019-04-06 22:38 UTC (permalink / raw)
To: source
Log Message:
-----------
Record in the node tree whether each node is preceded by whitespace.
Use that information in the formatter to suppress the insertion of
whitespace in text-text, macro-text, and macro-macro node sequences.
Text-macro sequences are not yet handled. They are more complicated
because they require emitting a .Pf macro as part of a text node
depending on the spacing properties of the *following* macro node.
Modified Files:
--------------
docbook2mdoc:
docbook2mdoc.c
macro.c
macro.h
node.h
parse.c
Revision Data
-------------
Index: docbook2mdoc.c
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/docbook2mdoc.c,v
retrieving revision 1.91
retrieving revision 1.92
diff -Ldocbook2mdoc.c -Ldocbook2mdoc.c -u -p -r1.91 -r1.92
--- docbook2mdoc.c
+++ docbook2mdoc.c
@@ -642,6 +642,7 @@ pnode_print(struct format *p, struct pno
if (pn == NULL)
return;
+ p->spc = pn->spc;
sv = p->linestate;
switch (pn->node) {
@@ -674,6 +675,7 @@ pnode_print(struct format *p, struct pno
break;
case NODE_EDITOR:
print_text(p, "editor:", ARG_SPACE);
+ sv = LINE_TEXT;
macro_open(p, "An");
break;
case NODE_EMAIL:
@@ -692,7 +694,7 @@ pnode_print(struct format *p, struct pno
case NODE_ESCAPE:
if (p->linestate == LINE_NEW)
p->linestate = LINE_TEXT;
- else
+ else if (pn->spc || p->linestate == LINE_MACRO)
putchar(' ');
fputs(pn->b, stdout);
break;
@@ -822,7 +824,7 @@ pnode_print(struct format *p, struct pno
}
if (p->linestate == LINE_NEW)
p->linestate = LINE_TEXT;
- else
+ else if (pn->spc || p->linestate == LINE_MACRO)
putchar(' ');
/*
Index: macro.c
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/macro.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -Lmacro.c -Lmacro.c -u -p -r1.5 -r1.6
--- macro.c
+++ macro.c
@@ -17,6 +17,7 @@
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
+#include <string.h>
#include "node.h"
#include "macro.h"
@@ -39,6 +40,8 @@ macro_open(struct format *f, const char
break;
case LINE_MACRO:
putchar(' ');
+ if (f->spc == 0)
+ fputs("Ns ", stdout);
break;
}
fputs(name, stdout);
@@ -61,20 +64,68 @@ macro_line(struct format *f, const char
}
/*
- * If the next node is a text node starting with closing punctuation,
- * emit the closing punctuation as a trailing macro argument.
+ * At the end of a macro, decide whether the line needs to remain open
+ * because the next node follows without intervening whitespace;
+ * otherwise, close the line.
*/
void
macro_closepunct(struct format *f, struct pnode *pn)
{
- if ((pn = TAILQ_NEXT(pn, child)) != NULL &&
- pn->node == NODE_TEXT && pn->bsz > 0 &&
- (pn->b[0] == ',' || pn->b[0] == '.') &&
- (pn->bsz == 1 || isspace((unsigned char)pn->b[1]))) {
- putchar(' ');
- putchar(pn->b[0]);
- pn->b++;
- pn->bsz--;
+ char *cp;
+
+ if ((pn = TAILQ_NEXT(pn, child)) != NULL && pn->spc == 0) {
+
+ /*
+ * If a non-text node follows without intervening
+ * whitespace, the handler of that node will decide
+ * whether and how to suppress whitespace. To allow
+ * that, the macro line needs to remain open.
+ */
+
+ if (pn->node != NODE_TEXT && pn->node != NODE_ESCAPE)
+ return;
+
+ /*
+ * Give closing punctuation
+ * in the form of trailing macro arguments.
+ */
+
+ while (*pn->b != '\0' &&
+ strchr("!),.:;?]", *pn->b) != NULL) {
+ putchar(' ');
+ putchar(*pn->b);
+ pn->b++;
+ pn->bsz--;
+ }
+
+ /*
+ * Text follows without intervening whitespace.
+ * Append the first word with .Ns.
+ */
+
+ if (*pn->b != '\0' && isspace((unsigned char)*pn->b) == 0) {
+ fputs(" Ns", stdout);
+ for (cp = pn->b; *cp != '\0'; cp++)
+ if (isspace((unsigned char)*cp))
+ break;
+ *cp = '\0';
+ macro_addarg(f, pn->b, ARG_SPACE);
+ pn->bsz -= cp - pn->b;
+ pn->b = cp;
+ if (pn->bsz > 0) {
+ pn->b++;
+ pn->bsz--;
+ pn->spc = 1;
+ }
+ }
+
+ /* Skip whitespace after the first word. */
+
+ while (isspace((unsigned char)*pn->b)) {
+ pn->b++;
+ pn->bsz--;
+ pn->spc = 1;
+ }
}
macro_close(f);
}
Index: macro.h
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/macro.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -Lmacro.h -Lmacro.h -u -p -r1.2 -r1.3
--- macro.h
+++ macro.h
@@ -28,6 +28,7 @@ enum linestate {
struct format {
int level; /* Header level, starting at 1. */
+ int spc; /* Whitespace before next macro. */
enum linestate linestate;
};
Index: parse.c
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/parse.c,v
retrieving revision 1.15
retrieving revision 1.16
diff -Lparse.c -Lparse.c -u -p -r1.15 -r1.16
--- parse.c
+++ parse.c
@@ -52,6 +52,7 @@ struct parse {
int nline; /* Line number of next token. */
int ncol; /* Column number of next token. */
int del; /* Levels of nested nodes being deleted. */
+ int spc; /* Whitespace before the next element. */
int attr; /* The most recent attribute is valid. */
int warn;
};
@@ -294,6 +295,7 @@ static void
xml_char(struct parse *ps, const char *p, int sz)
{
struct pnode *dat;
+ size_t newsz;
if (ps->del > 0)
return;
@@ -309,6 +311,7 @@ xml_char(struct parse *ps, const char *p
exit(1);
}
dat->node = NODE_TEXT;
+ dat->spc = ps->spc;
dat->parent = ps->cur;
TAILQ_INIT(&dat->childq);
TAILQ_INIT(&dat->attrq);
@@ -323,24 +326,35 @@ xml_char(struct parse *ps, const char *p
/* Append to the current text node. */
assert(sz >= 0);
- ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1);
+ newsz = ps->cur->bsz + (ps->cur->bsz && ps->spc) + sz;
+ ps->cur->b = realloc(ps->cur->b, newsz + 1);
if (ps->cur->b == NULL) {
perror(NULL);
exit(1);
}
+ if (ps->cur->bsz && ps->spc)
+ ps->cur->b[ps->cur->bsz++] = ' ';
memcpy(ps->cur->b + ps->cur->bsz, p, sz);
- ps->cur->bsz += sz;
- ps->cur->b[ps->cur->bsz] = '\0';
+ ps->cur->b[ps->cur->bsz = newsz] = '\0';
ps->cur->real = ps->cur->b;
+ ps->spc = 0;
}
+/*
+ * Close out the text node and strip trailing whitespace, if one is open.
+ */
static void
-pnode_trim(struct pnode *pn)
+pnode_closetext(struct parse *p)
{
- assert(pn->node == NODE_TEXT);
- for (; pn->bsz > 0; pn->b[--pn->bsz] = '\0')
- if (isspace((unsigned char)pn->b[pn->bsz - 1]) == 0)
- break;
+ struct pnode *n;
+
+ if ((n = p->cur) == NULL || n->node != NODE_TEXT)
+ return;
+ p->cur = n->parent;
+ while (n->bsz > 0 && isspace((unsigned char)n->b[n->bsz - 1])) {
+ n->b[--n->bsz] = '\0';
+ p->spc = 1;
+ }
}
static void
@@ -357,11 +371,7 @@ xml_entity(struct parse *p, const char *
return;
}
- /* Close out the text node, if there is one. */
- if (p->cur->node == NODE_TEXT) {
- pnode_trim(p->cur);
- p->cur = p->cur->parent;
- }
+ pnode_closetext(p);
if (p->tree->flags & TREE_CLOSED && p->cur == p->tree->root)
warn_msg(p, "entity after end of document: &%s;", name);
@@ -383,10 +393,12 @@ xml_entity(struct parse *p, const char *
}
dat->node = NODE_ESCAPE;
dat->bsz = strlen(dat->b);
+ dat->spc = p->spc;
dat->parent = p->cur;
TAILQ_INIT(&dat->childq);
TAILQ_INIT(&dat->attrq);
TAILQ_INSERT_TAIL(&p->cur->childq, dat, child);
+ p->spc = 0;
}
/*
@@ -410,11 +422,7 @@ xml_elem_start(struct parse *ps, const c
return;
}
- /* Close out the text node, if there is one. */
- if (ps->cur != NULL && ps->cur->node == NODE_TEXT) {
- pnode_trim(ps->cur);
- ps->cur = ps->cur->parent;
- }
+ pnode_closetext(ps);
for (elem = elements; elem->name != NULL; elem++)
if (strcmp(elem->name, name) == 0)
@@ -449,6 +457,7 @@ xml_elem_start(struct parse *ps, const c
exit(1);
}
dat->node = elem->node;
+ dat->spc = ps->spc;
dat->parent = ps->cur;
TAILQ_INIT(&dat->childq);
TAILQ_INIT(&dat->attrq);
@@ -519,11 +528,8 @@ xml_elem_end(struct parse *ps, const cha
return;
}
- /* Close out the text node, if there is one. */
- if (ps->del == 0 && ps->cur != NULL && ps->cur->node == NODE_TEXT) {
- pnode_trim(ps->cur);
- ps->cur = ps->cur->parent;
- }
+ if (ps->del == 0)
+ pnode_closetext(ps);
if (name != NULL) {
for (elem = elements; elem->name != NULL; elem++)
@@ -558,6 +564,7 @@ xml_elem_end(struct parse *ps, const cha
ps->tree->flags |= TREE_CLOSED;
else
ps->cur = ps->cur->parent;
+ ps->spc = 0;
break;
}
assert(ps->del == 0);
@@ -651,6 +658,7 @@ parse_string(struct parse *p, char *b, s
size_t pend; /* Offset of the end of the current word. */
int elem_end;
+ p->spc = 0;
pend = 0;
for (;;) {
@@ -663,6 +671,7 @@ parse_string(struct parse *p, char *b, s
if ((poff = pend) == rlen)
break;
if (isspace((unsigned char)b[pend])) {
+ p->spc = 1;
increment(p, b, &pend, refill);
continue;
}
@@ -836,10 +845,7 @@ parse_file(struct parse *p, int fd, cons
perror(fname);
p->tree->flags |= TREE_FAIL;
}
- if (p->cur != NULL && p->cur->node == NODE_TEXT) {
- pnode_trim(p->cur);
- p->cur = p->cur->parent;
- }
+ pnode_closetext(p);
if ((p->tree->flags & TREE_CLOSED) == 0)
warn_msg(p, "document not closed");
return p->tree;
Index: node.h
===================================================================
RCS file: /home/cvs/mdocml/docbook2mdoc/node.h,v
retrieving revision 1.9
retrieving revision 1.10
diff -Lnode.h -Lnode.h -u -p -r1.9 -r1.10
--- node.h
+++ node.h
@@ -198,6 +198,7 @@ struct pnode {
char *b; /* String value. */
char *real; /* Storage for "b". */
size_t bsz; /* strlen(b) */
+ int spc; /* Whitespace before this node. */
struct pnode *parent; /* Parent node or NULL. */
struct pnodeq childq; /* Queue of children. */
struct pattrq attrq; /* Attributes of the node. */
--
To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2019-04-06 22:38 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-06 22:38 docbook2mdoc: Store the information whether a node is preceded by schwarze
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).