From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from localhost (fantadrom.bsd.lv [local]) by fantadrom.bsd.lv (OpenSMTPD) with ESMTPA id e81818c7 for ; Sat, 6 Apr 2019 17:38:28 -0500 (EST) Date: Sat, 6 Apr 2019 17:38:28 -0500 (EST) X-Mailinglist: mandoc-source Reply-To: source@mandoc.bsd.lv MIME-Version: 1.0 From: schwarze@mandoc.bsd.lv To: source@mandoc.bsd.lv Subject: docbook2mdoc: Store the information whether a node is preceded by X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Message-ID: Log Message: ----------- Store the information whether a node is preceded by whitespace into the node tree. Use that information in the formatter to suppress the insertion of whitespace in text-text, macro-text, and macro-macro node sequences. Text-macro sequences are not yet handled. They are more complicated because they require emitting a .Pf macro as part of a text node depending on the spacing properties of the *following* macro node. 
Modified Files: -------------- docbook2mdoc: docbook2mdoc.c macro.c macro.h node.h parse.c Revision Data ------------- Index: docbook2mdoc.c =================================================================== RCS file: /home/cvs/mdocml/docbook2mdoc/docbook2mdoc.c,v retrieving revision 1.91 retrieving revision 1.92 diff -Ldocbook2mdoc.c -Ldocbook2mdoc.c -u -p -r1.91 -r1.92 --- docbook2mdoc.c +++ docbook2mdoc.c @@ -642,6 +642,7 @@ pnode_print(struct format *p, struct pno if (pn == NULL) return; + p->spc = pn->spc; sv = p->linestate; switch (pn->node) { @@ -674,6 +675,7 @@ pnode_print(struct format *p, struct pno break; case NODE_EDITOR: print_text(p, "editor:", ARG_SPACE); + sv = LINE_TEXT; macro_open(p, "An"); break; case NODE_EMAIL: @@ -692,7 +694,7 @@ pnode_print(struct format *p, struct pno case NODE_ESCAPE: if (p->linestate == LINE_NEW) p->linestate = LINE_TEXT; - else + else if (pn->spc || p->linestate == LINE_MACRO) putchar(' '); fputs(pn->b, stdout); break; @@ -822,7 +824,7 @@ pnode_print(struct format *p, struct pno } if (p->linestate == LINE_NEW) p->linestate = LINE_TEXT; - else + else if (pn->spc || p->linestate == LINE_MACRO) putchar(' '); /* Index: macro.c =================================================================== RCS file: /home/cvs/mdocml/docbook2mdoc/macro.c,v retrieving revision 1.5 retrieving revision 1.6 diff -Lmacro.c -Lmacro.c -u -p -r1.5 -r1.6 --- macro.c +++ macro.c @@ -17,6 +17,7 @@ #include <assert.h> #include <ctype.h> #include <stdio.h> +#include <string.h> #include "node.h" #include "macro.h" @@ -39,6 +40,8 @@ macro_open(struct format *f, const char break; case LINE_MACRO: putchar(' '); + if (f->spc == 0) + fputs("Ns ", stdout); break; } fputs(name, stdout); @@ -61,20 +64,68 @@ macro_line(struct format *f, const char } /* - * If the next node is a text node starting with closing punctuation, - * emit the closing punctuation as a trailing macro argument. 
+ * At the end of a macro, decide whether the line needs to remain open + * because the next node follows without intervening whitespace; + * otherwise, close the line. */ void macro_closepunct(struct format *f, struct pnode *pn) { - if ((pn = TAILQ_NEXT(pn, child)) != NULL && - pn->node == NODE_TEXT && pn->bsz > 0 && - (pn->b[0] == ',' || pn->b[0] == '.') && - (pn->bsz == 1 || isspace((unsigned char)pn->b[1]))) { - putchar(' '); - putchar(pn->b[0]); - pn->b++; - pn->bsz--; + char *cp; + + if ((pn = TAILQ_NEXT(pn, child)) != NULL && pn->spc == 0) { + + /* + * If a non-text node follows without intervening + * whitespace, the handler of that node will decide + * whether and how to suppress whitespace. To allow + * that, the macro line needs to remain open. + */ + + if (pn->node != NODE_TEXT && pn->node != NODE_ESCAPE) + return; + + /* + * Give closing punctuation + * in the form of trailing macro arguments. + */ + + while (*pn->b != '\0' && + strchr("!),.:;?]", *pn->b) != NULL) { + putchar(' '); + putchar(*pn->b); + pn->b++; + pn->bsz--; + } + + /* + * Text follows without intervening whitespace. + * Append the first word with .Ns. + */ + + if (*pn->b != '\0' && isspace((unsigned char)*pn->b) == 0) { + fputs(" Ns", stdout); + for (cp = pn->b; *cp != '\0'; cp++) + if (isspace((unsigned char)*cp)) + break; + *cp = '\0'; + macro_addarg(f, pn->b, ARG_SPACE); + pn->bsz -= cp - pn->b; + pn->b = cp; + if (pn->bsz > 0) { + pn->b++; + pn->bsz--; + pn->spc = 1; + } + } + + /* Skip whitespace after the first word. */ + + while (isspace((unsigned char)*pn->b)) { + pn->b++; + pn->bsz--; + pn->spc = 1; + } } macro_close(f); } Index: macro.h =================================================================== RCS file: /home/cvs/mdocml/docbook2mdoc/macro.h,v retrieving revision 1.2 retrieving revision 1.3 diff -Lmacro.h -Lmacro.h -u -p -r1.2 -r1.3 --- macro.h +++ macro.h @@ -28,6 +28,7 @@ enum linestate { struct format { int level; /* Header level, starting at 1. 
*/ + int spc; /* Whitespace before next macro. */ enum linestate linestate; }; Index: parse.c =================================================================== RCS file: /home/cvs/mdocml/docbook2mdoc/parse.c,v retrieving revision 1.15 retrieving revision 1.16 diff -Lparse.c -Lparse.c -u -p -r1.15 -r1.16 --- parse.c +++ parse.c @@ -52,6 +52,7 @@ struct parse { int nline; /* Line number of next token. */ int ncol; /* Column number of next token. */ int del; /* Levels of nested nodes being deleted. */ + int spc; /* Whitespace before the next element. */ int attr; /* The most recent attribute is valid. */ int warn; }; @@ -294,6 +295,7 @@ static void xml_char(struct parse *ps, const char *p, int sz) { struct pnode *dat; + size_t newsz; if (ps->del > 0) return; @@ -309,6 +311,7 @@ xml_char(struct parse *ps, const char *p exit(1); } dat->node = NODE_TEXT; + dat->spc = ps->spc; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); @@ -323,24 +326,35 @@ xml_char(struct parse *ps, const char *p /* Append to the current text node. */ assert(sz >= 0); - ps->cur->b = realloc(ps->cur->b, ps->cur->bsz + sz + 1); + newsz = ps->cur->bsz + (ps->cur->bsz && ps->spc) + sz; + ps->cur->b = realloc(ps->cur->b, newsz + 1); if (ps->cur->b == NULL) { perror(NULL); exit(1); } + if (ps->cur->bsz && ps->spc) + ps->cur->b[ps->cur->bsz++] = ' '; memcpy(ps->cur->b + ps->cur->bsz, p, sz); - ps->cur->bsz += sz; - ps->cur->b[ps->cur->bsz] = '\0'; + ps->cur->b[ps->cur->bsz = newsz] = '\0'; ps->cur->real = ps->cur->b; + ps->spc = 0; } +/* + * Close out the text node and strip trailing whitespace, if one is open. 
+ */ static void -pnode_trim(struct pnode *pn) +pnode_closetext(struct parse *p) { - assert(pn->node == NODE_TEXT); - for (; pn->bsz > 0; pn->b[--pn->bsz] = '\0') - if (isspace((unsigned char)pn->b[pn->bsz - 1]) == 0) - break; + struct pnode *n; + + if ((n = p->cur) == NULL || n->node != NODE_TEXT) + return; + p->cur = n->parent; + while (n->bsz > 0 && isspace((unsigned char)n->b[n->bsz - 1])) { + n->b[--n->bsz] = '\0'; + p->spc = 1; + } } static void @@ -357,11 +371,7 @@ xml_entity(struct parse *p, const char * return; } - /* Close out the text node, if there is one. */ - if (p->cur->node == NODE_TEXT) { - pnode_trim(p->cur); - p->cur = p->cur->parent; - } + pnode_closetext(p); if (p->tree->flags & TREE_CLOSED && p->cur == p->tree->root) warn_msg(p, "entity after end of document: &%s;", name); @@ -383,10 +393,12 @@ xml_entity(struct parse *p, const char * } dat->node = NODE_ESCAPE; dat->bsz = strlen(dat->b); + dat->spc = p->spc; dat->parent = p->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); TAILQ_INSERT_TAIL(&p->cur->childq, dat, child); + p->spc = 0; } /* @@ -410,11 +422,7 @@ xml_elem_start(struct parse *ps, const c return; } - /* Close out the text node, if there is one. */ - if (ps->cur != NULL && ps->cur->node == NODE_TEXT) { - pnode_trim(ps->cur); - ps->cur = ps->cur->parent; - } + pnode_closetext(ps); for (elem = elements; elem->name != NULL; elem++) if (strcmp(elem->name, name) == 0) @@ -449,6 +457,7 @@ xml_elem_start(struct parse *ps, const c exit(1); } dat->node = elem->node; + dat->spc = ps->spc; dat->parent = ps->cur; TAILQ_INIT(&dat->childq); TAILQ_INIT(&dat->attrq); @@ -519,11 +528,8 @@ xml_elem_end(struct parse *ps, const cha return; } - /* Close out the text node, if there is one. 
*/ - if (ps->del == 0 && ps->cur != NULL && ps->cur->node == NODE_TEXT) { - pnode_trim(ps->cur); - ps->cur = ps->cur->parent; - } + if (ps->del == 0) + pnode_closetext(ps); if (name != NULL) { for (elem = elements; elem->name != NULL; elem++) @@ -558,6 +564,7 @@ xml_elem_end(struct parse *ps, const cha ps->tree->flags |= TREE_CLOSED; else ps->cur = ps->cur->parent; + ps->spc = 0; break; } assert(ps->del == 0); @@ -651,6 +658,7 @@ parse_string(struct parse *p, char *b, s size_t pend; /* Offset of the end of the current word. */ int elem_end; + p->spc = 0; pend = 0; for (;;) { @@ -663,6 +671,7 @@ parse_string(struct parse *p, char *b, s if ((poff = pend) == rlen) break; if (isspace((unsigned char)b[pend])) { + p->spc = 1; increment(p, b, &pend, refill); continue; } @@ -836,10 +845,7 @@ parse_file(struct parse *p, int fd, cons perror(fname); p->tree->flags |= TREE_FAIL; } - if (p->cur != NULL && p->cur->node == NODE_TEXT) { - pnode_trim(p->cur); - p->cur = p->cur->parent; - } + pnode_closetext(p); if ((p->tree->flags & TREE_CLOSED) == 0) warn_msg(p, "document not closed"); return p->tree; Index: node.h =================================================================== RCS file: /home/cvs/mdocml/docbook2mdoc/node.h,v retrieving revision 1.9 retrieving revision 1.10 diff -Lnode.h -Lnode.h -u -p -r1.9 -r1.10 --- node.h +++ node.h @@ -198,6 +198,7 @@ struct pnode { char *b; /* String value. */ char *real; /* Storage for "b". */ size_t bsz; /* strlen(b) */ + int spc; /* Whitespace before this node. */ struct pnode *parent; /* Parent node or NULL. */ struct pnodeq childq; /* Queue of children. */ struct pattrq attrq; /* Attributes of the node. */ -- To unsubscribe send an email to source+unsubscribe@mandoc.bsd.lv