From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from krisdoz.my.domain (schwarze@localhost [127.0.0.1]) by krisdoz.my.domain (8.14.5/8.14.5) with ESMTP id s6FJ37Mr031396 for ; Tue, 15 Jul 2014 15:03:07 -0400 (EDT) Received: (from schwarze@localhost) by krisdoz.my.domain (8.14.5/8.14.3/Submit) id s6FJ37TW030865; Tue, 15 Jul 2014 15:03:07 -0400 (EDT) Date: Tue, 15 Jul 2014 15:03:07 -0400 (EDT) Message-Id: <201407151903.s6FJ37TW030865@krisdoz.my.domain> X-Mailinglist: mdocml-source Reply-To: source@mdocml.bsd.lv MIME-Version: 1.0 From: schwarze@mdocml.bsd.lv To: source@mdocml.bsd.lv Subject: pod2mdoc: When formatting codes follow text without intervening X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Log Message: ----------- When formatting codes follow text without intervening whitespace, a .Pf macro was issued without arguments. While this (accidentally) works in mandoc(1), groff(1) does not support it. Introduce an output buffer such that the previous word of the text can be given as an argument to .Pf. In some situations, for example right after a section header, the newline printed before the .Pf could result in an empty line. Introduce the "hasnl" state variable to avoid this. Modified Files: -------------- pod2mdoc: pod2mdoc.c Revision Data ------------- Index: pod2mdoc.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/pod2mdoc/pod2mdoc.c,v retrieving revision 1.30 retrieving revision 1.31 diff -Lpod2mdoc.c -Lpod2mdoc.c -u -p -r1.30 -r1.31 --- pod2mdoc.c +++ pod2mdoc.c @@ -53,14 +53,18 @@ enum sect { }; struct state { + const char *fname; /* file being parsed */ int parsing; /* after =cut of before command */ int paused; /* in =begin and before =end */ - int haspar; /* in paragraph: do we need Pp? */ enum sect sect; /* which section are we in? */ - const char *fname; /* file being parsed */ #define LIST_STACKSZ 128 enum list lstack[LIST_STACKSZ]; /* open lists */ size_t lpos; /* where in list stack */ + int haspar; /* in paragraph: do we need Pp? */ + int hasnl; /* in text: just started a new line */ + char *outbuf; /* text buffered for output */ + size_t outbufsz; /* allocated size of outbuf */ + size_t outbuflen; /* current length of outbuf */ }; enum fmt { @@ -123,13 +127,75 @@ static const char fmts[FMT__MAX] = { static int last; + +static void +outbuf_grow(struct state *st, size_t by) +{ + + st->outbufsz += (by / 128 + 1) * 128; + st->outbuf = realloc(st->outbuf, st->outbufsz); + if (NULL == st->outbuf) { + perror(NULL); + exit(EXIT_FAILURE); + } +} + +static void +outbuf_addchar(struct state *st) +{ + + if (st->outbuflen + 2 >= st->outbufsz) + outbuf_grow(st, 1); + st->outbuf[st->outbuflen++] = last; + if ('\\' == last) + st->outbuf[st->outbuflen++] = 'e'; + st->outbuf[st->outbuflen] = '\0'; +} + +static void +outbuf_addstr(struct state *st, const char *str) +{ + size_t slen; + + slen = strlen(str); + if (st->outbuflen + slen >= st->outbufsz) + outbuf_grow(st, slen); + memcpy(st->outbuf + st->outbuflen, str, slen+1); + last = str[slen - 1]; +} + +static void +outbuf_flush(struct state *st) +{ + + if (0 == st->outbuflen) + return; + + fputs(st->outbuf, stdout); + *st->outbuf = '\0'; + st->outbuflen = 0; + st->hasnl = 0; +} + +static void +outbuf_newln(struct state *st) +{ + + if ('\n' == last) + return; + outbuf_flush(st); + putchar('\n'); + last = '\n'; + st->hasnl = 1; +} + /* * Given buf[*start] is at the start of an escape name, read til the end * of the escape ('>') then try to do something with it. * Sets start to be one after the '>'. */ static void -formatescape(const char *buf, size_t *start, size_t end) +formatescape(struct state *st, const char *buf, size_t *start, size_t end) { char esc[16]; /* no more needed */ size_t i, max; @@ -157,17 +223,13 @@ formatescape(const char *buf, size_t *st * Just let the rest of them go. */ if (0 == strcmp(esc, "lt")) - printf("\\(la"); + outbuf_addstr(st, "\\(la"); else if (0 == strcmp(esc, "gt")) - printf("\\(ra"); + outbuf_addstr(st, "\\(ra"); else if (0 == strcmp(esc, "vb")) - printf("\\(ba"); + outbuf_addstr(st, "\\(ba"); else if (0 == strcmp(esc, "sol")) - printf("\\(sl"); - else - return; - - last = 'a'; + outbuf_addstr(st, "\\(sl"); } /* @@ -391,6 +453,7 @@ formatcode(struct state *st, const char { enum fmt fmt; size_t i, j, dsz; + int white; assert(*start + 1 < end); assert('<' == buf[*start + 1]); @@ -427,7 +490,7 @@ formatcode(struct state *st, const char * processing for real macros. */ if (FMT_ESCAPE == fmt) { - formatescape(buf, start, end); + formatescape(st, buf, start, end); return(0); } else if (FMT_NULL == fmt || FMT_INDEX == fmt) { /* @@ -466,27 +529,38 @@ formatcode(struct state *st, const char * suppressed in, e.g., "Nm" and "Sh" macros). */ if (FMT__MAX != fmt && !nomacro) { + white = ' ' == last || '\n' == last || + ' ' == buf[*start]; + /* - * Print out the macro describing this format code. - * If we're not "reentrant" (not yet on a macro line) - * then print a newline, if necessary, and the macro - * indicator. - * Otherwise, offset us with a space. + * If we are on a text line and there is no + * whitespace before our content, we have to make + * the previous word a prefix to the macro line. */ - if ( ! reentrant) { + + if ( ! white && ! reentrant) { + if ( ! st->hasnl) + putchar('\n'); + printf(".Pf "); + } + + outbuf_flush(st); + + /* Whitespace is easier to suppress on macro lines. */ + + if ( ! white && reentrant) + printf(" Ns"); + + /* Unless we are on a macro line, start one. */ + + if (white && ! reentrant) { if (last != '\n') putchar('\n'); putchar('.'); - } else + } else putchar(' '); - - /* - * If we don't have whitespace before us (and none after - * the opening delimiter), then suppress macro - * whitespace with Pf. - */ - if (' ' != last && '\n' != last && ' ' != buf[*start]) - printf("Pf "); + + /* Print the macro corresponding to this format code. */ switch (fmt) { case (FMT_ITALIC): @@ -526,7 +600,8 @@ formatcode(struct state *st, const char default: abort(); } - } + } else + outbuf_flush(st); /* * Process until we reach the end marker (e.g., '>') or until we @@ -730,20 +805,20 @@ command(struct state *st, const char *bu st->sect = SECT_SYNOPSIS; } formatcodeln(st, buf, &start, end, 1); - putchar('\n'); + putchar(last = '\n'); st->haspar = 1; break; case (CMD_HEAD2): printf(".Ss "); formatcodeln(st, buf, &start, end, 1); - putchar('\n'); + putchar(last = '\n'); st->haspar = 1; break; case (CMD_HEAD3): puts(".Pp"); printf(".Em "); formatcodeln(st, buf, &start, end, 0); - putchar('\n'); + putchar(last = '\n'); puts(".Pp"); st->haspar = 1; break; @@ -751,7 +826,7 @@ command(struct state *st, const char *bu puts(".Pp"); printf(".No "); formatcodeln(st, buf, &start, end, 0); - putchar('\n'); + putchar(last = '\n'); puts(".Pp"); st->haspar = 1; break; @@ -805,7 +880,7 @@ command(struct state *st, const char *bu case (LIST_TAG): printf(".It "); formatcodeln(st, buf, &start, end, 0); - putchar('\n'); + putchar(last = '\n'); break; case (LIST_ENUM): /* FALLTHROUGH */ @@ -861,7 +936,6 @@ command(struct state *st, const char *bu static void verbatim(struct state *st, const char *buf, size_t start, size_t end) { - int last; size_t i; if ( ! st->parsing || st->paused) @@ -916,7 +990,7 @@ again: if ('\\' == buf[start]) printf("e"); } - putchar('\n'); + putchar(last = '\n'); puts(".Ed"); } @@ -946,14 +1020,13 @@ hasmatch(const char *buf, size_t start, * If we're an ending bracket, see if we have a stack already. */ static int -dosynopsisop(const char *buf, int *last, - size_t *start, size_t end, size_t *opstack) +dosynopsisop(const char *buf, size_t *start, size_t end, size_t *opstack) { assert('[' == buf[*start] || ']' == buf[*start]); if ('[' == buf[*start] && hasmatch(buf, *start + 1, end)) { - if ('\n' != *last) + if ('\n' != last) putchar('\n'); puts(".Oo"); (*opstack)++; @@ -961,7 +1034,7 @@ dosynopsisop(const char *buf, int *last, return(0); if (']' == buf[*start] && *opstack > 0) { - if ('\n' != *last) + if ('\n' != last) putchar('\n'); puts(".Oc"); (*opstack)--; @@ -969,7 +1042,7 @@ dosynopsisop(const char *buf, int *last, return(0); (*start)++; - *last = '\n'; + last = '\n'; while (' ' == buf[*start]) (*start)++; return(1); @@ -998,7 +1071,7 @@ donamenm(struct state *st, const char *b break; formatcodeln(st, buf, start, word, 1); if (*start == end) { - putchar('\n'); + putchar(last = '\n'); continue; } assert(',' == buf[*start]); @@ -1048,7 +1121,7 @@ ordinary(struct state *st, const char *b start++; fputs(".Nd ", stdout); formatcodeln(st, buf, &start, end, 1); - putchar('\n'); + putchar(last = '\n'); return; } } @@ -1057,6 +1130,7 @@ ordinary(struct state *st, const char *b puts(".Pp"); st->haspar = 0; + st->hasnl = 1; last = '\n'; opstack = 0; @@ -1071,9 +1145,9 @@ ordinary(struct state *st, const char *b else if ('\n' == buf[start]) break; else if ('\n' == last && '.' == buf[start]) - printf("\\&"); + outbuf_addstr(st, "\\&"); else if ('\n' == last && '\'' == buf[start]) - printf("\\&"); + outbuf_addstr(st, "\\&"); /* * If we're in the SYNOPSIS, have square * brackets indicate that we're opening and @@ -1082,12 +1156,14 @@ ordinary(struct state *st, const char *b if (SECT_SYNOPSIS == st->sect && ('[' == buf[start] || ']' == buf[start]) && - dosynopsisop(buf, &last, - &start, end, &opstack)) + dosynopsisop(buf, &start, end, &opstack)) continue; - putchar(last = buf[start++]); - if ('\\' == last) - putchar('e'); + last = buf[start++]; + if (' ' == last) { + outbuf_flush(st); + putchar(' '); + } else + outbuf_addchar(st); } if (start < end - 1 && '<' == buf[start + 1]) { @@ -1108,6 +1184,7 @@ ordinary(struct state *st, const char *b } /* End the macro line. */ putchar(last = '\n'); + st->hasnl = 1; /* * Consume all whitespace * so we don't accidentally start @@ -1117,12 +1194,7 @@ ordinary(struct state *st, const char *b start++; } } else if (start < end && '\n' == buf[start]) { - /* - * Print the newline only if we haven't already - * printed a newline. - */ - if (last != '\n') - putchar(last = buf[start]); + outbuf_newln(st); if (++start >= end) continue; /* @@ -1133,18 +1205,14 @@ ordinary(struct state *st, const char *b * have a macro subsequent it, which may be * possible if we have an escape next. */ - if (' ' == buf[start] || '\t' == buf[start]) { + if (' ' == buf[start] || '\t' == buf[start]) puts(".br"); - last = '\n'; - } for ( ; start < end; start++) if (' ' != buf[start] && '\t' != buf[start]) break; } } - - if (last != '\n') - putchar('\n'); + outbuf_newln(st); } /* -- To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv