From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from localhost (fantadrom.bsd.lv [local]); by fantadrom.bsd.lv (OpenSMTPD) with ESMTPA id 7133443c; for ; Wed, 11 Mar 2015 07:52:12 -0500 (EST) Date: Wed, 11 Mar 2015 07:52:12 -0500 (EST) Message-Id: <1139008873107790471.enqueue@fantadrom.bsd.lv> X-Mailinglist: mdocml-source Reply-To: source@mdocml.bsd.lv MIME-Version: 1.0 From: kristaps@mdocml.bsd.lv To: source@mdocml.bsd.lv Subject: texi2mdoc: Enormous patch (re-)fixing (again) the vertical space issue, X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Log Message: ----------- Enormous patch (re-)fixing (again) the vertical space issue, which works even better in delaying vertical space until needed. This also adds a lot more indexing capabilities. All indexing has a HAVE_INDEX preprocessor directive, since this is not really mdoc(7) (yet). Modified Files: -------------- texi2mdoc: extern.h main.c util.c Revision Data ------------- Index: extern.h =================================================================== RCS file: /home/cvs/mdocml/texi2mdoc/extern.h,v retrieving revision 1.26 retrieving revision 1.27 diff -Lextern.h -Lextern.h -u -p -r1.26 -r1.27 --- extern.h +++ extern.h @@ -32,6 +32,8 @@ # endif #endif +#define HAVE_INDEX 1 + /* * This defines each one of the Texinfo commands that we understand. * Obviously this only refers to native commands; overriden names are a @@ -275,6 +277,7 @@ enum texicmd { TEXICMD_TITLEFONT, TEXICMD_TITLEPAGE, TEXICMD_TOP, + TEXICMD_TPINDEX, TEXICMD_U, TEXICMD_UBARACCENT, TEXICMD_UDOTACCENT, @@ -293,6 +296,7 @@ enum texicmd { TEXICMD_VERBATIM, TEXICMD_VERBATIMINCLUDE, TEXICMD_VINDEX, + TEXICMD_VRINDEX, TEXICMD_VSKIP, TEXICMD_VTABLE, TEXICMD_W, @@ -369,6 +373,17 @@ struct teximacro { }; /* + * Index tables. + * These have a two or three letter name (usually) and any number of + * terms that are associated with it. 
+ */ +struct texidex { + char *name; + char **index; + size_t indexsz; +}; + +/* * The main parse structure. * This keeps any necessary information handy. */ @@ -388,7 +403,7 @@ struct texi { char *title; /* title of document */ char *subtitle; /* subtitle of document */ int secoffs; /* see sectioner() */ - char **indexs; /* @defindex indices */ + struct texidex *indexs; /* index entries */ size_t indexsz; /* entries in indexs */ struct texivalue *vals; /* @value entries */ size_t valsz; /* entries in vals */ @@ -440,6 +455,8 @@ enum texicmd texicmd(const struct texi *, size_t, size_t *, struct teximacro **); enum texicmd peekcmd(const struct texi *, size_t); +enum texicmd + peeklinecmd(const struct texi *, size_t); void texierr(struct texi *, const char *, ...) __attribute__((format(printf, 2, 3))) __attribute__((noreturn)); @@ -450,6 +467,9 @@ void teximacroclose(struct texi *); void teximacroopen(struct texi *, const char *); void teximdocopen(struct texi *, size_t *); void teximdocclose(struct texi *, int); +void texindex(struct texi *, const char *, + size_t, const char *, size_t); +void texindex_add(struct texi *, const char *, size_t); void texipunctuate(struct texi *, size_t *); void texiputbuf(struct texi *p, size_t, size_t); void texiputchar(struct texi *p, char); Index: main.c =================================================================== RCS file: /home/cvs/mdocml/texi2mdoc/main.c,v retrieving revision 1.65 retrieving revision 1.66 diff -Lmain.c -Lmain.c -u -p -r1.65 -r1.66 --- main.c +++ main.c @@ -30,12 +30,20 @@ #include "extern.h" +#define HAVE_INDEX 1 + +/* + * Texinfo can change the "meaning" of its section headings: chapter, + * section, subsection, etc., can be promoted and/or demoted to other + * levels of heading. + * Thus, we use an offset and just jump into this array. 
+ */ #define SECTSZ 4 static const char *const sects[SECTSZ] = { - "Sh", - "Ss", - "Em", - "No", + "Sh", /* Chapters (sections) */ + "Ss", /* Sections (subsections) */ + "Em", /* Subsections (subsubsection) */ + "Sy", /* Subsubsections (...). */ }; static void doaccent(struct texi *, enum texicmd, size_t *); @@ -53,6 +61,7 @@ static void doignargn(struct texi *, enu static void doignblock(struct texi *, enum texicmd, size_t *); static void doignbracket(struct texi *, enum texicmd, size_t *); static void doignline(struct texi *, enum texicmd, size_t *); +static void doindex(struct texi *, enum texicmd, size_t *); static void doinline(struct texi *, enum texicmd, size_t *); static void doinclude(struct texi *, enum texicmd, size_t *); static void doinsertcopying(struct texi *, enum texicmd, size_t *); @@ -61,21 +70,16 @@ static void doitemize(struct texi *, enu static void dolink(struct texi *, enum texicmd, size_t *); static void domacro(struct texi *, enum texicmd, size_t *); static void domath(struct texi *, enum texicmd, size_t *); -#if 0 static void domenu(struct texi *, enum texicmd, size_t *); -#endif static void domultitable(struct texi *, enum texicmd, size_t *); -#if 0 static void donode(struct texi *, enum texicmd, size_t *); -#endif +static void doprintindex(struct texi *, enum texicmd, size_t *); static void doquotation(struct texi *, enum texicmd, size_t *); static void dotable(struct texi *, enum texicmd, size_t *); static void dotop(struct texi *, enum texicmd, size_t *); static void dosecoffs(struct texi *, enum texicmd, size_t *); static void dosection(struct texi *, enum texicmd, size_t *); static void dosp(struct texi *, enum texicmd, size_t *); -static void dosubsection(struct texi *, enum texicmd, size_t *); -static void dosubsubsection(struct texi *, enum texicmd, size_t *); static void dosymbol(struct texi *, enum texicmd, size_t *); static void dotab(struct texi *, enum texicmd, size_t *); static void dotitle(struct texi *, enum texicmd, 
size_t *); @@ -96,8 +100,8 @@ static const struct texitok __texitoks[T { doignbracket, "anchor", 6 }, /* TEXICMD_ANCHOR */ { dosection, "appendix", 8 }, /* TEXICMD_APPENDIX */ { dosection, "appendixsec", 11 }, /* TEXICMD_APPENDIXSEC */ - { dosubsection, "appendixsubsec", 14 }, /* TEXICMD_APPENDIXSUBSEC */ - { dosubsubsection, "appendixsubsubsec", 17 }, /* TEXICMD_APPENDIXSUBSUBSEC */ + { dosection, "appendixsubsec", 14 }, /* TEXICMD_APPENDIXSUBSEC */ + { dosection, "appendixsubsubsec", 17 }, /* TEXICMD_APPENDIXSUBSUBSEC */ { doinline, "asis", 4 }, /* TEXICMD_ASIS */ { dosymbol, "*", 1 }, /* TEXICMD_ASTERISK */ { dosymbol, "@", 1 }, /* TEXICMD_AT */ @@ -111,7 +115,7 @@ static const struct texitok __texitoks[T { doaccent, ",", 1 }, /* TEXICMD_CEDILLA */ { doignline, "center", 6 }, /* TEXICMD_CENTER */ { dosection, "chapter", 7 }, /* TEXICMD_CHAPTER */ - { doignline, "cindex", 6 }, /* TEXICMD_CINDEX */ + { doindex, "cindex", 6 }, /* TEXICMD_CINDEX */ { doaccent, "^", 1 }, /* TEXICMD_CIRCUMFLEX */ { doinline, "cite", 4 }, /* TEXICMD_CITE */ { dovalue, "clear", 5 }, /* TEXICMD_CLEAR */ @@ -149,7 +153,7 @@ static const struct texitok __texitoks[T { dodefn, "defvarx", 7 }, /* TEXICMD_DEFVARX */ { dodefn, "defvr", 5 }, /* TEXICMD_DEFVR */ { dodefn, "defvrx", 6 }, /* TEXICMD_DEFVRX */ - { doignblock, "detailmenu", 10 }, /* TEXICMD_DETAILMENU */ + { domenu, "detailmenu", 10 }, /* TEXICMD_DETAILMENU */ { doinline, "dfn", 3 }, /* TEXICMD_DFN */ { dosymbol, "DH", 2 }, /* TEXICMD_DH */ { dosymbol, "dh", 2 }, /* TEXICMD_DHSMALL */ @@ -178,7 +182,7 @@ static const struct texitok __texitoks[T { dosymbol, "expansion", 9 }, /* TEXICMD_EXPANSION */ { doinline, "file", 4 }, /* TEXICMD_FILE */ { doignline, "finalout", 8 }, /* TEXICMD_FINALOUT */ - { doignline, "findex", 6 }, /* TEXICMD_FINDEX */ + { doindex, "findex", 6 }, /* TEXICMD_FINDEX */ { doblock, "flushleft", 9 }, /* TEXICMD_FLUSHLEFT */ { doblock, "flushright", 10 }, /* TEXICMD_FLUSHRIGHT */ { doignline, "firstparagraphindent", 
20 }, /* TEXICMD_FIRSTPARAGRAPHINDENT */ @@ -237,20 +241,12 @@ static const struct texitok __texitoks[T { domacro, "macro", 5 }, /* TEXICMD_MACRO */ { doaccent, "=", 1 }, /* TEXICMD_MACRON */ { domath, "math", 4 }, /* TEXICMD_MATH */ -#if 0 { domenu, "menu", 4 }, /* TEXICMD_MENU */ -#else - { doignblock, "menu", 4 }, /* TEXICMD_MENU */ -#endif { dosymbol, "minus", 5 }, /* TEXICMD_MINUS */ { domultitable, "multitable", 10 }, /* TEXICMD_MULTITABLE */ { doignline, "need", 4 }, /* TEXICMD_NEED */ { dosymbol, "\n", 1 }, /* TEXICMD_NEWLINE */ -#if 0 { donode, "node", 4 }, /* TEXICMD_NODE */ -#else - { doignline, "node", 4 }, /* TEXICMD_NODE */ -#endif { doignline, "noindent", 8 }, /* TEXICMD_NOINDENT */ { dosymbol, "O", 1 }, /* TEXICMD_O */ { dosymbol, "OE", 2 }, /* TEXICMD_OE */ @@ -265,7 +261,7 @@ static const struct texitok __texitoks[T { dosymbol, ".", 1 }, /* TEXICMD_PERIOD */ { doignline, "pindex", 6 }, /* TEXICMD_PINDEX */ { dosymbol, "pounds", 6 }, /* TEXICMD_POUNDS */ - { doignline, "printindex", 10 }, /* TEXICMD_PRINTINDEX */ + { doprintindex, "printindex", 10 }, /* TEXICMD_PRINTINDEX */ { dolink, "pxref", 5 }, /* TEXICMD_PXREF */ { dosymbol, "questiondown", 12 }, /* TEXICMD_QUESTIONDOWN */ { dosymbol, "?", 1 }, /* TEXICMD_QUESTIONMARK */ @@ -306,10 +302,10 @@ static const struct texitok __texitoks[T { dosymbol, "}", 1 }, /* TEXICMD_SQUIGGLE_RIGHT */ { dosymbol, "ss", 2 }, /* TEXICMD_SS */ { doinline, "strong", 6 }, /* TEXICMD_STRONG */ - { dosubsection, "subheading", 10 }, /* TEXICMD_SUBHEADING */ - { dosubsection, "subsection", 10 }, /* TEXICMD_SUBSECTION */ - { dosubsubsection, "subsubheading", 13 }, /* TEXICMD_SUBSUBHEADING */ - { dosubsubsection, "subsubsection", 13 }, /* TEXICMD_SUBSUBSECTION */ + { dosection, "subheading", 10 }, /* TEXICMD_SUBHEADING */ + { dosection, "subsection", 10 }, /* TEXICMD_SUBSECTION */ + { dosection, "subsubheading", 13 }, /* TEXICMD_SUBSUBHEADING */ + { dosection, "subsubsection", 13 }, /* TEXICMD_SUBSUBSECTION */ { doignline, 
"subtitle", 8 }, /* TEXICMD_SUBTITLE */ { doignline, "summarycontents", 15 }, /* TEXICMD_SUMMARYCONTENTS */ { dodefindex, "synindex", 8 }, /* TEXICMD_SYNINDEX */ @@ -326,19 +322,20 @@ static const struct texitok __texitoks[T { dosymbol, "tie", 3 }, /* TEXICMD_TIE */ { doaccent, "tieaccent", 9 }, /* TEXICMD_TIEACCENT */ { doaccent, "~", 1 }, /* TEXICMD_TILDE */ - { doignline, "tindex", 6 }, /* TEXICMD_TINDEX */ + { doindex, "tindex", 6 }, /* TEXICMD_TINDEX */ { doignline, "title", 5 }, /* TEXICMD_TITLE */ { dobracket, "titlefont", 9 }, /* TEXICMD_TITLEFONT */ { doignblock, "titlepage", 9 }, /* TEXICMD_TITLEPAGE */ { dotop, "top", 3 }, /* TEXICMD_TOP */ + { doindex, "tpindex", 7 }, /* TEXICMD_TPINDEX */ { doaccent, "u", 1 }, /* TEXICMD_U */ { doaccent, "ubaraccent", 10 }, /* TEXICMD_UBARACCENT */ { doaccent, "udotaccent", 10 }, /* TEXICMD_UDOTACCENT */ { doaccent, "\"", 1 }, /* TEXICMD_UMLAUT */ { dosection, "unnumbered", 10 }, /* TEXICMD_UNNUMBERED */ { dosection, "unnumberedsec", 13 }, /* TEXICMD_UNNUMBEREDSEC */ - { dosubsection, "unnumberedsubsec", 16 }, /* TEXICMD_UNNUMBEREDSUBSEC */ - { dosubsubsection, "unnumberedsubsubsec", 19 }, /* TEXICMD_UNNUMBEREDSUBSUBSEC */ + { dosection, "unnumberedsubsec", 16 }, /* TEXICMD_UNNUMBEREDSUBSEC */ + { dosection, "unnumberedsubsubsec", 19 }, /* TEXICMD_UNNUMBEREDSUBSUBSEC */ { dolink, "uref", 4 }, /* TEXICMD_UREF */ { dolink, "url", 3 }, /* TEXICMD_URL */ { doignline, "", 0 }, /* TEXICMD_USER_INDEX */ @@ -348,7 +345,8 @@ static const struct texitok __texitoks[T { doverb, "verb", 4 }, /* TEXICMD_VERB */ { doverbatim, "verbatim", 8 }, /* TEXICMD_VERBATIM */ { doverbinclude, "verbatiminclude", 15 }, /* TEXICMD_VERBATIMINCLUDE */ - { doignline, "vindex", 6 }, /* TEXICMD_VINDEX */ + { doindex, "vindex", 6 }, /* TEXICMD_VINDEX */ + { doindex, "vrindex", 7 }, /* TEXICMD_VRINDEX */ { dosp, "vskip", 5 }, /* TEXICMD_VSKIP */ { dotable, "vtable", 6 }, /* TEXICMD_VTABLE */ { dobracket, "w", 1 }, /* TEXICMD_W */ @@ -359,43 +357,34 @@ 
static const struct texitok __texitoks[T const struct texitok *const texitoks = __texitoks; /* - * Texinfo has lots of indexes. - * You can add new ones in a variety of ways. - * We maintain an array of all of these index names (usually a few - * letters) and pass unknown commands through the array list. + * Define new indexes either by assignment or aliasing (both of these + * accept the first argument as the new index). */ static void dodefindex(struct texi *p, enum texicmd cmd, size_t *pos) { - size_t start, end; - char *cp; + size_t start; while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) advance(p, pos); - start = end = *pos; - while (end < BUFSZ(p) && ! ismspace(BUF(p)[end])) - end++; - - if (start == end) { - advanceeoln(p, pos, 1); + start = *pos; + while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) + advance(p, pos); + if (*pos == BUFSZ(p)) { + texiwarn(p, "unexpected EOF"); return; } - - if (NULL == (cp = malloc(end - start + 1))) - texiabort(p, NULL); - memcpy(cp, &BUF(p)[start], end - start); - cp[end - start] = '\0'; - - /* FIXME: use reallocarray(). */ - p->indexs = realloc(p->indexs, - sizeof(char *) * (p->indexsz + 1)); - if (NULL == p->indexs) - texiabort(p, NULL); - p->indexs[p->indexsz++] = cp; - - advanceeoln(p, pos, 1); + if (0 == *pos - start) + texiwarn(p, "zero-length index definition"); + else + texindex_add(p, &BUF(p)[start], *pos - start); + advance(p, pos); } +/* + * Handle both possible "define function" (type, etc.) classes: where + * we'll have a body and without one (suffixed with "x"). 
+ */ static void dodefn(struct texi *p, enum texicmd cmd, size_t *pos) { @@ -427,8 +416,10 @@ dodefn(struct texi *p, enum texicmd cmd, return; } - if (NULL != blk) - texivspace(p); + if (p->seenvs >= 0) { + teximacro(p, "Pp"); + p->seenvs = -1; + } switch (cmd) { case (TEXICMD_DEFTYPEMETHOD): @@ -456,6 +447,7 @@ dodefn(struct texi *p, enum texicmd cmd, break; } + p->seenvs = 0; texiputchar(p, ':'); texiputchar(p, '\n'); @@ -563,10 +555,13 @@ dodefn(struct texi *p, enum texicmd cmd, return; } - teximacro(p, "Bd -filled -offset indent"); + texivspace(p); + teximacro(p, "Bd -filled -offset indent -compact"); + p->seenvs = -1; parseto(p, pos, blk); + p->seenvs = 0; teximacro(p, "Ed"); - p->seenvs = 1; + texivspace(p); } static void @@ -721,6 +716,7 @@ doinline(struct texi *p, enum texicmd cm switch (cmd) { case (TEXICMD_CODE): case (TEXICMD_KBD): + /* FIXME: quote around @samp{} */ case (TEXICMD_SAMP): case (TEXICMD_T): macro = "Li"; @@ -831,9 +827,8 @@ doinsertcopying(struct texi *p, enum tex { advanceeoln(p, pos, 0); - if (NULL == p->copying) - return; - texisplice(p, p->copying, p->copyingsz, *pos); + if (NULL != p->copying) + texisplice(p, p->copying, p->copyingsz, *pos); } static void @@ -898,13 +893,16 @@ doverbatim(struct texi *p, enum texicmd assert(endpos <= BUFSZ(p)); assert('\n' == BUF(p)[*pos]); advance(p, pos); - teximacro(p, "Bd -literal -offset indent"); + texivspace(p); + teximacro(p, "Bd -literal -offset indent -compact"); + p->seenvs = -1; while (*pos < endpos) { texiputchar(p, BUF(p)[*pos]); advance(p, pos); } + p->seenvs = 0; teximacro(p, "Ed"); - p->seenvs = 1; + texivspace(p); if (*pos < BUFSZ(p)) advanceto(p, pos, endpos + endsz); } @@ -1043,20 +1041,23 @@ dodisplay(struct texi *p, enum texicmd c { advanceeoln(p, pos, 1); + texivspace(p); switch (cmd) { case (TEXICMD_FORMAT): case (TEXICMD_SMALLFORMAT): - teximacro(p, "Bd -filled"); + teximacro(p, "Bd -filled -compact"); break; default: - teximacro(p, "Bd -filled -offset indent"); + teximacro(p, 
"Bd -filled -offset indent -compact"); break; } + p->seenvs = -1; parseto(p, pos, texitoks[cmd].tok); + p->seenvs = 0; teximacro(p, "Ed"); - p->seenvs = 1; + texivspace(p); } static void @@ -1065,12 +1066,14 @@ doexample(struct texi *p, enum texicmd c advanceeoln(p, pos, 1); - teximacro(p, "Bd -literal -offset indent"); + texivspace(p); + teximacro(p, "Bd -literal -offset indent -compact"); p->literal++; parseto(p, pos, texitoks[cmd].tok); p->literal--; + p->seenvs = 0; teximacro(p, "Ed"); - p->seenvs = 1; + texivspace(p); } static void @@ -1471,14 +1474,102 @@ doquotation(struct texi *p, enum texicmd teximacro(p, "Qc"); } -#if 0 +static int +indexcmp(const void *p1, const void *p2) +{ + + return(strcasecmp(*(const char **)p1, *(const char **)p2)); +} + +static void +doprintindex(struct texi *p, enum texicmd cmd, size_t *pos) +{ + size_t i, j, start, end, len; +#if HAVE_INDEX + char *cp; +#endif + + while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) + advance(p, pos); + start = *pos; + while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) + advance(p, pos); + if ((end = *pos) == BUFSZ(p)) { + texiwarn(p, "unexpected EOF"); + return; + } + + advance(p, pos); + if (0 == (len = end - start)) { + texiwarn(p, "zero-length index"); + return; + } + + /* Look for the index in our table. */ + for (i = 0; i < p->indexsz; i++) { + if (strlen(p->indexs[i].name) != len) + continue; + if (strncmp(p->indexs[i].name, &BUF(p)[start], len)) + continue; + break; + } + + if (i == p->indexsz) { + texiwarn(p, "cannot find index"); + return; + } else if (0 == p->indexs[i].indexsz) + return; + + /* Alphabetically sort our indices. 
*/ + qsort(p->indexs[i].index, + p->indexs[i].indexsz, sizeof(char *), indexcmp); + + texivspace(p); + teximacro(p, "Bl -tag -width Ds -compact"); + for (j = 0; j < p->indexs[i].indexsz; j++) { + teximacroopen(p, "It"); +#if HAVE_INDEX + teximacroopen(p, "Lkx"); + texiputchars(p, "\"idx"); + texiputchars(p, p->indexs[i].name); + cp = p->indexs[i].index[j]; + while ('\n' != *cp) { + assert('\0' != *cp); + texiputchar(p, *cp++); + } + texiputchars(p, "\" \""); + p->literal++; +#endif + texisplice(p, p->indexs[i].index[j], + strlen(p->indexs[i].index[j]), *pos); + parseeoln(p, pos); +#if HAVE_INDEX + p->literal--; + texiputchars(p, "\""); + teximacroclose(p); +#endif + teximacroclose(p); + } + p->seenvs = 0; + teximacro(p, "El"); + texivspace(p); +} + static void donode(struct texi *p, enum texicmd cmd, size_t *pos) { + int sv = p->seenvs; + + if (NULL != p->chapters) { + advanceeoln(p, pos, 1); + return; + } +#if HAVE_INDEX + p->seenvs = -1; teximacroopen(p, "Ix"); - texiputchars(p, "Node"); - while (*pos < BUFSZ(p) && isspace(BUF(p)[*pos])) + texiputchars(p, "node"); + while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) advance(p, pos); while (*pos < BUFSZ(p)) { if (BUF(p)[*pos] == ',') @@ -1488,15 +1579,21 @@ donode(struct texi *p, enum texicmd cmd, texiputchar(p, BUF(p)[*pos]); advance(p, pos); } - teximacroclose(p); +#endif advanceeoln(p, pos, 1); + p->seenvs = sv; } +/* + * This handles both menu and detailedmenu. + * The syntax of these is fairly... unspecific, but what we do here + * seems to work with most manuals. + */ static void domenu(struct texi *p, enum texicmd cmd, size_t *pos) { - size_t start, sv; + size_t start, end, sv; if (NULL != p->chapters) { doignblock(p, cmd, pos); @@ -1505,47 +1602,64 @@ domenu(struct texi *p, enum texicmd cmd, advanceeoln(p, pos, 1); + /* + * Parse past initial stuff. + * TODO: the manual says we're supposed to make this in bold or + * something. 
+ */ + while (*pos < BUFSZ(p)) { + while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) + advance(p, pos); + if ('*' != BUF(p)[*pos]) { + if (TEXICMD_END == peeklinecmd(p, *pos)) + break; + parseeoln(p, pos); + } else + break; + } + texivspace(p); teximacro(p, "Bl -tag -width Ds -compact"); while (*pos < BUFSZ(p)) { - /* Read to next menu item. */ - while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) + /* + * Read to next menu item. + * We simply parse every line until we get a magic '*'. + * These lines might occur interspersed OR as the + * description of an entry. + * Either way it's in the `It' block. + */ + if (0 == p->seenws) + p->seenws = *pos < BUFSZ(p) && isws(BUF(p)[*pos]); + while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) advance(p, pos); - if ('*' != BUF(p)[*pos]) - break; + if ('*' != BUF(p)[*pos]) { + if (TEXICMD_END == peeklinecmd(p, *pos)) + break; + parseeoln(p, pos); + continue; + } - assert('*' == BUF(p)[*pos]); + /* Now we're parsing a menu item. */ advance(p, pos); - while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) + while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) advance(p, pos); - sv = start = *pos; + start = sv = *pos; while (*pos < BUFSZ(p) && ':' != BUF(p)[*pos]) advance(p, pos); - if (*pos == BUFSZ(p) || *pos == start) { - texiwarn(p, "empty menu name"); - break; - } - teximacroopen(p, "It"); - teximacroopen(p, "Lkx"); - texiputchar(p, '"'); - texiputchars(p, "Node"); - for (start = sv; start < *pos; start++) - texiputchar(p, BUF(p)[start]); - texiputchars(p, "\" \""); - for (start = sv; start < *pos; start++) - texiputchar(p, BUF(p)[start]); - texiputchar(p, '"'); - teximacroclose(p); - teximacroclose(p); - + end = *pos; advance(p, pos); + if (*pos == BUFSZ(p)) { texiwarn(p, "bad menu syntax"); break; } else if (':' != BUF(p)[*pos]) { while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) advance(p, pos); - start = *pos; + teximacroopen(p, "It"); +#ifdef HAVE_INDEX + teximacroopen(p, "Lkx"); + texiputchars(p, "\"node"); +#endif while (*pos < BUFSZ(p)) { 
switch (BUF(p)[*pos]) { case ('\t'): @@ -1563,52 +1677,47 @@ domenu(struct texi *p, enum texicmd cmd, } /* FALLTHROUGH */ default: + texiputchar(p, BUF(p)[*pos]); advance(p, pos); continue; } advance(p, pos); break; } - } else - advance(p, pos); - - while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) - advance(p, pos); - - if (*pos == BUFSZ(p)) { - texiwarn(p, "bad menu syntax"); - break; - } - - while (*pos < BUFSZ(p)) { - if ('*' == BUF(p)[*pos]) - break; - if ('\n' != BUF(p)[*pos]) { - texiputchar(p, BUF(p)[*pos]); - advance(p, pos); - continue; - } +#ifdef HAVE_INDEX + texiputchars(p, "\" \""); + for (start = sv; start < end; start++) + texiputchar(p, BUF(p)[start]); + texiputchar(p, '"'); +#endif + } else { advance(p, pos); - while (*pos == BUFSZ(p)) { - texiwarn(p, "bad menu syntax"); - break; - } - if ('\n' == BUF(p)[*pos]) { - advance(p, pos); - break; - } else if ('*' == BUF(p)[*pos]) { - continue; - } else if ('@' == BUF(p)[*pos]) - break; - texiputchar(p, ' '); + p->seenvs = 0; + teximacroopen(p, "It"); +#ifdef HAVE_INDEX + teximacroopen(p, "Lkx"); + texiputchars(p, "\"node"); + for (start = sv; start < end; start++) + texiputchar(p, BUF(p)[start]); + texiputchars(p, "\" \""); +#endif + for (start = sv; start < end; start++) + texiputchar(p, BUF(p)[start]); +#ifdef HAVE_INDEX + texiputchar(p, '"'); +#endif } +#ifdef HAVE_INDEX + teximacroclose(p); +#endif + teximacroclose(p); } + advanceeoln(p, pos, 0); + p->seenvs = 0; teximacro(p, "El"); - - doignblock(p, cmd, pos); + texivspace(p); } -#endif static void domath(struct texi *p, enum texicmd cmd, size_t *pos) @@ -1750,43 +1859,6 @@ sectioner(struct texi *p, int sec) } static void -dosubsubsection(struct texi *p, enum texicmd cmd, size_t *pos) -{ - int sec; - - sec = sectioner(p, 3); - - /* We don't have a subsubsubsection, so make one up. 
*/ - texivspace(p); - teximacroopen(p, sects[sec]); - parseeoln(p, pos); - teximacroclose(p); - texivspace(p); -} - -static void -dosubsection(struct texi *p, enum texicmd cmd, size_t *pos) -{ - int sec; - - sec = sectioner(p, 2); - - if (p->outmacro) - texierr(p, "\"%s\" in open line scope!?", sects[sec]); - else if (p->literal) - texierr(p, "\"%s\" in a literal scope!?", sects[sec]); - - /* We don't have a subsubsection, so make one up. */ - if (sec > 1) - texivspace(p); - teximacroopen(p, sects[sec]); - parseeoln(p, pos); - teximacroclose(p); - if (sec > 1) - texivspace(p); -} - -static void dosecoffs(struct texi *p, enum texicmd cmd, size_t *pos) { @@ -1816,6 +1888,18 @@ dosection(struct texi *p, enum texicmd c case (TEXICMD_UNNUMBEREDSEC): sec = sectioner(p, 1); break; + case (TEXICMD_APPENDIXSUBSEC): + case (TEXICMD_SUBHEADING): + case (TEXICMD_SUBSECTION): + case (TEXICMD_UNNUMBEREDSUBSEC): + sec = sectioner(p, 2); + break; + case (TEXICMD_APPENDIXSUBSUBSEC): + case (TEXICMD_SUBSUBHEADING): + case (TEXICMD_SUBSUBSECTION): + case (TEXICMD_UNNUMBEREDSUBSUBSEC): + sec = sectioner(p, 3); + break; default: abort(); } @@ -1825,6 +1909,11 @@ dosection(struct texi *p, enum texicmd c else if (p->literal) texierr(p, "\"%s\" in a literal scope!?", sects[sec]); + if (sec < 2) + p->seenvs = -1; + else + texivspace(p); + if (0 == sec && NULL != p->chapters) { teximdocclose(p, 0); teximdocopen(p, pos); @@ -1833,6 +1922,11 @@ dosection(struct texi *p, enum texicmd c teximacroopen(p, sects[sec]); parseeoln(p, pos); teximacroclose(p); + + if (sec < 2) + p->seenvs = -1; + else + texivspace(p); } static void @@ -1876,9 +1970,11 @@ doitem(struct texi *p, enum texicmd cmd, switch (p->list) { case (TEXILIST_ITEM): + p->seenvs = -1; teximacroopen(p, "It"); break; case (TEXILIST_NOITEM): + p->seenvs = -1; teximacro(p, "It"); break; default: @@ -1975,6 +2071,7 @@ domultitable(struct texi *p, enum texicm teximacro(p, "TE"); p->literal = svliteral; p->list = sv; + texivspace(p); } 
static void @@ -1985,10 +2082,12 @@ dotable(struct texi *p, enum texicmd cmd advanceeoln(p, pos, 1); p->list = TEXILIST_ITEM; - teximacro(p, "Bl -tag -width Ds"); + texivspace(p); + teximacro(p, "Bl -tag -width Ds -compact"); parseto(p, pos, texitoks[cmd].tok); + p->seenvs = 0; teximacro(p, "El"); - p->seenvs = 1; + texivspace(p); p->list = sv; } @@ -2016,10 +2115,12 @@ doenumerate(struct texi *p, enum texicmd advanceeoln(p, pos, 1); p->list = TEXILIST_NOITEM; - teximacro(p, "Bl -enum"); + texivspace(p); + teximacro(p, "Bl -enum -compact"); parseto(p, pos, texitoks[cmd].tok); + p->seenvs = 0; teximacro(p, "El"); - p->seenvs = 1; + texivspace(p); p->list = sv; } @@ -2031,10 +2132,12 @@ doitemize(struct texi *p, enum texicmd c advanceeoln(p, pos, 1); p->list = TEXILIST_NOITEM; - teximacro(p, "Bl -bullet"); + texivspace(p); + teximacro(p, "Bl -bullet -compact"); parseto(p, pos, texitoks[cmd].tok); + p->seenvs = 0; teximacro(p, "El"); - p->seenvs = 1; + texivspace(p); p->list = sv; } @@ -2048,6 +2151,56 @@ doignbracket(struct texi *p, enum texicm } static void +doindex(struct texi *p, enum texicmd cmd, size_t *pos) +{ + size_t start, end, len; + + while (*pos < BUFSZ(p) && isws(BUF(p)[*pos])) + advance(p, pos); + + start = *pos; + while (*pos < BUFSZ(p) && '\n' != BUF(p)[*pos]) + advance(p, pos); + + if (*pos == BUFSZ(p)) { + texiwarn(p, "unexpected EOF"); + return; + } + + advance(p, pos); + end = *pos; + if (0 == (len = end - start)) { + texiwarn(p, "zero-length index"); + return; + } + + /* Two-letter combos we can look up verbatim. */ + if (7 == texitoks[cmd].len) { + texindex(p, texitoks[cmd].tok, 2, &BUF(p)[start], len); + return; + } + + assert(6 == texitoks[cmd].len); + /* Newer one-letter combos need to be mapped. 
*/ + switch (texitoks[cmd].tok[0]) { + case ('c'): + texindex(p, "cp", 2, &BUF(p)[start], len); + break; + case ('v'): + texindex(p, "vr", 2, &BUF(p)[start], len); + break; + case ('f'): + texindex(p, "fn", 2, &BUF(p)[start], len); + break; + case ('t'): + texindex(p, "tp", 2, &BUF(p)[start], len); + break; + default: + abort(); + } +} + +static void doignline(struct texi *p, enum texicmd cmd, size_t *pos) { @@ -2110,6 +2263,7 @@ main(int argc, char *argv[]) memset(&texi, 0, sizeof(struct texi)); texi.ign = 1; texi.outfile = stdout; + texi.seenvs = -1; Idir = NULL; while (-1 != (c = getopt(argc, argv, "C:I:"))) @@ -2126,6 +2280,12 @@ main(int argc, char *argv[]) argv += optind; argc -= optind; + + /* Add the default Texinfo indices. */ + texindex_add(&texi, "cp", 2); + texindex_add(&texi, "vr", 2); + texindex_add(&texi, "tp", 2); + texindex_add(&texi, "fn", 2); if (argc > 0) { if (NULL == (dirpath = strdup(argv[0]))) Index: util.c =================================================================== RCS file: /home/cvs/mdocml/texi2mdoc/util.c,v retrieving revision 1.29 retrieving revision 1.30 diff -Lutil.c -Lutil.c -u -p -r1.29 -r1.30 --- util.c +++ util.c @@ -103,6 +103,120 @@ texivaluefree(struct texivalue *p) free(p->value); } +static void +texidex_free(struct texidex *p) +{ + size_t i; + + for (i = 0; i < p->indexsz; i++) + free(p->index[i]); + + free(p->index); + free(p->name); + p->index = NULL; + p->indexsz = 0; +} + +/* + * Add the text beginning at "index" and of "sz" bytes to the index + * named "tok" with name size "toksz". + * This will also output the necessary mdoc(7) to construct the index. + */ +void +texindex(struct texi *p, const char *tok, + size_t toksz, const char *index, size_t sz) +{ + size_t i; +#ifdef HAVE_INDEX + char *cp; +#endif + + if (0 == sz) { + texiwarn(p, "zero-length index entry"); + return; + } + + /* Look for the index. (Must be found.) 
*/ + for (i = 0; i < p->indexsz; i++) { + if (strlen(p->indexs[i].name) != toksz) + continue; + if (strncmp(p->indexs[i].name, tok, toksz)) + continue; + break; + } + + assert(i < p->indexsz); + /* Reallocate index's terms. */ + p->indexs[i].index = realloc + (p->indexs[i].index, + (p->indexs[i].indexsz + 1) * + sizeof(char *)); + if (NULL == p->indexs[i].index) + texiabort(p, NULL); + + /* Add term to term array. */ + p->indexs[i].index[p->indexs[i].indexsz] = + malloc(sz + 1); + if (NULL == p->indexs[i].index[p->indexs[i].indexsz]) + texiabort(p, NULL); + memcpy(p->indexs[i].index[p->indexs[i].indexsz], + index, sz); + p->indexs[i].index[p->indexs[i].indexsz][sz] = '\0'; + + /* Output mdoc(7) for index. */ +#ifdef HAVE_INDEX + p->seenvs = -1; + teximacroopen(p, "Ix"); + texiputchars(p, "idx"); + texiputchars(p, p->indexs[i].name); + cp = p->indexs[i].index[p->indexs[i].indexsz]; + while ('\n' != *cp) { + assert('\0' != *cp); + texiputchar(p, *cp++); + } + teximacroclose(p); +#endif + p->indexs[i].indexsz++; +} + +/* + * Add an index entry named "tok" of length "sz". + * This usually consists of two letters, e.g., "cp" or "vr". + * This does nothing if the index exists or is zero-sized. + */ +void +texindex_add(struct texi *p, const char *tok, size_t sz) +{ + size_t i; + char *cp; + + if (0 == sz) + return; + + /* Make sure we don't have a duplicate. */ + for (i = 0; i < p->indexsz; i++) { + if (strlen(p->indexs[i].name) != sz) + continue; + if (strncmp(p->indexs[i].name, tok, sz)) + continue; + return; + } + + /* Reallocate indices. 
*/ + p->indexs = realloc(p->indexs, + sizeof(struct texidex) * (p->indexsz + 1)); + if (NULL == p->indexs) + texiabort(p, NULL); + if (NULL == (cp = malloc(sz + 1))) + texiabort(p, NULL); + memcpy(cp, tok, sz); + cp[sz] = '\0'; + p->indexs[p->indexsz].name = cp; + p->indexs[p->indexsz].index = NULL; + p->indexs[p->indexsz].indexsz = 0; + p->indexsz++; +} + /* * Unmap all files that we're currently using and free all resources * that we've allocated during the parse. @@ -128,7 +242,7 @@ texiexit(struct texi *p) for (i = 0; i < p->dirsz; i++) free(p->dirs[i]); for (i = 0; i < p->indexsz; i++) - free(p->indexs[i]); + texidex_free(&p->indexs[i]); for (i = 0; i < p->valsz; i++) texivaluefree(&p->vals[i]); @@ -280,14 +394,13 @@ void teximacroclose(struct texi *p) { - if (p->ign) + if (p->ign || p->literal|| TEXILIST_TABLE == p->list) return; if (0 == --p->outmacro) { fputc('\n', p->outfile); p->outcol = p->seenws = 0; } - p->seenvs = 0; } /* @@ -301,7 +414,7 @@ teximacroopen(struct texi *p, const char { int rc; - if (p->ign) + if (p->ign || p->literal|| TEXILIST_TABLE == p->list) return; if (p->outcol && 0 == p->outmacro) { @@ -309,6 +422,9 @@ teximacroopen(struct texi *p, const char p->outcol = 0; } + if (p->seenvs > 0 && 0 == p->outmacro) + fputs(".Pp\n", p->outfile); + if (0 == p->outmacro) fputc('.', p->outfile); else @@ -320,8 +436,7 @@ teximacroopen(struct texi *p, const char fputc(' ', p->outfile); p->outcol++; p->outmacro++; - p->seenws = 0; - p->seenvs = 0; + p->seenws = p->seenvs = 0; } /* @@ -338,15 +453,15 @@ teximacro(struct texi *p, const char *s) texierr(p, "\"%s\" in open line scope!?", s); if (p->literal) texierr(p, "\"%s\" in a literal scope!?", s); - if (p->outcol) fputc('\n', p->outfile); + if (p->seenvs > 0) + fputs(".Pp\n", p->outfile); fputc('.', p->outfile); fputs(s, p->outfile); fputc('\n', p->outfile); p->outcol = p->seenws = 0; - p->seenvs = 0; } /* @@ -356,8 +471,8 @@ void texivspace(struct texi *p) { - if (TEXILIST_TABLE != p->list) - 
teximacro(p, "Pp"); + if (TEXILIST_TABLE != p->list && p->seenvs >= 0) + p->seenvs = 1; } /* @@ -420,7 +535,7 @@ texipunctuate(struct texi *p, size_t *po if (end == *pos) return; if (end + 1 == BUFSZ(p) || ' ' == BUF(p)[end] || - '\n' == BUF(p)[end]) { + '@' == BUF(p)[end] || '\n' == BUF(p)[end]) { for ( ; start < end; start++) { texiputchar(p, ' '); texiputchar(p, BUF(p)[start]); @@ -448,6 +563,9 @@ advancenext(struct texi *p, size_t *pos) while (*pos < BUFSZ(p) && ismspace(BUF(p)[*pos])) { p->seenws = 1; + if (0 == p->seenvs && '\n' == BUF(p)[*pos]) + if (*pos + 1 < BUFSZ(p) && '\n' == BUF(p)[*pos + 1]) + p->seenvs = 1; advance(p, pos); } return(*pos); @@ -608,10 +726,12 @@ parseword(struct texi *p, size_t *pos, c * We don't do this if we're in a literal context (we'll print * out the newlines themselves) nor in a `TS' table. */ - if (p->seenvs && 0 == p->literal && TEXILIST_TABLE != p->list) - teximacro(p, "Pp"); - - p->seenvs = 0; + if (p->seenvs > 0 && 0 == p->literal && TEXILIST_TABLE != p->list) { + if (p->outcol > 0) + fputc('\n', p->outfile); + fputs(".Pp\n", p->outfile); + p->outcol = 0; + } /* * Some line control: if we (non-macro, non-literal) already @@ -684,7 +804,9 @@ parseword(struct texi *p, size_t *pos, c continue; } - if (*pos < BUFSZ(p) - 2 && + if ('"' == BUF(p)[*pos]) { + texiputchars(p, "\\(dq"); + } else if (*pos < BUFSZ(p) - 2 && '-' == BUF(p)[*pos] && '-' == BUF(p)[*pos + 1] && '-' == BUF(p)[*pos + 2]) { @@ -712,19 +834,23 @@ parseword(struct texi *p, size_t *pos, c advance(p, pos); } - if (*pos + 1 < BUFSZ(p) && - '\n' == BUF(p)[*pos] && - '\n' == BUF(p)[*pos + 1]) - p->seenvs = 1; - /* * New sentence, new line:if we (non-macro, non-literal) see a * period at the end of the last printed word, then open a * newline. */ - if (0 == p->literal && 0 == p->outmacro && - *pos < BUFSZ(p) && '.' 
== BUF(p)[*pos - 1]) - texiputchar(p, '\n'); + if (0 == p->literal && 0 == p->outmacro && *pos < BUFSZ(p)) + switch (BUF(p)[*pos - 1]) { + case ('.'): + case ('!'): + case ('?'): + texiputchar(p, '\n'); + break; + default: + break; + } + + p->seenvs = 0; } /* @@ -778,10 +904,10 @@ texicmd(const struct texi *p, size_t pos /* Look for it in our indices. */ for (i = 0; i < p->indexsz; i++) { - toksz = strlen(p->indexs[i]); + toksz = strlen(p->indexs[i].name); if (len != 5 + toksz) continue; - if (strncmp(&BUF(p)[pos], p->indexs[i], toksz)) + if (strncmp(&BUF(p)[pos], p->indexs[i].name, toksz)) continue; if (0 == strncmp(&BUF(p)[pos + toksz], "index", 5)) return(TEXICMD_USER_INDEX); @@ -946,6 +1072,8 @@ parseeoln(struct texi *p, size_t *pos) texiwarn(p, "unexpected \"{\""); advance(p, pos); continue; + case ('\n'): + continue; case ('@'): break; default: @@ -968,6 +1096,18 @@ parseeoln(struct texi *p, size_t *pos) advance(p, pos); } +enum texicmd +peeklinecmd(const struct texi *p, size_t pos) +{ + size_t end; + + while (pos < BUFSZ(p) && isws(BUF(p)[pos])) + pos++; + if (pos == BUFSZ(p) || '@' != BUF(p)[pos]) + return(TEXICMD__MAX); + return(texicmd(p, pos, &end, NULL)); +} + /* * Peek to see if there's a command after subsequent whitespace. * If so, return the macro identifier. @@ -1152,6 +1292,9 @@ parseto(struct texi *p, size_t *pos, con if (NULL != texitoks[cmd].fp) (*texitoks[cmd].fp)(p, cmd, pos); } + + if (*pos == BUFSZ(p)) + texiwarn(p, "EOF expecting \"%s\" end\n", endtoken); } /* @@ -1577,6 +1720,7 @@ teximdocopen(struct texi *p, size_t *pos t = time(NULL); strftime(date, sizeof(date), "%F", localtime(&t)); + p->seenvs = -1; teximacroopen(p, "Dd"); texiputchars(p, date); teximacroclose(p); -- To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv