From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from localhost (fantadrom.bsd.lv [local]); by fantadrom.bsd.lv (OpenSMTPD) with ESMTPA id e5a1c3f3; for ; Mon, 23 Feb 2015 06:45:00 -0500 (EST) Date: Mon, 23 Feb 2015 06:45:00 -0500 (EST) Message-Id: <10567472215640127430.enqueue@fantadrom.bsd.lv> X-Mailinglist: mdocml-source Reply-To: source@mdocml.bsd.lv MIME-Version: 1.0 From: kristaps@mdocml.bsd.lv To: source@mdocml.bsd.lv Subject: texi2mdoc: Macro support! This parses the @macro blocks, maintains X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Log Message: ----------- Macro support! This parses the @macro blocks, maintains their arguments and bodies, recognises invoked macros, fills in their arguments, then recursively invokes the macro bodies. Modified Files: -------------- texi2mdoc: extern.h main.c util.c Revision Data ------------- Index: main.c =================================================================== RCS file: /home/cvs/mdocml/texi2mdoc/main.c,v retrieving revision 1.29 retrieving revision 1.30 diff -Lmain.c -Lmain.c -u -p -r1.29 -r1.30 --- main.c +++ main.c @@ -58,6 +58,7 @@ static void doinclude(struct texi *, enu static void doitem(struct texi *, enum texicmd, const char *, size_t, size_t *); static void doitemize(struct texi *, enum texicmd, const char *, size_t, size_t *); static void dolink(struct texi *, enum texicmd, const char *, size_t, size_t *); +static void domacro(struct texi *, enum texicmd, const char *, size_t, size_t *); static void domath(struct texi *, enum texicmd, const char *, size_t, size_t *); static void domultitable(struct texi *, enum texicmd, const char *, size_t, size_t *); static void doquotation(struct texi *, enum texicmd, const char *, size_t, size_t *); @@ -172,13 +173,13 @@ static const struct texitok __texitoks[T { doblock, "ifnotxml", 8 }, /* TEXICMD_IFNOTXML */ { doblock, "ifplaintext", 11 }, /* TEXICMD_IFPLAINTEXT */ { doignblock, "iftex", 5 }, 
/* TEXICMD_IFTEX */ - { doignblock, "ifset", 5 }, /* TEXICMD_IFSET */ + { dovalue, "ifset", 5 }, /* TEXICMD_IFSET */ { doignblock, "ifxml", 5 }, /* TEXICMD_IFXML */ { doignblock, "ignore", 6 }, /* TEXICMD_IGNORE */ { doignbracket, "image", 5 }, /* TEXICMD_IMAGE */ { doinclude, "include", 7 }, /* TEXICMD_INCLUDE */ { dodisplay, "indentblock", 11 }, /* TEXICMD_INDENTBLOCK */ - { doignline, "", 0 }, /* TEXICMD_INDEX */ + { doignline, "", 0 }, /* TEXICMD_USER_INDEX */ { doignline, "insertcopying", 13 }, /* TEXICMD_INSERTCOPYING */ { doitem, "item", 4 }, /* TEXICMD_ITEM */ { doitemize, "itemize", 7 }, /* TEXICMD_ITEMIZE */ @@ -188,6 +189,7 @@ static const struct texitok __texitoks[T { doignline, "kindex", 6 }, /* TEXICMD_KINDEX */ { dosymbol, "LaTeX", 5 }, /* TEXICMD_LATEX */ { dosecoffs, "lowersections", 13 }, /* TEXICMD_LOWERSECTIONS */ + { domacro, "macro", 5 }, /* TEXICMD_MACRO */ { domath, "math", 4 }, /* TEXICMD_MATH */ { doignblock, "menu", 4 }, /* TEXICMD_MENU */ { dosymbol, "minus", 5 }, /* TEXICMD_MINUS */ @@ -426,6 +428,65 @@ dodefn(struct texi *p, enum texicmd cmd, } static void +domacro(struct texi *p, enum texicmd cmd, + const char *buf, size_t sz, size_t *pos) +{ + size_t start, end, endtoksz, len; + struct teximacro m; + const char *endtok, *blk; + + memset(&m, 0, sizeof(struct teximacro)); + + while (*pos < sz && isws(buf[*pos])) + advance(p, buf, pos); + + for (start = end = *pos; end < sz; end++) + if (ismspace(buf[end]) || '{' == buf[end]) + break; + + if (start == end) + texierr(p, "zero-length macro name"); + + advanceto(p, buf, pos, end); + + m.key = malloc(end - start + 1); + if (NULL == m.key) + texiabort(p, NULL); + memcpy(m.key, &buf[start], end - start); + m.key[end - start] = '\0'; + + m.args = argparse(p, buf, sz, pos, &m.argsz); + advanceeoln(p, buf, sz, pos, 0); + + start = *pos; + endtok = "\n@end macro\n"; + endtoksz = strlen(endtok); + blk = memmem(&buf[start], sz, endtok, endtoksz); + if (NULL == blk) + texierr(p, "unterminated macro 
body"); + while (&buf[*pos] != blk) + advance(p, buf, pos); + assert('\n' == buf[*pos]); + advance(p, buf, pos); + len = blk - &buf[start]; + m.value = malloc(len + 1); + if (NULL == m.value) + texiabort(p, NULL); + memcpy(m.value, &buf[start], len); + m.value[len] = '\0'; + + p->macros = realloc + (p->macros, + (p->macrosz + 1) * + sizeof(struct teximacro)); + if (NULL == p->macros) + texiabort(p, NULL); + + p->macros[p->macrosz++] = m; + advanceeoln(p, buf, sz, pos, 1); +} + +static void doignblock(struct texi *p, enum texicmd cmd, const char *buf, size_t sz, size_t *pos) { @@ -584,7 +645,7 @@ doverbinclude(struct texi *p, enum texic advance(p, buf, pos); continue; } - type = texicmd(p, buf, *pos, sz, &end); + type = texicmd(p, buf, *pos, sz, &end, NULL); advanceto(p, buf, pos, end); if (TEXICMD_VALUE != type) texierr(p, "unknown verbatiminclude command"); @@ -643,7 +704,7 @@ doinclude(struct texi *p, enum texicmd c advance(p, buf, pos); continue; } - type = texicmd(p, buf, *pos, sz, &end); + type = texicmd(p, buf, *pos, sz, &end, NULL); advanceto(p, buf, pos, end); if (TEXICMD_VALUE != type) texierr(p, "unknown include command"); @@ -912,7 +973,6 @@ dosymbol(struct texi *p, enum texicmd cm case (TEXICMD_HYPHEN): break; default: - texiwarn(p, "sym: %d", cmd); abort(); } @@ -1026,6 +1086,11 @@ dovalue(struct texi *p, enum texicmd cmd doignblock(p, cmd, buf, sz, pos); else parseto(p, buf, sz, pos, texitoks[cmd].tok); + } else if (TEXICMD_IFSET == cmd) { + if (NULL == valuellookup(p, buf, sz, pos)) + doignblock(p, cmd, buf, sz, pos); + else + parseto(p, buf, sz, pos, texitoks[cmd].tok); } else if (TEXICMD_CLEAR == cmd) valuelclear(p, buf, sz, pos); } @@ -1306,7 +1371,7 @@ domultitable(struct texi *p, enum texicm * arguments to set the number of columns that we'll * have. 
*/ - type = texicmd(p, buf, *pos, sz, &end); + type = texicmd(p, buf, *pos, sz, &end, NULL); advanceto(p, buf, pos, end); if (TEXICMD_COLUMNFRACTIONS != type) texierr(p, "unknown multitable command"); Index: extern.h =================================================================== RCS file: /home/cvs/mdocml/texi2mdoc/extern.h,v retrieving revision 1.4 retrieving revision 1.5 diff -Lextern.h -Lextern.h -u -p -r1.4 -r1.5 --- extern.h +++ extern.h @@ -124,7 +124,7 @@ enum texicmd { TEXICMD_IMAGE, TEXICMD_INCLUDE, TEXICMD_INDENTBLOCK, - TEXICMD_INDEX, + TEXICMD_USER_INDEX, TEXICMD_INSERTCOPYING, TEXICMD_ITEM, TEXICMD_ITEMIZE, @@ -134,6 +134,7 @@ enum texicmd { TEXICMD_KINDEX, TEXICMD_LATEX, TEXICMD_LOWERSECTIONS, + TEXICMD_MACRO, TEXICMD_MATH, TEXICMD_MENU, TEXICMD_MINUS, @@ -256,6 +257,13 @@ struct texivalue { char *value; }; +struct teximacro { + char *key; + char *value; + char **args; + size_t argsz; +}; + /* * The main parse structure. * This keeps any necessary information handy. @@ -275,6 +283,8 @@ struct texi { size_t indexsz; /* entries in indexs */ struct texivalue *vals; /* @value entries */ size_t valsz; /* entries in vals */ + struct teximacro *macros; + size_t macrosz; /* * The following control what we output to the screen. * The complexity is required to accomodate for mdoc(7). 
@@ -298,6 +308,8 @@ void advance(struct texi *, const char * size_t advanceeoln(struct texi *, const char *, size_t, size_t *, int); void advanceto(struct texi *, const char *, size_t *, size_t); +char **argparse(struct texi *, const char *, size_t, size_t *, size_t *); + int parsearg(struct texi *, const char *, size_t, size_t *, size_t); void parsebracket(struct texi *, const char *, size_t, size_t *); void parsefile(struct texi *, const char *, int); @@ -310,7 +322,8 @@ void parseto(struct texi *, const char * void texiabort(struct texi *, const char *) __attribute__((noreturn)); enum texicmd - texicmd(struct texi *, const char *, size_t, size_t, size_t *); + texicmd(struct texi *, const char *, size_t, size_t, + size_t *, struct teximacro **); void texierr(struct texi *, const char *, ...) __attribute__((format(printf, 2, 3))) __attribute__((noreturn)); Index: util.c =================================================================== RCS file: /home/cvs/mdocml/texi2mdoc/util.c,v retrieving revision 1.6 retrieving revision 1.7 diff -Lutil.c -Lutil.c -u -p -r1.6 -r1.7 --- util.c +++ util.c @@ -46,6 +46,27 @@ texifilepop(struct texi *p) munmap(f->map, f->mapsz); } +static void +teximacrofree(struct teximacro *p) +{ + size_t i; + + for (i = 0; i < p->argsz; i++) + free(p->args[i]); + + free(p->args); + free(p->key); + free(p->value); +} + +static void +texivaluefree(struct texivalue *p) +{ + + free(p->key); + free(p->value); +} + /* * Unmap all files that we're currently using and free all resources * that we've allocated during the parse. 
@@ -64,17 +85,16 @@ texiexit(struct texi *p) while (p->filepos > 0) texifilepop(p); + for (i = 0; i < p->macrosz; i++) + teximacrofree(&p->macros[i]); for (i = 0; i < p->dirsz; i++) free(p->dirs[i]); - for (i = 0; i < p->indexsz; i++) free(p->indexs[i]); + for (i = 0; i < p->valsz; i++) + texivaluefree(&p->vals[i]); - for (i = 0; i < p->valsz; i++) { - free(p->vals[i].value); - free(p->vals[i].key); - } - + free(p->macros); free(p->vals); free(p->indexs); free(p->dirs); @@ -380,6 +400,95 @@ advanceto(struct texi *p, const char *bu advance(p, buf, pos); } +static void +texiexecmacro(struct texi *p, struct teximacro *m, + const char *buf, size_t sz, size_t *pos) +{ + size_t valsz, realsz, aasz, asz, + ssz, i, j, k, start, end; + char *val; + char **args; + + args = argparse(p, buf, sz, pos, &asz); + if (asz != m->argsz) + texiwarn(p, "invalid macro argument length"); + aasz = asz < m->argsz ? asz : m->argsz; + + if (0 == aasz) { + parseeof(p, m->value, strlen(m->value)); + return; + } + + valsz = realsz = strlen(m->value); + val = strdup(m->value); + + for (i = j = 0; i < realsz; i++) { + /* Parse blindly til the backslash delimiter. */ + if ('\\' != m->value[i]) { + val[j++] = m->value[i]; + val[j] = '\0'; + continue; + } else if (i == realsz - 1) + texierr(p, "trailing argument name delimiter"); + + /* Double-backslash is escaped. */ + if ('\\' == m->value[i + 1]) { + val[j++] = m->value[i++]; + val[j] = '\0'; + continue; + } + + assert('\\' == m->value[i] && i < realsz - 1); + + /* Parse to terminating delimiter. */ + /* FIXME: embedded, escaped delimiters? */ + for (start = end = i + 1; end < realsz; end++) + if ('\\' == m->value[end]) + break; + if (end == realsz) + texierr(p, "unterminated argument name"); + + for (k = 0; k < aasz; k++) { + if ((ssz = strlen(m->args[k])) != (end - start)) + continue; + if (strncmp(&m->value[start], m->args[k], ssz)) + continue; + break; + } + + /* + * Argument didn't exist in argument table. 
+ * No need to reallocate here: we just copy the text + * directly from the macro value into the buffer. + */ + if (k == aasz) { + for ( ; i < end; i++) + val[j++] = m->value[i]; + assert('\\' == m->value[i]); + val[j++] = m->value[i]; + val[j] = '\0'; + continue; + } + + if (strlen(args[k]) > ssz) { + valsz += strlen(args[k]); + val = realloc(val, valsz + 1); + if (NULL == val) + texiabort(p, NULL); + } + + j = strlcat(val, args[k], valsz + 1); + i = end; + } + + parseeof(p, val, strlen(val)); + + for (i = 0; i < asz; i++) + free(args[i]); + free(args); + free(val); +} + /* * Output a free-form word in the input stream, progressing to the next * command or white-space. @@ -430,13 +539,16 @@ texiword(struct texi *p, const char *buf * index after the command name. */ enum texicmd -texicmd(struct texi *p, const char *buf, - size_t pos, size_t sz, size_t *end) +texicmd(struct texi *p, const char *buf, size_t pos, + size_t sz, size_t *end, struct teximacro **macro) { size_t i, len, toksz; assert('@' == buf[pos]); + if (NULL != macro) + *macro = NULL; + if ((*end = pos) == sz) return(TEXICMD__MAX); else if ((*end = ++pos) == sz) @@ -479,7 +591,17 @@ texicmd(struct texi *p, const char *buf, if (strncmp(&buf[pos], p->indexs[i], toksz)) continue; if (0 == strncmp(&buf[pos + toksz], "index", 5)) - return(TEXICMD_INDEX); + return(TEXICMD_USER_INDEX); + } + + for (i = 0; i < p->macrosz; i++) { + if (len != strlen(p->macros[i].key)) + continue; + if (strncmp(&buf[pos], p->macros[i].key, len)) + continue; + if (NULL != macro) + *macro = &p->macros[i]; + return(TEXICMD__MAX); } texiwarn(p, "bad command: @%.*s", (int)len, &buf[pos]); @@ -498,8 +620,9 @@ int parsearg(struct texi *p, const char *buf, size_t sz, size_t *pos, size_t num) { - size_t end; - enum texicmd cmd; + size_t end; + enum texicmd cmd; + struct teximacro *macro; while (*pos < sz && ismspace(buf[*pos])) advance(p, buf, pos); @@ -528,8 +651,10 @@ parsearg(struct texi *p, const char *buf continue; } - cmd = 
texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, &macro); advanceto(p, buf, pos, end); + if (NULL != macro) + texiexecmacro(p, macro, buf, sz, pos); if (TEXICMD__MAX == cmd) continue; if (NULL != texitoks[cmd].fp) @@ -545,8 +670,9 @@ parsearg(struct texi *p, const char *buf void parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos) { - size_t end; - enum texicmd cmd; + size_t end; + enum texicmd cmd; + struct teximacro *macro; while (*pos < sz && ismspace(buf[*pos])) advance(p, buf, pos); @@ -572,8 +698,10 @@ parsebracket(struct texi *p, const char continue; } - cmd = texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, &macro); advanceto(p, buf, pos, end); + if (NULL != macro) + texiexecmacro(p, macro, buf, sz, pos); if (TEXICMD__MAX == cmd) continue; if (NULL != texitoks[cmd].fp) @@ -589,8 +717,9 @@ parsebracket(struct texi *p, const char void parseeoln(struct texi *p, const char *buf, size_t sz, size_t *pos) { - size_t end; - enum texicmd cmd; + size_t end; + enum texicmd cmd; + struct teximacro *macro; while (*pos < sz && '\n' != buf[*pos]) { while (*pos < sz && isws(buf[*pos])) { @@ -617,8 +746,10 @@ parseeoln(struct texi *p, const char *bu continue; } - cmd = texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, &macro); advanceto(p, buf, pos, end); + if (NULL != macro) + texiexecmacro(p, macro, buf, sz, pos); if (TEXICMD__MAX == cmd) continue; if (NULL != texitoks[cmd].fp) @@ -633,8 +764,9 @@ parseeoln(struct texi *p, const char *bu void parsesingle(struct texi *p, const char *buf, size_t sz, size_t *pos) { - size_t end; - enum texicmd cmd; + size_t end; + enum texicmd cmd; + struct teximacro *macro; if ((*pos = advancenext(p, buf, sz, pos)) >= sz) return; @@ -657,8 +789,10 @@ parsesingle(struct texi *p, const char * return; } - cmd = texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, &macro); advanceto(p, buf, pos, end); + if (NULL != macro) + texiexecmacro(p, macro, buf, 
sz, pos); if (TEXICMD__MAX == cmd) return; if (NULL != texitoks[cmd].fp) @@ -713,9 +847,10 @@ void parseto(struct texi *p, const char *buf, size_t sz, size_t *pos, const char *endtoken) { - size_t end; - enum texicmd cmd; - size_t endtoksz; + size_t end; + enum texicmd cmd; + size_t endtoksz; + struct teximacro *macro; endtoksz = strlen(endtoken); assert(endtoksz > 0); @@ -739,7 +874,7 @@ parseto(struct texi *p, const char *buf, continue; } - cmd = texicmd(p, buf, *pos, sz, &end); + cmd = texicmd(p, buf, *pos, sz, &end, &macro); advanceto(p, buf, pos, end); if (TEXICMD_END == cmd) { while (*pos < sz && isws(buf[*pos])) @@ -757,9 +892,13 @@ parseto(struct texi *p, const char *buf, texiwarn(p, "unexpected \"end\""); advanceeoln(p, buf, sz, pos, 0); continue; - } else if (TEXICMD__MAX != cmd) - if (NULL != texitoks[cmd].fp) - (*texitoks[cmd].fp)(p, cmd, buf, sz, pos); + } + if (NULL != macro) + texiexecmacro(p, macro, buf, sz, pos); + if (TEXICMD__MAX == cmd) + continue; + if (NULL != texitoks[cmd].fp) + (*texitoks[cmd].fp)(p, cmd, buf, sz, pos); } } @@ -952,4 +1091,89 @@ valueadd(struct texi *p, char *key, char p->vals[p->valsz].value = val; p->valsz++; } +} + +/* + * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the + * declaration form, @macro foo {arg1, ...}) and textually convert it to + * an array of arguments of size "argsz". + * These need to be freed individually and as a whole. + * NOTE: this will puke on @, or @} macros, which can trick it into + * stopping argument parsing earlier. + * Ergo, textual: this doesn't interpret the arguments in any way. + */ +char ** +argparse(struct texi *p, const char *buf, + size_t sz, size_t *pos, size_t *argsz) +{ + char **args; + size_t start, end, stack; + + while (*pos < sz && isws(buf[*pos])) + advance(p, buf, pos); + + args = NULL; + *argsz = 0; + + /* Check for no arguments. */ + if ('{' != buf[*pos]) + return(args); + + /* Parse til the closing '}', putting into the array. 
*/ + advance(p, buf, pos); + while (*pos < sz) { + while (*pos < sz && isws(buf[*pos])) + advance(p, buf, pos); + start = *pos; + stack = 0; + while (*pos < sz) { + /* + * According to the manual, commas within + * embedded commands are escaped. + * We keep track of embedded-ness in the "stack" + * state anyway, so this is free. + */ + if (0 == stack && ',' == buf[*pos]) + break; + else if (0 == stack && '}' == buf[*pos]) + break; + else if (0 != stack && '}' == buf[*pos]) + stack--; + else if ('{' == buf[*pos]) + stack++; + advance(p, buf, pos); + } + if (stack) + texiwarn(p, "unterminated macro " + "in macro arguments"); + if ((end = *pos) == sz) + break; + /* Test for zero-length '{ }'. */ + if (start == end && '}' == buf[*pos] && 0 == *argsz) + break; + if (start == end) + texierr(p, "zero-length argument"); + /* FIXME: use reallocarray. */ + args = realloc + (args, sizeof(char *) * + (*argsz + 1)); + if (NULL == args) + texiabort(p, NULL); + args[*argsz] = malloc(end - start + 1); + if (NULL == args[*argsz]) + texiabort(p, NULL); + memcpy(args[*argsz], + &buf[start], end - start); + args[*argsz][end - start] = '\0'; + (*argsz)++; + if ('}' == buf[*pos]) + break; + advance(p, buf, pos); + } + + if (*pos == sz) + texierr(p, "unterminated arguments"); + assert('}' == buf[*pos]); + advance(p, buf, pos); + return(args); } -- To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv