* texi2mdoc: Macro support! This parses the @macro blocks, maintains
@ 2015-02-23 11:45 kristaps
0 siblings, 0 replies; only message in thread
From: kristaps @ 2015-02-23 11:45 UTC (permalink / raw)
To: source
Log Message:
-----------
Macro support! This parses the @macro blocks, maintains their arguments and
bodies, recognises invoked macros, fills in their arguments, then recursively
invokes the macro bodies.
Modified Files:
--------------
texi2mdoc:
extern.h
main.c
util.c
Revision Data
-------------
Index: main.c
===================================================================
RCS file: /home/cvs/mdocml/texi2mdoc/main.c,v
retrieving revision 1.29
retrieving revision 1.30
diff -Lmain.c -Lmain.c -u -p -r1.29 -r1.30
--- main.c
+++ main.c
@@ -58,6 +58,7 @@ static void doinclude(struct texi *, enu
static void doitem(struct texi *, enum texicmd, const char *, size_t, size_t *);
static void doitemize(struct texi *, enum texicmd, const char *, size_t, size_t *);
static void dolink(struct texi *, enum texicmd, const char *, size_t, size_t *);
+static void domacro(struct texi *, enum texicmd, const char *, size_t, size_t *);
static void domath(struct texi *, enum texicmd, const char *, size_t, size_t *);
static void domultitable(struct texi *, enum texicmd, const char *, size_t, size_t *);
static void doquotation(struct texi *, enum texicmd, const char *, size_t, size_t *);
@@ -172,13 +173,13 @@ static const struct texitok __texitoks[T
{ doblock, "ifnotxml", 8 }, /* TEXICMD_IFNOTXML */
{ doblock, "ifplaintext", 11 }, /* TEXICMD_IFPLAINTEXT */
{ doignblock, "iftex", 5 }, /* TEXICMD_IFTEX */
- { doignblock, "ifset", 5 }, /* TEXICMD_IFSET */
+ { dovalue, "ifset", 5 }, /* TEXICMD_IFSET */
{ doignblock, "ifxml", 5 }, /* TEXICMD_IFXML */
{ doignblock, "ignore", 6 }, /* TEXICMD_IGNORE */
{ doignbracket, "image", 5 }, /* TEXICMD_IMAGE */
{ doinclude, "include", 7 }, /* TEXICMD_INCLUDE */
{ dodisplay, "indentblock", 11 }, /* TEXICMD_INDENTBLOCK */
- { doignline, "", 0 }, /* TEXICMD_INDEX */
+ { doignline, "", 0 }, /* TEXICMD_USER_INDEX */
{ doignline, "insertcopying", 13 }, /* TEXICMD_INSERTCOPYING */
{ doitem, "item", 4 }, /* TEXICMD_ITEM */
{ doitemize, "itemize", 7 }, /* TEXICMD_ITEMIZE */
@@ -188,6 +189,7 @@ static const struct texitok __texitoks[T
{ doignline, "kindex", 6 }, /* TEXICMD_KINDEX */
{ dosymbol, "LaTeX", 5 }, /* TEXICMD_LATEX */
{ dosecoffs, "lowersections", 13 }, /* TEXICMD_LOWERSECTIONS */
+ { domacro, "macro", 5 }, /* TEXICMD_MACRO */
{ domath, "math", 4 }, /* TEXICMD_MATH */
{ doignblock, "menu", 4 }, /* TEXICMD_MENU */
{ dosymbol, "minus", 5 }, /* TEXICMD_MINUS */
@@ -426,6 +428,65 @@ dodefn(struct texi *p, enum texicmd cmd,
}
static void
+domacro(struct texi *p, enum texicmd cmd,
+ const char *buf, size_t sz, size_t *pos)
+{
+ size_t start, end, endtoksz, len;
+ struct teximacro m;
+ const char *endtok, *blk;
+
+ memset(&m, 0, sizeof(struct teximacro));
+
+ while (*pos < sz && isws(buf[*pos]))
+ advance(p, buf, pos);
+
+ for (start = end = *pos; end < sz; end++)
+ if (ismspace(buf[end]) || '{' == buf[end])
+ break;
+
+ if (start == end)
+ texierr(p, "zero-length macro name");
+
+ advanceto(p, buf, pos, end);
+
+ m.key = malloc(end - start + 1);
+ if (NULL == m.key)
+ texiabort(p, NULL);
+ memcpy(m.key, &buf[start], end - start);
+ m.key[end - start] = '\0';
+
+ m.args = argparse(p, buf, sz, pos, &m.argsz);
+ advanceeoln(p, buf, sz, pos, 0);
+
+ start = *pos;
+ endtok = "\n@end macro\n";
+ endtoksz = strlen(endtok);
+ blk = memmem(&buf[start], sz, endtok, endtoksz);
+ if (NULL == blk)
+ texierr(p, "unterminated macro body");
+ while (&buf[*pos] != blk)
+ advance(p, buf, pos);
+ assert('\n' == buf[*pos]);
+ advance(p, buf, pos);
+ len = blk - &buf[start];
+ m.value = malloc(len + 1);
+ if (NULL == m.value)
+ texiabort(p, NULL);
+ memcpy(m.value, &buf[start], len);
+ m.value[len] = '\0';
+
+ p->macros = realloc
+ (p->macros,
+ (p->macrosz + 1) *
+ sizeof(struct teximacro));
+ if (NULL == p->macros)
+ texiabort(p, NULL);
+
+ p->macros[p->macrosz++] = m;
+ advanceeoln(p, buf, sz, pos, 1);
+}
+
+static void
doignblock(struct texi *p, enum texicmd cmd,
const char *buf, size_t sz, size_t *pos)
{
@@ -584,7 +645,7 @@ doverbinclude(struct texi *p, enum texic
advance(p, buf, pos);
continue;
}
- type = texicmd(p, buf, *pos, sz, &end);
+ type = texicmd(p, buf, *pos, sz, &end, NULL);
advanceto(p, buf, pos, end);
if (TEXICMD_VALUE != type)
texierr(p, "unknown verbatiminclude command");
@@ -643,7 +704,7 @@ doinclude(struct texi *p, enum texicmd c
advance(p, buf, pos);
continue;
}
- type = texicmd(p, buf, *pos, sz, &end);
+ type = texicmd(p, buf, *pos, sz, &end, NULL);
advanceto(p, buf, pos, end);
if (TEXICMD_VALUE != type)
texierr(p, "unknown include command");
@@ -912,7 +973,6 @@ dosymbol(struct texi *p, enum texicmd cm
case (TEXICMD_HYPHEN):
break;
default:
- texiwarn(p, "sym: %d", cmd);
abort();
}
@@ -1026,6 +1086,11 @@ dovalue(struct texi *p, enum texicmd cmd
doignblock(p, cmd, buf, sz, pos);
else
parseto(p, buf, sz, pos, texitoks[cmd].tok);
+ } else if (TEXICMD_IFSET == cmd) {
+ if (NULL == valuellookup(p, buf, sz, pos))
+ doignblock(p, cmd, buf, sz, pos);
+ else
+ parseto(p, buf, sz, pos, texitoks[cmd].tok);
} else if (TEXICMD_CLEAR == cmd)
valuelclear(p, buf, sz, pos);
}
@@ -1306,7 +1371,7 @@ domultitable(struct texi *p, enum texicm
* arguments to set the number of columns that we'll
* have.
*/
- type = texicmd(p, buf, *pos, sz, &end);
+ type = texicmd(p, buf, *pos, sz, &end, NULL);
advanceto(p, buf, pos, end);
if (TEXICMD_COLUMNFRACTIONS != type)
texierr(p, "unknown multitable command");
Index: extern.h
===================================================================
RCS file: /home/cvs/mdocml/texi2mdoc/extern.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -Lextern.h -Lextern.h -u -p -r1.4 -r1.5
--- extern.h
+++ extern.h
@@ -124,7 +124,7 @@ enum texicmd {
TEXICMD_IMAGE,
TEXICMD_INCLUDE,
TEXICMD_INDENTBLOCK,
- TEXICMD_INDEX,
+ TEXICMD_USER_INDEX,
TEXICMD_INSERTCOPYING,
TEXICMD_ITEM,
TEXICMD_ITEMIZE,
@@ -134,6 +134,7 @@ enum texicmd {
TEXICMD_KINDEX,
TEXICMD_LATEX,
TEXICMD_LOWERSECTIONS,
+ TEXICMD_MACRO,
TEXICMD_MATH,
TEXICMD_MENU,
TEXICMD_MINUS,
@@ -256,6 +257,13 @@ struct texivalue {
char *value;
};
+struct teximacro {
+ char *key;
+ char *value;
+ char **args;
+ size_t argsz;
+};
+
/*
* The main parse structure.
* This keeps any necessary information handy.
@@ -275,6 +283,8 @@ struct texi {
size_t indexsz; /* entries in indexs */
struct texivalue *vals; /* @value entries */
size_t valsz; /* entries in vals */
+ struct teximacro *macros;
+ size_t macrosz;
/*
* The following control what we output to the screen.
* The complexity is required to accomodate for mdoc(7).
@@ -298,6 +308,8 @@ void advance(struct texi *, const char *
size_t advanceeoln(struct texi *, const char *, size_t, size_t *, int);
void advanceto(struct texi *, const char *, size_t *, size_t);
+char **argparse(struct texi *, const char *, size_t, size_t *, size_t *);
+
int parsearg(struct texi *, const char *, size_t, size_t *, size_t);
void parsebracket(struct texi *, const char *, size_t, size_t *);
void parsefile(struct texi *, const char *, int);
@@ -310,7 +322,8 @@ void parseto(struct texi *, const char *
void texiabort(struct texi *, const char *)
__attribute__((noreturn));
enum texicmd
- texicmd(struct texi *, const char *, size_t, size_t, size_t *);
+ texicmd(struct texi *, const char *, size_t, size_t,
+ size_t *, struct teximacro **);
void texierr(struct texi *, const char *, ...)
__attribute__((format(printf, 2, 3)))
__attribute__((noreturn));
Index: util.c
===================================================================
RCS file: /home/cvs/mdocml/texi2mdoc/util.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -Lutil.c -Lutil.c -u -p -r1.6 -r1.7
--- util.c
+++ util.c
@@ -46,6 +46,27 @@ texifilepop(struct texi *p)
munmap(f->map, f->mapsz);
}
+static void
+teximacrofree(struct teximacro *p)
+{
+ size_t i;
+
+ for (i = 0; i < p->argsz; i++)
+ free(p->args[i]);
+
+ free(p->args);
+ free(p->key);
+ free(p->value);
+}
+
+static void
+texivaluefree(struct texivalue *p)
+{
+
+ free(p->key);
+ free(p->value);
+}
+
/*
* Unmap all files that we're currently using and free all resources
* that we've allocated during the parse.
@@ -64,17 +85,16 @@ texiexit(struct texi *p)
while (p->filepos > 0)
texifilepop(p);
+ for (i = 0; i < p->macrosz; i++)
+ teximacrofree(&p->macros[i]);
for (i = 0; i < p->dirsz; i++)
free(p->dirs[i]);
-
for (i = 0; i < p->indexsz; i++)
free(p->indexs[i]);
+ for (i = 0; i < p->valsz; i++)
+ texivaluefree(&p->vals[i]);
- for (i = 0; i < p->valsz; i++) {
- free(p->vals[i].value);
- free(p->vals[i].key);
- }
-
+ free(p->macros);
free(p->vals);
free(p->indexs);
free(p->dirs);
@@ -380,6 +400,95 @@ advanceto(struct texi *p, const char *bu
advance(p, buf, pos);
}
+static void
+texiexecmacro(struct texi *p, struct teximacro *m,
+ const char *buf, size_t sz, size_t *pos)
+{
+ size_t valsz, realsz, aasz, asz,
+ ssz, i, j, k, start, end;
+ char *val;
+ char **args;
+
+ args = argparse(p, buf, sz, pos, &asz);
+ if (asz != m->argsz)
+ texiwarn(p, "invalid macro argument length");
+ aasz = asz < m->argsz ? asz : m->argsz;
+
+ if (0 == aasz) {
+ parseeof(p, m->value, strlen(m->value));
+ return;
+ }
+
+ valsz = realsz = strlen(m->value);
+ val = strdup(m->value);
+
+ for (i = j = 0; i < realsz; i++) {
+ /* Parse blindly til the backslash delimiter. */
+ if ('\\' != m->value[i]) {
+ val[j++] = m->value[i];
+ val[j] = '\0';
+ continue;
+ } else if (i == realsz - 1)
+ texierr(p, "trailing argument name delimiter");
+
+ /* Double-backslash is escaped. */
+ if ('\\' == m->value[i + 1]) {
+ val[j++] = m->value[i++];
+ val[j] = '\0';
+ continue;
+ }
+
+ assert('\\' == m->value[i] && i < realsz - 1);
+
+ /* Parse to terminating delimiter. */
+ /* FIXME: embedded, escaped delimiters? */
+ for (start = end = i + 1; end < realsz; end++)
+ if ('\\' == m->value[end])
+ break;
+ if (end == realsz)
+ texierr(p, "unterminated argument name");
+
+ for (k = 0; k < aasz; k++) {
+ if ((ssz = strlen(m->args[k])) != (end - start))
+ continue;
+ if (strncmp(&m->value[start], m->args[k], ssz))
+ continue;
+ break;
+ }
+
+ /*
+ * Argument didn't exist in argument table.
+ * No need to reallocate here: we just copy the text
+ * directly from the macro value into the buffer.
+ */
+ if (k == aasz) {
+ for ( ; i < end; i++)
+ val[j++] = m->value[i];
+ assert('\\' == m->value[i]);
+ val[j++] = m->value[i];
+ val[j] = '\0';
+ continue;
+ }
+
+ if (strlen(args[k]) > ssz) {
+ valsz += strlen(args[k]);
+ val = realloc(val, valsz + 1);
+ if (NULL == val)
+ texiabort(p, NULL);
+ }
+
+ j = strlcat(val, args[k], valsz + 1);
+ i = end;
+ }
+
+ parseeof(p, val, strlen(val));
+
+ for (i = 0; i < asz; i++)
+ free(args[i]);
+ free(args);
+ free(val);
+}
+
/*
* Output a free-form word in the input stream, progressing to the next
* command or white-space.
@@ -430,13 +539,16 @@ texiword(struct texi *p, const char *buf
* index after the command name.
*/
enum texicmd
-texicmd(struct texi *p, const char *buf,
- size_t pos, size_t sz, size_t *end)
+texicmd(struct texi *p, const char *buf, size_t pos,
+ size_t sz, size_t *end, struct teximacro **macro)
{
size_t i, len, toksz;
assert('@' == buf[pos]);
+ if (NULL != macro)
+ *macro = NULL;
+
if ((*end = pos) == sz)
return(TEXICMD__MAX);
else if ((*end = ++pos) == sz)
@@ -479,7 +591,17 @@ texicmd(struct texi *p, const char *buf,
if (strncmp(&buf[pos], p->indexs[i], toksz))
continue;
if (0 == strncmp(&buf[pos + toksz], "index", 5))
- return(TEXICMD_INDEX);
+ return(TEXICMD_USER_INDEX);
+ }
+
+ for (i = 0; i < p->macrosz; i++) {
+ if (len != strlen(p->macros[i].key))
+ continue;
+ if (strncmp(&buf[pos], p->macros[i].key, len))
+ continue;
+ if (NULL != macro)
+ *macro = &p->macros[i];
+ return(TEXICMD__MAX);
}
texiwarn(p, "bad command: @%.*s", (int)len, &buf[pos]);
@@ -498,8 +620,9 @@ int
parsearg(struct texi *p, const char *buf,
size_t sz, size_t *pos, size_t num)
{
- size_t end;
- enum texicmd cmd;
+ size_t end;
+ enum texicmd cmd;
+ struct teximacro *macro;
while (*pos < sz && ismspace(buf[*pos]))
advance(p, buf, pos);
@@ -528,8 +651,10 @@ parsearg(struct texi *p, const char *buf
continue;
}
- cmd = texicmd(p, buf, *pos, sz, &end);
+ cmd = texicmd(p, buf, *pos, sz, &end, &macro);
advanceto(p, buf, pos, end);
+ if (NULL != macro)
+ texiexecmacro(p, macro, buf, sz, pos);
if (TEXICMD__MAX == cmd)
continue;
if (NULL != texitoks[cmd].fp)
@@ -545,8 +670,9 @@ parsearg(struct texi *p, const char *buf
void
parsebracket(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
- size_t end;
- enum texicmd cmd;
+ size_t end;
+ enum texicmd cmd;
+ struct teximacro *macro;
while (*pos < sz && ismspace(buf[*pos]))
advance(p, buf, pos);
@@ -572,8 +698,10 @@ parsebracket(struct texi *p, const char
continue;
}
- cmd = texicmd(p, buf, *pos, sz, &end);
+ cmd = texicmd(p, buf, *pos, sz, &end, &macro);
advanceto(p, buf, pos, end);
+ if (NULL != macro)
+ texiexecmacro(p, macro, buf, sz, pos);
if (TEXICMD__MAX == cmd)
continue;
if (NULL != texitoks[cmd].fp)
@@ -589,8 +717,9 @@ parsebracket(struct texi *p, const char
void
parseeoln(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
- size_t end;
- enum texicmd cmd;
+ size_t end;
+ enum texicmd cmd;
+ struct teximacro *macro;
while (*pos < sz && '\n' != buf[*pos]) {
while (*pos < sz && isws(buf[*pos])) {
@@ -617,8 +746,10 @@ parseeoln(struct texi *p, const char *bu
continue;
}
- cmd = texicmd(p, buf, *pos, sz, &end);
+ cmd = texicmd(p, buf, *pos, sz, &end, &macro);
advanceto(p, buf, pos, end);
+ if (NULL != macro)
+ texiexecmacro(p, macro, buf, sz, pos);
if (TEXICMD__MAX == cmd)
continue;
if (NULL != texitoks[cmd].fp)
@@ -633,8 +764,9 @@ parseeoln(struct texi *p, const char *bu
void
parsesingle(struct texi *p, const char *buf, size_t sz, size_t *pos)
{
- size_t end;
- enum texicmd cmd;
+ size_t end;
+ enum texicmd cmd;
+ struct teximacro *macro;
if ((*pos = advancenext(p, buf, sz, pos)) >= sz)
return;
@@ -657,8 +789,10 @@ parsesingle(struct texi *p, const char *
return;
}
- cmd = texicmd(p, buf, *pos, sz, &end);
+ cmd = texicmd(p, buf, *pos, sz, &end, &macro);
advanceto(p, buf, pos, end);
+ if (NULL != macro)
+ texiexecmacro(p, macro, buf, sz, pos);
if (TEXICMD__MAX == cmd)
return;
if (NULL != texitoks[cmd].fp)
@@ -713,9 +847,10 @@ void
parseto(struct texi *p, const char *buf,
size_t sz, size_t *pos, const char *endtoken)
{
- size_t end;
- enum texicmd cmd;
- size_t endtoksz;
+ size_t end;
+ enum texicmd cmd;
+ size_t endtoksz;
+ struct teximacro *macro;
endtoksz = strlen(endtoken);
assert(endtoksz > 0);
@@ -739,7 +874,7 @@ parseto(struct texi *p, const char *buf,
continue;
}
- cmd = texicmd(p, buf, *pos, sz, &end);
+ cmd = texicmd(p, buf, *pos, sz, &end, &macro);
advanceto(p, buf, pos, end);
if (TEXICMD_END == cmd) {
while (*pos < sz && isws(buf[*pos]))
@@ -757,9 +892,13 @@ parseto(struct texi *p, const char *buf,
texiwarn(p, "unexpected \"end\"");
advanceeoln(p, buf, sz, pos, 0);
continue;
- } else if (TEXICMD__MAX != cmd)
- if (NULL != texitoks[cmd].fp)
- (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
+ }
+ if (NULL != macro)
+ texiexecmacro(p, macro, buf, sz, pos);
+ if (TEXICMD__MAX == cmd)
+ continue;
+ if (NULL != texitoks[cmd].fp)
+ (*texitoks[cmd].fp)(p, cmd, buf, sz, pos);
}
}
@@ -952,4 +1091,89 @@ valueadd(struct texi *p, char *key, char
p->vals[p->valsz].value = val;
p->valsz++;
}
+}
+
+/*
+ * Take the arguments to a macro, e.g., @foo{bar, baz, xyzzy} (or the
+ * declaration form, @macro foo {arg1, ...}) and textually convert it to
+ * an array of arguments of size "argsz".
+ * These need to be freed individually and as a whole.
+ * NOTE: this will puke on @, or @} macros, which can trick it into
+ * stopping argument parsing earlier.
+ * Ergo, textual: this doesn't interpret the arguments in any way.
+ */
+char **
+argparse(struct texi *p, const char *buf,
+ size_t sz, size_t *pos, size_t *argsz)
+{
+ char **args;
+ size_t start, end, stack;
+
+ while (*pos < sz && isws(buf[*pos]))
+ advance(p, buf, pos);
+
+ args = NULL;
+ *argsz = 0;
+
+ /* Check for no arguments. */
+ if ('{' != buf[*pos])
+ return(args);
+
+ /* Parse til the closing '}', putting into the array. */
+ advance(p, buf, pos);
+ while (*pos < sz) {
+ while (*pos < sz && isws(buf[*pos]))
+ advance(p, buf, pos);
+ start = *pos;
+ stack = 0;
+ while (*pos < sz) {
+ /*
+ * According to the manual, commas within
+ * embedded commands are escaped.
+ * We keep track of embedded-ness in the "stack"
+ * state anyway, so this is free.
+ */
+ if (0 == stack && ',' == buf[*pos])
+ break;
+ else if (0 == stack && '}' == buf[*pos])
+ break;
+ else if (0 != stack && '}' == buf[*pos])
+ stack--;
+ else if ('{' == buf[*pos])
+ stack++;
+ advance(p, buf, pos);
+ }
+ if (stack)
+ texiwarn(p, "unterminated macro "
+ "in macro arguments");
+ if ((end = *pos) == sz)
+ break;
+ /* Test for zero-length '{ }'. */
+ if (start == end && '}' == buf[*pos] && 0 == *argsz)
+ break;
+ if (start == end)
+ texierr(p, "zero-length argument");
+ /* FIXME: use reallocarray. */
+ args = realloc
+ (args, sizeof(char *) *
+ (*argsz + 1));
+ if (NULL == args)
+ texiabort(p, NULL);
+ args[*argsz] = malloc(end - start + 1);
+ if (NULL == args[*argsz])
+ texiabort(p, NULL);
+ memcpy(args[*argsz],
+ &buf[start], end - start);
+ args[*argsz][end - start] = '\0';
+ (*argsz)++;
+ if ('}' == buf[*pos])
+ break;
+ advance(p, buf, pos);
+ }
+
+ if (*pos == sz)
+ texierr(p, "unterminated arguments");
+ assert('}' == buf[*pos]);
+ advance(p, buf, pos);
+ return(args);
}
--
To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2015-02-23 11:45 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-23 11:45 texi2mdoc: Macro support! This parses the @macro blocks, maintains kristaps
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).