tech@mandoc.bsd.lv
 help / color / mirror / Atom feed
* Re-written eqn(7) parser with associativity.
@ 2014-09-30 12:01 Kristaps Dzonsons
  2014-09-30 13:33 ` Kristaps Dzonsons
  0 siblings, 1 reply; 2+ messages in thread
From: Kristaps Dzonsons @ 2014-09-30 12:01 UTC (permalink / raw)
  To: tech

[-- Attachment #1: Type: text/plain, Size: 805 bytes --]

Folks,

Enclosed is a patch for a re-written equation parser.  During 
EuroBSDCon, I wrote an eqn_html.c that produced beautiful equations in 
MathML.  However, I noticed that the parser didn't account for grouping; 
thus,

  a sub b over c

would have grouped as "a with a subscript b/c".  Unfortunately, the 
correct grouping is "a with subscript b, both over c", where the "under" 
groups in the other direction.  With the new parser, this is all handled 
properly.

It also significantly simplifies the eqn_html.c code, which I've updated 
as well.

The patch ONLY modifies the logical parse: the physical parts, like 
token parsing, definitions, and so on, are all carried over from the old 
code.

I've run it over all examples in the various eqn documents out there 
without issues.

Thoughts?

Best,

Kristaps

[-- Attachment #2: eqn.patch --]
[-- Type: text/plain, Size: 45088 bytes --]

? .DS_Store
? Makefile.local
? TEST.sh
? bar.1
? cgi-doc.diff
? cgi.h
? config.h
? config.log
? configure.local
? demandoc
? ditto.1
? eqn.bak.c
? eqn.patch
? foo
? foo.1
? foo.1.html
? foo.1.ps
? foo.2
? foo.2.html
? foo.3
? foo.3.html
? foo.4
? foo.4.html
? foo.5
? foo.5.html
? foo.5.ps
? foo.sh
? makewhatis
? man.cgi
? mandoc
? plockstat.1
? preconv
? term.diff
? test.1
Index: TODO
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/TODO,v
retrieving revision 1.179
diff -u -p -r1.179 TODO
--- TODO	18 Aug 2014 13:27:47 -0000	1.179
+++ TODO	30 Sep 2014 12:01:29 -0000
@@ -455,3 +455,9 @@ Several areas can be cleaned up to make 
    behaviour, which is not optimal.
  - Have Mac OSX systems automatically disable -static compilation of the
    CGI: -static isn't supported.
+
+************************************************************************
+* eqn missing features
+************************************************************************
+
+ - set, delim, ceiling/floor characters, diacriticals and fonts
Index: eqn.7
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/eqn.7,v
retrieving revision 1.29
diff -u -p -r1.29 eqn.7
--- eqn.7	13 Jul 2013 19:41:16 -0000	1.29
+++ eqn.7	30 Sep 2014 12:01:29 -0000
@@ -75,7 +75,6 @@ box     : text
         | \*qtdefine\*q text text
         | \*qgfont\*q text
         | \*qgsize\*q text
-        | \*qset\*q text text
         | \*qundef\*q text
         | box pos box
         | box mark
Index: eqn.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/eqn.c,v
retrieving revision 1.47
diff -u -p -r1.47 eqn.c
--- eqn.c	28 Sep 2014 14:05:11 -0000	1.47
+++ eqn.c	30 Sep 2014 12:01:29 -0000
@@ -30,14 +30,111 @@
 #include "libmandoc.h"
 #include "libroff.h"
 
+#define	EQN_MSG(t, x) \
+	mandoc_msg((t), (x)->parse, (x)->eqn.ln, (x)->eqn.pos, NULL)
 #define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
-#define	EQN_MSG(t, x)	 mandoc_msg((t), (x)->parse, (x)->eqn.ln, (x)->eqn.pos, NULL)
+#define	STRNEQ(p1, sz1, p2, sz2) \
+	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
+#define	EQNSTREQ(x, p, sz) \
+	STRNEQ((x)->name, (x)->sz, (p), (sz))
 
-enum	eqn_rest {
-	EQN_DESCOPE,
-	EQN_ERR,
-	EQN_OK,
-	EQN_EOF
+enum	eqn_tok {
+	EQN_TOK_DYAD = 0,
+	EQN_TOK_VEC,
+	EQN_TOK_UNDER,
+	EQN_TOK_BAR,
+	EQN_TOK_TILDE,
+	EQN_TOK_HAT,
+	EQN_TOK_DOT,
+	EQN_TOK_DOTDOT,
+	EQN_TOK_FWD,
+	EQN_TOK_BACK,
+	EQN_TOK_DOWN,
+	EQN_TOK_UP,
+	EQN_TOK_FAT,
+	EQN_TOK_ROMAN,
+	EQN_TOK_ITALIC,
+	EQN_TOK_BOLD,
+	EQN_TOK_SIZE,
+	EQN_TOK_SUB,
+	EQN_TOK_SUP,
+	EQN_TOK_SQRT,
+	EQN_TOK_OVER,
+	EQN_TOK_FROM,
+	EQN_TOK_TO,
+	EQN_TOK_BRACE_OPEN,
+	EQN_TOK_BRACE_CLOSE,
+	EQN_TOK_GSIZE,
+	EQN_TOK_GFONT,
+	EQN_TOK_MARK,
+	EQN_TOK_LINEUP,
+	EQN_TOK_LEFT,
+	EQN_TOK_RIGHT,
+	EQN_TOK_PILE,
+	EQN_TOK_LPILE,
+	EQN_TOK_RPILE,
+	EQN_TOK_CPILE,
+	EQN_TOK_MATRIX,
+	EQN_TOK_CCOL,
+	EQN_TOK_LCOL,
+	EQN_TOK_RCOL,
+	EQN_TOK_DELIM,
+	EQN_TOK_DEFINE,
+	EQN_TOK_TDEFINE,
+	EQN_TOK_NDEFINE,
+	EQN_TOK_UNDEF,
+	EQN_TOK_EOF,
+	EQN_TOK_ABOVE,
+	EQN_TOK__MAX
+};
+
+static	const char *eqn_toks[EQN_TOK__MAX] = {
+	"dyad", /* EQN_TOK_DYAD */
+	"vec", /* EQN_TOK_VEC */
+	"under", /* EQN_TOK_UNDER */
+	"bar", /* EQN_TOK_BAR */
+	"tilde", /* EQN_TOK_TILDE */
+	"hat", /* EQN_TOK_HAT */
+	"dot", /* EQN_TOK_DOT */
+	"dotdot", /* EQN_TOK_DOTDOT */
+	"fwd", /* EQN_TOK_FWD * */
+	"back", /* EQN_TOK_BACK */
+	"down", /* EQN_TOK_DOWN */
+	"up", /* EQN_TOK_UP */
+	"fat", /* EQN_TOK_FAT */
+	"roman", /* EQN_TOK_ROMAN */
+	"italic", /* EQN_TOK_ITALIC */
+	"bold", /* EQN_TOK_BOLD */
+	"size", /* EQN_TOK_SIZE */
+	"sub", /* EQN_TOK_SUB */
+	"sup", /* EQN_TOK_SUP */
+	"sqrt", /* EQN_TOK_SQRT */
+	"over", /* EQN_TOK_OVER */
+	"from", /* EQN_TOK_FROM */
+	"to", /* EQN_TOK_TO */
+	"{", /* EQN_TOK_BRACE_OPEN */
+	"}", /* EQN_TOK_BRACE_CLOSE */
+	"gsize", /* EQN_TOK_GSIZE */
+	"gfont", /* EQN_TOK_GFONT */
+	"mark", /* EQN_TOK_MARK */
+	"lineup", /* EQN_TOK_LINEUP */
+	"left", /* EQN_TOK_LEFT */
+	"right", /* EQN_TOK_RIGHT */
+	"pile", /* EQN_TOK_PILE */
+	"lpile", /* EQN_TOK_LPILE */
+	"rpile", /* EQN_TOK_RPILE */
+	"cpile", /* EQN_TOK_CPILE */
+	"matrix", /* EQN_TOK_MATRIX */
+	"ccol", /* EQN_TOK_CCOL */
+	"lcol", /* EQN_TOK_LCOL */
+	"rcol", /* EQN_TOK_RCOL */
+	"delim", /* EQN_TOK_DELIM */
+	"define", /* EQN_TOK_DEFINE */
+	"tdefine", /* EQN_TOK_TDEFINE */
+	"ndefine", /* EQN_TOK_NDEFINE */
+	"undef", /* EQN_TOK_UNDEF */
+	NULL, /* EQN_TOK_EOF */
+	"above", /* EQN_TOK_ABOVE */
 };
 
 enum	eqn_symt {
@@ -103,183 +200,73 @@ enum	eqn_symt {
 	EQNSYM__MAX
 };
 
-enum	eqnpartt {
-	EQN_DEFINE = 0,
-	EQN_NDEFINE,
-	EQN_TDEFINE,
-	EQN_SET,
-	EQN_UNDEF,
-	EQN_GFONT,
-	EQN_GSIZE,
-	EQN_BACK,
-	EQN_FWD,
-	EQN_UP,
-	EQN_DOWN,
-	EQN__MAX
-};
-
-struct	eqnstr {
-	const char	*name;
-	size_t		 sz;
-};
-
-#define	STRNEQ(p1, sz1, p2, sz2) \
-	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
-#define	EQNSTREQ(x, p, sz) \
-	STRNEQ((x)->name, (x)->sz, (p), (sz))
-
-struct	eqnpart {
-	struct eqnstr	 str;
-	int		(*fp)(struct eqn_node *);
-};
-
 struct	eqnsym {
-	struct eqnstr	 str;
+	const char	*str;
 	const char	*sym;
 };
 
-static	enum eqn_rest	 eqn_box(struct eqn_node *, struct eqn_box *);
-static	struct eqn_box	*eqn_box_alloc(struct eqn_node *,
-				struct eqn_box *);
-static	void		 eqn_box_free(struct eqn_box *);
-static	struct eqn_def	*eqn_def_find(struct eqn_node *,
-				const char *, size_t);
-static	int		 eqn_do_gfont(struct eqn_node *);
-static	int		 eqn_do_gsize(struct eqn_node *);
-static	int		 eqn_do_define(struct eqn_node *);
-static	int		 eqn_do_ign1(struct eqn_node *);
-static	int		 eqn_do_ign2(struct eqn_node *);
-static	int		 eqn_do_tdefine(struct eqn_node *);
-static	int		 eqn_do_undef(struct eqn_node *);
-static	enum eqn_rest	 eqn_eqn(struct eqn_node *, struct eqn_box *);
-static	enum eqn_rest	 eqn_list(struct eqn_node *, struct eqn_box *);
-static	enum eqn_rest	 eqn_matrix(struct eqn_node *, struct eqn_box *);
-static	const char	*eqn_nexttok(struct eqn_node *, size_t *);
-static	const char	*eqn_nextrawtok(struct eqn_node *, size_t *);
-static	const char	*eqn_next(struct eqn_node *,
-				char, size_t *, int);
-static	void		 eqn_rewind(struct eqn_node *);
-
-static	const struct eqnpart eqnparts[EQN__MAX] = {
-	{ { "define", 6 }, eqn_do_define }, /* EQN_DEFINE */
-	{ { "ndefine", 7 }, eqn_do_define }, /* EQN_NDEFINE */
-	{ { "tdefine", 7 }, eqn_do_tdefine }, /* EQN_TDEFINE */
-	{ { "set", 3 }, eqn_do_ign2 }, /* EQN_SET */
-	{ { "undef", 5 }, eqn_do_undef }, /* EQN_UNDEF */
-	{ { "gfont", 5 }, eqn_do_gfont }, /* EQN_GFONT */
-	{ { "gsize", 5 }, eqn_do_gsize }, /* EQN_GSIZE */
-	{ { "back", 4 }, eqn_do_ign1 }, /* EQN_BACK */
-	{ { "fwd", 3 }, eqn_do_ign1 }, /* EQN_FWD */
-	{ { "up", 2 }, eqn_do_ign1 }, /* EQN_UP */
-	{ { "down", 4 }, eqn_do_ign1 }, /* EQN_DOWN */
-};
-
-static	const struct eqnstr eqnmarks[EQNMARK__MAX] = {
-	{ "", 0 }, /* EQNMARK_NONE */
-	{ "dot", 3 }, /* EQNMARK_DOT */
-	{ "dotdot", 6 }, /* EQNMARK_DOTDOT */
-	{ "hat", 3 }, /* EQNMARK_HAT */
-	{ "tilde", 5 }, /* EQNMARK_TILDE */
-	{ "vec", 3 }, /* EQNMARK_VEC */
-	{ "dyad", 4 }, /* EQNMARK_DYAD */
-	{ "bar", 3 }, /* EQNMARK_BAR */
-	{ "under", 5 }, /* EQNMARK_UNDER */
-};
-
-static	const struct eqnstr eqnfonts[EQNFONT__MAX] = {
-	{ "", 0 }, /* EQNFONT_NONE */
-	{ "roman", 5 }, /* EQNFONT_ROMAN */
-	{ "bold", 4 }, /* EQNFONT_BOLD */
-	{ "fat", 3 }, /* EQNFONT_FAT */
-	{ "italic", 6 }, /* EQNFONT_ITALIC */
-};
-
-static	const struct eqnstr eqnposs[EQNPOS__MAX] = {
-	{ NULL, 0 }, /* EQNPOS_NONE */
-	{ "over", 4 }, /* EQNPOS_OVER */
-	{ "sup", 3 }, /* EQNPOS_SUP */
-	{ NULL, 0 }, /* EQNPOS_SUPSUB */
-	{ "sub", 3 }, /* EQNPOS_SUB */
-	{ "to", 2 }, /* EQNPOS_TO */
-	{ "from", 4 }, /* EQNPOS_FROM */
-	{ NULL, 0 }, /* EQNPOS_FROMTO */
-};
-
-static	const struct eqnstr eqnpiles[EQNPILE__MAX] = {
-	{ "", 0 }, /* EQNPILE_NONE */
-	{ "pile", 4 }, /* EQNPILE_PILE */
-	{ "cpile", 5 }, /* EQNPILE_CPILE */
-	{ "rpile", 5 }, /* EQNPILE_RPILE */
-	{ "lpile", 5 }, /* EQNPILE_LPILE */
-	{ "col", 3 }, /* EQNPILE_COL */
-	{ "ccol", 4 }, /* EQNPILE_CCOL */
-	{ "rcol", 4 }, /* EQNPILE_RCOL */
-	{ "lcol", 4 }, /* EQNPILE_LCOL */
-};
-
 static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
-	{ { "alpha", 5 }, "*a" }, /* EQNSYM_alpha */
-	{ { "beta", 4 }, "*b" }, /* EQNSYM_beta */
-	{ { "chi", 3 }, "*x" }, /* EQNSYM_chi */
-	{ { "delta", 5 }, "*d" }, /* EQNSYM_delta */
-	{ { "epsilon", 7 }, "*e" }, /* EQNSYM_epsilon */
-	{ { "eta", 3 }, "*y" }, /* EQNSYM_eta */
-	{ { "gamma", 5 }, "*g" }, /* EQNSYM_gamma */
-	{ { "iota", 4 }, "*i" }, /* EQNSYM_iota */
-	{ { "kappa", 5 }, "*k" }, /* EQNSYM_kappa */
-	{ { "lambda", 6 }, "*l" }, /* EQNSYM_lambda */
-	{ { "mu", 2 }, "*m" }, /* EQNSYM_mu */
-	{ { "nu", 2 }, "*n" }, /* EQNSYM_nu */
-	{ { "omega", 5 }, "*w" }, /* EQNSYM_omega */
-	{ { "omicron", 7 }, "*o" }, /* EQNSYM_omicron */
-	{ { "phi", 3 }, "*f" }, /* EQNSYM_phi */
-	{ { "pi", 2 }, "*p" }, /* EQNSYM_pi */
-	{ { "psi", 2 }, "*q" }, /* EQNSYM_psi */
-	{ { "rho", 3 }, "*r" }, /* EQNSYM_rho */
-	{ { "sigma", 5 }, "*s" }, /* EQNSYM_sigma */
-	{ { "tau", 3 }, "*t" }, /* EQNSYM_tau */
-	{ { "theta", 5 }, "*h" }, /* EQNSYM_theta */
-	{ { "upsilon", 7 }, "*u" }, /* EQNSYM_upsilon */
-	{ { "xi", 2 }, "*c" }, /* EQNSYM_xi */
-	{ { "zeta", 4 }, "*z" }, /* EQNSYM_zeta */
-	{ { "DELTA", 5 }, "*D" }, /* EQNSYM_DELTA */
-	{ { "GAMMA", 5 }, "*G" }, /* EQNSYM_GAMMA */
-	{ { "LAMBDA", 6 }, "*L" }, /* EQNSYM_LAMBDA */
-	{ { "OMEGA", 5 }, "*W" }, /* EQNSYM_OMEGA */
-	{ { "PHI", 3 }, "*F" }, /* EQNSYM_PHI */
-	{ { "PI", 2 }, "*P" }, /* EQNSYM_PI */
-	{ { "PSI", 3 }, "*Q" }, /* EQNSYM_PSI */
-	{ { "SIGMA", 5 }, "*S" }, /* EQNSYM_SIGMA */
-	{ { "THETA", 5 }, "*H" }, /* EQNSYM_THETA */
-	{ { "UPSILON", 7 }, "*U" }, /* EQNSYM_UPSILON */
-	{ { "XI", 2 }, "*C" }, /* EQNSYM_XI */
-	{ { "inter", 5 }, "ca" }, /* EQNSYM_inter */
-	{ { "union", 5 }, "cu" }, /* EQNSYM_union */
-	{ { "prod", 4 }, "product" }, /* EQNSYM_prod */
-	{ { "int", 3 }, "integral" }, /* EQNSYM_int */
-	{ { "sum", 3 }, "sum" }, /* EQNSYM_sum */
-	{ { "grad", 4 }, "gr" }, /* EQNSYM_grad */
-	{ { "del", 3 }, "gr" }, /* EQNSYM_del */
-	{ { "times", 5 }, "mu" }, /* EQNSYM_times */
-	{ { "cdot", 4 }, "pc" }, /* EQNSYM_cdot */
-	{ { "nothing", 7 }, "&" }, /* EQNSYM_nothing */
-	{ { "approx", 6 }, "~~" }, /* EQNSYM_approx */
-	{ { "prime", 5 }, "aq" }, /* EQNSYM_prime */
-	{ { "half", 4 }, "12" }, /* EQNSYM_half */
-	{ { "partial", 7 }, "pd" }, /* EQNSYM_partial */
-	{ { "inf", 3 }, "if" }, /* EQNSYM_inf */
-	{ { ">>", 2 }, ">>" }, /* EQNSYM_muchgreat */
-	{ { "<<", 2 }, "<<" }, /* EQNSYM_muchless */
-	{ { "<-", 2 }, "<-" }, /* EQNSYM_larrow */
-	{ { "->", 2 }, "->" }, /* EQNSYM_rarrow */
-	{ { "+-", 2 }, "+-" }, /* EQNSYM_pm */
-	{ { "!=", 2 }, "!=" }, /* EQNSYM_nequal */
-	{ { "==", 2 }, "==" }, /* EQNSYM_equiv */
-	{ { "<=", 2 }, "<=" }, /* EQNSYM_lessequal */
-	{ { ">=", 2 }, ">=" }, /* EQNSYM_moreequal */
+	{ "alpha", "*a" }, /* EQNSYM_alpha */
+	{ "beta", "*b" }, /* EQNSYM_beta */
+	{ "chi", "*x" }, /* EQNSYM_chi */
+	{ "delta", "*d" }, /* EQNSYM_delta */
+	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
+	{ "eta", "*y" }, /* EQNSYM_eta */
+	{ "gamma", "*g" }, /* EQNSYM_gamma */
+	{ "iota", "*i" }, /* EQNSYM_iota */
+	{ "kappa", "*k" }, /* EQNSYM_kappa */
+	{ "lambda", "*l" }, /* EQNSYM_lambda */
+	{ "mu", "*m" }, /* EQNSYM_mu */
+	{ "nu", "*n" }, /* EQNSYM_nu */
+	{ "omega", "*w" }, /* EQNSYM_omega */
+	{ "omicron", "*o" }, /* EQNSYM_omicron */
+	{ "phi", "*f" }, /* EQNSYM_phi */
+	{ "pi", "*p" }, /* EQNSYM_pi */
+	{ "psi", "*q" }, /* EQNSYM_psi */
+	{ "rho", "*r" }, /* EQNSYM_rho */
+	{ "sigma", "*s" }, /* EQNSYM_sigma */
+	{ "tau", "*t" }, /* EQNSYM_tau */
+	{ "theta", "*h" }, /* EQNSYM_theta */
+	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
+	{ "xi", "*c" }, /* EQNSYM_xi */
+	{ "zeta", "*z" }, /* EQNSYM_zeta */
+	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
+	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
+	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
+	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
+	{ "PHI", "*F" }, /* EQNSYM_PHI */
+	{ "PI", "*P" }, /* EQNSYM_PI */
+	{ "PSI", "*Q" }, /* EQNSYM_PSI */
+	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
+	{ "THETA", "*H" }, /* EQNSYM_THETA */
+	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
+	{ "XI", "*C" }, /* EQNSYM_XI */
+	{ "inter", "ca" }, /* EQNSYM_inter */
+	{ "union", "cu" }, /* EQNSYM_union */
+	{ "prod", "product" }, /* EQNSYM_prod */
+	{ "int", "integral" }, /* EQNSYM_int */
+	{ "sum", "sum" }, /* EQNSYM_sum */
+	{ "grad", "gr" }, /* EQNSYM_grad */
+	{ "del", "gr" }, /* EQNSYM_del */
+	{ "times", "mu" }, /* EQNSYM_times */
+	{ "cdot", "pc" }, /* EQNSYM_cdot */
+	{ "nothing", "&" }, /* EQNSYM_nothing */
+	{ "approx", "~~" }, /* EQNSYM_approx */
+	{ "prime", "aq" }, /* EQNSYM_prime */
+	{ "half", "12" }, /* EQNSYM_half */
+	{ "partial", "pd" }, /* EQNSYM_partial */
+	{ "inf", "if" }, /* EQNSYM_inf */
+	{ ">>", ">>" }, /* EQNSYM_muchgreat */
+	{ "<<", "<<" }, /* EQNSYM_muchless */
+	{ "<-", "<-" }, /* EQNSYM_larrow */
+	{ "->", "->" }, /* EQNSYM_rarrow */
+	{ "+-", "+-" }, /* EQNSYM_pm */
+	{ "!=", "!=" }, /* EQNSYM_nequal */
+	{ "==", "==" }, /* EQNSYM_equiv */
+	{ "<=", "<=" }, /* EQNSYM_lessequal */
+	{ ">=", ">=" }, /* EQNSYM_moreequal */
 };
 
-
 enum rofferr
 eqn_read(struct eqn_node **epp, int ln,
 		const char *p, int pos, int *offs)
@@ -353,386 +340,27 @@ eqn_alloc(const char *name, int pos, int
 	return(p);
 }
 
-enum rofferr
-eqn_end(struct eqn_node **epp)
-{
-	struct eqn_node	*ep;
-	struct eqn_box	*root;
-	enum eqn_rest	 c;
-
-	ep = *epp;
-	*epp = NULL;
-
-	ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
-
-	root = ep->eqn.root;
-	root->type = EQN_ROOT;
-
-	if (0 == ep->sz)
-		return(ROFF_IGN);
-
-	if (EQN_DESCOPE == (c = eqn_eqn(ep, root))) {
-		EQN_MSG(MANDOCERR_EQNNSCOPE, ep);
-		c = EQN_ERR;
-	}
-
-	return(EQN_EOF == c ? ROFF_EQN : ROFF_IGN);
-}
-
-static enum eqn_rest
-eqn_eqn(struct eqn_node *ep, struct eqn_box *last)
-{
-	struct eqn_box	*bp;
-	enum eqn_rest	 c;
-
-	bp = eqn_box_alloc(ep, last);
-	bp->type = EQN_SUBEXPR;
-
-	while (EQN_OK == (c = eqn_box(ep, bp)))
-		/* Spin! */ ;
-
-	return(c);
-}
-
-static enum eqn_rest
-eqn_matrix(struct eqn_node *ep, struct eqn_box *last)
-{
-	struct eqn_box	*bp;
-	const char	*start;
-	size_t		 sz;
-	enum eqn_rest	 c;
-
-	bp = eqn_box_alloc(ep, last);
-	bp->type = EQN_MATRIX;
-
-	if (NULL == (start = eqn_nexttok(ep, &sz))) {
-		EQN_MSG(MANDOCERR_EQNEOF, ep);
-		return(EQN_ERR);
-	}
-	if ( ! STRNEQ(start, sz, "{", 1)) {
-		EQN_MSG(MANDOCERR_EQNSYNT, ep);
-		return(EQN_ERR);
-	}
-
-	while (EQN_OK == (c = eqn_box(ep, bp)))
-		switch (bp->last->pile) {
-		case EQNPILE_LCOL:
-			/* FALLTHROUGH */
-		case EQNPILE_CCOL:
-			/* FALLTHROUGH */
-		case EQNPILE_RCOL:
-			continue;
-		default:
-			EQN_MSG(MANDOCERR_EQNSYNT, ep);
-			return(EQN_ERR);
-		};
-
-	if (EQN_DESCOPE != c) {
-		if (EQN_EOF == c)
-			EQN_MSG(MANDOCERR_EQNEOF, ep);
-		return(EQN_ERR);
-	}
-
-	eqn_rewind(ep);
-	start = eqn_nexttok(ep, &sz);
-	assert(start);
-	if (STRNEQ(start, sz, "}", 1))
-		return(EQN_OK);
-
-	EQN_MSG(MANDOCERR_EQNBADSCOPE, ep);
-	return(EQN_ERR);
-}
-
-static enum eqn_rest
-eqn_list(struct eqn_node *ep, struct eqn_box *last)
-{
-	struct eqn_box	*bp;
-	const char	*start;
-	size_t		 sz;
-	enum eqn_rest	 c;
-
-	bp = eqn_box_alloc(ep, last);
-	bp->type = EQN_LIST;
-
-	if (NULL == (start = eqn_nexttok(ep, &sz))) {
-		EQN_MSG(MANDOCERR_EQNEOF, ep);
-		return(EQN_ERR);
-	}
-	if ( ! STRNEQ(start, sz, "{", 1)) {
-		EQN_MSG(MANDOCERR_EQNSYNT, ep);
-		return(EQN_ERR);
-	}
-
-	while (EQN_DESCOPE == (c = eqn_eqn(ep, bp))) {
-		eqn_rewind(ep);
-		start = eqn_nexttok(ep, &sz);
-		assert(start);
-		if ( ! STRNEQ(start, sz, "above", 5))
-			break;
-	}
-
-	if (EQN_DESCOPE != c) {
-		if (EQN_ERR != c)
-			EQN_MSG(MANDOCERR_EQNSCOPE, ep);
-		return(EQN_ERR);
-	}
-
-	eqn_rewind(ep);
-	start = eqn_nexttok(ep, &sz);
-	assert(start);
-	if (STRNEQ(start, sz, "}", 1))
-		return(EQN_OK);
-
-	EQN_MSG(MANDOCERR_EQNBADSCOPE, ep);
-	return(EQN_ERR);
-}
-
-static enum eqn_rest
-eqn_box(struct eqn_node *ep, struct eqn_box *last)
-{
-	size_t		 sz;
-	const char	*start;
-	char		*left;
-	char		 sym[64];
-	enum eqn_rest	 c;
-	int		 i, size;
-	struct eqn_box	*bp;
-
-	if (NULL == (start = eqn_nexttok(ep, &sz)))
-		return(EQN_EOF);
-
-	if (STRNEQ(start, sz, "}", 1))
-		return(EQN_DESCOPE);
-	else if (STRNEQ(start, sz, "right", 5))
-		return(EQN_DESCOPE);
-	else if (STRNEQ(start, sz, "above", 5))
-		return(EQN_DESCOPE);
-	else if (STRNEQ(start, sz, "mark", 4))
-		return(EQN_OK);
-	else if (STRNEQ(start, sz, "lineup", 6))
-		return(EQN_OK);
-
-	for (i = 0; i < (int)EQN__MAX; i++) {
-		if ( ! EQNSTREQ(&eqnparts[i].str, start, sz))
-			continue;
-		return((*eqnparts[i].fp)(ep) ? EQN_OK : EQN_ERR);
-	}
-
-	if (STRNEQ(start, sz, "{", 1)) {
-		if (EQN_DESCOPE != (c = eqn_eqn(ep, last))) {
-			if (EQN_ERR != c)
-				EQN_MSG(MANDOCERR_EQNSCOPE, ep);
-			return(EQN_ERR);
-		}
-		eqn_rewind(ep);
-		start = eqn_nexttok(ep, &sz);
-		assert(start);
-		if (STRNEQ(start, sz, "}", 1))
-			return(EQN_OK);
-		EQN_MSG(MANDOCERR_EQNBADSCOPE, ep);
-		return(EQN_ERR);
-	}
-
-	for (i = 0; i < (int)EQNPILE__MAX; i++) {
-		if ( ! EQNSTREQ(&eqnpiles[i], start, sz))
-			continue;
-		if (EQN_OK == (c = eqn_list(ep, last)))
-			last->last->pile = (enum eqn_pilet)i;
-		return(c);
-	}
-
-	if (STRNEQ(start, sz, "matrix", 6))
-		return(eqn_matrix(ep, last));
-
-	if (STRNEQ(start, sz, "left", 4)) {
-		if (NULL == (start = eqn_nexttok(ep, &sz))) {
-			EQN_MSG(MANDOCERR_EQNEOF, ep);
-			return(EQN_ERR);
-		}
-		left = mandoc_strndup(start, sz);
-		c = eqn_eqn(ep, last);
-		if (last->last)
-			last->last->left = left;
-		else
-			free(left);
-		if (EQN_DESCOPE != c)
-			return(c);
-		assert(last->last);
-		eqn_rewind(ep);
-		start = eqn_nexttok(ep, &sz);
-		assert(start);
-		if ( ! STRNEQ(start, sz, "right", 5))
-			return(EQN_DESCOPE);
-		if (NULL == (start = eqn_nexttok(ep, &sz))) {
-			EQN_MSG(MANDOCERR_EQNEOF, ep);
-			return(EQN_ERR);
-		}
-		last->last->right = mandoc_strndup(start, sz);
-		return(EQN_OK);
-	}
-
-	/*
-	 * Positional elements (e.g., over, sub, sup, ...).
-	 */
-	for (i = 0; i < (int)EQNPOS__MAX; i++) {
-		/* Some elements don't have names (are virtual). */
-		if (NULL == eqnposs[i].name)
-			continue;
-		else if ( ! EQNSTREQ(&eqnposs[i], start, sz))
-			continue;
-		if (NULL == last->last) {
-			EQN_MSG(MANDOCERR_EQNSYNT, ep);
-			return(EQN_ERR);
-		}
-		/*
-		 * If we encounter x sub y sup z, then according to the
-		 * eqn manual, we regard this as x subsup y z.
-		 */
-		if (EQNPOS_SUP == i &&
-			NULL != last->last->prev &&
-			EQNPOS_SUB == last->last->prev->pos)
-			last->last->prev->pos = EQNPOS_SUBSUP;
-		else if (EQNPOS_TO == i &&
-			NULL != last->last->prev &&
-			EQNPOS_FROM == last->last->prev->pos)
-			last->last->prev->pos = EQNPOS_FROMTO;
-		else
-			last->last->pos = (enum eqn_post)i;
-
-		if (EQN_EOF == (c = eqn_box(ep, last))) {
-			EQN_MSG(MANDOCERR_EQNEOF, ep);
-			return(EQN_ERR);
-		}
-		return(c);
-	}
-
-	for (i = 0; i < (int)EQNMARK__MAX; i++) {
-		if ( ! EQNSTREQ(&eqnmarks[i], start, sz))
-			continue;
-		if (NULL == last->last) {
-			EQN_MSG(MANDOCERR_EQNSYNT, ep);
-			return(EQN_ERR);
-		}
-		last->last->mark = (enum eqn_markt)i;
-		if (EQN_EOF == (c = eqn_box(ep, last))) {
-			EQN_MSG(MANDOCERR_EQNEOF, ep);
-			return(EQN_ERR);
-		}
-		return(c);
-	}
-
-	for (i = 0; i < (int)EQNFONT__MAX; i++) {
-		if ( ! EQNSTREQ(&eqnfonts[i], start, sz))
-			continue;
-		if (EQN_EOF == (c = eqn_box(ep, last))) {
-			EQN_MSG(MANDOCERR_EQNEOF, ep);
-			return(EQN_ERR);
-		} else if (EQN_OK == c)
-			last->last->font = (enum eqn_fontt)i;
-		return(c);
-	}
-
-	if (STRNEQ(start, sz, "size", 4)) {
-		if (NULL == (start = eqn_nexttok(ep, &sz))) {
-			EQN_MSG(MANDOCERR_EQNEOF, ep);
-			return(EQN_ERR);
-		}
-		size = mandoc_strntoi(start, sz, 10);
-		if (EQN_EOF == (c = eqn_box(ep, last))) {
-			EQN_MSG(MANDOCERR_EQNEOF, ep);
-			return(EQN_ERR);
-		} else if (EQN_OK != c)
-			return(c);
-		last->last->size = size;
-	}
-
-	bp = eqn_box_alloc(ep, last);
-	bp->type = EQN_TEXT;
-	for (i = 0; i < (int)EQNSYM__MAX; i++)
-		if (EQNSTREQ(&eqnsyms[i].str, start, sz)) {
-			sym[63] = '\0';
-			(void)snprintf(sym, 62, "\\[%s]", eqnsyms[i].sym);
-			bp->text = mandoc_strdup(sym);
-			return(EQN_OK);
-		}
-
-	bp->text = mandoc_strndup(start, sz);
-	return(EQN_OK);
-}
-
-void
-eqn_free(struct eqn_node *p)
+/*
+ * Find the key "key" of the given size within our eqn-defined values.
+ */
+static struct eqn_def *
+eqn_def_find(struct eqn_node *ep, const char *key, size_t sz)
 {
 	int		 i;
 
-	eqn_box_free(p->eqn.root);
-
-	for (i = 0; i < (int)p->defsz; i++) {
-		free(p->defs[i].key);
-		free(p->defs[i].val);
-	}
-
-	free(p->eqn.name);
-	free(p->data);
-	free(p->defs);
-	free(p);
-}
-
-static struct eqn_box *
-eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
-{
-	struct eqn_box	*bp;
-
-	bp = mandoc_calloc(1, sizeof(struct eqn_box));
-	bp->parent = parent;
-	bp->size = ep->gsize;
-
-	if (NULL != parent->first) {
-		parent->last->next = bp;
-		bp->prev = parent->last;
-	} else
-		parent->first = bp;
-
-	parent->last = bp;
-	return(bp);
-}
-
-static void
-eqn_box_free(struct eqn_box *bp)
-{
-
-	if (bp->first)
-		eqn_box_free(bp->first);
-	if (bp->next)
-		eqn_box_free(bp->next);
-
-	free(bp->text);
-	free(bp->left);
-	free(bp->right);
-	free(bp);
-}
-
-static const char *
-eqn_nextrawtok(struct eqn_node *ep, size_t *sz)
-{
-
-	return(eqn_next(ep, '"', sz, 0));
-}
-
-static const char *
-eqn_nexttok(struct eqn_node *ep, size_t *sz)
-{
-
-	return(eqn_next(ep, '"', sz, 1));
-}
-
-static void
-eqn_rewind(struct eqn_node *ep)
-{
+	for (i = 0; i < (int)ep->defsz; i++)
+		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
+		    ep->defs[i].keysz, key, sz))
+			return(&ep->defs[i]);
 
-	ep->cur = ep->rew;
+	return(NULL);
 }
 
+/*
+ * Get the next token from the input stream using the given quote
+ * character.
+ * Optionally make any replacements.
+ */
 static const char *
 eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl)
 {
@@ -822,48 +450,154 @@ again:
 	return(start);
 }
 
-static int
-eqn_do_ign1(struct eqn_node *ep)
+/*
+ * Get the next delimited token using the default current quote
+ * character.
+ */
+static const char *
+eqn_nexttok(struct eqn_node *ep, size_t *sz)
 {
 
-	if (NULL == eqn_nextrawtok(ep, NULL))
-		EQN_MSG(MANDOCERR_EQNEOF, ep);
-	else
-		return(1);
+	return(eqn_next(ep, '"', sz, 1));
+}
 
-	return(0);
+/*
+ * Get next token without replacement.
+ */
+static const char *
+eqn_nextrawtok(struct eqn_node *ep, size_t *sz)
+{
+
+	return(eqn_next(ep, '"', sz, 0));
 }
 
-static int
-eqn_do_ign2(struct eqn_node *ep)
+/*
+ * Parse a token from the stream of text.
+ * A token consists of one of the recognised eqn(7) strings.
+ * Strings are separated by delimiting marks.
+ * This returns EQN_TOK_EOF when there are no more tokens.
+ * If the token is an unrecognised string literal, then it returns
+ * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated
+ * string.
+ * This must be later freed with free(3).
+ */
+static enum eqn_tok
+eqn_tok_parse(struct eqn_node *ep, char **p)
 {
+	const char	*start;
+	size_t		 i, sz;
 
-	if (NULL == eqn_nextrawtok(ep, NULL))
-		EQN_MSG(MANDOCERR_EQNEOF, ep);
-	else if (NULL == eqn_nextrawtok(ep, NULL))
-		EQN_MSG(MANDOCERR_EQNEOF, ep);
-	else
-		return(1);
+	if (NULL != p)
+		*p = NULL;
 
-	return(0);
+	if (NULL == (start = eqn_nexttok(ep, &sz)))
+		return(EQN_TOK_EOF);
+
+	for (i = 0; i < EQN_TOK__MAX; i++) {
+		if (NULL == eqn_toks[i])
+			continue;
+		if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i])))
+			break;
+	}
+
+	if (i == EQN_TOK__MAX && NULL != p)
+		*p = mandoc_strndup(start, sz);
+
+	return(i);
 }
 
+static void
+eqn_box_free(struct eqn_box *bp)
+{
+
+	if (bp->first)
+		eqn_box_free(bp->first);
+	if (bp->next)
+		eqn_box_free(bp->next);
+
+	free(bp->text);
+	free(bp->left);
+	free(bp->right);
+	free(bp);
+}
+
+/*
+ * Allocate a box as the last child of the parent node.
+ */
+static struct eqn_box *
+eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
+{
+	struct eqn_box	*bp;
+
+	bp = mandoc_calloc(1, sizeof(struct eqn_box));
+	bp->parent = parent;
+	bp->parent->args++;
+	bp->expectargs = UINT_MAX;
+	bp->size = ep->gsize;
+
+	assert(NULL != parent);
+
+	if (NULL != parent->first) {
+		assert(NULL != parent->last);
+		parent->last->next = bp;
+		bp->prev = parent->last;
+	} else
+		parent->first = bp;
+
+	parent->last = bp;
+	return(bp);
+}
+
+/*
+ * Reparent the current last node (of the current parent) under a new
+ * EQN_SUBEXPR as the first element.
+ * Then return the new parent.
+ * The new EQN_SUBEXPR will have a two-child limit.
+ */
+static struct eqn_box *
+eqn_box_makebinary(struct eqn_node *ep, 
+	enum eqn_post pos, struct eqn_box *parent)
+{
+	struct eqn_box	*b, *newb;
+
+	assert(NULL != parent->last);
+	b = parent->last;
+	if (parent->last == parent->first)
+		parent->first = NULL;
+	parent->args--;
+	parent->last = b->prev;
+	b->prev = NULL;
+	newb = eqn_box_alloc(ep, parent);
+	newb->pos = pos;
+	newb->type = EQN_SUBEXPR;
+	newb->expectargs = 2;
+	newb->args = 1;
+	newb->first = newb->last = b;
+	newb->first->next = NULL;
+	return(newb);
+}
+
+/*
+ * Undefine a previously-defined string.
+ */
 static int
-eqn_do_tdefine(struct eqn_node *ep)
+eqn_undef(struct eqn_node *ep)
 {
+	const char	*start;
+	struct eqn_def	*def;
+	size_t		 sz;
 
-	if (NULL == eqn_nextrawtok(ep, NULL))
-		EQN_MSG(MANDOCERR_EQNEOF, ep);
-	else if (NULL == eqn_next(ep, ep->data[(int)ep->cur], NULL, 0))
+	if (NULL == (start = eqn_nextrawtok(ep, &sz))) {
 		EQN_MSG(MANDOCERR_EQNEOF, ep);
-	else
-		return(1);
+		return(0);
+	} else if (NULL != (def = eqn_def_find(ep, start, sz)))
+		def->keysz = 0;
 
-	return(0);
+	return(1);
 }
 
 static int
-eqn_do_define(struct eqn_node *ep)
+eqn_def(struct eqn_node *ep)
 {
 	const char	*start;
 	size_t		 sz;
@@ -879,7 +613,6 @@ eqn_do_define(struct eqn_node *ep)
 	 * Search for a key that already exists.
 	 * Create a new key if none is found.
 	 */
-
 	if (NULL == (def = eqn_def_find(ep, start, sz))) {
 		/* Find holes in string array. */
 		for (i = 0; i < (int)ep->defsz; i++)
@@ -906,7 +639,7 @@ eqn_do_define(struct eqn_node *ep)
 
 	if (NULL == start) {
 		EQN_MSG(MANDOCERR_EQNEOF, ep);
-		return(0);
+		return(-1);
 	}
 
 	def->valsz = sz;
@@ -916,56 +649,371 @@ eqn_do_define(struct eqn_node *ep)
 	return(1);
 }
 
+/*
+ * Recursively parse an eqn(7) expression.
+ */
 static int
-eqn_do_gfont(struct eqn_node *ep)
-{
-
-	if (NULL == eqn_nextrawtok(ep, NULL)) {
-		EQN_MSG(MANDOCERR_EQNEOF, ep);
-		return(0);
-	}
-	return(1);
-}
-
-static int
-eqn_do_gsize(struct eqn_node *ep)
+eqn_parse(struct eqn_node *ep, struct eqn_box *parent)
 {
+	char		*p;
+	enum eqn_tok	 tok;
+	enum eqn_post	 pos;
+	struct eqn_box	*cur;
+	int		 rc, size;
+	size_t		 i, sz;
+	char		 sym[64];
 	const char	*start;
-	size_t		 sz;
 
-	if (NULL == (start = eqn_nextrawtok(ep, &sz))) {
-		EQN_MSG(MANDOCERR_EQNEOF, ep);
+	assert(NULL != parent);
+again:
+	
+	switch ((tok = eqn_tok_parse(ep, &p))) {
+	case (EQN_TOK_UNDEF):
+		if ((rc = eqn_undef(ep)) <= 0)
+			return(rc);
+		break;
+	case (EQN_TOK_NDEFINE):
+	case (EQN_TOK_DEFINE):
+		if ((rc = eqn_def(ep)) <= 0)
+			return(rc);
+		break;
+	case (EQN_TOK_TDEFINE):
+		if (NULL == eqn_nextrawtok(ep, NULL))
+			EQN_MSG(MANDOCERR_EQNEOF, ep);
+		else if (NULL == eqn_next(ep, 
+				ep->data[(int)ep->cur], NULL, 0))
+			EQN_MSG(MANDOCERR_EQNEOF, ep);
+		break;
+	case (EQN_TOK_GFONT):
+		if (NULL == eqn_nextrawtok(ep, NULL)) {
+			EQN_MSG(MANDOCERR_EQNSYNT, ep);
+			return(-1);
+		}
+		break;
+	case (EQN_TOK_MARK):
+	case (EQN_TOK_LINEUP):
+		/* Ignore these. */
+		break;
+	case (EQN_TOK_DYAD):
+	case (EQN_TOK_VEC):
+	case (EQN_TOK_UNDER):
+	case (EQN_TOK_BAR):
+	case (EQN_TOK_TILDE):
+	case (EQN_TOK_HAT):
+	case (EQN_TOK_DOT):
+	case (EQN_TOK_DOTDOT):
+		/* TODO */
+		break;
+	case (EQN_TOK_FWD):
+	case (EQN_TOK_BACK):
+	case (EQN_TOK_DOWN):
+	case (EQN_TOK_UP):
+		tok = eqn_tok_parse(ep, NULL); 
+		if (EQN_TOK__MAX != tok) {
+			EQN_MSG(MANDOCERR_EQNSYNT, ep);
+			return(-1);
+		}
+		break;
+	case (EQN_TOK_FAT):
+	case (EQN_TOK_ROMAN):
+	case (EQN_TOK_ITALIC):
+	case (EQN_TOK_BOLD):
+		/*
+		 * These values apply to the next word or sequence of
+		 * words; thus, we mark that we'll have a child with
+		 * exactly one of those.
+		 */
+		parent = eqn_box_alloc(ep, parent);
+		parent->type = EQN_LISTONE;
+		parent->expectargs = 1;
+		switch (tok) {
+		case (EQN_TOK_FAT):
+			parent->font = EQNFONT_FAT;
+			break;
+		case (EQN_TOK_ROMAN):
+			parent->font = EQNFONT_ROMAN;
+			break;
+		case (EQN_TOK_ITALIC):
+			parent->font = EQNFONT_ITALIC;
+			break;
+		case (EQN_TOK_BOLD):
+			parent->font = EQNFONT_BOLD;
+			break;
+		default:
+			abort();
+		}
+		break;
+	case (EQN_TOK_SIZE):
+	case (EQN_TOK_GSIZE):
+		/* Accept two values: an integral size and a single box. */
+		if (NULL == (start = eqn_nexttok(ep, &sz))) {
+			EQN_MSG(MANDOCERR_EQNSYNT, ep);
+			return(-1);
+		}
+		size = mandoc_strntoi(start, sz, 10);
+		if (-1 == size) {
+			EQN_MSG(MANDOCERR_EQNSYNT, ep);
+			return(-1);
+		}
+		if (EQN_TOK_GSIZE == tok) {
+			ep->gsize = size;
+			break;
+		}
+		parent = eqn_box_alloc(ep, parent);
+		parent->type = EQN_LISTONE;
+		parent->expectargs = 1;
+		parent->size = size;
+		break;
+	case (EQN_TOK_FROM):
+	case (EQN_TOK_TO):
+	case (EQN_TOK_SUB):
+	case (EQN_TOK_SUP):
+		/*
+		 * We have a left-right-associative expression.
+		 * Repivot under a positional node, open a child scope
+		 * and keep on reading.
+		 */
+		if (NULL == parent->last) {
+			EQN_MSG(MANDOCERR_EQNSYNT, ep);
+			return(-1);
+		}
+		/* Handle the "subsup" and "fromto" positions. */
+		if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) {
+			parent->expectargs = 3;
+			parent->pos = EQNPOS_SUBSUP;
+			break;
+		}
+		if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) {
+			parent->expectargs = 3;
+			parent->pos = EQNPOS_FROMTO;
+			break;
+		}
+		switch (tok) {
+		case (EQN_TOK_FROM):
+			pos = EQNPOS_FROM;
+			break;
+		case (EQN_TOK_TO):
+			pos = EQNPOS_TO;
+			break;
+		case (EQN_TOK_SUP):
+			pos = EQNPOS_SUP;
+			break;
+		case (EQN_TOK_SUB):
+			pos = EQNPOS_SUB;
+			break;
+		default:
+			abort();
+		}
+		parent = eqn_box_makebinary(ep, pos, parent);
+		break;
+	case (EQN_TOK_SQRT):
+		/* 
+		 * Accept a left-right-associative set of arguments just
+		 * like sub and sup and friends but without rebalancing
+		 * under a pivot.
+		 */
+		parent = eqn_box_alloc(ep, parent);
+		parent->type = EQN_SUBEXPR;
+		parent->pos = EQNPOS_SQRT;
+		parent->expectargs = 1;
+		break;
+	case (EQN_TOK_OVER):
+		/*
+		 * We have a right-left-associative fraction.
+		 * Close out anything that's currently open, then
+		 * rebalance and continue reading.
+		 */
+		if (NULL == parent->last) {
+			EQN_MSG(MANDOCERR_EQNSYNT, ep);
+			return(-1);
+		}
+		while (EQN_SUBEXPR == parent->type)
+			if (NULL == (parent = parent->parent)) {
+				EQN_MSG(MANDOCERR_EQNSYNT, ep);
+				return(-1);
+			}
+		parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent);
+		break;
+	case (EQN_TOK_RIGHT):
+	case (EQN_TOK_BRACE_CLOSE):
+		/*
+		 * Close out the existing brace.
+		 * FIXME: this is a shitty sentinel: we should really
+		 * have a native EQN_BRACE type or whatnot.
+		 */
+		while (parent->type != EQN_LIST) 
+			if (NULL == (parent = parent->parent)) {
+				EQN_MSG(MANDOCERR_EQNSYNT, ep);
+				return(-1);
+			}
+		if (EQN_TOK_RIGHT == tok) {
+			if (NULL == parent->left) {
+				EQN_MSG(MANDOCERR_EQNSYNT, ep);
+				return(-1);
+			}
+			if (NULL == (start = eqn_nexttok(ep, &sz))) {
+				EQN_MSG(MANDOCERR_EQNSYNT, ep);
+				return(-1);
+			}
+			parent->right = mandoc_strndup(start, sz);
+		}
+		if (NULL == (parent = parent->parent)) {
+			EQN_MSG(MANDOCERR_EQNSYNT, ep);
+			return(-1);
+		}
+		if (EQN_TOK_BRACE_CLOSE == tok && parent && 
+			(parent->type == EQN_PILE ||
+			 parent->type == EQN_MATRIX))
+			parent = parent->parent;
+		/* Close out any "singleton" lists. */
+		while (parent->type == EQN_LISTONE && 
+			parent->args == parent->expectargs)
+			if (NULL == (parent = parent->parent)) {
+				EQN_MSG(MANDOCERR_EQNSYNT, ep);
+				return(-1);
+			}
+		break;
+	case (EQN_TOK_BRACE_OPEN):
+	case (EQN_TOK_LEFT):
+		/*
+		 * If we already have something in the stack and we're
+		 * in an expression, then rewind until we're not any more
+		 * (just like with the text node).
+		 */
+		while (parent->args == parent->expectargs)
+			if (NULL == (parent = parent->parent)) {
+				EQN_MSG(MANDOCERR_EQNSYNT, ep);
+				return(-1);
+			}
+		parent = eqn_box_alloc(ep, parent);
+		parent->type = EQN_LIST;
+		if (EQN_TOK_LEFT == tok) {
+			if (NULL == (start = eqn_nexttok(ep, &sz))) {
+				EQN_MSG(MANDOCERR_EQNSYNT, ep);
+				return(-1);
+			}
+			parent->left = mandoc_strndup(start, sz);
+		}
+		break;
+	case (EQN_TOK_PILE):
+	case (EQN_TOK_LPILE):
+	case (EQN_TOK_RPILE):
+	case (EQN_TOK_CPILE):
+	case (EQN_TOK_CCOL):
+	case (EQN_TOK_LCOL):
+	case (EQN_TOK_RCOL):
+		while (parent->args == parent->expectargs)
+			if (NULL == (parent = parent->parent)) {
+				EQN_MSG(MANDOCERR_EQNSYNT, ep);
+				return(-1);
+			}
+		if (EQN_TOK_BRACE_OPEN != eqn_tok_parse(ep, NULL)) {
+			EQN_MSG(MANDOCERR_EQNSYNT, ep);
+			return(-1);
+		}
+		parent = eqn_box_alloc(ep, parent);
+		parent->type = EQN_PILE;
+		parent = eqn_box_alloc(ep, parent);
+		parent->type = EQN_LIST;
+		break;
+	case (EQN_TOK_ABOVE):
+		while (parent->type != EQN_PILE) 
+			if (NULL == (parent = parent->parent)) {
+				EQN_MSG(MANDOCERR_EQNSYNT, ep);
+				return(-1);
+			}
+		parent = eqn_box_alloc(ep, parent);
+		parent->type = EQN_LIST;
+		break;
+	case (EQN_TOK_MATRIX):
+		while (parent->args == parent->expectargs)
+			if (NULL == (parent = parent->parent)) {
+				EQN_MSG(MANDOCERR_EQNSYNT, ep);
+				return(-1);
+			}
+		if (EQN_TOK_BRACE_OPEN != eqn_tok_parse(ep, NULL)) {
+			EQN_MSG(MANDOCERR_EQNSYNT, ep);
+			return(-1);
+		}
+		parent = eqn_box_alloc(ep, parent);
+		parent->type = EQN_MATRIX;
+		parent = eqn_box_alloc(ep, parent);
+		parent->type = EQN_LIST;
+		break;
+	case (EQN_TOK_EOF):
+		/*
+		 * End of file! 
+		 * TODO: make sure we're not in an open subexpression.
+		 */
 		return(0);
+	default:
+		assert(tok == EQN_TOK__MAX);
+		assert(NULL != p);
+		/*
+		 * If we already have something in the stack and we're
+		 * in an expression, then rewind until we're not any more.
+		 */
+		while (parent->args == parent->expectargs)
+			if (NULL == (parent = parent->parent)) {
+				EQN_MSG(MANDOCERR_EQNSYNT, ep);
+				free(p);
+				return(-1);
+			}
+		cur = eqn_box_alloc(ep, parent);
+		cur->type = EQN_TEXT;
+		for (i = 0; i < EQNSYM__MAX; i++)
+			if (0 == strcmp(eqnsyms[i].str, p)) {
+				(void)snprintf(sym, sizeof(sym), 
+					"\\[%s]", eqnsyms[i].sym);
+				cur->text = mandoc_strdup(sym);
+				free(p);
+				break;
+			}
+
+		if (i == EQNSYM__MAX)
+			cur->text = p;
+		/*
+		 * Post-process list status.
+		 */
+		while (parent->type == EQN_LISTONE && 
+			parent->args == parent->expectargs)
+			if (NULL == (parent = parent->parent)) {
+				EQN_MSG(MANDOCERR_EQNSYNT, ep);
+				return(-1);
+			}
+		break;
 	}
-	ep->gsize = mandoc_strntoi(start, sz, 10);
-	return(1);
+	goto again;
 }
 
-static int
-eqn_do_undef(struct eqn_node *ep)
+enum rofferr
+eqn_end(struct eqn_node **epp) 
 {
-	const char	*start;
-	struct eqn_def	*def;
-	size_t		 sz;
+	struct eqn_node	*ep;
 
-	if (NULL == (start = eqn_nextrawtok(ep, &sz))) {
-		EQN_MSG(MANDOCERR_EQNEOF, ep);
-		return(0);
-	} else if (NULL != (def = eqn_def_find(ep, start, sz)))
-		def->keysz = 0;
+	ep = *epp;
+	*epp = NULL;
 
-	return(1);
+	ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
+	ep->eqn.root->expectargs = UINT_MAX;
+	eqn_parse(ep, ep->eqn.root);
+	return(ROFF_EQN);
 }
 
-static struct eqn_def *
-eqn_def_find(struct eqn_node *ep, const char *key, size_t sz)
+void
+eqn_free(struct eqn_node *p)
 {
 	int		 i;
 
-	for (i = 0; i < (int)ep->defsz; i++)
-		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
-		    ep->defs[i].keysz, key, sz))
-			return(&ep->defs[i]);
+	eqn_box_free(p->eqn.root);
 
-	return(NULL);
+	for (i = 0; i < (int)p->defsz; i++) {
+		free(p->defs[i].key);
+		free(p->defs[i].val);
+	}
+
+	free(p->eqn.name);
+	free(p->data);
+	free(p->defs);
+	free(p);
 }
Index: eqn_html.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/eqn_html.c,v
retrieving revision 1.7
diff -u -p -r1.7 eqn_html.c
--- eqn_html.c	28 Sep 2014 20:14:20 -0000	1.7
+++ eqn_html.c	30 Sep 2014 12:01:29 -0000
@@ -35,8 +35,7 @@ static	const enum htmltag fontmap[EQNFON
 	TAG_I /* EQNFONT_ITALIC */
 };
 
-static const struct eqn_box *
-	eqn_box(struct html *, const struct eqn_box *, int);
+static void eqn_box(struct html *, const struct eqn_box *);
 
 
 void
@@ -49,68 +48,23 @@ print_eqn(struct html *p, const struct e
 	t = print_otag(p, TAG_MATH, 1, &tag);
 
 	p->flags |= HTML_NONOSPACE;
-	eqn_box(p, ep->root, 1);
+	eqn_box(p, ep->root);
 	p->flags &= ~HTML_NONOSPACE;
 
 	print_tagq(p, t);
 }
 
-/*
- * This function is fairly brittle.
- * This is because the eqn syntax doesn't play so nicely with recusive
- * formats, e.g.,
- *     foo sub bar sub baz
- * ...needs to resolve into
- *     <msub> foo <msub> bar, baz </msub> </msub>
- * In other words, we need to embed some recursive work.
- * FIXME: this does NOT handle right-left associativity or precedence!
- */
-static const struct eqn_box *
-eqn_box(struct html *p, const struct eqn_box *bp, int next)
+static void
+eqn_box(struct html *p, const struct eqn_box *bp)
 {
-	struct tag	*post, *pilet, *tmp;
+	struct tag	*post;
 	struct htmlpair	 tag[2];
-	int		 skiptwo;
 
 	if (NULL == bp)
-		return(NULL);
+		return;
 
-	post = pilet = NULL;
-	skiptwo = 0;
+	post = NULL;
 
-	/*
-	 * If we're a "row" under a pile, then open up the piling
-	 * context here.
-	 * We do this first because the pile surrounds the content of
-	 * the contained expression.
-	 */
-	if (NULL != bp->parent && bp->parent->pile != EQNPILE_NONE) {
-		pilet = print_otag(p, TAG_MTR, 0, NULL);
-		print_otag(p, TAG_MTD, 0, NULL);
-	}
-	if (NULL != bp->parent && bp->parent->type == EQN_MATRIX) {
-		pilet = print_otag(p, TAG_MTABLE, 0, NULL);
-		print_otag(p, TAG_MTR, 0, NULL);
-		print_otag(p, TAG_MTD, 0, NULL);
-	}
-
-	/*
-	 * If we're establishing a pile, start the table mode now.
-	 * If we've already in a pile row, then don't override "pilet",
-	 * because we'll be closed out anyway.
-	 */
-	if (bp->pile != EQNPILE_NONE) {
-		tmp = print_otag(p, TAG_MTABLE, 0, NULL);
-		pilet = (NULL == pilet) ? tmp : pilet;
-	}
-
-	/*
-	 * Positioning.
-	 * This is the most complicated part, and actually doesn't quite
-	 * work (FIXME) because it doesn't account for associativity.
-	 * Setting "post" will mean that we're only going to process a
-	 * single or double following expression.
-	 */
 	switch (bp->pos) {
 	case (EQNPOS_TO):
 		post = print_otag(p, TAG_MOVER, 0, NULL);
@@ -129,93 +83,46 @@ eqn_box(struct html *p, const struct eqn
 		break;
 	case (EQNPOS_FROMTO):
 		post = print_otag(p, TAG_MUNDEROVER, 0, NULL);
-		skiptwo = 1;
 		break;
 	case (EQNPOS_SUBSUP):
 		post = print_otag(p, TAG_MSUBSUP, 0, NULL);
-		skiptwo = 1;
+		break;
+	case (EQNPOS_SQRT):
+		post = print_otag(p, TAG_MSQRT, 0, NULL);
 		break;
 	default:
 		break;
 	}
 
-	/*t = EQNFONT_NONE == bp->font ? NULL :
-	    print_otag(p, fontmap[(int)bp->font], 0, NULL);*/
+	if (EQN_PILE == bp->type) {
+		assert(NULL == post);
+		post = print_otag(p, TAG_MTABLE, 0, NULL);
+	} else if (bp->parent && EQN_PILE == bp->parent->type) {
+		assert(NULL == post);
+		post = print_otag(p, TAG_MTR, 0, NULL);
+		print_otag(p, TAG_MTD, 0, NULL);
+	}
 
 	if (NULL != bp->text) {
-		assert(NULL == bp->first);
-		/*
-		 * We have text.
-		 * This can be a number, a function, a variable, or
-		 * pretty much anything else.
-		 * First, check for some known functions.
-		 * If we're going to create a structural node (e.g.,
-		 * sqrt), then set the "post" variable only if it's not
-		 * already set.
-		 */
-		if (0 == strcmp(bp->text, "sqrt")) {
-			tmp = print_otag(p, TAG_MSQRT, 0, NULL);
-			post = (NULL == post) ? tmp : post;
-		} else if (0 == strcmp(bp->text, "+") ||
-			   0 == strcmp(bp->text, "-") ||
-			   0 == strcmp(bp->text, "=") ||
-			   0 == strcmp(bp->text, "(") ||
-			   0 == strcmp(bp->text, ")") ||
-			   0 == strcmp(bp->text, "/")) {
-			tmp = print_otag(p, TAG_MO, 0, NULL);
-			print_text(p, bp->text);
-			print_tagq(p, tmp);
-		} else {
-			tmp = print_otag(p, TAG_MI, 0, NULL);
-			print_text(p, bp->text);
-			print_tagq(p, tmp);
-		}
-	} else if (NULL != bp->first) {
-		assert(NULL == bp->text);
-		/* 
-		 * If we're a "fenced" component (i.e., having
-		 * brackets), then process those brackets now.
-		 * Otherwise, introduce a dummy row (if we're not
-		 * already in a table context).
-		 */
-		tmp = NULL;
+		assert(NULL == post);
+		post = print_otag(p, TAG_MO, 0, NULL);
+		print_text(p, bp->text);
+	} else if (NULL == post) { 
 		if (NULL != bp->left || NULL != bp->right) {
 			PAIR_INIT(&tag[0], ATTR_OPEN,
-				NULL != bp->left ? bp->left : "");
+				NULL == bp->left ? "" : bp->left);
 			PAIR_INIT(&tag[1], ATTR_CLOSE,
-				NULL != bp->right ? bp->right : "");
-			tmp = print_otag(p, TAG_MFENCED, 2, tag);
+				NULL == bp->right ? "" : bp->right);
+			post = print_otag(p, TAG_MFENCED, 2, tag);
 			print_otag(p, TAG_MROW, 0, NULL);
-		} else if (NULL == pilet)
-			tmp = print_otag(p, TAG_MROW, 0, NULL);
-		eqn_box(p, bp->first, 1);
-		if (NULL != tmp)
-			print_tagq(p, tmp);
+		} else
+			post = print_otag(p, TAG_MROW, 0, NULL);
 	}
 
-	/*
-	 * If a positional context, invoke the "next" context.
-	 * This is recursive and will return the end of the recursive
-	 * chain of "next" contexts.
-	 */
-	if (NULL != post) {
-		bp = eqn_box(p, bp->next, 0);
-		if (skiptwo)
-			bp = eqn_box(p, bp->next, 0);
+	eqn_box(p, bp->first);
+
+	if (NULL != post)
 		print_tagq(p, post);
-	}
 
-	/* 
-	 * If we're being piled (either directly, in the table, or
-	 * indirectly in a table row), then close that out.
-	 */
-	if (NULL != pilet)
-		print_tagq(p, pilet);
-
-	/*
-	 * If we're normally processing, then grab the next node.
-	 * If we're in a recursive context, then don't seek to the next
-	 * node; further recursion has already been handled.
-	 */
-	return(next ? eqn_box(p, bp->next, 1) : bp);
+	eqn_box(p, bp->next);
 }
Index: mandoc.h
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.h,v
retrieving revision 1.158
diff -u -p -r1.158 mandoc.h
--- mandoc.h	28 Sep 2014 14:05:11 -0000	1.158
+++ mandoc.h	30 Sep 2014 12:01:29 -0000
@@ -306,8 +306,10 @@ enum	eqn_boxt {
 	EQN_ROOT, /* root of parse tree */
 	EQN_TEXT, /* text (number, variable, whatever) */
 	EQN_SUBEXPR, /* nested `eqn' subexpression */
-	EQN_LIST, /* subexpressions list */
-	EQN_MATRIX /* matrix subexpression */
+	EQN_LIST, /* list (braces, etc.) */
+	EQN_LISTONE, /* singleton list */
+	EQN_PILE, /* vertical pile */
+	EQN_MATRIX /* pile of piles */
 };
 
 enum	eqn_markt {
@@ -334,13 +336,14 @@ enum	eqn_fontt {
 
 enum	eqn_post {
 	EQNPOS_NONE = 0,
-	EQNPOS_OVER,
 	EQNPOS_SUP,
 	EQNPOS_SUBSUP,
 	EQNPOS_SUB,
 	EQNPOS_TO,
 	EQNPOS_FROM,
 	EQNPOS_FROMTO,
+	EQNPOS_OVER,
+	EQNPOS_SQRT,
 	EQNPOS__MAX
 };
 
@@ -371,8 +374,10 @@ struct	eqn_box {
 	struct eqn_box	 *prev; /* node sibling */
 	struct eqn_box	 *parent; /* node sibling */
 	char		 *text; /* text (or NULL) */
-	char		 *left;
-	char		 *right;
+	char		 *left; /* fence left-hand */
+	char		 *right; /* fence right-hand */
+	size_t		  args; /* number of child boxes */
+	size_t		  expectargs; /* max number of child boxes */
 	enum eqn_post	  pos; /* position of next box */
 	enum eqn_markt	  mark; /* a mark about the box */
 	enum eqn_fontt	  font; /* font of box */
Index: tree.c
===================================================================
RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/tree.c,v
retrieving revision 1.55
diff -u -p -r1.55 tree.c
--- tree.c	28 Sep 2014 11:32:33 -0000	1.55
+++ tree.c	30 Sep 2014 12:01:29 -0000
@@ -275,6 +275,7 @@ print_box(const struct eqn_box *ep, int 
 	case EQN_ROOT:
 		t = "eqn-root";
 		break;
+	case EQN_LISTONE:
 	case EQN_LIST:
 		t = "eqn-list";
 		break;
@@ -284,14 +285,18 @@ print_box(const struct eqn_box *ep, int 
 	case EQN_TEXT:
 		t = "eqn-text";
 		break;
+	case EQN_PILE:
+		t = "eqn-pile";
+		break;
 	case EQN_MATRIX:
 		t = "eqn-matrix";
 		break;
 	}
 
 	assert(t);
-	printf("%s(size=%d, pos=%d, font=%d, mark=%d, pile=%d, l=\"%s\", r=\"%s\") %s\n",
+	printf("%s(size=%d, args=%zu(%zu), pos=%d, font=%d, mark=%d, pile=%d, l=\"%s\", r=\"%s\") %s\n",
 	    t, EQN_DEFSIZE == ep->size ? 0 : ep->size,
+	    ep->args, ep->expectargs,
 	    ep->pos, ep->font, ep->mark, ep->pile,
 	    ep->left ? ep->left : "",
 	    ep->right ? ep->right : "",

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2014-09-30 13:33 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-30 12:01 Re-written eqn(7) parser with associativity Kristaps Dzonsons
2014-09-30 13:33 ` Kristaps Dzonsons

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for its NNTP newsgroup(s).