From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from krisdoz.my.domain (kristaps@localhost [127.0.0.1]) by krisdoz.my.domain (8.14.3/8.14.3) with ESMTP id p6LNUeSo002478 for ; Thu, 21 Jul 2011 19:30:40 -0400 (EDT) Received: (from kristaps@localhost) by krisdoz.my.domain (8.14.3/8.14.3/Submit) id p6LNUdTo003956; Thu, 21 Jul 2011 19:30:39 -0400 (EDT) Date: Thu, 21 Jul 2011 19:30:39 -0400 (EDT) Message-Id: <201107212330.p6LNUdTo003956@krisdoz.my.domain> X-Mailinglist: mdocml-source Reply-To: source@mdocml.bsd.lv MIME-Version: 1.0 From: kristaps@mdocml.bsd.lv To: source@mdocml.bsd.lv Subject: mdocml: Complete eqn.7 parsing. X-Mailer: activitymail 1.26, http://search.cpan.org/dist/activitymail/ Content-Type: text/plain; charset=utf-8 Log Message: ----------- Complete eqn.7 parsing. Features all productions from the original 1975 CACM paper in an LR(1) parse (1 -> eqn_rewind()). Right now the code is a little jungly, but will clear up as I consolidate parse components. The AST structure will also be cleaned up, as right now it's pretty ad hoc (this won't change the parse itself). I added the mandoc_strndup() function will here. Modified Files: -------------- mdocml: eqn.7 eqn.c libroff.h mandoc.c mandoc.h read.c tree.c Revision Data ------------- Index: mandoc.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.c,v retrieving revision 1.54 retrieving revision 1.55 diff -Lmandoc.c -Lmandoc.c -u -p -r1.54 -r1.55 --- mandoc.c +++ mandoc.c @@ -432,6 +432,16 @@ mandoc_realloc(void *ptr, size_t size) return(ptr); } +char * +mandoc_strndup(const char *ptr, size_t sz) +{ + char *p; + + p = mandoc_malloc(sz + 1); + memcpy(p, ptr, sz); + p[(int)sz] = '\0'; + return(p); +} char * mandoc_strdup(const char *ptr) Index: eqn.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/eqn.c,v retrieving revision 1.19 retrieving revision 1.20 diff -Leqn.c -Leqn.c -u -p -r1.19 -r1.20 --- eqn.c +++ eqn.c @@ -32,6 +32,13 @@ #define EQN_NEST_MAX 128 /* maximum nesting of defines */ #define EQN_MSG(t, x) mandoc_msg((t), (x)->parse, (x)->eqn.ln, (x)->eqn.pos, NULL) +enum eqn_rest { + EQN_DESCOPE, + EQN_ERR, + EQN_OK, + EQN_EOF +}; + struct eqnstr { const char *name; size_t sz; @@ -49,6 +56,7 @@ enum eqnpartt { EQN__MAX }; +static struct eqn_box *eqn_box_alloc(struct eqn_box *); static void eqn_box_free(struct eqn_box *); static struct eqn_def *eqn_def_find(struct eqn_node *, const char *, size_t); @@ -59,8 +67,9 @@ static const char *eqn_nexttok(struct eq static const char *eqn_nextrawtok(struct eqn_node *, size_t *); static const char *eqn_next(struct eqn_node *, char, size_t *, int); -static int eqn_box(struct eqn_node *, - struct eqn_box *, struct eqn_box **); +static void eqn_rewind(struct eqn_node *); +static enum eqn_rest eqn_eqn(struct eqn_node *, struct eqn_box *); +static enum eqn_rest eqn_box(struct eqn_node *, struct eqn_box *); static const struct eqnpart eqnparts[EQN__MAX] = { { { "define", 6 }, eqn_do_define }, /* EQN_DEFINE */ @@ -81,20 +90,26 @@ static const struct eqnstr eqnmarks[EQNM }; static const struct eqnstr eqnfonts[EQNFONT__MAX] = { - { "", 0 }, - { "roman", 5 }, - { "bold", 4 }, - { "italic", 6 }, + { "", 0 }, /* EQNFONT_NONE */ + { "roman", 5 }, /* EQNFONT_ROMAN */ + { "bold", 4 }, /* EQNFONT_BOLD */ + { "italic", 6 }, /* EQNFONT_ITALIC */ }; static const struct eqnstr eqnposs[EQNPOS__MAX] = { - { "", 0 }, - { "over", 4 }, - { "sup", 3 }, - { "sub", 3 }, - { "to", 2 }, - { "from", 4 }, - { "above", 5 }, + { "", 0 }, /* EQNPOS_NONE */ + { "over", 4 }, /* EQNPOS_OVER */ + { "sup", 3 }, /* EQNPOS_SUP */ + { "sub", 3 }, /* EQNPOS_SUB */ + { "to", 2 }, /* EQNPOS_TO */ + { "from", 4 }, /* EQNPOS_FROM */ +}; + +static const struct eqnstr eqnpiles[EQNPILE__MAX] = { + { "", 0 }, /* EQNPILE_NONE */ + { "cpile", 5 }, /* EQNPILE_CPILE */ + { "rpile", 5 }, /* EQNPILE_RPILE */ + { "lpile", 5 }, /* EQNPILE_LPILE */ }; /* ARGSUSED */ @@ -154,142 +169,215 @@ eqn_alloc(int pos, int line, struct mpar enum rofferr eqn_end(struct eqn_node *ep) { - struct eqn_box *root, *last; - int c; + struct eqn_box *root; + enum eqn_rest c; + + ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box)); - ep->eqn.root = root = - mandoc_calloc(1, sizeof(struct eqn_box)); + root = ep->eqn.root; root->type = EQN_ROOT; if (0 == ep->sz) return(ROFF_IGN); - /* - * Run the parser. - * If we return before reaching the end of our input, our scope - * is still open somewhere. - * If we return alright but don't have a symmetric scoping, then - * something's not right either. - * Otherwise, return the equation. - */ - - if (0 == (c = eqn_box(ep, root, &last))) { - if (last != root) { - EQN_MSG(MANDOCERR_EQNSCOPE, ep); - c = 0; - } - } else if (c > 0) + if (EQN_DESCOPE == (c = eqn_eqn(ep, root))) { EQN_MSG(MANDOCERR_EQNNSCOPE, ep); + c = EQN_ERR; + } + + return(EQN_EOF == c ? ROFF_EQN : ROFF_IGN); +} + +static enum eqn_rest +eqn_eqn(struct eqn_node *ep, struct eqn_box *last) +{ + struct eqn_box *bp; + enum eqn_rest c; + + bp = eqn_box_alloc(last); + bp->type = EQN_SUBEXPR; + + while (EQN_OK == (c = eqn_box(ep, bp))) + /* Spin! */ ; - return(0 == c ? ROFF_EQN : ROFF_IGN); + return(c); } -static int -eqn_box(struct eqn_node *ep, struct eqn_box *last, struct eqn_box **sv) +static enum eqn_rest +eqn_box(struct eqn_node *ep, struct eqn_box *last) { size_t sz; const char *start; - int c, i, nextc, size; - enum eqn_fontt font; + char *left; + enum eqn_rest c; + int i, size; struct eqn_box *bp; - /* - * Mark our last level of subexpression. - * Also mark whether that the next node should be a - * subexpression node. - */ - - *sv = last; - nextc = 1; - font = EQNFONT_NONE; - size = EQN_DEFSIZE; -again: if (NULL == (start = eqn_nexttok(ep, &sz))) - return(0); + return(EQN_EOF); - for (i = 0; i < (int)EQNFONT__MAX; i++) { - if (eqnfonts[i].sz != sz) + if (1 == sz && 0 == strncmp("}", start, 1)) + return(EQN_DESCOPE); + else if (5 == sz && 0 == strncmp("right", start, 5)) + return(EQN_DESCOPE); + else if (5 == sz && 0 == strncmp("above", start, 5)) + return(EQN_DESCOPE); + + for (i = 0; i < (int)EQN__MAX; i++) { + if (eqnparts[i].str.sz != sz) continue; - if (strncmp(eqnfonts[i].name, start, sz)) + if (strncmp(eqnparts[i].str.name, start, sz)) continue; - font = (enum eqn_fontt)i; - goto again; - } + return((*eqnparts[i].fp)(ep) ? EQN_OK : EQN_ERR); + } - for (i = 0; i < (int)EQNFONT__MAX; i++) { - if (eqnposs[i].sz != sz) + if (1 == sz && 0 == strncmp("{", start, 1)) { + if (EQN_DESCOPE != (c = eqn_eqn(ep, last))) { + if (EQN_ERR != c) + EQN_MSG(MANDOCERR_EQNSCOPE, ep); + return(EQN_ERR); + } + eqn_rewind(ep); + start = eqn_nexttok(ep, &sz); + assert(start); + if (1 == sz && 0 == strncmp("}", start, 1)) + return(EQN_OK); + EQN_MSG(MANDOCERR_EQNBADSCOPE, ep); + return(EQN_ERR); + } + + for (i = 0; i < (int)EQNPILE__MAX; i++) { + if (eqnpiles[i].sz != sz) continue; - if (strncmp(eqnposs[i].name, start, sz)) + if (strncmp(eqnpiles[i].name, start, sz)) continue; - last->pos = (enum eqn_post)i; - goto again; + if (NULL == (start = eqn_nexttok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } + if (1 != sz || strncmp("{", start, 1)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(EQN_ERR); + } + if (EQN_DESCOPE != (c = eqn_eqn(ep, last))) { + if (EQN_ERR != c) + EQN_MSG(MANDOCERR_EQNSCOPE, ep); + return(EQN_ERR); + } + assert(last->last); + last->last->pile = (enum eqn_pilet)i; + eqn_rewind(ep); + start = eqn_nexttok(ep, &sz); + assert(start); + if (1 == sz && 0 == strncmp("}", start, 1)) + return(EQN_OK); + if (5 != sz || strncmp("above", start, 5)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(EQN_ERR); + } + last->last->above = 1; + if (EQN_DESCOPE != (c = eqn_eqn(ep, last))) { + if (EQN_ERR != c) + EQN_MSG(MANDOCERR_EQNSCOPE, ep); + return(EQN_ERR); + } + eqn_rewind(ep); + start = eqn_nexttok(ep, &sz); + assert(start); + if (1 == sz && 0 == strncmp("}", start, 1)) + return(EQN_OK); + EQN_MSG(MANDOCERR_EQNBADSCOPE, ep); + return(EQN_ERR); } - for (i = 0; i < (int)EQN__MAX; i++) { - if (eqnparts[i].str.sz != sz) + if (4 == sz && 0 == strncmp("left", start, 4)) { + if (NULL == (start = eqn_nexttok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } + left = mandoc_strndup(start, sz); + if (EQN_DESCOPE != (c = eqn_eqn(ep, last))) + return(c); + assert(last->last); + last->last->left = left; + eqn_rewind(ep); + start = eqn_nexttok(ep, &sz); + assert(start); + if (5 != sz || strncmp("right", start, 5)) + return(EQN_DESCOPE); + if (NULL == (start = eqn_nexttok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } + last->last->right = mandoc_strndup(start, sz); + return(EQN_OK); + } + + for (i = 0; i < (int)EQNPOS__MAX; i++) { + if (eqnposs[i].sz != sz) continue; - if (strncmp(eqnparts[i].str.name, start, sz)) + if (strncmp(eqnposs[i].name, start, sz)) continue; - if ( ! (*eqnparts[i].fp)(ep)) - return(-1); - goto again; - } + if (NULL == last->last) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(EQN_ERR); + } + last->last->pos = (enum eqn_post)i; + if (EQN_EOF == (c = eqn_box(ep, last))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } + return(c); + } for (i = 0; i < (int)EQNMARK__MAX; i++) { if (eqnmarks[i].sz != sz) continue; if (strncmp(eqnmarks[i].name, start, sz)) continue; - last->mark = (enum eqn_markt)i; - goto again; + if (NULL == last->last) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(EQN_ERR); + } + last->last->mark = (enum eqn_markt)i; + if (EQN_EOF == (c = eqn_box(ep, last))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } + return(c); } - if (sz == 4 && 0 == strncmp("size", start, 1)) { - if (NULL == (start = eqn_nexttok(ep, &sz))) - return(0); - size = mandoc_strntoi(start, sz, 10); - goto again; + for (i = 0; i < (int)EQNFONT__MAX; i++) { + if (eqnfonts[i].sz != sz) + continue; + if (strncmp(eqnfonts[i].name, start, sz)) + continue; + if (EQN_EOF == (c = eqn_box(ep, last))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } else if (EQN_OK == c) + last->last->font = (enum eqn_fontt)i; + return(c); } - if (sz == 1 && 0 == strncmp("}", start, 1)) - return(1); - - bp = mandoc_calloc(1, sizeof(struct eqn_box)); - bp->font = font; - bp->size = size; - - font = EQNFONT_NONE; - size = EQN_DEFSIZE; - - if (nextc) - last->child = bp; - else - last->next = bp; - - last = bp; - - /* - * See if we're to open a new subexpression. - * If so, mark our node as such and descend. - */ - - if (sz == 1 && 0 == strncmp("{", start, 1)) { - bp->type = EQN_SUBEXPR; - c = eqn_box(ep, bp, sv); - - nextc = 0; - goto again; + if (4 == sz && 0 == strncmp("size", start, 4)) { + if (NULL == (start = eqn_nexttok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } + size = mandoc_strntoi(start, sz, 10); + if (EQN_EOF == (c = eqn_box(ep, last))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } else if (EQN_OK != c) + return(c); + last->last->size = size; } - /* A regular text node. */ - + bp = eqn_box_alloc(last); bp->type = EQN_TEXT; - bp->text = mandoc_malloc(sz + 1); - *bp->text = '\0'; - strlcat(bp->text, start, sz + 1); - - nextc = 0; - goto again; + bp->text = mandoc_strndup(start, sz); + return(EQN_OK); } void @@ -309,16 +397,36 @@ eqn_free(struct eqn_node *p) free(p); } +static struct eqn_box * +eqn_box_alloc(struct eqn_box *parent) +{ + struct eqn_box *bp; + + bp = mandoc_calloc(1, sizeof(struct eqn_box)); + bp->parent = parent; + bp->size = EQN_DEFSIZE; + + if (NULL == parent->first) + parent->first = bp; + else + parent->last->next = bp; + + parent->last = bp; + return(bp); +} + static void eqn_box_free(struct eqn_box *bp) { - if (bp->child) - eqn_box_free(bp->child); + if (bp->first) + eqn_box_free(bp->first); if (bp->next) eqn_box_free(bp->next); free(bp->text); + free(bp->left); + free(bp->right); free(bp); } @@ -336,19 +444,26 @@ eqn_nexttok(struct eqn_node *ep, size_t return(eqn_next(ep, '"', sz, 1)); } +static void +eqn_rewind(struct eqn_node *ep) +{ + + ep->cur = ep->rew; +} + static const char * eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl) { char *start, *next; int q, diff, lim; - size_t sv, ssz; + size_t ssz; struct eqn_def *def; if (NULL == sz) sz = &ssz; lim = 0; - sv = ep->cur; + ep->rew = ep->cur; again: /* Prevent self-definitions. */ @@ -357,7 +472,7 @@ again: return(NULL); } - ep->cur = sv; + ep->cur = ep->rew; start = &ep->data[(int)ep->cur]; q = 0; @@ -399,7 +514,7 @@ again: ep->sz += diff; ep->data = mandoc_realloc(ep->data, ep->sz + 1); ep->data[ep->sz] = '\0'; - start = &ep->data[(int)sv]; + start = &ep->data[(int)ep->rew]; } diff = def->valsz - *sz; Index: mandoc.h =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/mandoc.h,v retrieving revision 1.88 retrieving revision 1.89 diff -Lmandoc.h -Lmandoc.h -u -p -r1.88 -r1.89 --- mandoc.h +++ mandoc.h @@ -114,6 +114,9 @@ enum mandocerr { MANDOCERR_EQNNEST, /* too many nested equation defines */ MANDOCERR_EQNNSCOPE, /* unexpected equation scope closure*/ MANDOCERR_EQNSCOPE, /* equation scope open on exit */ + MANDOCERR_EQNBADSCOPE, /* overlapping equation scopes */ + MANDOCERR_EQNEOF, /* unexpected end of equation */ + MANDOCERR_EQNSYNT, /* equation syntax error */ /* related to tables */ MANDOCERR_TBL, /* bad table syntax */ @@ -313,10 +316,17 @@ enum eqn_post { EQNPOS_SUB, EQNPOS_TO, EQNPOS_FROM, - EQNPOS_ABOVE, EQNPOS__MAX }; +enum eqn_pilet { + EQNPILE_NONE = 0, + EQNPILE_CPILE, + EQNPILE_RPILE, + EQNPILE_LPILE, + EQNPILE__MAX +}; + /* * A "box" is a parsed mathematical expression as defined by the eqn.7 * grammar. @@ -325,12 +335,18 @@ struct eqn_box { int size; /* font size of expression */ #define EQN_DEFSIZE INT_MIN enum eqn_boxt type; /* type of node */ - struct eqn_box *child; /* child node */ - struct eqn_box *next; /* next in tree */ - enum eqn_post pos; /* position of next box */ + struct eqn_box *first; /* first child node */ + struct eqn_box *last; /* last child node */ + struct eqn_box *next; /* node sibling */ + struct eqn_box *parent; /* node sibling */ char *text; /* text (or NULL) */ + char *left; + char *right; + enum eqn_post pos; /* position of next box */ enum eqn_markt mark; /* a mark about the box */ enum eqn_fontt font; /* font of box */ + enum eqn_pilet pile; /* equation piling */ + int above; /* next node is above */ }; /* @@ -391,6 +407,7 @@ void *mandoc_calloc(size_t, size_t); void *mandoc_malloc(size_t); void *mandoc_realloc(void *, size_t); char *mandoc_strdup(const char *); +char *mandoc_strndup(const char *, size_t); enum mandoc_esc mandoc_escape(const char **, const char **, int *); Index: libroff.h =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/libroff.h,v retrieving revision 1.23 retrieving revision 1.24 diff -Llibroff.h -Llibroff.h -u -p -r1.23 -r1.24 --- libroff.h +++ libroff.h @@ -46,6 +46,7 @@ struct eqn_node { struct eqn_def *defs; size_t defsz; char *data; + size_t rew; size_t cur; size_t sz; struct eqn eqn; Index: eqn.7 =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/eqn.7,v retrieving revision 1.12 retrieving revision 1.13 diff -Leqn.7 -Leqn.7 -u -p -r1.12 -r1.13 --- eqn.7 +++ eqn.7 @@ -69,9 +69,14 @@ box : text | UNDEF text | box pos box | box mark + | pile { list } | font box | SIZE text box + | LEFT text box [RIGHT text] text : TEXT +pile : LPILE + | CPILE + | RPILE pos : OVER | SUP | SUB @@ -88,6 +93,7 @@ mark : DOT font : ROMAN | ITALIC | BOLD +list : eqn | list ABOVE eqn .Ed .Pp Data in TEXT form is a non-empty sequence of non-space characters or a Index: tree.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/tree.c,v retrieving revision 1.43 retrieving revision 1.44 diff -Ltree.c -Ltree.c -u -p -r1.43 -r1.44 --- tree.c +++ tree.c @@ -274,13 +274,16 @@ print_box(const struct eqn_box *ep, int printf("eqn-root(%d, %d, %d, %d)\n", EQN_DEFSIZE == ep->size ? 0 : ep->size, ep->pos, ep->font, ep->mark); - print_box(ep->child, indent + 1); + print_box(ep->first, indent + 1); break; case (EQN_SUBEXPR): - printf("eqn-subxpr(%d, %d, %d, %d)\n", + printf("eqn-subxpr(%d, %d, %d, %d, %d, %d, \"%s\", \"%s\")\n", EQN_DEFSIZE == ep->size ? 0 : ep->size, - ep->pos, ep->font, ep->mark); - print_box(ep->child, indent + 1); + ep->pos, ep->font, ep->mark, + ep->pile, ep->above, + ep->left ? ep->left : "", + ep->right ? ep->right : ""); + print_box(ep->first, indent + 1); break; case (EQN_TEXT): printf("eqn-text(%d, %d, %d, %d): [%s]\n", Index: read.c =================================================================== RCS file: /usr/vhosts/mdocml.bsd.lv/cvs/mdocml/read.c,v retrieving revision 1.20 retrieving revision 1.21 diff -Lread.c -Lread.c -u -p -r1.20 -r1.21 --- read.c +++ read.c @@ -156,6 +156,9 @@ static const char * const mandocerrs[MAN "too many nested equation defines", "unexpected equation scope closure", "equation scope open on exit", + "overlapping equation scopes", + "unexpected end of equation", + "equation syntax error", /* related to tables */ "bad table syntax", -- To unsubscribe send an email to source+unsubscribe@mdocml.bsd.lv