From mboxrd@z Thu Jan 1 00:00:00 1970 X-Msuck: nntp://news.gmane.org/gmane.linux.lib.musl.general/5668 Path: news.gmane.org!not-for-mail From: Szabolcs Nagy Newsgroups: gmane.linux.lib.musl.general Subject: Re: Non-stub gettext API functions committed, ready for testing Date: Mon, 28 Jul 2014 12:18:30 +0200 Message-ID: <20140728101829.GJ10402@port70.net> References: <20140727084633.GA22355@brightrain.aerifal.cx> <53D4CF09.1050209@gmx.de> <20140727141417.GG10402@port70.net> <20140727164921.GY4038@brightrain.aerifal.cx> <20140727172308.GH10402@port70.net> <20140727173605.GZ4038@brightrain.aerifal.cx> <20140727175125.GI10402@port70.net> <20140727180041.GA4038@brightrain.aerifal.cx> Reply-To: musl@lists.openwall.com NNTP-Posting-Host: plane.gmane.org Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="HlL+5n6rz5pIUxbD" X-Trace: ger.gmane.org 1406542734 13930 80.91.229.3 (28 Jul 2014 10:18:54 GMT) X-Complaints-To: usenet@ger.gmane.org NNTP-Posting-Date: Mon, 28 Jul 2014 10:18:54 +0000 (UTC) To: musl@lists.openwall.com Original-X-From: musl-return-5673-gllmg-musl=m.gmane.org@lists.openwall.com Mon Jul 28 12:18:48 2014 Return-path: Envelope-to: gllmg-musl@plane.gmane.org Original-Received: from mother.openwall.net ([195.42.179.200]) by plane.gmane.org with smtp (Exim 4.69) (envelope-from ) id 1XBi11-0001zJ-4n for gllmg-musl@plane.gmane.org; Mon, 28 Jul 2014 12:18:47 +0200 Original-Received: (qmail 3512 invoked by uid 550); 28 Jul 2014 10:18:42 -0000 Mailing-List: contact musl-help@lists.openwall.com; run by ezmlm Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: Original-Received: (qmail 3500 invoked from network); 28 Jul 2014 10:18:42 -0000 Mail-Followup-To: musl@lists.openwall.com Content-Disposition: inline In-Reply-To: <20140727180041.GA4038@brightrain.aerifal.cx> User-Agent: Mutt/1.5.21 (2010-09-15) Xref: news.gmane.org gmane.linux.lib.musl.general:5668 Archived-At: --HlL+5n6rz5pIUxbD Content-Type: text/plain; charset=us-ascii 
Content-Disposition: inline * Rich Felker [2014-07-27 14:00:41 -0400]: > On Sun, Jul 27, 2014 at 07:51:26PM +0200, Szabolcs Nagy wrote: > > > >From what I can tell, that's not so bad. Anyone feel like writing an > > > expression evaluator for it? I think recursive descent is fine as long > > > as the length of the string being evaluated is capped at a sane length > > > (or just keep a depth counter and abort the evaluation if it exceeds > > > some reasonable limit). > > > > > > > i can try > > OK. Some thoughts on implementation: It should probably accept the > expression as a base+length rather than a C string so it can be used > in-place from within the mo file "header" (this design might help for > recursion anyway I suppose). And it should be safe against malicious > changes to the expression during evaluation (at worst give wrong > results or error out rather than risk of stack overflow, out-of-bounds > reads, etc.) since I'm aiming to make the whole system safe against > malicious translation files (assuming the caller doesn't use the > results in unsafe ways like as a format string). 
/*
 * >
 *
 * ok i did something
 *
 * i parse the expression once and then do the eval separately
 * so "changes to the expression during evaluation" does not apply
 * (i expected the expr to be const and evaluated several times
 * with different n)
 *
 * currently it checks if base[length-1] == ';' and then does not
 * care about the length anymore, the first unexpected char ends
 * the parsing
 *
 * the parser and eval code is about 2k now, i can try to do it
 * without a separate parsing step (my approach requires a 100-200
 * byte buffer to store the parsed expr now)
 *
 * --HlL+5n6rz5pIUxbD
 * Content-Type: text/x-chdr; charset=us-ascii
 * Content-Disposition: attachment; filename="pl.h"
 */

#include <stddef.h> /* size_t, so the prototypes below are self-contained */

// parse s into expr, returns -1 on failure
int parse(unsigned char *expr, size_t elen, const char *s, size_t slen);
// eval expr with input n
unsigned long eval(const unsigned char *expr, unsigned long n);

/*
 * --HlL+5n6rz5pIUxbD
 * Content-Type: text/x-csrc; charset=us-ascii
 * Content-Disposition: attachment; filename="pl.c"
 */

/*
 * The <...> names of the system headers were lost when the message was
 * archived; the three below are reconstructed from what the code uses
 * (isspace/isdigit, strtoul, memmove).
 */
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
/* #include "pl.h" -- the pl.h prototypes appear directly above in this span */

/*
grammar:

Start = Expr ';'
Expr  = Or | Or '?' Expr ':' Expr
Or    = And | Or '||' And
And   = Rel | And '&&' Rel
Rel   = Add | Add '==' Add | Add '!=' Add | Add '<=' Add | Add '>=' Add | Add '<' Add | Add '>' Add
Add   = Mul | Add '+' Mul | Add '-' Mul
Mul   = Term | Mul '*' Term | Mul '/' decimal | Mul '%' decimal
Term  = '(' Expr ')' | '!' Term | decimal | 'n'

compared to gnu gettext:
	right side of / and % must be const (and non-zero),
	chained relational/eq operators are not allowed,
	decimal is at most 255

internals:

parser is recursive descent, terminals are pushed on a stack
that grows down, a binary op "Left op Right" is parsed into
"op length-of-Right Right Left" so eval is easy to implement.

op chars on the stack
	n c ! | & = < > + - * / % ?
are
	var, const, neg, or, and, eq, less, greater, add, sub, mul, div, mod, cond

parse* functions push the parsed rule on the stack and return
a pointer to the next non-space char
*/

/*
 * One further #include stood here in the original; its header name was
 * lost in archiving and nothing in the remaining code needs it.
 */

enum {
	EXPR_MAX = 200,  /* cap on the output buffer; keeps every stored offset within one byte */
	DEPTH_MAX = 100  /* recursion budget handed to the top-level parseexpr() */
};

/* Parser output: a byte stack inside the caller's buffer that grows downwards. */
struct st {
	unsigned char *p; /* last byte written; starts one past the end of the buffer */
	unsigned char *e; /* start of the buffer; p == e marks the failed state */
};

/* Non-zero while the parser has not failed (and the buffer is not exhausted). */
static int ok(struct st *st)
{
	return st->p != st->e;
}

/* Enter the failed state; every later push() becomes a no-op. */
static void fail(struct st *st)
{
	st->p = st->e;
}

/*
 * Push one byte.  Writing the byte that makes p reach e is indistinguishable
 * from failure, so a successful parse needs a buffer at least one byte larger
 * than the encoded expression.
 */
static void push(struct st *st, int c)
{
	if (ok(st))
		*--st->p = c;
}

/* Advance past white space.  The cast keeps <ctype.h> defined for bytes >= 0x80. */
static const char *skipspace(const char *s)
{
	while (isspace((unsigned char)*s))
		s++;
	return s;
}

/* decimal: pushes 'c' followed by the value; fails unless it is 0..255. */
static const char *parseconst(struct st *st, const char *s)
{
	char *end;
	unsigned long n;

	/* test for a digit first: strtoul alone would also accept a sign or leading space */
	if (!isdigit((unsigned char)*s)) {
		fail(st);
		return s;
	}
	n = strtoul(s, &end, 10);
	if (n > 255) {
		fail(st);
		return skipspace(end);
	}
	push(st, (int)n);
	push(st, 'c');
	return skipspace(end);
}

static const char *parseexpr(struct st *st, const char *s, int d);

/* Term = '(' Expr ')' | '!' Term | decimal | 'n'.  d is the remaining recursion budget. */
static const char *parseterm(struct st *st, const char *s, int d)
{
	if (d <= 0) {
		fail(st);
		return s;
	}
	s = skipspace(s);
	if (*s == '!') {
		s = parseterm(st, s+1, d-1);
		push(st, '!');
		return s;
	}
	if (*s == '(') {
		s = parseexpr(st, s+1, d-1);
		if (*s != ')') {
			fail(st);
			return s;
		}
		return skipspace(s+1);
	}
	if (*s == 'n') {
		push(st, 'n');
		return skipspace(s+1);
	}
	return parseconst(st, s);
}

/* Mul = Term | Mul '*' Term | Mul '/' decimal | Mul '%' decimal */
static const char *parsemul(struct st *st, const char *s, int d)
{
	unsigned char *p;
	int op;

	s = parseterm(st, s, d-1);
	for (;;) {
		op = *s;
		p = st->p;
		if (op == '*') {
			s = parseterm(st, s+1, d-1);
		} else if (op == '/' || op == '%') {
			s = skipspace(s+1);
			/* a divisor with a leading '0' is refused, which rules out
			   division by zero when the expression is evaluated */
			if (*s == '0') {
				fail(st);
				return s;
			}
			s = parseconst(st, s);
		} else {
			return s;
		}
		push(st, (int)(p - st->p)); /* length of the right operand */
		push(st, op);
	}
}

/* Add = Mul | Add '+' Mul | Add '-' Mul */
static const char *parseadd(struct st *st, const char *s, int d)
{
	unsigned char *p;
	int op;

	s = parsemul(st, s, d-1);
	for (;;) {
		op = *s;
		if (op != '+' && op != '-')
			return s;
		p = st->p;
		s = parsemul(st, s+1, d-1);
		push(st, (int)(p - st->p));
		push(st, op);
	}
}

/*
 * Rel = Add | Add relop Add (no chaining).
 * Only '=', '<' and '>' are emitted; '!=', '<=' and '>=' are encoded as the
 * negation of '=', '>' and '<' respectively.
 */
static const char *parserel(struct st *st, const char *s, int d)
{
	unsigned char *p;
	int neg = 0, op;

	s = parseadd(st, s, d-1);
	if (s[0] == '=' && s[1] == '=') {
		op = '=';
		s++;
	} else if (s[0] == '!' && s[1] == '=') {
		op = '=';
		neg = 1;
		s++;
	} else if (s[0] == '<' && s[1] == '=') {
		op = '>';
		neg = 1;
		s++;
	} else if (s[0] == '<') {
		op = '<';
	} else if (s[0] == '>' && s[1] == '=') {
		op = '<';
		neg = 1;
		s++;
	} else if (s[0] == '>') {
		op = '>';
	} else {
		return s;
	}
	p = st->p;
	s = parseadd(st, s+1, d-1);
	push(st, (int)(p - st->p));
	push(st, op);
	if (neg)
		push(st, '!');
	return s;
}

/* And = Rel | And '&&' Rel */
static const char *parseand(struct st *st, const char *s, int d)
{
	unsigned char *p;

	s = parserel(st, s, d-1);
	for (;;) {
		if (s[0] != '&' || s[1] != '&')
			return s;
		p = st->p;
		s = parserel(st, s+2, d-1);
		push(st, (int)(p - st->p));
		push(st, '&');
	}
}

/* Or = And | Or '||' And */
static const char *parseor(struct st *st, const char *s, int d)
{
	unsigned char *p;

	s = parseand(st, s, d-1);
	for (;;) {
		if (s[0] != '|' || s[1] != '|')
			return s;
		p = st->p;
		/* d-1 like every sibling loop (was --d, which shrank the budget per '||') */
		s = parseand(st, s+2, d-1);
		push(st, (int)(p - st->p));
		push(st, '|');
	}
}

/*
 * Expr = Or | Or '?' Expr ':' Expr
 * A conditional is stored as  '?' off-to-cond off-to-true False True Cond.
 */
static const char *parseexpr(struct st *st, const char *s, int d)
{
	unsigned char *p1, *p2;

	if (d <= 0) {
		fail(st);
		return s;
	}
	s = parseor(st, s, d-1);
	if (*s == '?') {
		p1 = st->p; /* start of the condition */
		s = parseexpr(st, s+1, d-1);
		p2 = st->p; /* start of the true branch */
		if (*s != ':')
			fail(st);
		else
			s = parseexpr(st, s+1, d-1);
		push(st, (int)(p2 - st->p));
		push(st, (int)(p1 - st->p));
		push(st, '?');
	}
	return s;
}

/*
 * Parse the plural expression s[0..slen-1] into expr.
 *
 * s need not be NUL terminated, but s[slen-1] must be ';'.  No rule accepts
 * ';', so scanning stops at the first one, and the parse succeeds only if
 * that is the final byte.  At most EXPR_MAX bytes of expr are used; on
 * success the encoded expression starts at expr[0].
 *
 * Returns 0 on success and -1 on any failure: NULL argument, empty input,
 * missing ';', syntax error, constant above 255, divisor with a leading
 * zero, nesting too deep, or buffer too small.  The contents of expr are
 * unspecified after a failure.
 */
int parse(unsigned char *expr, size_t elen, const char *s, size_t slen)
{
	const char *e;
	unsigned char *p;
	struct st st;

	/* slen == 0 would otherwise make the terminator check read s[-1] */
	if (!expr || !s || slen == 0)
		return -1;
	e = s + slen - 1;
	if (*e != ';')
		return -1;
	if (elen > EXPR_MAX)
		elen = EXPR_MAX;
	st.e = expr;
	p = st.p = expr + elen;
	s = parseexpr(&st, s, DEPTH_MAX);
	if (!ok(&st) || s != e)
		return -1;
	/* the stack grew down from the end of the buffer; move it to the front */
	memmove(expr, st.p, p - st.p);
	return 0;
}

/* Evaluate a conditional; e points just past the '?' op byte. */
static unsigned long evalcond(const unsigned char *e, unsigned long n)
{
	int offcond = *e++;
	unsigned long c = eval(e+offcond, n);
	int offtrue = *e++;
	return eval(c ? e+offtrue : e, n);
}

/*
 * Evaluate a binary operator; e points just past the op byte.  Both operands
 * are always evaluated.  '/' and '%' rely on parse() having refused zero
 * divisors.  An unknown op yields (unsigned long)-1.
 */
static unsigned long evalbin(int op, const unsigned char *e, unsigned long n)
{
	int offleft = *e++;
	unsigned long right = eval(e, n);
	unsigned long left = eval(e+offleft, n);

	switch (op) {
	case '|': return left || right;
	case '&': return left && right;
	case '=': return left == right;
	case '<': return left < right;
	case '>': return left > right;
	case '+': return left + right;
	case '-': return left - right;
	case '*': return left * right;
	case '/': return left / right;
	case '%': return left % right;
	default:  return -1;
	}
}

/*
 * Evaluate an expression produced by a successful parse() for the input n.
 * The buffer is trusted: a buffer that parse() did not fill successfully
 * must not be passed here.
 */
unsigned long eval(const unsigned char *e, unsigned long n)
{
	int op = *e++;

	switch (op) {
	case 'n': return n;
	case 'c': return *e;
	case '!': return !eval(e, n);
	case '?': return evalcond(e, n);
	default:  return evalbin(op, e, n);
	}
}

/* --HlL+5n6rz5pIUxbD-- */