mailing list of musl libc
 help / color / mirror / code / Atom feed
From: Szabolcs Nagy <nsz@port70.net>
To: musl@lists.openwall.com
Subject: Re: Non-stub gettext API functions committed, ready for testing
Date: Mon, 28 Jul 2014 12:18:30 +0200	[thread overview]
Message-ID: <20140728101829.GJ10402@port70.net> (raw)
In-Reply-To: <20140727180041.GA4038@brightrain.aerifal.cx>

[-- Attachment #1: Type: text/plain, Size: 1635 bytes --]

* Rich Felker <dalias@libc.org> [2014-07-27 14:00:41 -0400]:
> On Sun, Jul 27, 2014 at 07:51:26PM +0200, Szabolcs Nagy wrote:
> > > >From what I can tell, that's not so bad. Anyone feel like writing an
> > > expression evaluator for it? I think recursive descent is fine as long
> > > as the length of the string being evaluated is capped at a sane length
> > > (or just keep a depth counter and abort the evaluation if it exceeds
> > > some reasonable limit).
> > > 
> > 
> > i can try
> 
> OK. Some thoughts on implementation: It should probably accept the
> expression as a base+length rather than a C string so it can be used
> in-place from within the mo file "header" (this design might help for
> recursion anyway I suppose). And it should be safe against malicious
> changes to the expression during evaluation (at worst give wrong
> results or error out rather than risk of stack overflow, out-of-bounds
> reads, etc.) since I'm aiming to make the whole system safe against
> malicious translation files (assuming the caller doesn't use the
> results in unsafe ways like as a format string).
> 

ok i did something

i parse the expression once and then do the eval separately so
"changes to the expression during evaluation" does not apply
(i expected the expr to be const and evaluated several times
with different n)

currently it checks if base[length-1] == ';' and then does not
care about the length anymore, the first unexpected char ends
the parsing

the parser and eval code is about 2k now, i can try to do it
without a separate parsing step (my approach requires a 100-200
byte buffer to store the parsed expr now)


[-- Attachment #2: pl.h --]
[-- Type: text/x-chdr, Size: 207 bytes --]

// parse s into expr, returns -1 on failure
int parse(unsigned char *expr, size_t elen, const char *s, size_t slen);
// eval expr with input n
unsigned long eval(const unsigned char *expr, unsigned long n);

[-- Attachment #3: pl.c --]
[-- Type: text/x-csrc, Size: 5628 bytes --]

#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "pl.h"

/*
grammar:

Start = Expr ';'
Expr  = Or | Or '?' Expr ':' Expr
Or    = And | Or '||' And
And   = Rel | And '&&' Rel
Rel   = Add | Add '==' Add | Add '!=' Add | Add '<=' Add | Add '>=' Add | Add '<' Add | Add '>' Add
Add   = Mul | Add '+' Mul | Add '-' Mul
Mul   = Term | Mul '*' Term | Mul '/' decimal | Mul '%' decimal
Term  = '(' Expr ')' | '!' Term | decimal | 'n'

compared to gnu gettext:
	right side of / and % must be const (and non-zero),
	chained relational/eq operators are not allowed,
	decimal is at most 255

internals:

parser is recursive descent, terminals are pushed on a stack
that grows down, a binary op "Left op Right" is parsed into
"op length-of-Right Right Left" so eval is easy to implement.

op chars on the stack
	n c ! | & = < > + - * / % ?
are
	var, const, neg, or, and, eq, less, greater, add, sub, mul, div, mod, cond

parse* functions push the parsed rule on the stack and return
a pointer to the next non-space char
*/

#include <stdio.h>

struct st {
	unsigned char *p;
	unsigned char *e;
};

static int ok(struct st *st)
{
	return st->p != st->e;
}

static void fail(struct st *st)
{
	st->p = st->e;
}

static void push(struct st *st, int c)
{
	if (ok(st))
		*--st->p = c;
}

static const char *skipspace(const char *s)
{
	while (isspace(*s)) s++;
	return s;
}

static const char *parseconst(struct st *st, const char *s)
{
	char *e;
	unsigned long n;
	n = strtoul(s, &e, 10);
	if (!isdigit(*s) || e == s || n > 255)
		fail(st);
	push(st, n);
	push(st, 'c');
	return skipspace(e);
}

static const char *parseexpr(struct st *st, const char *s, int d);

static const char *parseterm(struct st *st, const char *s, int d)
{
	if (d <= 0) {
		fail(st);
		return s;
	}
	s = skipspace(s);
	if (*s == '!') {
		s = parseterm(st, s+1, d-1);
		push(st, '!');
		return s;
	}
	if (*s == '(') {
		s = parseexpr(st, s+1, d-1);
		if (*s != ')') {
			fail(st);
			return s;
		}
		return skipspace(s+1);
	}
	if (*s == 'n') {
		push(st, 'n');
		return skipspace(s+1);
	}
	return parseconst(st, s);
}

static const char *parsemul(struct st *st, const char *s, int d)
{
	unsigned char *p;
	int op;
	s = parseterm(st, s, d-1);
	for (;;) {
		op = *s;
		p = st->p;
		if (op == '*') {
			s = parseterm(st, s+1, d-1);
		} else  if (op == '/' || op == '%') {
			s = skipspace(s+1);
			if (*s == '0') {
				fail(st);
				return s;
			}
			s = parseconst(st, s);
		} else
			return s;
		push(st, p - st->p);
		push(st, op);
	}
}

static const char *parseadd(struct st *st, const char *s, int d)
{
	unsigned char *p;
	int op;
	s = parsemul(st, s, d-1);
	for (;;) {
		op = *s;
		if (op != '+' && op != '-')
			return s;
		p = st->p;
		s = parsemul(st, s+1, d-1);
		push(st, p - st->p);
		push(st, op);
	}
}

static const char *parserel(struct st *st, const char *s, int d)
{
	unsigned char *p;
	int neg = 0, op;
	s = parseadd(st, s, d-1);
	if (s[0] == '=' && s[1] == '=') {
		op = '=';
		s++;
	} else if (s[0] == '!' && s[1] == '=') {
		op = '=';
		neg = 1;
		s++;
	} else if (s[0] == '<' && s[1] == '=') {
		op = '>';
		neg = 1;
		s++;
	} else if (s[0] == '<') {
		op = '<';
	} else if (s[0] == '>' && s[1] == '=') {
		op = '<';
		neg = 1;
		s++;
	} else if (s[0] == '>') {
		op = '>';
	} else
		return s;
	p = st->p;
	s = parseadd(st, s+1, d-1);
	push(st, p - st->p);
	push(st, op);
	if (neg)
		push(st, '!');
	return s;
}

static const char *parseand(struct st *st, const char *s, int d)
{
	unsigned char *p;
	s = parserel(st, s, d-1);
	for (;;) {
		if (s[0] != '&' || s[1] != '&')
			return s;
		p = st->p;
		s = parserel(st, s+2, d-1);
		push(st, p - st->p);
		push(st, '&');
	}
}

static const char *parseor(struct st *st, const char *s, int d)
{
	unsigned char *p;
	s = parseand(st, s, d-1);
	for (;;) {
		if (s[0] != '|' || s[1] != '|')
			return s;
		p = st->p;
		s = parseand(st, s+2, --d);
		push(st, p - st->p);
		push(st, '|');
	}
}

static const char *parseexpr(struct st *st, const char *s, int d)
{
	unsigned char *p1, *p2;
	if (d <= 0) {
		fail(st);
		return s;
	}
	s = parseor(st, s, d-1);
	if (*s == '?') {
		p1 = st->p;
		s = parseexpr(st, s+1, d-1);
		p2 = st->p;
		if (*s != ':')
			fail(st);
		else
			s = parseexpr(st, s+1, d-1);
		push(st, p2 - st->p);
		push(st, p1 - st->p);
		push(st, '?');
	}
	return s;
}

int parse(unsigned char *expr, size_t elen, const char *s, size_t slen)
{
	const char *e = s + slen - 1;
	unsigned char *p;
	struct st st;

	if (*e != ';')
		return -1;
	if (elen > 200)
		elen = 200;
	st.e = expr;
	p = st.p = expr + elen;
	s = parseexpr(&st, s, 100);
	if (!ok(&st) || s != e)
		return -1;
	memmove(expr, st.p, p - st.p);
	return 0;
}

static unsigned long evalcond(const unsigned char *e, unsigned long n)
{
	int offcond = *e++;
	unsigned long c = eval(e+offcond, n);
	int offtrue = *e++;
	return eval(c ? e+offtrue : e, n);
}

static unsigned long evalbin(int op, const unsigned char *e, unsigned long n)
{
	int offleft = *e++;
	unsigned long right = eval(e, n);
	unsigned long left = eval(e+offleft, n);
	switch (op) {
	case '|': return left || right;
	case '&': return left && right;
	case '=': return left == right;
	case '<': return left < right;
	case '>': return left > right;
	case '+': return left + right;
	case '-': return left - right;
	case '*': return left * right;
	case '/': return left / right;
	case '%': return left % right;
	}
	return -1;
}

unsigned long eval(const unsigned char *e, unsigned long n)
{
	int op = *e++;
	switch (op) {
	case 'n': return n;
	case 'c': return *e;
	case '!': return !eval(e, n);
	case '?': return evalcond(e, n);
	}
	return evalbin(op, e, n);
}

  reply	other threads:[~2014-07-28 10:18 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-27  8:46 Rich Felker
2014-07-27 10:06 ` Harald Becker
2014-07-27 14:14   ` Szabolcs Nagy
2014-07-27 16:49     ` Rich Felker
2014-07-27 17:23       ` Szabolcs Nagy
2014-07-27 17:36         ` Rich Felker
2014-07-27 17:51           ` Szabolcs Nagy
2014-07-27 18:00             ` Rich Felker
2014-07-28 10:18               ` Szabolcs Nagy [this message]
2014-07-28 13:00                 ` Szabolcs Nagy
2014-07-28 14:01                   ` Szabolcs Nagy
2014-07-28 16:27                     ` Rich Felker
2014-07-29 13:49                       ` Szabolcs Nagy
2014-07-27 10:19 ` Harald Becker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140728101829.GJ10402@port70.net \
    --to=nsz@port70.net \
    --cc=musl@lists.openwall.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).