Thanks for the suggestion, Gabriel.

It is indeed less tiresome that i had imagined.

I attach the corresponding full code (with original camlp4 formulation in comment).
Handling of the minus operator is here handled before parsing; this is not a problem since such a parser is supposed to be called on very small strings in practice.

Jocelyn

ps : i’m still wondering whether some library and/or ppx-based  generic (afap) implementation of Camlp4 stream parsers is possible..

8<—— Stream-based parser for simple arithmetic expressions (with non-negative integers)

type ident = string 

type value = int 

type t = 
  EConst of value              (** Constants *)   
| EVar of ident                (** Input, output or local variable *)
| EBinop of string * t * t     (** Binary operation *)

let keywords = ["+"; "-"; "*"; "/"; "("; ")"]

let mk_binary_minus s = s |> String.split_on_char '-' |> String.concat " - "
                      
let lexer s = s |> mk_binary_minus |> Stream.of_string |> Genlex.make_lexer keywords 

open Genlex

(* let rec p_exp0  = parser
 *                 | [< 'Int n >] -> EConst n
 *                 | [< 'Ident i >] -> EVar i
 *                 | [< 'Kwd "("; e=p_exp ; 'Kwd ")" >] -> e *)

let rec p_exp0 s =
  match Stream.next s with
    | Int n -> EConst n
    | Ident i -> EVar i
    | Kwd "(" ->
       let e = p_exp s in
       begin match Stream.peek s with
       | Some (Kwd ")") -> Stream.junk s; e
       | _ -> raise Stream.Failure
       end
    | _ -> raise Stream.Failure

(* and p_exp1  = parser
 *             | [< e1=p_exp0 ; rest >] -> p_exp2  e1 rest *)

and p_exp1 s =
  let e1 = p_exp0 s in
  p_exp2 e1 s
  
(* and p_exp2  e1 = parser
 *                | [< 'Kwd "*"; e2=p_exp1  >] -> EBinop("*", e1, e2)
 *                | [< 'Kwd "/"; e2=p_exp1  >] -> EBinop("/", e1, e2)
 *                | [< >] -> e1 *)

and p_exp2 e1 s =
  match Stream.peek s with
  | Some (Kwd "*") -> Stream.junk s; let e2 = p_exp1 s in EBinop("*", e1, e2)
  | Some (Kwd "/") -> Stream.junk s; let e2 = p_exp1 s in EBinop("/", e1, e2)
  | _ -> e1
  
(* and p_exp  = parser
 *            | [< e1=p_exp1 ; rest >] -> p_exp3  e1 rest *)

and p_exp s =
  let e1 = p_exp1 s in p_exp3 e1 s
                     
(* and p_exp3  e1 = parser
 *                | [< 'Kwd "+"; e2=p_exp  >] -> EBinop("+", e1, e2)
 *                | [< 'Kwd "-"; e2=p_exp  >] -> EBinop("-", e1, e2)
 *                | [< >] -> e1 *)

and p_exp3 e1 s =
  match Stream.peek s with
  | Some (Kwd "+") -> Stream.junk s; let e2 = p_exp s in EBinop("+", e1, e2)
  | Some (Kwd "-") -> Stream.junk s; let e2 = p_exp s in EBinop("-", e1, e2)
  | _ -> e1

let parse s = s |> lexer |> p_exp


Le 25 juil. 2019 à 13:42, Gabriel Scherer <gabriel.scherer@gmail.com> a écrit :

Hi,

The parser from https://github.com/jserot/lascar/blob/master/src/lib/fsm_expr.ml seems fairly trivial, have you considered just rewriting it to use the functions of the Stream primitives directly?

For example, roughly (I have not tried to type-check or test the code),

    let rec aux = parser
    | [< 'Int n when n<0; t=aux >] -> [< 'Kwd "-"; 'Int (-n); t >]
    | [< 'h; t=aux >] -> [< 'h; t >]
    | [< >] -> [< >] in

would become

    let aux s =
      let next = ref [] in
      Stream.from @@ fun _ ->
        match !next with
        | tok::toks -> next := toks; Some tok
        | [] -> match Stream.next with
                | exception Stream.Failure -> None
                | Int n when n < 0 ->
                  next := [Int (-n)];
                  Some (Kwd "-")
                | tok -> Some tok
 
and

    let rec p_exp0  = parser
    | [< 'Int n >] -> EConst n
    | [< 'Ident i >] -> EVar i
    | [< 'Kwd "("; e=p_exp ; 'Kwd ")" >] -> e        

becomes

    let rec p_exp0 s = match Stream.next s with
    | Int n -> EConst n
    | Ident i -> EVar i
    | Kwd "(" ->
      let e = p_exp s in
      match Stream.peek s with
      | Some (Kwd ")") -> Stream.junk s; e
      | _ -> raise Stream.Failure

This is not exactly exciting code to write, but it's not a lot of work either for such a simple grammar.

On Thu, Jul 25, 2019 at 12:28 PM Jocelyn Sérot <jocelyn.serot@uca.fr> wrote:
HI Daniil,

Thanks for the example. It clearly shows how to embed a Menhir-specified parser into an existing program.

I still think, however that using Menhir for parsing arithmetic expressions is a bit overkill.

I’m having a look at Angstrom (and all the other parser combinator libs cited on the corresp. page).
It seems simpler.

Jocelyn

Le 24 juil. 2019 à 17:31, Daniil Baturin <daniil@baturin.org> a écrit :

> Hi Jocelyn,
>
> I've completed the first version of my project, so now I can start
> looking into this again!
>
> There's a third option: parser combinators like angstrom.
> My experience with Menhir is very positive though. After initial
> struggle, I came to like its new incremental API and declarative error
> reporting.
>
> Here's my parser for an extended BNF:
> Menhir grammar:
> https://github.com/dmbaturin/bnfgen/blob/master/src/bnf_parser.mly
> Parser driver that feeds it tokens:
> https://github.com/dmbaturin/bnfgen/blob/master/src/parse_bnf.ml
> Error messages:
> https://github.com/dmbaturin/bnfgen/blob/master/src/bnf_parser.messages
> Error message module build:
> https://github.com/dmbaturin/bnfgen/blob/master/src/dune#L6-L8
>
> On 7/24/19 10:10 PM, Jocelyn Sérot wrote:
>> Hi Daniil (and everyone interested by the subject),
>>
>> Did you have a closer look at this ?
>>
>> I’m still hesitating between these three approaches for replacing the implementation of the small arithm expression parser used in Lascar [1] :
>>
>> i. rewrite it using the basic fns provided by the Stream library (pro: no additionnal dependency, cons: not so trivial..)
>>
>> ii. replace camlp4 by camlp5 (pro: straightforward, cons: long term maintainability of camlp5 (?))
>>
>> iii. rewrite it using ocamlex/menhir and embed it in the main code (pro: « standard » soon; cons: a bit heavy)
>>
>> Jocelyn
>>
>> [1] https://github.com/jserot/lascar/blob/master/src/lib/fsm_expr.ml, lines 70–112
>>
>> Le 2 juil. 2019 à 11:25, Daniil Baturin <daniil@baturin.org> a écrit :
>>
>>> Hi Jocelyn,
>>> Camlp5 is still sort of maintained, but I don't think it's going to be
>>> developed beyond compatibility updates.
>>> For syntax extensions, everyone is switching to PPX.
>>>
>>> From a quick look, it seems like the only bit of camlp4 you use is
>>> stream expressions.
>>> This is one of the things PPX can't do (on purpose, since it doesn't
>>> allow _arbitrary_ extensions),
>>> but I don't think just using streams directly is going to make code much
>>> longer.
>>>
>>> Or I missed some other camlp4 bits?
>>>
>>> I'm ready to work on a patch if you are open to it.
>>>
>>> On 7/2/19 1:44 PM, Jocelyn Sérot wrote:
>>>> Le 29 juin 2019 à 17:15, Daniil Baturin <daniil@baturin.org> a écrit :
>>>>
>>>>> Perhaps we should make some coordinated effort to help them.
>>>>> I've just sent a pull request to the ocamldot maintainer that enables
>>>>> the graphviz files parsing and printing modules
>>>>> to build and work with 4.08. The GTK parts have their own issues.
>>>>> Next I'm going to look into LASCAR/RFSM (packages that interest me first ;).
>>>>>
>>>> Hi Daniil,
>>>>
>>>> I’ve been been thinking of removing the dependency of Lascar and RFSM on camlp4 for a while.
>>>> Is switching to CamlP5 a good alternative ?
>>>>
>>>> Jocelyn
>>>>
>>>>
>>>
>
>