caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
From: "Jocelyn Sérot" <jocelyn.serot@uca.fr>
To: Gabriel Scherer <gabriel.scherer@gmail.com>
Cc: caml users <caml-list@inria.fr>
Subject: Re: [Caml-list] Camlp4-free implementation of stream parsers (was camlp4 & OCaml 4.08)
Date: Thu, 25 Jul 2019 17:20:42 +0200	[thread overview]
Message-ID: <9D191A05-C9B7-4462-A7BB-BE705AF9E434@uca.fr> (raw)
In-Reply-To: <CAPFanBHeEM_osrF5ue4QO=vLVvNYZi+fywRSHrSzn-C1hSbHpA@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 7990 bytes --]

Thanks for the suggestion, Gabriel.

It is indeed less tiresome that i had imagined.

I attach the corresponding full code (with original camlp4 formulation in comment).
Handling of the minus operator is here handled before parsing; this is not a problem since such a parser is supposed to be called on very small strings in practice.

Jocelyn

ps : i’m still wondering whether some library and/or ppx-based  generic (afap) implementation of Camlp4 stream parsers is possible..

8<—— Stream-based parser for simple arithmetic expressions (with non-negative integers)

type ident = string 

type value = int 

type t = 
  EConst of value              (** Constants *)   
| EVar of ident                (** Input, output or local variable *)
| EBinop of string * t * t     (** Binary operation *)

let keywords = ["+"; "-"; "*"; "/"; "("; ")"]

let mk_binary_minus s = s |> String.split_on_char '-' |> String.concat " - "
                      
let lexer s = s |> mk_binary_minus |> Stream.of_string |> Genlex.make_lexer keywords 

open Genlex

(* let rec p_exp0  = parser
 *                 | [< 'Int n >] -> EConst n
 *                 | [< 'Ident i >] -> EVar i
 *                 | [< 'Kwd "("; e=p_exp ; 'Kwd ")" >] -> e *)

let rec p_exp0 s =
  match Stream.next s with
    | Int n -> EConst n
    | Ident i -> EVar i
    | Kwd "(" ->
       let e = p_exp s in
       begin match Stream.peek s with
       | Some (Kwd ")") -> Stream.junk s; e
       | _ -> raise Stream.Failure
       end
    | _ -> raise Stream.Failure

(* and p_exp1  = parser
 *             | [< e1=p_exp0 ; rest >] -> p_exp2  e1 rest *)

and p_exp1 s =
  let e1 = p_exp0 s in
  p_exp2 e1 s
  
(* and p_exp2  e1 = parser
 *                | [< 'Kwd "*"; e2=p_exp1  >] -> EBinop("*", e1, e2)
 *                | [< 'Kwd "/"; e2=p_exp1  >] -> EBinop("/", e1, e2)
 *                | [< >] -> e1 *)

and p_exp2 e1 s =
  match Stream.peek s with
  | Some (Kwd "*") -> Stream.junk s; let e2 = p_exp1 s in EBinop("*", e1, e2)
  | Some (Kwd "/") -> Stream.junk s; let e2 = p_exp1 s in EBinop("/", e1, e2)
  | _ -> e1
  
(* and p_exp  = parser
 *            | [< e1=p_exp1 ; rest >] -> p_exp3  e1 rest *)

and p_exp s =
  let e1 = p_exp1 s in p_exp3 e1 s
                     
(* and p_exp3  e1 = parser
 *                | [< 'Kwd "+"; e2=p_exp  >] -> EBinop("+", e1, e2)
 *                | [< 'Kwd "-"; e2=p_exp  >] -> EBinop("-", e1, e2)
 *                | [< >] -> e1 *)

and p_exp3 e1 s =
  match Stream.peek s with
  | Some (Kwd "+") -> Stream.junk s; let e2 = p_exp s in EBinop("+", e1, e2)
  | Some (Kwd "-") -> Stream.junk s; let e2 = p_exp s in EBinop("-", e1, e2)
  | _ -> e1

let parse s = s |> lexer |> p_exp


Le 25 juil. 2019 à 13:42, Gabriel Scherer <gabriel.scherer@gmail.com> a écrit :

> Hi,
> 
> The parser from https://github.com/jserot/lascar/blob/master/src/lib/fsm_expr.ml seems fairly trivial, have you considered just rewriting it to use the functions of the Stream primitives directly?
> 
> For example, roughly (I have not tried to type-check or test the code),
> 
>     let rec aux = parser
>     | [< 'Int n when n<0; t=aux >] -> [< 'Kwd "-"; 'Int (-n); t >]
>     | [< 'h; t=aux >] -> [< 'h; t >]
>     | [< >] -> [< >] in
> 
> would become
> 
>     let aux s =
>       let next = ref [] in
>       Stream.from @@ fun _ ->
>         match !next with
>         | tok::toks -> next := toks; Some tok
>         | [] -> match Stream.next with
>                 | exception Stream.Failure -> None
>                 | Int n when n < 0 ->
>                   next := [Int (-n)];
>                   Some (Kwd "-")
>                 | tok -> Some tok
>  
> and
> 
>     let rec p_exp0  = parser
>     | [< 'Int n >] -> EConst n
>     | [< 'Ident i >] -> EVar i
>     | [< 'Kwd "("; e=p_exp ; 'Kwd ")" >] -> e        
> 
> becomes
> 
>     let rec p_exp0 s = match Stream.next s with
>     | Int n -> EConst n
>     | Ident i -> EVar i
>     | Kwd "(" ->
>       let e = p_exp s in
>       match Stream.peek s with
>       | Some (Kwd ")") -> Stream.junk s; e
>       | _ -> raise Stream.Failure
> 
> This is not exactly exciting code to write, but it's not a lot of work either for such a simple grammar.
> 
> On Thu, Jul 25, 2019 at 12:28 PM Jocelyn Sérot <jocelyn.serot@uca.fr> wrote:
> HI Daniil,
> 
> Thanks for the example. It clearly shows how to embed a Menhir-specified parser into an existing program.
> 
> I still think, however that using Menhir for parsing arithmetic expressions is a bit overkill.
> 
> I’m having a look at Angstrom (and all the other parser combinator libs cited on the corresp. page).
> It seems simpler. 
> 
> Jocelyn
> 
> Le 24 juil. 2019 à 17:31, Daniil Baturin <daniil@baturin.org> a écrit :
> 
> > Hi Jocelyn,
> > 
> > I've completed the first version of my project, so now I can start
> > looking into this again!
> > 
> > There's a third option: parser combinators like angstrom.
> > My experience with Menhir is very positive though. After initial
> > struggle, I came to like its new incremental API and declarative error
> > reporting.
> > 
> > Here's my parser for an extended BNF:
> > Menhir grammar:
> > https://github.com/dmbaturin/bnfgen/blob/master/src/bnf_parser.mly
> > Parser driver that feeds it tokens:
> > https://github.com/dmbaturin/bnfgen/blob/master/src/parse_bnf.ml
> > Error messages:
> > https://github.com/dmbaturin/bnfgen/blob/master/src/bnf_parser.messages
> > Error message module build:
> > https://github.com/dmbaturin/bnfgen/blob/master/src/dune#L6-L8
> > 
> > On 7/24/19 10:10 PM, Jocelyn Sérot wrote:
> >> Hi Daniil (and everyone interested by the subject),
> >> 
> >> Did you have a closer look at this ? 
> >> 
> >> I’m still hesitating between these three approaches for replacing the implementation of the small arithm expression parser used in Lascar [1] :
> >> 
> >> i. rewrite it using the basic fns provided by the Stream library (pro: no additionnal dependency, cons: not so trivial..)
> >> 
> >> ii. replace camlp4 by camlp5 (pro: straightforward, cons: long term maintainability of camlp5 (?)) 
> >> 
> >> iii. rewrite it using ocamlex/menhir and embed it in the main code (pro: « standard » soon; cons: a bit heavy)
> >> 
> >> Jocelyn
> >> 
> >> [1] https://github.com/jserot/lascar/blob/master/src/lib/fsm_expr.ml, lines 70–112
> >> 
> >> Le 2 juil. 2019 à 11:25, Daniil Baturin <daniil@baturin.org> a écrit :
> >> 
> >>> Hi Jocelyn,
> >>> Camlp5 is still sort of maintained, but I don't think it's going to be
> >>> developed beyond compatibility updates.
> >>> For syntax extensions, everyone is switching to PPX.
> >>> 
> >>> From a quick look, it seems like the only bit of camlp4 you use is
> >>> stream expressions.
> >>> This is one of the things PPX can't do (on purpose, since it doesn't
> >>> allow _arbitrary_ extensions),
> >>> but I don't think just using streams directly is going to make code much
> >>> longer.
> >>> 
> >>> Or I missed some other camlp4 bits?
> >>> 
> >>> I'm ready to work on a patch if you are open to it.
> >>> 
> >>> On 7/2/19 1:44 PM, Jocelyn Sérot wrote:
> >>>> Le 29 juin 2019 à 17:15, Daniil Baturin <daniil@baturin.org> a écrit :
> >>>> 
> >>>>> Perhaps we should make some coordinated effort to help them.
> >>>>> I've just sent a pull request to the ocamldot maintainer that enables
> >>>>> the graphviz files parsing and printing modules
> >>>>> to build and work with 4.08. The GTK parts have their own issues.
> >>>>> Next I'm going to look into LASCAR/RFSM (packages that interest me first ;).
> >>>>> 
> >>>> Hi Daniil,
> >>>> 
> >>>> I’ve been been thinking of removing the dependency of Lascar and RFSM on camlp4 for a while.
> >>>> Is switching to CamlP5 a good alternative ? 
> >>>> 
> >>>> Jocelyn
> >>>> 
> >>>> 
> >>> 
> > 
> > 
> 
> 


[-- Attachment #2: Type: text/html, Size: 17622 bytes --]

  reply	other threads:[~2019-07-25 15:21 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <C4399DA5-3383-4A7E-9546-0021083D6EF3@uca.fr>
2019-07-25 10:27 ` Jocelyn Sérot
2019-07-25 11:42   ` Gabriel Scherer
2019-07-25 15:20     ` Jocelyn Sérot [this message]
2019-06-27 13:45 [Caml-list] camlp4 & OCaml 4.08 Richard W.M. Jones
2019-06-29  9:06 ` Anil Madhavapeddy
2019-06-29 15:15   ` Daniil Baturin
     [not found]     ` <5CE377AD-CB06-4261-BD26-A2A697253F02@uca.fr>
     [not found]       ` <393603fa-0efa-5714-82da-ba4bc3e869b8@baturin.org>
2019-07-24 15:10         ` [Caml-list] Camlp4-free implementation of stream parsers (was camlp4 & OCaml 4.08) Jocelyn Sérot
2019-07-24 15:31           ` Daniil Baturin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9D191A05-C9B7-4462-A7BB-BE705AF9E434@uca.fr \
    --to=jocelyn.serot@uca.fr \
    --cc=caml-list@inria.fr \
    --cc=gabriel.scherer@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).