caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
* Haskell parser combinators in OCaml?
@ 2006-10-19 22:01 Jorgen Hermanrud Fjeld
  2006-10-20  2:43 ` [Caml-list] " Jacques Garrigue
  2006-10-20 15:19 ` Tom
  0 siblings, 2 replies; 3+ messages in thread
From: Jorgen Hermanrud Fjeld @ 2006-10-19 22:01 UTC (permalink / raw)
  To: caml-list

[-- Attachment #1: Type: text/plain, Size: 13931 bytes --]

Hi.

From the world of Haskell, the work of S. Doaitse Swierstra in the paper
"Combinator Parsers: From Toys to Tools" 
"http://citeseer.ist.psu.edu/363886.html", introduces some very nice
combinator parsers that parse LALR(k) grammars, and give good error
messages.

I would love too express something equivalent in OCaml, but I'm not sure
how to translate the concepts used into concepts in OCaml.

I am hoping some of the type theorists out there would glance at the
paper, and bestow some reflection, advice or warning upon me.

There are several issues:
1) How to express the lazy lookahead data structure?
3) How to express the type of the parser in OCaml?

Some details:
1) The lazy data structure in 4.1 can not be expressed directly,
   and I believe some kind of explicit fixed point is needed.
   Would one need fixed points with deBruijn indexes?
   Do you know of any similar examples that I may look at for
   inspiration?
2) The parser has the haskell type 
type Parser a =
  forall b result .
     Future b result
  -> Stack a b
  -> Errs
  -> Input
  -> Steps result
which I can not express in OCaml. My attempts at encoding this 
using an encoding that express existential types, have so far not 
worked out. I always end up with a type error, and do not see how
to better design it. 
######## The type error
File "parser.ml", line 154, characters 21-26:
This field value has type
  ('a, 'a) future ->
  (symbol, 'a) stack -> (errors -> errors) -> input -> ('a * errors) steps
which is less general than
  'b 'c.
    ('b, 'c) future ->
    ('d, 'b) stack -> (errors -> errors) -> input -> ('c * errors) steps
######## Begin code
module BraunTree = 
struct
    type ('key,'value) braun_tree = 
        | Node of ('key,'value) braun_tree * ('key * 'value) * ('key,'value) braun_tree
        | Nil
    ;;
    let tree_of_list (l:('key*'value) list) : ('key,'value) braun_tree = 
        let rec tree_of_list len l = 
            match l with
            | [] -> (Nil,[])
            | (h::[]) -> (Node (Nil,h,Nil),[])
            | (h::t) ->  
                    let left_len = (len - 1) / 2 in
                    let right_len = len - 1 - left_len in
                    let (left_tree,left_list) = tree_of_list left_len l in
                    match left_list with
                    | [] -> assert false
                    | (left_head::left_tail) ->
                            let (right_tree,right_tail) = tree_of_list right_len left_tail in
                            (Node (left_tree,left_head,right_tree),right_tail)
        in
        let (tree,l) = tree_of_list (List.length l) l in
        match l with
        | [] -> tree
        | _ -> assert false
    ;;
    let find ~(key:'key) ~(tree:('key,'value) braun_tree) : 'value option = 
        let rec find tree = 
            match tree with
            | Nil -> None
            | Node (left,(found_key,value),right) -> 
                    match compare key found_key with
                    | 0 -> Some value
                    | 1 -> find left
                    | -1 -> find right
                    | _ -> assert false
        in
        find tree
    ;;
end

module ContinuationTrieParser = 
struct
  type symbol = string
  type input = symbol list
  type 'result steps = 
      Ok of 'result steps
    | Fail of 'result steps 
    | Stop of 'result
  type ('a,'b) stack = 'a -> 'b;;
  type ('cont,'result) future = 'cont -> (errors->errors) -> input -> 'result steps
  and errors = 
      | Deleted of symbol * string * errors
      | Inserted of symbol * string * errors
      | Notused of string 
  type 'p automaton = 
    | Shift of 'p * (symbol * 'p automaton) list
    | ShiftReduce of 'p automaton * 'p automaton
    | Reduce of 'p
    | Found of 'p * 'p automaton
  type 'a combinator_parser = {
      parse:'cont 'result.
      ('cont,'result) future-> ('a,'cont) stack -> (errors->errors) -> input -> ('result*errors) steps
      }
  type 'a parser_generator = {
      automaton : ('a combinator_parser) automaton;
      generated : 'a combinator_parser
  }
  exception Ambigous_grammar
  ;;
  let rec best : 'result steps -> 'result steps -> 'result steps = 
    fun left right -> match (left,right) with
        (Ok left,Ok right) -> Ok (best left right) 
      | (Fail left,Fail right) -> Fail (best left right) 
      | (Ok _,Fail _) -> left 
      | (Fail _,Ok _) -> right 
      | (Stop _,_) -> left 
      | (_,Stop _) -> right 
  ;;
  let best_parser (left:'a combinator_parser) (right:'a combinator_parser) : 'a combinator_parser = 
      let parse cont stack errors input =
          best (left.parse cont stack errors input) (right.parse cont stack errors input)
      in
      {parse=parse}
  ;;

  (** Also known as a catamorphism *)
  let transform_automaton ((transform_shift
                     ,transform_shiftreduce
                     ,transform_reduce
                     ,transform_found
                     ): ((('p * (symbol*'p automaton) list) -> 'b)
                        *(('p automaton * 'p automaton) -> 'b)
                        *('p -> 'b)
                        *(('p * 'p automaton) -> 'b)))
                     (automaton:'a automaton) : 'b =
        let rec transform (automaton:'a automaton) = 
                         match automaton with
                         | Shift (p,choices) -> 
                                 let rec foreach choices collected = 
                                     match choices with
                                     | [] -> collected
                                     | ((symbol,choice)::tail) -> 
                                       let collected = (symbol,transform choice)::collected in
                                           foreach tail collected 

                                 in 
                                 transform_shift (p,foreach choices []) 
                         | ShiftReduce (shift,reduce) ->
                                 transform_shiftreduce (transform shift,transform reduce)
                         | Reduce reduce -> transform_reduce reduce
                         | Found (found,more) -> transform_found (found,transform more)
        in
        transform automaton
  ;;

  let map_automaton (f:'a->'b) (automaton: 'a automaton) : 'b automaton = 
    let transform_shift (p,choices) = Shift (f p,choices)
    in
    let transform_shiftreduce (shift,reduce) = ShiftReduce (shift,reduce)
    in
    let transform_reduce reduce = Reduce (f reduce)
    in
    let transform_found (found,more) = Found (f found,more)
    in
    transform_automaton
      (transform_shift,transform_shiftreduce,transform_reduce,transform_found)
      automaton
  ;;

  let rec mkparser (automaton: string automaton) : string parser_generator =
      let choose (input:input) : string combinator_parser = 
          let transform_shift ((p,choices)
              :symbol * (symbol*symbol automaton) list) : string combinator_parser = 
              let table : (symbol,symbol automaton) BraunTree.braun_tree = BraunTree.tree_of_list choices in
              let find key = BraunTree.find ~key ~tree:table in
              let parse cont stack errors input : ('a*errors) steps = 
                  match input with
                  | [] -> 
                          let error = 
                              errors (Inserted (p,"Insert at end of file",Notused ""))
                          in Stop (stack p,error)
                  | (h::t) -> begin
                      match find h with
                      | Some automaton ->
                              Ok ((mkparser automaton).generated.parse cont stack errors t)
                      | None -> 
                          let errors error = errors (Deleted (h,"Deleted symbol",error))
                          in
                          let errors error = errors (Inserted (p,"Insert symbol",error))
                          in Fail (Fail (Stop (stack p,errors (Notused h))))
                  end 
              in 
              {parse=parse}
          in
          let transform_shiftreduce ((shift,reduce) : symbol automaton * symbol automaton ) =
              let parse cont stack errors input = 
                  (best_parser (mkparser shift).generated (mkparser reduce).generated).parse cont stack errors input
              in 
              parse
          in
          let transform_reduce (reduce:symbol) =
              let parse cont stack errors input = reduce cont stack errors input
              in parse
          in
          let transform_found ((found,more):symbol*symbol automaton) =
              let parse cont stack errors input = found cont stack errors input
              in {parse=parse}
          in
          transform_automaton
          (transform_shift,transform_shiftreduce,transform_reduce,transform_found)
          automaton 
      in
      let parse cont stack errors input =
          (choose input).parse cont stack errors input
      in
      {automaton=automaton;parse=parse}
  ;;

  (** <|> *)
  let either : ('a parser_generator * 'a parser_generator) -> 'a parser_generator = 
      fun (p,q) -> 
          mkparser (merge_ch p.automaton q.automaton)
  ;;

  let rec combine (lefts: (symbol * 'p automaton) list )
                  (rights: (symbol * 'p automaton) list )
                  : (symbol * 'p automaton) list = 
      match (lefts,rights) with
      | ((((left_symbol,left_sentence) as left_head)::left_tail)
        ,(((right_symbol,right_sentence) as right_head)::right_tail)
        ) -> 
              begin
                  match compare left_symbol right_symbol with
                  | 1 -> left_head::(combine left_tail rights)
                  | -1 -> right_head::(combine lefts right_tail)
                  | 0 -> 
                       let head = (left_symbol,either(left_sentence,right_sentence)) in
                       let tail = (combine left_tail right_tail) in
                       head::tail
                 | _ -> assert false
              end
      | ([],_) -> rights
      | (_,[]) -> lefts
  ;;

  (** <*> *)
  let rec both : ('a parser_generator * 'a parser_generator) -> 'a parser_generator =
      fun (p,q) ->
          (** Use two combinator parsers in sequence
           * a both for combinator parsers
           *)
          let both_combinator_parsers first second = 
              let parse cont stack errors input =
                  let stack f x = stack (f x) in
                  first.parse (second.parse cont) stack errors input
              in
              {parse=parse}
          in
          let transform_shift (p,choices) = Shift (both_combinator_parsers p q.generated,choices)
          in
          let transform_shiftreduce (shift,reduce) = merge_ch shift reduce
          in
          let transform_reduce reduce = 
              let worker x = fwby reduce x in
              map_automaton worker q.automaton
          in
          let transform_found (found,more) = Found (both_combinator_parsers found q.generated,more)
          in
          let automaton = transform_automaton (transform_shift
                                              ,transform_shiftreduce
                                              ,transform_reduce
                                              ,transform_found) p.automaton
          in
          mkparser automaton
  ;;

  let merge_ch left right = 
      match (left,right) with
      | (Shift (left_parser,left_choices),Shift (right_parser,right_choices)) ->
              let best = best_parser left_parser right_parser in
              let choices = combine left_choices right_choices in
              Shift (best,choices)
      | (Shift _,ShiftReduce (shift,reduce)) ->
              ShiftReduce (merge_ch left shift,reduce)
      | (Shift _,Reduce _) -> ShiftReduce (left,right)
      | (Shift _,Found (_,more)) ->merge_ch left more
      | (Found (_,more),_) -> merge_ch more right
      | (_,Shift _) -> merge_ch right left
      | (ShiftReduce _,_) 
      | (Reduce _,_) -> raise Ambigous_grammar
  ;;

  let symbol (a:symbol)  : symbol combinator_parser = 
      let rec parse cont
                    stack
                    errors
                    input =
          match input with
          | x::xs -> 
              if a = x 
              then Ok (cont (stack a) errors xs)
              else 
                  let deleted_x =
                      let errors e = errors (Deleted (x,position xs,e)) in
                      parse cont stack errors xs in
                  let inserted_a =
                      let errors e = errors (Inserted (a,show_symbol a,e)) in
                      cont (stack x) errors input in
                  Fail (best deleted_x inserted_a)
          | [] -> 
                  let errors e = errors (Inserted (a,eof,e)) in
                  let inserted_a = cont (stack a) errors input in
                  Fail inserted_a
      in
      let accept cont stack errors input = 
          match input with
          | [] -> assert false
          | (x::xs) -> 
                  assert (a = x) ;
                  Ok  (cont (stack a) errors xs)
      in
      let shift = Shift ({parse=parse},[(a,Reduce {parse=accept})]) in
      let found = Found ({parse=parse},shift) in
      mkparser found
  ;;

  let succeed f =
      let parse cont stack errors input = cont (stack f) errors input in
      mkparser (End {parse=parse})
  ;;

end
;;


######## End code
   
-- 
Sincerely | Homepage:
Jørgen    | http://www.hex.no/jhf
          | Public GPG key:
          | http://www.hex.no/jhf/key.txt


[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Caml-list] Haskell parser combinators in OCaml?
  2006-10-19 22:01 Haskell parser combinators in OCaml? Jorgen Hermanrud Fjeld
@ 2006-10-20  2:43 ` Jacques Garrigue
  2006-10-20 15:19 ` Tom
  1 sibling, 0 replies; 3+ messages in thread
From: Jacques Garrigue @ 2006-10-20  2:43 UTC (permalink / raw)
  To: jhf; +Cc: caml-list

From: Jorgen Hermanrud Fjeld <jhf@hex.no>
> From the world of Haskell, the work of S. Doaitse Swierstra in the paper
> "Combinator Parsers: From Toys to Tools" 
> "http://citeseer.ist.psu.edu/363886.html", introduces some very nice
> combinator parsers that parse LALR(k) grammars, and give good error
> messages.
> 
> I would love too express something equivalent in OCaml, but I'm not sure
> how to translate the concepts used into concepts in OCaml.
> 
> I am hoping some of the type theorists out there would glance at the
> paper, and bestow some reflection, advice or warning upon me.
> 
> There are several issues:
> 1) How to express the lazy lookahead data structure?
> 3) How to express the type of the parser in OCaml?
> 
> Some details:
> 1) The lazy data structure in 4.1 can not be expressed directly,
>    and I believe some kind of explicit fixed point is needed.
>    Would one need fixed points with deBruijn indexes?
>    Do you know of any similar examples that I may look at for
>    inspiration?

I don't see why you can't. OCaml has a lazy type, so you can define all
the lazy data structures you want easily. Of course you have to define
your own type of lazy lists, and insert lots of lazy's all over the
place, but there is no real difficulty.

> 2) The parser has the haskell type 
> type Parser a =
>   forall b result .
>      Future b result
>   -> Stack a b
>   -> Errs
>   -> Input
>   -> Steps result
> which I can not express in OCaml. My attempts at encoding this 
> using an encoding that express existential types, have so far not 
> worked out. I always end up with a type error, and do not see how
> to better design it. 
> ######## The type error
> File "parser.ml", line 154, characters 21-26:
> This field value has type
>   ('a, 'a) future ->
>   (symbol, 'a) stack -> (errors -> errors) -> input -> ('a * errors) steps
> which is less general than
>   'b 'c.
>     ('b, 'c) future ->
>     ('d, 'b) stack -> (errors -> errors) -> input -> ('c * errors) steps

Your encoding uses universal types, not existential. But this seems
the correct thing to do, as far as I can understand the code.
The reasons for the above type error seem double:
* You annotate you local "parse" function in "mkparser" with the type
  ('a * errors). The trouble is that named type variables in ocaml are
  not locally polymorphic. So this is ok to use them for a toplevel
  function, but not for local definitions (if you want them to be
  polymorphic). Just remove the annotation, or replace 'a by _ (the
  anonymous type variable).
* The definition of parse itself seems wrong:
  Stop(stack p, errors) will have type 'cont steps, when you want
  something of type 'result steps. If you unify the two, you don't
  have enough polymorphism.
The first problem is easily solved, but I don't understand enough to
correct the second one. And the rest of the code does not typecheck
anyway.

Jacques Garrigue


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Caml-list] Haskell parser combinators in OCaml?
  2006-10-19 22:01 Haskell parser combinators in OCaml? Jorgen Hermanrud Fjeld
  2006-10-20  2:43 ` [Caml-list] " Jacques Garrigue
@ 2006-10-20 15:19 ` Tom
  1 sibling, 0 replies; 3+ messages in thread
From: Tom @ 2006-10-20 15:19 UTC (permalink / raw)
  To: caml-list

[-- Attachment #1: Type: text/plain, Size: 658 bytes --]

OCaml is able to infer types by itself, so what I suggest is that you don't
declare any local types, but only the global types (no (a:int) type
expressions, but only toplevel type 'a t = 'a -> 'a * 'a). Also, try
declaring only some of your types and define only some of your functions at
a time (using the toploop, interactive compiler) to see what are the
appropriate types the compiler infers.

Besides, you seem to have some errors in the code... for example:

  let succeed f =
>       let parse cont stack errors input = cont (stack f) errors input in
>       mkparser (End {parse=parse})
>   ;;



there is no End constructor declared.

Have fun, Tom

[-- Attachment #2: Type: text/html, Size: 938 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2006-10-20 15:19 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-10-19 22:01 Haskell parser combinators in OCaml? Jorgen Hermanrud Fjeld
2006-10-20  2:43 ` [Caml-list] " Jacques Garrigue
2006-10-20 15:19 ` Tom

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).