Hi. From the world of Haskell, the work of S. Doaitse Swierstra in the paper "Combinator Parsers: From Toys to Tools" "http://citeseer.ist.psu.edu/363886.html", introduces some very nice combinator parsers that parse LALR(k) grammars, and give good error messages. I would love too express something equivalent in OCaml, but I'm not sure how to translate the concepts used into concepts in OCaml. I am hoping some of the type theorists out there would glance at the paper, and bestow some reflection, advice or warning upon me. There are several issues: 1) How to express the lazy lookahead data structure? 3) How to express the type of the parser in OCaml? Some details: 1) The lazy data structure in 4.1 can not be expressed directly, and I believe some kind of explicit fixed point is needed. Would one need fixed points with deBruijn indexes? Do you know of any similar examples that I may look at for inspiration? 2) The parser has the haskell type type Parser a = forall b result . Future b result -> Stack a b -> Errs -> Input -> Steps result which I can not express in OCaml. My attempts at encoding this using an encoding that express existential types, have so far not worked out. I always end up with a type error, and do not see how to better design it. ######## The type error File "parser.ml", line 154, characters 21-26: This field value has type ('a, 'a) future -> (symbol, 'a) stack -> (errors -> errors) -> input -> ('a * errors) steps which is less general than 'b 'c. ('b, 'c) future -> ('d, 'b) stack -> (errors -> errors) -> input -> ('c * errors) steps ######## Begin code module BraunTree = struct type ('key,'value) braun_tree = | Node of ('key,'value) braun_tree * ('key * 'value) * ('key,'value) braun_tree | Nil ;; let tree_of_list (l:('key*'value) list) : ('key,'value) braun_tree = let rec tree_of_list len l = match l with | [] -> (Nil,[]) | (h::[]) -> (Node (Nil,h,Nil),[]) | (h::t) -> let left_len = (len - 1) / 2 in let right_len = len - 1 - left_len in let (left_tree,left_list) = tree_of_list left_len l in match left_list with | [] -> assert false | (left_head::left_tail) -> let (right_tree,right_tail) = tree_of_list right_len left_tail in (Node (left_tree,left_head,right_tree),right_tail) in let (tree,l) = tree_of_list (List.length l) l in match l with | [] -> tree | _ -> assert false ;; let find ~(key:'key) ~(tree:('key,'value) braun_tree) : 'value option = let rec find tree = match tree with | Nil -> None | Node (left,(found_key,value),right) -> match compare key found_key with | 0 -> Some value | 1 -> find left | -1 -> find right | _ -> assert false in find tree ;; end module ContinuationTrieParser = struct type symbol = string type input = symbol list type 'result steps = Ok of 'result steps | Fail of 'result steps | Stop of 'result type ('a,'b) stack = 'a -> 'b;; type ('cont,'result) future = 'cont -> (errors->errors) -> input -> 'result steps and errors = | Deleted of symbol * string * errors | Inserted of symbol * string * errors | Notused of string type 'p automaton = | Shift of 'p * (symbol * 'p automaton) list | ShiftReduce of 'p automaton * 'p automaton | Reduce of 'p | Found of 'p * 'p automaton type 'a combinator_parser = { parse:'cont 'result. ('cont,'result) future-> ('a,'cont) stack -> (errors->errors) -> input -> ('result*errors) steps } type 'a parser_generator = { automaton : ('a combinator_parser) automaton; generated : 'a combinator_parser } exception Ambigous_grammar ;; let rec best : 'result steps -> 'result steps -> 'result steps = fun left right -> match (left,right) with (Ok left,Ok right) -> Ok (best left right) | (Fail left,Fail right) -> Fail (best left right) | (Ok _,Fail _) -> left | (Fail _,Ok _) -> right | (Stop _,_) -> left | (_,Stop _) -> right ;; let best_parser (left:'a combinator_parser) (right:'a combinator_parser) : 'a combinator_parser = let parse cont stack errors input = best (left.parse cont stack errors input) (right.parse cont stack errors input) in {parse=parse} ;; (** Also known as a catamorphism *) let transform_automaton ((transform_shift ,transform_shiftreduce ,transform_reduce ,transform_found ): ((('p * (symbol*'p automaton) list) -> 'b) *(('p automaton * 'p automaton) -> 'b) *('p -> 'b) *(('p * 'p automaton) -> 'b))) (automaton:'a automaton) : 'b = let rec transform (automaton:'a automaton) = match automaton with | Shift (p,choices) -> let rec foreach choices collected = match choices with | [] -> collected | ((symbol,choice)::tail) -> let collected = (symbol,transform choice)::collected in foreach tail collected in transform_shift (p,foreach choices []) | ShiftReduce (shift,reduce) -> transform_shiftreduce (transform shift,transform reduce) | Reduce reduce -> transform_reduce reduce | Found (found,more) -> transform_found (found,transform more) in transform automaton ;; let map_automaton (f:'a->'b) (automaton: 'a automaton) : 'b automaton = let transform_shift (p,choices) = Shift (f p,choices) in let transform_shiftreduce (shift,reduce) = ShiftReduce (shift,reduce) in let transform_reduce reduce = Reduce (f reduce) in let transform_found (found,more) = Found (f found,more) in transform_automaton (transform_shift,transform_shiftreduce,transform_reduce,transform_found) automaton ;; let rec mkparser (automaton: string automaton) : string parser_generator = let choose (input:input) : string combinator_parser = let transform_shift ((p,choices) :symbol * (symbol*symbol automaton) list) : string combinator_parser = let table : (symbol,symbol automaton) BraunTree.braun_tree = BraunTree.tree_of_list choices in let find key = BraunTree.find ~key ~tree:table in let parse cont stack errors input : ('a*errors) steps = match input with | [] -> let error = errors (Inserted (p,"Insert at end of file",Notused "")) in Stop (stack p,error) | (h::t) -> begin match find h with | Some automaton -> Ok ((mkparser automaton).generated.parse cont stack errors t) | None -> let errors error = errors (Deleted (h,"Deleted symbol",error)) in let errors error = errors (Inserted (p,"Insert symbol",error)) in Fail (Fail (Stop (stack p,errors (Notused h)))) end in {parse=parse} in let transform_shiftreduce ((shift,reduce) : symbol automaton * symbol automaton ) = let parse cont stack errors input = (best_parser (mkparser shift).generated (mkparser reduce).generated).parse cont stack errors input in parse in let transform_reduce (reduce:symbol) = let parse cont stack errors input = reduce cont stack errors input in parse in let transform_found ((found,more):symbol*symbol automaton) = let parse cont stack errors input = found cont stack errors input in {parse=parse} in transform_automaton (transform_shift,transform_shiftreduce,transform_reduce,transform_found) automaton in let parse cont stack errors input = (choose input).parse cont stack errors input in {automaton=automaton;parse=parse} ;; (** <|> *) let either : ('a parser_generator * 'a parser_generator) -> 'a parser_generator = fun (p,q) -> mkparser (merge_ch p.automaton q.automaton) ;; let rec combine (lefts: (symbol * 'p automaton) list ) (rights: (symbol * 'p automaton) list ) : (symbol * 'p automaton) list = match (lefts,rights) with | ((((left_symbol,left_sentence) as left_head)::left_tail) ,(((right_symbol,right_sentence) as right_head)::right_tail) ) -> begin match compare left_symbol right_symbol with | 1 -> left_head::(combine left_tail rights) | -1 -> right_head::(combine lefts right_tail) | 0 -> let head = (left_symbol,either(left_sentence,right_sentence)) in let tail = (combine left_tail right_tail) in head::tail | _ -> assert false end | ([],_) -> rights | (_,[]) -> lefts ;; (** <*> *) let rec both : ('a parser_generator * 'a parser_generator) -> 'a parser_generator = fun (p,q) -> (** Use two combinator parsers in sequence * a both for combinator parsers *) let both_combinator_parsers first second = let parse cont stack errors input = let stack f x = stack (f x) in first.parse (second.parse cont) stack errors input in {parse=parse} in let transform_shift (p,choices) = Shift (both_combinator_parsers p q.generated,choices) in let transform_shiftreduce (shift,reduce) = merge_ch shift reduce in let transform_reduce reduce = let worker x = fwby reduce x in map_automaton worker q.automaton in let transform_found (found,more) = Found (both_combinator_parsers found q.generated,more) in let automaton = transform_automaton (transform_shift ,transform_shiftreduce ,transform_reduce ,transform_found) p.automaton in mkparser automaton ;; let merge_ch left right = match (left,right) with | (Shift (left_parser,left_choices),Shift (right_parser,right_choices)) -> let best = best_parser left_parser right_parser in let choices = combine left_choices right_choices in Shift (best,choices) | (Shift _,ShiftReduce (shift,reduce)) -> ShiftReduce (merge_ch left shift,reduce) | (Shift _,Reduce _) -> ShiftReduce (left,right) | (Shift _,Found (_,more)) ->merge_ch left more | (Found (_,more),_) -> merge_ch more right | (_,Shift _) -> merge_ch right left | (ShiftReduce _,_) | (Reduce _,_) -> raise Ambigous_grammar ;; let symbol (a:symbol) : symbol combinator_parser = let rec parse cont stack errors input = match input with | x::xs -> if a = x then Ok (cont (stack a) errors xs) else let deleted_x = let errors e = errors (Deleted (x,position xs,e)) in parse cont stack errors xs in let inserted_a = let errors e = errors (Inserted (a,show_symbol a,e)) in cont (stack x) errors input in Fail (best deleted_x inserted_a) | [] -> let errors e = errors (Inserted (a,eof,e)) in let inserted_a = cont (stack a) errors input in Fail inserted_a in let accept cont stack errors input = match input with | [] -> assert false | (x::xs) -> assert (a = x) ; Ok (cont (stack a) errors xs) in let shift = Shift ({parse=parse},[(a,Reduce {parse=accept})]) in let found = Found ({parse=parse},shift) in mkparser found ;; let succeed f = let parse cont stack errors input = cont (stack f) errors input in mkparser (End {parse=parse}) ;; end ;; ######## End code -- Sincerely | Homepage: Jørgen | http://www.hex.no/jhf | Public GPG key: | http://www.hex.no/jhf/key.txt