Thanks Jeremy for this enlightening post! I had not seen the possibility to actually build the type of the expression, using another GADT. This is sufficient to extract expressions of a given type, for instance. I still have a question though: what is the exact meaning of the _ character in the polymorphic type "< rn : 'a. int * 'a expr -> _>" and how is it useful/necessary for your example to run? Could your example be written without a record/object type using polymorphic type annotations for functions? Thanks again for your explanations, they are very valuable! ph. 2013/1/5 Jeremy Yallop > Dear Jeff, > > On 4 January 2013 22:00, Jeff Meister wrote: > > However, by using an existential type like that, you're losing the > > additional type checking benefits of GADTs. The return type of > parse_expr is > > now any_expr, not 'a expr. You can write e.g. the function > extract_int_value > > : int expr -> int, where OCaml knows you don't need to match the Float > case, > > but you'll never have an int expr to pass to this function, at least not > as > > a result of parsing. Anything handling a parsed any_expr must match the > Expr > > case, which of course can have any expr inside. At this point, it seems > like > > just a cumbersome way to write the traditional expr type. > > > > I went through basically this same thought process while trying to > > understand how I could apply the new OCaml GADT features, and I concluded > > that GADTs weren't providing any extra utility in the case where they > must > > be constructed by parsing an input string. That's a shame since parsing > and > > transformation is such a canonical use of OCaml, so I would love to be > > proven wrong here! > > The good news is that you can still enjoy the benefits of GADTs, even > when you need to construct values at runtime whose types you don't > know at compile time. In fact, that's perhaps the situation where > GADTs are of most benefit: well-typed evaluators (to take the > canonical example) are much more useful when the set of input > expressions can vary at runtime. > > Even the type string -> any_expr is sufficient to give a useful > guarantee about parse_expr, namely that if it returns at all, it > returns a well-typed AST, wrapped in an "Expr". The actual type of > the AST can't be written down in the program, since it isn't known > until runtime, but you know nevertheless that it's a well-typed > expression whose subexpressions are likewise well-typed. Importantly, > the compiler knows that too, and so you can pass your well-typed AST > to all the usual GADT-processing functions like 'eval', and enjoy all > the usual guarantees: if evaluation terminates then it produces a > value of the correct type, and so on. > > Here's some code that demonstrates the idea. It has all the pieces > for a toy interpreter with pairs and booleans, and you get GADT-based > guarantees all the way through: the parser either produces a > well-typed AST or raises an exception, the evaluator takes a > well-typed AST and produces a value of the same type, etc. The types > guarantee that ill-typed input is detected during parsing; there's no > attempt to evaluate it. > > # read_eval_print "((!!T,F),(!T,F))";; > - : string = "((T,F),(F,F))" > # read_eval_print "((!!T,F),!(!T,F))";; > Exception: > Failure > "Parsing failed at characters 10-16: > Expected an expression of type bool > but found (!T,F) of type (bool * bool)". > > > (* A well-typed AST for an expression language of booleans and pairs. > *) > type _ expr = > | Fst : ('a * 'b) expr -> 'a expr > | Snd : ('a * 'b) expr -> 'b expr > | Pair : 'a expr * 'b expr -> ('a * 'b) expr > | True : bool expr > | False : bool expr > | Not : bool expr -> bool expr > > (* A concrete representation for types that can encode all the types > for which we can build expressions. > > A value of type 't typ represents the type 't. For example, TPair > (TBool, > TBool) of type (bool * bool) typ represents the type bool * bool. > *) > type _ typ = > | TBool : bool typ > | TPair : 'a typ * 'b typ -> ('a * 'b) typ > > (* `printer' takes a representation for a type 't and gives you a > printer for 't, > i.e. a function from 't to string > *) > let rec printer : 'a. 'a typ -> 'a -> string = > fun (type a) (t : a typ) -> > match t with > | TBool -> fun (b: a) -> if b then "T" else "F" > | TPair (l, r) -> let print_l = printer l > and print_r = printer r in > fun (l, r) -> "("^ print_l l ^","^ print_r r ^")" > > (* show_type formats type representations as strings > *) > let rec show_type : 'a. 'a typ -> string = > fun (type a) (e : a typ) -> > match e with > | TBool -> "bool" > | TPair (l, r) -> "( "^ show_type l ^" * "^ show_type r ^")" > > (* The famous well-typed evaluator > *) > let rec eval : 'a. 'a expr -> 'a = > fun (type a) (e : a expr) -> > match e with > | Fst pair -> fst (eval pair) > | Snd pair -> snd (eval pair) > | Pair (l, r) -> (eval l, eval r) > | True -> true > | False -> false > | Not e -> not (eval e) > > (* Construct a representation of the type of an expression > *) > let rec type_of : 'a. 'a expr -> 'a typ = > fun (type a) (e : a expr) -> > match e with > | Fst pair -> (match type_of pair with TPair (l, _) -> l) > | Snd pair -> (match type_of pair with TPair (_, r) -> r) > | Pair (l, r) -> TPair (type_of l, type_of r) > | True -> TBool > | False -> TBool > | Not _ -> TBool > > (* An existential to hide the type index of a well-typed AST, making > it possible to write functions that return constructed ASTs whose > type is not statically known. > *) > type any_expr = > | Expr : 'a expr -> any_expr > > (* Raise an error indicating that the parser encountered an unexpected > character. > *) > let parsing_failure pos expected s = > failwith (Printf.sprintf > "Parsing failed at character %d: expected to find %c, > but found %c" > pos expected s.[pos]) > > (* Raise an error indicating that the parser determined that part of > the input string contained an ill-typed expression. > *) > let typing_failure start_pos end_pos s expected_type found_type = > failwith (Printf.sprintf > "Parsing failed at characters %d-%d: > Expected an expression of type %s > but found %s of type %s" > start_pos end_pos (show_type expected_type) (String.sub > s start_pos (end_pos - start_pos)) > (show_type found_type)) > > (* Well-typed parser. Rather hairy due to continuation-passing style with > polymorphic continuation functions, represented as objects with > polymorphic > methods. Also incomplete -- it doesn't handle fst and snd -- and a bit > careless about error-checking. Perhaps sufficient to give a flavour. > *) > let parse_expr : string -> any_expr = > let rec parse s pos (ret : < rn : 'a. int * 'a expr -> _>) = > match s.[pos] with > | 'T' -> ret#rn (pos + 1, True) > | 'F' -> ret#rn (pos + 1, False) > | '!' -> parse s (pos + 1) (object method rn : 'a. int * 'a expr -> _ > = fun (type a) (pos', (e : a expr)) -> > (* Check that 'e' has boolean type before we can parse it to Not. > This is more than just good practice: without the > type-checking > step the parser won't compile. *) > match type_of e with > | TBool -> ret#rn (pos', Not e) > | t -> typing_failure (pos + 1) pos' s TBool t end) > | '(' -> parse s (pos + 1) (object method rn : 'a. int * 'a expr > -> _ = fun (pos, l) -> > if s.[pos] <> ',' then parsing_failure pos ',' s > else parse s (pos + 1) (object method rn : 'a. int * 'a expr > -> _ = fun (pos, r) -> > if s.[pos] <> ')' then parsing_failure pos ')' s > else ret#rn (pos + 1, Pair (l, r)) end) end) > in > fun s -> parse s 0 (object method rn : 'a. int * 'a expr -> _ = > fun (_, l) -> Expr l end) > > > (* read_eval_print "((!!T,F),(!T,F))" => > "((T,F),(F,F))" > *) > let read_eval_print s = > let Expr e = parse_expr s in > let ty = type_of e in > let print_e = printer ty in > print_e (eval e) >