Thanks Jeremy for this enlightening post! I had not realized that it was
possible to actually build a representation of the expression's type using
another GADT. This is sufficient to extract expressions of a given type,
for instance. I still have a question though: what is the exact meaning of
the _ character in the polymorphic type "< rn : 'a. int * 'a expr -> _>",
and why is it useful or necessary for your example to run? Could your
example be written without a record/object type, using polymorphic type
annotations for functions instead?

Thanks again for your explanations, they are very valuable!
ph.

2013/1/5 Jeremy Yallop <yallop@gmail.com>

> Dear Jeff,
>
> On 4 January 2013 22:00, Jeff Meister <nanaki@gmail.com> wrote:
> > However, by using an existential type like that, you're losing the
> > additional type checking benefits of GADTs. The return type of
> > parse_expr is now any_expr, not 'a expr. You can write e.g. the
> > function extract_int_value : int expr -> int, where OCaml knows you
> > don't need to match the Float case, but you'll never have an int expr
> > to pass to this function, at least not as a result of parsing.
> > Anything handling a parsed any_expr must match the Expr case, which of
> > course can have any expr inside. At this point, it seems like just a
> > cumbersome way to write the traditional expr type.
> >
> > I went through basically this same thought process while trying to
> > understand how I could apply the new OCaml GADT features, and I
> > concluded that GADTs weren't providing any extra utility in the case
> > where they must be constructed by parsing an input string. That's a
> > shame since parsing and transformation is such a canonical use of
> > OCaml, so I would love to be proven wrong here!
>
> The good news is that you can still enjoy the benefits of GADTs, even
> when you need to construct values at runtime whose types you don't
> know at compile time.  In fact, that's perhaps the situation where
> GADTs are of most benefit: well-typed evaluators (to take the
> canonical example) are much more useful when the set of input
> expressions can vary at runtime.
>
> Even the type string -> any_expr is sufficient to give a useful
> guarantee about parse_expr, namely that if it returns at all, it
> returns a well-typed AST, wrapped in an "Expr".  The actual type of
> the AST can't be written down in the program, since it isn't known
> until runtime, but you know nevertheless that it's a well-typed
> expression whose subexpressions are likewise well-typed.  Importantly,
> the compiler knows that too, and so you can pass your well-typed AST
> to all the usual GADT-processing functions like 'eval', and enjoy all
> the usual guarantees: if evaluation terminates then it produces a
> value of the correct type, and so on.
>
> Here's some code that demonstrates the idea.  It has all the pieces
> for a toy interpreter with pairs and booleans, and you get GADT-based
> guarantees all the way through: the parser either produces a
> well-typed AST or raises an exception, the evaluator takes a
> well-typed AST and produces a value of the same type, etc.  The types
> guarantee that ill-typed input is detected during parsing; there's no
> attempt to evaluate it.
>
>   # read_eval_print "((!!T,F),(!T,F))";;
>   - : string = "((T,F),(F,F))"
>   # read_eval_print "((!!T,F),!(!T,F))";;
>   Exception:
>   Failure
>    "Parsing failed at characters 10-16:
>     Expected an expression of type bool
>     but found (!T,F) of type (bool * bool)".
>
>
> (* A well-typed AST for an expression language of booleans and pairs.
>    The type parameter of an expression is the OCaml type of the value it
>    evaluates to (see `eval'), so the constructor signatures below only
>    admit well-typed terms: for instance Not can only be applied to a
>    bool expr, and Fst/Snd only to an expression of pair type.
> *)
> type _ expr =
>   (* First projection of a pair-typed expression. *)
>   | Fst  : ('a * 'b) expr -> 'a expr
>   (* Second projection of a pair-typed expression. *)
>   | Snd  : ('a * 'b) expr -> 'b expr
>   (* Pair built from two sub-expressions of arbitrary types. *)
>   | Pair : 'a expr * 'b expr -> ('a * 'b) expr
>   (* Boolean literals. *)
>   | True : bool expr
>   | False : bool expr
>   (* Boolean negation. *)
>   | Not  : bool expr -> bool expr
>
> (* A concrete representation for types that can encode all the types
>    for which we can build expressions.
>
>    A value of type 't typ represents the type 't. For example,
>    TPair (TBool, TBool) of type (bool * bool) typ represents the type
>    bool * bool.
> *)
> type _ typ =
>   (* TBool represents the type bool. *)
>   | TBool : bool typ
>   (* TPair (l, r) represents the pair type whose components are the
>      types represented by l and r. *)
>   | TPair : 'a typ * 'b typ -> ('a * 'b) typ
>
> (* `printer' maps a representation of a type 't to a printer for 't,
>    i.e. a function of type 't -> string.  Booleans are printed as "T" or
>    "F"; a pair is printed as its two components, comma-separated and
>    wrapped in parentheses.  The component printers of a pair are built
>    before the resulting closure is returned.
> *)
> let rec printer : type a. a typ -> a -> string = function
>   | TBool -> (fun (b : a) -> if b then "T" else "F")
>   | TPair (left_ty, right_ty) ->
>     let print_left = printer left_ty in
>     let print_right = printer right_ty in
>     (fun (x, y) -> "(" ^ print_left x ^ "," ^ print_right y ^ ")")
>
> (* show_type formats a type representation as a string: TBool is shown
>    as "bool" and TPair (l, r) as "(<l> * <r>)", e.g. "(bool * bool)".
>    The result is used in the messages built by typing_failure.
> *)
> let rec show_type : type a. a typ -> string = function
>   | TBool -> "bool"
>   (* No space after the opening parenthesis, so that a pair type reads
>      "(bool * bool)" rather than "( bool * bool)". *)
>   | TPair (l, r) -> "(" ^ show_type l ^ " * " ^ show_type r ^ ")"
>
> (* The famous well-typed evaluator: an expression of type 'a expr
>    evaluates to an OCaml value of type 'a.  Every constructor of expr is
>    handled.
> *)
> let rec eval : type a. a expr -> a = function
>   | True -> true
>   | False -> false
>   | Not operand -> not (eval operand)
>   | Pair (left, right) -> (eval left, eval right)
>   | Fst p -> fst (eval p)
>   | Snd p -> snd (eval p)
>
> (* Construct a representation of the type of an expression.  For Fst
>    and Snd the type of the underlying pair expression is computed first
>    and the relevant component of its TPair is returned; TPair is the only
>    typ constructor whose result type is a pair, hence the single-case
>    matches.
> *)
> let rec type_of : type a. a expr -> a typ = function
>   | True -> TBool
>   | False -> TBool
>   | Not _ -> TBool
>   | Pair (left, right) -> TPair (type_of left, type_of right)
>   | Fst p -> (match type_of p with TPair (first, _) -> first)
>   | Snd p -> (match type_of p with TPair (_, second) -> second)
>
> (* An existential to hide the type index of a well-typed AST, making
>    it possible to write functions that return constructed ASTs whose
>    type is not statically known.  parse_expr returns its result wrapped
>    in Expr, and read_eval_print unpacks it again by matching on Expr.
> *)
> type any_expr =
>   | Expr : 'a expr -> any_expr
>
> (* Raise Failure with a message saying that, at index `pos' of `s', the
>    parser expected the character `expected' but found s.[pos] instead.
>    `pos' must be a valid index into `s', since s.[pos] is read to build
>    the message.
> *)
> let parsing_failure pos expected s =
>   let found = s.[pos] in
>   let message =
>     Printf.sprintf
>               "Parsing failed at character %d: expected to find %c,
> but found %c"
>       pos expected found
>   in
>   failwith message
>
> (* Raise Failure with a message saying that the slice of `s' from index
>    `start_pos' (inclusive) to `end_pos' (exclusive) holds an expression
>    of type `found_type' where one of type `expected_type' was required.
>    Both types are rendered with show_type.
> *)
> let typing_failure start_pos end_pos s expected_type found_type =
>   let offending = String.sub s start_pos (end_pos - start_pos) in
>   let expected = show_type expected_type in
>   let found = show_type found_type in
>   let message =
>     Printf.sprintf
>               "Parsing failed at characters %d-%d:
> Expected an expression of type %s
> but found %s of type %s"
>       start_pos end_pos expected offending found
>   in
>   failwith message
>
> (* Well-typed parser.  Rather hairy due to continuation-passing style with
>    polymorphic continuation functions, represented as objects with
>    polymorphic methods: `parse s pos ret' parses one expression starting
>    at index `pos' of `s' and passes the index just past it, together with
>    the expression, to ret#rn.
>
>    Grammar: T | F | !e | (e,e).  Still incomplete -- it doesn't handle fst
>    and snd, and any input that follows the first complete expression is
>    ignored.
>
>    Every parse error is reported as Failure: an unexpected character, a
>    premature end of input, or an ill-typed operand of `!'.
> *)
> let parse_expr : string -> any_expr =
>   (* Raise Failure for position `pos', in the style of parsing_failure. *)
>   let fail_at pos what =
>     failwith (Printf.sprintf "Parsing failed at character %d: %s" pos what)
>   in
>   (* Check that s.[pos] is `expected'.  The bounds test comes first, so
>      that truncated input is reported as a parse failure rather than as an
>      out-of-bounds string access. *)
>   let expect s pos expected =
>     if pos >= String.length s then
>       fail_at pos
>         (Printf.sprintf "expected to find %c, but reached end of input"
>            expected)
>     else if s.[pos] <> expected then parsing_failure pos expected s
>   in
>   let rec parse s pos (ret : < rn : 'a. int * 'a expr -> _>) =
>     if pos >= String.length s then fail_at pos "unexpected end of input"
>     else
>       match s.[pos] with
>       | 'T' -> ret#rn (pos + 1, True)
>       | 'F' -> ret#rn (pos + 1, False)
>       | '!' ->
>         parse s (pos + 1)
>           (object
>             method rn : 'a. int * 'a expr -> _ =
>               fun (type a) (pos', (e : a expr)) ->
>                 (* Check that 'e' has boolean type before we can pass it
>                    to Not.  This is more than just good practice: without
>                    the type-checking step the parser won't compile. *)
>                 match type_of e with
>                 | TBool -> ret#rn (pos', Not e)
>                 | t -> typing_failure (pos + 1) pos' s TBool t
>           end)
>       | '(' ->
>         parse s (pos + 1)
>           (object
>             method rn : 'a. int * 'a expr -> _ =
>               fun (pos, l) ->
>                 expect s pos ',';
>                 parse s (pos + 1)
>                   (object
>                     method rn : 'a. int * 'a expr -> _ =
>                       fun (pos, r) ->
>                         expect s pos ')';
>                         ret#rn (pos + 1, Pair (l, r))
>                   end)
>           end)
>       (* Any other character cannot start an expression. *)
>       | c -> fail_at pos (Printf.sprintf "unexpected character %c" c)
>   in
>   (* The final continuation drops the end position and hides the type of
>      the parsed expression behind Expr. *)
>   fun s -> parse s 0 (object method rn : 'a. int * 'a expr -> _ =
>                                fun (_, l) -> Expr l end)
>
>
> (* Parse `s', evaluate the resulting expression and render the value as
>    a string, using the printer selected by the expression's type.
>
>    read_eval_print "((!!T,F),(!T,F))" =>
>    "((T,F),(F,F))"
> *)
> let read_eval_print s =
>   match parse_expr s with
>   | Expr ast ->
>     let show = printer (type_of ast) in
>     show (eval ast)
>