Thanks Jeremy for this enlightening post! I had not realized that it was possible to actually build the type of the expression using another GADT. This is sufficient, for instance, to extract expressions of a given type. I still have a question, though: what is the exact meaning of the _ character in the polymorphic type "< rn : 'a. int * 'a expr -> _>", and how is it useful/necessary for your example to run? Could your example be written without a record/object type, using polymorphic type annotations for functions?

Thanks again for your explanations, they are very valuable!
ph.

2013/1/5 Jeremy Yallop <yallop@gmail.com>
Dear Jeff,

On 4 January 2013 22:00, Jeff Meister <nanaki@gmail.com> wrote:
> However, by using an existential type like that, you're losing the
> additional type checking benefits of GADTs. The return type of parse_expr is
> now any_expr, not 'a expr. You can write e.g. the function extract_int_value
> : int expr -> int, where OCaml knows you don't need to match the Float case,
> but you'll never have an int expr to pass to this function, at least not as
> a result of parsing. Anything handling a parsed any_expr must match the Expr
> case, which of course can have any expr inside. At this point, it seems like
> just a cumbersome way to write the traditional expr type.
>
> I went through basically this same thought process while trying to
> understand how I could apply the new OCaml GADT features, and I concluded
> that GADTs weren't providing any extra utility in the case where they must
> be constructed by parsing an input string. That's a shame since parsing and
> transformation is such a canonical use of OCaml, so I would love to be
> proven wrong here!

The good news is that you can still enjoy the benefits of GADTs, even
when you need to construct values at runtime whose types you don't
know at compile time.  In fact, that's perhaps the situation where
GADTs are of most benefit: well-typed evaluators (to take the
canonical example) are much more useful when the set of input
expressions can vary at runtime.

Even the type string -> any_expr is sufficient to give a useful
guarantee about parse_expr, namely that if it returns at all, it
returns a well-typed AST, wrapped in an "Expr".  The actual type of
the AST can't be written down in the program, since it isn't known
until runtime, but you know nevertheless that it's a well-typed
expression whose subexpressions are likewise well-typed.  Importantly,
the compiler knows that too, and so you can pass your well-typed AST
to all the usual GADT-processing functions like 'eval', and enjoy all
the usual guarantees: if evaluation terminates then it produces a
value of the correct type, and so on.

Here's some code that demonstrates the idea.  It has all the pieces
for a toy interpreter with pairs and booleans, and you get GADT-based
guarantees all the way through: the parser either produces a
well-typed AST or raises an exception, the evaluator takes a
well-typed AST and produces a value of the same type, etc.  The types
guarantee that ill-typed input is detected during parsing; there's no
attempt to evaluate it.

  # read_eval_print "((!!T,F),(!T,F))";;
  - : string = "((T,F),(F,F))"
  # read_eval_print "((!!T,F),!(!T,F))";;
  Exception:
  Failure
   "Parsing failed at characters 10-16:
    Expected an expression of type bool
    but found (!T,F) of type (bool * bool)".


(* Well-typed abstract syntax for a small language of booleans and pairs.
   The type index of an expression is the OCaml type of the value it
   denotes, so only well-typed expressions can be constructed.
*)
type _ expr =
  | Fst  : ('l * 'r) expr -> 'l expr           (* first component of a pair *)
  | Snd  : ('l * 'r) expr -> 'r expr           (* second component of a pair *)
  | Pair : 'l expr * 'r expr -> ('l * 'r) expr (* pair construction *)
  | True : bool expr                           (* boolean constant *)
  | False : bool expr                          (* boolean constant *)
  | Not  : bool expr -> bool expr              (* boolean negation *)

(* Run-time representations of the types that expressions can have.

   A value of type 't typ stands for the type 't itself.  For instance,
   TPair (TBool, TBool) has type (bool * bool) typ and stands for the
   type bool * bool.
*)
type _ typ =
  | TBool : bool typ                          (* the type bool *)
  | TPair : 'l typ * 'r typ -> ('l * 'r) typ  (* a product of two types *)

(* [printer ty] builds a printer for values of the type represented by
   [ty], i.e. a function from such values to strings.  Booleans are
   rendered as T or F and pairs as a parenthesised, comma-separated pair.
   The component printers of a pair are built once, before the returned
   function is applied to a value.
*)
let rec printer : type a. a typ -> a -> string = function
  | TBool -> (fun b -> if b then "T" else "F")
  | TPair (left_ty, right_ty) ->
    let show_left = printer left_ty in
    let show_right = printer right_ty in
    fun (x, y) -> Printf.sprintf "(%s,%s)" (show_left x) (show_right y)

(* [show_type ty] formats the type representation [ty] as a string:
   TBool is rendered as "bool" and TPair (TBool, TBool) as
   "(bool * bool)".  There is no space after the opening parenthesis,
   so that pairs are rendered symmetrically.
*)
let rec show_type : type a. a typ -> string = function
  | TBool -> "bool"
  | TPair (l, r) -> "(" ^ show_type l ^ " * " ^ show_type r ^ ")"

(* The classic well-typed evaluator: an expression indexed by a type
   evaluates to an OCaml value of that very type, so no run-time tag
   checks are needed.
*)
let rec eval : type a. a expr -> a = function
  | True -> true
  | False -> false
  | Not operand -> not (eval operand)
  | Pair (left, right) -> (eval left, eval right)
  | Fst pair -> fst (eval pair)
  | Snd pair -> snd (eval pair)

(* [type_of e] computes the representation of the type of expression [e].
   For projections the type of the underlying pair is computed first and
   the relevant component is selected; a pair-typed expression can only
   have a TPair representation, so those inner matches have a single case.
*)
let rec type_of : type a. a expr -> a typ = function
  | True -> TBool
  | False -> TBool
  | Not _ -> TBool
  | Pair (left, right) -> TPair (type_of left, type_of right)
  | Fst pair -> (match type_of pair with TPair (left_ty, _) -> left_ty)
  | Snd pair -> (match type_of pair with TPair (_, right_ty) -> right_ty)

(* An existential wrapper that hides the type index of a well-typed AST.
   It lets a function return an AST whose type is only determined at
   run time, since that type cannot be named in the function's signature.
*)
type any_expr = Expr : 'ty expr -> any_expr

(* Raise [Failure] reporting that the parser expected the character
   [expected] at offset [pos] of the input [s] but found something else.

   When [pos] is at or beyond the end of [s] the message reports
   "end of input" instead of indexing the string, which would raise
   [Invalid_argument] and hide the parse error.
*)
let parsing_failure pos expected s =
  let found =
    if pos >= String.length s then "end of input"
    else String.make 1 s.[pos]
  in
  failwith (Printf.sprintf
              "Parsing failed at character %d: expected to find %c,\nbut found %s"
              pos expected found)

(* Raise [Failure] reporting that the slice of [s] between [start_pos]
   (inclusive) and [end_pos] (exclusive) is an expression whose type,
   [found_type], differs from the type the parser required,
   [expected_type].  The message spans three lines.
*)
let typing_failure start_pos end_pos s expected_type found_type =
  let offending = String.sub s start_pos (end_pos - start_pos) in
  let expected = show_type expected_type in
  let found = show_type found_type in
  let message =
    Printf.sprintf
      "Parsing failed at characters %d-%d:\nExpected an expression of type %s\nbut found %s of type %s"
      start_pos end_pos expected offending found
  in
  failwith message

(* Well-typed parser.  Rather hairy due to continuation-passing style with
   polymorphic continuation functions, represented as objects with a
   polymorphic method [rn] that receives the position after the parsed
   expression together with the expression itself.

   Grammar: T and F are the boolean constants, !e is negation and (e,e)
   is a pair.  Fst and Snd are not handled.

   Every malformed input is reported with [Failure]: an unexpected
   character, input that ends too early, or an ill-typed operand of '!'.
   Input remaining after a complete expression is ignored.
*)
let parse_expr : string -> any_expr =
  (* Report that the input ended at [pos] where [what] was required. *)
  let unexpected_end pos what =
    failwith (Printf.sprintf
                "Parsing failed at character %d: expected to find %s,\nbut found end of input"
                pos what)
  in
  (* Succeed only if the character of [s] at [pos] is [c]. *)
  let expect s pos c =
    if pos >= String.length s then unexpected_end pos (String.make 1 c)
    else if s.[pos] <> c then parsing_failure pos c s
  in
  let rec parse s pos (ret : < rn : 'a. int * 'a expr -> _>) =
    if pos >= String.length s then unexpected_end pos "an expression"
    else match s.[pos] with
      | 'T' -> ret#rn (pos + 1, True)
      | 'F' -> ret#rn (pos + 1, False)
      | '!' -> parse s (pos + 1) (object method rn : 'a. int * 'a expr -> _
        = fun (type a) (pos', (e : a expr)) ->
          (* Check that 'e' has boolean type before we can pass it to Not.
             This is more than just good practice: without the type-checking
             step the parser won't compile. *)
          let t = type_of e in
          match t with
            | TBool -> ret#rn (pos', Not e)
            | TPair _ -> typing_failure (pos + 1) pos' s TBool t end)
      | '(' -> parse s (pos + 1) (object method rn : 'a. int * 'a expr -> _
        = fun (pos, l) ->
          (* The left component must be followed by a comma... *)
          expect s pos ',';
          parse s (pos + 1) (object method rn : 'a. int * 'a expr -> _
            = fun (pos, r) ->
              (* ...and the right component by a closing parenthesis. *)
              expect s pos ')';
              ret#rn (pos + 1, Pair (l, r)) end) end)
      | c ->
        (* Any other character cannot start an expression. *)
        failwith (Printf.sprintf
                    "Parsing failed at character %d: expected to find an expression,\nbut found %c"
                    pos c)
  in
  (* The initial continuation wraps the finished AST in the existential
     and discards the final position. *)
  fun s -> parse s 0 (object method rn : 'a. int * 'a expr -> _ =
                               fun (_, l) -> Expr l end)


(* Parse the string [s], evaluate the resulting expression and render
   the value as a string, using a printer derived from the type of the
   expression.  For example,
     read_eval_print "((!!T,F),(!T,F))"
   gives
     "((T,F),(F,F))"
*)
let read_eval_print s =
  match parse_expr s with
  | Expr ast ->
    let show = printer (type_of ast) in
    show (eval ast)