From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Original-To: caml-list@sympa.inria.fr Delivered-To: caml-list@sympa.inria.fr Received: from mail1-relais-roc.national.inria.fr (mail1-relais-roc.national.inria.fr [192.134.164.82]) by sympa.inria.fr (Postfix) with ESMTPS id CE8E47EE94 for ; Sat, 5 Jan 2013 14:40:26 +0100 (CET) Received-SPF: None (mail1-smtp-roc.national.inria.fr: no sender authenticity information available from domain of philippe.veber@gmail.com) identity=pra; client-ip=209.85.214.180; receiver=mail1-smtp-roc.national.inria.fr; envelope-from="philippe.veber@gmail.com"; x-sender="philippe.veber@gmail.com"; x-conformance=sidf_compatible Received-SPF: Pass (mail1-smtp-roc.national.inria.fr: domain of philippe.veber@gmail.com designates 209.85.214.180 as permitted sender) identity=mailfrom; client-ip=209.85.214.180; receiver=mail1-smtp-roc.national.inria.fr; envelope-from="philippe.veber@gmail.com"; x-sender="philippe.veber@gmail.com"; x-conformance=sidf_compatible; x-record-type="v=spf1" Received-SPF: None (mail1-smtp-roc.national.inria.fr: no sender authenticity information available from domain of postmaster@mail-ob0-f180.google.com) identity=helo; client-ip=209.85.214.180; receiver=mail1-smtp-roc.national.inria.fr; envelope-from="philippe.veber@gmail.com"; x-sender="postmaster@mail-ob0-f180.google.com"; x-conformance=sidf_compatible X-IronPort-Anti-Spam-Filtered: true X-IronPort-Anti-Spam-Result: Ak0CAK0s6FDRVda0jmdsb2JhbABFvU4IFg4BAQEBCQsJCRIGI4IeAQEEAScZARsdAQMBCwYFCw0uIQEBEQEFARwGExqHagEDCQaZQowzgnuEKwoZJw1ZhhABBQyLXIUtA5Q1gVaLN4MxFimEFw X-IronPort-AV: E=Sophos;i="4.84,415,1355094000"; d="scan'208";a="188554572" Received: from mail-ob0-f180.google.com ([209.85.214.180]) by mail1-smtp-roc.national.inria.fr with ESMTP/TLS/RC4-SHA; 05 Jan 2013 14:40:25 +0100 Received: by mail-ob0-f180.google.com with SMTP id wd20so15333773obb.39 for ; Sat, 05 Jan 2013 05:40:24 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=mime-version:in-reply-to:references:from:date:message-id:subject:to :cc:content-type; bh=4jD5fvHL33WObI8ffC76CNL7Waegn4Uqua2iO5bVHVw=; b=VsgQW+7IJ5mJWFDfJFdiRtCth7CQHzbqzSekOcCXUS/V4i8SzWvQ9P8QkLj/kwSnL4 z9RkuWbw5ecBoS5z7yeoRpejTprR1oVHy0ihA2yFJhJj8LCLLk5GplsU7+hdInogMz4W qJRA/mfFofCz6QazQXVN9WAFxI0+13sga4ST5LkbqqdvuYtOlIrBrlL0C5ELmgS5lxqu EGxpgDiIN6NxrJQUevwxFSm2SKlh16JfJEYx2LR5B+9a2QUxzbgBb6XIyPOXB8noKhKG 2PAwLmyt1NAFK+zDpvddiaCPKnXTAjhYMkbFsBK1E2ctnCRvrCKmi+MPKjFtOa9+eVfb vNug== Received: by 10.60.10.227 with SMTP id l3mr30837118oeb.119.1357393224247; Sat, 05 Jan 2013 05:40:24 -0800 (PST) MIME-Version: 1.0 Received: by 10.76.151.102 with HTTP; Sat, 5 Jan 2013 05:40:03 -0800 (PST) In-Reply-To: References: <50E701D3.9070008@free.fr> From: Philippe Veber Date: Sat, 5 Jan 2013 14:40:03 +0100 Message-ID: To: Jeremy Yallop Cc: Jeff Meister , Tiphaine Turpin , Caml List Content-Type: multipart/alternative; boundary=e89a8fb1f674f3ab5a04d28abc81 X-Validation-by: philippe.veber@gmail.com Subject: Re: [Caml-list] [GADT noob question] Building a GADT from an untyped representation --e89a8fb1f674f3ab5a04d28abc81 Content-Type: text/plain; charset=ISO-8859-1 Thanks Jeremy for this enlightening post! I had not seen the possibility to actually build the type of the expression, using another GADT. This is sufficient to extract expressions of a given type, for instance. I still have a question though: what is the exact meaning of the _ character in the polymorphic type "< rn : 'a. int * 'a expr -> _>" and how is it useful/necessary for your example to run? Could your example be written without a record/object type using polymorphic type annotations for functions? Thanks again for your explanations, they are very valuable! ph. 2013/1/5 Jeremy Yallop > Dear Jeff, > > On 4 January 2013 22:00, Jeff Meister wrote: > > However, by using an existential type like that, you're losing the > > additional type checking benefits of GADTs. The return type of > parse_expr is > > now any_expr, not 'a expr. You can write e.g. the function > extract_int_value > > : int expr -> int, where OCaml knows you don't need to match the Float > case, > > but you'll never have an int expr to pass to this function, at least not > as > > a result of parsing. Anything handling a parsed any_expr must match the > Expr > > case, which of course can have any expr inside. At this point, it seems > like > > just a cumbersome way to write the traditional expr type. > > > > I went through basically this same thought process while trying to > > understand how I could apply the new OCaml GADT features, and I concluded > > that GADTs weren't providing any extra utility in the case where they > must > > be constructed by parsing an input string. That's a shame since parsing > and > > transformation is such a canonical use of OCaml, so I would love to be > > proven wrong here! > > The good news is that you can still enjoy the benefits of GADTs, even > when you need to construct values at runtime whose types you don't > know at compile time. In fact, that's perhaps the situation where > GADTs are of most benefit: well-typed evaluators (to take the > canonical example) are much more useful when the set of input > expressions can vary at runtime. > > Even the type string -> any_expr is sufficient to give a useful > guarantee about parse_expr, namely that if it returns at all, it > returns a well-typed AST, wrapped in an "Expr". The actual type of > the AST can't be written down in the program, since it isn't known > until runtime, but you know nevertheless that it's a well-typed > expression whose subexpressions are likewise well-typed. Importantly, > the compiler knows that too, and so you can pass your well-typed AST > to all the usual GADT-processing functions like 'eval', and enjoy all > the usual guarantees: if evaluation terminates then it produces a > value of the correct type, and so on. > > Here's some code that demonstrates the idea. It has all the pieces > for a toy interpreter with pairs and booleans, and you get GADT-based > guarantees all the way through: the parser either produces a > well-typed AST or raises an exception, the evaluator takes a > well-typed AST and produces a value of the same type, etc. The types > guarantee that ill-typed input is detected during parsing; there's no > attempt to evaluate it. > > # read_eval_print "((!!T,F),(!T,F))";; > - : string = "((T,F),(F,F))" > # read_eval_print "((!!T,F),!(!T,F))";; > Exception: > Failure > "Parsing failed at characters 10-16: > Expected an expression of type bool > but found (!T,F) of type (bool * bool)". > > > (* A well-typed AST for an expression language of booleans and pairs. > *) > type _ expr = > | Fst : ('a * 'b) expr -> 'a expr > | Snd : ('a * 'b) expr -> 'b expr > | Pair : 'a expr * 'b expr -> ('a * 'b) expr > | True : bool expr > | False : bool expr > | Not : bool expr -> bool expr > > (* A concrete representation for types that can encode all the types > for which we can build expressions. > > A value of type 't typ represents the type 't. For example, TPair > (TBool, > TBool) of type (bool * bool) typ represents the type bool * bool. > *) > type _ typ = > | TBool : bool typ > | TPair : 'a typ * 'b typ -> ('a * 'b) typ > > (* `printer' takes a representation for a type 't and gives you a > printer for 't, > i.e. a function from 't to string > *) > let rec printer : 'a. 'a typ -> 'a -> string = > fun (type a) (t : a typ) -> > match t with > | TBool -> fun (b: a) -> if b then "T" else "F" > | TPair (l, r) -> let print_l = printer l > and print_r = printer r in > fun (l, r) -> "("^ print_l l ^","^ print_r r ^")" > > (* show_type formats type representations as strings > *) > let rec show_type : 'a. 'a typ -> string = > fun (type a) (e : a typ) -> > match e with > | TBool -> "bool" > | TPair (l, r) -> "( "^ show_type l ^" * "^ show_type r ^")" > > (* The famous well-typed evaluator > *) > let rec eval : 'a. 'a expr -> 'a = > fun (type a) (e : a expr) -> > match e with > | Fst pair -> fst (eval pair) > | Snd pair -> snd (eval pair) > | Pair (l, r) -> (eval l, eval r) > | True -> true > | False -> false > | Not e -> not (eval e) > > (* Construct a representation of the type of an expression > *) > let rec type_of : 'a. 'a expr -> 'a typ = > fun (type a) (e : a expr) -> > match e with > | Fst pair -> (match type_of pair with TPair (l, _) -> l) > | Snd pair -> (match type_of pair with TPair (_, r) -> r) > | Pair (l, r) -> TPair (type_of l, type_of r) > | True -> TBool > | False -> TBool > | Not _ -> TBool > > (* An existential to hide the type index of a well-typed AST, making > it possible to write functions that return constructed ASTs whose > type is not statically known. > *) > type any_expr = > | Expr : 'a expr -> any_expr > > (* Raise an error indicating that the parser encountered an unexpected > character. > *) > let parsing_failure pos expected s = > failwith (Printf.sprintf > "Parsing failed at character %d: expected to find %c, > but found %c" > pos expected s.[pos]) > > (* Raise an error indicating that the parser determined that part of > the input string contained an ill-typed expression. > *) > let typing_failure start_pos end_pos s expected_type found_type = > failwith (Printf.sprintf > "Parsing failed at characters %d-%d: > Expected an expression of type %s > but found %s of type %s" > start_pos end_pos (show_type expected_type) (String.sub > s start_pos (end_pos - start_pos)) > (show_type found_type)) > > (* Well-typed parser. Rather hairy due to continuation-passing style with > polymorphic continuation functions, represented as objects with > polymorphic > methods. Also incomplete -- it doesn't handle fst and snd -- and a bit > careless about error-checking. Perhaps sufficient to give a flavour. > *) > let parse_expr : string -> any_expr = > let rec parse s pos (ret : < rn : 'a. int * 'a expr -> _>) = > match s.[pos] with > | 'T' -> ret#rn (pos + 1, True) > | 'F' -> ret#rn (pos + 1, False) > | '!' -> parse s (pos + 1) (object method rn : 'a. int * 'a expr -> _ > = fun (type a) (pos', (e : a expr)) -> > (* Check that 'e' has boolean type before we can parse it to Not. > This is more than just good practice: without the > type-checking > step the parser won't compile. *) > match type_of e with > | TBool -> ret#rn (pos', Not e) > | t -> typing_failure (pos + 1) pos' s TBool t end) > | '(' -> parse s (pos + 1) (object method rn : 'a. int * 'a expr > -> _ = fun (pos, l) -> > if s.[pos] <> ',' then parsing_failure pos ',' s > else parse s (pos + 1) (object method rn : 'a. int * 'a expr > -> _ = fun (pos, r) -> > if s.[pos] <> ')' then parsing_failure pos ')' s > else ret#rn (pos + 1, Pair (l, r)) end) end) > in > fun s -> parse s 0 (object method rn : 'a. int * 'a expr -> _ = > fun (_, l) -> Expr l end) > > > (* read_eval_print "((!!T,F),(!T,F))" => > "((T,F),(F,F))" > *) > let read_eval_print s = > let Expr e = parse_expr s in > let ty = type_of e in > let print_e = printer ty in > print_e (eval e) > --e89a8fb1f674f3ab5a04d28abc81 Content-Type: text/html; charset=ISO-8859-1 Content-Transfer-Encoding: quoted-printable Thanks Jeremy for this enlightening post! I had not seen the possibility to= actually build the type of the expression, using another GADT. This is suf= ficient to extract expressions of a given type, for instance. I still have = a question though: what is the exact meaning of the _ character in the poly= morphic type "< rn : 'a. int * 'a expr -> _>" an= d how is it useful/necessary for your example to run? Could your example be= written without a record/object type using polymorphic type annotations fo= r functions?

Thanks again for your explanations, they are very valuable!
ph.
<= br>
2013/1/5 Jeremy Yallop <= yallop@gmail.com&= gt;
Dear Jeff,

On 4 January 2013 22:00, Jeff Meister <nanaki@gmail.com> wrote:
> However, by using an existential type like that, you're losing the=
> additional type checking benefits of GADTs. The return type of parse_e= xpr is
> now any_expr, not 'a expr. You can write e.g. the function extract= _int_value
> : int expr -> int, where OCaml knows you don't need to match th= e Float case,
> but you'll never have an int expr to pass to this function, at lea= st not as
> a result of parsing. Anything handling a parsed any_expr must match th= e Expr
> case, which of course can have any expr inside. At this point, it seem= s like
> just a cumbersome way to write the traditional expr type.
>
> I went through basically this same thought process while trying to
> understand how I could apply the new OCaml GADT features, and I conclu= ded
> that GADTs weren't providing any extra utility in the case where t= hey must
> be constructed by parsing an input string. That's a shame since pa= rsing and
> transformation is such a canonical use of OCaml, so I would love to be=
> proven wrong here!

The good news is that you can still enjoy the benefits of GADTs, even=
when you need to construct values at runtime whose types you don't
know at compile time. =A0In fact, that's perhaps the situation where
GADTs are of most benefit: well-typed evaluators (to take the
canonical example) are much more useful when the set of input
expressions can vary at runtime.

Even the type string -> any_expr is sufficient to give a useful
guarantee about parse_expr, namely that if it returns at all, it
returns a well-typed AST, wrapped in an "Expr". =A0The actual typ= e of
the AST can't be written down in the program, since it isn't known<= br> until runtime, but you know nevertheless that it's a well-typed
expression whose subexpressions are likewise well-typed. =A0Importantly,
the compiler knows that too, and so you can pass your well-typed AST
to all the usual GADT-processing functions like 'eval', and enjoy a= ll
the usual guarantees: if evaluation terminates then it produces a
value of the correct type, and so on.

Here's some code that demonstrates the idea. =A0It has all the pieces for a toy interpreter with pairs and booleans, and you get GADT-based
guarantees all the way through: the parser either produces a
well-typed AST or raises an exception, the evaluator takes a
well-typed AST and produces a value of the same type, etc. =A0The types
guarantee that ill-typed input is detected during parsing; there's no attempt to evaluate it.

=A0 # read_eval_print "((!!T,F),(!T,F))";;
=A0 - : string =3D "((T,F),(F,F))"
=A0 # read_eval_print "((!!T,F),!(!T,F))";;
=A0 Exception:
=A0 Failure
=A0 =A0"Parsing failed at characters 10-16:
=A0 =A0 Expected an expression of type bool
=A0 =A0 but found (!T,F) of type (bool * bool)".


(* A well-typed AST for an expression language of booleans and pairs.
*)
type _ expr =3D
=A0 | Fst =A0: ('a * 'b) expr -> 'a expr
=A0 | Snd =A0: ('a * 'b) expr -> 'b expr
=A0 | Pair : 'a expr * 'b expr -> ('a * 'b) expr
=A0 | True : bool expr
=A0 | False : bool expr
=A0 | Not =A0: bool expr -> bool expr

(* A concrete representation for types that can encode all the types
=A0 =A0for which we can build expressions.

=A0 =A0A value of type 't typ represents the type 't. For example, = TPair (TBool,
=A0 =A0TBool) of type (bool * bool) typ represents the type bool * bool.
*)
type _ typ =3D
=A0 | TBool : bool typ
=A0 | TPair : 'a typ * 'b typ -> ('a * 'b) typ

(* `printer' takes a representation for a type 't and gives you a printer for 't,
=A0 =A0i.e. a function from 't to string
*)
let rec printer : 'a. 'a typ -> 'a -> string =3D
=A0 fun (type a) (t : a typ) ->
=A0 =A0 match t with
=A0 =A0 =A0 | TBool -> fun (b: a) -> if b then "T" else &qu= ot;F"
=A0 =A0 =A0 | TPair (l, r) -> let print_l =3D printer l
=A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 and print_r =3D printer r i= n
=A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 fun (l, r) -> "(&qu= ot;^ print_l l ^","^ print_r r ^")"

(* show_type formats type representations as strings
*)
let rec show_type : 'a. 'a typ -> string =3D
=A0 fun (type a) (e : a typ) ->
=A0 =A0 match e with
=A0 =A0 =A0 | TBool -> "bool"
=A0 =A0 =A0 | TPair (l, r) -> "( "^ show_type l ^" * &quo= t;^ show_type r ^")"

(* The famous well-typed evaluator
*)
let rec eval : 'a. 'a expr -> 'a =A0=3D
=A0 fun (type a) (e : a expr) ->
=A0 =A0 match e with
=A0 =A0 =A0 | Fst pair =A0 =A0-> fst (eval pair)
=A0 =A0 =A0 | Snd pair =A0 =A0-> snd (eval pair)
=A0 =A0 =A0 | Pair (l, r) -> (eval l, eval r)
=A0 =A0 =A0 | True =A0 =A0 =A0 =A0-> true
=A0 =A0 =A0 | False =A0 =A0 =A0 -> false
=A0 =A0 =A0 | Not e =A0 =A0 =A0 -> not (eval e)

(* Construct a representation of the type of an expression
*)
let rec type_of : 'a. 'a expr -> 'a typ =3D
=A0 fun (type a) (e : a expr) ->
=A0 =A0 match e with
=A0 =A0 =A0 | Fst pair =A0 =A0-> (match type_of pair with TPair (l, _) -= > l)
=A0 =A0 =A0 | Snd pair =A0 =A0-> (match type_of pair with TPair (_, r) -= > r)
=A0 =A0 =A0 | Pair (l, r) -> TPair (type_of l, type_of r)
=A0 =A0 =A0 | True =A0 =A0 =A0 =A0-> TBool
=A0 =A0 =A0 | False =A0 =A0 =A0 -> TBool
=A0 =A0 =A0 | Not _ =A0 =A0 =A0 -> TBool

(* An existential to hide the type index of a well-typed AST, making
=A0 =A0it possible to write functions that return constructed ASTs whose
=A0 =A0type is not statically known.
*)
type any_expr =3D
=A0 | Expr : 'a expr -> any_expr

(* Raise an error indicating that the parser encountered an unexpected
=A0 =A0character.
*)
let parsing_failure pos expected s =3D
=A0 failwith (Printf.sprintf
=A0 =A0 =A0 =A0 =A0 =A0 =A0 "Parsing failed at character %d: expected = to find %c,
but found %c"
=A0 =A0 =A0 =A0 =A0 =A0 =A0 pos expected s.[pos])

(* Raise an error indicating that the parser determined that part of
=A0 =A0the input string contained an ill-typed expression.
*)
let typing_failure start_pos end_pos s expected_type found_type =3D
=A0 failwith (Printf.sprintf
=A0 =A0 =A0 =A0 =A0 =A0 =A0 "Parsing failed at characters %d-%d:
Expected an expression of type %s
but found %s of type %s"
=A0 =A0 =A0 =A0 =A0 =A0 =A0 start_pos end_pos (show_type expected_type) (St= ring.sub
s start_pos (end_pos - start_pos))
=A0 =A0 =A0 =A0 =A0 =A0 =A0 (show_type found_type))

(* Well-typed parser. =A0Rather hairy due to continuation-passing style wit= h
=A0 =A0polymorphic continuation functions, represented as objects with poly= morphic
=A0 =A0methods. =A0Also incomplete -- it doesn't handle fst and snd -- = and a bit
=A0 =A0careless about error-checking. =A0Perhaps sufficient to give a flavo= ur.
*)
let parse_expr : string -> any_expr =3D
=A0 let rec parse s pos (ret : < rn : 'a. int * 'a expr -> _&= gt;) =3D
=A0 =A0 match s.[pos] with
=A0 =A0 =A0 | 'T' -> ret#rn (pos + 1, True)
=A0 =A0 =A0 | 'F' -> ret#rn (pos + 1, False)
=A0 =A0 =A0 | '!' -> parse s (pos + 1) (object method rn : '= a. int * 'a expr -> _
=A0 =A0 =A0 =A0 =3D fun (type a) (pos', (e : a expr)) ->
=A0 =A0 =A0 =A0 =A0 (* Check that 'e' has boolean type before we ca= n parse it to Not.
=A0 =A0 =A0 =A0 =A0 =A0 =A0This is more than just good practice: without th= e type-checking
=A0 =A0 =A0 =A0 =A0 =A0 =A0step the parser won't compile. *)
=A0 =A0 =A0 =A0 =A0 match type_of e with
=A0 =A0 =A0 =A0 =A0 =A0 | TBool -> ret#rn (pos', Not e)
=A0 =A0 =A0 =A0 =A0 =A0 | t =A0 =A0-> typing_failure (pos + 1) pos' = s TBool t end)
=A0 =A0 =A0 | '(' -> parse s (pos + 1) (object method rn : '= a. int * 'a expr
-> _ =3D fun (pos, l) ->
=A0 =A0 =A0 =A0 if s.[pos] <> ',' then parsing_failure pos &#= 39;,' s
=A0 =A0 =A0 =A0 else parse s (pos + 1) (object method rn : 'a. int * &#= 39;a expr
-> _ =3D fun (pos, r) ->
=A0 =A0 =A0 =A0 =A0 if s.[pos] <> ')' then parsing_failure po= s ')' s
=A0 =A0 =A0 =A0 =A0 else ret#rn (pos + 1, Pair (l, r)) end) end)
=A0 in
=A0 fun s -> parse s 0 (object method rn : 'a. int * 'a expr -&g= t; _ =3D
=A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0fun (_, l) -= > Expr l end)


(* read_eval_print "((!!T,F),(!T,F))" =3D>
=A0 =A0"((T,F),(F,F))"
*)
let read_eval_print s =3D
=A0 let Expr e =3D parse_expr s in
=A0 let ty =3D type_of e in
=A0 let print_e =3D printer ty in
=A0 print_e (eval e)

--e89a8fb1f674f3ab5a04d28abc81--