caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
From: Leo P White <lpw25@cam.ac.uk>
To: Leo P White <lpw25@cam.ac.uk>
Cc: Jacques Garrigue <garrigue@math.nagoya-u.ac.jp>,
	Dario Teixeira <darioteixeira@yahoo.com>,
	"caml-list@inria.fr" <caml-list@inria.fr>
Subject: Re: [Caml-list] GADTs and parsers
Date: 16 Jul 2012 11:06:59 +0100	[thread overview]
Message-ID: <Prayer.1.3.5.1207161106590.17673@hermes-2.csi.cam.ac.uk> (raw)
In-Reply-To: <Prayer.1.3.5.1207160950310.23367@hermes-2.csi.cam.ac.uk>


Sorry there was a typo in my previous e-mail, and the second GADT should be:

type _ linkp2 = Kind : 'a linkp -> ([< inkind > `Nonlink] as 'a) linkp2

However, the broken version does appear to be allowed by the type checker, 
which (assuming I haven't misunderstood Jacques' version of the inline_t 
type) seems to be a bug in the compiler, because it also allows:

# let broken seq =
  let rec process : type a. a linkp2 -> ast_t -> a inline_t =
    fun allow_link ast ->
      match (allow_link, ast) with
      | (Kind _, Ast_Text txt)    -> Link txt
      | (x, Ast_Bold xs)           -> Bold (List.map (process x) xs)
      | (Kind Maylink, Ast_Link lnk)    -> Link lnk
      | (Kind Nonlink, Ast_Link _)      -> assert false
      | (Kind Maylink, Ast_Mref (lnk, xs)) ->
          Mref (lnk, List.map (process (Kind Nonlink)) xs)
      | (Kind Nonlink, Ast_Mref _)      -> assert false
    in List.map (process (Kind Maylink)) seq;;
                      val broken : ast_t list -> inkind inline_t list = 
<fun>

# broken [Ast_Mref("foo", [Ast_Text "bar"])];;
- : inkind inline_t list = [Mref ("foo", [Link <abstr>])]

which breaks the desired invariant that an MRef doesn't contain a Link node.

Have I misunderstood something, or is this indeed a bug?

Regards,

Leo




On Jul 16 2012, Leo P White wrote:

>
> The duplicated Ast_Text case in the process function can also be avoided 
> by using another layer of GADT, so that Jacques version becomes:
>
>type _ linkp =
>  | Nonlink : [ `Nonlink ] linkp
>  | Maylink : inkind linkp
>
>type _ linkp2 = Kind : 'a linkp -> ([< inkind ] as 'a) linkp2
>
>let inlineseq_from_astseq seq =
>  let rec process : type a. a linkp2 -> ast_t -> a inline_t =
>    fun allow_link ast ->
>      match (allow_link, ast) with
>      | (Kind _, Ast_Text txt)    -> Text txt
>      | (x, Ast_Bold xs)           -> Bold (List.map (process x) xs)
>      | (Kind Maylink, Ast_Link lnk)    -> Link lnk
>      | (Kind Nonlink, Ast_Link _)      -> assert false
>      | (Kind Maylink, Ast_Mref (lnk, xs)) ->
>          Mref (lnk, List.map (process (Kind Nonlink)) xs)
>      | (Kind Nonlink, Ast_Mref _)      -> assert false
>    in List.map (process (Kind Maylink)) seq
>
>Regards,
>
>Leo
>
>On Jul 15 2012, Jacques Garrigue wrote:
>
>>Actually Leo more or less gave you the answer:
>>since your recursive process function needs to return either
>>`Nonlink or any inline_t nodes, it has to be polymorphically
>>recursive, and use a custom GADT to connect the allow_link
>>parameter with the result type.
>>
>>Here is a more complete version of his code.
>>I also changed a bit your types, because there is another problem
>>using polymorphic variants with GADTs: when you get a GADT
>>equation involving a refinable polymorphic variant, you are going
>>to get get a local private type. If your type is unbounded, you
>>cannot even use subtyping on it. Also [< `Nonlink] has exactly
>>the same elements as [ `Nonlink], so it is better to use the latter.
>>
>>   Jacques
>>
>>type inkind = [ `Link | `Nonlink ]
>>
>>type _ inline_t =
>>    | Text: string -> [< inkind > `Nonlink ] inline_t Bold: 'a inline_t 
>>    | list -> 'a inline_t Link: string -> [< inkind > `Link ] inline_t 
>>    | Mref: string * [ `Nonlink ] inline_t list -> [< inkind > `Link ] 
>>    | inline_t
>>
>>let uppercase seq =
>>    let rec process: type a. a inline_t -> a inline_t = function
>>        | Text txt       -> Text (String.uppercase txt)
>>        | Bold xs        -> Bold (List.map process xs)
>>        | Link lnk       -> Link lnk
>>        | Mref (lnk, xs) -> Mref (lnk, List.map process xs)
>>    in List.map process seq
>>
>>type ast_t =
>>    | Ast_Text of string
>>    | Ast_Bold of ast_t list
>>    | Ast_Link of string
>>    | Ast_Mref of string * ast_t list
>>
>>let inlineseq_from_astseq seq =
>>    let rec process_nonlink = function
>>        | Ast_Text txt  -> Text txt
>>        | Ast_Bold xs   -> Bold (List.map process_nonlink xs)
>>        | _             -> assert false in
>>    let rec process_any = function
>>        | Ast_Text txt       -> Text txt
>>        | Ast_Bold xs        -> Bold (List.map process_any xs)
>>        | Ast_Link lnk       -> Link lnk
>>        | Ast_Mref (lnk, xs) -> Mref (lnk, List.map process_nonlink xs)
>>    in List.map process_any seq
>>
>>type _ linkp =
>>  | Nonlink : [ `Nonlink ] linkp
>>  | Maylink : inkind linkp
>>
>>let inlineseq_from_astseq seq =
>>  let rec process : type a. a linkp -> ast_t -> a inline_t =
>>    fun allow_link ast ->
>>      match (allow_link, ast) with
>>      | (Maylink, Ast_Text txt)    -> Text txt
>>      | (Nonlink, Ast_Text txt)    -> Text txt
>>      | (x, Ast_Bold xs)           -> Bold (List.map (process x) xs)
>>      | (Maylink, Ast_Link lnk)    -> Link lnk
>>      | (Nonlink, Ast_Link _)      -> assert false
>>      | (Maylink, Ast_Mref (lnk, xs)) ->
>>          Mref (lnk, List.map (process Nonlink) xs)
>>      | (Nonlink, Ast_Mref _)      -> assert false
>>    in List.map (process Maylink) seq
>>
>>
>>On 2012/07/16, at 1:54, Dario Teixeira wrote:
>>
>>> Hi,
>>> 
>>> I'm revisiting an old problem with 4.00's newfangled GADTs. Suppose 
>>> you have four kinds of inline nodes, two of which (Text and Link) are 
>>> leaves, while the other two (Bold and Mref) are the parents of other 
>>> nodes. Moreover, you want to enforce the invariant that a "linkish" 
>>> node (Link and Mref) may not be the ancestor of another linkish node. 
>>> One possible implementation:
>>> 
>>> type _ inline_t =
>>>     | Text: string -> [> `Nonlink ] inline_t
>>>     | Bold: 'a inline_t list -> 'a inline_t
>>>     | Link: string -> [> `Link ] inline_t
>>>     | Mref: string * [< `Nonlink ] inline_t list -> [> `Link ] inline_t
>>> 
>>> 
>>> Defining a simple transformation function (in this case one which 
>>> uppercases all text) is also straightforward, just as long as one 
>>> includes the proper type annotations:
>>> 
>>> let uppercase seq =
>>>     let rec process: type a. a inline_t -> a inline_t = function
>>>         | Text txt       -> Text (String.uppercase txt)
>>>         | Bold xs        -> Bold (List.map process xs)
>>>         | Link lnk       -> Link lnk
>>>         | Mref (lnk, xs) -> Mref (lnk, List.map process xs)
>>>     in List.map process seq
>>> 
>>> 
>>> But suppose now that I got from a parser a ast_t value with an almost 
>>> identical structure to inline_t, with the exception that it does not 
>>> intrinsically satisfy the latter's invariant: (Note: for this toy 
>>> example it may well be easy to design the parser grammar such that the 
>>> invariant is always preserved; but suppose you're dealing with a "dumb" 
>>> parser)
>>> 
>>> type ast_t =
>>>     | Ast_Text of string
>>>     | Ast_Bold of ast_t list
>>>     | Ast_Link of string
>>>     | Ast_Mref of string * ast_t list
>>> 
>>> 
>>> Below is one possible implementation of a function that converts the 
>>> possibly "broken" ast_t into an inline_t. Note how the processing is 
>>> split into two separate functions -- one which deals only with 
>>> nonlinks, and other that takes anything -- so we can be sure to satisfy 
>>> the GADT constraints.
>>> 
>>> let inlineseq_from_astseq seq =
>>>     let rec process_nonlink = function
>>>         | Ast_Text txt  -> Text txt
>>>         | Ast_Bold xs   -> Bold (List.map process_nonlink xs)
>>>         | _             -> assert false in
>>>     let rec process_any = function
>>>         | Ast_Text txt       -> Text txt
>>>         | Ast_Bold xs        -> Bold (List.map process_any xs)
>>>         | Ast_Link lnk       -> Link lnk
>>>         | Ast_Mref (lnk, xs) -> Mref (lnk, List.map process_nonlink xs)
>>>     in List.map process_any seq
>>> 
>>> 
>>> Now here's my problem: suppose I wanted to avoid the branch duplication 
>>> present in the above function. The code below seems to do the trick, 
>>> while at the same time ensuring that the result is always a valid 
>>> inline_t. However, the compiler has trouble seeing that the code is a 
>>> sound way to produce convert an ast_t into an inline_t, and rejects the 
>>> code. Moreover, it is not enough to simply add the type annotations for 
>>> subfunction 'process', as was done in 'uppercase'.
>>> 
>>> let inlineseq_from_astseq seq =
>>>     let rec process allow_link ast = match (allow_link, ast) with
>>>         | (_, Ast_Text txt) -> Text txt (x, Ast_Bold xs) -> Bold 
>>>         | (List.map (process x) xs) (true, Ast_Link lnk) -> Link lnk 
>>>         | (false, Ast_Link _) -> assert false (true, Ast_Mref (lnk, 
>>>         | xs)) -> Mref (lnk, List.map (process false) xs) (false, 
>>>         | Ast_Mref _) -> assert false
>>>     in List.map (process true) seq
>>> 
>>> 
>>> Can the single function approach be made to work? I'm having trouble 
>>> figuring out just exactly what sort of help the compiler may require to 
>>> see the code above as correct... (Assuming it is correct, of course...)
>>> 
>>> Thanks in advance for your time!
>>> Cheers,
>>> Dario Teixeira
>>> 
>>> 
>>> -- 
>>> Caml-list mailing list.  Subscription management and archives:
>>> https://sympa-roc.inria.fr/wws/info/caml-list
>>> Beginner's list: http://groups.yahoo.com/group/ocaml_beginners
>>> Bug reports: http://caml.inria.fr/bin/caml-bugs
>>> 
>>
>>
>>
>
>

  reply	other threads:[~2012-07-16 10:07 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-07-15 16:54 Dario Teixeira
2012-07-15 17:43 ` Gabriel Scherer
2012-07-15 18:58   ` Dario Teixeira
2012-07-15 20:37     ` Gabriel Scherer
2012-07-15 19:55 ` Leo P White
2012-07-16 14:45   ` Dario Teixeira
2012-07-15 22:41 ` Jacques Garrigue
2012-07-16  8:50   ` Leo P White
2012-07-16 10:06     ` Leo P White [this message]
2012-07-16 15:12   ` Dario Teixeira

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Prayer.1.3.5.1207161106590.17673@hermes-2.csi.cam.ac.uk \
    --to=lpw25@cam.ac.uk \
    --cc=caml-list@inria.fr \
    --cc=darioteixeira@yahoo.com \
    --cc=garrigue@math.nagoya-u.ac.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).