caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
* Defining and parsing subset
@ 2008-06-23 15:57 Dario Teixeira
  0 siblings, 0 replies; only message in thread
From: Dario Teixeira @ 2008-06-23 15:57 UTC (permalink / raw)
  To: caml-list

Hi,

Suppose you have two different types of documents, one of which is a subset
of the other.  The types themselves are fairly complex (lots of submodules,
etc), to the point that one cannot use the "include" directive to define
the more complex type as an extension of the simpler type.  Now, you want
to avoid any code duplication, both in the definition of each type and in
the code that provides the document parsers.

The only solution I could find relies on phantom types to annotate each
type component.  Elements allowed in both types are annotated with `Basic,
while the ones that belong *only* to the more complex type are annotated with
`Complex.  As for parsing, there is in fact only one parser, which produces
values of type `Complex.  The parser for `Basic values is faked by invoking
the `Complex parser and then walking through the data structure, making sure
it contains only those elements allowed for the `Basic subset.

This seems to work fine;  however, since I'm guessing this is a common problem,
I'm curious to know if there is another solution, perhaps even a canonical
"camlish" way to solve the problem.  Any thoughts?

Kind regards,
Dario Teixeira

P.S.  Some sample code (with dummy parser):


(************************************************************************)
(* Node module.								*)
(************************************************************************)

module Node:
sig
	(* Document tree.  The constructors are private: code outside this
	   module can pattern-match on them, but must build values through
	   the functions [text], [bold] and [italic] below. *)
	type inline_t = node_t list
	 and node_t =
		private
		| Text of string	(* Allowed for basic *)
		| Bold of inline_t	(* Allowed for basic *)
		| Italic of inline_t	(* Complex only *)

	(* A node carrying a phantom tag ['a].  The tag does not appear in
	   the representation; it only records, at the type level, which
	   subset (`Basic or `Complex) the value belongs to. *)
	type 'a t = private node_t

	(* Constructors.  [text] and [bold] keep the tag of their input
	   (at least `Basic for [text]); [italic] accepts children of
	   either subset and always yields a `Complex node. *)
	val text: string -> [> `Basic] t
	val bold: 'a t list -> 'a t
	val italic: [< `Basic | `Complex] t list -> [> `Complex] t

	(* Printers.  [print_complex] accepts both subsets, [print_basic]
	   only `Basic nodes. *)
	val print_complex: [< `Complex | `Basic] t -> unit
	val print_basic: [< `Basic] t -> unit

	(* Raised by [parse_basic] when the parsed document contains an
	   element that is outside the basic subset. *)
	exception Invalid_subset

	(* Parsers.  [parse_basic] runs [parse_complex] and then checks the
	   result, raising [Invalid_subset] on any [Italic] node. *)
	val parse_complex: string -> [> `Complex] t
	val parse_basic: string -> [> `Basic] t
end =
struct
	type inline_t = node_t list
	 and node_t =
		| Text of string
		| Bold of inline_t
		| Italic of inline_t

	(* Inside the module the phantom tag is simply dropped. *)
	type 'a t = node_t

	exception Invalid_subset

	let text str = Text str
	let bold children = Bold children
	let italic children = Italic children

	(* Dummy printers: the argument is only there for its type. *)
	let print_complex _node = print_endline "complex"
	let print_basic _node = print_endline "basic"

	(* Dummy parser: the input "complex" yields a document that uses
	   [Italic]; any other input yields a basic-only document. *)
	let parse_complex input =
		match input with
		| "complex" ->
			Italic [Bold [Text "ola"]]
		| _ ->
			Bold [Text "ola"]

	(* Rebuilds the tree node by node, rejecting the first [Italic]
	   encountered with [Invalid_subset]. *)
	let rec complex_to_basic node =
		match node with
		| Text str ->
			Text str
		| Bold children ->
			Bold (List.map complex_to_basic children)
		| Italic _ ->
			raise Invalid_subset

	let parse_basic input =
		let parsed = parse_complex input in
		complex_to_basic parsed
end


(************************************************************************)
(* Top-level.								*)
(************************************************************************)

open Node

(* A basic-only tree is accepted by the basic printer. *)
let () = print_basic (bold [text "ola"])

(* A tree containing an italic node is complex and needs the complex printer. *)
let () = print_complex (italic [bold [text "ola"]])

(* The complex parser yields a value usable with the complex printer. *)
let () = print_complex (parse_complex "complex")

(* Raises exception: the parsed document contains an italic node, so
   parse_basic fails with Invalid_subset before anything is printed. *)
let () = print_basic (parse_basic "complex")



      __________________________________________________________
Sent from Yahoo! Mail.
A Smarter Email http://uk.docs.yahoo.com/nowyoucan.html


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2008-06-23 15:57 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-06-23 15:57 Defining and parsing subset Dario Teixeira

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).