caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
* Thread safe heterogenous property lists (dictionaries)
@ 2010-02-02 13:12 Daniel Bünzli
  2010-02-02 13:36 ` [Caml-list] " Alain Frisch
  0 siblings, 1 reply; 3+ messages in thread
From: Daniel Bünzli @ 2010-02-02 13:12 UTC (permalink / raw)
  To: caml-list

Hello,

I needed an implementation of heterogenous property lists [1] ---
hereafter dictionaries. There's some code floating around on the www
(e.g. here [2]) but it uses a thread unsafe implementation of
universal types. This makes it unacceptably ugly as thread safety is
not even guaranteed among independent dictionaries, locking is needed
per key.

Below I give two implementations of immutable heterogenous
dictionaries. Both use exceptions to implement a thread safe universal
type. This is based on code by Andrej Bauer and refined by Stephen
Weeks here [3]. A functor application is needed to create a function
that create new keys for a given type but in practice that
inconvenience is rather small (see the test code). This means you
don't have to wait on OCaml 3.12 to get thread safe heterogenous
dictionaries (see Alain Frish's "perfect" solution for universal types
with first class modules there [3]).

The first implementation uses association lists, it's suitable for
small dictionaries as lookup time is linear in the number of entries.
This implementation is completely thread-safe.

The second one uses Maps for logarithmic time lookups. Operations on
dictionaries are thread safe. However key creation is not because
unique ids need to be generated for them. While not perfect this is
acceptable to me as keys are likely to be created in module
initialization code and thus will be executed by a single thread. This
implementation can be easily modified to implement mutable
dictionaries using Hashtbl as the underlying map for constant lookup
time.

Best,

Daniel

[1] http://mlton.org/PropertyList
[2] http://eigenclass.org/R2/writings/heterogeneous-containers-in-ocaml
[3] http://ocaml.janestreet.com/?q=node/18

(* The signature we are interested in. *)

(** Heterogenous dictionaries. *)
module type Dict = sig

  type t
  (** The type for dictionaries. *)

  type 'a key
  (** The type for keys whose lookup value is of type ['a]. *)

  val empty : t
  (** The empty dictionary. *)

  val is_empty : t -> bool
  (** [is_empty d] is [true] iff [d] is empty. *)

  val add : 'a key -> 'a -> t -> t
  (** [add k v d] is [d] with [k] mapping to [v]. *)

  val find : 'a key -> t -> 'a option
  (** [find k d] is the value of [k] in [d], if any. *)

  module Key : sig
  (** Creating keys. *)

    val bool : unit -> bool key
    (** [bool ()] is a new key for a boolean value. *)

    val int : unit -> int key
    (** [int ()] is a new key for an integer value. *)

    val float : unit -> float key
    (** [float ()] is a new key for a float value. *)

    val string : unit -> string key
    (** [string ()] is a new key for string value. *)

    module ForType (T : sig type t end) : sig
      val create : unit -> T.t key
      (** [create ()] is a new key for the type [T.t]. *)
    end
  end
end

(* Implementation. *)

module type Id = sig                              (* A signature for key ids. *)
  type t
  val create : unit -> t
end

module Key (Id : Id) = struct         (* Given key ids, implements dict keys. *)
  type 'a t = Id.t * ('a -> exn) * (exn -> 'a option)

  module ForType (T : sig type t end) = struct
    exception E of T.t
    let inject v = E v
    let project = function E v -> Some v | _ -> None
    let create () = Id.create (), inject, project
  end

  module BoolKey = ForType (struct type t = bool end)
  module IntKey = ForType (struct type t = int end)
  module FloatKey = ForType (struct type t = float end)
  module StringKey = ForType (struct type t = string end)

  let bool = BoolKey.create
  let int = IntKey.create
  let float = FloatKey.create
  let string = StringKey.create
end

module DList : Dict = struct    (* Dictionaries as assoc lists, thread-safe. *)

  module Id = struct
    type t = unit ref
    let create () = ref ()
  end

  module Key = Key (Id)

  type t = (Id.t * exn) list
  type 'a key = 'a Key.t

  let empty = []
  let is_empty = function [] -> true | _ -> false
  let add k v l =
    let rec aux ((id, inject, _) as k) v left right = match right with
    | [] -> (id, inject v) :: left
    | ((id', _) as b) :: right' ->
	if id' == id then List.rev_append left ((id, inject v) :: right') else
	aux k v (b :: left) right'
    in
    aux k v [] l

  let rec find ((id, _, project) as k) = function
    | (id', exn) :: l' -> if id' == id then project exn else find k l'
    | [] -> None

end

module DMap : Dict = struct (* Dicts as maps, thread-safe except for key gen. *)

  module Id = struct
    type t = int
    let compare : int -> int -> int = compare
    let create =                                          (* NOT thread safe. *)
      let c = ref min_int in
      fun () ->
	let id = !c in
	incr c; if id > !c then assert false (* too many ids *) else id
  end

  module Key = Key (Id)
  module Map = Map.Make(Id)

  type t = exn Map.t
  type 'a key = 'a Key.t
	
  let empty = Map.empty
  let is_empty = Map.is_empty
  let add (id, inject, _) v m = Map.add id (inject v) m
  let find  (id, _, proj) m = try proj (Map.find id m) with Not_found -> None
end

(* Testing *)

module Test (Dict : Dict) = struct
  let b1 = Dict.Key.bool ()
  let b2 = Dict.Key.bool ()
  let i1 = Dict.Key.int ()
  let i2 = Dict.Key.int ()
  let s1 = Dict.Key.string ()
  let s2 = Dict.Key.string ()

  module IntPairKey = Dict.Key.ForType (struct type t = int * int end)

  let p1 = IntPairKey.create ()
  let p2 = IntPairKey.create ()

  let d0 = Dict.empty
  let d1 = Dict.add b1 true d0
  let d2 = Dict.add i1 84 d1
  let d3 = Dict.add s1 "dip" d2
  let d4 = Dict.add p1 (4,2) d3
  let d5 = Dict.add i1 85 d4

  let () =
    let all_dicts = [d0; d1; d2; d3; d4; d5] in
    let assert_bind k some d = assert (Dict.find k d = some) in
    List.iter (assert_bind b2 None) all_dicts;
    List.iter (assert_bind i2 None) all_dicts;
    List.iter (assert_bind s2 None) all_dicts;
    List.iter (assert_bind p2 None) all_dicts;
    List.iter (assert_bind b1 None) [d0];
    List.iter (assert_bind b1 (Some true)) [d1; d2; d3; d4; d5];
    List.iter (assert_bind i1 None) [d0; d1];
    List.iter (assert_bind i1 (Some 84)) [d2; d3; d4];
    assert_bind i1 (Some 85) d5;
    List.iter (assert_bind s1 None) [d0; d1; d2];
    List.iter (assert_bind s1 (Some "dip")) [d3; d4; d5];
    List.iter (assert_bind p1 None) [d0; d1; d2; d3];
    List.iter (assert_bind p1 (Some (4,2))) [d4; d5];
end

module Test_DList = Test (DList)
module Test_DMap = Test (DMap)


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Caml-list] Thread safe heterogenous property lists (dictionaries)
  2010-02-02 13:12 Thread safe heterogenous property lists (dictionaries) Daniel Bünzli
@ 2010-02-02 13:36 ` Alain Frisch
  2010-02-03 13:14   ` Daniel Bünzli
  0 siblings, 1 reply; 3+ messages in thread
From: Alain Frisch @ 2010-02-02 13:36 UTC (permalink / raw)
  To: Daniel Bünzli; +Cc: caml-list

On 02/02/2010 14:12, Daniel Bünzli wrote:
> The second one uses Maps for logarithmic time lookups. Operations on
> dictionaries are thread safe. However key creation is not because
> unique ids need to be generated for them.

FWIW, a thread-safe way to generate fresh ids is:

let fresh_id () = Oo.id (object end)


Also, the "perfect" solution you are referring to becomes in the syntax 
of OCaml's trunk:

let embed () (type s) =
   let module M = struct exception E of s end in
   (fun x -> M.E x), (function M.E x -> Some x | _ -> None)


-- Alain



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Caml-list] Thread safe heterogenous property lists  (dictionaries)
  2010-02-02 13:36 ` [Caml-list] " Alain Frisch
@ 2010-02-03 13:14   ` Daniel Bünzli
  0 siblings, 0 replies; 3+ messages in thread
From: Daniel Bünzli @ 2010-02-03 13:14 UTC (permalink / raw)
  To: Alain Frisch; +Cc: caml-list

> FWIW, a thread-safe way to generate fresh ids is:
>
> let fresh_id () = Oo.id (object end)

Thanks for the tip.

Daniel


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2010-02-03 13:14 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-02 13:12 Thread safe heterogenous property lists (dictionaries) Daniel Bünzli
2010-02-02 13:36 ` [Caml-list] " Alain Frisch
2010-02-03 13:14   ` Daniel Bünzli

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).