caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
From: Richard Jones <rich@annexia.org>
To: caml-list@inria.fr
Subject: Re: [Caml-list] Weak hash table for attaching extra data to an object
Date: Tue, 14 Aug 2007 17:22:06 +0100	[thread overview]
Message-ID: <20070814162206.GA1651@furbychan.cocan.org> (raw)
In-Reply-To: <20070814101535.GA14485@furbychan.cocan.org>

[-- Attachment #1: Type: text/plain, Size: 669 bytes --]

Attached is my attempt to make a WeakMetadata module (a weak hash
table where only the key is weak and the value is used to store
metadata about the key).  It is derived from Remi Vanicat's Hweak
module, which is itself derived from the weak set in stdlib.

Unfortunately it relies on the following being safe.  I have no idea
if this is safe or not.

  Array.create 0 (Obj.magic ())

Also it doesn't clean up the extra data (metadata) very aggressively.
It basically waits for it to get overwritten when further bindings are
added to a bucket.  So if the metadata is very large it would be worth
adding some sort of "compact" method.

Rich.

-- 
Richard Jones
Red Hat

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: weakMetadata.ml --]
[-- Type: text/plain; charset=utf-8, Size: 7104 bytes --]

(***********************************************************************)
(*                                                                     *)
(*                           HWeak                                     *)
(*                                                                     *)
(*                        Remi Vanicat                                 *)
(*                                                                     *)
(*  Copyright 2002 Rémi Vanicat                                        *)
(*  All rights reserved.  This file is distributed under the terms of  *)
(*  the GNU Library General Public License, with the special exception *)
(*  on linking described in the LICENCE file of the Objective Caml     *)
(*  distribution                                                       *)
(*                                                                     *)
(*  Most of this file is an adptation of the implentation of Weak      *)
(*  Hastable by Damien Doligez that can be found into Objective Caml   *)
(*  which is Copyright 1997 Institut National de Recherche en          *)
(*  Informatique et en Automatique and is distributed under the same   *)
(*  licence                                                            *)
(*                                                                     *)
(***********************************************************************)

(* RWMJ: Modified so that only the key is weak. *)

(** Weak array operations *)

(** Weak hash tables *)

module type S = sig
  type key
  type 'a t
  val create : int -> 'a t
  val clear : 'a t -> unit
  val add : 'a t -> key -> 'a -> unit
  val replace : 'a t -> key -> 'a -> unit
  val remove : 'a t -> key -> unit
  val merge : 'a t -> key -> 'a -> 'a
  val find : 'a t -> key -> 'a
  val find_all : 'a t -> key -> 'a list
  val mem : 'a t -> key -> bool
  val iter : (key -> 'a -> unit) -> 'a t -> unit
  val fold : (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
  val count : 'a t -> int
  val stats : 'a t -> int * int * int * int * int * int
end;;

module Make (H : Hashtbl.HashedType) : (S with type key = H.t) = struct

  type key = H.t;;

  let emptykeybucket : key Weak.t = Weak.create 0;;

  type 'a t = {
    emptydatabucket : 'a array;
    mutable table : (key Weak.t * 'a array) array;
    mutable totsize : int;             (* sum of the bucket sizes *)
    mutable limit : int;               (* max ratio totsize/table length *)
  };;

  let get_index t d = (H.hash d land max_int) mod (Array.length t.table);;

  let create sz =
    let sz = if sz < 7 then 7 else sz in
    let sz = if sz > Sys.max_array_length then Sys.max_array_length else sz in
    let edb = Array.create 0 (Obj.magic ()) in
    {
      emptydatabucket = edb;
      table = Array.create sz (emptykeybucket, edb);
      totsize = 0;
      limit = 3;
    };;

  let clear t =
    for i = 0 to Array.length t.table - 1 do
      t.table.(i) <- (emptykeybucket, t.emptydatabucket);
    done;
    t.totsize <- 0;
    t.limit <- 3;
  ;;

  let fold f t init =
    let rec fold_bucket i ((b1, b2) as cpl) accu =
      if i >= Weak.length b1 then accu else
      match (Weak.get b1 i, Array.get b2 i) with
      | (Some v1, v2) -> fold_bucket (i+1) cpl (f v1 v2 accu)
      | _ -> fold_bucket (i+1) cpl accu
    in
    Array.fold_right (fold_bucket 0) t.table init
  ;;

  let iter f t = fold (fun d1 d2 () -> f d1 d2) t ();;

  let count t =
    let rec count_bucket i ((b1, b2) as cpl) accu =
      if i >= Weak.length b1 then accu else
      count_bucket (i+1) cpl (accu + (if Weak.check b1 i then 1 else 0))
    in
    Array.fold_right (count_bucket 0) t.table 0
  ;;

  let next_sz n = min (3*n/2 + 3) (Sys.max_array_length - 1);;

  let rec resize t =
    let oldlen = Array.length t.table in
    let newlen = next_sz oldlen in
    if newlen > oldlen then begin
      let newt = create newlen in
      newt.limit <- t.limit + 100;          (* prevent resizing of newt *)
      fold (fun f t () -> add newt f t) t ();
   (* assert Array.length newt.table = newlen; *)
      t.table <- newt.table;
      t.limit <- t.limit + 2;
    end

  and add_aux t k e index =
    let bucket1, bucket2 = t.table.(index) in
    let sz = Weak.length bucket1 in
    let rec loop i =
      if i >= sz then begin
        let newsz = min (sz + 3) (Sys.max_array_length - 1) in
        if newsz <= sz then failwith "Weak.Make : hash bucket cannot grow more";
        let newbucket1 = Weak.create newsz 
	and newbucket2 = Array.create newsz e in
        Weak.blit bucket1 0 newbucket1 0 sz;
        Array.blit bucket2 0 newbucket2 0 sz;
        Weak.set newbucket1 i (Some k);
	(*Array.set newbucket2 i e; -- implied by Array.create above*)
        t.table.(index) <- (newbucket1, newbucket2);
        t.totsize <- t.totsize + (newsz - sz);
        if t.totsize > t.limit * Array.length t.table then resize t;
      end else begin
        if Weak.check bucket1 i
        then loop (i+1)
        else 
	  begin
	    Weak.set bucket1 i (Some k);
	    Array.set bucket2 i e;
	  end
      end
    in
    loop 0;

  and add t k e = add_aux t k e (get_index t k)
  ;;

  let find_or t d ifnotfound =
    let index = get_index t d in
    let (bucket1, bucket2) = t.table.(index) in
    let sz = Weak.length bucket1 in
    let rec loop i =
      if i >= sz then ifnotfound index
      else begin
        match Weak.get_copy bucket1 i with
        | Some v when H.equal v d -> Array.get bucket2 i
        | _ -> loop (i+1)
      end
    in
    loop 0
  ;;

  let merge t k d = find_or t k (fun index -> add_aux t k d index; d);;

  let find t d = find_or t d (fun index -> raise Not_found);;

  let find_shadow t d iffound ifnotfound =
    let index = get_index t d in
    let (bucket1, bucket2) = t.table.(index) in
    let sz = Weak.length bucket1 in
    let rec loop i =
      if i >= sz then ifnotfound else begin
        match Weak.get_copy bucket1 i with
        | Some v when H.equal v d
	    -> iffound bucket1 bucket2 i
        | _ -> loop (i+1)
      end
    in
    loop 0
  ;;

  let replace t k d = 
    if (find_shadow t k 
	  (fun w1 w2 i -> 
	     Weak.set w1 i (Some k); Array.set w2 i d; false ) true) 
    then
      add t k d

  let remove t d = find_shadow t d
		     (fun w1 w2 i ->
			Weak.set w1 i None;
			(*Array.set w2 i ? -- leave it, it'll be overwritten*))
		     ()

  let mem t d = find_shadow t d (fun _ _ i -> true) false

  let find_all t d =
    let index = get_index t d in
    let (bucket1, bucket2) = t.table.(index) in
    let sz = Weak.length bucket1 in
    let rec loop i accu =
      if i >= sz then accu
      else begin
        match Weak.get_copy bucket1 i with
        | Some v when H.equal v d
	    -> let v = Array.get bucket2 i in loop (i+1) (v::accu)
        | _ -> loop (i+1) accu
      end
    in
    loop 0 []
  ;;

  let stats t =
    let len = Array.length t.table in
    let lens = Array.map (fun (b,_) -> Weak.length b) t.table in
    Array.sort compare lens;
    let totlen = Array.fold_left ( + ) 0 lens in
    (len, count t, totlen, lens.(0), lens.(len/2), lens.(len-1))
  ;;

end;;

  parent reply	other threads:[~2007-08-14 16:22 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-08-14 10:15 Richard Jones
2007-08-14 11:08 ` [Caml-list] " Thomas Fischbacher
2007-08-14 11:48   ` Till Varoquaux
2007-08-14 20:44   ` Jon Harrop
2007-08-14 23:09     ` Stefano Zacchiroli
2007-08-15  0:33       ` Jon Harrop
2007-08-15 12:33         ` Daniel Bünzli
2007-08-16 14:54         ` Markus Mottl
2007-08-15  1:28     ` skaller
2007-08-15 19:04     ` Richard Jones
2007-08-16 16:17       ` Some observations and measurements with using Weak Hashtable to annotate a tree (was: Re: [Caml-list] Weak hash table for attaching extra data to an object) Richard Jones
2007-08-14 16:22 ` Richard Jones [this message]
2007-08-14 23:24   ` [Caml-list] Weak hash table for attaching extra data to an object Till Varoquaux
2007-08-14 23:35   ` Fernando Alegre
2007-08-15  7:59     ` Richard Jones

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070814162206.GA1651@furbychan.cocan.org \
    --to=rich@annexia.org \
    --cc=caml-list@inria.fr \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).