From: Richard Jones <rich@annexia.org>
To: caml-list@inria.fr
Subject: Re: [Caml-list] Weak hash table for attaching extra data to an object
Date: Tue, 14 Aug 2007 17:22:06 +0100 [thread overview]
Message-ID: <20070814162206.GA1651@furbychan.cocan.org> (raw)
In-Reply-To: <20070814101535.GA14485@furbychan.cocan.org>
[-- Attachment #1: Type: text/plain, Size: 669 bytes --]
Attached is my attempt to make a WeakMetadata module (a weak hash
table where only the key is weak and the value is used to store
metadata about the key). It is derived from Remi Vanicat's Hweak
module, which is itself derived from the weak set in stdlib.
Unfortunately it relies on the following being safe. I have no idea
if this is safe or not.
Array.create 0 (Obj.magic ())
Also it doesn't clean up the extra data (metadata) very aggressively.
It basically waits for it to get overwritten when further bindings are
added to a bucket. So if the metadata is very large it would be worth
adding some sort of "compact" method.
Rich.
--
Richard Jones
Red Hat
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: weakMetadata.ml --]
[-- Type: text/plain; charset=utf-8, Size: 7104 bytes --]
(***********************************************************************)
(* *)
(* HWeak *)
(* *)
(* Remi Vanicat *)
(* *)
(* Copyright 2002 Rémi Vanicat *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Library General Public License, with the special exception *)
(* on linking described in the LICENCE file of the Objective Caml *)
(* distribution *)
(* *)
(* Most of this file is an adptation of the implentation of Weak *)
(* Hastable by Damien Doligez that can be found into Objective Caml *)
(* which is Copyright 1997 Institut National de Recherche en *)
(* Informatique et en Automatique and is distributed under the same *)
(* licence *)
(* *)
(***********************************************************************)
(* RWMJ: Modified so that only the key is weak. *)
(** Weak array operations *)
(** Weak hash tables *)
module type S = sig
type key
type 'a t
val create : int -> 'a t
val clear : 'a t -> unit
val add : 'a t -> key -> 'a -> unit
val replace : 'a t -> key -> 'a -> unit
val remove : 'a t -> key -> unit
val merge : 'a t -> key -> 'a -> 'a
val find : 'a t -> key -> 'a
val find_all : 'a t -> key -> 'a list
val mem : 'a t -> key -> bool
val iter : (key -> 'a -> unit) -> 'a t -> unit
val fold : (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
val count : 'a t -> int
val stats : 'a t -> int * int * int * int * int * int
end;;
module Make (H : Hashtbl.HashedType) : (S with type key = H.t) = struct
type key = H.t;;
let emptykeybucket : key Weak.t = Weak.create 0;;
type 'a t = {
emptydatabucket : 'a array;
mutable table : (key Weak.t * 'a array) array;
mutable totsize : int; (* sum of the bucket sizes *)
mutable limit : int; (* max ratio totsize/table length *)
};;
let get_index t d = (H.hash d land max_int) mod (Array.length t.table);;
let create sz =
let sz = if sz < 7 then 7 else sz in
let sz = if sz > Sys.max_array_length then Sys.max_array_length else sz in
let edb = Array.create 0 (Obj.magic ()) in
{
emptydatabucket = edb;
table = Array.create sz (emptykeybucket, edb);
totsize = 0;
limit = 3;
};;
let clear t =
for i = 0 to Array.length t.table - 1 do
t.table.(i) <- (emptykeybucket, t.emptydatabucket);
done;
t.totsize <- 0;
t.limit <- 3;
;;
let fold f t init =
let rec fold_bucket i ((b1, b2) as cpl) accu =
if i >= Weak.length b1 then accu else
match (Weak.get b1 i, Array.get b2 i) with
| (Some v1, v2) -> fold_bucket (i+1) cpl (f v1 v2 accu)
| _ -> fold_bucket (i+1) cpl accu
in
Array.fold_right (fold_bucket 0) t.table init
;;
let iter f t = fold (fun d1 d2 () -> f d1 d2) t ();;
let count t =
let rec count_bucket i ((b1, b2) as cpl) accu =
if i >= Weak.length b1 then accu else
count_bucket (i+1) cpl (accu + (if Weak.check b1 i then 1 else 0))
in
Array.fold_right (count_bucket 0) t.table 0
;;
let next_sz n = min (3*n/2 + 3) (Sys.max_array_length - 1);;
let rec resize t =
let oldlen = Array.length t.table in
let newlen = next_sz oldlen in
if newlen > oldlen then begin
let newt = create newlen in
newt.limit <- t.limit + 100; (* prevent resizing of newt *)
fold (fun f t () -> add newt f t) t ();
(* assert Array.length newt.table = newlen; *)
t.table <- newt.table;
t.limit <- t.limit + 2;
end
and add_aux t k e index =
let bucket1, bucket2 = t.table.(index) in
let sz = Weak.length bucket1 in
let rec loop i =
if i >= sz then begin
let newsz = min (sz + 3) (Sys.max_array_length - 1) in
if newsz <= sz then failwith "Weak.Make : hash bucket cannot grow more";
let newbucket1 = Weak.create newsz
and newbucket2 = Array.create newsz e in
Weak.blit bucket1 0 newbucket1 0 sz;
Array.blit bucket2 0 newbucket2 0 sz;
Weak.set newbucket1 i (Some k);
(*Array.set newbucket2 i e; -- implied by Array.create above*)
t.table.(index) <- (newbucket1, newbucket2);
t.totsize <- t.totsize + (newsz - sz);
if t.totsize > t.limit * Array.length t.table then resize t;
end else begin
if Weak.check bucket1 i
then loop (i+1)
else
begin
Weak.set bucket1 i (Some k);
Array.set bucket2 i e;
end
end
in
loop 0;
and add t k e = add_aux t k e (get_index t k)
;;
let find_or t d ifnotfound =
let index = get_index t d in
let (bucket1, bucket2) = t.table.(index) in
let sz = Weak.length bucket1 in
let rec loop i =
if i >= sz then ifnotfound index
else begin
match Weak.get_copy bucket1 i with
| Some v when H.equal v d -> Array.get bucket2 i
| _ -> loop (i+1)
end
in
loop 0
;;
let merge t k d = find_or t k (fun index -> add_aux t k d index; d);;
let find t d = find_or t d (fun index -> raise Not_found);;
let find_shadow t d iffound ifnotfound =
let index = get_index t d in
let (bucket1, bucket2) = t.table.(index) in
let sz = Weak.length bucket1 in
let rec loop i =
if i >= sz then ifnotfound else begin
match Weak.get_copy bucket1 i with
| Some v when H.equal v d
-> iffound bucket1 bucket2 i
| _ -> loop (i+1)
end
in
loop 0
;;
let replace t k d =
if (find_shadow t k
(fun w1 w2 i ->
Weak.set w1 i (Some k); Array.set w2 i d; false ) true)
then
add t k d
let remove t d = find_shadow t d
(fun w1 w2 i ->
Weak.set w1 i None;
(*Array.set w2 i ? -- leave it, it'll be overwritten*))
()
let mem t d = find_shadow t d (fun _ _ i -> true) false
let find_all t d =
let index = get_index t d in
let (bucket1, bucket2) = t.table.(index) in
let sz = Weak.length bucket1 in
let rec loop i accu =
if i >= sz then accu
else begin
match Weak.get_copy bucket1 i with
| Some v when H.equal v d
-> let v = Array.get bucket2 i in loop (i+1) (v::accu)
| _ -> loop (i+1) accu
end
in
loop 0 []
;;
let stats t =
let len = Array.length t.table in
let lens = Array.map (fun (b,_) -> Weak.length b) t.table in
Array.sort compare lens;
let totlen = Array.fold_left ( + ) 0 lens in
(len, count t, totlen, lens.(0), lens.(len/2), lens.(len-1))
;;
end;;
next prev parent reply other threads:[~2007-08-14 16:22 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-08-14 10:15 Richard Jones
2007-08-14 11:08 ` [Caml-list] " Thomas Fischbacher
2007-08-14 11:48 ` Till Varoquaux
2007-08-14 20:44 ` Jon Harrop
2007-08-14 23:09 ` Stefano Zacchiroli
2007-08-15 0:33 ` Jon Harrop
2007-08-15 12:33 ` Daniel Bünzli
2007-08-16 14:54 ` Markus Mottl
2007-08-15 1:28 ` skaller
2007-08-15 19:04 ` Richard Jones
2007-08-16 16:17 ` Some observations and measurements with using Weak Hashtable to annotate a tree (was: Re: [Caml-list] Weak hash table for attaching extra data to an object) Richard Jones
2007-08-14 16:22 ` Richard Jones [this message]
2007-08-14 23:24 ` [Caml-list] Weak hash table for attaching extra data to an object Till Varoquaux
2007-08-14 23:35 ` Fernando Alegre
2007-08-15 7:59 ` Richard Jones
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070814162206.GA1651@furbychan.cocan.org \
--to=rich@annexia.org \
--cc=caml-list@inria.fr \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).