caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
From: sejourne_kevin <sejourne_kevin@yahoo.fr>
To: padiolea@irisa.fr
Cc: Erik de Castro Lopo <ocaml-erikd@mega-nerd.com>,
	caml-list@yquem.inria.fr
Subject: Re: [Caml-list] Hashtbl.create 401
Date: Sat, 23 Apr 2005 00:26:57 +0000	[thread overview]
Message-ID: <42699651.6020200@yahoo.fr> (raw)
In-Reply-To: <40833.131.254.50.45.1114171604.squirrel@mail.irisa.fr>

padiolea@irisa.fr a écrit :
>>On Thu, 21 Apr 2005 21:09:18 -0600
>>Matt Gushee <mgushee@havenrock.com> wrote:
>>
>>
>>>Hello, all--
>>>
>>>While browsing through the Labltk source code (and perhaps in some other
>>>places), I noticed several instances of:
>>>
>>>   Hashtbl.create 401
>>>
>>>Why 401?
>>
>>Hash tables distribute data across a set of hash buckets. If
>>the number of hash buckets is prime (401 is prime) then their
>>is a better chance of the data being evenly distributed across
>>the buckets.
> 
> 
> but it seems that this number is not that much important since,
> as said in the ml source:
> (* We do dynamic hashing, and resize the table and rehash the elements
>    when buckets become too long. *)
If you want to have a HashTbl with only a prime number of hash buckets,
you can do things like that (with code stolen from the stdlib):

let table = (* list of (next_primes 2n+1) of root 3 *)
[| 3; 11; 29; 61; 127; 257; 521; 1049; 2111; 4229; 8461; 16927; 33857;
67723; 135449; 270913; 541831; 1083689; 2167393;(*max 32 bits*) 4334791;
8669593; 17339197; 34678421; 69356857; 138713717; 277427441; 554854889
|]
;;
type ('a, 'b) t =
  {
    mutable size: int;                        (* number of elements *)
    mutable index: int;                       (* index of the next array
size*)
    mutable data: ('a, 'b) bucketlist array   (* the buckets *)
  }

and ...
let create initial_size =
  let i_m, _ =
      fold_lefti (fun i ((_,v_m) as u) x ->
		    let v = abs (x - initial_size) in
		      if v < v_m  then (i,v)  else u
		 ) (0,max_int) table
  in
  let s = min (max 1 table.(i_m)) Sys.max_array_length
  in { size = 0; index = i_m; data = Array.make s Empty }


let resize hashfun tbl =
  let _ = tbl.index <- succ tbl.index in
  let osize = Array.length tbl.data in
  let nsize = try min (table.(tbl.index)) Sys.max_array_length with |_->
osize
  in
    if nsize <> osize then
      begin
	let ndata = Array.create nsize Empty in
	let rec insert_bucket = function
            Empty -> ()
	  | Cons(key, data, rest) ->
              insert_bucket rest; (* preserve original order of elements *)
              let nidx = (hashfun key) mod nsize in
		ndata.(nidx) <- Cons(key, data, ndata.(nidx)) in
	  for i = 0 to osize - 1 do
	    insert_bucket tbl.data.(i)
	  done;
	  tbl.data <- ndata
      end

let copy h =
  { size = h.size;
    index = h.index;
    data = Array.copy h.data }

....



  reply	other threads:[~2005-04-22 22:16 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-04-22  3:09 Matt Gushee
2005-04-22  3:21 ` [Caml-list] " Erik de Castro Lopo
2005-04-22 12:06   ` padiolea
2005-04-23  0:26     ` sejourne_kevin [this message]
2005-04-22  3:33 ` Radu Grigore

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=42699651.6020200@yahoo.fr \
    --to=sejourne_kevin@yahoo.fr \
    --cc=caml-list@yquem.inria.fr \
    --cc=ocaml-erikd@mega-nerd.com \
    --cc=padiolea@irisa.fr \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).