caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
From: Christoph Bauer <christoph.bauer@lms-gmbh.de>
To: Erick Tryzelaar <erickt@dslextreme.com>,
	Christoph Bauer <christoph.bauer@lms-gmbh.de>
Cc: caml-list <caml-list@inria.fr>
Subject: AW: [Caml-list] generic Hashtbl.to_array
Date: Tue, 25 Jul 2006 12:19:52 +0200	[thread overview]
Message-ID: <26EB47FDD566A7469FC862DAF373792F0171128D@kaiserslautern1.lmsintl.com> (raw)

Hi,

> 
> You could also try inverting the Hashtbl fold into an 
> iterator+closure and pass the closure into the Array.init 
> function, but I'm not sure how complicated/efficient that would be.

Something like:

(* [to_array_2 t] copies the bindings of [t] into an array of
   [(key, value)] pairs, in [Hashtbl.fold] order.  An array of thunks is
   allocated first so that no dummy pair is needed; each slot is then
   overwritten with a thunk producing one binding. *)
let to_array_2 t =
  (* Placeholder thunk; it is only forced if a slot was never filled. *)
  let unset () = raise Not_found in
  let thunks = Array.init (Hashtbl.length t) (fun _ -> unset) in
  let store key value pos =
    thunks.(pos) <- (fun () -> (key, value));
    pos + 1
  in
  ignore (Hashtbl.fold store t 0);
  Array.map (fun thunk -> thunk ()) thunks


> 
> I suppose it just depends on how efficient you need it to be. 
> If it's just some simple stuff, I'd just use the intermediary list.


Benchmarking shows that all three approaches are similar
with respect to efficiency.

Regards,
Christoph Bauer

Benchmark:

Throughputs for to_array_1, to_array_2, to_array_3, each running 5 times for
at least 1 CPU seconds:
to_array_1:  1 WALL ( 1.14 usr +  0.00 sys =  1.14 CPU) @ 491.23/s (n=560)
             1 WALL ( 1.14 usr +  0.00 sys =  1.14 CPU) @ 491.23/s (n=560)
             2 WALL ( 1.14 usr +  0.00 sys =  1.14 CPU) @ 491.23/s (n=560)
             1 WALL ( 1.15 usr +  0.00 sys =  1.15 CPU) @ 486.96/s (n=560)
             1 WALL ( 1.15 usr +  0.00 sys =  1.15 CPU) @ 486.96/s (n=560)
to_array_2:  1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             2 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
to_array_3:  1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             2 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)

            Rate    to_array_2 to_array_3 to_array_1
to_array_2 482/s            --      [-0%]        -1%
to_array_3 482+-0/s       [0%]         --        -1%
to_array_1 490+-2/s         2%         2%         --

open Benchmark

(** [to_array_1 t] returns the bindings of [t] as an array of
    [(key, value)] pairs, in [Hashtbl.fold] order.  Returns the empty
    array when [t] has no bindings.

    The array is allocated lazily on the first binding, which doubles as
    the filler element, so no dummy value of the element type is needed. *)
let to_array_1 t =
  let place k v (a, i) =
    if i = 0 then
      (* First binding: allocate the result.  Slot 0 already holds
         [(k, v)], so the next write position is 1 (not 0, which would
         re-allocate the array on every binding). *)
      (Array.make (Hashtbl.length t) (k, v), 1)
    else begin
      a.(i) <- (k, v);
      (a, i + 1)
    end
  in
  fst (Hashtbl.fold place t ([||], 0))

(** [to_array_2 t] copies the bindings of [t] into an array of
    [(key, value)] pairs, in [Hashtbl.fold] order.  An array of thunks is
    allocated first so that no dummy pair is needed; each slot is then
    overwritten with a thunk producing one binding. *)
let to_array_2 t =
  (* Placeholder thunk; it is only forced if a slot was never filled. *)
  let unset () = raise Not_found in
  let thunks = Array.init (Hashtbl.length t) (fun _ -> unset) in
  let store key value pos =
    thunks.(pos) <- (fun () -> (key, value));
    pos + 1
  in
  ignore (Hashtbl.fold store t 0);
  Array.map (fun thunk -> thunk ()) thunks

(** [to_array_3 t] returns the bindings of [t] as an array of
    [(key, value)] pairs, going through an intermediate list built by
    consing each binding in [Hashtbl.fold] order. *)
let to_array_3 t =
  let bindings =
    Hashtbl.fold (fun key value acc -> (key, value) :: acc) t []
  in
  Array.of_list bindings


(** [h ()] builds the benchmark input: a fresh hash table holding
    100000 bindings with random integer keys and values.

    The loop bound is the intended element count, not [Hashtbl.length]
    of the new table (which is 0 and would insert a single binding).
    [Random.bits] is used instead of [Random.int max_int] because
    [Random.int] rejects bounds of 2^30 or more, which [max_int] exceeds
    on 64-bit systems. *)
let h () =
  let size = 100000 in
  let h = Hashtbl.create size in
  for _i = 1 to size do
    (* [Hashtbl.add] never replaces, so a duplicate key still adds a
       binding and the table ends up with exactly [size] bindings. *)
    Hashtbl.add h (Random.bits ()) (Random.bits ())
  done;
  h
      
(** [main ()] builds one shared input table with [h] and benchmarks the
    three conversion functions on it with [Benchmark.throughputN]
    ([~repeat:5], 1 second each), then prints the comparison table with
    [Benchmark.tabulate]. *)
let main () =
  let table = h () in
  let candidates =
    [ ("to_array_1", to_array_1, table);
      ("to_array_2", to_array_2, table);
      ("to_array_3", to_array_3, table) ]
  in
  tabulate (throughputN ~repeat:5 1 candidates)


(* Program entry point. *)
let () = main ()


             reply	other threads:[~2006-07-25 10:20 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-07-25 10:19 Christoph Bauer [this message]
2006-07-25 10:45 ` skaller
2006-07-25 12:44 Christoph Bauer
2006-07-26  9:46 ` Damien Doligez
2006-07-25 15:34 Christoph Bauer
2006-07-25 15:53 AW: " Christoph Bauer
2006-07-25 16:35 ` Tom
2006-07-26  9:29 Christoph Bauer
2006-07-26 14:41 Christoph Bauer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=26EB47FDD566A7469FC862DAF373792F0171128D@kaiserslautern1.lmsintl.com \
    --to=christoph.bauer@lms-gmbh.de \
    --cc=caml-list@inria.fr \
    --cc=erickt@dslextreme.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).