caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
* AW: [Caml-list] generic Hashtbl.to_array
@ 2006-07-25 12:44 Christoph Bauer
  2006-07-25 15:20 ` Tom
  2006-07-26  9:46 ` AW: " Damien Doligez
  0 siblings, 2 replies; 10+ messages in thread
From: Christoph Bauer @ 2006-07-25 12:44 UTC (permalink / raw)
  To: caml-list

> 
> let to_array_4 t =
>    let init = ref None in
>    begin try Hashtbl.iter (fun k v -> init := Some (k,v); 
> raise Exit) t

My guess: hashtbl has to loop over the first empty buckets.
And this eats the cpu cycles.

>    with Exit -> ()
>    end;
>    match !init with
>    | None -> [| |]
>    | Some i ->
>      let a = Array.make (Hashtbl.length t) i in
>        ignore (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i 
> + 1) t 0);
>        a

Regards,
Christoph Bauer


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Caml-list] generic Hashtbl.to_array
  2006-07-25 12:44 AW: [Caml-list] generic Hashtbl.to_array Christoph Bauer
@ 2006-07-25 15:20 ` Tom
  2006-08-15  8:08   ` Stéphane Glondu
  2006-07-26  9:46 ` AW: " Damien Doligez
  1 sibling, 1 reply; 10+ messages in thread
From: Tom @ 2006-07-25 15:20 UTC (permalink / raw)
  To: caml-list

[-- Attachment #1: Type: text/plain, Size: 180 bytes --]

The dirtiest solution:

let to_array t =
 let a =  Array.make (Hashtbl.length t) (Obj.magic 0)  in
   ignore
     (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0) ;
     a

[-- Attachment #2: Type: text/html, Size: 319 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: AW: [Caml-list] generic Hashtbl.to_array
  2006-07-25 12:44 AW: [Caml-list] generic Hashtbl.to_array Christoph Bauer
  2006-07-25 15:20 ` Tom
@ 2006-07-26  9:46 ` Damien Doligez
  1 sibling, 0 replies; 10+ messages in thread
From: Damien Doligez @ 2006-07-26  9:46 UTC (permalink / raw)
  To: caml-list


On 2006-07-25, at 14:44, Christoph Bauer wrote:

>>
>> let to_array_4 t =
>>    let init = ref None in
>>    begin try Hashtbl.iter (fun k v -> init := Some (k,v);
>> raise Exit) t
>
> My guess: hashtbl has to loop over the first empty buckets.
> And this eats the cpu cycles.

I guess that's correct, since you're doing your tests with a
100000-sized hash table that contains only one entry. I wouldn't
call that a typical case.

-- Damien


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Caml-list] generic Hashtbl.to_array
  2006-07-25 15:20 ` Tom
@ 2006-08-15  8:08   ` Stéphane Glondu
  0 siblings, 0 replies; 10+ messages in thread
From: Stéphane Glondu @ 2006-08-15  8:08 UTC (permalink / raw)
  To: caml-list

I know I am late.

Tom a écrit :
> The dirtiest solution:
> 
> let to_array t =
>  let a =  Array.make (Hashtbl.length t) (Obj.magic 0)  in
>    ignore
>      (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0) ;
>      a

What about:

let to_array h =
  let res = ref [||] in
  let rec assign = ref (fun i x ->
			  res := Array.make (Hashtbl.length h) x;
			  !res.(i) <- x;
			  assign := Array.set !res) in
  ignore (Hashtbl.fold (fun k v i -> !assign i (k, v); i+1) h 0);
  !res;;

Sorry if it has already been proposed (but I have not seen it).

-- 
Stéphane Glondu


^ permalink raw reply	[flat|nested] 10+ messages in thread

* AW: [Caml-list] generic Hashtbl.to_array
@ 2006-07-26 14:41 Christoph Bauer
  0 siblings, 0 replies; 10+ messages in thread
From: Christoph Bauer @ 2006-07-26 14:41 UTC (permalink / raw)
  To: caml-list

[-- Attachment #1: Type: text/plain, Size: 4182 bytes --]

 


  _____  

Von: Tom [mailto:tom.primozic@gmail.com] 
Gesendet: Mittwoch, 26. Juli 2006 15:53
An: Christoph Bauer
Betreff: Re: [Caml-list] generic Hashtbl.to_array


Once again...

I'm sorry to say that, but I believe that you results are flawed...


Upon inspecting your code with Toploop, I found out some flaws... 


let h n =
 let m = n * 100000 in
 let h = Hashtbl.create m in

   for i = 0 to m - 1 do       (* <<< not Hashtbl.length h, as it returns 0
for ampty hashtable *)
     Hashtbl.replace h (Random.int <http://random.int/>  max_int) (
Random.int <http://random.int/>  max_int);
   done;
   h


 
 

 
thanks, sorry for this stupid bug.  to_array_5 is now the clear winner.
 
Christoph Bauer
 
 n=8
             Rate      to_array_2 to_array_3 to_array_1c to_array_1
to_array_4 to_array_1b to_array_5
 to_array_2 34.6+-0.4/s         --       -50%        -71%       -72%
-74%        -74%       -79%
 to_array_3 68.8+-0.6/s        99%         --        -42%       -45%
-49%        -49%       -58%
to_array_1c  118+-  2/s       241%        71%          --        -6%
-12%        -13%       -27%
 to_array_1  126+-  0/s       264%        83%          7%         --
-6%         -7%       -22%
 to_array_4  134+-  2/s       287%        95%         14%         6%
--       [-1%]       -17%
to_array_1b  135+-  1/s       290%        96%         15%         7%
[1%]          --       -17%
 to_array_5  162+- 10/s       369%       136%         37%        29%
21%         20%         --
 
 
 (* compile with
 
ocamlopt -o to_array -I benchmark-0.7 unix.cmxa benchmark-0.7/benchmark.cmx
to_array.ml
 
*)
 
open Benchmark
 
let to_array_1 t =
  let dummy =  Array.init 0 (fun _ -> raise Not_found) in
    fst
      (Hashtbl.fold
         (fun k v (a, i) ->
            if i = 0 then  
              let a = Array.make (Hashtbl.length t) (k, v) in
                (a, 1)
            else (a.(i) <- (k, v); (a, i + 1)))
         t (dummy, 0))
 
let to_array_2 t =
  let init _ = fun () -> raise Not_found  in
  let a = Array.init (Hashtbl.length t) init in
    ignore
      (Hashtbl.fold (fun k v i -> a.(i) <- (fun () -> (k, v)); i+1) t 0);
    Array.map (fun f -> f ())  a
 
let to_array_3 t =
  Array.of_list (Hashtbl.fold (fun a b c -> (a, b) :: c) t [])
 

let to_array_1b t =
  let a = ref (Array.init 0 (fun _ -> raise Not_found)) in
    ignore
      (Hashtbl.fold
         (fun k v i ->
            if i = 0 then
              (a := Array.make (Hashtbl.length t) (k, v);
               1)
            else
              ((!a).(i) <- (k, v); i + 1))
         t 0);
    !a
 
 
 
let to_array_4 t =
  let init = ref None in
    begin try Hashtbl.iter (fun k v -> init := Some (k,v); raise Exit) t
      with Exit -> ()
    end;
    match !init with
      | None -> [| |]
      | Some i ->
          let a = Array.make (Hashtbl.length t) i in
            ignore (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0);
            a
 
 
 
let to_array_5 =
  let init = Obj.magic 0 in
    fun t ->
      let a =  Array.make (Hashtbl.length t) init  in
        ignore
          (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0) ; 
        a 
 
let to_array_1c t =
  let r =
    Hashtbl.fold (fun k v seed ->
                    match seed with
                 Some (a,i) -> a.(i) <- (k,v); Some (a,i+1)
                      | None -> let a =  Array.make (Hashtbl.length t) (k,v)
in
                                  Some (a,1))
      t None 
  in
    match r with
        None -> Array.init 0 (fun _ -> raise Not_found)
      | Some (a, _) -> a
 

      
let h n = 
  let m = n * 1000 in
  let h = Hashtbl.create m in
    for i = 0 to m - 1 do
      Hashtbl.replace h (Random.int max_int) (Random.int max_int);
    done;
    h
      
let main () =
  let n = try int_of_string Sys.argv.(1) with _ -> 1 in
  let h = h n in
  let res = throughputN ~repeat:5 1
    [("to_array_1", to_array_1, h);
     ("to_array_1b", to_array_1b, h);
     ("to_array_1c", to_array_1c, h);
     ("to_array_2", to_array_2, h);
     ("to_array_3", to_array_3, h);
     ("to_array_4", to_array_4, h);
     ("to_array_5", to_array_5, h);
 
      ] in
      tabulate res
 

let () =  main () 


[-- Attachment #2: Type: text/html, Size: 12793 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* AW: [Caml-list] generic Hashtbl.to_array
@ 2006-07-26  9:29 Christoph Bauer
  0 siblings, 0 replies; 10+ messages in thread
From: Christoph Bauer @ 2006-07-26  9:29 UTC (permalink / raw)
  To: caml-list

> 
> let to_array9 t =
>   let Some (a,_) =
>     Hashtbl.fold (fun k v seed ->
>       match seed with
> 	Some (a,i) -> a.(i) <- (k,v); Some (a,i+1)
>       | None -> let a =  Array.make (Hashtbl.length t) (k,v) in
>                 Some (a,1))
>       t None
>   in a
> ;;

I called this to_array_1c. The hashtable is initialized with n * 100,000
elements, where n is n = 1, 2, 4, 8. Here the results. n = 8 is a good
summary.

n = 1 

             Rate    to_array_4 to_array_1c to_array_3 to_array_2
to_array_1b to_array_5 to_array_1
 to_array_4 413+-1/s         --        -17%       -17%       -18%
-18%       -19%       -19%
to_array_1c 498+-2/s        21%          --      [-0%]        -1%
-1%        -2%        -3%
 to_array_3 500+-3/s        21%        [0%]         --      [-0%]
[-0%]        -2%        -2%
 to_array_2 502+-2/s        21%          1%       [0%]         --
[-0%]        -1%        -2%
to_array_1b 502+-2/s        21%          1%       [0%]       [0%]
--        -1%        -2%
 to_array_5 509+-2/s        23%          2%         2%         1%
1%         --      [-1%]
 to_array_1 512+-2/s        24%          3%         2%         2%
2%       [1%]         --

n = 2

            Rate    to_array_4 to_array_2 to_array_3 to_array_1c to_array_1b
to_array_1 to_array_5
 to_array_4 203+-1/s         --        -7%        -7%         -8%
-8%        -8%        -8%
 to_array_2 218+-1/s         8%         --      [-0%]       [-0%]
[-0%]      [-1%]        -1%
 to_array_3 218+-1/s         8%       [0%]         --       [-0%]
[-0%]      [-1%]        -1%
to_array_1c 219+-1/s         8%       [0%]       [0%]          --
[-0%]      [-0%]      [-1%]
to_array_1b 219+-1/s         8%       [0%]       [0%]        [0%]
--      [-0%]        -1%
 to_array_1 220+-1/s         8%       [1%]       [1%]        [0%]
[0%]         --      [-1%]
 to_array_5 221+-1/s         9%         1%         1%        [1%]
1%       [1%]         --

n = 4 

              Rate      to_array_4 to_array_2 to_array_1c to_array_1b
to_array_1 to_array_3 to_array_5
 to_array_4 64.7+-0.3/s         --       -34%        -34%        -35%
-35%       -35%       -35%
 to_array_2 98.4+-0.6/s        52%         --       [-0%]       [-1%]
[-1%]        -1%        -1%
to_array_1c 98.7+-0.4/s        52%       [0%]          --       [-0%]
[-1%]        -1%        -1%
to_array_1b 99.0+-0.0/s        53%       [1%]        [0%]          --
[-0%]        -1%        -1%
 to_array_1 99.4+-0.7/s        54%       [1%]        [1%]        [0%]
--      [-0%]      [-0%]
 to_array_3 99.6+-0.4/s        54%         1%          1%          1%
[0%]         --      [-0%]
 to_array_5 99.8+-0.4/s        54%         1%          1%          1%
[0%]       [0%]         --

n = 8

             Rate      to_array_4 to_array_5 to_array_3 to_array_2
to_array_1 to_array_1b to_array_1c
 to_array_4 38.8+-0.2/s         --       -20%       -20%       -21%
-21%        -21%        -21%
 to_array_5 48.7+-0.3/s        26%         --      [-0%]      [-0%]
[-0%]       [-0%]       [-0%]
 to_array_3 48.7+-0.2/s        26%       [0%]         --      [-0%]
[-0%]       [-0%]       [-0%]
 to_array_2 48.8+-0.2/s        26%       [0%]       [0%]         --
[-0%]       [-0%]       [-0%]
 to_array_1 48.8+-0.2/s        26%       [0%]       [0%]       [0%]
--       [-0%]       [-0%]
to_array_1b 48.9+-0.2/s        26%       [0%]       [0%]       [0%]
[0%]          --       [-0%]
to_array_1c 48.9+-0.2/s        26%       [0%]       [0%]       [0%]
[0%]        [0%]          --



(* compile with

ocamlopt -o to_array -I benchmark-0.7 unix.cmxa benchmark-0.7/benchmark.cmx
to_array.ml

*)

open Benchmark

let to_array_1 t =
  let dummy =  Array.init 0 (fun _ -> raise Not_found) in
    fst
      (Hashtbl.fold
         (fun k v (a, i) ->
            if i = 0 then  
              let a = Array.make (Hashtbl.length t) (k, v) in
                (a, 1)
            else (a.(i) <- (k, v); (a, i + 1)))
         t (dummy, 0))

let to_array_2 t =
  let init _ = fun () -> raise Not_found  in
  let a = Array.init (Hashtbl.length t) init in
    ignore
      (Hashtbl.fold (fun k v i -> a.(i) <- (fun () -> (k, v)); i+1) t 0);
    Array.map (fun f -> f ())  a

let to_array_3 t =
  Array.of_list (Hashtbl.fold (fun a b c -> (a, b) :: c) t [])


let to_array_1b t =
  let a = ref (Array.init 0 (fun _ -> raise Not_found)) in
    ignore
      (Hashtbl.fold
         (fun k v i ->
            if i = 0 then
              (a := Array.make (Hashtbl.length t) (k, v);
               i)
            else
              ((!a).(i) <- (k, v); i + 1))
         t 0);
    !a



let to_array_4 t =
  let init = ref None in
    begin try Hashtbl.iter (fun k v -> init := Some (k,v); raise Exit) t
      with Exit -> ()
    end;
    match !init with
      | None -> [| |]
      | Some i ->
          let a = Array.make (Hashtbl.length t) i in
            ignore (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0);
            a



let to_array_5 =
  let init = Obj.magic 0 in
    fun t ->
      let a =  Array.make (Hashtbl.length t) init  in
        ignore
          (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0) ; 
        a 

let to_array_1c t =
  let r =
    Hashtbl.fold (fun k v seed ->
                    match seed with
	                Some (a,i) -> a.(i) <- (k,v); Some (a,i+1)
                      | None -> let a =  Array.make (Hashtbl.length t) (k,v)
in
                                  Some (a,1))
      t None 
  in
    match r with
        None -> Array.init 0 (fun _ -> raise Not_found)
      | Some (a, _) -> a


      
let h n = 
  let h = Hashtbl.create (n*100000) in
    for i = 0 to (Hashtbl.length h) do
      Hashtbl.replace h (Random.int max_int) (Random.int max_int);
    done;
    h
      
let main () =
  let n = try int_of_string Sys.argv.(1) with _ -> 1 in
  let h = h n in
  let res = throughputN ~repeat:5 1
    [("to_array_1", to_array_1, h);
     ("to_array_1b", to_array_1b, h);
     ("to_array_1c", to_array_1c, h);
     ("to_array_2", to_array_2, h);
     ("to_array_3", to_array_3, h);
     ("to_array_4", to_array_4, h);
     ("to_array_5", to_array_5, h);

      ] in
      tabulate res


let () =  main () 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: AW: [Caml-list] generic Hashtbl.to_array
  2006-07-25 15:53 AW: " Christoph Bauer
@ 2006-07-25 16:35 ` Tom
  0 siblings, 0 replies; 10+ messages in thread
From: Tom @ 2006-07-25 16:35 UTC (permalink / raw)
  To: Christoph Bauer; +Cc: Brian Hurt, caml-list

[-- Attachment #1: Type: text/plain, Size: 1361 bytes --]

I'm sorry to say that, but I believe that you results are flawed...

If we look at the code of to_array_1 and to_array_5, there is no possibility
that the former was faster... if nothing else, it has an additional if jump
each and every loop. I simply couldn't believe your results.

Upon inspecting your code with Toploop, I found out some flaws...

let h () =
  let h = Hashtbl.create 100000 in
    for i = 0 to 99999 do            (* <<< not Hashtbl.length h, as it
returns 0 for ampty hashtable *)
      Hashtbl.add h (Random.int max_int) (Random.int max_int);
    done;
    h

let to_array_1 t =
  let dummy =  Array.init 0 (fun _ -> raise Not_found) in
    fst
      (Hashtbl.fold

         (fun k v (a, i) ->
            if i = 0 then
              let a = Array.make (Hashtbl.length t) (k, v) in
                (a, 1)           (* <<<<< Not 0, as it causes no progress *)
            else (a.(i) <- (k, v); (a, i + 1)))
         t (dummy, 0))

I also corrected my implementation:

let mgc = Obj.magic 0      <<< So that the function is executed only once.
let to_array_5 t =
 let a =  Array.make (Hashtbl.length t) mgc in
   ignore
     (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0) ;
     a

I tried to do some benchmarking, but I do not have much time... anyhow, my
implementation is faster as far as I tested it.

Believe in your dreams!

[-- Attachment #2: Type: text/html, Size: 2241 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* AW: [Caml-list] generic Hashtbl.to_array
@ 2006-07-25 15:34 Christoph Bauer
  0 siblings, 0 replies; 10+ messages in thread
From: Christoph Bauer @ 2006-07-25 15:34 UTC (permalink / raw)
  To: Tom, caml-list

[-- Attachment #1: Type: text/plain, Size: 3271 bytes --]

 


  _____  

Von: caml-list-bounces@yquem.inria.fr
[mailto:caml-list-bounces@yquem.inria.fr] Im Auftrag von Tom
Gesendet: Dienstag, 25. Juli 2006 17:21
An: caml-list
Betreff: Re: [Caml-list] generic Hashtbl.to_array


The dirtiest solution:

let to_array t =
 let a =  Array.make (Hashtbl.length t) (Obj.magic 0)  in
   ignore
     (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0) ; 
     a  
 
 
 

This is to_array_5 in the table. Dirty and not the fastest. Don't know why. 
Maybe a in to_array_1 is keept in a register.
 
 
 
          Rate    to_array_4 to_array_3 to_array_5 to_array_1b to_array_2
to_array_1
 to_array_4 418+-0/s         --       -16%       -16%        -16%       -17%
-18%
 to_array_3 497+-3/s        19%         --      [-0%]       [-0%]        -1%
-3%
 to_array_5 499+-2/s        19%       [0%]         --       [-0%]        -1%
-2%
to_array_1b 499+-2/s        19%       [0%]       [0%]          --        -1%
-2%
 to_array_2 504+-2/s        21%         1%         1%          1%         --
-1%
 to_array_1 511+-2/s        22%         3%         2%          2%         1%
--
 
Christoph Bauer
 
(* compile with
 
ocamlopt -o to_array -I benchmark-0.7 unix.cmxa benchmark-0.7/benchmark.cmx
to_array.ml
 
*)
 
open Benchmark
 
let to_array_1 t =
  let dummy =  Array.init 0 (fun _ -> raise Not_found) in
    fst
      (Hashtbl.fold
         (fun k v (a, i) ->
            if i = 0 then  
              let a = Array.make (Hashtbl.length t) (k, v) in
                (a, 0)
            else (a.(i) <- (k, v); (a, i + 1)))
         t (dummy, 0))
 
let to_array_2 t =
  let init _ = fun () -> raise Not_found  in
  let a = Array.init (Hashtbl.length t) init in
    ignore
      (Hashtbl.fold (fun k v i -> a.(i) <- (fun () -> (k, v)); i+1) t 0);
    Array.map (fun f -> f ())  a
 
let to_array_3 t =
  Array.of_list (Hashtbl.fold (fun a b c -> (a, b) :: c) t [])
 

let to_array_1b t =
  let a = ref (Array.init 0 (fun _ -> raise Not_found)) in
    ignore
      (Hashtbl.fold
         (fun k v i ->
            if i = 0 then
              (a := Array.make (Hashtbl.length t) (k, v);
               i)
            else
              ((!a).(i) <- (k, v); i + 1))
         t 0);
    !a
 
 
 
let to_array_4 t =
  let init = ref None in
    begin try Hashtbl.iter (fun k v -> init := Some (k,v); raise Exit) t
      with Exit -> ()
    end;
    match !init with
      | None -> [| |]
      | Some i ->
          let a = Array.make (Hashtbl.length t) i in
            ignore (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0);
            a
 
 
 
let to_array_5 t =
 let a =  Array.make (Hashtbl.length t) (Obj.magic 0)  in
   ignore
     (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0) ; 
     a 
 
let h () = 
  let h = Hashtbl.create 100000 in
    for i = 0 to (Hashtbl.length h) do
      Hashtbl.add h (Random.int max_int) (Random.int max_int);
    done;
    h
      
let main () =
  let h = h () in
  let res = throughputN ~repeat:5 1
    [     ("to_array_5", to_array_5, h);
          ("to_array_1", to_array_1, h);
          ("to_array_1b", to_array_1b, h);
     ("to_array_2", to_array_2, h);
     ("to_array_3", to_array_3, h);
     ("to_array_4", to_array_4, h);
 
      ] in
      tabulate res
 

let () =  main () 


 

[-- Attachment #2: Type: text/html, Size: 10058 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: AW: [Caml-list] generic Hashtbl.to_array
  2006-07-25 10:19 Christoph Bauer
@ 2006-07-25 10:45 ` skaller
  0 siblings, 0 replies; 10+ messages in thread
From: skaller @ 2006-07-25 10:45 UTC (permalink / raw)
  To: Christoph Bauer; +Cc: Erick Tryzelaar, caml-list

On Tue, 2006-07-25 at 12:19 +0200, Christoph Bauer wrote:
> Hi,
> 
> > 
> > You could also try inverting the Hashtbl fold into an 
> > iterator+closure and pass the closure into the Array.init 
> > function, but I'm not sure how complicated/efficient that would be.
> 
> Something like:
> 
> let to_array_2 t =
>   let init _ = fun () -> raise Not_found  in
>   let a = Array.init (Hashtbl.length t) init in
>     ignore
>       (Hashtbl.fold (fun k v i -> a.(i) <- (fun () -> (k, v)); i+1) t 0);
>     Array.map (fun f -> f ())  a

ugg .. really need a variable length array here. However the most
efficient solution is:

(1) Use Hashtbl.iter to capture just one value out of the 
Hashtble:

	let get1 h = 
		Hashtbl.iter (fun k v -> raise XSome (k,v)) h;
		raise XNone
	in

(2) Initialise the array with it:

	let a = try get1 h with
	| XSome x -> Array.init (Hashtbl.length h) x
	| XNone -> raise Not_found (* zero length array? *)
	in

(3) Now use Hashtbl.fold or iter to initialise the array.

This 'costs' a write barrier but there is no additional
data structure required, and the whole thing is quite safe.

This leaves open the problem of making a zero length array
of the correct type.. not a problem if you're writing
the code inside the Hashtbl module, but trickier if you're
doing it outside.

-- 
John Skaller <skaller at users dot sf dot net>
Felix, successor to C++: http://felix.sf.net


^ permalink raw reply	[flat|nested] 10+ messages in thread

* AW: [Caml-list] generic Hashtbl.to_array
@ 2006-07-25 10:19 Christoph Bauer
  2006-07-25 10:45 ` skaller
  0 siblings, 1 reply; 10+ messages in thread
From: Christoph Bauer @ 2006-07-25 10:19 UTC (permalink / raw)
  To: Erick Tryzelaar, Christoph Bauer; +Cc: caml-list

Hi,

> 
> You could also try inverting the Hashtbl fold into an 
> iterator+closure and pass the closure into the Array.init 
> function, but I'm not sure how complicated/efficient that would be.

Something like:

let to_array_2 t =
  let init _ = fun () -> raise Not_found  in
  let a = Array.init (Hashtbl.length t) init in
    ignore
      (Hashtbl.fold (fun k v i -> a.(i) <- (fun () -> (k, v)); i+1) t 0);
    Array.map (fun f -> f ())  a


> 
> I suppose it just depends on how efficient you need it to be. 
> If it's just some simple stuff, I'd just use the intermediary list.


benchmarking shows, that all three approaches are similar
with respect to efficiency.

Regards,
Christoph Bauer

Benchmark:

Throughputs for to_array_1, to_array_2, to_array_3, each running 5 times for
at least 1 CPU seconds:
to_array_1:  1 WALL ( 1.14 usr +  0.00 sys =  1.14 CPU) @ 491.23/s (n=560)
             1 WALL ( 1.14 usr +  0.00 sys =  1.14 CPU) @ 491.23/s (n=560)
             2 WALL ( 1.14 usr +  0.00 sys =  1.14 CPU) @ 491.23/s (n=560)
             1 WALL ( 1.15 usr +  0.00 sys =  1.15 CPU) @ 486.96/s (n=560)
             1 WALL ( 1.15 usr +  0.00 sys =  1.15 CPU) @ 486.96/s (n=560)
to_array_2:  1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             2 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
to_array_3:  1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             1 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)
             2 WALL ( 1.07 usr +  0.00 sys =  1.07 CPU) @ 482.24/s (n=516)

            Rate    to_array_2 to_array_3 to_array_1
to_array_2 482/s            --      [-0%]        -1%
to_array_3 482+-0/s       [0%]         --        -1%
to_array_1 490+-2/s         2%         2%         --

open Benchmark

let to_array_1 t =
  let dummy =  Array.init 0 (fun _ -> raise Not_found) in
    fst
      (Hashtbl.fold
         (fun k v (a, i) ->
            if i = 0 then  
              let a = Array.make (Hashtbl.length t) (k, v) in
                (a, 0)
            else (a.(i) <- (k, v); (a, i + 1)))
         t (dummy, 0))

let to_array_2 t =
  let init _ = fun () -> raise Not_found  in
  let a = Array.init (Hashtbl.length t) init in
    ignore
      (Hashtbl.fold (fun k v i -> a.(i) <- (fun () -> (k, v)); i+1) t 0);
    Array.map (fun f -> f ())  a

let to_array_3 t =
  Array.of_list (Hashtbl.fold (fun a b c -> (a, b) :: c) t [])


let h () = 
  let h = Hashtbl.create 100000 in
    for i = 0 to (Hashtbl.length h) do
      Hashtbl.add h (Random.int max_int) (Random.int max_int);
    done;
    h
      
let main () =
  let h = h () in
  let res = throughputN ~repeat:5 1
    [("to_array_1", to_array_1, h);
     ("to_array_2", to_array_2, h);
     ("to_array_3", to_array_3, h); ] in
    tabulate res


let () =  main () 


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2006-08-15  8:08 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-07-25 12:44 AW: [Caml-list] generic Hashtbl.to_array Christoph Bauer
2006-07-25 15:20 ` Tom
2006-08-15  8:08   ` Stéphane Glondu
2006-07-26  9:46 ` AW: " Damien Doligez
  -- strict thread matches above, loose matches on Subject: below --
2006-07-26 14:41 Christoph Bauer
2006-07-26  9:29 Christoph Bauer
2006-07-25 15:53 AW: " Christoph Bauer
2006-07-25 16:35 ` Tom
2006-07-25 15:34 Christoph Bauer
2006-07-25 10:19 Christoph Bauer
2006-07-25 10:45 ` skaller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).