caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
* AW: AW: [Caml-list] generic Hashtbl.to_array
@ 2006-07-25 15:53 Christoph Bauer
  2006-07-25 16:35 ` Tom
  0 siblings, 1 reply; 11+ messages in thread
From: Christoph Bauer @ 2006-07-25 15:53 UTC (permalink / raw)
  To: Brian Hurt, caml-list

[-- Attachment #1: Type: text/plain, Size: 887 bytes --]


 

The dirtiest solution:

(* Copy every binding of [t] into a fresh array of (key, value) pairs, in
   [Hashtbl.fold] order.  [Obj.magic 0] is only an untyped placeholder used
   to allocate the array; the fold below is expected to overwrite every slot,
   one per binding. *)
let to_array t =
  let result = Array.make (Hashtbl.length t) (Obj.magic 0) in
  let store key value slot =
    result.(slot) <- (key, value);
    slot + 1
  in
  ignore (Hashtbl.fold store t 0 : int);
  result
 
 


Does it work correctly for floats? 

 

Looks good for floats. 
 
# let to_array t =
   let a =  Array.make (Hashtbl.length t) (Obj.magic 0)  in
     ignore
       (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0) ;
       a
 
  ;;
val to_array : ('a, 'b) Hashtbl.t -> ('a * 'b) array = <fun>
# let h = Hashtbl.create 0;;
val h : ('_a, '_b) Hashtbl.t = <abstr>
# Hashtbl.add h 1.0 2.0;;
- : unit = ()
# to_array h;;
- : (float * float) array = [|(1., 2.)|]
# Gc.compact ();;
- : unit = ()
#
 
 BTW, the array should store a pointer to a tuple of two floats, so
I think it doesn't matter whether they are floats or ints. I won't use this
 solution, because it isn't better than the others. 

 Christoph Bauer 



[-- Attachment #2: Type: text/html, Size: 3244 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread
* AW: [Caml-list] generic Hashtbl.to_array
@ 2006-07-26 14:41 Christoph Bauer
  2006-07-26 14:53 ` Tom
  0 siblings, 1 reply; 11+ messages in thread
From: Christoph Bauer @ 2006-07-26 14:41 UTC (permalink / raw)
  To: caml-list

[-- Attachment #1: Type: text/plain, Size: 4182 bytes --]

 


  _____  

Von: Tom [mailto:tom.primozic@gmail.com] 
Gesendet: Mittwoch, 26. Juli 2006 15:53
An: Christoph Bauer
Betreff: Re: [Caml-list] generic Hashtbl.to_array


Once again...

I'm sorry to say this, but I believe that your results are flawed...


Upon inspecting your code with Toploop, I found out some flaws... 


(* [h n] builds the benchmark input: a hash table filled by [n * 100000]
   calls to [Hashtbl.replace] with random integer keys and values.
   Colliding keys overwrite each other, so the table may end up with
   slightly fewer bindings than that. *)
let h n =
  let m = n * 100000 in
  let h = Hashtbl.create m in
  (* Loop up to [m - 1], not [Hashtbl.length h]: the length of a freshly
     created (empty) hashtable is 0. *)
  for _i = 0 to m - 1 do
    (* [Random.bits ()] rather than [Random.int max_int]: [Random.int]
       rejects bounds of 2^30 or more, which [max_int] exceeds on 64-bit
       systems. *)
    Hashtbl.replace h (Random.bits ()) (Random.bits ())
  done;
  h


 
 

 
thanks, sorry for this stupid bug.  to_array_5 is now the clear winner.
 
Christoph Bauer
 
 n=8
                   Rate to_array_2 to_array_3 to_array_1c to_array_1 to_array_4 to_array_1b to_array_5
 to_array_2 34.6+-0.4/s         --       -50%        -71%       -72%       -74%        -74%       -79%
 to_array_3 68.8+-0.6/s        99%         --        -42%       -45%       -49%        -49%       -58%
to_array_1c  118+-  2/s       241%        71%          --        -6%       -12%        -13%       -27%
 to_array_1  126+-  0/s       264%        83%          7%         --        -6%         -7%       -22%
 to_array_4  134+-  2/s       287%        95%         14%         6%         --       [-1%]       -17%
to_array_1b  135+-  1/s       290%        96%         15%         7%       [1%]          --       -17%
 to_array_5  162+- 10/s       369%       136%         37%        29%        21%         20%         --
 
 
 (* compile with
 
ocamlopt -o to_array -I benchmark-0.7 unix.cmxa benchmark-0.7/benchmark.cmx
to_array.ml
 
*)
 
open Benchmark
 
(* [to_array_1 t] returns the bindings of [t] as an array of (key, value)
   pairs, in [Hashtbl.fold] order.  The array is allocated lazily, when the
   first binding is seen, so that binding can serve as the fill value; an
   empty table yields an empty array. *)
let to_array_1 t =
  let step key value (arr, filled) =
    match filled with
    | 0 ->
      (* First binding: allocate the result, slot 0 is already correct. *)
      (Array.make (Hashtbl.length t) (key, value), 1)
    | _ ->
      arr.(filled) <- (key, value);
      (arr, filled + 1)
  in
  let arr, _ = Hashtbl.fold step t ([||], 0) in
  arr
 
(* [to_array_2 t] returns the bindings of [t] as an array of (key, value)
   pairs.  It sidesteps the missing initial value by first building an array
   of thunks (initially all raising [Not_found]), replacing each thunk with
   one that returns a binding, and finally forcing them all. *)
let to_array_2 t =
  let unset _ = fun () -> raise Not_found in
  let thunks = Array.init (Hashtbl.length t) unset in
  let store key value slot =
    thunks.(slot) <- (fun () -> (key, value));
    slot + 1
  in
  ignore (Hashtbl.fold store t 0 : int);
  Array.map (fun force -> force ()) thunks
 
(* [to_array_3 t] collects the bindings of [t] into a list of (key, value)
   pairs (each binding is consed onto the front) and converts that list to
   an array. *)
let to_array_3 t =
  let prepend key value acc = (key, value) :: acc in
  let bindings = Hashtbl.fold prepend t [] in
  Array.of_list bindings
 

(* [to_array_1b t] returns the bindings of [t] as an array of (key, value)
   pairs.  Same idea as [to_array_1], but the result array lives in a
   reference and only the next free index is threaded through the fold. *)
let to_array_1b t =
  let cell = ref (Array.init 0 (fun _ -> raise Not_found)) in
  let place key value pos =
    if pos <> 0 then begin
      (!cell).(pos) <- (key, value);
      pos + 1
    end else begin
      (* First binding: allocate the result with it as the fill value. *)
      cell := Array.make (Hashtbl.length t) (key, value);
      1
    end
  in
  ignore (Hashtbl.fold place t 0 : int);
  !cell
 
 
 
(* [to_array_4 t] returns the bindings of [t] as an array of (key, value)
   pairs.  It first grabs one binding (aborting [Hashtbl.iter] with [Exit]
   as soon as one is seen) to use as the fill value for [Array.make], then
   fills the array with a full [Hashtbl.fold].  An empty table yields an
   empty array. *)
let to_array_4 t =
  let first_binding () =
    let found = ref None in
    (try
       Hashtbl.iter
         (fun key value ->
            found := Some (key, value);
            raise Exit)
         t
     with Exit -> ());
    !found
  in
  match first_binding () with
  | None -> [||]
  | Some seed ->
    let arr = Array.make (Hashtbl.length t) seed in
    let store key value pos =
      arr.(pos) <- (key, value);
      pos + 1
    in
    ignore (Hashtbl.fold store t 0 : int);
    arr
 
 
 
(* [to_array_5 t] returns the bindings of [t] as an array of (key, value)
   pairs, in [Hashtbl.fold] order.  The array is pre-allocated with an
   untyped placeholder ([Obj.magic 0], created once when this definition is
   evaluated); the fold is expected to overwrite every slot. *)
let to_array_5 =
  let placeholder = Obj.magic 0 in
  fun t ->
    let out = Array.make (Hashtbl.length t) placeholder in
    let store key value slot =
      out.(slot) <- (key, value);
      slot + 1
    in
    ignore (Hashtbl.fold store t 0 : int);
    out
 
(* [to_array_1c t] returns the bindings of [t] as an array of (key, value)
   pairs.  The fold state is an option: [None] until the first binding is
   seen, then [Some (array, next_index)].  An empty table leaves the state
   at [None] and yields an empty array. *)
let to_array_1c t =
  let step key value = function
    | None -> Some (Array.make (Hashtbl.length t) (key, value), 1)
    | Some (arr, pos) ->
      arr.(pos) <- (key, value);
      Some (arr, pos + 1)
  in
  match Hashtbl.fold step t None with
  | Some (arr, _) -> arr
  | None -> Array.init 0 (fun _ -> raise Not_found)
 

      
(* [h n] builds the benchmark input: a hash table filled by [n * 1000] calls
   to [Hashtbl.replace] with random integer keys and values.  Colliding keys
   overwrite each other, so the table may hold slightly fewer bindings. *)
let h n =
  let m = n * 1000 in
  let h = Hashtbl.create m in
  for _i = 0 to m - 1 do
    (* [Random.bits ()] rather than [Random.int max_int]: [Random.int]
       rejects bounds of 2^30 or more, which [max_int] exceeds on 64-bit
       systems. *)
    Hashtbl.replace h (Random.bits ()) (Random.bits ())
  done;
  h
      
(* Benchmark driver.  Builds one table with [h n], where [n] is parsed from
   the first command-line argument (1 if reading or parsing it raises), runs
   every [to_array_*] variant on that table through [throughputN], and prints
   the comparison with [tabulate] (both come from [Benchmark]). *)
let main () =
  let scale = try int_of_string Sys.argv.(1) with _ -> 1 in
  let table = h scale in
  let candidates =
    [ ("to_array_1", to_array_1, table);
      ("to_array_1b", to_array_1b, table);
      ("to_array_1c", to_array_1c, table);
      ("to_array_2", to_array_2, table);
      ("to_array_3", to_array_3, table);
      ("to_array_4", to_array_4, table);
      ("to_array_5", to_array_5, table);
    ]
  in
  tabulate (throughputN ~repeat:5 1 candidates)


(* Program entry point. *)
let () = main ()


[-- Attachment #2: Type: text/html, Size: 12793 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread
* generic Hashtbl.to_array
@ 2006-07-26  2:16 oleg
  2006-07-26  9:48 ` [Caml-list] " Damien Doligez
  0 siblings, 1 reply; 11+ messages in thread
From: oleg @ 2006-07-26  2:16 UTC (permalink / raw)
  To: caml-list


I wonder about the following solution. At least it traverses the
hashtable exactly once (and it does not ignore the result of the
fold).


(* [to_array9 t] returns the bindings of [t] as an array of (key, value)
   pairs, traversing the table exactly once.  The fold state is [None] until
   the first binding is seen, then [Some (array, next_index)].
   An empty table never leaves [None]; it is mapped to an empty array rather
   than being destructured with a refutable [let Some ... =] pattern, which
   would raise [Match_failure]. *)
let to_array9 t =
  let fill k v = function
    | Some (a, i) ->
      a.(i) <- (k, v);
      Some (a, i + 1)
    | None ->
      (* First binding: use it as the fill value for the whole array. *)
      Some (Array.make (Hashtbl.length t) (k, v), 1)
  in
  match Hashtbl.fold fill t None with
  | Some (a, _) -> a
  | None -> [||]
;;


^ permalink raw reply	[flat|nested] 11+ messages in thread
* AW: [Caml-list] generic Hashtbl.to_array
@ 2006-07-25 12:44 Christoph Bauer
  2006-07-25 15:20 ` Tom
  0 siblings, 1 reply; 11+ messages in thread
From: Christoph Bauer @ 2006-07-25 12:44 UTC (permalink / raw)
  To: caml-list

> 
> let to_array_4 t =
>    let init = ref None in
>    begin try Hashtbl.iter (fun k v -> init := Some (k,v); 
> raise Exit) t

My guess: hashtbl has to loop over the first empty buckets.
And this eats the cpu cycles.

>    with Exit -> ()
>    end;
>    match !init with
>    | None -> [| |]
>    | Some i ->
>      let a = Array.make (Hashtbl.length t) i in
>        ignore (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i 
> + 1) t 0);
>        a

Regards,
Christoph Bauer


^ permalink raw reply	[flat|nested] 11+ messages in thread
* Re: [Caml-list] generic Hashtbl.to_array
@ 2006-07-25 12:00 Christoph Bauer
  2006-07-25 16:19 ` skaller
  0 siblings, 1 reply; 11+ messages in thread
From: Christoph Bauer @ 2006-07-25 12:00 UTC (permalink / raw)
  To: Damien Doligez, caml-list

Hi,

> let to_array t =
>    let init = ref None in
>    begin try Hashtbl.iter (fun k v -> init := Some (k,v); 
> raise Exit) t
>    with Exit -> ()
>    end;
>    match !init with
>    | None -> [| |]
>    | Some i ->
>      let a = Array.make (Hashtbl.length t) i in
>      ignore (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0);
>      a
> ;;

it's curious, but this solution is slower than the others!

[skaller's solution seems to be the same, so I
include only this one in the "benchmark"]

                Rate  to_array_4  to_array_3 to_array_1b  to_array_2  to_array_1
 to_array_4 407+-0/s          --        -16%        -16%        -17%        -17%
 to_array_3 486+-2/s         19%          --       [-0%]       [-1%]         -1%
to_array_1b 487+-0/s         20%        [0%]          --       [-0%]         -1%
 to_array_2 489+-2/s         20%        [1%]        [0%]          --         -1%
 to_array_1 491+-0/s         21%          1%          1%          1%          --

from http://ocaml-benchmark.sourceforge.net/doc/Benchmark.html

Benchmark.tabulate results prints a comparison table for a list of results
obtained by Benchmark.latencyN or Benchmark.throughputN with each function
compared to all the others. The table is of the type 


              Rate name1 name2 ...   OR          s/iter name1 name2 ...
        name1  #/s    --   r12             name1   #       --   r12
        name2  #/s   r21    --             name2   #      r21    --
        ...                                ...                            

where name1, name2,... are the labels of the tests sorted from slowest to
fastest and rij says how much namei is faster (or slower if < 0) than namej
(technically it is equal to (ri - rj) expressed in percents of rj where ri
and rj are the rates of namei and namej respectively). 

If several results are associated to a given name, they are used to compute
a Student's statistic to check whether the rates are significantly
different. If ri and rj are not believed to be different, rij will be
printed between brackets.

(* compile with

ocamlopt -o to_array -I benchmark-0.7 unix.cmxa benchmark-0.7/benchmark.cmx
to_array.ml

*)

open Benchmark

(* [to_array_1 t] returns the bindings of [t] as an array of (key, value)
   pairs, in [Hashtbl.fold] order.  The array is allocated when the first
   binding is seen, so that binding can serve as the fill value; an empty
   table yields an empty array. *)
let to_array_1 t =
  let dummy = [||] in
  fst
    (Hashtbl.fold
       (fun k v (a, i) ->
          if i = 0 then
            (* First binding: allocate the result and report slot 0 as
               filled.  The next index must be 1; returning 0 here would
               send every later binding through this branch again, so the
               array would be reallocated each time and end up holding only
               copies of the last binding. *)
            (Array.make (Hashtbl.length t) (k, v), 1)
          else begin
            a.(i) <- (k, v);
            (a, i + 1)
          end)
       t (dummy, 0))

(* [to_array_2 t] returns the bindings of [t] as an array of (key, value)
   pairs.  An array of thunks (initially all raising [Not_found]) stands in
   for the result; each thunk is replaced by one returning a binding, and
   the thunks are forced at the end. *)
let to_array_2 t =
  let unset _ = fun () -> raise Not_found in
  let thunks = Array.init (Hashtbl.length t) unset in
  let store key value slot =
    thunks.(slot) <- (fun () -> (key, value));
    slot + 1
  in
  let (_ : int) = Hashtbl.fold store t 0 in
  Array.map (fun force -> force ()) thunks

(* [to_array_3 t] folds the bindings of [t] into a list of (key, value)
   pairs, consing each one onto the front, then converts the list to an
   array. *)
let to_array_3 t =
  let cons_binding key value rest = (key, value) :: rest in
  let as_list = Hashtbl.fold cons_binding t [] in
  Array.of_list as_list


(* [to_array_1b t] returns the bindings of [t] as an array of (key, value)
   pairs.  The result array lives in a reference and only the next free
   index is threaded through the fold; an empty table yields an empty
   array. *)
let to_array_1b t =
  let a = ref [||] in
  ignore
    (Hashtbl.fold
       (fun k v i ->
          if i = 0 then begin
            a := Array.make (Hashtbl.length t) (k, v);
            (* Slot 0 is now filled, so the next index is 1.  Returning [i]
               (still 0) here would make every binding reallocate the array
               and leave it filled with the last binding only. *)
            1
          end else begin
            (!a).(i) <- (k, v);
            i + 1
          end)
       t 0 : int);
  !a



(* [to_array_4 t] returns the bindings of [t] as an array of (key, value)
   pairs.  One binding is fetched up front (by aborting [Hashtbl.iter] with
   [Exit] on the first callback) to serve as the fill value for
   [Array.make]; a full [Hashtbl.fold] then writes every slot.  An empty
   table yields an empty array. *)
let to_array_4 t =
  let sample = ref None in
  let grab_and_stop key value =
    sample := Some (key, value);
    raise Exit
  in
  (try Hashtbl.iter grab_and_stop t with Exit -> ());
  match !sample with
  | None -> [||]
  | Some fill ->
    let arr = Array.make (Hashtbl.length t) fill in
    let store key value pos =
      arr.(pos) <- (key, value);
      pos + 1
    in
    ignore (Hashtbl.fold store t 0 : int);
    arr



(* [h ()] builds the benchmark input: a hash table holding 100000 bindings
   with random integer keys and values (added with [Hashtbl.add], so
   duplicate keys are kept as separate bindings). *)
let h () =
  let size = 100000 in
  let h = Hashtbl.create size in
  (* Loop on the intended size, not on [Hashtbl.length h]: the table is
     still empty here, so its length is 0 and only one binding would be
     added. *)
  for _i = 0 to size - 1 do
    (* [Random.bits ()] rather than [Random.int max_int]: [Random.int]
       rejects bounds of 2^30 or more, which [max_int] exceeds on 64-bit
       systems. *)
    Hashtbl.add h (Random.bits ()) (Random.bits ())
  done;
  h
      
(* Benchmark driver.  Builds one table with [h ()], runs every [to_array_*]
   variant on it through [throughputN], and prints the comparison with
   [tabulate] (both come from [Benchmark]). *)
let main () =
  let table = h () in
  let candidates =
    [ ("to_array_1", to_array_1, table);
      ("to_array_1b", to_array_1b, table);
      ("to_array_2", to_array_2, table);
      ("to_array_3", to_array_3, table);
      ("to_array_4", to_array_4, table);
    ]
  in
  tabulate (throughputN ~repeat:5 1 candidates)


(* Program entry point. *)
let () = main ()



^ permalink raw reply	[flat|nested] 11+ messages in thread
* generic Hashtbl.to_array
@ 2006-07-25  8:29 Christoph Bauer
  2006-07-25  9:14 ` [Caml-list] " Erick Tryzelaar
  2006-07-25 11:45 ` Damien Doligez
  0 siblings, 2 replies; 11+ messages in thread
From: Christoph Bauer @ 2006-07-25  8:29 UTC (permalink / raw)
  To: caml-list

Hi,

what is the best way to write Hashtbl.to_array?

Hashtbl.to_array : ('a, 'b) Hashtbl.t -> ('a * 'b) array

The simplest idea has the problem that you don't have
an initial value to make the result array:

let to_array t =
  let a =  Array.make (Hashtbl.length t) ?init?  in
    ignore
      (Hashtbl.fold
         (fun k v i ->
            a.(i) <- (k, v); i + 1)
         t 0);
    a

The best solution I found is

(* [to_array t] returns the bindings of [t] as an array of (key, value)
   pairs, in [Hashtbl.fold] order.  The array is allocated when the first
   binding is seen, so that binding can serve as the fill value; an empty
   table yields an empty array. *)
let to_array t =
  let dummy = [||] in
  fst
    (Hashtbl.fold
       (fun k v (a, i) ->
          if i = 0 then
            (* First binding: allocate the result and report slot 0 as
               filled.  The next index must be 1; returning 0 here would
               send every later binding through this branch again, so the
               array would be reallocated each time and end up holding only
               copies of the last binding. *)
            (Array.make (Hashtbl.length t) (k, v), 1)
          else begin
            a.(i) <- (k, v);
            (a, i + 1)
          end)
       t (dummy, 0))

Is there a better one?

Thanks,
Christoph Bauer


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2006-08-15  8:26 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-07-25 15:53 AW: AW: [Caml-list] generic Hashtbl.to_array Christoph Bauer
2006-07-25 16:35 ` Tom
2006-08-15  8:26   ` Stéphane Glondu
  -- strict thread matches above, loose matches on Subject: below --
2006-07-26 14:41 AW: " Christoph Bauer
2006-07-26 14:53 ` Tom
2006-07-26  2:16 oleg
2006-07-26  9:48 ` [Caml-list] " Damien Doligez
2006-07-25 12:44 AW: " Christoph Bauer
2006-07-25 15:20 ` Tom
2006-08-15  8:08   ` Stéphane Glondu
2006-07-25 12:00 Christoph Bauer
2006-07-25 16:19 ` skaller
2006-07-25  8:29 Christoph Bauer
2006-07-25  9:14 ` [Caml-list] " Erick Tryzelaar
2006-07-25 11:45 ` Damien Doligez

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).