caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
* Re: [Caml-list] generic Hashtbl.to_array
@ 2006-07-25 12:00 Christoph Bauer
  2006-07-25 16:19 ` skaller
  0 siblings, 1 reply; 9+ messages in thread
From: Christoph Bauer @ 2006-07-25 12:00 UTC (permalink / raw)
  To: Damien Doligez, caml-list

Hi,

> let to_array t =
>    let init = ref None in
>    begin try Hashtbl.iter (fun k v -> init := Some (k,v); 
> raise Exit) t
>    with Exit -> ()
>    end;
>    match !init with
>    | None -> [| |]
>    | Some i ->
>      let a = Array.make (Hashtbl.length t) i in
>      ignore (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0);
>      a
> ;;

it's curious, but this solution is slower than the others!

[skaller's solution seems to be the same, so I
include only this one in the "benchmark"]

             Rate     to_array_4  to_array_3 to_array_1b  to_array_2
to_array_1
 to_array_4 407+-0/s          --        -16%        -16%        -17%
-17%
 to_array_3 486+-2/s         19%          --       [-0%]       [-1%]
-1%
to_array_1b 487+-0/s         20%        [0%]          --       [-0%]
-1%
 to_array_2 489+-2/s         20%        [1%]        [0%]          --
-1%
 to_array_1 491+-0/s         21%          1%          1%          1%
--

from http://ocaml-benchmark.sourceforge.net/doc/Benchmark.html

Benchmark.tablulate results prints a comparison table for a list of results
obtained by Benchmark.latencyN or Benchmark.throughputN with each function
compared to all the others. The table is of the type 


              Rate name1 name2 ...   OR          s/iter name1 name2 ...
        name1  #/s    --   r12             name1   #       --   r12
        name2  #/s   r21    --             name2   #      r21    --
        ...                                ...                            

where name1, name2,... are the labels of the tests sorted from slowest to
fastest and rij says how much namei is faster (or slower if < 0) than namej
(technically it is equal to (ri - rj) expressed in percents of rj where ri
and rj are the rates of namei and namej respectively). 

If several results are associated to a given name, they are used to compute
a Student's statistic to check whether the rates are significantly
different. If ri and rj are not believed to be different, rij will be
printed between brackets.

(* compile with

ocamlopt -o to_array -I benchmark-0.7 unix.cmxa benchmark-0.7/benchmark.cmx
to_array.ml

*)

open Benchmark

let to_array_1 t =
  let dummy =  Array.init 0 (fun _ -> raise Not_found) in
    fst
      (Hashtbl.fold
         (fun k v (a, i) ->
            if i = 0 then  
              let a = Array.make (Hashtbl.length t) (k, v) in
                (a, 0)
            else (a.(i) <- (k, v); (a, i + 1)))
         t (dummy, 0))

let to_array_2 t =
  let init _ = fun () -> raise Not_found  in
  let a = Array.init (Hashtbl.length t) init in
    ignore
      (Hashtbl.fold (fun k v i -> a.(i) <- (fun () -> (k, v)); i+1) t 0);
    Array.map (fun f -> f ())  a

let to_array_3 t =
  Array.of_list (Hashtbl.fold (fun a b c -> (a, b) :: c) t [])


let to_array_1b t =
  let a = ref (Array.init 0 (fun _ -> raise Not_found)) in
    ignore
      (Hashtbl.fold
         (fun k v i ->
            if i = 0 then
              (a := Array.make (Hashtbl.length t) (k, v);
               i)
            else
              ((!a).(i) <- (k, v); i + 1))
         t 0);
    !a



let to_array_4 t =
   let init = ref None in
   begin try Hashtbl.iter (fun k v -> init := Some (k,v); raise Exit) t
   with Exit -> ()
   end;
   match !init with
   | None -> [| |]
   | Some i ->
     let a = Array.make (Hashtbl.length t) i in
       ignore (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0);
       a



let h () = 
  let h = Hashtbl.create 100000 in
    for i = 0 to (Hashtbl.length h) do
      Hashtbl.add h (Random.int max_int) (Random.int max_int);
    done;
    h
      
let main () =
  let h = h () in
  let res = throughputN ~repeat:5 1
    [("to_array_1", to_array_1, h);
     ("to_array_1b", to_array_1b, h);
     ("to_array_2", to_array_2, h);
     ("to_array_3", to_array_3, h);
     ("to_array_4", to_array_4, h); ] in
      tabulate res


let () =  main () 



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Caml-list] generic Hashtbl.to_array
  2006-07-25 12:00 [Caml-list] generic Hashtbl.to_array Christoph Bauer
@ 2006-07-25 16:19 ` skaller
  0 siblings, 0 replies; 9+ messages in thread
From: skaller @ 2006-07-25 16:19 UTC (permalink / raw)
  To: Christoph Bauer; +Cc: Damien Doligez, caml-list

On Tue, 2006-07-25 at 14:00 +0200, Christoph Bauer wrote:
> Hi,
> 
> > let to_array t =
> >    let init = ref None in
> >    begin try Hashtbl.iter (fun k v -> init := Some (k,v); 
> > raise Exit) t
> >    with Exit -> ()
> >    end;
> >    match !init with
> >    | None -> [| |]
> >    | Some i ->
> >      let a = Array.make (Hashtbl.length t) i in
> >      ignore (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0);
> >      a
> > ;;
> 
> it's curious, but this solution is slower than the others!
> 
> [skaller's solution seems to be the same, so I
> include only this one in the "benchmark"]

This is not entirely surprising, since Ocaml wastes time 
first initialising the array.. and then assigning the same 
cells new values, recycling the same store through the 
cache twice. However there is no need for a secondary
data structure, which may delay hitting limits of the
next level of caching (for example VM paging disk).

I see in your tests the number of elements is 100,000.
It would be interesting to increase this in a series
of tests to see if the performance tradeoffs change:
cache effects would predict a 'kink' when you hit
cache boundaries?

-- 
John Skaller <skaller at users dot sf dot net>
Felix, successor to C++: http://felix.sf.net


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Caml-list] generic Hashtbl.to_array
  2006-07-25 16:35 ` Tom
@ 2006-08-15  8:26   ` Stéphane Glondu
  0 siblings, 0 replies; 9+ messages in thread
From: Stéphane Glondu @ 2006-08-15  8:26 UTC (permalink / raw)
  To: caml-list

We shouldn't talk about Obj.magic, but...

Tom a écrit :
> I also corrected my implementation:
> 
> let mgc = Obj.magic 0      <<< So that the function is executed only once.

Does this provide any benefit? It seems to me that Obj.magic is the
(inlined) identity (so basically Obj.magic 0 is compiled directly into
the integer 0).

-- 
Stéphane


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Caml-list] generic Hashtbl.to_array
  2006-07-25 15:20 ` Tom
@ 2006-08-15  8:08   ` Stéphane Glondu
  0 siblings, 0 replies; 9+ messages in thread
From: Stéphane Glondu @ 2006-08-15  8:08 UTC (permalink / raw)
  To: caml-list

I know I am late.

Tom a écrit :
> The dirtiest solution:
> 
> let to_array t =
>  let a =  Array.make (Hashtbl.length t) (Obj.magic 0)  in
>    ignore
>      (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0) ;
>      a

What about:

let to_array h =
  let res = ref [||] in
  let rec assign = ref (fun i x ->
			  res := Array.make (Hashtbl.length h) x;
			  !res.(i) <- x;
			  assign := Array.set !res) in
  ignore (Hashtbl.fold (fun k v i -> !assign i (k, v); i+1) h 0);
  !res;;

Sorry if it has already been proposed (but I have not seen it).

-- 
Stéphane Glondu


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Caml-list] generic Hashtbl.to_array
  2006-07-26 14:41 AW: " Christoph Bauer
@ 2006-07-26 14:53 ` Tom
  0 siblings, 0 replies; 9+ messages in thread
From: Tom @ 2006-07-26 14:53 UTC (permalink / raw)
  To: caml-list

[-- Attachment #1: Type: text/plain, Size: 53 bytes --]

At last the result I desire (and have predicted ;) )

[-- Attachment #2: Type: text/html, Size: 57 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Caml-list] generic Hashtbl.to_array
  2006-07-26  2:16 oleg
@ 2006-07-26  9:48 ` Damien Doligez
  0 siblings, 0 replies; 9+ messages in thread
From: Damien Doligez @ 2006-07-26  9:48 UTC (permalink / raw)
  To: caml users


On 2006-07-26, at 04:16, oleg@pobox.com wrote:

> let to_array9 t =
>   let Some (a,_) =
>     Hashtbl.fold (fun k v seed ->
>       match seed with
> 	Some (a,i) -> a.(i) <- (k,v); Some (a,i+1)
>       | None -> let a =  Array.make (Hashtbl.length t) (k,v) in
>                 Some (a,1))
>       t None
>   in a
> ;;

It fails whenever the hash table is empty, and the compiler
warns you about it.  Replace your "let Some (a,_) = ..."
with a pattern-matching and you have a nice solution.

-- Damien


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Caml-list] generic Hashtbl.to_array
  2006-07-25 12:44 AW: " Christoph Bauer
@ 2006-07-25 15:20 ` Tom
  2006-08-15  8:08   ` Stéphane Glondu
  0 siblings, 1 reply; 9+ messages in thread
From: Tom @ 2006-07-25 15:20 UTC (permalink / raw)
  To: caml-list

[-- Attachment #1: Type: text/plain, Size: 180 bytes --]

The dirtiest solution:

let to_array t =
 let a =  Array.make (Hashtbl.length t) (Obj.magic 0)  in
   ignore
     (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0) ;
     a

[-- Attachment #2: Type: text/html, Size: 319 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Caml-list] generic Hashtbl.to_array
  2006-07-25  8:29 Christoph Bauer
  2006-07-25  9:14 ` [Caml-list] " Erick Tryzelaar
@ 2006-07-25 11:45 ` Damien Doligez
  1 sibling, 0 replies; 9+ messages in thread
From: Damien Doligez @ 2006-07-25 11:45 UTC (permalink / raw)
  To: caml-list

Hello,

On 2006-07-25, at 10:29, Christoph Bauer wrote:

> The simples idea has the problem, that you don't have
> a initial value to make the result array:

You can get it from the hash table itself:

let to_array t =
   let init = ref None in
   begin try Hashtbl.iter (fun k v -> init := Some (k,v); raise Exit) t
   with Exit -> ()
   end;
   match !init with
   | None -> [| |]
   | Some i ->
     let a = Array.make (Hashtbl.length t) i in
     ignore (Hashtbl.fold (fun k v i -> a.(i) <- (k, v); i + 1) t 0);
     a
;;

-- Damien


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Caml-list] generic Hashtbl.to_array
  2006-07-25  8:29 Christoph Bauer
@ 2006-07-25  9:14 ` Erick Tryzelaar
  2006-07-25 11:45 ` Damien Doligez
  1 sibling, 0 replies; 9+ messages in thread
From: Erick Tryzelaar @ 2006-07-25  9:14 UTC (permalink / raw)
  To: Christoph Bauer; +Cc: caml-list

Christoph Bauer wrote:
> Hi,
>
> what is the best way to write Hashtbl.to_array?
>
> Hashtbl.to_array : ('a, 'b) Hashtbl.t -> ('a * 'b) array
>
> The simples idea has the problem, that you don't have
> a initial value to make the result array:

The easiest is to use a temporary list:

# let x = Hashtbl.create 2;;
val x : ('_a, '_b) Hashtbl.t = <abstr>
# Hashtbl.add x 5 3;;
- : unit = ()
# Hashtbl.add x 7 2;;
- : unit = ()
# Array.of_list (Hashtbl.fold (fun a b c -> (a, b) :: c) x []);;
- : (int * int) array = [|(7, 2); (5, 3)|]


You could also try inverting the Hashtbl fold into an iterator+closure 
and pass the closure into the Array.init function, but I'm not sure how 
complicated/efficient that would be.

I suppose it just depends on how efficient you need it to be. If it's 
just some simple stuff, I'd just use the intermediary list.

-e



^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2006-08-15  8:26 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-07-25 12:00 [Caml-list] generic Hashtbl.to_array Christoph Bauer
2006-07-25 16:19 ` skaller
  -- strict thread matches above, loose matches on Subject: below --
2006-07-26 14:41 AW: " Christoph Bauer
2006-07-26 14:53 ` Tom
2006-07-26  2:16 oleg
2006-07-26  9:48 ` [Caml-list] " Damien Doligez
2006-07-25 15:53 AW: AW: " Christoph Bauer
2006-07-25 16:35 ` Tom
2006-08-15  8:26   ` Stéphane Glondu
2006-07-25 12:44 AW: " Christoph Bauer
2006-07-25 15:20 ` Tom
2006-08-15  8:08   ` Stéphane Glondu
2006-07-25  8:29 Christoph Bauer
2006-07-25  9:14 ` [Caml-list] " Erick Tryzelaar
2006-07-25 11:45 ` Damien Doligez

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).