strange timing between search trees

caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed

* strange timing between search trees
@ 2008-01-25  1:32 Christophe Raffalli
  2008-01-25  2:28 ` [Caml-list] " Jon Harrop
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Christophe Raffalli @ 2008-01-25  1:32 UTC (permalink / raw)
  To: caml-list

[-- Attachment #1: Type: text/plain, Size: 1272 bytes --]


Dear list members,

I wanted to compare 2-3-4 trees (look in wikipedia if you do not know 
them) with the balanced trees of the standard library.
I expected the 2-3-4 to be much faster for search because they use much 
less indirections. However, I thought
that construction should make little difference ...

I was wrong :
Construction is arround 20% faster for 2-3-4 trees
Search is slower arround 5-10% for 2-3-4 trees (the diff gets smaller 
when the trees are larger which is expected)

I wonder if the difference in code size is the explanation (the search 
function for balanced trees is really small and fits better
in cache) ?

I attach my code with two files (the code and the test, compile with 
ocamlopt unix.cmxa set234.ml test234tree.ml)

Any remarks or comments ?

Cheers,
Christophe

Here are the timing on an intel mac laptop with OCaml 3.10

construction of random tree with 200000 interger less then 10^6
234: 0.40s
bal: 0.51s
234: 0.41s
bal: 0.52s
234: 0.41s
bal: 0.52s
234: 0.41s
bal: 0.51s
search 10^6 times in tree of size 100000
234: 0.73s
bal: 0.60s
234: 0.73s
bal: 0.60s
234: 0.73s
bal: 0.60s
234: 0.74s
bal: 0.60s
search 10^6 times in tree of size 500000
234: 0.97s
bal: 0.90s
234: 0.98s
bal: 0.90s
234: 0.98s
bal: 0.90s
234: 0.98s
bal: 0.90s


[-- Attachment #2: set234.ml --]
[-- Type: text/plain, Size: 5517 bytes --]


module type OrderedType =
  sig
    type t
    val compare: t -> t -> int
  end

module Make(Ord: OrderedType) =
  struct
    type elt = Ord.t
    type t = 
	Empty 
      | Node2 of t * elt * t
      | Node3 of t * elt * t * elt * t
      | Node4 of t * elt * t * elt * t * elt * t
      | Node5 of t * elt * t * elt * t * elt * t * elt * t (* Only intermediate, never in final result *)

    let empty = Empty

    let rec mem x = function
	Empty -> false
      | Node2(t1,d1,t2) ->
	  let c = Ord.compare x d1 in
	  c = 0 || mem x (if c < 0 then t1 else t2)
      | Node3(t1,d1,t2,d2,t3) ->
	  let c = Ord.compare x d1 in
	  c = 0 || 
	    if c < 0 then mem x t1 else
	      let c = Ord.compare x d2 in
	      c = 0 || mem x (if c < 0 then t2 else t3)
      | Node4(t1,d1,t2,d2,t3,d3,t4) ->
	  let c = Ord.compare x d2 in
	  c = 0 ||
	      if c < 0 then 	      
		let c = Ord.compare x d1 in
		c = 0 || mem x (if c < 0 then t1 else t2)
	      else
		let c = Ord.compare x d3 in
		c = 0 || mem x (if c < 0 then t3 else t4)
      | Node5 _ -> 
	  assert false

    let bNode2 t1 d1 t2 = match t1, t2 with
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), _ ->
	  Node3(Node3(t11,d11,t12,d12,t13),d13,Node2(t14,d14,t15),d1,t2)
      | _, Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25) ->
	  Node3(t1,d1,Node2(t21,d21,t22),d22,Node3(t23,d23,t24,d24,t25))
      | Node2(Empty,d11,Empty),Empty ->
	  Node3(Empty,d11,Empty,d1,Empty)
      | Empty, Node2(Empty,d21,Empty) ->
	  Node3(Empty,d1,Empty,d21,Empty)
      | _ ->
	  Node2(t1,d1,t2)

    let bNode3 t1 d1 t2 d2 t3 = match t1, t2, t3 with
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), _, _ ->
	  Node4(Node3(t11,d11,t12,d12,t13),d13,Node2(t14,d14,t15),d1,t2,d2,t3)
      | _, Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25), _ ->
	  Node4(t1,d1,Node3(t21,d21,t22,d22,t23),d23,Node2(t24,d24,t25),d2,t3)
      | _, _, Node5(t31,d31,t32,d32,t33,d33,t34,d34,t35) ->
	  Node4(t1,d1,t2,d2,Node3(t31,d31,t32,d32,t33),d33,Node2(t34,d34,t35))
      | Node2(Empty,d11,Empty),Empty, Empty ->
	  Node4(Empty,d11,Empty,d1,Empty,d2,Empty)
      | Empty, Node2(Empty,d21,Empty),Empty ->
	  Node4(Empty,d1,Empty,d21,Empty,d2,Empty)
      | Empty, Empty, Node2(Empty,d31,Empty) ->
	  Node4(Empty,d1,Empty,d2,Empty,d31,Empty)
      | _ ->
	  Node3(t1,d1,t2,d2,t3)

    let bNode4 t1 d1 t2 d2 t3 d3 t4 = 
      match t1, t2, t3, t4 with
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), _, _, _ ->
	  Node5(Node3(t11,d11,t12,d12,t13),d13,Node2(t14,d14,t15),d1,t2,d2,t3,d3,t4)
      | _, Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25), _, _ ->
	  Node5(t1,d1,Node3(t21,d21,t22,d22,t23),d23,Node2(t24,d24,t25),d2,t3,d3,t4)
      | _, _, Node5(t31,d31,t32,d32,t33,d33,t34,d34,t35), _ ->
	  Node5(t1,d1,t2,d2,Node3(t31,d31,t32,d32,t33),d33,Node2(t34,d34,t35),d3,t4)
      | _, _, _, Node5(t41,d41,t42,d42,t43,d43,t44,d44,t45)->
	  Node5(t1,d1,t2,d2,t3,d3,Node3(t41,d41,t42,d42,t43),d43,Node2(t44,d44,t45))
      | Node2(Empty,d11,Empty),Empty, Empty, Empty ->
	  Node5(Empty,d11,Empty,d1,Empty,d2,Empty,d3,Empty)
      | Empty, Node2(Empty,d21,Empty),Empty, Empty ->
	  Node5(Empty,d1,Empty,d21,Empty,d2,Empty,d3,Empty)
      | Empty, Empty, Node2(Empty,d31,Empty), Empty ->
	  Node5(Empty,d1,Empty,d2,Empty,d31,Empty,d3,Empty)
      | Empty, Empty, Empty, Node2(Empty,d41,Empty) ->
	  Node5(Empty,d1,Empty,d2,Empty,d3,Empty,d41,Empty)
      | _ ->
	  Node4(t1,d1,t2,d2,t3,d3,t4)

    let treat_root = function
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15) ->
	  Node2(Node3(t11,d11,t12,d12,t13),d13,Node2(t14,d14,t15))
      | t -> t

    let add x s = 
      let rec fn s = match s with
	  Empty -> Node2(Empty,x,Empty)
	| Node2(t1,d1,t2) ->
	    let c = Ord.compare x d1 in
	    if c = 0 then s else
	      if c < 0 then bNode2 (fn t1) d1 t2
	      else bNode2 t1 d1 (fn t2)
	| Node3(t1,d1,t2,d2,t3) ->
	    let c = Ord.compare x d1 in
	    if c = 0 then s else
	      if c < 0 then bNode3 (fn t1) d1 t2 d2 t3 else
		let c = Ord.compare x d2 in
		if c = 0 then s else 
		  if c < 0 then bNode3 t1 d1 (fn t2) d2 t3 
		  else bNode3 t1 d1 t2 d2 (fn t3)
	| Node4(t1,d1,t2,d2,t3,d3,t4) ->
	    let c = Ord.compare x d2 in
	    if c = 0 then s else
	      if c < 0 then 	      
		let c = Ord.compare x d1 in
		if c = 0 then s	else 
		  if c < 0 then bNode4 (fn t1) d1 t2 d2 t3 d3 t4
		  else bNode4 t1 d1 (fn t2) d2 t3 d3 t4
	    else
	      let c = Ord.compare x d3 in
	      if c = 0 then s else 
		if c < 0 then bNode4 t1 d1 t2 d2 (fn t3) d3 t4 
		else bNode4 t1 d1 t2 d2 t3 d3 (fn t4)
	| Node5 _ -> 
	    assert false
      in
      treat_root (fn s)
		
    let rec cardinal = function
	Empty -> 0
      | Node2(t1,_,t2) -> cardinal t1 + cardinal t2 + 1
      | Node3(t1,_,t2,_,t3) -> cardinal t1 + cardinal t2 + cardinal t3 + 2
      | Node4(t1,_,t2,_,t3,_,t4) -> cardinal t1 + cardinal t2 + cardinal t3 + cardinal t4 + 3
      | Node5 _ -> assert false


    let rec test_height t =
      match t with
	Empty -> 0
      | Node2(t1,_,t2) ->
	  let h1 = test_height t1 in
	  let h2 = test_height t2 in
	  assert (h1 = h2);
	  h1 + 1
      | Node3(t1,_,t2,_,t3) ->
	  let h1 = test_height t1 in
	  let h2 = test_height t2 in
	  let h3 = test_height t3 in
	  assert (h1 = h2);
	  assert (h1 = h3);
	  h1 + 1
      | Node4(t1,_,t2,_,t3,_,t4) ->
	  let h1 = test_height t1 in
	  let h2 = test_height t2 in
	  let h3 = test_height t3 in
	  let h4 = test_height t4 in
	  assert (h1 = h2);
	  assert (h1 = h3);
	  assert (h1 = h4);
	  h1 + 1
      | Node5 _ -> assert false
	  

  end
		


[-- Attachment #3: test234tree.ml --]
[-- Type: text/plain, Size: 3082 bytes --]


module Int = struct
  type t = int
  let compare = compare
end

module Set1 = Set234.Make(Int)

let create1 size max =
  let rec fn acc remain =
    if remain = 0 then acc
    else
      let x = Random.int max in
      fn (Set1.add x acc) (remain - 1)
  in
  fn Set1.empty size

module Set2 = Set.Make(Int)

let create2 size max =
  let rec fn acc remain =
    if remain = 0 then acc
    else
      let x = Random.int max in
      fn (Set2.add x acc) (remain - 1)
  in
  fn Set2.empty size

let create12 size max =
  let rec fn acc acc' remain =
    if remain = 0 then acc, acc'
    else
      let x = Random.int max in
      fn (Set1.add x acc) (Set2.add x acc') (remain - 1)
  in
  fn Set1.empty Set2.empty size

let search1 t size max =
  let rec fn acc remain =
    if remain = 0 then acc
    else
      let x = Random.int max in
      fn (if Set1.mem x t then acc+1 else acc) (remain - 1)
  in
  fn 0 size

let search2 t size max =
  let rec fn acc remain =
    if remain = 0 then acc
    else
      let x = Random.int max in
      fn (if Set2.mem x t then acc+1 else acc) (remain - 1)
  in
  fn 0 size


let t1,t2 = create12 (int_of_string Sys.argv.(1)) 1000
let h = Set1.test_height t1
let _ = 
  print_int h; print_newline ();
  print_int (Set1.cardinal t1); print_newline ();
  print_int (Set2.cardinal t2); print_newline ();
  if Set2.for_all (fun n -> Set1.mem n t1) t2 then
    print_string "identical trees\n"

let chrono s f x =
  let {Unix.tms_utime = ut;Unix.tms_stime = st} = Unix.times () in
  let r = f x in
  let {Unix.tms_utime = ut';Unix.tms_stime = st'} = Unix.times () in
  Printf.printf "%s: %.2fs\n" s ((ut' -. ut) +. (st' -. st));
  flush stdout;
  ignore r

let _ =
  print_string "construction of random tree with 200000 interger less then 10^6"; print_newline ();
  chrono "234" (create1 200000) 1000000;
  chrono "bal" (create2 200000) 1000000;
  chrono "234" (create1 200000) 1000000;
  chrono "bal" (create2 200000) 1000000;
  chrono "234" (create1 200000) 1000000;
  chrono "bal" (create2 200000) 1000000;
  chrono "234" (create1 200000) 1000000;
  chrono "bal" (create2 200000) 1000000;
  let t1, t2 = create12 100000 1000000 in
  print_string "search 10^6 times in tree of size 100000"; print_newline ();
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  let t1, t2 = create12 500000 1000000 in
  print_string "search 10^6 times in tree of size 500000"; print_newline ();
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Caml-list] strange timing between search trees
  2008-01-25  1:32 strange timing between search trees Christophe Raffalli
@ 2008-01-25  2:28 ` Jon Harrop
  2008-01-25 18:12   ` Christophe Raffalli
  2008-01-25  2:31 ` Jon Harrop
  2008-01-30 17:50 ` Christophe Raffalli
  2 siblings, 1 reply; 6+ messages in thread
From: Jon Harrop @ 2008-01-25  2:28 UTC (permalink / raw)
  To: caml-list

On Friday 25 January 2008 01:32:49 Christophe Raffalli wrote:
> Dear list members,
>
> I wanted to compare 2-3-4 trees (look in wikipedia if you do not know
> them) with the balanced trees of the standard library.
> I expected the 2-3-4 to be much faster for search because they use much
> less indirections. However, I thought
> that construction should make little difference ...
>
> I was wrong :
> Construction is arround 20% faster for 2-3-4 trees
> Search is slower arround 5-10% for 2-3-4 trees (the diff gets smaller
> when the trees are larger which is expected)
>
> I wonder if the difference in code size is the explanation (the search
> function for balanced trees is really small and fits better
> in cache) ?

I doubt it. You can make the built-in Set module much faster by increasing the 
code size.

> I attach my code with two files (the code and the test, compile with
> ocamlopt unix.cmxa set234.ml test234tree.ml)
>
> Any remarks or comments ?

I get quantitatively similar performance results. You're missing a lot of 
information in your benchmarking though:

What happens if the sets are constructed in-order (affects locality)?

What happens if you iterate the benchmark over an array of preallocated sets?

I did lots of benchmarking along these lines for the optimization chapter in 
OCaml for Scientists (including benchmarking sets). I found that there are 
several alternative set implementations out there but they all give almost 
identical performance (as you're observing). However, the AVL trees of the 
built-in sets provide asymptotically more efficient set-theoretic operations 
(union, diff, inter) than most other implementations.

I also found that the benchmarking strategy more representative of the real 
code that I had was to iterate over preallocate sets, i.e. more cache misses.

Ultimately, I found it much more productive to simply optimize the Set module 
that comes with OCaml. I'll describe how in a later OCaml Journal article but 
the main ideas are to get the comparison function inlined (OCaml doesn't 
inline across functor boundaries) and add a new node type for leaves. The 
latter greatly reduces the stress on the GC, which is the main performance 
bottleneck of the current implementation, and I found it to be up to 30% 
faster.

We've also discussed this in detail before here so you might find more 
information in the caml-list archives.

HTH.

-- 
Dr Jon D Harrop, Flying Frog Consultancy Ltd.
http://www.ffconsultancy.com/products/?e

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Caml-list] strange timing between search trees
  2008-01-25  1:32 strange timing between search trees Christophe Raffalli
  2008-01-25  2:28 ` [Caml-list] " Jon Harrop
@ 2008-01-25  2:31 ` Jon Harrop
  2008-01-30 17:50 ` Christophe Raffalli
  2 siblings, 0 replies; 6+ messages in thread
From: Jon Harrop @ 2008-01-25  2:31 UTC (permalink / raw)
  To: caml-list

On Friday 25 January 2008 01:32:49 Christophe Raffalli wrote:
> I wanted to compare 2-3-4 trees...

PS: You might also want to check out the parallel implementation of the Set 
module by Dr Frederic Gava. He used the "n"th-nearest neighbour program from 
OCaml for Scientists as a test case and the results were very impressive!

-- 
Dr Jon D Harrop, Flying Frog Consultancy Ltd.
http://www.ffconsultancy.com/products/?e


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Caml-list] strange timing between search trees
  2008-01-25  2:28 ` [Caml-list] " Jon Harrop
@ 2008-01-25 18:12   ` Christophe Raffalli
  0 siblings, 0 replies; 6+ messages in thread
From: Christophe Raffalli @ 2008-01-25 18:12 UTC (permalink / raw)
  To: Jon Harrop; +Cc: caml-list

Jon Harrop a écrit :
> On Friday 25 January 2008 01:32:49 Christophe Raffalli wrote:
>   
>> Dear list members,
>>
>> I wanted to compare 2-3-4 trees (look in wikipedia if you do not know
>> them) with the balanced trees of the standard library.
>> I expected the 2-3-4 to be much faster for search because they use much
>> less indirections. However, I thought
>> that construction should make little difference ...
>>
>> I was wrong :
>> Construction is arround 20% faster for 2-3-4 trees
>> Search is slower arround 5-10% for 2-3-4 trees (the diff gets smaller
>> when the trees are larger which is expected)
>>
>> I wonder if the difference in code size is the explanation (the search
>> function for balanced trees is really small and fits better
>> in cache) ?
>>     
>
> I doubt it. You can make the built-in Set module much faster by increasing the 
> code size.
>
>   
>> I attach my code with two files (the code and the test, compile with
>> ocamlopt unix.cmxa set234.ml test234tree.ml)
>>
>> Any remarks or comments ?
>>     
>
> I get quantitatively similar performance results. You're missing a lot of 
> information in your benchmarking though:
>
> What happens if the sets are constructed in-order (affects locality)?
>
> What happens if you iterate the benchmark over an array of preallocated sets?
>
>   
I known, but I was quite surprised enough with these cases.

I now have an explanation: for binary balanced trees, at least one son 
of every node is near to its ancestor,
because you create one branch at each insertion (this is true if 
comparison do not allocate and if the GC do not break locality to much, but
I think copying GC will mostly keep this property if there is not too 
much sharing ?) ... Unfortunately,
making comparison allocates memory changed nothing to my timing so I am 
not sure my explanation is reasonnable ...
At least, this shows that there is no room for a lot of improvement by 
building larger nodes ...
> Ultimately, I found it much more productive to simply optimize the Set module 
> that comes with OCaml. I'll describe how in a later OCaml Journal article but 
> the main ideas are to get the comparison function inlined (OCaml doesn't 
> inline across functor boundaries) and add a new node type for leaves. The 
> latter greatly reduces the stress on the GC, which is the main performance 
> bottleneck of the current implementation, and I found it to be up to 30% 
> faster.
>
>   
Are you sure the leaf node is a gain ? I just tried that and it failed 
(slow down) on my code (not a benchmark, my real code).

Do you add one node for Leaf, or Three (one node when the two son are 
empty and two when one of the son is empty) ?
Do you make more subtil changes ?

And could you make your optimized set library available ?


Best regards,
Christophe


PS: yet another remark: filter and partition seem suboptimal in the set 
standard library ?

Why aren't they written with join and merge as in:

    let rec filter p = function
        | Empty -> Empty
        | Node(l, v, r, _) ->
        if p v then join (filter p l) v (filter p r)
        else merge (filter p l) (filter p r)

    let rec partition p = function
        | Empty -> (Empty, Empty)
        | Node(l, v, r, _) ->
        let (lt, lf) = partition p l in
        let (rt, rf) = partition p r in
        if p v then join lt v rt, merge lf rf else
          merge lt rt, join lf v rf


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Caml-list] strange timing between search trees
  2008-01-25  1:32 strange timing between search trees Christophe Raffalli
  2008-01-25  2:28 ` [Caml-list] " Jon Harrop
  2008-01-25  2:31 ` Jon Harrop
@ 2008-01-30 17:50 ` Christophe Raffalli
  2008-01-30 18:10   ` Edgar Friendly
  2 siblings, 1 reply; 6+ messages in thread
From: Christophe Raffalli @ 2008-01-30 17:50 UTC (permalink / raw)
  To: caml-list


[-- Attachment #1.1: Type: text/plain, Size: 3321 bytes --]


Dear list members,

I did more research for balanced trees ...
The conclusion, is really that it is not worth changing the data structure for map
and set in the stdlib.

I produced an algorithm balancing more the 234-trees with the following properties (code attached):

Let d be the depth of the tree and n the number of node we have:

in all cases d <= ln(n)/ln(3) + 1 - I think one can produce a better lower bound

for ordered insertion d = ln(n)/ln(4) + 1 - This is completely optimal and means that for 4^d-1
nodes only 4-nodes appear in the tree.

for random insertion, it seems that d = ln(n)/ln(4) + 2 (I did not prove it, but observed it)
- this is almost optimal !

These good results are obtained because rebalancing is done to have that sons of 3-nodes and 4-nodes
are never 2-nodes.

Despite all this the gain again stdlib set is arround 20% for insertion, and depends on the size of
the trees in all cases for searches. The gain is also much smaller with 3.10 on my intel OS X while
I have a more constant gain on 3.09 on my intel linux box.

So, unless the mem functions are not fairly optimized in the case of 234-tree (maybe someone could
check the generated assembly code for set234 and stdlib set) it is not worth the work.

Here are the timings with 3.09 on linux intel ubuntu 7.10:

raffalli@www:~/Caml$ ocamlopt unix.cmxa set234.ml test234tree.ml
raffalli@www:~/Caml$ ./a.out 4095
construction of random tree with 200000 integers less then 10^6
234: 1.08s
bal: 1.29s
234: 1.10s
bal: 1.29s
234: 1.10s
bal: 1.29s
234: 1.11s
bal: 1.29s
search 10^6 times in tree of size 100000
234: 1.78s
bal: 1.87s
234: 1.78s
bal: 1.86s
234: 1.77s
bal: 1.86s
234: 1.78s
bal: 1.86s
search 10^6 times in tree of size 500000
234: 2.43s
bal: 2.72s
234: 2.43s
bal: 2.68s
234: 2.43s
bal: 2.69s
234: 2.42s
bal: 2.68s
search 10^6 times in tree of size 2000000
234: 2.71s
bal: 3.06s
234: 2.71s
bal: 3.05s
234: 2.71s
bal: 3.05s
234: 2.72s
bal: 3.05s
construction of random tree with 200000 integers less then 10^6  (ordered insertion)
234: 0.35s
bal: 0.40s
234: 0.40s
bal: 0.38s
234: 0.33s
bal: 0.38s
234: 0.36s
bal: 0.38s
search 10^6 times in tree of size 100000 (ordered insertion)
234: 1.33s
bal: 1.21s
234: 1.34s
bal: 1.21s
234: 1.34s
bal: 1.21s
234: 1.34s
bal: 1.21s
search 10^6 times in tree of size 500000 (ordered insertion)
234: 1.90s
bal: 1.90s
234: 1.90s
bal: 1.91s
234: 1.90s
bal: 1.91s
234: 1.90s
bal: 1.91s
search 10^6 times in tree of size 2000000 (ordered insertion)
234: 2.81s
bal: 3.29s
234: 2.85s
bal: 3.27s
234: 2.81s
bal: 3.28s
234: 2.81s
bal: 3.26s

Christophe

PS: I should create a page for "boring ocaml code whose writing is advised for meditation" to
store this code ;-)


-- 
Christophe Raffalli
Universite de Savoie
Batiment Le Chablais, bureau 21
73376 Le Bourget-du-Lac Cedex

tel: (33) 4 79 75 81 03
fax: (33) 4 79 75 87 42
mail: Christophe.Raffalli@univ-savoie.fr
www: http://www.lama.univ-savoie.fr/~RAFFALLI
---------------------------------------------
IMPORTANT: this mail is signed using PGP/MIME
At least Enigmail/Mozilla, mutt or evolution
can check this signature. The public key is
stored on www.keyserver.net
---------------------------------------------

[-- Attachment #1.2: set234.ml --]
[-- Type: text/plain, Size: 14420 bytes --]


module type OrderedType =
  sig
    type t
    val compare: t -> t -> int
  end

module Make(Ord: OrderedType) =
  struct
    type elt = Ord.t
    let cmp = Ord.compare

    type t = 
	Empty 
      | Node2 of t * elt * t
      | Node3 of t * elt * t * elt * t
      | Node4 of t * elt * t * elt * t * elt * t
      | Node5 of t * elt * t * elt * t * elt * t * elt * t (* Only intermediate, never in final result *)

    let empty = Empty

    let rec mem x = function
	Empty -> false
      | Node2(t1,d1,t2) ->
	  let c = cmp x d1 in
	  c = 0 || mem x (if c < 0 then t1 else t2)
      | Node3(t1,d1,t2,d2,t3) ->
	  let c = cmp x d1 in
	  c = 0 || 
	    if c < 0 then mem x t1 else
	      let c = cmp x d2 in
	      c = 0 || mem x (if c < 0 then t2 else t3)
      | Node4(t1,d1,t2,d2,t3,d3,t4) ->
	  let c = cmp x d2 in
	  c = 0 ||
	      if c < 0 then 	      
		let c = cmp x d1 in
		c = 0 || mem x (if c < 0 then t1 else t2)
	      else
		let c = cmp x d3 in
		c = 0 || mem x (if c < 0 then t3 else t4)
      | Node5 _ -> 
	  assert false

    let bNode2 t1 d1 t2 = match t1, t2 with
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node2(t21,d21,t22) ->
	  Node2(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node3(t15,d1,t21,d21,t22))
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node3(t21,d21,t22,d22,t23) ->
	  Node2(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node4(t15,d1,t21,d21,t22,d22,t23))
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node4(t21,d21,t22,d22,t23,d23,t24) ->
	  Node3(Node3(t11,d11,t12,d12,t13),d13,Node3(t14,d14,t15,d1,t21),d21,Node3(t22,d22,t23,d23,t24))

      | Node2(t11,d11,t12), Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25) ->
	  Node2(Node3(t11,d11,t12,d1,t21),d21,Node4(t22,d22,t23,d23,t24,d24,t25))
      | Node3(t11,d11,t12,d12,t13), Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25) ->
	  Node2(Node4(t11,d11,t12,d12,t13,d1,t21),d21,Node4(t22,d22,t23,d23,t24,d24,t25))
      | Node4(t11,d11,t12,d12,t13,d13,t14), Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25) ->
	  Node3(Node3(t11,d11,t12,d12,t13),d13,Node3(t14,d1,t21,d21,t22),d22,Node3(t23,d23,t24,d24,t25))

      | Node2(Empty,d11,Empty),Empty ->
	  Node3(Empty,d11,Empty,d1,Empty)
      | Empty, Node2(Empty,d21,Empty) ->
	  Node3(Empty,d1,Empty,d21,Empty)
      | _ ->
	  Node2(t1,d1,t2)

    let bNode3 t1 d1 t2 d2 t3 = match t1, t2, t3 with
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node2(t21,d21,t22), _ ->
	  Node3(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node3(t15,d1,t21,d21,t22),d2,t3)
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node3(t21,d21,t22,d22,t23), _ ->
	  Node3(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node4(t15,d1,t21,d21,t22,d22,t23),d2,t3)
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node4(t21,d21,t22,d22,t23,d23,t24), Node2(t31,d31,t32) ->
	  Node3(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node4(t15,d1,t21,d21,t22,d22,t23),d23,Node3(t24,d2,t31,d31,t32))
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node4(t21,d21,t22,d22,t23,d23,t24), Node3(t31,d31,t32,d32,t33) ->
	  Node3(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node4(t15,d1,t21,d21,t22,d22,t23),d23,Node4(t24,d2,t31,d31,t32,d32,t33))
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node4(t21,d21,t22,d22,t23,d23,t24), _ ->
	  Node4(Node3(t11,d11,t12,d12,t13),d13,Node3(t14,d14,t15,d1,t21),d21,Node3(t22,d22,t23,d23,t24),d2,t3)

      |  Node2(t11,d11,t12), Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25), _ ->
	  Node3(Node3(t11,d11,t12,d1,t21),d21,Node4(t22,d22,t23,d23,t24,d24,t25),d2,t3)
      | _, Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25), Node2(t31,d31,t32) ->
	  Node3(t1,d1,Node4(t21,d21,t22,d22,t23,d23,t24),d24,Node3(t25,d2,t31,d31,t32))
      |  Node3(t11,d11,t12,d12,t13), Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25), _ ->
	  Node3(Node4(t11,d11,t12,d12,t13,d1,t21),d21,Node4(t22,d22,t23,d23,t24,d24,t25),d2,t3)
      | _, Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25), Node3(t31,d31,t32,d32,t33) ->
	  Node3(t1,d1,Node4(t21,d21,t22,d22,t23,d23,t24),d24,Node4(t25,d2,t31,d31,t32,d32,t33))
      | Node4(t11,d11,t12,d12,t13,d13,t14), Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25), _ ->
	  Node4(Node3(t11,d11,t12,d12,t13),d13,Node3(t14,d1,t21,d21,t22),d22,Node3(t23,d23,t24,d24,t25),d2,t3)

      | _, Node2(t21,d21,t22), Node5(t31,d31,t32,d32,t33,d33,t34,d34,t35) ->
	  Node3(t1,d1,Node3(t21,d21,t22,d2,t31),d31,Node4(t32,d32,t33,d33,t34,d34,t35))
      | _, Node3(t21,d21,t22,d22,t23), Node5(t31,d31,t32,d32,t33,d33,t34,d34,t35) ->
	  Node3(t1,d1,Node4(t21,d21,t22,d22,t23,d2,t31),d31,Node4(t32,d32,t33,d33,t34,d34,t35))
      | Node2(t11,d11,t12), Node4(t21,d21,t22,d22,t23,d23,t24), Node5(t31,d31,t32,d32,t33,d33,t34,d34,t35) ->
	  Node3(Node3(t11,d11,t12,d1,t21),d21,Node4(t22,d22,t23,d23,t24,d2,t31),d31,Node4(t32,d32,t33,d33,t34,d34,t35))
      | Node3(t11,d11,t12,d12,t13), Node4(t21,d21,t22,d22,t23,d23,t24), Node5(t31,d31,t32,d32,t33,d33,t34,d34,t35) ->
	  Node3(Node4(t11,d11,t12,d12,t13,d1,t21),d21,Node4(t22,d22,t23,d23,t24,d2,t31),d31,Node4(t32,d32,t33,d33,t34,d34,t35))
      | _,  Node4(t21,d21,t22,d22,t23,d23,t24), Node5(t31,d31,t32,d32,t33,d33,t34,d34,t35) ->
	  Node4(t1,d1,Node3(t21,d21,t22,d22,t23),d23,Node3(t24,d2,t31,d31,t32),d32,Node3(t33,d33,t34,d34,t35))

      | Node2(Empty,d11,Empty),Empty, Empty ->
	  Node4(Empty,d11,Empty,d1,Empty,d2,Empty)
      | Empty, Node2(Empty,d21,Empty),Empty ->
	  Node4(Empty,d1,Empty,d21,Empty,d2,Empty)
      | Empty, Empty, Node2(Empty,d31,Empty) ->
	  Node4(Empty,d1,Empty,d2,Empty,d31,Empty)
      | _ ->
	  Node3(t1,d1,t2,d2,t3)

    let bNode4 t1 d1 t2 d2 t3 d3 t4 = 
      match t1, t2, t3, t4 with
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node2(t21,d21,t22), _, _ ->
	  Node4(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node3(t15,d1,t21,d21,t22),d2,t3,d3,t4)
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node3(t21,d21,t22,d22,t23), _, _ ->
	  Node4(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node4(t15,d1,t21,d21,t22,d22,t23),d2,t3,d3,t4)
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node4(t21,d21,t22,d22,t23,d23,t24), Node2(t31,d31,t32), _ ->
	  Node4(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node4(t15,d1,t21,d21,t22,d22,t23),d23,Node3(t24,d2,t31,d31,t32),d3,t4)
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node4(t21,d21,t22,d22,t23,d23,t24), Node3(t31,d31,t32,d32,t33), _ ->
	  Node4(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node4(t15,d1,t21,d21,t22,d22,t23),d23,Node4(t24,d2,t31,d31,t32,d32,t33),d3,t4)
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node4(t21,d21,t22,d22,t23,d23,t24), Node4(t31,d31,t32,d32,t33,d33,t34), Node2(t41,d41,t42) ->
	  Node4(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node4(t15,d1,t21,d21,t22,d22,t23),d23,Node4(t24,d2,t31,d31,t32,d32,t33),d33,Node3(t34,d3,t41,d41,t42))
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node4(t21,d21,t22,d22,t23,d23,t24), Node4(t31,d31,t32,d32,t33,d33,t34), Node3(t41,d41,t42,d42,t43) ->
	  Node4(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node4(t15,d1,t21,d21,t22,d22,t23),d23,Node4(t24,d2,t31,d31,t32,d32,t33),d33,Node4(t34,d3,t41,d41,t42,d42,t43))
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node4(t21,d21,t22,d22,t23,d23,t24), _, _ ->
	  Node5(Node3(t11,d11,t12,d12,t13),d13,Node3(t14,d14,t15,d1,t21),d21,Node3(t22,d22,t23,d23,t24),d2,t3,d3,t4)

      | Node2(t11,d11,t12), Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25), _, _ ->
	  Node4(Node3(t11,d11,t12,d1,t21),d21,Node4(t22,d22,t23,d23,t24,d24,t25),d2,t3,d3,t4)
      | _, Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25), Node2(t31,d31,t32), _ ->
	  Node4(t1,d1,Node3(t21,d21,t22,d22,t23),d23,Node4(t24,d24,t25,d2,t31,d31,t32),d3,t4)
      | Node3(t11,d11,t12,d12,t13), Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25), _, _ ->
	  Node4(Node4(t11,d11,t12,d12,t13,d1,t21),d21,Node4(t22,d22,t23,d23,t24,d24,t25),d2,t3,d3,t4)
      | _, Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25), Node3(t31,d31,t32,d32,t33), _ ->
	  Node4(t1,d1,Node4(t21,d21,t22,d22,t23,d23,t24),d24,Node4(t25,d2,t31,d31,t32,d32,t33),d3,t4)
      | _, Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25), Node4(t31,d31,t32,d32,t33,d33,t34), Node2(t41,d41,t42) ->
	  Node4(t1,d1,Node4(t21,d21,t22,d22,t23,d23,t24),d24,Node4(t25,d2,t31,d31,t32,d32,t33),d33, Node3(t34,d3,t41,d41,t42))
      | _, Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25), Node4(t31,d31,t32,d32,t33,d33,t34), Node3(t41,d41,t42,d42,t43) ->
	  Node4(t1,d1,Node4(t21,d21,t22,d22,t23,d23,t24),d24,Node4(t25,d2,t31,d31,t32,d32,t33),d33, Node4(t34,d3,t41,d41,t42,d42,t43))
      | Node4(t11,d11,t12,d12,t13,d13,t14), Node5(t21,d21,t22,d22,t23,d23,t24,d24,t25), _, _ ->
	  Node5(Node3(t11,d11,t12,d12,t13),d13,Node3(t14,d1,t21,d21,t22),d22,Node3(t23,d23,t24,d24,t25),d2,t3,d3,t4)

      | _, Node2(t21,d21,t22), Node5(t31,d31,t32,d32,t33,d33,t34,d34,t35), _ ->
	  Node4(t1,d1,Node3(t21,d21,t22,d2,t31),d31,Node4(t32,d32,t33,d33,t34,d34,t35),d3,t4)
      | _, _, Node5(t31,d31,t32,d32,t33,d33,t34,d34,t35), Node2(t41,d41,t42) ->
	  Node4(t1,d1,t2,d2,Node4(t31,d31,t32,d32,t33,d33,t34),d34,Node3(t35,d3,t41,d41,t42))
      | _, Node3(t21,d21,t22,d22,t23), Node5(t31,d31,t32,d32,t33,d33,t34,d34,t35), _ ->
	  Node4(t1,d1,Node4(t21,d21,t22,d22,t23,d2,t31),d31,Node4(t32,d32,t33,d33,t34,d34,t35),d3,t4)
      | _, _, Node5(t31,d31,t32,d32,t33,d33,t34,d34,t35), Node3(t41,d41,t42,d42,t43) ->
	  Node4(t1,d1,t2,d2,Node4(t31,d31,t32,d32,t33,d33,t34),d34,Node4(t35,d3,t41,d41,t42,d42,t43))
      | Node2(t11,d11,t12), Node4(t21,d21,t22,d22,t23,d23,t24), Node5(t31,d31,t32,d32,t33,d33,t34,d34,t35), _ ->
	  Node4(Node3(t11,d11,t12,d1,t21),d21,Node4(t22,d22,t23,d23,t24,d2,t31),d31,Node4(t32,d32,t33,d33,t34,d34,t35),d3,t4)
      | Node3(t11,d11,t12,d12,t13), Node4(t21,d21,t22,d22,t23,d23,t24), Node5(t31,d31,t32,d32,t33,d33,t34,d34,t35), _ ->
	  Node4(Node4(t11,d11,t12,d12,t13,d1,t21),d21,Node4(t22,d22,t23,d23,t24,d2,t31),d31,Node4(t32,d32,t33,d33,t34,d34,t35),d3,t4)
      | _, Node4(t21,d21,t22,d22,t23,d23,t24), Node5(t31,d31,t32,d32,t33,d33,t34,d34,t35), _ ->
	  Node5(t1,d1,Node3(t21,d21,t22,d22,t23),d23,Node3(t24,d2,t31,d31,t32),d32,Node3(t33,d33,t34,d34,t35),d3,t4)

      | _, _, Node2(t31,d31,t32), Node5(t41,d41,t42,d42,t43,d43,t44,d44,t45)->
	  Node4(t1,d1,t2,d2,Node3(t31,d31,t32,d3,t41),d41,Node4(t42,d42,t43,d43,t44,d44,t45))
      | _, _, Node3(t31,d31,t32,d32,t33), Node5(t41,d41,t42,d42,t43,d43,t44,d44,t45)->
	  Node4(t1,d1,t2,d2,Node4(t31,d31,t32,d32,t33,d3,t41),d41,Node4(t42,d42,t43,d43,t44,d44,t45))
      | _, Node2(t21,d21,t22), Node4(t31,d31,t32,d32,t33,d33,t34), Node5(t41,d41,t42,d42,t43,d43,t44,d44,t45)->
	  Node4(t1,d1,Node3(t21,d21,t22,d2,t31),d31,Node4(t32,d32,t33,d33,t34,d3,t41),d41,Node4(t42,d42,t43,d43,t44,d44,t45))
      | _, Node3(t21,d21,t22,d22,t23), Node4(t31,d31,t32,d32,t33,d33,t34), Node5(t41,d41,t42,d42,t43,d43,t44,d44,t45)->
	  Node4(t1,d1,Node4(t21,d21,t22,d22,t23,d2,t31),d31,Node4(t32,d32,t33,d33,t34,d3,t41),d41,Node4(t42,d42,t43,d43,t44,d44,t45))
      | Node2(t11,d11,t12), Node4(t21,d21,t22,d22,t23,d23,t24), Node4(t31,d31,t32,d32,t33,d33,t34), Node5(t41,d41,t42,d42,t43,d43,t44,d44,t45)->
	  Node4(Node3(t11,d11,t12,d1,t21),d21,Node4(t22,d22,t23,d23,t24,d2,t31),d31,Node4(t32,d32,t33,d33,t34,d3,t41),d41,Node4(t42,d42,t43,d43,t44,d44,t45))
      | Node3(t11,d11,t12,d12,t13), Node4(t21,d21,t22,d22,t23,d23,t24), Node4(t31,d31,t32,d32,t33,d33,t34), Node5(t41,d41,t42,d42,t43,d43,t44,d44,t45)->
	  Node4(Node4(t11,d11,t12,d12,t13,d1,t21),d21,Node4(t22,d22,t23,d23,t24,d2,t31),d31,Node4(t32,d32,t33,d33,t34,d3,t41),d41,Node4(t42,d42,t43,d43,t44,d44,t45))
      | _, _, Node4(t31,d31,t32,d32,t33,d33,t34), Node5(t41,d41,t42,d42,t43,d43,t44,d44,t45)->
	  Node5(t1,d1,t2,d2,Node3(t31,d31,t32,d32,t33),d33,Node3(t34,d3,t41,d41,t42),d42,Node3(t43,d43,t44,d44,t45))

      | Node2(Empty,d11,Empty),Empty, Empty, Empty ->
	  Node5(Empty,d11,Empty,d1,Empty,d2,Empty,d3,Empty)
      | Empty, Node2(Empty,d21,Empty),Empty, Empty ->
	  Node5(Empty,d1,Empty,d21,Empty,d2,Empty,d3,Empty)
      | Empty, Empty, Node2(Empty,d31,Empty), Empty ->
	  Node5(Empty,d1,Empty,d2,Empty,d31,Empty,d3,Empty)
      | Empty, Empty, Empty, Node2(Empty,d41,Empty) ->
	  Node5(Empty,d1,Empty,d2,Empty,d3,Empty,d41,Empty)
      | _ ->
	  Node4(t1,d1,t2,d2,t3,d3,t4)

    let treat_root = function
      | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15) ->
	  Node2(Node3(t11,d11,t12,d12,t13),d13,Node2(t14,d14,t15))
      | t -> t

    let add x s = 
      let rec fn s = match s with
	  Empty -> Node2(Empty,x,Empty)
	| Node2(t1,d1,t2) ->
	    let c = cmp x d1 in
	    if c = 0 then s else
	      if c < 0 then bNode2 (fn t1) d1 t2
	      else bNode2 t1 d1 (fn t2)
	| Node3(t1,d1,t2,d2,t3) ->
	    let c = cmp x d1 in
	    if c = 0 then s else
	      if c < 0 then bNode3 (fn t1) d1 t2 d2 t3 else
		let c = cmp x d2 in
		if c = 0 then s else 
		  if c < 0 then bNode3 t1 d1 (fn t2) d2 t3 
		  else bNode3 t1 d1 t2 d2 (fn t3)
	| Node4(t1,d1,t2,d2,t3,d3,t4) ->
	    let c = cmp x d2 in
	    if c = 0 then s else
	      if c < 0 then 	      
		let c = cmp x d1 in
		if c = 0 then s	else 
		  if c < 0 then bNode4 (fn t1) d1 t2 d2 t3 d3 t4
		  else bNode4 t1 d1 (fn t2) d2 t3 d3 t4
	    else
	      let c = cmp x d3 in
	      if c = 0 then s else 
		if c < 0 then bNode4 t1 d1 t2 d2 (fn t3) d3 t4 
		else bNode4 t1 d1 t2 d2 t3 d3 (fn t4)
	| Node5 _ -> 
	    assert false
      in
      treat_root (fn s)
		
    let rec cardinal = function
	Empty -> 0
      | Node2(t1,_,t2) -> cardinal t1 + cardinal t2 + 1
      | Node3(t1,_,t2,_,t3) -> cardinal t1 + cardinal t2 + cardinal t3 + 2
      | Node4(t1,_,t2,_,t3,_,t4) -> cardinal t1 + cardinal t2 + cardinal t3 + cardinal t4 + 3
      | Node5 _ -> assert false


    let rec test_height t =
      match t with
	Empty -> 0
      | Node2(t1,_,t2) ->
	  let h1 = test_height t1 in
	  let h2 = test_height t2 in
	  assert (h1 = h2);
	  h1 + 1
      | Node3(t1,_,t2,_,t3) ->
	  let h1 = test_height t1 in
	  let h2 = test_height t2 in
	  let h3 = test_height t3 in
	  assert (h1 = h2);
	  assert (h1 = h3);
	  h1 + 1
      | Node4(t1,_,t2,_,t3,_,t4) ->
	  let h1 = test_height t1 in
	  let h2 = test_height t2 in
	  let h3 = test_height t3 in
	  let h4 = test_height t4 in
	  assert (h1 = h2);
	  assert (h1 = h3);
	  assert (h1 = h4);
	  h1 + 1
      | Node5 _ -> assert false
	  

  end
		


[-- Attachment #1.3: test234tree.ml --]
[-- Type: text/plain, Size: 6640 bytes --]


module Int = struct
  type t = int
  let compare = compare
end

module Set1 = Set234.Make(Int)

let create1 size max =
  let rec fn acc remain =
    if remain = 0 then acc
    else
      let x = Random.int max in
      fn (Set1.add x acc) (remain - 1)
  in
  fn Set1.empty size

module Set2 = Set.Make(Int)

let create2 size max =
  let rec fn acc remain =
    if remain = 0 then acc
    else
      let x = Random.int max in
      fn (Set2.add x acc) (remain - 1)
  in
  fn Set2.empty size

let create12 size max =
  let rec fn acc acc' remain =
    if remain = 0 then acc, acc'
    else
      let x = Random.int max in
      fn (Set1.add x acc) (Set2.add x acc') (remain - 1)
  in
  fn Set1.empty Set2.empty size


let ocreate1 size max =
  let rec fn acc remain =
    if remain = 0 then acc
    else
      let x = remain in
      fn (Set1.add x acc) (remain - 1)
  in
  fn Set1.empty size

let ocreate2 size max =
  let rec fn acc remain =
    if remain = 0 then acc
    else
      let x = remain in
      fn (Set2.add x acc) (remain - 1)
  in
  fn Set2.empty size

let ocreate12 size max =
  let rec fn acc acc' remain =
    if remain = 0 then acc, acc'
    else
      let x = remain in
      fn (Set1.add x acc) (Set2.add x acc') (remain - 1)
  in
  fn Set1.empty Set2.empty size

let search1 t size max =
  let rec fn acc remain =
    if remain = 0 then acc
    else
      let x = Random.int max in
      fn (if Set1.mem x t then acc+1 else acc) (remain - 1)
  in
  fn 0 size

let search2 t size max =
  let rec fn acc remain =
    if remain = 0 then acc
    else
      let x = Random.int max in
      fn (if Set2.mem x t then acc+1 else acc) (remain - 1)
  in
  fn 0 size


let t1,t2 = 
  let n = int_of_string Sys.argv.(1) in
  create12 n (n*3)

let h = Set1.test_height t1
let _ = 
  print_int h; print_newline ();
  print_int (Set1.cardinal t1); print_newline ();
  print_int (Set2.cardinal t2); print_newline ();
  if Set2.for_all (fun n -> Set1.mem n t1) t2 then
    print_string "identical trees\n"

let t1,t2 = 
  let n = int_of_string Sys.argv.(1) in
  ocreate12 n (n*3)

let h = Set1.test_height t1
let _ = 
  print_int h; print_newline ();
  print_int (Set1.cardinal t1); print_newline ();
  print_int (Set2.cardinal t2); print_newline ();
  if Set2.for_all (fun n -> Set1.mem n t1) t2 then
    print_string "identical trees\n"

let chrono s f x =
  let {Unix.tms_utime = ut;Unix.tms_stime = st} = Unix.times () in
  let r = f x in
  let {Unix.tms_utime = ut';Unix.tms_stime = st'} = Unix.times () in
  Printf.printf "%s: %.2fs\n" s ((ut' -. ut) +. (st' -. st));
  flush stdout;
  ignore r

let _ =
  print_string "construction of random tree with 200000 interger less then 10^6"; print_newline ();
  chrono "234" (create1 200000) 1000000;
  chrono "bal" (create2 200000) 1000000;
  chrono "234" (create1 200000) 1000000;
  chrono "bal" (create2 200000) 1000000;
  chrono "234" (create1 200000) 1000000;
  chrono "bal" (create2 200000) 1000000;
  chrono "234" (create1 200000) 1000000;
  chrono "bal" (create2 200000) 1000000;
  let t1, t2 = create12 100000 1000000 in
  print_string "search 10^6 times in tree of size 100000"; print_newline ();
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  let t1, t2 = create12 500000 1000000 in
  print_string "search 10^6 times in tree of size 500000"; print_newline ();
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  let t1, t2 = create12 2000000 1000000 in
  print_string "search 10^6 times in tree of size 2000000"; print_newline ();
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  print_string "construction of random tree with 200000 interger less then 10^6  (ordered insertion)"; print_newline ();
  chrono "234" (ocreate1 200000) 1000000;
  chrono "bal" (ocreate2 200000) 1000000;
  chrono "234" (ocreate1 200000) 1000000;
  chrono "bal" (ocreate2 200000) 1000000;
  chrono "234" (ocreate1 200000) 1000000;
  chrono "bal" (ocreate2 200000) 1000000;
  chrono "234" (ocreate1 200000) 1000000;
  chrono "bal" (ocreate2 200000) 1000000;
  let t1, t2 = ocreate12 100000 1000000 in
  print_string "search 10^6 times in tree of size 100000 (ordered insertion)"; print_newline ();
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  let t1, t2 = ocreate12 500000 1000000 in
  print_string "search 10^6 times in tree of size 500000 (ordered insertion)"; print_newline ();
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  let t1, t2 = ocreate12 2000000 1000000 in
  print_string "search 10^6 times in tree of size 2000000 (ordered insertion)"; print_newline ();
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000;
  chrono "234" (search1 t1 1000000) 1000000;
  chrono "bal" (search2 t2 1000000) 1000000



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 252 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Caml-list] strange timing between search trees
  2008-01-30 17:50 ` Christophe Raffalli
@ 2008-01-30 18:10   ` Edgar Friendly
  0 siblings, 0 replies; 6+ messages in thread
From: Edgar Friendly @ 2008-01-30 18:10 UTC (permalink / raw)
  To: Christophe Raffalli; +Cc: caml-list

Christophe Raffalli wrote:
> PS: I should create a page for "boring ocaml code whose writing is advised for meditation" to
> store this code ;-)

> 	  Node3(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node3(t15,d1,t21,d21,t22),d2,t3)
>       | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node3(t21,d21,t22,d22,t23), _ ->
> 	  Node3(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node4(t15,d1,t21,d21,t22,d22,t23),d2,t3)
>       | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node4(t21,d21,t22,d22,t23,d23,t24), Node2(t31,d31,t32) ->
> 	  Node3(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node4(t15,d1,t21,d21,t22,d22,t23),d23,Node3(t24,d2,t31,d31,t32))
>       | Node5(t11,d11,t12,d12,t13,d13,t14,d14,t15), Node4(t21,d21,t22,d22,t23,d23,t24), Node3(t31,d31,t32,d32,t33) ->
> 	  Node3(Node4(t11,d11,t12,d12,t13,d13,t14),d14,Node4(t15,d1,t21,d21,t22,d22,t23),d23,Node4(t24,d2,t31,d31,t32,d32,t33))
>    

Wow, even I'm impressed at this boring code...  My eyes glaze over just
looking at it - I'd need advanced meditative skills to produce such code
correctly.

E.


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2008-01-30 18:10 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-01-25  1:32 strange timing between search trees Christophe Raffalli
2008-01-25  2:28 ` [Caml-list] " Jon Harrop
2008-01-25 18:12   ` Christophe Raffalli
2008-01-25  2:31 ` Jon Harrop
2008-01-30 17:50 ` Christophe Raffalli
2008-01-30 18:10   ` Edgar Friendly

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).