From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Original-To: caml-list@yquem.inria.fr Delivered-To: caml-list@yquem.inria.fr Received: from mail4-relais-sop.national.inria.fr (mail4-relais-sop.national.inria.fr [192.134.164.105]) by yquem.inria.fr (Postfix) with ESMTP id AB98CBBAF for ; Wed, 24 Nov 2010 18:57:27 +0100 (CET) X-IronPort-Anti-Spam-Filtered: true X-IronPort-Anti-Spam-Result: AqkAAPfg7EzBMHhLmWdsb2JhbACDT581FQEBAQEBCAsKBxEirg+RC4RUcwSKFYNa X-IronPort-AV: E=Sophos;i="4.59,249,1288566000"; d="asc'?ml'?scan'208";a="80143691" Received: from net.univ-savoie.fr ([193.48.120.75]) by mail4-smtp-sop.national.inria.fr with ESMTP/TLS/EDH-RSA-DES-CBC3-SHA; 24 Nov 2010 18:57:27 +0100 Received: from post.bourget.univ-savoie.fr (post.bourget.univ-savoie.fr [193.48.120.73]) by net.univ-savoie.fr (8.12.3/jtpda-5.4) with ESMTP id oAOHvOfN011292 for ; Wed, 24 Nov 2010 18:57:24 +0100 Received: from [193.48.123.45] (d45.lama.univ-savoie.fr [193.48.123.45]) by post.bourget.univ-savoie.fr (8.12.3/jtpda-5.4) with ESMTP id oAOHvFn4014991 ; Wed, 24 Nov 2010 18:57:15 +0100 Message-ID: <4CED51FD.6020305@univ-savoie.fr> Date: Wed, 24 Nov 2010 18:57:17 +0100 From: Christophe Raffalli User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101027 Lightning/1.0b2 Thunderbird/3.1.6 MIME-Version: 1.0 To: caml-list@yquem.inria.fr Subject: Re: [Caml-list] Re: Is OCaml fast? References: <20101124.112433.2237591808641262985.Christophe.Troestler+ocaml@umons.ac.be> In-Reply-To: X-Enigmail-Version: 1.1.2 Content-Type: multipart/signed; micalg=pgp-sha1; protocol="application/pgp-signature"; boundary="------------enig9521F61CD60F389FF666BC33" X-Scanned-By: MIMEDefang 2.33 (www . roaringpenguin . com / mimedefang) X-Spam: no; 0.00; christophe:01 raffalli:01 christophe:01 raffalli:01 univ-savoie:01 ocaml:01 compilation:01 syntax:01 camlp:01 ocaml-:01 camlp:01 ocamlopt:01 2048:01 2048:01 ocamlopt:01 X-Attachments: cset="UTF-8" type="text/x-ocaml" name="gctweak.ml" name="gctweak.ml" type="application/pgp-signature" name="signature.asc" name="signature.asc" This is an OpenPGP/MIME signed message (RFC 2440 and 3156) --------------enig9521F61CD60F389FF666BC33 Content-Type: multipart/mixed; boundary="------------010307050904070709090809" This is a multi-part message in MIME format. --------------010307050904070709090809 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable Hello, Here is a test of gctweak.ml on the "now famous" binary-tree shootout ben= ch ... As you can see it is a 30% speed up which is not too bad, just adding a file on the compilation command line ! I reattached the file, because I correct a few comments in it ... and a s= yntax error that is only visible when not using camlp4 in ocaml-3.11.2 ( { get () wit= h ... } is valid with camlp4 and invalid without ???) ... Anyway, more work on Gctwe= ak is still needed ... raffalli@d45-lama:~/Caml$ ocamlopt -o binary_tree gctweak.ml binary_tree.= ml ; time ./binary_tree 20 stretch tree of depth 21 check: -1 2097152 trees of depth 4 check: -2097152 524288 trees of depth 6 check: -524288 131072 trees of depth 8 check: -131072 32768 trees of depth 10 check: -32768 8192 trees of depth 12 check: -8192 2048 trees of depth 14 check: -2048 512 trees of depth 16 check: -512 128 trees of depth 18 check: -128 32 trees of depth 20 check: -32 long lived tree of depth 20 check: -1 real 0m19.212s user 0m18.960s sys 0m0.180s raffalli@d45-lama:~/Caml$ ocamlopt -o binary_tree binary_tree.ml ; time .= /binary_tree 20 stretch tree of depth 21 check: -1 2097152 trees of depth 4 check: -2097152 524288 trees of depth 6 check: -524288 131072 trees of depth 8 check: -131072 32768 trees of depth 10 check: -32768 8192 trees of depth 12 check: -8192 2048 trees of depth 14 check: -2048 512 trees of depth 16 check: -512 128 trees of depth 18 check: -128 32 trees of depth 20 check: -32 long lived tree of depth 20 check: -1 real 0m27.484s user 0m27.270s sys 0m0.110s Here is the run with debug :=3D 1 and you see that minor heap size is gue= ssed at 524288, with almost no promoted word (model =3D 1 means no promoted word)= raffalli@d45-lama:~/Caml$ ocamlopt -o binary_tree gctweak.ml binary_tree.= ml ; time ./binary_tree 20 MHS DOUBLED <- 65536 (model 3.996155) MHS DOUBLED <- 131072 (model 3.000397) stretch tree of depth 21 check: -1 MHS DOUBLED <- 262144 (model 2.495375) MHS DOUBLED <- 524288 (model 1.027698) 2097152 trees of depth 4 check: -2097152 524288 trees of depth 6 check: -524288 131072 trees of depth 8 check: -131072 32768 trees of depth 10 check: -32768 8192 trees of depth 12 check: -8192 2048 trees of depth 14 check: -2048 512 trees of depth 16 check: -512 128 trees of depth 18 check: -128 32 trees of depth 20 check: -32 long lived tree of depth 20 check: -1 real 0m19.342s user 0m19.100s sys 0m0.170s --=20 Christophe Raffalli Universite de Savoie Batiment Le Chablais, bureau 21 73376 Le Bourget-du-Lac Cedex tel: (33) 4 79 75 81 03 fax: (33) 4 79 75 87 42 mail: Christophe.Raffalli@univ-savoie.fr www: http://www.lama.univ-savoie.fr/~RAFFALLI --------------------------------------------- IMPORTANT: this mail is signed using PGP/MIME At least Enigmail/Mozilla, mutt or evolution can check this signature. The public key is stored on www.keyserver.net --------------------------------------------- --------------010307050904070709090809 Content-Type: text/x-ocaml; name="gctweak.ml" Content-Transfer-Encoding: quoted-printable Content-Disposition: attachment; filename="gctweak.ml" open Gc (* adjustable parameters, should be a functor ? *) let space_overhead =3D 100 let gamma =3D 3.0 (* time in major slice attached to a minor GC / time f= or minor GC=20 : should use a real estimation, here just a guess !!! *) let reactivity =3D 0.6 (* between 0.5 and 1.0, less or equal than 0.5 is = not very reasonable: it is likely to double the minor_heap_size at each= major GC, Decrease=20 if reactivity is not important to you *) let retraction_coef =3D 0.9 (* between 0.0 and 1.0. the smaller, the less= oscillation in the minor heap size. 0.0: never decrease = minor heap *) let debug =3D ref 1 (* between 0 and 4, 2 and above is for debugging only= *) let max_minor_heap_size =3D 1 lsl 25 (* the names is clear,=20 rounded to the power of 2 below *) = let major_heap_increment_ratio =3D 0.5 (* proportional heap increment rat= io *) (* End of tuning constants *) (* Justification: We use a model saying that the time in each GC slice is=20 T =3D K * (m + gamma * r * m * f) where m =3D minor heap size (goes away in O()) gamma =3D define above r =3D ratio of promoted word at each minor cycle f =3D (space_overhead + 100) / space_overhead used as an estimation of free space in major heap after collection K a time constant in the sum abobe:=20 - K * m is the time in the minor GC - K * gamma * r * m * f is the time in the major GC slice for each minor = GC If gamma * f is more than 1 (which is likely), it is easy to see that=20 increasing m, if it decreases r enough, will both increase overall speed = and time is a GC slice, increasing therefore both speed and reactivity. More precisely, the model says that this is OK to double the size of the = minor heap when 1 + gamma * f * r > 2*(1 + gamma * f * r') where r is the= ratio=20 associated to m and r' is the ratio associated to 2*m. The code below keeps and update a table of ponderated average value of=20 1 + gamma * f * r for all used heap size and tries to make sensible decis= ion looking at it. *) let param =3D get () let _ =3D set { param with space_overhead =3D space_overhead } let main_coef =3D (* gamma * f *) gamma *. (float) (space_overhead + 100) /.(float) (space_overhead) let ratio_double =3D 1.0 /. reactivity=20 let ratio_half =3D ratio_double /. retraction_coef (* tranlated log2 of the minor heap size, used at initialization only *) let index m =3D=20 let rec fn i m =3D=20 if m <=3D 32768 then i else fn (i + 1) (m / 2) in fn 0 m (* a table to store 1 + gamma * f * r for each heap size *) let max_index =3D index max_minor_heap_size let model_table =3D Array.create (max_index+1) None let old_heap_words =3D ref (quick_stat ()).heap_words let old_promoted_words =3D ref 0.0 let old_minor_collections =3D ref 0 let minor_heap_size =3D ref param.minor_heap_size=20 let minor_heap_index_size =3D ref (index param.minor_heap_size) let _ =3D create_alarm (fun () ->=20 let s =3D quick_stat () in (* tweak minor heap size *) let promoted_words =3D s.promoted_words in let minor_collections =3D s.minor_collections in let delta_promoted_words =3D promoted_words -. !old_promoted_words in let delta_minor_collections =3D minor_collections - !old_minor_collecti= ons in old_promoted_words :=3D promoted_words; old_minor_collections :=3D minor_collections; let ratio =3D delta_promoted_words /. (float) delta_minor_collections=20 /. (float) !minor_heap_size=20 in let new_model =3D 1.0 +. gamma *. ratio in let i =3D !minor_heap_index_size in let mean_model =3D match model_table.(i) with None -> new_model | Some r -> (r +. new_model) /. 2.0 in model_table.(i) <- Some mean_model; if !debug > 2 then begin let i =3D ref 0 in while !i <=3D max_index && model_table.(!i) <> None do match model_table.(!i) with | None -> assert false | Some r -> Printf.fprintf stderr "model(%d) =3D %f - " !i r; incr= i done; Printf.fprintf stderr "\n"; flush stderr; end; let lower_double, lower_half =3D if i <=3D 0 then true, false else=20 match model_table.(i-1) with | None -> false, true | Some r -> let x =3D 2.0 *. mean_model /. r in (i < max_index) && x < ratio_double, x > ratio_half in let upper_double, upper_half =3D if i >=3D max_index then false, true else=20 match model_table.(i+1) with=20 | None -> true, false | Some r -> let x =3D 2.0 *. r /. mean_model in=20 x < ratio_double, (i > 0) && x > ratio_half in if !debug > 2 then begin Printf.fprintf stderr "ld =3D %b, lh =3D %b, ud =3D %b, uh =3D %b\n"=20 lower_double lower_half upper_double upper_half; flush stderr; end; if (lower_half && not upper_double) || (upper_half && not lower_double)= then begin minor_heap_size :=3D !minor_heap_size / 2; minor_heap_index_size :=3D i - 1; if !debug > 0 then begin Printf.fprintf stderr "MHS HALFED <- %d (model %f)\n" !minor_heap_s= ize mean_model; flush stderr; end; set { (get ()) with minor_heap_size =3D !minor_heap_size } end else if (lower_double && not upper_half) || (upper_double && not lower_half)= then begin minor_heap_size :=3D !minor_heap_size * 2; minor_heap_index_size :=3D i + 1; if !debug > 0 then begin Printf.fprintf stderr "MHS DOUBLED <- %d (model %f)\n" !minor_heap_= size mean_model; flush stderr;=20 end; set { (get ()) with minor_heap_size =3D !minor_heap_size } end else if !debug > 1 then begin Printf.fprintf stderr "MHS UNCHANGED (model %f) mean_model\n" mean_= model; flush stderr;=20 end; (* tweak major heap increment to be a fraction of major heap size *) if !old_heap_words <> s.heap_words then begin old_heap_words :=3D s.heap_words;=20 let major_heap_increment =3D max (124*1024) (int_of_float (float s.he= ap_words *. major_heap_increment_ratio) )in (* Printf.fprintf stderr "MHI <- %d \n" major_heap_increment; flush stderr; *) set { (get ()) with major_heap_increment =3D major_heap_increment; } end; ) --------------010307050904070709090809-- --------------enig9521F61CD60F389FF666BC33 Content-Type: application/pgp-signature; name="signature.asc" Content-Description: OpenPGP digital signature Content-Disposition: attachment; filename="signature.asc" -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.10 (GNU/Linux) Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/ iEYEARECAAYFAkztUf0ACgkQi9jr/RgYAS6LxwCeMgWerhWOXBBMYcXOreZJCxvy X44AoKXqAUH+hdggA3krQKtFUjuIvX9o =MVMJ -----END PGP SIGNATURE----- --------------enig9521F61CD60F389FF666BC33--