From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mail3-relais-sop.national.inria.fr (mail3-relais-sop.national.inria.fr [192.134.164.104]) by walapai.inria.fr (8.13.6/8.13.6) with ESMTP id p4IIZxan031676 for ; Wed, 18 May 2011 20:35:59 +0200 X-IronPort-Anti-Spam-Filtered: true X-IronPort-Anti-Spam-Result: AqMDADAR1E1KfVIqkGdsb2JhbACmFAgUAQEBAQkJDQcUBCGnUIJCjBqCNYRsN4hiAQEDBoYTBJARhC+BEYVFO4M3 X-IronPort-AV: E=Sophos;i="4.65,232,1304287200"; d="scan'208";a="83468094" Received: from mail-ww0-f42.google.com ([74.125.82.42]) by mail3-smtp-sop.national.inria.fr with ESMTP/TLS/RC4-MD5; 18 May 2011 20:35:49 +0200 Received: by wwk4 with SMTP id 4so5264735wwk.3 for ; Wed, 18 May 2011 11:35:48 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:from:content-type:content-transfer-encoding :subject:date:message-id:to:mime-version:x-mailer; bh=V4bWPvxSKJxaNDZlrN4ur5WwTIs2MLGqoWl/OQUEP+k=; b=PnofNUcz5VYFkhfma79nELht9KWZi36ee8sso+4Q4q+xpZrc4DMlkFuZszq75p0qBu jd/u8R3zrniZtAIzyZjBJjp0pJzxSrQbuAIhimxiAgvlDtQ9uiuT2AP5qKLnc8KgcLhy zmZHvoGMcUc0+VTDrrMWc2NzdQKy0APoxUtyc= DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=from:content-type:content-transfer-encoding:subject:date:message-id :to:mime-version:x-mailer; b=IUOwITO3leW5Gaqwwzfz/Ci/tbCWab9H5uQTuAIGggHOLy/uGt596I6XrN9UxWJHwF 98os9ZSdCBfjHw5KFM07MYNk7JPX2LQ6XtgI1g5d1vC1wXStkUBhv1WsPh4rB9iNA2wS UmHUbvnoFL4H96OOhvagNPn66XVRbdPPSXHGY= Received: by 10.227.172.74 with SMTP id k10mr2184533wbz.80.1305743748567; Wed, 18 May 2011 11:35:48 -0700 (PDT) Received: from [172.16.13.82] (0xc2efec01.hotspot.tele.dk [194.239.236.1]) by mx.google.com with ESMTPS id k2sm1129112wby.51.2011.05.18.11.35.45 (version=TLSv1/SSLv3 cipher=OTHER); Wed, 18 May 2011 11:35:46 -0700 (PDT) From: Joel Reymont Content-Type: text/plain; charset=us-ascii Date: Wed, 18 May 2011 20:35:43 +0200 Message-Id: <346B52D2-EE21-43D1-B41E-3AEB3BBF0013@gmail.com> To: caml-list Mime-Version: 1.0 (Apple 
Message framework v1084) X-Mailer: Apple Mail (2.1084) Content-Transfer-Encoding: 8bit X-MIME-Autoconverted: from quoted-printable to 8bit by walapai.inria.fr id p4IIZxan031676 Subject: [Caml-list] optimizing numerical code Consider the following two functions that I'm trying to optimize. Why is there an allocation for every iteration of the loop in js_divergence1 but not in js_divergence2? What else can I do to make js_divergence2 faster, apart from using -unsafe? Thanks in advance, Joel --- let js_divergence1 v1 v2 = let acc = ref 0. in for i = 0 to (Array.length v1) - 1 do let x = v1.(i) and y = v2.(i) in let m = 0.5 *. (x +. y) in let d1 = x *. log (x /. m) and d2 = y *. log (y /. m) in acc := !acc +. d1 +. d2 done; (!acc) let js_divergence2 v1 v2 = let sum1 = ref 0. and sum2 = ref 0. in for i = 0 to (Array.length v1) - 1 do let x = v1.(i) and y = v2.(i) in let m = 2. /. (x +. y) in let d1 = x *. log (x *. m) and d2 = y *. log (y *. m) in sum1 := !sum1 +. d1; sum2 := !sum2 +. d2; done; !sum1 +. !sum2 ocamlopt -dcmm (function camlFoo__js_divergence1_1030 (v1/1031: addr v2/1032: addr) (let acc/1033 "camlFoo__3" (let (i/1034 1 bound/1066 (+ (or (>>u (load (+a v1/1031 -8)) 9) 1) -2)) (catch (if (> i/1034 bound/1066) (exit 17) (loop (let (x/1067 (seq (checkbound (>>u (load (+a v1/1031 -8)) 9) i/1034) (load float64u (+a (+a v1/1031 (<< i/1034 2)) -4))) y/1068 (seq (checkbound (>>u (load (+a v2/1032 -8)) 9) i/1034) (load float64u (+a (+a v2/1032 (<< i/1034 2)) -4))) m/1069 (*f 0.5 (+f x/1067 y/1068)) d1/1070 (*f x/1067 (extcall "log" (/f x/1067 m/1069) float)) d2/1071 (*f y/1068 (extcall "log" (/f y/1068 m/1069) float))) (assign acc/1033 (alloc 1277 (+f (+f (load float64u acc/1033) d1/1070) d2/1071)))) (let i/1065 i/1034 (assign i/1034 (+ i/1034 2)) (if (== i/1065 bound/1066) (exit 17) [])))) with(17) [])) acc/1033)) (function camlFoo__js_divergence2_1040 (v1/1041: addr v2/1042: addr) (let (sum1/1055 0. sum2/1056 0.) 
(let (i/1045 1 bound/1058 (+ (or (>>u (load (+a v1/1041 -8)) 9) 1) -2)) (catch (if (> i/1045 bound/1058) (exit 16) (loop (let (x/1059 (seq (checkbound (>>u (load (+a v1/1041 -8)) 9) i/1045) (load float64u (+a (+a v1/1041 (<< i/1045 2)) -4))) y/1060 (seq (checkbound (>>u (load (+a v2/1042 -8)) 9) i/1045) (load float64u (+a (+a v2/1042 (<< i/1045 2)) -4))) m/1061 (/f 2. (+f x/1059 y/1060)) d1/1062 (*f x/1059 (extcall "log" (*f x/1059 m/1061) float)) d2/1063 (*f y/1060 (extcall "log" (*f y/1060 m/1061) float))) (assign sum1/1055 (+f sum1/1055 d1/1062)) (assign sum2/1056 (+f sum2/1056 d2/1063))) (let i/1057 i/1045 (assign i/1045 (+ i/1045 2)) (if (== i/1057 bound/1058) (exit 16) [])))) with(16) [])) (alloc 1277 (+f sum1/1055 sum2/1056)))) -------------------------------------------------------------------------- - for hire: mac osx device driver ninja, kernel extensions and usb drivers ---------------------+------------+--------------------------------------- http://wagerlabs.com | @wagerlabs | http://www.linkedin.com/in/joelreymont ---------------------+------------+---------------------------------------