caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
* [Benchmark] NBody
@ 2005-02-07 18:57 Christophe TROESTLER
  2005-02-07 19:16 ` [Caml-list] " Will M. Farr
                   ` (3 more replies)
  0 siblings, 4 replies; 35+ messages in thread
From: Christophe TROESTLER @ 2005-02-07 18:57 UTC (permalink / raw)
  To: O'Caml Mailing List

[-- Attachment #1: Type: Text/Plain, Size: 736 bytes --]

Hi,

For fun I have implemented an nbody simulation following
http://shootout.alioth.debian.org/benchmark.php?test=nbody&lang=all&sort=cpu
(code is attached).  I've compiled it with

  ocamlopt -o nbody.com -inline 3 -unsafe -ccopt -O2 nbody.ml

I've compared with the Java program they give.  I get (on a Pentium(R)
4 CPU 2.40GHz Debian):

n	OCaml	Java
1000	0.004	0.112
10000	0.016	0.112
100000	0.159	0.218
200000	0.284	0.370
500000	0.707	0.702
1000000	1.410	1.359
2000000	2.884	2.453
3000000	4.294	3.590
4000000	5.735	4.774

I am interested in explanations why OCaml seems asymptotically slower
than Java and ways to improve that.  My concern is actually not so
much for the shootout as for my own numerical programs.

Regards,
ChriS

[-- Attachment #2: nbody.ml --]
[-- Type: Text/Plain, Size: 3598 bytes --]

(*
  http://shootout.alioth.debian.org/benchmark.php?test=nbody&lang=all&sort=cpu
*)

let pi = 3.141592653589793
let solar_mass = 4. *. pi *. pi
let days_per_year = 365.24

type planet = {
  mutable x : float;  mutable y : float;  mutable z : float;
  mutable vx: float;  mutable vy: float;  mutable vz: float;
  mass : float;
}


let advance bodies dt =
  let n = Array.length bodies - 1 in
  for i = 0 to n do
    let b = bodies.(i) in
    for j = i+1 to n do
      let b' = bodies.(j) in
      let dx = b.x -. b'.x  and dy = b.y -. b'.y  and dz = b.z -. b'.z in
      let distance = sqrt(dx *. dx +. dy *. dy +. dz *. dz) in
      let mag = dt /. (distance *. distance *. distance) in

      b.vx <- b.vx -. dx *. b'.mass *. mag;
      b.vy <- b.vy -. dy *. b'.mass *. mag;
      b.vz <- b.vz -. dz *. b'.mass *. mag;

      b'.vx <- b'.vx +. dx *. b.mass *. mag;
      b'.vy <- b'.vy +. dy *. b.mass *. mag;
      b'.vz <- b'.vz +. dz *. b.mass *. mag;
    done
  done;
  for i = 0 to n do
    let b = bodies.(i) in
    b.x <- b.x +. dt *. b.vx;
    b.y <- b.y +. dt *. b.vy;
    b.z <- b.z +. dt *. b.vz;
  done


let energy bodies =
  let e = ref 0. in
  for i = 0 to Array.length bodies - 1 do
    let b = bodies.(i) in
    e := !e +. 0.5 *. b.mass *. (b.vx *. b.vx +. b.vy *. b.vy +. b.vz *. b.vz);
    for j = i+1 to Array.length bodies - 1 do
      let b' = bodies.(j) in
      let dx = b.x -. b'.x  and dy = b.y -. b'.y  and dz = b.z -. b'.z in
      let distance = sqrt(dx *. dx +. dy *. dy +. dz *. dz) in
      e := !e -. (b.mass *. b'.mass) /. distance
    done
  done;
  !e


let offset_momentum bodies =
  let px = ref 0. and py = ref 0. and pz = ref 0. in
  for i = 0 to Array.length bodies - 1 do
    px := !px +. bodies.(i).vx *. bodies.(i).mass;
    py := !py +. bodies.(i).vy *. bodies.(i).mass;
    pz := !pz +. bodies.(i).vz *. bodies.(i).mass;
  done;
  bodies.(0).vx <- -. !px /. solar_mass;
  bodies.(0).vy <- -. !py /. solar_mass;
  bodies.(0).vz <- -. !pz /. solar_mass


let jupiter = {
  x = 4.84143144246472090e+00;
  y = -1.16032004402742839e+00;
  z = -1.03622044471123109e-01;
  vx = 1.66007664274403694e-03 *. days_per_year;
  vy = 7.69901118419740425e-03 *. days_per_year;
  vz = -6.90460016972063023e-05 *. days_per_year;
  mass = 9.54791938424326609e-04 *. solar_mass;
}

let saturn = {
  x = 8.34336671824457987e+00;
  y = 4.12479856412430479e+00;
  z = -4.03523417114321381e-01;
  vx = -2.76742510726862411e-03 *. days_per_year;
  vy = 4.99852801234917238e-03 *. days_per_year;
  vz = 2.30417297573763929e-05 *. days_per_year;
  mass = 2.85885980666130812e-04 *. solar_mass;
}

let uranus = {
  x = 1.28943695621391310e+01;
  y = -1.51111514016986312e+01;
  z = -2.23307578892655734e-01;
  vx = 2.96460137564761618e-03 *. days_per_year;
  vy = 2.37847173959480950e-03 *. days_per_year;
  vz = -2.96589568540237556e-05 *. days_per_year;
  mass = 4.36624404335156298e-05 *. solar_mass;
}

let neptune = {
  x = 1.53796971148509165e+01;
  y = -2.59193146099879641e+01;
  z = 1.79258772950371181e-01;
  vx = 2.68067772490389322e-03 *. days_per_year;
  vy = 1.62824170038242295e-03 *. days_per_year;
  vz = -9.51592254519715870e-05 *. days_per_year;
  mass = 5.15138902046611451e-05 *. solar_mass;
}

let sun = {
  x = 0.;  y = 0.;  z = 0.;  vx = 0.;  vy = 0.; vz = 0.;
  mass= solar_mass;
}

let bodies = [| sun; jupiter; saturn; uranus; neptune |]

let () =
  let n = int_of_string(Sys.argv.(1)) in
  offset_momentum bodies;
  Printf.printf "%.9f\n" (energy bodies);
  for i = 1 to n do
    advance bodies 0.01
  done;
  Printf.printf "%.9f\n" (energy bodies)

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-07 18:57 [Benchmark] NBody Christophe TROESTLER
@ 2005-02-07 19:16 ` Will M. Farr
  2005-02-07 19:36   ` Christophe TROESTLER
  2005-02-07 19:37 ` Martin Jambon
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 35+ messages in thread
From: Will M. Farr @ 2005-02-07 19:16 UTC (permalink / raw)
  To: Christophe TROESTLER; +Cc: O'Caml Mailing List

Chris,

You might try profiling (using gprof); maybe it will give you an idea  
where your time is being spent.  One possibility is that the java  
*program* is faster, but the java runtime takes longer to start up than  
the ocaml runtime; this would explain why you beat the pants off java  
for n = 1000, but then things look brighter for java.  In fact, if you  
fit the data to a linear curve of runtime = a*n + b, you find

ocaml: a = 1.4333e-06, b = -0.00027567
java: a = 1.1629e-06, b = 0.1242

it looks like the java code is faster, but it clearly has a large  
startup time.

Will


On 7 Feb 2005, at 1:57 PM, Christophe TROESTLER wrote:

> Hi,
>
> For fun I have implemented an nbody simulation following
> http://shootout.alioth.debian.org/benchmark.php? 
> test=nbody&lang=all&sort=cpu
> (code is attached).  I've compiled it with
>
>   ocamlopt -o nbody.com -inline 3 -unsafe -ccopt -O2 nbody.ml
>
> I've compared with the Java program they give.  I get (on a Pentium(R)
> 4 CPU 2.40GHz Debian):
>
> n	OCaml	Java
> 1000	0.004	0.112
> 10000	0.016	0.112
> 100000	0.159	0.218
> 200000	0.284	0.370
> 500000	0.707	0.702
> 1000000	1.410	1.359
> 2000000	2.884	2.453
> 3000000	4.294	3.590
> 4000000	5.735	4.774
>
> I am interested in explanations why OCaml seems asymptotically slower
> than Java and ways to improve that.  My concern is actually not so
> much for the shootout as for my own numerical programs.
>
> Regards,
> ChriS
> (*
>    
> http://shootout.alioth.debian.org/benchmark.php? 
> test=nbody&lang=all&sort=cpu
> *)
>
> let pi = 3.141592653589793
> let solar_mass = 4. *. pi *. pi
> let days_per_year = 365.24
>
> type planet = {
>   mutable x : float;  mutable y : float;  mutable z : float;
>   mutable vx: float;  mutable vy: float;  mutable vz: float;
>   mass : float;
> }
>
>
> let advance bodies dt =
>   let n = Array.length bodies - 1 in
>   for i = 0 to n do
>     let b = bodies.(i) in
>     for j = i+1 to n do
>       let b' = bodies.(j) in
>       let dx = b.x -. b'.x  and dy = b.y -. b'.y  and dz = b.z -. b'.z  
> in
>       let distance = sqrt(dx *. dx +. dy *. dy +. dz *. dz) in
>       let mag = dt /. (distance *. distance *. distance) in
>
>       b.vx <- b.vx -. dx *. b'.mass *. mag;
>       b.vy <- b.vy -. dy *. b'.mass *. mag;
>       b.vz <- b.vz -. dz *. b'.mass *. mag;
>
>       b'.vx <- b'.vx +. dx *. b.mass *. mag;
>       b'.vy <- b'.vy +. dy *. b.mass *. mag;
>       b'.vz <- b'.vz +. dz *. b.mass *. mag;
>     done
>   done;
>   for i = 0 to n do
>     let b = bodies.(i) in
>     b.x <- b.x +. dt *. b.vx;
>     b.y <- b.y +. dt *. b.vy;
>     b.z <- b.z +. dt *. b.vz;
>   done
>
>
> let energy bodies =
>   let e = ref 0. in
>   for i = 0 to Array.length bodies - 1 do
>     let b = bodies.(i) in
>     e := !e +. 0.5 *. b.mass *. (b.vx *. b.vx +. b.vy *. b.vy +. b.vz  
> *. b.vz);
>     for j = i+1 to Array.length bodies - 1 do
>       let b' = bodies.(j) in
>       let dx = b.x -. b'.x  and dy = b.y -. b'.y  and dz = b.z -. b'.z  
> in
>       let distance = sqrt(dx *. dx +. dy *. dy +. dz *. dz) in
>       e := !e -. (b.mass *. b'.mass) /. distance
>     done
>   done;
>   !e
>
>
> let offset_momentum bodies =
>   let px = ref 0. and py = ref 0. and pz = ref 0. in
>   for i = 0 to Array.length bodies - 1 do
>     px := !px +. bodies.(i).vx *. bodies.(i).mass;
>     py := !py +. bodies.(i).vy *. bodies.(i).mass;
>     pz := !pz +. bodies.(i).vz *. bodies.(i).mass;
>   done;
>   bodies.(0).vx <- -. !px /. solar_mass;
>   bodies.(0).vy <- -. !py /. solar_mass;
>   bodies.(0).vz <- -. !pz /. solar_mass
>
>
> let jupiter = {
>   x = 4.84143144246472090e+00;
>   y = -1.16032004402742839e+00;
>   z = -1.03622044471123109e-01;
>   vx = 1.66007664274403694e-03 *. days_per_year;
>   vy = 7.69901118419740425e-03 *. days_per_year;
>   vz = -6.90460016972063023e-05 *. days_per_year;
>   mass = 9.54791938424326609e-04 *. solar_mass;
> }
>
> let saturn = {
>   x = 8.34336671824457987e+00;
>   y = 4.12479856412430479e+00;
>   z = -4.03523417114321381e-01;
>   vx = -2.76742510726862411e-03 *. days_per_year;
>   vy = 4.99852801234917238e-03 *. days_per_year;
>   vz = 2.30417297573763929e-05 *. days_per_year;
>   mass = 2.85885980666130812e-04 *. solar_mass;
> }
>
> let uranus = {
>   x = 1.28943695621391310e+01;
>   y = -1.51111514016986312e+01;
>   z = -2.23307578892655734e-01;
>   vx = 2.96460137564761618e-03 *. days_per_year;
>   vy = 2.37847173959480950e-03 *. days_per_year;
>   vz = -2.96589568540237556e-05 *. days_per_year;
>   mass = 4.36624404335156298e-05 *. solar_mass;
> }
>
> let neptune = {
>   x = 1.53796971148509165e+01;
>   y = -2.59193146099879641e+01;
>   z = 1.79258772950371181e-01;
>   vx = 2.68067772490389322e-03 *. days_per_year;
>   vy = 1.62824170038242295e-03 *. days_per_year;
>   vz = -9.51592254519715870e-05 *. days_per_year;
>   mass = 5.15138902046611451e-05 *. solar_mass;
> }
>
> let sun = {
>   x = 0.;  y = 0.;  z = 0.;  vx = 0.;  vy = 0.; vz = 0.;
>   mass= solar_mass;
> }
>
> let bodies = [| sun; jupiter; saturn; uranus; neptune |]
>
> let () =
>   let n = int_of_string(Sys.argv.(1)) in
>   offset_momentum bodies;
>   Printf.printf "%.9f\n" (energy bodies);
>   for i = 1 to n do
>     advance bodies 0.01
>   done;
>   Printf.printf "%.9f\n" (energy bodies)


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-07 19:16 ` [Caml-list] " Will M. Farr
@ 2005-02-07 19:36   ` Christophe TROESTLER
  2005-02-07 19:55     ` Will M. Farr
  2005-02-07 20:16     ` Markus Mottl
  0 siblings, 2 replies; 35+ messages in thread
From: Christophe TROESTLER @ 2005-02-07 19:36 UTC (permalink / raw)
  To: farr; +Cc: caml-list

On Mon, 7 Feb 2005, "Will M. Farr" <farr@MIT.EDU> wrote:
> 
> You might try profiling (using gprof); maybe it will give you an
> idea where your time is being spent.

I did that but I could not see anything: the important spot reads:

-----------------------------------------------
                8.05    0.00 1000000/1000000     camlNbody__entry [5]
[6]     99.7    8.05    0.00 1000000         camlNbody__code_begin [6]
-----------------------------------------------

Did I miss something???

> it looks like the java code is faster, but it clearly has a large
> startup time.

I thought that.  Still, I'd like to know whether there is a way to
make Caml code that fast or if not why.

Thanks for youe reply,
ChriS


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-07 18:57 [Benchmark] NBody Christophe TROESTLER
  2005-02-07 19:16 ` [Caml-list] " Will M. Farr
@ 2005-02-07 19:37 ` Martin Jambon
  2005-02-07 19:46   ` Christophe TROESTLER
  2005-02-07 20:04   ` sejourne_kevin
  2005-02-08  1:29 ` skaller
  2005-02-08 10:43 ` Xavier Leroy
  3 siblings, 2 replies; 35+ messages in thread
From: Martin Jambon @ 2005-02-07 19:37 UTC (permalink / raw)
  To: Christophe TROESTLER; +Cc: O'Caml Mailing List

On Mon, 7 Feb 2005, Christophe TROESTLER wrote:

> Hi,
>
> For fun I have implemented an nbody simulation following
> http://shootout.alioth.debian.org/benchmark.php?test=nbody&lang=all&sort=cpu
> (code is attached).  I've compiled it with
>
>   ocamlopt -o nbody.com -inline 3 -unsafe -ccopt -O2 nbody.ml

-inline 100 gives better results for me (around -25%)
This should be enough to beat Java :-p if you get the same improvement on
your side.


Martin

--
Martin Jambon, PhD
Researcher in Structural Bioinformatics since the 20th Century
The Burnham Institute http://www.burnham.org
San Diego, California




^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-07 19:37 ` Martin Jambon
@ 2005-02-07 19:46   ` Christophe TROESTLER
  2005-02-07 20:22     ` Martin Jambon
  2005-02-07 20:04   ` sejourne_kevin
  1 sibling, 1 reply; 35+ messages in thread
From: Christophe TROESTLER @ 2005-02-07 19:46 UTC (permalink / raw)
  To: martin_jambon; +Cc: caml-list

On Mon, 7 Feb 2005, Martin Jambon <martin_jambon@emailuser.net> wrote:
> 
> On Mon, 7 Feb 2005, Christophe TROESTLER wrote:
> 
> >   ocamlopt -o nbody.com -inline 3 -unsafe -ccopt -O2 nbody.ml
> 
> -inline 100 gives better results for me (around -25%)

For me it is slower (about 13-18%).  Are you also on an intel
platform?

ChriS


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-07 19:36   ` Christophe TROESTLER
@ 2005-02-07 19:55     ` Will M. Farr
  2005-02-08 10:34       ` Olivier Andrieu
  2005-02-07 20:16     ` Markus Mottl
  1 sibling, 1 reply; 35+ messages in thread
From: Will M. Farr @ 2005-02-07 19:55 UTC (permalink / raw)
  To: Christophe TROESTLER; +Cc: caml-list

When I ran it on my system using Shark (a Mac OS X profiling 
application that doesn't require hooks in the app itself to get the 
information), the breakdown of the significant values was

48.1% in advance
34% in __sqrt (system function)
+ random stuff

I'm not sure, in light of this, why the aggressive inlining makes any 
difference.

Will

On 7 Feb 2005, at 2:36 PM, Christophe TROESTLER wrote:

> On Mon, 7 Feb 2005, "Will M. Farr" <farr@MIT.EDU> wrote:
>>
>> You might try profiling (using gprof); maybe it will give you an
>> idea where your time is being spent.
>
> I did that but I could not see anything: the important spot reads:
>
> -----------------------------------------------
>                 8.05    0.00 1000000/1000000     camlNbody__entry [5]
> [6]     99.7    8.05    0.00 1000000         camlNbody__code_begin [6]
> -----------------------------------------------
>
> Did I miss something???
>
>> it looks like the java code is faster, but it clearly has a large
>> startup time.
>
> I thought that.  Still, I'd like to know whether there is a way to
> make Caml code that fast or if not why.
>
> Thanks for youe reply,
> ChriS


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-07 19:37 ` Martin Jambon
  2005-02-07 19:46   ` Christophe TROESTLER
@ 2005-02-07 20:04   ` sejourne_kevin
  2005-02-07 20:32     ` Robert Roessler
  2005-02-07 22:57     ` Oliver Bandel
  1 sibling, 2 replies; 35+ messages in thread
From: sejourne_kevin @ 2005-02-07 20:04 UTC (permalink / raw)
  To: caml-list; +Cc: Martin Jambon

Martin Jambon a écrit :
> On Mon, 7 Feb 2005, Christophe TROESTLER wrote:
> 
> 
>>Hi,
>>
>>For fun I have implemented an nbody simulation following
>>http://shootout.alioth.debian.org/benchmark.php?test=nbody&lang=all&sort=cpu
>>(code is attached).  I've compiled it with
>>
>>  ocamlopt -o nbody.com -inline 3 -unsafe -ccopt -O2 nbody.ml
> 
> 
> -inline 100 gives better results for me (around -25%)
> This should be enough to beat Java :-p if you get the same improvement on
> your side.
Same for me, but "-ccopt -O2" don't change anything.

Kévin.



^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-07 19:36   ` Christophe TROESTLER
  2005-02-07 19:55     ` Will M. Farr
@ 2005-02-07 20:16     ` Markus Mottl
  1 sibling, 0 replies; 35+ messages in thread
From: Markus Mottl @ 2005-02-07 20:16 UTC (permalink / raw)
  To: Christophe TROESTLER; +Cc: farr, caml-list

On Mon, 07 Feb 2005, Christophe TROESTLER wrote:
> I thought that.  Still, I'd like to know whether there is a way to
> make Caml code that fast or if not why.

I have personally found that using "let rec" loops often slightly improves
performance over "for"-loops.  Maybe this is just anecdotal evidence,
but you might want to try that instead.

Regards,
Markus

-- 
Markus Mottl    markus.mottl@gmail.com    http://www.ocaml.info


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-07 19:46   ` Christophe TROESTLER
@ 2005-02-07 20:22     ` Martin Jambon
  0 siblings, 0 replies; 35+ messages in thread
From: Martin Jambon @ 2005-02-07 20:22 UTC (permalink / raw)
  To: Christophe TROESTLER; +Cc: caml-list

On Mon, 7 Feb 2005, Christophe TROESTLER wrote:

> On Mon, 7 Feb 2005, Martin Jambon <martin_jambon@emailuser.net> wrote:
> >
> > On Mon, 7 Feb 2005, Christophe TROESTLER wrote:
> >
> > >   ocamlopt -o nbody.com -inline 3 -unsafe -ccopt -O2 nbody.ml
> >
> > -inline 100 gives better results for me (around -25%)
>
> For me it is slower (about 13-18%).  Are you also on an intel
> platform?

That was on my laptop with an Intel Celeron with Linux (I don't know much
more about the hardware). The results are stable.

I tested the same code on another machine with an Intel Pentium 4, and I
get "discrete" results. I repeat "time prog arg" in the shell
successively and get this:

time ./nbody-inline100 1_000_000
-> either 1.145-1.150 or 1.070-1.090 or sometimes 1.014-1.015

time ./nbody-inline3 1_000_000
-> either 0.990-0.995 or 1.245-1.255

This is an interesting effect... Probably it is well-known by people who
write compilers, personally I don't know anything about this topic. I can
provide more quantitative data on demand.


Martin

--
Martin Jambon, PhD
Researcher in Structural Bioinformatics since the 20th Century
The Burnham Institute http://www.burnham.org
San Diego, California




^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-07 20:04   ` sejourne_kevin
@ 2005-02-07 20:32     ` Robert Roessler
  2005-02-07 22:57     ` Oliver Bandel
  1 sibling, 0 replies; 35+ messages in thread
From: Robert Roessler @ 2005-02-07 20:32 UTC (permalink / raw)
  To: Caml-list

sejourne_kevin wrote:

> Martin Jambon a écrit :
> 
>> On Mon, 7 Feb 2005, Christophe TROESTLER wrote:
>>
>>
>>> Hi,
>>>
>>> For fun I have implemented an nbody simulation following
>>> http://shootout.alioth.debian.org/benchmark.php?test=nbody&lang=all&sort=cpu 
>>>
>>> (code is attached).  I've compiled it with
>>>
>>>  ocamlopt -o nbody.com -inline 3 -unsafe -ccopt -O2 nbody.ml
>>
>>
>>
>> -inline 100 gives better results for me (around -25%)
>> This should be enough to beat Java :-p if you get the same improvement on
>> your side.
> 
> Same for me, but "-ccopt -O2" don't change anything.

That might be because the C compiler doesn't really have anything to
do with ocamlopt... :)

The C *compiler* only comes into play if you have any .c files on the
ocamlopt command line - the C *linker* will be used for all linking.

Robert Roessler
robertr@rftp.com
http://www.rftp.com



^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-07 20:04   ` sejourne_kevin
  2005-02-07 20:32     ` Robert Roessler
@ 2005-02-07 22:57     ` Oliver Bandel
  1 sibling, 0 replies; 35+ messages in thread
From: Oliver Bandel @ 2005-02-07 22:57 UTC (permalink / raw)
  To: caml-list

On Mon, Feb 07, 2005 at 09:04:39PM +0100, sejourne_kevin wrote:
> Martin Jambon a écrit :
> >On Mon, 7 Feb 2005, Christophe TROESTLER wrote:
> >
> >
> >>Hi,
> >>
> >>For fun I have implemented an nbody simulation following
> >>http://shootout.alioth.debian.org/benchmark.php?test=nbody&lang=all&sort=cpu
> >>(code is attached).  I've compiled it with
> >>
> >> ocamlopt -o nbody.com -inline 3 -unsafe -ccopt -O2 nbody.ml
> >
> >
> >-inline 100 gives better results for me (around -25%)
> >This should be enough to beat Java :-p if you get the same improvement on
> >your side.
> Same for me, but "-ccopt -O2" don't change anything.


If using gcc, you have to use -O3 to enable inlining.

If using only -O2, inlining is not enabled automatically by the gcc.


Ciao,
   Oliver


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-07 18:57 [Benchmark] NBody Christophe TROESTLER
  2005-02-07 19:16 ` [Caml-list] " Will M. Farr
  2005-02-07 19:37 ` Martin Jambon
@ 2005-02-08  1:29 ` skaller
  2005-02-08  1:48   ` Will M. Farr
  2005-02-08 10:25   ` Xavier Leroy
  2005-02-08 10:43 ` Xavier Leroy
  3 siblings, 2 replies; 35+ messages in thread
From: skaller @ 2005-02-08  1:29 UTC (permalink / raw)
  To: Christophe TROESTLER; +Cc: O'Caml Mailing List

On Tue, 2005-02-08 at 05:57, Christophe TROESTLER wrote:


> type planet = {
>   mutable x : float;  mutable y : float;  mutable z : float;
>   mutable vx: float;  mutable vy: float;  mutable vz: float;
>   mass : float;
> }

Advice from old FORTRAN hacker:

This is a bad data structure for Ocaml.

Try using an unboxed float array of 7 times the length instead,
this will eliminate boxing, eliminate write barriers,
eliminate garbage collection, reduce memory requirements.


-- 
John Skaller, mailto:skaller@users.sf.net
voice: 061-2-9660-0850, 
snail: PO BOX 401 Glebe NSW 2037 Australia
Checkout the Felix programming language http://felix.sf.net




^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-08  1:29 ` skaller
@ 2005-02-08  1:48   ` Will M. Farr
  2005-02-08  9:01     ` Ville-Pertti Keinonen
  2005-02-08  9:37     ` skaller
  2005-02-08 10:25   ` Xavier Leroy
  1 sibling, 2 replies; 35+ messages in thread
From: Will M. Farr @ 2005-02-08  1:48 UTC (permalink / raw)
  To: skaller; +Cc: O'Caml Mailing List, Christophe TROESTLER

Doesn't caml store records whose types are all floats as a float array 
anyway?  I thought I remembered this optimization going by in the 
manual somewhere.

Will


On 7 Feb 2005, at 8:29 PM, skaller wrote:

> On Tue, 2005-02-08 at 05:57, Christophe TROESTLER wrote:
>
>
>> type planet = {
>>   mutable x : float;  mutable y : float;  mutable z : float;
>>   mutable vx: float;  mutable vy: float;  mutable vz: float;
>>   mass : float;
>> }
>
> Advice from old FORTRAN hacker:
>
> This is a bad data structure for Ocaml.
>
> Try using an unboxed float array of 7 times the length instead,
> this will eliminate boxing, eliminate write barriers,
> eliminate garbage collection, reduce memory requirements.
>
>
> -- 
> John Skaller, mailto:skaller@users.sf.net
> voice: 061-2-9660-0850,
> snail: PO BOX 401 Glebe NSW 2037 Australia
> Checkout the Felix programming language http://felix.sf.net
>
>
>
> _______________________________________________
> Caml-list mailing list. Subscription management:
> http://yquem.inria.fr/cgi-bin/mailman/listinfo/caml-list
> Archives: http://caml.inria.fr
> Beginner's list: http://groups.yahoo.com/group/ocaml_beginners
> Bug reports: http://caml.inria.fr/bin/caml-bugs


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-08  1:48   ` Will M. Farr
@ 2005-02-08  9:01     ` Ville-Pertti Keinonen
  2005-02-08  9:37     ` skaller
  1 sibling, 0 replies; 35+ messages in thread
From: Ville-Pertti Keinonen @ 2005-02-08  9:01 UTC (permalink / raw)
  To: Will M. Farr; +Cc: skaller, O'Caml Mailing List, Christophe TROESTLER

On Mon, 2005-02-07 at 20:48 -0500, Will M. Farr wrote:
> Doesn't caml store records whose types are all floats as a float array 
> anyway?  I thought I remembered this optimization going by in the 
> manual somewhere.

Yes, it does.

I wonder if there's any reason why this can't be done for even more
cases.  It should work for any record that doesn't contain pointers.



^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-08  1:48   ` Will M. Farr
  2005-02-08  9:01     ` Ville-Pertti Keinonen
@ 2005-02-08  9:37     ` skaller
  2005-02-08 10:10       ` Ville-Pertti Keinonen
  2005-02-08 12:04       ` Marcin 'Qrczak' Kowalczyk
  1 sibling, 2 replies; 35+ messages in thread
From: skaller @ 2005-02-08  9:37 UTC (permalink / raw)
  To: Will M. Farr; +Cc: O'Caml Mailing List, Christophe TROESTLER

On Tue, 2005-02-08 at 12:48, Will M. Farr wrote:
> Doesn't caml store records whose types are all floats as a float array 
> anyway?  I thought I remembered this optimization going by in the 
> manual somewhere.

But the types in your record are mutable, and so it can't
possibly work.

In particular, given two arrays of a record type R containing
a mutable field M, the arrays MUST uses boxes or modifications
to M in a shared record wouldn't be shared.

OTOH if the field is not mutable there is no problem,
but then you can't modify it, you can only use
functional update on the record and store the whole
thing back into the array.

Perhaps Ocaml is actually smart enough to optimise

	type r = { m: float; n: float };
	let x = Array.create 99 { m=0.0; n=0.0 } in
	x.[2] = { x.[2] with m = m + 1.0 };

so x is represnted by an array of float, and perhaps
one could even optimise

	x.[2].m <- 22.0;

even though it appears to be a type error (modifying
an immutable field), it actually isn't, since you could
always used functional update.

However it isn't clear Ocaml type system uses the most
expressive typing of 'constness', i.e. that it propages
'mutable' ness correctly. 

-- 
John Skaller, mailto:skaller@users.sf.net
voice: 061-2-9660-0850, 
snail: PO BOX 401 Glebe NSW 2037 Australia
Checkout the Felix programming language http://felix.sf.net




^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-08  9:37     ` skaller
@ 2005-02-08 10:10       ` Ville-Pertti Keinonen
  2005-02-08 16:36         ` skaller
  2005-02-08 12:04       ` Marcin 'Qrczak' Kowalczyk
  1 sibling, 1 reply; 35+ messages in thread
From: Ville-Pertti Keinonen @ 2005-02-08 10:10 UTC (permalink / raw)
  To: skaller; +Cc: Will M. Farr, O'Caml Mailing List, Christophe TROESTLER

On Tue, 2005-02-08 at 20:37 +1100, skaller wrote:
> But the types in your record are mutable, and so it can't
> possibly work.
> 
> In particular, given two arrays of a record type R containing
> a mutable field M, the arrays MUST uses boxes or modifications
> to M in a shared record wouldn't be shared.

You're apparently talking about an array of records (which obviously
contains pointers to the records), but the issue (I think) was the
records themselves, which store floats unboxed if they contain nothing
else.

I'm not sure that the data set in this case is large enough that giving
up abstraction and combining things into a single array would make a big
difference.  It's also not what the Java program being compared to did.



^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-08  1:29 ` skaller
  2005-02-08  1:48   ` Will M. Farr
@ 2005-02-08 10:25   ` Xavier Leroy
  2005-02-08 18:34     ` skaller
  1 sibling, 1 reply; 35+ messages in thread
From: Xavier Leroy @ 2005-02-08 10:25 UTC (permalink / raw)
  To: skaller; +Cc: Christophe TROESTLER, O'Caml Mailing List

> > type planet = {
> >   mutable x : float;  mutable y : float;  mutable z : float;
> >   mutable vx: float;  mutable vy: float;  mutable vz: float;
> >   mass : float;
> > }
> 
> Advice from old FORTRAN hacker:
> 
> This is a bad data structure for Ocaml.

Advice from an old Caml hacker: this is an excellent data structure
for OCaml.

Second advice from an old Caml hacker: don't trust John Skaller's
technical advice too much.  John often has preconceptions about what
Caml is, or does, or "must" do (in his mind), which bear
no connections with reality.  That would be a minor annoyance except
for the very authoritative, expert-like tone in which these
preconceptions are assessed, which can mislead non-expert readers.

- Xavier Leroy


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-07 19:55     ` Will M. Farr
@ 2005-02-08 10:34       ` Olivier Andrieu
  2005-02-08 10:52         ` Micha
  0 siblings, 1 reply; 35+ messages in thread
From: Olivier Andrieu @ 2005-02-08 10:34 UTC (permalink / raw)
  To: farr; +Cc: Christophe.Troestler, caml-list

 "Will M. Farr" [Mon, 7 Feb 2005]:
 > When I ran it on my system using Shark (a Mac OS X profiling 
 > application that doesn't require hooks in the app itself to get the 
 > information), the breakdown of the significant values was
 > 
 > 48.1% in advance
 > 34% in __sqrt (system function)
          ^^^^^^
in that case, the -ffast-math compiler switch will probably make a
noticeable difference on x86 (but maybe that's cheating).

-- 
   Olivier


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-07 18:57 [Benchmark] NBody Christophe TROESTLER
                   ` (2 preceding siblings ...)
  2005-02-08  1:29 ` skaller
@ 2005-02-08 10:43 ` Xavier Leroy
  2005-02-08 11:26   ` Ville-Pertti Keinonen
                     ` (4 more replies)
  3 siblings, 5 replies; 35+ messages in thread
From: Xavier Leroy @ 2005-02-08 10:43 UTC (permalink / raw)
  To: Christophe TROESTLER; +Cc: O'Caml Mailing List

[-- Attachment #1: Type: text/plain, Size: 2344 bytes --]

> For fun I have implemented an nbody simulation following
> http://shootout.alioth.debian.org/benchmark.php?test=nbody&lang=all&sort=cpu
> (code is attached).  

Ah, another micro-benchmark.  Great pasttime!  

Your OCaml code is about as good as you can write.  All the unboxing
optimizations are triggered.  

>   ocamlopt -o nbody.com -inline 3 -unsafe -ccopt -O2 nbody.ml

On x86, you can get a bit more speed with -ffast-math, which turns the
call to sqrt() into inline assembly.  As others mentioned, "-ccopt -O2"
is useless.

> I've compared with the Java program they give.  I get (on a Pentium(R)
> 4 CPU 2.40GHz Debian):
> 
> n	OCaml	Java
> 1000	0.004	0.112
> 10000	0.016	0.112
> 100000	0.159	0.218
> 200000	0.284	0.370
> 500000	0.707	0.702
> 1000000	1.410	1.359
> 2000000	2.884	2.453
> 3000000	4.294	3.590
> 4000000	5.735	4.774
> 
> I am interested in explanations why OCaml seems asymptotically slower
> than Java and ways to improve that.  

You don't say which Java implementation you used (there are several).
The "0.112" overhead of Java corresponds to start-up time, which includes
JIT-compilation.  As to why Java is asymptotically faster, we'd
need to look at the generated assembly code.  Good luck doing that
with a JIT compiler.

So, to understand OCaml's performances here, one has to turn to a
different baseline.  I translated your Caml code to C and looked at
gcc output.

The best gcc output is faster than the best OCaml output by about 30%.
Looking at the asm code, the main difference is that gcc keeps some
float variables (dx, dy, dz, etc) in the floating-point stack while
OCaml stores them (unboxed) to the stack.  Maybe the Java
implementation you used manages to use the float stack.  Who knows.

The x86 floating-point stack is an awfully bad match for the
register-based OCaml code generation model, so, no, I'm not going to
the great lengths the gcc folks went to extract some performance from
that model.

(Besides, being 1.3 times slower than gcc on numerical code is within
the design envelope for OCaml.  My performance goals have always been
"never more than twice as slow as C".)

On a "normal" (register-based) float architecture like PowerPC or
x86_64, the OCaml-generated code is essentially identical to the
gcc-generated one.

The C translation is attached for your amusement.

- Xavier Leroy

[-- Attachment #2: nbody.c --]
[-- Type: text/x-csrc, Size: 3648 bytes --]

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#define pi 3.141592653589793
#define solar_mass (4 * pi * pi)
#define days_per_year 365.24

struct planet {
  double x, y, z;
  double vx, vy, vz;
  double mass;
};

void advance(int nbodies, struct planet * bodies, double dt)
{
  int i, j;

  for (i = 0; i < nbodies; i++) {
    struct planet * b = &(bodies[i]);
    for (j = i + 1; j < nbodies; j++) {
      struct planet * b2 = &(bodies[j]);
      double dx = b->x - b2->x;
      double dy = b->y - b2->y;
      double dz = b->z - b2->z;
      double distance = sqrt(dx * dx + dy * dy + dz * dz);
      double mag = dt / (distance * distance * distance);
      b->vx -= dx * b2->mass * mag;
      b->vy -= dy * b2->mass * mag;
      b->vz -= dz * b2->mass * mag;
      b2->vx += dx * b->mass * mag;
      b2->vy += dy * b->mass * mag;
      b2->vz += dz * b->mass * mag;
    }
  }
  for (i = 0; i < nbodies; i++) {
    struct planet * b = &(bodies[i]);
    b->x += dt * b->vx;
    b->y += dt * b->vy;
    b->z += dt * b->vz;
  }
}

double energy(int nbodies, struct planet * bodies)
{
  double e;
  int i, j;

  e = 0.0;
  for (i = 0; i < nbodies; i++) {
    struct planet * b = &(bodies[i]);
    e += 0.5 * b->mass * (b->vx * b->vx + b->vy * b->vy + b->vz * b->vz);
    for (j = i + 1; j < nbodies; j++) {
      struct planet * b2 = &(bodies[j]);
      double dx = b->x - b2->x;
      double dy = b->y - b2->y;
      double dz = b->z - b2->z;
      double distance = sqrt(dx * dx + dy * dy + dz * dz);
      e -= (b->mass * b2->mass) / distance;
    }
  }
  return e;
}

void offset_momentum(int nbodies, struct planet * bodies)
{
  double px = 0.0, py = 0.0, pz = 0.0;
  int i;
  for (i = 0; i < nbodies; i++) {
    px += bodies[i].vx * bodies[i].mass;
    py += bodies[i].vy * bodies[i].mass;
    pz += bodies[i].vz * bodies[i].mass;
  }
  bodies[0].vx = - px / solar_mass;
  bodies[0].vy = - py / solar_mass;
  bodies[0].vz = - pz / solar_mass;
}

#define NBODIES 5
struct planet bodies[NBODIES] = {
  {                               /* sun */
    0, 0, 0, 0, 0, 0, solar_mass
  },
  {                               /* jupiter */
    4.84143144246472090e+00,
    -1.16032004402742839e+00,
    -1.03622044471123109e-01,
    1.66007664274403694e-03 * days_per_year,
    7.69901118419740425e-03 * days_per_year,
    -6.90460016972063023e-05 * days_per_year,
    9.54791938424326609e-04 * solar_mass
  },
  {                               /* saturn */
    8.34336671824457987e+00,
    4.12479856412430479e+00,
    -4.03523417114321381e-01,
    -2.76742510726862411e-03 * days_per_year,
    4.99852801234917238e-03 * days_per_year,
    2.30417297573763929e-05 * days_per_year,
    2.85885980666130812e-04 * solar_mass
  },
  {                               /* uranus */
    1.28943695621391310e+01,
    -1.51111514016986312e+01,
    -2.23307578892655734e-01,
    2.96460137564761618e-03 * days_per_year,
    2.37847173959480950e-03 * days_per_year,
    -2.96589568540237556e-05 * days_per_year,
    4.36624404335156298e-05 * solar_mass
  },
  {                               /* neptune */
    1.53796971148509165e+01,
    -2.59193146099879641e+01,
    1.79258772950371181e-01,
    2.68067772490389322e-03 * days_per_year,
    1.62824170038242295e-03 * days_per_year,
    -9.51592254519715870e-05 * days_per_year,
    5.15138902046611451e-05 * solar_mass
  }
};

int main(int argc, char ** argv)
{
  int n = atoi(argv[1]);
  int i;

  offset_momentum(NBODIES, bodies);
  printf ("%.9f\n", energy(NBODIES, bodies));
  for (i = 1; i <= n; i++)
    advance(NBODIES, bodies, 0.01);
  printf ("%.9f\n", energy(NBODIES, bodies));
  return 0;
}


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-08 10:34       ` Olivier Andrieu
@ 2005-02-08 10:52         ` Micha
  0 siblings, 0 replies; 35+ messages in thread
From: Micha @ 2005-02-08 10:52 UTC (permalink / raw)
  To: caml-list

On Dienstag 08 Februar 2005 11:34, Olivier Andrieu wrote:
> in that case, the -ffast-math compiler switch will probably make a
> noticeable difference on x86 (but maybe that's cheating).

hmmm?  I think it's an ocaml program and not a c program? How can the 
-ffast-math compiler switch have any effect on ocaml code? 

 Michael


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-08 10:43 ` Xavier Leroy
@ 2005-02-08 11:26   ` Ville-Pertti Keinonen
  2005-02-08 15:59   ` Florian Hars
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 35+ messages in thread
From: Ville-Pertti Keinonen @ 2005-02-08 11:26 UTC (permalink / raw)
  To: Xavier Leroy; +Cc: Christophe TROESTLER, O'Caml Mailing List

On Tue, 2005-02-08 at 11:43 +0100, Xavier Leroy wrote:

> The best gcc output is faster than the best OCaml output by about 30%.
> Looking at the asm code, the main difference is that gcc keeps some
> float variables (dx, dy, dz, etc) in the floating-point stack while
> OCaml stores them (unboxed) to the stack.  Maybe the Java
> implementation you used manages to use the float stack.  Who knows.

An interesting question is whether Java aligns allocations and stack to
4-byte or 8-byte boundaries on x86.

A few years ago, when keeping the stack aligned for better floating
point performance was a new gcc feature, and only worked as long as it
was aligned initially (and consistently kept it misaligned if it
wasn't), I played around with a floating point intensive C program that
would exhibit an almost 40% difference in performance depending on what
the binary was called...



^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-08  9:37     ` skaller
  2005-02-08 10:10       ` Ville-Pertti Keinonen
@ 2005-02-08 12:04       ` Marcin 'Qrczak' Kowalczyk
  2005-02-08 17:06         ` skaller
  1 sibling, 1 reply; 35+ messages in thread
From: Marcin 'Qrczak' Kowalczyk @ 2005-02-08 12:04 UTC (permalink / raw)
  To: caml-list

skaller <skaller@users.sourceforge.net> writes:

> But the types in your record are mutable, and so it can't
> possibly work.

It does work: mutable floats are unboxed too (if there are no
non-float fields).

It would be different if OCaml used standalone references instead
of mutable fields. But mutable fields are not first-class entities,
so they can be unboxed. I think this is actually the reason of
their existence (instead of taking SML ref as a primitive, which
is implemented with a record with a mutable field in OCaml).

> Perhaps Ocaml is actually smart enough to optimise
>
> 	type r = { m: float; n: float };
> 	let x = Array.create 99 { m=0.0; n=0.0 } in
> 	x.[2] = { x.[2] with m = m + 1.0 };
>
> so x is represnted by an array of float,

It does not optimize it, even though it theoretically could.

It's not clear whether this would be an optimization. Having a large
field unboxed requires boxing a large object if it's taken out of the
array as a whole - this is an improvement only if memory savings (and
thus cache usage and GC time) outweigh slower element access.

And it is generally taken out as a whole, unless a particular
operation could be applied to the copy inside the array directly.
This requires analysis which I believe OCaml doesn't perform.
Floats are small enough to be kept in registers.

> and perhaps one could even optimise
>
> 	x.[2].m <- 22.0;
>
> even though it appears to be a type error (modifying
> an immutable field), it actually isn't, since you could
> always used functional update.

Since with the generic polymorphic representation of the array the
only way to implement it (in the absence of whole-program analysis)
is functional update, and it behaves exactly as functional update,
it's not surprising that OCaml doesn't allow this and requires to
use functional update explicitly.

> However it isn't clear Ocaml type system uses the most
> expressive typing of 'constness', i.e. that it propages
> 'mutable' ness correctly. 

There is nothing to propagate.

-- 
   __("<         Marcin Kowalczyk
   \__/       qrczak@knm.org.pl
    ^^     http://qrnik.knm.org.pl/~qrczak/


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-08 10:43 ` Xavier Leroy
  2005-02-08 11:26   ` Ville-Pertti Keinonen
@ 2005-02-08 15:59   ` Florian Hars
  2005-02-13 16:40   ` Christoph Bauer
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 35+ messages in thread
From: Florian Hars @ 2005-02-08 15:59 UTC (permalink / raw)
  To: Xavier Leroy; +Cc: Christophe TROESTLER, O'Caml Mailing List

Xavier Leroy wrote:
> Ah, another micro-benchmark.  Great pasttime!  

I tried it on an Athlon64, with ocaml 3.08.1 and gcc 3.3.4, and the ocaml 
version is consistently within a factor of 1.5 of the C version (which is 
faster than the C version with -ffast-math). (See blow for data.)

> The x86 floating-point stack is an awfully bad match for the
> register-based OCaml code generation model

Bu given that the ocaml code generator is register based, I am a bit surprised 
that it uses only a bit more a third of the FP registers on an Athlon64:

$ gcc  -O3 -S nbody.c
$ perl -ne 'while (/(xmm\d+)/g) {print "$1\n"}' nbody.s | sort -u | wc -l
16
$ ocamlopt -o nbody_ml40 -inline 40 -unsafe -S nbody.ml
$ perl -ne 'while (/(xmm\d+)/g) {print "$1\n"}' nbody.s | sort -u | wc -l
6

Yours, Florian.

----- Cutting here may damage your screen -----

ocamlopt -o nbody_ml10 -inline 10 -unsafe nbody.ml
ocamlopt -o nbody_ml40 -inline 40 -unsafe nbody.ml
ocamlopt -o nbody_ml40s -inline 40 nbody.ml
gcc  -O3 -o nbody_c nbody.c -lm
gcc -ffast-math  -O3 -o nbody_cf nbody.c -lm

    n       ml10    ml40    ml40s   c        cf
     1000    0.00    0.00    0.00    0.00    0.00
    10000    0.01    0.01    0.01    0.01    0.01
   100000    0.15    0.12    0.14    0.10    0.11
   200000    0.19    0.10    0.11    0.08    0.09
   500000    0.31    0.25    0.28    0.21    0.22
  1000000    0.60    0.50    0.56    0.43    0.45
  2000000    1.22    1.00    1.12    0.87    0.90
  3000000    1.83    1.50    1.68    1.30    1.35
  4000000    2.35    2.00    2.24    1.71    1.80


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-08 10:10       ` Ville-Pertti Keinonen
@ 2005-02-08 16:36         ` skaller
  0 siblings, 0 replies; 35+ messages in thread
From: skaller @ 2005-02-08 16:36 UTC (permalink / raw)
  To: Ville-Pertti Keinonen
  Cc: Will M. Farr, O'Caml Mailing List, Christophe TROESTLER

On Tue, 2005-02-08 at 21:10, Ville-Pertti Keinonen wrote:
> On Tue, 2005-02-08 at 20:37 +1100, skaller wrote:
> > But the types in your record are mutable, and so it can't
> > possibly work.
> > 
> > In particular, given two arrays of a record type R containing
> > a mutable field M, the arrays MUST uses boxes or modifications
> > to M in a shared record wouldn't be shared.
> 
> You're apparently talking about an array of records (which obviously
> contains pointers to the records), but the issue (I think) was the
> records themselves, which store floats unboxed if they contain nothing
> else.

Yes, you're right. In the problem, just one level of indirection
wouldn't hurt as much as the numbers seem to indicate.

-- 
John Skaller, mailto:skaller@users.sf.net
voice: 061-2-9660-0850, 
snail: PO BOX 401 Glebe NSW 2037 Australia
Checkout the Felix programming language http://felix.sf.net




^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-08 12:04       ` Marcin 'Qrczak' Kowalczyk
@ 2005-02-08 17:06         ` skaller
  0 siblings, 0 replies; 35+ messages in thread
From: skaller @ 2005-02-08 17:06 UTC (permalink / raw)
  To: Marcin 'Qrczak' Kowalczyk; +Cc: caml-list

On Tue, 2005-02-08 at 23:04, Marcin 'Qrczak' Kowalczyk wrote:
> skaller <skaller@users.sourceforge.net> writes:
> 
> > But the types in your record are mutable, and so it can't
> > possibly work.
> 
> It does work: mutable floats are unboxed too (if there are no
> non-float fields).

I'm talking about fully unboxing an array of records of floats
into just an array of floats, and clearly that cannot be
done if the record contains a mutable field, because if you 
assign the record to two different array elements, then
modify the mutable field, both array elements would
have to reflect the assignment.

> > However it isn't clear Ocaml type system uses the most
> > expressive typing of 'constness', i.e. that it propages
> > 'mutable' ness correctly. 
> 
> There is nothing to propagate.

There certainly is for unboxed data structures: if you store
a record with an immutable field into a mutable field,
the immutable field becomes mutable, because you can
always use functional update. The mutability is thus
mutable if the parent is mutable, whether or not 
the field is mutable. In other words mutability is inherited.

In C++ this applies to lvalueness, but constness actually
works in reverse: a non-const field is const if its parent is.

For boxed fields, you may be right there is no propagation
in Ocaml, but perhaps that indicates a lack of expressiveness,
for example record subtyping is missing. In particular
one can believe that a function accepting a record
of immutable fields would not harm a record with the 
same structure but some mutable fields -- roughly
the equivalent of C++ 'const' conversions. However
unless I'm mistaken there is no way to do this,
even though it appears sound and desirable.
So saying 'there is nothing to propagate' isn't
really meaningful, it seems circular: Ocaml doesn't
propagate it therefore there isn't anything to
propagate.

Note I didn't claim there was a bug, I said merely that

> it isn't clear Ocaml type system uses the most
> > expressive typing of 'constness',

-- 
John Skaller, mailto:skaller@users.sf.net
voice: 061-2-9660-0850, 
snail: PO BOX 401 Glebe NSW 2037 Australia
Checkout the Felix programming language http://felix.sf.net




^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-08 10:25   ` Xavier Leroy
@ 2005-02-08 18:34     ` skaller
  0 siblings, 0 replies; 35+ messages in thread
From: skaller @ 2005-02-08 18:34 UTC (permalink / raw)
  To: Xavier Leroy; +Cc: Christophe TROESTLER, O'Caml Mailing List

On Tue, 2005-02-08 at 21:25, Xavier Leroy wrote:
> > > type planet = {
> > >   mutable x : float;  mutable y : float;  mutable z : float;
> > >   mutable vx: float;  mutable vy: float;  mutable vz: float;
> > >   mass : float;
> > > }
> > 
> > Advice from old FORTRAN hacker:
> > 
> > This is a bad data structure for Ocaml.
> 
> Advice from an old Caml hacker: this is an excellent data structure
> for OCaml.
> 
> Second advice from an old Caml hacker: don't trust John Skaller's
> technical advice too much.  John often has preconceptions about what
> Caml is, or does, or "must" do (in his mind), which bear
> no connections with reality.  That would be a minor annoyance except
> for the very authoritative, expert-like tone in which these
> preconceptions are assessed, which can mislead non-expert readers.

Indeed you are right, I did not notice that the record type
was all floats and might be unboxed.

I apologise for speaking with an authority I don't have.

-- 
John Skaller, mailto:skaller@users.sf.net
voice: 061-2-9660-0850, 
snail: PO BOX 401 Glebe NSW 2037 Australia
Checkout the Felix programming language http://felix.sf.net




^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-08 10:43 ` Xavier Leroy
  2005-02-08 11:26   ` Ville-Pertti Keinonen
  2005-02-08 15:59   ` Florian Hars
@ 2005-02-13 16:40   ` Christoph Bauer
  2005-02-13 18:13   ` Christophe TROESTLER
  2005-02-24 22:18   ` NBody (one more question) Christophe TROESTLER
  4 siblings, 0 replies; 35+ messages in thread
From: Christoph Bauer @ 2005-02-13 16:40 UTC (permalink / raw)
  To: OCaml List; +Cc: shootout-list

[-- Attachment #1: Type: text/plain, Size: 788 bytes --]

Hi,

>
> The C translation is attached for your amusement.

and here is a mlton version. The test runs on a Pentium-M.

compile flags:

gcc -march=pentium-m -O2 -o nbody-gcc nbody.c
mlton -cc-opt -ffast-math -output nbody-mlton nbody.sml
ocamlopt -ccopt -ffast-math -inline 3 -unsafe    -o nbody-ocaml nbody.ml 

nbody         gcc         ocaml         mlton
1000	      0.000       0.000         0.000
10000         0.014       0.019         0.024
100000        0.070       0.301         0.239
200000        0.231       0.621         0.478
500000        0.552       1.556         1.189
1000000       1.123       3.112         2.377
2000000       2.246       6.229         4.704
3000000       3.367       9.286         8.865
4000000       4.457       12.501        9.687

Christoph Bauer


[-- Attachment #2: nbody.sml --]
[-- Type: application/octet-stream, Size: 4628 bytes --]

val pi = 3.141592653589793
val solar_mass = 4.0 * pi * pi
val days_per_year = 365.24

type planet = {
  x : real ref,
  y : real ref,
  z : real ref,
  vx: real ref,  
  vy: real ref,  
  vz: real ref,
  mass : real
}


fun for (start, stop, f) =
   let
      fun loop i =
         if i > stop
            then ()
         else (f i; loop (i + 1))
   in
      loop start
   end

fun advance (bodies : planet array) dt =
  let 
     val n = Array.length bodies - 1
  in 
   for (0, n, fn i =>
   let 
      val b = Array.sub (bodies, i) 
   in
     for (i+1, n, fn j =>
         let 
           val b' = Array.sub (bodies, j)
           val dx = !(#x b) - !(#x b')  
           val dy = !(#y b) - !(#y b')
           val dz = !(#z b) - !(#z b')
           val distance = Math.sqrt(dx * dx + dy * dy + dz * dz) 
           val mag = dt / (distance * distance * distance) 
         in
           #vx b := !(#vx b) - dx * (#mass b') * mag;
           #vy b := !(#vy b) - dy * (#mass b') * mag;
           #vz b := !(#vz b) - dz * (#mass b') * mag;
	   
           #vx b' := !(#vx b') + dx * (#mass b) * mag;
           #vy b' := !(#vy b') + dy * (#mass b) * mag;
           #vz b' := !(#vz b') + dz * (#mass b) * mag
        end)
  end);

  for( 0, n, fn i =>
    let 
       val b = Array.sub (bodies, i) 
    in
	#x b := !(#x b) + dt * !(#vx b);
	#y b := !(#y b) + dt * !(#vy b);
	#z b := !(#z b) + dt * !(#vz b)
    end)
end

fun energy (bodies : planet array) =
  let 
     val e = ref 0.0 
  in
    for(0, Array.length bodies - 1, fn i =>
    let 
        val b = Array.sub (bodies, i) 
    in
       e := !e + 0.5 * (#mass b) * 
		 (!(#vx b) * !(#vx b) + !(#vy b) 
		  * !(#vy b) + !(#vz b) * !(#vz b));
       for (i+1, Array.length bodies - 1, fn j => 
         let 
            val b' = Array.sub (bodies, j)
            val dx = !(#x b) - !(#x b') 
            val dy = !(#y b) - !(#y b')  
            val dz = !(#z b) - !(#z b')
            val distance = Math.sqrt(dx * dx + dy * dy + dz * dz) 
         in
             e := !e - ((#mass b) * (#mass b')) / distance
         end)
   end);
   !e
  end



fun offset_momentum (bodies : planet array) =
  let 
      val px = ref 0.0
      val py = ref 0.0 
      val pz = ref 0.0
   in
     for (0, Array.length bodies - 1, 
	  fn i =>
	     (px := !px + !(#vx (Array.sub (bodies, i))) * (#mass (Array.sub (bodies, i)));
	      py := !py + !(#vy (Array.sub (bodies, i))) * (#mass (Array.sub (bodies, i)));
	      pz := !pz + !(#vz (Array.sub (bodies, i))) * (#mass (Array.sub (bodies, i)))));
     #vx (Array.sub (bodies, 0)) := ~ (!px / solar_mass);
     #vy (Array.sub (bodies, 0)) := ~ (!py / solar_mass);
     #vz (Array.sub (bodies, 0)) := ~ (!pz / solar_mass)
  end

val jupiter = {
  x = ref 4.84143144246472090,
  y = ref ~1.16032004402742839,
  z = ref ~1.03622044471123109e~1,
  vx = ref (1.66007664274403694e~3 * days_per_year),
  vy = ref (7.69901118419740425e~3 * days_per_year),
  vz = ref (~6.90460016972063023e~5 * days_per_year),
  mass = 9.54791938424326609e~4 * solar_mass
}

val saturn = {
  x = ref 8.34336671824457987,
  y = ref 4.12479856412430479e00,
  z = ref ~4.03523417114321381e~01,
  vx = ref (~2.76742510726862411e~03 * days_per_year),
  vy = ref (4.99852801234917238e~03 * days_per_year),
  vz = ref (2.30417297573763929e~05 * days_per_year),
  mass = 2.85885980666130812e~04 * solar_mass
}

val uranus = {
  x = ref 1.28943695621391310e01,
  y = ref ~1.51111514016986312e01,
  z = ref ~2.23307578892655734e~01,
  vx = ref (2.96460137564761618e~03 * days_per_year),
  vy = ref (2.37847173959480950e~03 * days_per_year),
  vz = ref (~2.96589568540237556e~05 * days_per_year),
  mass = 4.36624404335156298e~05 * solar_mass
}

val neptune = {
  x = ref 1.53796971148509165e01,
  y = ref ~2.59193146099879641e01,
  z = ref 1.79258772950371181e~01,
  vx = ref (2.68067772490389322e~03 * days_per_year),
  vy = ref (1.62824170038242295e~03 * days_per_year),
  vz = ref (~9.51592254519715870e~05 * days_per_year),
  mass = 5.15138902046611451e~05 * solar_mass
}

val sun = {
  x = ref 0.0,  y = ref 0.0,  z = ref 0.0,  vx = ref 0.0,  vy = ref 0.0, vz = ref 0.0,
  mass= solar_mass
}



fun printr r = 
  let 
    val s = Real.fmt (StringCvt.FIX (SOME 9)) r
  in (print s; print "\n") end

val bodies = Array.fromList [ sun, jupiter, saturn, uranus, neptune ]


fun main args =
    let 
	val n = 
	    case Int.fromString (List.hd args) of
		SOME i => i
	      | NONE => 0
			
    in
	offset_momentum bodies;
	printr (energy bodies);
	for (1, n, fn _ => advance bodies 0.01);
	printr (energy bodies)
    end

val _ = main (CommandLine.arguments ())

[-- Attachment #3: Type: text/plain, Size: 172 bytes --]




-- 
let () = let rec f a w i j = Printf.printf "%.20f\r" a; let a1 = a *. i /. j in
if w then f a1 false (i +. 2.0) j else f a1 true i (j +. 2.0) in f 2.0 false 2.0 1.0

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] [Benchmark] NBody
  2005-02-08 10:43 ` Xavier Leroy
                     ` (2 preceding siblings ...)
  2005-02-13 16:40   ` Christoph Bauer
@ 2005-02-13 18:13   ` Christophe TROESTLER
  2005-02-24 22:18   ` NBody (one more question) Christophe TROESTLER
  4 siblings, 0 replies; 35+ messages in thread
From: Christophe TROESTLER @ 2005-02-13 18:13 UTC (permalink / raw)
  To: O'Caml Mailing List

Hi,

Sorry for the late reply, I am catching up with email.

On Tue, 8 Feb 2005, Xavier Leroy <Xavier.Leroy@inria.fr> wrote:
> 
> Your OCaml code is about as good as you can write.  All the unboxing
> optimizations are triggered.

Ah, thanks for telling, I was wondering about that.

> You don't say which Java implementation you used (there are several).

Sorry; Sun JDK 1.5.0.

> (Besides, being 1.3 times slower than gcc on numerical code is
> within the design envelope for OCaml.  My performance goals have
> always been "never more than twice as slow as C".)

Yes.  I am not complaining, just trying to understand (and, to say the
whole truth, I was also a bit "sad" that Java was beating my favorite
language :).

> On a "normal" (register-based) float architecture like PowerPC or
> x86_64, the OCaml-generated code is essentially identical to the
> gcc-generated one.

Oh, good, so going to 64 bits will bring us that too!

> The C translation is attached for your amusement.

Thanks for your very detailed and informative answer!

Best regards,
ChriS


^ permalink raw reply	[flat|nested] 35+ messages in thread

* NBody (one more question)
  2005-02-08 10:43 ` Xavier Leroy
                     ` (3 preceding siblings ...)
  2005-02-13 18:13   ` Christophe TROESTLER
@ 2005-02-24 22:18   ` Christophe TROESTLER
  2005-02-25 17:06     ` [Caml-list] " John Carr
                       ` (2 more replies)
  4 siblings, 3 replies; 35+ messages in thread
From: Christophe TROESTLER @ 2005-02-24 22:18 UTC (permalink / raw)
  To: O'Caml Mailing List

Hi,

I'd like to come back to the nbody mini-benchmark with one more
question (I apologize if I bother some people but I think there are
competent people here who can clarify the situation for me).

When I compile the C code with -O0 (with gcc -o nbody.gcc -Wall
--fast-math nbody.c -lm), I get a time of 1.513s which is comparable
to OCaml (1.607s).  But as soon as I turn on -O options (as with gcc
-o nbody.gcc -Wall -O1 --fast-math nbody.c -lm), the running time
drops down to 0.871s (0.58%).  Can somebody tell me what is the
optimization that has such an effect and whether it could be applied
to OCaml ?  (I am not saying or asking it to be done -- it is not up
to me to decide -- I just want to understand -- and I am not versed
enough in assembly to do it myself unfortunately :( ).

Regards,
ChriS


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] NBody (one more question)
  2005-02-24 22:18   ` NBody (one more question) Christophe TROESTLER
@ 2005-02-25 17:06     ` John Carr
  2005-02-25 17:17       ` Christophe TROESTLER
  2005-02-25 17:24     ` Ken Rose
  2005-02-25 17:57     ` Xavier Leroy
  2 siblings, 1 reply; 35+ messages in thread
From: John Carr @ 2005-02-25 17:06 UTC (permalink / raw)
  To: caml-list


> When I compile the C code with -O0 (with gcc -o nbody.gcc -Wall
> --fast-math nbody.c -lm), I get a time of 1.513s which is comparable
> to OCaml (1.607s).  But as soon as I turn on -O options (as with gcc
> -o nbody.gcc -Wall -O1 --fast-math nbody.c -lm), the running time
> drops down to 0.871s (0.58%).  Can somebody tell me what is the
> optimization that has such an effect and whether it could be applied
> to OCaml ?

gcc -O0 sets out to generate the worst possible code, and mostly
succeeds.  Optimizations in gcc -O1 compared to gcc -O0 include
register allocation, dead code elimination, branch straightening,
common subexpression elimination, instruction combining, and
instruction scheduling.

ocamlopt-generated code is between -O0 and -O1 in quality, usually
much closer to -O1.  The biggest missing optimization is common
subexpression elimination.  ocamlopt puts less effort into instruction
combining than gcc.

gcc -O2 adds loop optimizations which ocamlopt never does.

A functional programming style puts different demands on the
optimizer.  ocamlopt has some optimizations that don't make
sense for C, e.g. replacing (unbox (box (value))) with value.


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] NBody (one more question)
  2005-02-25 17:06     ` [Caml-list] " John Carr
@ 2005-02-25 17:17       ` Christophe TROESTLER
  2005-02-26 16:08         ` John Carr
  0 siblings, 1 reply; 35+ messages in thread
From: Christophe TROESTLER @ 2005-02-25 17:17 UTC (permalink / raw)
  To: jfc; +Cc: caml-list

On Fri, 25 Feb 2005, John Carr <jfc@MIT.EDU> wrote:
> 
> Optimizations in gcc -O1 compared to gcc -O0 include register
> allocation, dead code elimination, branch straightening, common
> subexpression elimination, instruction combining, and instruction
> scheduling.

Ok, but do you know which one(s) account for the speedup observed in
this particular case?

Regards,
ChriS


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] NBody (one more question)
  2005-02-24 22:18   ` NBody (one more question) Christophe TROESTLER
  2005-02-25 17:06     ` [Caml-list] " John Carr
@ 2005-02-25 17:24     ` Ken Rose
  2005-02-25 17:42       ` Oliver Bandel
  2005-02-25 17:57     ` Xavier Leroy
  2 siblings, 1 reply; 35+ messages in thread
From: Ken Rose @ 2005-02-25 17:24 UTC (permalink / raw)
  To: Christophe TROESTLER; +Cc: O'Caml Mailing List

Christophe TROESTLER wrote:
> When I compile the C code with -O0 (with gcc -o nbody.gcc -Wall
> --fast-math nbody.c -lm), I get a time of 1.513s which is comparable
> to OCaml (1.607s).  But as soon as I turn on -O options (as with gcc
> -o nbody.gcc -Wall -O1 --fast-math nbody.c -lm), the running time
> drops down to 0.871s (0.58%).  Can somebody tell me what is the
> optimization that has such an effect and whether it could be applied
> to OCaml ?  (I am not saying or asking it to be done -- it is not up
> to me to decide -- I just want to understand -- and I am not versed
> enough in assembly to do it myself unfortunately :( ).

I'm not familiar with the OCaml code generator, but gcc without 
optimization produces very naive code.  Each source statement is 
translated separately, and all variable values are read from & written 
back to memory.  (Only changed values are written, obviously)  It 
doesn't do any instruction scheduling beyond what the processor may 
require for correctness.  It really doesn't do anything more 
sophisticated than suppressing moves that have the same register as 
source & destination.

So it might be just about anything, from using registers to some 
common-subexpression elimination to a number of others.

  - ken


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] NBody (one more question)
  2005-02-25 17:24     ` Ken Rose
@ 2005-02-25 17:42       ` Oliver Bandel
  0 siblings, 0 replies; 35+ messages in thread
From: Oliver Bandel @ 2005-02-25 17:42 UTC (permalink / raw)
  To: caml-list

On Fri, Feb 25, 2005 at 09:24:19AM -0800, Ken Rose wrote:
[...]
> I'm not familiar with the OCaml code generator, but gcc without 
> optimization produces very naive code.  Each source statement is 
> translated separately, and all variable values are read from & written 
> back to memory.  (Only changed values are written, obviously)  It 
> doesn't do any instruction scheduling beyond what the processor may 
> require for correctness.  It really doesn't do anything more 
> sophisticated than suppressing moves that have the same register as 
> source & destination.
[...]

There is a good reason that without optimisations
there will be done none.

1.: It's what it seems to be. And switching optimisation on then
    is also what it seems to be: optimized code...
    So, a chair is a chair and a table is a table and that's good. :)

2.: Optimisation must be switched off to be able to debug...
    ...so it's good that gcc has the capability to produce non-
    optimized code.... it's not stupid to produce naive code.
    It sometimes is very useful.

Ciao,
   Oliver


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] NBody (one more question)
  2005-02-24 22:18   ` NBody (one more question) Christophe TROESTLER
  2005-02-25 17:06     ` [Caml-list] " John Carr
  2005-02-25 17:24     ` Ken Rose
@ 2005-02-25 17:57     ` Xavier Leroy
  2 siblings, 0 replies; 35+ messages in thread
From: Xavier Leroy @ 2005-02-25 17:57 UTC (permalink / raw)
  To: Christophe TROESTLER; +Cc: O'Caml Mailing List

> When I compile the C code with -O0 (with gcc -o nbody.gcc -Wall
> --fast-math nbody.c -lm), I get a time of 1.513s which is comparable
> to OCaml (1.607s).  But as soon as I turn on -O options (as with gcc
> -o nbody.gcc -Wall -O1 --fast-math nbody.c -lm), the running time
> drops down to 0.871s (0.58%).  Can somebody tell me what is the
> optimization that has such an effect and whether it could be applied
> to OCaml ?

First, make sure the Caml code is compiled with bounds checking off
(ocamlopt -unsafe), otherwise the comparison isn't quite fair.  But
even with -unsafe, you are correct that the Caml code is significantly
slower than the gcc -O1 code.  This is especially surprising because
the assembly code generated by ocamlopt and gcc look very similar.
So, I don't think you can pinpoint the speed difference on a
particular optimization.

My current guess would be alignment issues:
- data alignment: float arrays are 4-aligned in OCaml, 8-aligned in C,
  so if you're unlucky you can end up with slower unaligned accesses
  on every Caml float.
- code alignment: it could be that OCaml doesn't perform sufficient
  alignment on function entry points and loop points.  The proper
  alignments for various implementations of the x86 architecture are
  a mystery to me.

Again, these are just wild guesses.  To understand what is going on
inside the chip, one would need to use performance monitoring
counters.  Unfortunately, I never felt motivated enough to shell out
the $$$ for Intel's VTune analyzer...

- Xavier Leroy


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Caml-list] NBody (one more question)
  2005-02-25 17:17       ` Christophe TROESTLER
@ 2005-02-26 16:08         ` John Carr
  0 siblings, 0 replies; 35+ messages in thread
From: John Carr @ 2005-02-26 16:08 UTC (permalink / raw)
  To: caml-list


> > Optimizations in gcc -O1 compared to gcc -O0 include register
> > allocation, dead code elimination, branch straightening, common
> > subexpression elimination, instruction combining, and instruction
> > scheduling.
> 
> Ok, but do you know which one(s) account for the speedup observed in
> this particular case?

There is no way to be sure without careful analysis of generated
code and possibly the state of gcc's internal representation at
intermediate steps.  Most of the optimizations at -O1 can not be
turned on or off independently of the general optimization level.


^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2005-02-26 16:09 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-02-07 18:57 [Benchmark] NBody Christophe TROESTLER
2005-02-07 19:16 ` [Caml-list] " Will M. Farr
2005-02-07 19:36   ` Christophe TROESTLER
2005-02-07 19:55     ` Will M. Farr
2005-02-08 10:34       ` Olivier Andrieu
2005-02-08 10:52         ` Micha
2005-02-07 20:16     ` Markus Mottl
2005-02-07 19:37 ` Martin Jambon
2005-02-07 19:46   ` Christophe TROESTLER
2005-02-07 20:22     ` Martin Jambon
2005-02-07 20:04   ` sejourne_kevin
2005-02-07 20:32     ` Robert Roessler
2005-02-07 22:57     ` Oliver Bandel
2005-02-08  1:29 ` skaller
2005-02-08  1:48   ` Will M. Farr
2005-02-08  9:01     ` Ville-Pertti Keinonen
2005-02-08  9:37     ` skaller
2005-02-08 10:10       ` Ville-Pertti Keinonen
2005-02-08 16:36         ` skaller
2005-02-08 12:04       ` Marcin 'Qrczak' Kowalczyk
2005-02-08 17:06         ` skaller
2005-02-08 10:25   ` Xavier Leroy
2005-02-08 18:34     ` skaller
2005-02-08 10:43 ` Xavier Leroy
2005-02-08 11:26   ` Ville-Pertti Keinonen
2005-02-08 15:59   ` Florian Hars
2005-02-13 16:40   ` Christoph Bauer
2005-02-13 18:13   ` Christophe TROESTLER
2005-02-24 22:18   ` NBody (one more question) Christophe TROESTLER
2005-02-25 17:06     ` [Caml-list] " John Carr
2005-02-25 17:17       ` Christophe TROESTLER
2005-02-26 16:08         ` John Carr
2005-02-25 17:24     ` Ken Rose
2005-02-25 17:42       ` Oliver Bandel
2005-02-25 17:57     ` Xavier Leroy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).