caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
* We should all be forking
@ 2007-06-05 18:13 Jon Harrop
  2007-06-05 18:25 ` [Caml-list] " Jonathan Bryant
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Jon Harrop @ 2007-06-05 18:13 UTC (permalink / raw)
  To: Caml List


Following on from our "why not fork" discussion, here is my most elegant 
concurrent ray tracer written in vanilla OCaml.

This program simply runs four processes (forking off three) in parallel to 
improve performance when 2-4 CPUs are available (increase "d" if you have >4 
CPUs).

This isn't as sophisticated as the JoCaml version but it does double 
performance (taking the time down from 4s to 2s) on my dual-core machine, 
making it faster than any language that uses a concurrent GC. I don't know 
about you guys, but this is a complete revelation for me!

I believe the performance relies upon the Linux kernel lazily copying the 
process. Does OSX also do that?

Anyway, here is the code:

let delta = sqrt epsilon_float

type vec = {x:float; y:float; z:float}
let ( *| ) s r = {x = s *. r.x; y = s *. r.y; z = s *. r.z}
let ( +| ) a b = {x = a.x +. b.x; y = a.y +. b.y; z = a.z +. b.z}
let ( -| ) a b = {x = a.x -. b.x; y = a.y -. b.y; z = a.z -. b.z}
let dot a b = a.x *. b.x +. a.y *. b.y +. a.z *. b.z
let length r = sqrt(dot r r)
let unitise r = 1. /. length r *| r

type scene =
    Sphere of vec * float
  | Group of vec * float * scene * scene * scene * scene * scene

let ray_sphere {x=dx; y=dy; z=dz} {x=vx; y=vy; z=vz} r =
  let disc = vx *. vx +. vy *. vy +. vz *. vz -. r *. r in
  if disc < 0. then infinity else
    let b = vx *. dx +. vy *. dy +. vz *. dz in
    let b2 = b *. b in
    if b2 < disc then infinity else
      let disc = sqrt(b2 -. disc) in
      let t1 = b -. disc in
      if t1 > 0. then t1 else b +. disc

let ray_sphere' {x=ox; y=oy; z=oz} {x=dx; y=dy; z=dz} {x=cx; y=cy; z=cz} r =
  let vx = cx -. ox and vy = cy -. oy and vz = cz -. oz in
  let vv = vx *. vx +. vy *. vy +. vz *. vz in
  let b = vx *. dx +. vy *. dy +. vz *. dz in
  let disc = b *. b -. vv +. r *. r in
  disc >= 0. && b +. sqrt disc >= 0.

type hit = {l: float; nx: float; ny: float; nz: float}

let rec intersect ({x=dx; y=dy; z=dz} as dir) hit = function
    Sphere ({x=cx; y=cy; z=cz} as center, radius) ->
      let l' = ray_sphere dir center radius in
      if l' >= hit.l then hit else
	let x = l' *. dx -. cx in
	let y = l' *. dy -. cy in
	let z = l' *. dz -. cz in
	let il = 1. /. sqrt(x *. x +. y *. y +. z *. z) in
	{l = l'; nx = il *. x; ny = il *. y; nz = il *. z}
  | Group (center, radius, a, b, c, d, e) ->
      let l' = ray_sphere dir center radius in
      if l' >= hit.l then hit else
	let f h s = intersect dir h s in
	f (f (f (f (f hit a) b) c) d) e

let rec intersect' orig dir = function
    Sphere (center, radius) -> ray_sphere' orig dir center radius
  | Group (center, radius, a, b, c, d, e) ->
      let f s = intersect' orig dir s in
      ray_sphere' orig dir center radius && (f a || f b || f c || f d || f e)

let neg_light = unitise { x = 1.; y = 3.; z = -2. }

let rec ray_trace dir scene =
  let hit = intersect dir {l=infinity; nx=0.; ny=0.; nz=0.} scene in
  if hit.l = infinity then 0. else
    let n = {x = hit.nx; y = hit.ny; z = hit.nz} in
    let g = dot n neg_light in
    if g < 0. then 0. else
      if intersect' (hit.l *| dir +| delta *| n) neg_light scene then 0. else 
g

let fold5 f x a b c d e = f (f (f (f (f x a) b) c) d) e

let rec create level c r =
  let obj = Sphere (c, r) in
  if level = 1 then obj else
    let a = 3. *. r /. sqrt 12. in
    let rec bound (c, r) = function
	Sphere (c', r') -> c, max r (length (c -| c') +. r')
      | Group (_, _, v, w, x, y, z) -> fold5 bound (c, r) v w x y z in
    let aux x' z' = create (level - 1) (c +| {x=x'; y=a; z=z'}) (0.5 *. r) in
    let w = aux (-.a) (-.a) and x = aux a (-.a) in
    let y = aux (-.a) a and z = aux a a in
    let c, r = fold5 bound (c +| {x=0.; y=r; z=0.}, 0.) obj w x y z in
    Group (c, r, obj, w, x, y, z)

let level, n =
  try int_of_string Sys.argv.(1), int_of_string Sys.argv.(2) with _ -> 9, 512

let scene = create level { x = 0.; y = -1.; z = 4. } 1. and ss = 4

module String = struct
  include String

  let init n f =
    if n=0 then "" else
      let s = String.make n (f 0) in
      for i=1 to n-1 do
	s.[i] <- f i
      done;
      s
end

let raster y =
  y, String.init n
    (fun x ->
       let g = ref 0. in
       for dx = 0 to ss - 1 do
	 for dy = 0 to ss - 1 do
	   let aux x d = float x -. float n /. 2. +. float d /. float ss in
	   let dir = unitise {x = aux x dx; y = aux y dy; z = float n } in
	   g := !g +. ray_trace dir scene
	 done
       done;
       let g = 0.5 +. 255. *. !g /. float (ss*ss) in
       char_of_int (int_of_float g))

let rasters d i =
  Array.init ((n+d-i)/d) (fun j -> raster(d*j+i))

let invoke (f : 'a -> 'b) x : unit -> 'b =
  let input, output = Unix.pipe() in
  match Unix.fork() with
  | 0 ->
      Unix.close input;
      let output = Unix.out_channel_of_descr output in
      Marshal.to_channel output (try `Res(f x) with e -> `Exn e) [];
      exit 0
  | _ ->
      Unix.close output;
      let input = Unix.in_channel_of_descr input in
      fun () ->
	match Marshal.from_channel input with
	| `Res x -> x
	| `Exn e -> raise e

let ( |> ) x f = f x

let map (f : 'a -> 'b) a : 'b array =
  let n = Array.length a in
  let aux i x =
    if i<n-1 then invoke f x else
      let f_x = f x in
      fun () -> f_x in
  Array.mapi aux a |>
      Array.map (fun f -> f())

let () =
  let d = 4 in
  let sort a = Array.sort compare a; a in
  let image =
    Array.init d (fun i -> i) |>
	map (rasters d) |>
	    Array.to_list |>
		Array.concat |>
		    sort |>
			Array.map snd in
  Printf.printf "P5\n%d %d\n255\n" n n;
  for y = n - 1 downto 0 do
    print_string image.(y)
  done

-- 
Dr Jon D Harrop, Flying Frog Consultancy Ltd.
OCaml for Scientists
http://www.ffconsultancy.com/products/ocaml_for_scientists/?e


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Caml-list] We should all be forking
  2007-06-05 18:13 We should all be forking Jon Harrop
@ 2007-06-05 18:25 ` Jonathan Bryant
  2007-06-05 18:32 ` Joel Reymont
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Jonathan Bryant @ 2007-06-05 18:25 UTC (permalink / raw)
  To: Jon Harrop; +Cc: Caml List

>
> I believe the performance relies upon the Linux kernel lazily  
> copying the
> process. Does OSX also do that?

The man page under OSX doesn't mention anything about copy-on-write.   
That doesn't necessarily mean anything, though...

--Jonathan


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Caml-list] We should all be forking
  2007-06-05 18:13 We should all be forking Jon Harrop
  2007-06-05 18:25 ` [Caml-list] " Jonathan Bryant
@ 2007-06-05 18:32 ` Joel Reymont
  2007-06-05 18:46   ` Jon Harrop
  2007-06-05 19:13 ` Chris King
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Joel Reymont @ 2007-06-05 18:32 UTC (permalink / raw)
  To: Jon Harrop; +Cc: Caml List


On Jun 5, 2007, at 7:13 PM, Jon Harrop wrote:

> This isn't as sophisticated as the JoCaml version but it does double
> performance (taking the time down from 4s to 2s) on my dual-core  
> machine,

I must have missed the JoCaml version. Where is it?

	Thanks, Joel

--
http://topdog.cc      - EasyLanguage to C# translator
http://wagerlabs.com  - Blog






^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Caml-list] We should all be forking
  2007-06-05 18:32 ` Joel Reymont
@ 2007-06-05 18:46   ` Jon Harrop
  0 siblings, 0 replies; 11+ messages in thread
From: Jon Harrop @ 2007-06-05 18:46 UTC (permalink / raw)
  To: Joel Reymont; +Cc: Caml List

On Tuesday 05 June 2007 19:32:25 Joel Reymont wrote:
> On Jun 5, 2007, at 7:13 PM, Jon Harrop wrote:
> > This isn't as sophisticated as the JoCaml version but it does double
> > performance (taking the time down from 4s to 2s) on my dual-core
> > machine,
>
> I must have missed the JoCaml version. Where is it?
>
> 	Thanks, Joel

  http://jocaml.inria.fr/#examples

There is also a JoCaml mailing list for JoCaml-related ramblings, and it could 
do with more rambling. ;-)

-- 
Dr Jon D Harrop, Flying Frog Consultancy Ltd.
OCaml for Scientists
http://www.ffconsultancy.com/products/ocaml_for_scientists/?e


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Caml-list] We should all be forking
  2007-06-05 18:13 We should all be forking Jon Harrop
  2007-06-05 18:25 ` [Caml-list] " Jonathan Bryant
  2007-06-05 18:32 ` Joel Reymont
@ 2007-06-05 19:13 ` Chris King
  2007-06-05 19:20   ` jocaml vs camlp3l Joel Reymont
  2007-06-05 23:58 ` [Caml-list] We should all be forking Brian Hurt
  2007-06-07 12:04 ` Richard Jones
  4 siblings, 1 reply; 11+ messages in thread
From: Chris King @ 2007-06-05 19:13 UTC (permalink / raw)
  To: Jon Harrop; +Cc: Caml List

On 6/5/07, Jon Harrop <jon@ffconsultancy.com> wrote:
> Following on from our "why not fork" discussion, here is my most elegant
> concurrent ray tracer written in vanilla OCaml.
>
> This program simply runs four processes (forking off three) in parallel to
> improve performance when 2-4 CPUs are available (increase "d" if you have >4
> CPUs).

I'm curious, have you considered trying camlp3l [1] to parallelize
your raytracer?  It uses high-level constructs ("skeletons") to
describe parallelizable functions, e.g. for a raytracer you can
essentially just say "farm out each row to a different computing
node".  It uses marshalling over TCP/IP sockets, so can be distributed
if you like.  The forking you just do beforehand in a shell script
(though it gives an example where you do it in O'Caml too).

[1] http://camlp3l.inria.fr/


^ permalink raw reply	[flat|nested] 11+ messages in thread

* jocaml vs camlp3l
  2007-06-05 19:13 ` Chris King
@ 2007-06-05 19:20   ` Joel Reymont
  2007-06-05 21:02     ` [Caml-list] " Erik de Castro Lopo
  0 siblings, 1 reply; 11+ messages in thread
From: Joel Reymont @ 2007-06-05 19:20 UTC (permalink / raw)
  To: Chris King; +Cc: Jon Harrop, Caml List

I'm gonna go bonkers soon. Suppose I know and quite like Erlang but  
dislike its syntax and performance on non-IO things. I'm also quite  
fond of OCaml.

Should I go with JoCaml? Camlp3l? Why?

	Thanks, Joel

On Jun 5, 2007, at 8:13 PM, Chris King wrote:

> I'm curious, have you considered trying camlp3l [1] to parallelize
> your raytracer?  It uses high-level constructs ("skeletons") to
> describe parallelizable functions, e.g. for a raytracer you can
> essentially just say "farm out each row to a different computing
> node".

--
http://topdog.cc      - EasyLanguage to C# translator
http://wagerlabs.com  - Blog






^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Caml-list] jocaml vs camlp3l
  2007-06-05 19:20   ` jocaml vs camlp3l Joel Reymont
@ 2007-06-05 21:02     ` Erik de Castro Lopo
  0 siblings, 0 replies; 11+ messages in thread
From: Erik de Castro Lopo @ 2007-06-05 21:02 UTC (permalink / raw)
  To: caml-list

Joel Reymont wrote:

> I'm gonna go bonkers soon. Suppose I know and quite like Erlang but  
> dislike its syntax and performance on non-IO things. I'm also quite  
> fond of OCaml.

I'm very keen on Ocaml and started learning Erlang for the concurrency.
However, Erlang's syntax, dynamic typing and "everything runs in the
erl shell" thing started to annoy me.

So JoCaml is currently looking *really* good.

> Should I go with JoCaml? Camlp3l? Why?

I think we let Jon Harrop and others (maybe even me) thrash this out
on a number of examples and then enjoy the benefits:-).

Erik
-- 
-----------------------------------------------------------------
Erik de Castro Lopo
-----------------------------------------------------------------
"I do have a policy of mandatory evisceration where the crime of
whitespace in filenames is perpetrated on my systems. This has
inspired a new range of designer colostomy bag covers in corduroy,
velvet, and the ever popular denim."
-- jdub on the SLUG mailing list


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Caml-list] We should all be forking
  2007-06-05 18:13 We should all be forking Jon Harrop
                   ` (2 preceding siblings ...)
  2007-06-05 19:13 ` Chris King
@ 2007-06-05 23:58 ` Brian Hurt
  2007-06-07 12:04 ` Richard Jones
  4 siblings, 0 replies; 11+ messages in thread
From: Brian Hurt @ 2007-06-05 23:58 UTC (permalink / raw)
  To: Jon Harrop; +Cc: Caml List



On Tue, 5 Jun 2007, Jon Harrop wrote:

>
> Following on from our "why not fork" discussion, here is my most elegant
> concurrent ray tracer written in vanilla OCaml.
>
> This program simply runs four processes (forking off three) in parallel to
> improve performance when 2-4 CPUs are available (increase "d" if you have >4
> CPUs).
>
> This isn't as sophisticated as the JoCaml version but it does double
> performance (taking the time down from 4s to 2s) on my dual-core machine,
> making it faster than any language that uses a concurrent GC. I don't know
> about you guys, but this is a complete revelation for me!

This actually doesn't surprise me.  Big linear algebra problems- like ray 
tracing- are almost embarassingly parallel.  This is why the supercomputer 
market died a decade and a half ago- it was just too easy to take the 
problems supercomputers were designed to handle and spread them out over 
an cluster of workstations or pcs.  What needs to be shared in the ray 
tracing example?  The model, obviously.  And the results have to be 
collected.  And what subset of the problem (which collection of rays) need 
to be worked on.  I note that the big-iron database machine (aka 
mainframe) market is alive and well.  Some problems don't parallelize 
quite so easily.

My point here is to caution against making too much soup from this lone 
oyster.  Simply because this program, and a large class of other programs 
like it, parallelize easily, doesn't mean all will.

On the other hand, for programs that do parallelize this way, JoCaml looks 
like a wonderful gift.

Brian


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Caml-list] We should all be forking
  2007-06-05 18:13 We should all be forking Jon Harrop
                   ` (3 preceding siblings ...)
  2007-06-05 23:58 ` [Caml-list] We should all be forking Brian Hurt
@ 2007-06-07 12:04 ` Richard Jones
  2007-06-14 10:09   ` Luc Maranget
  4 siblings, 1 reply; 11+ messages in thread
From: Richard Jones @ 2007-06-07 12:04 UTC (permalink / raw)
  To: Jon Harrop; +Cc: Caml List


How close are we to real-time raytracing then?  Given that 4 & even 8
core machines may become commonplace soon.

Rich.

-- 
Richard Jones
Red Hat


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Caml-list] We should all be forking
  2007-06-07 12:04 ` Richard Jones
@ 2007-06-14 10:09   ` Luc Maranget
  0 siblings, 0 replies; 11+ messages in thread
From: Luc Maranget @ 2007-06-14 10:09 UTC (permalink / raw)
  To: Richard Jones; +Cc: Caml List

> 
> How close are we to real-time raytracing then?  Given that 4 & even 8
> core machines may become commonplace soon.
> 
> Rich.
> 
> -- 
> Richard Jones
> Red Hat


For your information there is now a more realistic ray tracer in JoCaml.
<http://jocaml.inria.fr/#examples>
Speedups can be impressive, but no real-time yet, even on
a 2x11x2Ghz grid.

The JoCaml ray tracer is a distributed/concurrent version of
the Camls'R Us entry at ICFP 2000 programming contest.


-- 
Luc Maranget


^ permalink raw reply	[flat|nested] 11+ messages in thread

* re: We should all be forking
@ 2007-06-05 22:30 Christopher Cramer
  0 siblings, 0 replies; 11+ messages in thread
From: Christopher Cramer @ 2007-06-05 22:30 UTC (permalink / raw)
  To: caml-list

Jon Harrop:
> I believe the performance relies upon the Linux kernel lazily copying
> the process. Does OSX also do that? 

It's called copy-on-write and I would be surprised if OSX didn't also
do it.

The only way to start a new process is to fork, so even if you're just
running another program you fork first, and then replace the process
image with the new program with exec. If the fork had to copy the entire
process image before just throwing it away upon exec, I think Unix,
which is based around a philosophy of piping between multiple processes,
would have abandoned fork a long time ago. Then again, there is vfork,
so I guess they almost did abandon it at one point.

This method doesn't work well at all on Windows, as I understand
it.

BTW your code looks a lot nicer than mine. The |> is brilliant, I think.
I'm a little surprised you cut the speed in half without using select.
And looking at your code I just realized I have a bug in mine...


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2007-06-14 10:09 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-06-05 18:13 We should all be forking Jon Harrop
2007-06-05 18:25 ` [Caml-list] " Jonathan Bryant
2007-06-05 18:32 ` Joel Reymont
2007-06-05 18:46   ` Jon Harrop
2007-06-05 19:13 ` Chris King
2007-06-05 19:20   ` jocaml vs camlp3l Joel Reymont
2007-06-05 21:02     ` [Caml-list] " Erik de Castro Lopo
2007-06-05 23:58 ` [Caml-list] We should all be forking Brian Hurt
2007-06-07 12:04 ` Richard Jones
2007-06-14 10:09   ` Luc Maranget
2007-06-05 22:30 Christopher Cramer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).