caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
* LLVM: A native-code compiler for MiniML in ~100LOC
@ 2007-11-26 20:52 Jon Harrop
  2007-11-27 16:57 ` [Caml-list] " Tom Primožič
  2007-12-02 10:00 ` Xavier Leroy
  0 siblings, 2 replies; 4+ messages in thread
From: Jon Harrop @ 2007-11-26 20:52 UTC (permalink / raw)
  To: caml-list


I recently rediscovered the Low-Level Virtual Machine (LLVM) project that has 
since been adopted by Apple:

  http://llvm.org

This is a library (with OCaml bindings!) that allows you to write a compiler 
that generates their RISC-like intermediate language (IL) that can then be 
compiled to native code. LLVM even supports JIT compilation.

I went through the usual steps in trying this and was extremely impressed with 
the results. After only two days I was able to create an optimizing 
native-code compiler for a subset of CAML large enough to represent the 
following Fibonacci program:

  let rec fib n =
    if n <= 2 then 1 else
      fib(n-1) + fib(n-2)

  do fib 40

The compiler is written entirely in OCaml, using camlp4 for lexing and 
parsing, and the whole thing is only ~100 lines of code!

I'll detail exactly how you can use LLVM from OCaml in a future OCaml Journal 
article:

  http://www.ffconsultancy.com/products/ocaml_journal/?ol

Meanwhile here's my latest source:

type expr =
  | Int of int
  | Var of string
  | BinOp of [ `Add | `Sub | `Leq ] * expr * expr
  | If of expr * expr * expr
  | Apply of expr * expr

type defn =
  | LetRec of string * string * expr

open Camlp4.PreCast

let expr = Gram.Entry.mk "expr"
let defn = Gram.Entry.mk "defn"
let prog = Gram.Entry.mk "prog"

EXTEND Gram
  expr:
  [ [ "if"; p = expr; "then"; t = expr; "else"; f = expr ->
	If(p, t, f) ]
  | [ e1 = expr; "<="; e2 = expr -> BinOp(`Leq, e1, e2) ]
  | [ e1 = expr; "+"; e2 = expr -> BinOp(`Add, e1, e2)
    | e1 = expr; "-"; e2 = expr -> BinOp(`Sub, e1, e2) ]
  | [ f = expr; x = expr -> Apply(f, x) ]
  | [ v = LIDENT -> Var v
    | n = INT -> Int(int_of_string n)
    | "("; e = expr; ")" -> e ] ];
  defn:
  [ [ "let"; "rec"; f = LIDENT; x = LIDENT; "="; body = expr ->
	LetRec(f, x, body) ] ];
  prog:
  [ [ defns = LIST0 defn; "do"; run = expr -> defns, run ] ];
END

open Printf

let program, run =
  try Gram.parse prog Loc.ghost (Stream.of_channel stdin) with
  | Loc.Exc_located(loc, e) ->
      printf "%s at line %d\n" (Printexc.to_string e) (Loc.start_line loc);
      exit 1

open Llvm

let ty = i64_type

let ( |> ) x f = f x

type state =
    { fn: llvalue;
      blk: llbasicblock;
      vars: (string * llvalue) list }

let bb state = builder_at_end state.blk
let new_block state name = append_block name state.fn
let find state v =
  try List.assoc v state.vars with Not_found ->
    eprintf "Unknown variable %s\n" v;
    raise Not_found
let cont (v, state) dest_blk =
  build_br dest_blk (bb state) |> ignore;
  v, state

let rec expr state = function
  | Int n -> const_int ty n, state
  | Var x -> find state x, state
  | BinOp(op, f, g) ->
      let f, state = expr state f in
      let g, state = expr state g in
      let build, name = match op with
	| `Add -> build_add, "add"
	| `Sub -> build_sub, "sub"
	| `Leq -> build_icmp Icmp_sle, "leq" in
      build f g name (bb state), state
  | If(p, t, f) ->
      let t_blk = new_block state "pass" in
      let f_blk = new_block state "fail" in
      let k_blk = new_block state "cont" in
      let cond, state = expr state p in
      build_cond_br cond t_blk f_blk (bb state) |> ignore;
      let t, state = cont (expr { state with blk = t_blk } t) k_blk in
      let f, state = cont (expr { state with blk = f_blk } f) k_blk in
      build_phi [t, t_blk; f, f_blk] "join" (bb state), state
  | Apply(f, arg) ->
      let f, state = expr state f in
      let arg, state = expr state arg in
      build_call f [|arg|] "apply" (bb state), state

let defn m vars = function
  | LetRec(f, arg, body) ->
      let ty = function_type ty [| ty |] in
      let fn = define_function f ty m in
      let vars' = (arg, param fn 0) :: (f, fn) :: vars in
      let body, state =
	expr { fn = fn; blk = entry_block fn; vars = vars' } body in
      build_ret body (bb state) |> ignore;
      (f, fn) :: vars

let int n = const_int ty n

let main filename =
  let m = create_module filename in

  let string = pointer_type i8_type in

  let print =
    declare_function "printf" (var_arg_function_type ty [|string|]) m in

  let main = define_function "main" (function_type ty [| |]) m in
  let blk = entry_block main in
  let bb = builder_at_end blk in

  let str s = define_global "buf" (const_stringz s) m in
  let int_spec = build_gep (str "%d\n") [| int 0; int 0 |] "int_spec" bb in

  let vars = List.fold_left (defn m) [] program in
  let n, _ = expr { fn = main; blk = blk; vars = vars } run in

  build_call print [| int_spec; n |] "" bb |> ignore;

  build_ret (int 0) bb |> ignore;

  if not (Llvm_bitwriter.write_bitcode_file m filename) then exit 1;
  dispose_module m

let () = match Sys.argv with
  | [|_; filename|] -> main filename
  | _ as a -> Printf.eprintf "Usage: %s <file>\n" a.(0)

To use it, simply download and install the latest SVN version of LLVM (which 
even builds and installs the OCaml bindings for you!) and then do:

$ ocamlc -g -dtypes -pp camlp4oof -I +camlp4 dynlink.cma camlp4lib.cma -cc g++ 
llvm.cma llvm_bitwriter.cma minml.ml -o minml
$ ./minml run.bc <fib.ml
$ llc -f run.bc -o run.s
$ gcc run.s -o run
$ ./run
102334155
$

You can look at the generated intermediate representation with:

$ llvm-dis -f run.bc
$ cat run.ll

If anyone improves upon this I'd love to hear about it! :-)

-- 
Dr Jon D Harrop, Flying Frog Consultancy Ltd.
http://www.ffconsultancy.com/products/?e


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Caml-list] LLVM: A native-code compiler for MiniML in ~100LOC
  2007-11-26 20:52 LLVM: A native-code compiler for MiniML in ~100LOC Jon Harrop
@ 2007-11-27 16:57 ` Tom Primožič
  2007-12-02 10:00 ` Xavier Leroy
  1 sibling, 0 replies; 4+ messages in thread
From: Tom Primožič @ 2007-11-27 16:57 UTC (permalink / raw)
  To: Jon Harrop; +Cc: caml-list

[-- Attachment #1: Type: text/plain, Size: 6608 bytes --]

Amazing!

I once wanted to write the compiler et. al myself, however, since
discovering LLVM, I have been determined to use it as my backend! It
supports many platforms, has a lot of (configurable) optimization passes,
and simple IL. Furthermore, it has GC bindings and JIT on some platforms.

So, this is a perfect example for me since it has only made me more certain
of my decision. I will write an excelent language (ML-like) and feed it to
LLVM to compile it!

 - Tom

On 26/11/2007, Jon Harrop <jon@ffconsultancy.com> wrote:
>
>
> I recently rediscovered the Low-Level Virtual Machine (LLVM) project that
> has
> since been adopted by Apple:
>
>   http://llvm.org
>
> This is a library (with OCaml bindings!) that allows you to write a
> compiler
> that generates their RISC-like intermediate language (IL) that can then be
> compiled to native code. LLVM even supports JIT compilation.
>
> I went through the usual steps in trying this and was extremely impressed
> with
> the results. After only two days I was able to create an optimizing
> native-code compiler for a subset of CAML large enough to represent the
> following Fibonacci program:
>
>   let rec fib n =
>     if n <= 2 then 1 else
>       fib(n-1) + fib(n-2)
>
>   do fib 40
>
> The compiler is written entirely in OCaml, using camlp4 for lexing and
> parsing, and the whole thing is only ~100 lines of code!
>
> I'll detail exactly how you can use LLVM from OCaml in a future OCaml
> Journal
> article:
>
>   http://www.ffconsultancy.com/products/ocaml_journal/?ol
>
> Meanwhile here's my latest source:
>
> type expr =
>   | Int of int
>   | Var of string
>   | BinOp of [ `Add | `Sub | `Leq ] * expr * expr
>   | If of expr * expr * expr
>   | Apply of expr * expr
>
> type defn =
>   | LetRec of string * string * expr
>
> open Camlp4.PreCast
>
> let expr = Gram.Entry.mk "expr"
> let defn = Gram.Entry.mk "defn"
> let prog = Gram.Entry.mk "prog"
>
> EXTEND Gram
>   expr:
>   [ [ "if"; p = expr; "then"; t = expr; "else"; f = expr ->
>         If(p, t, f) ]
>   | [ e1 = expr; "<="; e2 = expr -> BinOp(`Leq, e1, e2) ]
>   | [ e1 = expr; "+"; e2 = expr -> BinOp(`Add, e1, e2)
>     | e1 = expr; "-"; e2 = expr -> BinOp(`Sub, e1, e2) ]
>   | [ f = expr; x = expr -> Apply(f, x) ]
>   | [ v = LIDENT -> Var v
>     | n = INT -> Int(int_of_string n)
>     | "("; e = expr; ")" -> e ] ];
>   defn:
>   [ [ "let"; "rec"; f = LIDENT; x = LIDENT; "="; body = expr ->
>         LetRec(f, x, body) ] ];
>   prog:
>   [ [ defns = LIST0 defn; "do"; run = expr -> defns, run ] ];
> END
>
> open Printf
>
> let program, run =
>   try Gram.parse prog Loc.ghost (Stream.of_channel stdin) with
>   | Loc.Exc_located(loc, e) ->
>       printf "%s at line %d\n" (Printexc.to_string e) (Loc.start_lineloc);
>       exit 1
>
> open Llvm
>
> let ty = i64_type
>
> let ( |> ) x f = f x
>
> type state =
>     { fn: llvalue;
>       blk: llbasicblock;
>       vars: (string * llvalue) list }
>
> let bb state = builder_at_end state.blk
> let new_block state name = append_block name state.fn
> let find state v =
>   try List.assoc v state.vars with Not_found ->
>     eprintf "Unknown variable %s\n" v;
>     raise Not_found
> let cont (v, state) dest_blk =
>   build_br dest_blk (bb state) |> ignore;
>   v, state
>
> let rec expr state = function
>   | Int n -> const_int ty n, state
>   | Var x -> find state x, state
>   | BinOp(op, f, g) ->
>       let f, state = expr state f in
>       let g, state = expr state g in
>       let build, name = match op with
>         | `Add -> build_add, "add"
>         | `Sub -> build_sub, "sub"
>         | `Leq -> build_icmp Icmp_sle, "leq" in
>       build f g name (bb state), state
>   | If(p, t, f) ->
>       let t_blk = new_block state "pass" in
>       let f_blk = new_block state "fail" in
>       let k_blk = new_block state "cont" in
>       let cond, state = expr state p in
>       build_cond_br cond t_blk f_blk (bb state) |> ignore;
>       let t, state = cont (expr { state with blk = t_blk } t) k_blk in
>       let f, state = cont (expr { state with blk = f_blk } f) k_blk in
>       build_phi [t, t_blk; f, f_blk] "join" (bb state), state
>   | Apply(f, arg) ->
>       let f, state = expr state f in
>       let arg, state = expr state arg in
>       build_call f [|arg|] "apply" (bb state), state
>
> let defn m vars = function
>   | LetRec(f, arg, body) ->
>       let ty = function_type ty [| ty |] in
>       let fn = define_function f ty m in
>       let vars' = (arg, param fn 0) :: (f, fn) :: vars in
>       let body, state =
>         expr { fn = fn; blk = entry_block fn; vars = vars' } body in
>       build_ret body (bb state) |> ignore;
>       (f, fn) :: vars
>
> let int n = const_int ty n
>
> let main filename =
>   let m = create_module filename in
>
>   let string = pointer_type i8_type in
>
>   let print =
>     declare_function "printf" (var_arg_function_type ty [|string|]) m in
>
>   let main = define_function "main" (function_type ty [| |]) m in
>   let blk = entry_block main in
>   let bb = builder_at_end blk in
>
>   let str s = define_global "buf" (const_stringz s) m in
>   let int_spec = build_gep (str "%d\n") [| int 0; int 0 |] "int_spec" bb
> in
>
>   let vars = List.fold_left (defn m) [] program in
>   let n, _ = expr { fn = main; blk = blk; vars = vars } run in
>
>   build_call print [| int_spec; n |] "" bb |> ignore;
>
>   build_ret (int 0) bb |> ignore;
>
>   if not (Llvm_bitwriter.write_bitcode_file m filename) then exit 1;
>   dispose_module m
>
> let () = match Sys.argv with
>   | [|_; filename|] -> main filename
>   | _ as a -> Printf.eprintf "Usage: %s <file>\n" a.(0)
>
> To use it, simply download and install the latest SVN version of LLVM
> (which
> even builds and installs the OCaml bindings for you!) and then do:
>
> $ ocamlc -g -dtypes -pp camlp4oof -I +camlp4 dynlink.cma camlp4lib.cma -cc
> g++
> llvm.cma llvm_bitwriter.cma minml.ml -o minml
> $ ./minml run.bc <fib.ml
> $ llc -f run.bc -o run.s
> $ gcc run.s -o run
> $ ./run
> 102334155
> $
>
> You can look at the generated intermediate representation with:
>
> $ llvm-dis -f run.bc
> $ cat run.ll
>
> If anyone improves upon this I'd love to hear about it! :-)
>
> --
> Dr Jon D Harrop, Flying Frog Consultancy Ltd.
> http://www.ffconsultancy.com/products/?e
>
> _______________________________________________
> Caml-list mailing list. Subscription management:
> http://yquem.inria.fr/cgi-bin/mailman/listinfo/caml-list
> Archives: http://caml.inria.fr
> Beginner's list: http://groups.yahoo.com/group/ocaml_beginners
> Bug reports: http://caml.inria.fr/bin/caml-bugs
>

[-- Attachment #2: Type: text/html, Size: 9785 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Caml-list] LLVM: A native-code compiler for MiniML in ~100LOC
  2007-11-26 20:52 LLVM: A native-code compiler for MiniML in ~100LOC Jon Harrop
  2007-11-27 16:57 ` [Caml-list] " Tom Primožič
@ 2007-12-02 10:00 ` Xavier Leroy
  2007-12-02 16:21   ` Jon Harrop
  1 sibling, 1 reply; 4+ messages in thread
From: Xavier Leroy @ 2007-12-02 10:00 UTC (permalink / raw)
  To: Jon Harrop; +Cc: caml-list

> I went through the usual steps in trying this and was extremely
> impressed with the results. After only two days I was able to create
> an optimizing native-code compiler for a subset of CAML large enough
> to represent the following Fibonacci program:

Yes, LLVM is an impressive project and your example demonstrates that
LLVM is very well suited to run-time code generation.

However, I have a point of terminology to make: the language you
implemented is not what is normally called Mini-ML in the literature.

Your language has first-order, second-class functions, while Mini-ML
has higher-order, first-class functions.  A runtime code generator for
Mini-ML would be significantly more complex, since it has to deal with
first-class functions either through closure conversion or
defunctionalization, meaning in both cases dynamic allocation and
garbage collection.

Which brings us back to Basile's original question about LLVM and the
Caml garbage collector.  The following page suggests that they can
already work together:

        http://llvm.org/docs/GarbageCollection.html

- Xavier Leroy


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Caml-list] LLVM: A native-code compiler for MiniML in ~100LOC
  2007-12-02 10:00 ` Xavier Leroy
@ 2007-12-02 16:21   ` Jon Harrop
  0 siblings, 0 replies; 4+ messages in thread
From: Jon Harrop @ 2007-12-02 16:21 UTC (permalink / raw)
  To: caml-list

On Sunday 02 December 2007 10:00, Xavier Leroy wrote:
> Your language has first-order, second-class functions, while Mini-ML
> has higher-order, first-class functions.  A runtime code generator for
> Mini-ML would be significantly more complex, since it has to deal with
> first-class functions either through closure conversion or
> defunctionalization, meaning in both cases dynamic allocation and
> garbage collection.

Yes, it was a little cheeky of me to call it MiniML. Adding first-class 
functions turned out to be trivial but making them higher-order will take 
more work.

I would like to build a new language implementation that draws mostly upon 
OCaml and F#. In particular, I'd like to augment OCaml with run-time types, 
something akin to type classes, operator overloading, high-performance 
numerics (built-in types for complex numbers and low-dimensional vectors and 
matrices, and 32-bit floats as a storage format) and a commerce-friendly 
intermediate representation.

Although I would like to build a platform suitable for commercial 
applications, I would like the whole thing to be open source and other 
contributions and ideas are most welcome!

There are some important design decisions to make first so I've got a lot of 
benchmarking to do before I'll start implementing any production-quality 
code.

> Which brings us back to Basile's original question about LLVM and the
> Caml garbage collector.  The following page suggests that they can
> already work together:
>
>         http://llvm.org/docs/GarbageCollection.html

To the best of my knowledge there are no working demos of any GCs using LLVM's 
API for this. Chris Lattner (the lead developer of LLVM) did implement a CLR 
on top of LLVM but the results are owned by Microsoft. He assures me that it 
is perfectly feasible and, I believe, Gordon Henriksen already has something 
working and it is in the pipeline but not yet in SVN LLVM.

They were only recently discussing whether or not an external front-end is 
even theoretically capable of generating OCaml-friendly native-code in order 
to reuse OCaml's GC.

Still, LLVM is an awesome project and, once someone makes the breakthough of a 
first working tutorial GC on it, I think it will snowball into a great 
alternative to Mono for creating new languages.

-- 
Dr Jon D Harrop, Flying Frog Consultancy Ltd.
http://www.ffconsultancy.com/products/?e


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2007-12-02 16:30 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-11-26 20:52 LLVM: A native-code compiler for MiniML in ~100LOC Jon Harrop
2007-11-27 16:57 ` [Caml-list] " Tom Primožič
2007-12-02 10:00 ` Xavier Leroy
2007-12-02 16:21   ` Jon Harrop

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).