Yaron,
I use a slightly modified version of the CSV library's load_rows. Here is the main code, which is written in a highly imperative style. Perhaps I should rewrite it in a purely functional style?
The main program is :
open Printf;;
open Sys;;
(** [timed_exec start_message f] prints [start_message], runs [f ()],
    prints ["done in <seconds>"] and returns the result of [f ()].

    The duration is measured with [Sys.time], i.e. processor time used by
    the program, not wall-clock time; time spent blocked on I/O is therefore
    not counted.

    If [f] raises, the exception propagates and no "done" line is printed. *)
let timed_exec start_message f =
  print_string start_message;
  (* Flush explicitly: [print_string] only buffers, so without this the
     start message would not show up until [print_endline] below runs,
     i.e. after [f] has already finished. *)
  flush stdout;
  let started = Sys.time () in
  let result = f () in
  print_endline ("done in " ^ string_of_float (Sys.time () -. started));
  result;;
(* Reported problem: merely enabling this definition (and hence linking the
   threads library) makes the program really slow, even if it is never
   called. *)

(** [run_threaded f] starts a new thread that runs [f ()] and returns its
    handle. The thread terminates when [f] returns, so no explicit
    [Thread.exit] call is needed (that function is deprecated in recent
    OCaml releases). *)
let run_threaded f = Thread.create f ()
(* Entry point: parse both CSV files, discarding every row, and report the
   time taken through [timed_exec]. *)
let () =
  (* Parse one file with a no-op row callback. The channel is closed both on
     success and when parsing raises, so no file descriptor is leaked. *)
  let read_file path =
    let chan = open_in path in
    (try load_rows (fun _ -> ()) chan
     with e -> close_in_noerr chan; raise e);
    close_in chan
  in
  timed_exec "Reading data " (fun () ->
    read_file "file1.csv";
    read_file "file2.csv")
The load_rows :
(** [load_rows ?separator ?nread f chan] reads CSV text from [chan] one
    character at a time and calls [f] on every completed row.

    Each row is passed to [f] as a list of fields in file order. An empty
    unquoted field is reported as [None]; every other field, including an
    empty quoted one, is reported as [Some contents].

    @param separator field separator character (default [',']).
    @param nread maximum number of rows to deliver to [f]; a negative value
      (the default, [-1]) means "read until end of file". With [nread = 0]
      nothing is read from [chan].
    @param f callback invoked once per row.
    @param chan input channel to read from; it is not closed here.
    @raise Bad_CSV_file if the input ends inside a quoted field. *)
let load_rows ?(separator = ',') ?(nread = -1) f chan =
  let nr = ref 0 in                  (* Rows delivered to [f] so far. *)
  let row = ref [] in                (* Current row, fields in reverse order. *)
  (* Current field. A [Buffer] replaces the former [char list] that had to be
     reversed, measured and copied into a mutable string for every field. *)
  let field = Buffer.create 64 in
  let state = ref StartField in      (* Current state. *)
  let end_of_field () =
    row := Some (Buffer.contents field) :: !row;
    Buffer.clear field;
    state := StartField
  in
  let empty_field () =
    row := None :: !row;
    Buffer.clear field;
    state := StartField
  in
  let end_of_row () =
    f (List.rev !row);
    row := [];
    state := StartField;
    incr nr
  in
  let rec loop () =
    (* Check the row limit before consuming input, so that [nread = 0]
       neither reads a character nor emits a row. *)
    if nread < 0 || !nr < nread then begin
      let c = input_char chan in
      if c <> '\r' then begin        (* Always ignore \r characters. *)
        match !state with
        | StartField ->              (* Expecting quote or other char. *)
          if c = '"' then begin
            state := InQuotedField;
            Buffer.clear field
          end else if c = separator then   (* Empty field. *)
            empty_field ()
          else if c = '\n' then begin      (* Empty field, end of row. *)
            empty_field ();
            end_of_row ()
          end else begin
            state := InUnquotedField;
            Buffer.clear field;
            Buffer.add_char field c
          end
        | InUnquotedField ->         (* Reading chars to end of field. *)
          if c = separator then            (* End of field. *)
            end_of_field ()
          else if c = '\n' then begin      (* End of field and end of row. *)
            end_of_field ();
            end_of_row ()
          end else
            Buffer.add_char field c
        | InQuotedField ->           (* Reading chars to end of field. *)
          if c = '"' then
            state := InQuotedFieldAfterQuote
          else
            Buffer.add_char field c
        | InQuotedFieldAfterQuote ->
          if c = '"' then begin            (* Doubled quote. *)
            Buffer.add_char field c;
            state := InQuotedField
          end else if c = '0' then begin   (* Quote-0 is ASCII NUL. *)
            Buffer.add_char field '\000';
            state := InQuotedField
          end else if c = separator then   (* End of field. *)
            end_of_field ()
          else if c = '\n' then begin      (* End of field and end of row. *)
            end_of_field ();
            end_of_row ()
          end else begin                   (* Bad single quote in field. *)
            (* Keep the stray quote followed by the character after it. *)
            Buffer.add_char field '"';
            Buffer.add_char field c;
            state := InQuotedField
          end
      end;
      loop ()
    end
  in
  try loop ()
  with End_of_file ->
    (* Any part left to write out? *)
    (match !state with
     | StartField ->
       (* Only emit a final row if some field of it was already read. *)
       if !row <> [] then begin
         empty_field ();
         end_of_row ()
       end
     | InUnquotedField | InQuotedFieldAfterQuote ->
       end_of_field ();
       end_of_row ()
     | InQuotedField ->
       raise (Bad_CSV_file "Missing end quote after quoted field."))
Thanks,
Rémi
2009/2/16 Rémi Dewitte <remi@gide.net>:
Hello,
I would like to read two files in two different threads.
I have made a first version reading the first then the second and it takes 2.8s (native).
I decided to make a threaded version, and before making any use of threads I realized that just linking against the threads library, without even using it, makes my first version of the program run in 12s!
Do you have a short benchmark you can post? The idea that the thread-overhead would make a difference like that, particularly for IO-bound code (which I'm guessing this is) is pretty surprising.
y
I use pcre, extlib, csv libraries as well.
I guess it might come from the GC slowing things down here, doesn't it? Where else could it come from? Is there a workaround or something I should know?
Can ocaml use multiple cores ?
Do you have a few pointers on libraries for doing parallel I/O?
Thanks,
Rémi
_______________________________________________
Caml-list mailing list. Subscription management:
http://yquem.inria.fr/cgi-bin/mailman/listinfo/caml-list
Archives: http://caml.inria.fr
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners
Bug reports: http://caml.inria.fr/bin/caml-bugs