(* open ExtLib *)
open Bigarray

(* Slightly modified copy of the row loader from module CSV. *)

(** Raised when the input cannot be parsed as CSV. The payload is a
    human-readable description of the problem. *)
exception Bad_CSV_file of string

(** States of the character-level CSV parser used by {!load_rows}. *)
type state_t =
  | StartField               (* At the start of a field. *)
  | InUnquotedField          (* Inside a field that did not open with a quote. *)
  | InQuotedField            (* Inside a quoted field. *)
  | InQuotedFieldAfterQuote  (* Just saw a quote while inside a quoted field. *)

(** [load_rows ?separator ?nread f file] memory-maps [file], parses it as CSV
    one character at a time and calls [f] once per row, in file order.

    A row is passed to [f] as the list of its fields, left to right:
    - [Some s] for an unquoted non-empty field or any quoted field
      (so [""] yields [Some ""]);
    - [None] for a field with nothing at all between its delimiters.

    Parsing rules implemented below:
    - ['\r'] is dropped everywhere, including inside quoted fields;
    - ['\n'] ends a row, except inside a quoted field where it is kept;
    - inside a quoted field, a doubled quote is a literal quote and a quote
      followed by ['0'] is the NUL character;
    - a lone quote followed by any other character is kept verbatim together
      with that character, and the field stays quoted.

    A final row that is not terminated by ['\n'] is still delivered.

    @param separator field separator (default [',']).
    @param nread maximum number of rows to deliver; a negative value
           (default [-1]) means no limit.
    @raise Bad_CSV_file if the file ends inside a quoted field.

    Exceptions raised by [f], by opening the file or by mapping it propagate
    to the caller; the file descriptor is closed in every case. *)
let load_rows ?(separator = ',') ?(nread = -1) f file =
  let nr = ref 0 in                  (* Rows delivered so far. *)
  let row = ref [] in                (* Current row, most recent field first. *)
  let field = Buffer.create 64 in    (* Contents of the current field. *)
  let state = ref StartField in      (* Current state. *)

  (* Push the accumulated field onto the current row. *)
  let end_of_field () =
    row := Some (Buffer.contents field) :: !row;
    Buffer.clear field;
    state := StartField
  in
  (* Push an empty (absent) field onto the current row. *)
  let empty_field () =
    row := None :: !row;
    Buffer.clear field;
    state := StartField
  in
  (* Hand the completed row to the callback and start a new one. *)
  let end_of_row () =
    let row_list = List.rev !row in
    f row_list;
    row := [];
    state := StartField;
    incr nr
  in

  (* Advance the state machine by one input character. *)
  let process c =
    if c <> '\r' then begin          (* Always ignore \r characters. *)
      match !state with
      | StartField ->                (* Expecting quote or other char. *)
        if c = '"' then begin
          state := InQuotedField;
          Buffer.clear field
        end else if c = separator then   (* Empty field. *)
          empty_field ()
        else if c = '\n' then begin      (* Empty field, end of row. *)
          empty_field ();
          end_of_row ()
        end else begin
          state := InUnquotedField;
          Buffer.clear field;
          Buffer.add_char field c
        end
      | InUnquotedField ->           (* Reading chars to end of field. *)
        if c = separator then            (* End of field. *)
          end_of_field ()
        else if c = '\n' then begin      (* End of field and end of row. *)
          end_of_field ();
          end_of_row ()
        end else
          Buffer.add_char field c
      | InQuotedField ->             (* Reading chars to end of field. *)
        if c = '"' then state := InQuotedFieldAfterQuote
        else Buffer.add_char field c
      | InQuotedFieldAfterQuote ->
        if c = '"' then begin            (* Doubled quote. *)
          Buffer.add_char field c;
          state := InQuotedField
        end else if c = '0' then begin   (* Quote-0 is ASCII NUL. *)
          Buffer.add_char field '\000';
          state := InQuotedField
        end else if c = separator then   (* End of field. *)
          end_of_field ()
        else if c = '\n' then begin      (* End of field and end of row. *)
          end_of_field ();
          end_of_row ()
        end else begin                   (* Bad single quote in field. *)
          Buffer.add_char field '"';
          Buffer.add_char field c;
          state := InQuotedField
        end
    end
  in

  let file_in = Unix.openfile file [Unix.O_RDONLY] 0o640 in

  (* Closing is best effort: a failure to close must not mask parse results. *)
  let close_input () =
    try Unix.close file_in with Unix.Unix_error _ -> ()
  in

  (* Deliver whatever is still buffered once the input is exhausted. *)
  let flush_pending () =
    match !state with
    | StartField ->
      (* Fields are pending only when the input ended right after a
         separator; the last field of that row is then empty. *)
      (match !row with
       | [] -> ()
       | _ :: _ -> empty_field (); end_of_row ())
    | InUnquotedField | InQuotedFieldAfterQuote ->
      end_of_field ();
      end_of_row ()
    | InQuotedField ->
      raise (Bad_CSV_file "Missing end quote after quoted field.")
  in

  (* Map the file and feed it to the state machine.  The loop condition is
     tested before every read so that an empty mapping, or [nread = 0],
     reads nothing. *)
  let scan () =
    (* NOTE(review): later OCaml releases provide this as [Unix.map_file];
       the call is kept as is for the toolchain this file targets - confirm
       before upgrading. *)
    let mmap =
      Bigarray.Array1.map_file
        file_in Bigarray.char Bigarray.c_layout false (-1)
    in
    let len = Bigarray.Array1.dim mmap in
    let i = ref 0 in
    while !i < len && (nread < 0 || !nr < nread) do
      process (Array1.get mmap !i);    (* Bounds-checked access on purpose. *)
      incr i
    done
  in

  (* Make sure the descriptor is released even if [f] or the mapping fails. *)
  (try scan () with e -> close_input (); raise e);
  close_input ();
  flush_pending ()
;;

(** [run_threaded f] returns a function which, when applied to [()], starts a
    new thread that runs [f ()] and then calls [Thread.exit ()]; the result of
    that application is the handle of the created thread. *)
let run_threaded f = Thread.create (fun () -> f (); Thread.exit ());;

(* Runs performed at module initialisation: parse three files from the
   current directory and discard every row. *)
let () = load_rows (fun _ -> ()) "test.csv"
let () = load_rows (fun _ -> ()) "test2.csv"
let () = load_rows (fun _ -> ()) "test3.csv"