(* open ExtLib *)

(* Slightly modified copy of the row loader from module CSV. *)

(** Raised by {!load_rows} when the input ends in the middle of a quoted
    field.  The payload is a human-readable description of the problem. *)
exception Bad_CSV_file of string

(** States of the character-driven CSV parser used by {!load_rows}. *)
type state_t =
  | StartField               (** At the start of a field. *)
  | InUnquotedField          (** Inside a field that did not start with a
                                 double quote. *)
  | InQuotedField            (** Inside a field that started with a double
                                 quote. *)
  | InQuotedFieldAfterQuote  (** Just read a double quote while inside a
                                 quoted field. *)

(** [load_rows ?separator ?nread f file] parses [file] as CSV and calls [f]
    once per row, in file order.

    Each row is passed as a [string option list]: an empty unquoted field is
    [None]; every other field (including a quoted empty field) is [Some s].
    Carriage returns are dropped everywhere, including inside quoted fields.
    Inside a quoted field a doubled quote stands for one quote character and
    quote-zero stands for the NUL character.

    @param separator field separator (default [','])
    @param nread when strictly positive, stop after this many rows have been
           delivered to [f]; otherwise (default [-1]) read the whole file
    @param f callback invoked with each completed row
    @param file path of the file to read
    @raise Bad_CSV_file if the input ends inside a quoted field
    @raise Unix.Unix_error if the file cannot be opened or read

    The file descriptor is closed on every exit path, including when [f]
    raises. *)
let load_rows ?(separator = ',') ?(nread = -1) f file =
  let nr = ref 0 in                  (* Rows delivered to [f] so far. *)
  let row = ref [] in                (* Current row, most recent field first. *)
  let field = Buffer.create 64 in    (* Characters of the current field. *)
  let state = ref StartField in      (* Current parser state. *)

  (* Close the current non-empty (or quoted) field and push it on the row. *)
  let end_of_field () =
    row := Some (Buffer.contents field) :: !row;
    Buffer.clear field;
    state := StartField
  in
  (* Push an empty unquoted field on the row. *)
  let empty_field () =
    row := None :: !row;
    Buffer.clear field;
    state := StartField
  in
  (* Hand the completed row to the callback and start a new one. *)
  let end_of_row () =
    let row_list = List.rev !row in
    f row_list;
    row := [];
    state := StartField;
    incr nr
  in

  (* Feed one input character to the state machine. *)
  let process c =
    (* Always ignore \r characters. *)
    if c <> '\r' then
      match !state with
      | StartField ->
        (* Expecting quote or other char. *)
        if c = '"' then begin
          state := InQuotedField;
          Buffer.clear field
        end
        else if c = separator then
          (* Empty field. *)
          empty_field ()
        else if c = '\n' then begin
          (* Empty field, end of row. *)
          empty_field ();
          end_of_row ()
        end
        else begin
          state := InUnquotedField;
          Buffer.clear field;
          Buffer.add_char field c
        end
      | InUnquotedField ->
        (* Reading chars to end of field. *)
        if c = separator then
          end_of_field ()
        else if c = '\n' then begin
          end_of_field ();
          end_of_row ()
        end
        else Buffer.add_char field c
      | InQuotedField ->
        (* Reading chars to end of field. *)
        if c = '"' then state := InQuotedFieldAfterQuote
        else Buffer.add_char field c
      | InQuotedFieldAfterQuote ->
        if c = '"' then begin
          (* Doubled quote. *)
          Buffer.add_char field c;
          state := InQuotedField
        end
        else if c = '0' then begin
          (* Quote-0 is ASCII NUL. *)
          Buffer.add_char field '\000';
          state := InQuotedField
        end
        else if c = separator then
          end_of_field ()
        else if c = '\n' then begin
          end_of_field ();
          end_of_row ()
        end
        else begin
          (* Bad single quote in field: keep the quote and the character
             and carry on inside the quoted field. *)
          Buffer.add_char field '"';
          Buffer.add_char field c;
          state := InQuotedField
        end
  in

  let continue = ref true in
  let file_in = Unix.openfile file [Unix.O_RDONLY] 0o640 in
  (* Best-effort close: a failing close must not mask the real outcome. *)
  let close_input () =
    try Unix.close file_in with Unix.Unix_error _ -> ()
  in

  (* Stop reading and flush whatever is pending.  This used to sit after
     [try ... with _ -> ();], which made it part of the exception handler,
     so it never ran on a normal end of file. *)
  let end_processing () =
    continue := false;
    match !state with
    | StartField ->
      (* Input ended right after a separator: emit the trailing empty
         field.  Nothing to do when no row is in progress. *)
      if !row <> [] then begin
        empty_field ();
        end_of_row ()
      end
    | InUnquotedField | InQuotedFieldAfterQuote ->
      (* Last row was not terminated by a newline. *)
      end_of_field ();
      end_of_row ()
    | InQuotedField ->
      raise (Bad_CSV_file "Missing end quote after quoted field.")
  in

  let buffer_length = 2 * 1024 * 1024 in
  let buffer = Bytes.create buffer_length in
  (* Parse the first [len] bytes of [buffer]; only indices [0 .. len - 1]
     hold data from the latest read. *)
  let process_buffer len =
    let i = ref 0 in
    while !continue && !i < len do
      process (Bytes.get buffer !i);
      incr i;
      if nread > 0 && !nr = nread then end_processing ()
    done
  in

  (try
     while !continue do
       let n = Unix.read file_in buffer 0 buffer_length in
       if n > 0 then process_buffer n else end_processing ()
     done
   with e ->
     (* Do not leak the descriptor when the callback, the parser or the
        read itself raises. *)
     close_input ();
     raise e);
  close_input ()

(** [run_threaded f] runs [f ()] in a new thread and returns its handle.
    The thread terminates when [f] returns, so no explicit exit call is
    needed. *)
let run_threaded f = Thread.create f ()

(* NOTE(review): the three bindings below run at module initialisation and
   raise Unix.Unix_error if the files cannot be opened.  They look like
   leftover smoke tests -- confirm nothing depends on them before removing. *)
let t1 = load_rows (fun _ -> ()) "test.csv"
let t2 = load_rows (fun _ -> ()) "test2.csv"
let t3 = load_rows (fun _ -> ()) "test3.csv"