caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
* [Caml-list] Re: Common IO structure
@ 2004-05-03  6:12 Vladimir N. Silyaev
  2004-05-04 21:31 ` Benjamin Geer
  0 siblings, 1 reply; 67+ messages in thread
From: Vladimir N. Silyaev @ 2004-05-03  6:12 UTC (permalink / raw)
  To: caml-list

[-- Attachment #1: Type: text/plain, Size: 1402 bytes --]


I'm relatively new to OCaml and very new to this list. Browsing through the recent
discussion about a standard IO structure, I felt that the discussion kept circling
around what the signature of the "universal" class should look like, so that it would
satisfy all possible needs.

However, it looks like the prospective users of this new IO have rather
contradictory requirements for IO in general. So I wonder whether the
standard IO could provide only the basic signatures of the IO modules and a framework
for layered IO.  That way, additional functionality could be added
incrementally, without affecting the core IO, while achieving interoperability
between the different libraries that use that IO.

For the last day or two I have been playing with OCaml and its module system, and
sketched a variant of basic IO. This sketch is based on imperative
streams, where a stream is a module parameterized by the symbol type. However,
block-based IO is also supported; the test code includes naive functors that
translate from block-based IO to stream IO.

If you felt interested, please look into the attached file io.ml. 

In the file you will find wrappers for Pervasives file IO, rudimentary
socket I/O, a naive UTF8 filter and generic I/O algorithms. At the end
of the file there are several test cases that exercise I/O extensibility.

File is self sufficient example and one should be able to compile it or run
in the toplevel. 
	

Regards,
Vladimir





[-- Attachment #2: io.ml --]
[-- Type: text/plain, Size: 10705 bytes --]


(* Symbol-at-a-time I/O signatures.  Both are abstract over the handle
   type [t] and over the [symbol] type carried by the stream. *)
module Stream = struct
  (* A source that yields one symbol per call to [get].  The readers
     defined later in this file signal exhaustion by raising End_of_file. *)
  module type Read =  sig
    type t 
    type symbol
    val get: t -> symbol
  end
  (* A sink that accepts one symbol per call to [put]. *)
  module type Write =  sig
    type t 
    type symbol
    val put: t -> symbol -> unit
  end
end

(* Block-at-a-time I/O signatures operating on string buffers. *)
module Block =
struct
  module type Read = 
  sig
    type t 
    (* [read t buf off len]: the callers in this file pass a buffer, a start
       offset and a maximum length, and treat a result of 0 as end of input. *)
    val read:  t -> string -> int -> int -> int
  end
  module type Write = 
  sig
    type t 
    (* [write t buf off len]: the callers in this file pass a buffer, a start
       offset and the number of bytes to emit. *)
    val write: t -> string -> int -> int -> unit
  end
end

(* Signatures for layered I/O: a filter is itself a stream, built on top of
   another endpoint by [attach]. *)
module Filter = struct
  (* A readable stream created from an underlying [source]. *)
  module type Read =  sig    
    include Stream.Read
    type source 
    val flush: t -> unit
    val attach: source -> t
  end
  (* A writable stream created on top of an underlying [dest]. *)
  module type Write =  sig
    include Stream.Write
    type dest
    val flush: t -> unit
    val attach: dest -> t
  end
end



(* Thin adapters exposing Pervasives binary channels in the shapes expected
   by the Stream and Block signatures: [Read] wraps an in_channel, [Write]
   wraps an out_channel. *)
module File = struct
  module Read = struct
    type file = Pervasives.in_channel
    type t = file
    type symbol = char
    (* Open [path] for binary reading. *)
    let _open path : t = Pervasives.open_in_bin path
    let close (ic : t) = Pervasives.close_in ic
    let seek = Pervasives.seek_in
    let pos = Pervasives.pos_in
    (* Next byte of the channel; Pervasives.input_char raises End_of_file
       once the channel is exhausted. *)
    let get (ic : t) = Pervasives.input_char ic
    (* Read up to [len] bytes into [buf] starting at [off]; returns the
       number of bytes actually read. *)
    let read (ic : t) buf off len = Pervasives.input ic buf off len
  end
  module Write = struct
    type file = Pervasives.out_channel
    type t = file
    type symbol = char
    (* Open (create or truncate) [path] for binary writing. *)
    let _open path : t = Pervasives.open_out_bin path
    let close (oc : t) = Pervasives.close_out oc
    let seek = Pervasives.seek_out
    let pos = Pervasives.pos_out
    let put (oc : t) c = Pervasives.output_char oc c
    (* Write [len] bytes of [buf] starting at [off]. *)
    let write (oc : t) buf off len = Pervasives.output oc buf off len
  end
end


(* Buffering adapters that turn a block-oriented endpoint into a
   char-at-a-time stream.  Note that this definition shadows the standard
   library's Buffer module for the rest of the file. *)
module Buffer = struct
  (* Char stream over a block reader, refilled in chunks of up to 1024
     bytes. *)
  module Read(B:Block.Read) : (Filter.Read with type symbol=char and type source=B.t)  = struct
    type source = B.t
    type symbol = char
    type t = {
      buf : String.t;       (* chunk most recently read from [source] *)
      mutable pos : int;    (* index of the next char to hand out *)
      mutable level: int;   (* number of valid chars currently in [buf] *)
      source: B.t
    }

    let attach src =
      { buf = String.create 1024; pos = 0; level = 0; source = src }

    (* Replace the buffer contents with the next chunk from the source; a
       zero-length read is reported as End_of_file. *)
    let refill t =
      let got = B.read t.source t.buf 0 (String.length t.buf) in
      if got = 0 then raise End_of_file;
      t.pos <- 0;
      t.level <- got

    (* Hand out the next buffered char, refilling first when the buffer has
       been fully consumed. *)
    let get t =
      if t.pos = t.level then refill t;
      let ch = t.buf.[t.pos] in
      t.pos <- succ t.pos;
      ch

    (* Forget whatever is still buffered; the next [get] reads afresh. *)
    let flush t =
      t.level <- 0;
      t.pos <- 0
  end

  (* Char stream onto a block writer, collecting up to 256 bytes per
     underlying write. *)
  module Write(B:Block.Write) : (Filter.Write with type symbol=char and type dest=B.t)  = struct
    type dest = B.t
    type symbol = char
    type t = {
      buf : String.t;       (* bytes collected but not yet written *)
      mutable pos : int;    (* number of pending bytes in [buf] *)
      dest: B.t
    }

    let attach sink =
      { buf = String.create 256; pos = 0; dest = sink }

    (* Push the pending bytes to the destination and empty the buffer. *)
    let flush t =
      let pending = t.pos in
      B.write t.dest t.buf 0 pending;
      t.pos <- 0

    (* Append one char; the buffer is flushed as soon as it becomes full. *)
    let put t ch =
      t.buf.[t.pos] <- ch;
      t.pos <- succ t.pos;
      if String.length t.buf <= t.pos then flush t
  end

end

(* Minimal socket wrapper over the Unix module.  [Read] and [Write] expose
   the same descriptor in the shapes expected by Block.Read / Block.Write. *)
module Socket = struct
  type sock = Unix.file_descr
  type t = sock
  (* Create a socket of kind [_type]; the domain defaults to PF_INET and the
     protocol to 0. *)
  let create ?(domain=Unix.PF_INET) ?(protocol=0) _type   :t =  Unix.socket domain _type protocol 
  let close  = Unix.close
  module Read = struct 
    type t = sock
    let read  = Unix.read 
    let shutdown s = Unix.shutdown s Unix.SHUTDOWN_RECEIVE
  end
  module Write = struct
    type t = sock
    (* Write [len] bytes of [buf] starting at [off], retrying with the
       remainder after a short write. *)
    let rec write t buf off len = 
      let n = Unix.write t buf off len in
      if n <> len then write t buf (off+n) (len-n)
    let shutdown s = Unix.shutdown s Unix.SHUTDOWN_SEND
  end
  (* Connect to [addr], using [sock] when supplied or a fresh SOCK_STREAM
     socket otherwise, and return the descriptor twice: once as the read
     end and once as the write end.
     If the connection attempt fails, a socket created here is closed
     before the exception is re-raised, so the descriptor does not leak;
     a caller-supplied socket is left untouched for the caller to manage. *)
  let connect ?sock addr :(Read.t*Write.t) = 
    let (s, owned) = match sock with 
	Some sock -> (sock, false)
      | None -> (create Unix.SOCK_STREAM, true) in
    (try Unix.connect s addr
     with e ->
       if owned then (try Unix.close s with Unix.Unix_error _ -> ());
       raise e);
    (s,s)
end

(* UTF-8 codec expressed as stream filters: [Read] decodes a char stream
   into code points (ints), [Write] encodes code points onto a char stream.
   Both raise [InvalidSymbol] on data they cannot represent. *)
module UTF8  = struct
  exception InvalidSymbol  
  type utf8 = int
  type t = utf8
  module Read(Src:Stream.Read with type symbol=char) : (Filter.Read with type symbol=utf8 and type source=Src.t) = struct
    type symbol = utf8
    type t = Src.t
    type source = Src.t
    let attach (t:Src.t) : t = t
    (* The decoder keeps no state of its own, so there is nothing to flush. *)
    let flush _ = ()

    (* Read one continuation byte (10xxxxxx) and return its 6 payload bits.
       @raise InvalidSymbol if the byte is not a continuation byte. *)
    let next t = 
      let b = int_of_char (Src.get t) in
      if (b land 0xC0) = 0x80 then b land 0x3F else raise InvalidSymbol

    (* Decode the next code point from the underlying char stream.
       @raise InvalidSymbol on a stray continuation byte, an invalid lead
       byte, an overlong encoding, or a value above 0x10FFFF.
       Whatever [Src.get] raises at end of input propagates unchanged. *)
    let get t = 
      let ch0 = int_of_char (Src.get t) in
      if ch0 < 0x80 then ch0
      else if ch0 < 0xC0 then
        (* 10xxxxxx can only continue a sequence, never start one *)
        raise InvalidSymbol
      else if ch0 < 0xE0 then begin
        let ch1 = next t in
        let ch = ((ch0 land 0x1F) lsl 6) lor ch1 in
        if ch < 0x80 then raise InvalidSymbol else ch
      end else if ch0 < 0xF0 then begin
        let ch1 = next t in
        let ch2 = next t in
        let ch = ((ch0 land 0x0F) lsl 12) lor (ch1 lsl 6) lor ch2 in
        if ch < 0x800 then raise InvalidSymbol else ch
      end else if ch0 < 0xF8 then begin
        let ch1 = next t in
        let ch2 = next t in
        let ch3 = next t in
        (* a 4-byte lead is 11110xxx: three payload bits, hence mask 0x07 *)
        let ch = ((ch0 land 0x07) lsl 18) lor (ch1 lsl 12) lor (ch2 lsl 6) lor ch3 in
        if ch < 0x10000 || ch > 0x10FFFF then raise InvalidSymbol else ch
      end else raise InvalidSymbol
  end     
  module Write(Dst:Stream.Write with type symbol=char): (Filter.Write with type symbol=utf8 and type dest=Dst.t)  = struct
    type symbol = utf8
    type dest = Dst.t
    type t = Dst.t
    let attach t = t
    (* The encoder keeps no state of its own, so there is nothing to flush. *)
    let flush _ = ()

    (* Encode code point [ch] as 1 to 4 bytes on the destination stream.
       @raise InvalidSymbol if [ch] is negative or not below 0x110000. *)
    let put t ch = 
      if ch < 0 then raise InvalidSymbol
      else if ch < 0x80 then Dst.put t (char_of_int ch)
      else if ch < 0x800 then begin
        Dst.put t (char_of_int (0xC0 lor (ch lsr 6)));
        Dst.put t (char_of_int (0x80 lor (ch land 0x3F)))
      end else if ch < 0x10000 then begin
        Dst.put t (char_of_int (0xE0 lor (ch lsr 12)));
        Dst.put t (char_of_int (0x80 lor ((ch lsr 6) land 0x3F)));
        Dst.put t (char_of_int (0x80 lor (ch land 0x3F)))
      end else if ch < 0x110000 then begin
        Dst.put t (char_of_int (0xF0 lor (ch lsr 18)));
        Dst.put t (char_of_int (0x80 lor ((ch lsr 12) land 0x3F)));
        Dst.put t (char_of_int (0x80 lor ((ch lsr 6) land 0x3F)));
        Dst.put t (char_of_int (0x80 lor (ch land 0x3F)))
      end else raise InvalidSymbol
  end    
end

(* Signature of a module that carries a single type [t]; used below to tell
   the generic functors which symbol type they operate on. *)
module type Type =
  sig
    type t
  end

(* Instance of [Type] for byte streams.  Note that this definition shadows
   the standard library's Char module for the rest of the file. *)
module Char = struct
  type t = char
end

(* Generic copy between a reader and a writer that share the symbol type
   [T.t]. *)
module Copy (T:Type) (Src:Stream.Read with type symbol=T.t) (Dst:Stream.Write with type symbol=T.t) = struct
  (* Move symbols from [s] to [d] one at a time, forever.  The loop never
     finishes normally: it ends only when [Src.get] or [Dst.put] raises, and
     that exception is left for the caller to handle. *)
  let run s d : unit =
    let rec pump () =
      Dst.put d (Src.get s);
      pump ()
    in
    pump ()
end

  
(* Copy file [src] to file [dst] one byte at a time.
   The input is opened first, so a missing input no longer creates or
   truncates the output.  Both channels are closed when the copy ends,
   whether at end of input or because of an error; in the error case the
   original exception is re-raised after a best-effort close. *)
let copy_byte src dst = 
  let module CopyFile = Copy(Char) (File.Read) (File.Write) in
  let src = File.Read._open src in
  let dst =
    (try File.Write._open dst
     with e -> File.Read.close src; raise e) in
    try CopyFile.run src dst
    with
      End_of_file ->
        File.Read.close src;
        File.Write.close dst
    | e ->
        (try File.Read.close src with Sys_error _ -> ());
        (try File.Write.close dst with Sys_error _ -> ());
        raise e
      

(* UTF-8 encoder writing directly to an output file channel. *)
module FileWriteUtf8 = UTF8.Write (File.Write)

(* Copy file [src] to file [dst] by decoding each UTF-8 symbol and encoding
   it again, reading the input channel without an extra buffer layer.
   The input is opened first, so a missing input no longer creates or
   truncates the output.  Both channels are closed when the copy ends,
   whether at end of input or because of an error (for example
   UTF8.InvalidSymbol on malformed input); in the error case the original
   exception is re-raised after a best-effort close. *)
let copy_utf src dst = 
  let module FileReadUtf8 = UTF8.Read (File.Read) in
  let module CopyFile = Copy(UTF8) (FileReadUtf8) (FileWriteUtf8) in
  let src = File.Read._open src in
  let dst =
    (try File.Write._open dst
     with e -> File.Read.close src; raise e) in
  let src8 = FileReadUtf8.attach src
  and dst8 = FileWriteUtf8.attach dst
  in
    try CopyFile.run src8 dst8
    with
      End_of_file ->
        File.Read.close src;
        File.Write.close dst
    | e ->
        (try File.Read.close src with Sys_error _ -> ());
        (try File.Write.close dst with Sys_error _ -> ());
        raise e

(* Reader stacks used by the tests below: a buffered char reader over a
   file, a UTF-8 decoder on top of that buffered reader, and a UTF-8
   decoder reading the file channel directly. *)
module BufferedFileRead = Buffer.Read(File.Read)
module BufferedFileReadUtf8 = UTF8.Read(BufferedFileRead)
module FileReadUtf8 = UTF8.Read(File.Read)


(* Same as copy_utf, but the input goes through the Buffer.Read layer
   before being decoded.
   The input is opened first, so a missing input no longer creates or
   truncates the output.  Both channels are closed when the copy ends,
   whether at end of input or because of an error; in the error case the
   original exception is re-raised after a best-effort close. *)
let copy_utf2 src dst = 
  let module CopyFile = Copy (UTF8) (BufferedFileReadUtf8) (FileWriteUtf8) in 
  let src = File.Read._open src in
  let dst =
    (try File.Write._open dst
     with e -> File.Read.close src; raise e) in
  let srcb = BufferedFileRead.attach src in
  let src8 = BufferedFileReadUtf8.attach srcb
  and dst8 = FileWriteUtf8.attach dst
  in
    try CopyFile.run src8 dst8
    with
      End_of_file ->
        File.Read.close src;
        File.Write.close dst
    | e ->
        (try File.Read.close src with Sys_error _ -> ());
        (try File.Write.close dst with Sys_error _ -> ());
        raise e

(* Lossy writer from code points to a char stream: values below 0x80 are
   passed through as the corresponding char, anything else is replaced by a
   space. *)
module Utf2Ascii (Dst:Stream.Write with type symbol=char )  = struct
  type symbol = UTF8.t
  type t = Dst.t
  type dest = Dst.t
  let attach (t:Dst.t) :t = t
  let put t ch = 
    let out = if ch < 0x80 then char_of_int ch else ' ' in
    Dst.put t out
end



(* Decode file [src] as UTF-8 (through a read buffer) and write it to [dst]
   through Utf2Ascii, which replaces every non-ASCII symbol by a space.
   The input is opened first, so a missing input no longer creates or
   truncates the output.  Both channels are closed when the copy ends,
   whether at end of input or because of an error; in the error case the
   original exception is re-raised after a best-effort close. *)
let copy_utf2ascii src dst = 
  let module FileWriteAscii = Utf2Ascii(File.Write) in
  let module CopyFile = Copy (UTF8) (BufferedFileReadUtf8) (FileWriteAscii) in
  let src = File.Read._open src in
  let dst =
    (try File.Write._open dst
     with e -> File.Read.close src; raise e) in
  let srcb = BufferedFileRead.attach src in
  let src8 = BufferedFileReadUtf8.attach srcb in
  let dsta = FileWriteAscii.attach dst in
    try CopyFile.run src8 dsta
    with
      End_of_file ->
        File.Read.close src;
        File.Write.close dst
    | e ->
        (try File.Read.close src with Sys_error _ -> ());
        (try File.Write.close dst with Sys_error _ -> ());
        raise e


(* In-memory char reader over a list of strings, consumed front to back. *)
module Static = struct
  type symbol = char
  type t = {
    mutable pos: int;             (* index of the next char in [cur] *)
    mutable buf: String.t list;   (* strings not yet started *)
    mutable cur: String.t         (* string currently being consumed *)
  }

  (* Start reading at the first string of [l]. *)
  let attach l = { pos = 0; buf = l; cur = "" }

  (* Return the next char, moving on to the following string (and skipping
     empty ones) when the current one is used up.
     @raise End_of_file once every string has been consumed. *)
  let rec get t = 
    if t.pos >= String.length t.cur then begin
      match t.buf with
      | [] -> raise End_of_file
      | next :: rest ->
          t.buf <- rest;
          t.pos <- 0;
          t.cur <- next;
          get t
    end else begin
      let ch = t.cur.[t.pos] in
      t.pos <- succ t.pos;
      ch
    end
    
end

  

(* Write the fixed text "Hello\nWorld\n" to file [dst] through the generic
   copy.  The output channel is closed when the copy ends, whether because
   the in-memory source is exhausted or because of an error; in the error
   case the original exception is re-raised after a best-effort close. *)
let copy_static dst = 
  let module CopyFile = Copy (Char) (Static) (File.Write) in
  let src = Static.attach ["Hello";"\n";"World";"\n"] in
  let dst = File.Write._open dst in
    try CopyFile.run src dst
    with
      End_of_file ->
        File.Write.close dst
    | e ->
        (try File.Write.close dst with Sys_error _ -> ());
        raise e

(* Buffered char streams over the read and write ends of a socket. *)
module StreamSockRead = Buffer.Read(Socket.Read)
module StreamSockWrite = Buffer.Write(Socket.Write)

(* Fetch [url] from [host] on port 80 and store everything the server sends
   back in file [dst].
   The request is the original one, byte for byte: "GET <url> HTTP/0.9"
   followed by two newlines.
   The socket and the output channel are released on every path; on failure
   the original exception is re-raised after a best-effort close.
   @raise Not_found if [host] cannot be resolved or resolves to no address. *)
let get_utf8 host url dst = 
  let addrs = (Unix.gethostbyname host).Unix.h_addr_list in
  if Array.length addrs = 0 then raise Not_found;
  let addr = Unix.ADDR_INET(addrs.(0), 80) in
  let (sr,sw) = Socket.connect addr in
  try
    let req = Static.attach  ["GET"; " ";url; " "; "HTTP/0.9";"\n";"\n"] in
    let sock_wr = StreamSockWrite.attach sw in
    let sock_rd = StreamSockRead.attach sr in
    let module SendReq = Copy (Char) (Static) (StreamSockWrite) in
    (* The copy ends with End_of_file once the request text is exhausted;
       only then is the buffered request pushed onto the socket. *)
    (try SendReq.run req sock_wr
     with End_of_file -> StreamSockWrite.flush sock_wr);
    let dst = File.Write._open dst in
    let module GetFile = Copy (Char) (StreamSockRead) (File.Write) in
    (try GetFile.run sock_rd dst
     with
       End_of_file -> File.Write.close dst
     | e ->
         (try File.Write.close dst with Sys_error _ -> ());
         raise e);
    Socket.close sr
  with e ->
    (* sr and sw are the same descriptor, so one close releases both ends *)
    (try Socket.close sr with Unix.Unix_error _ -> ());
    raise e

(* Generic symbol counter for any reader whose symbol type is [T.t]. *)
module Length (T:Type) (Src:Stream.Read with type symbol=T.t) = struct
  (* Consume [s] until [Src.get] raises End_of_file and return the number of
     symbols read up to that point. *)
  let get s = 
    let count = ref 0 in
    let rec drain () =
      ignore (Src.get s);
      incr count;
      drain ()
    in
    try drain () with End_of_file -> !count
end


		    
(* Measure [file] twice over the same channel: first as a byte count, then,
   after seeking back to the start, as a count of UTF-8 symbols.
   Returns [(bytes, symbols)].
   The channel is closed on every path; if counting fails (for example with
   UTF8.InvalidSymbol on a file that is not valid UTF-8) the original
   exception is re-raised after a best-effort close. *)
let length file = 
  let src = File.Read._open file in
  try
    let module FileCharLen = Length (Char) (File.Read)  in
    let len = FileCharLen.get src in
    File.Read.seek src 0;
    let module FileUtf8Len = Length (UTF8) (FileReadUtf8) in
    let usrc = FileReadUtf8.attach src in
    let len8 = FileUtf8Len.get usrc in
    File.Read.close src;
    len,len8
  with e ->
    (try File.Read.close src with Sys_error _ -> ());
    raise e
    
  
(* Run every demo in order.  Each step is a description, an operation that
   produces the named file, and that file's name; after each step the
   file's size in bytes and in UTF-8 symbols is printed. *)
let run () = 
  let html = "utf8.html" in
  let steps = [ 
    "Hello World", copy_static, "static.dat";
    "Getting corpse from a web ..", (get_utf8 "www.columbia.edu"  "/kermit/utf8.html" ), html;
    "Copying corpse using bytecopy", (copy_byte html), "copy.dat";
    "Copying corpse using utf8 symbols", (copy_utf html), "copy_utf8.dat";
    "Copying corpse using utf8 symbols and read buffer", (copy_utf2 html), "copy_utf8_2.dat";
    "Converting corpse to ASCII", (copy_utf2ascii html), "ascii.dat";
  ] in
  let perform (desc, op, file) =
    print_endline desc;
    (op file : unit);
    let (bytes, symbols) = length file in
    Printf.printf "Done. File '%s'; bytes %d, symbols %d\n" file bytes symbols
  in
  List.iter perform steps
    

	      
	       
	       
(* Program entry point: run all demos when the file is loaded. *)
let () = run ()

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-05-03  6:12 [Caml-list] Re: Common IO structure Vladimir N. Silyaev
@ 2004-05-04 21:31 ` Benjamin Geer
  2004-05-04 22:59   ` Yamagata Yoriyuki
  2004-05-05 17:31   ` Vladimir N. Silyaev
  0 siblings, 2 replies; 67+ messages in thread
From: Benjamin Geer @ 2004-05-04 21:31 UTC (permalink / raw)
  To: Vladimir N. Silyaev; +Cc: caml-list

[-- Attachment #1: Type: text/plain, Size: 957 bytes --]

Vladimir N. Silyaev wrote:
> If you felt interested, please look into the attached file io.ml.

This looks like a good start to me; what do others think?

I have two questions about it:

First, I think that rather than having to hard-code the use of the UTF8 
module in an application in order to handle UTF-8 characters, it would 
be more useful to be able to specify encodings using string arguments; 
the choice of encoding is often determined by reading a configuration 
file.  Would there be a way to support this?

Second, I'm wondering if this design can be adapted to accommodate 
non-blocking I/O.  The 'get' function has to return a character, but on 
a non-blocking socket, there might not be any character to return.  I've 
attached a sketch of an approach that might be more suitable for 
non-blocking I/O; I'd like to add it to your design, but I'm having 
trouble figuring out how.  I would be very interested in your thoughts 
on this.

Ben

[-- Attachment #2: buf.ml --]
[-- Type: text/plain, Size: 3216 bytes --]

(* Raised by [get] when there is no byte left between the position and the
   limit. *)
exception Buffer_underflow
(* Raised by [put] and [write] when the data does not fit between the
   position and the limit. *)
exception Buffer_overflow

(* A fixed-capacity byte buffer with a cursor ([position]) and an end mark
   ([limit]).  It is filled from one end, flipped, then drained, so the same
   buffer can sit between a non-blocking reader and a non-blocking writer. *)
module type Async =
sig
  type t
  val create : unit -> t
  val get : t -> char
  val put : t -> char -> unit
  val read : t -> string -> int -> int -> int
  val write : t -> string -> int -> int -> unit
  val from_fd : t -> Unix.file_descr -> unit
  val to_fd : t -> Unix.file_descr -> unit
  val clear : t -> unit
  val flip : t -> unit
  val compact : t -> unit
  val rewind : t -> unit
  val limit : t -> int
  val position : t -> int
  val remaining : t -> int
  val contents : t -> string
end

module AsyncBuffer : Async =
struct
  type t = {
    buf : string;
    mutable pos : int;
    mutable limit: int;
  }
  
  (* A fresh 1024-byte buffer, ready to be filled: position 0, limit at
     capacity. *)
  let create () =
    let capacity = 1024 in
      {
        buf = String.create capacity;
        pos = 0;   
        limit = capacity;
      }
  
  let limit b =
    b.limit

  let position b =
    b.pos

  (* Number of bytes between the position and the limit. *)
  let remaining b =
    b.limit - b.pos

  (* Copy of the bytes between the position and the limit. *)
  let contents b =
    String.sub b.buf b.pos (remaining b)

  (* Return the byte at the position and advance past it.
     The bounds check now comes before the buffer access, so an exhausted
     buffer reports Buffer_underflow even when the position equals the
     capacity.
     @raise Buffer_underflow if the position has reached the limit. *)
  let get b =
    if b.pos < b.limit then begin
      let c = b.buf.[b.pos] in
      b.pos <- b.pos + 1;
      c
    end else
      raise Buffer_underflow

  (* Store [c] at the position and advance past it.
     @raise Buffer_overflow if the position has reached the limit. *)
  let put b c =
    if b.pos < b.limit then
      (b.buf.[b.pos] <- c;
       b.pos <- b.pos + 1)
    else
      raise Buffer_overflow

  (* Copy up to [len] bytes from the buffer into [dst] at [offset]; returns
     the number of bytes copied, which is smaller than [len] when fewer
     bytes remain. *)
  let read b dst offset len =
    let real_len = min len (remaining b) in
      String.blit b.buf b.pos dst offset real_len;
      b.pos <- b.pos + real_len;
      real_len

  (* Copy [len] bytes of [src], starting at [offset], into the buffer.
     @raise Buffer_overflow if fewer than [len] bytes of room remain. *)
  let write b src offset len =
    if len <= remaining b then begin
      String.blit src offset b.buf b.pos len;
      b.pos <- b.pos + len
    end else
      raise Buffer_overflow

  (* Fill the buffer from [fd], advancing the position by the number of
     bytes read.  EAGAIN / EWOULDBLOCK leave the buffer untouched.
     When no room remains nothing is read: a zero-length read would return
     0 and be mistaken for end of file.
     @raise End_of_file when [fd] reports end of input. *)
  let from_fd b fd =
    let room = remaining b in
    if room > 0 then begin
      try
        let len = Unix.read fd b.buf b.pos room in
          b.pos <- b.pos + len;
          if len = 0 then
            raise End_of_file
      with Unix.Unix_error ((Unix.EAGAIN | Unix.EWOULDBLOCK), _, _) -> ()
    end
          
  (* Drain the buffer to [fd], advancing the position by the number of
     bytes actually written.  EAGAIN / EWOULDBLOCK leave the buffer
     untouched. *)
  let to_fd b fd =
    try
      let len = Unix.write fd b.buf b.pos (remaining b) in
        b.pos <- b.pos + len
    with Unix.Unix_error ((Unix.EAGAIN | Unix.EWOULDBLOCK), _, _) -> ()
      
  (* Make the whole buffer available for filling again. *)
  let clear b =
    b.pos <- 0;
    b.limit <- String.length b.buf
  
  (* Switch from filling to draining: the data written so far becomes the
     readable region. *)
  let flip b =
    b.limit <- b.pos;
    b.pos <- 0

  (* Switch from draining back to filling: move the bytes not yet consumed
     to the front, place the position just after them and reopen the limit
     to the full capacity, so the next fill appends to the leftover data.
     Previously the bytes were moved but the cursor was left alone, which
     left no room for the next fill. *)
  let compact b =
    let unread = remaining b in
      String.blit b.buf b.pos b.buf 0 unread;
      b.pos <- unread;
      b.limit <- String.length b.buf
  
  (* Go back to the start without changing the limit. *)
  let rewind b =
    b.pos <- 0
end

(* Shuttle data from [in_fd] to [out_fd] through a single AsyncBuffer:
   fill, flip, drain, compact, and repeat until the fill step raises
   End_of_file.
   NOTE(review): the End_of_file handler returns without another drain
   step, so bytes still held in the buffer at that point are not written. *)
let copy_fd in_fd out_fd =
  let b = AsyncBuffer.create () in
  let rec shuttle () =
    AsyncBuffer.from_fd b in_fd;
    AsyncBuffer.flip b;
    AsyncBuffer.to_fd b out_fd;
    AsyncBuffer.compact b;
    shuttle ()
  in
    try shuttle ()
    with End_of_file -> ()

(* Copy [input_filename] to [output_filename] using non-blocking file
   descriptors and copy_fd.  The output is created if needed (mode 0o644)
   and truncated.
   Both descriptors are closed on every path: if opening the output or the
   copy itself fails, whatever was opened is closed before the exception is
   re-raised. *)
let copy_file input_filename output_filename =
  let in_fd = Unix.openfile input_filename
                [ Unix.O_RDONLY; Unix.O_NONBLOCK ] 0o644 in
  let out_fd =
    (try Unix.openfile output_filename
           [ Unix.O_WRONLY; Unix.O_CREAT; Unix.O_TRUNC; Unix.O_NONBLOCK ] 0o644
     with e -> Unix.close in_fd; raise e) in
    (try copy_fd in_fd out_fd
     with e ->
       (try Unix.close in_fd with Unix.Unix_error _ -> ());
       (try Unix.close out_fd with Unix.Unix_error _ -> ());
       raise e);
    Unix.close in_fd;
    Unix.close out_fd

(* Program entry point: copy input.txt to output.txt when the file is
   loaded. *)
let () =
  copy_file "input.txt" "output.txt"

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-05-04 21:31 ` Benjamin Geer
@ 2004-05-04 22:59   ` Yamagata Yoriyuki
  2004-05-05  8:11     ` skaller
  2004-05-05 17:33     ` Vladimir N. Silyaev
  2004-05-05 17:31   ` Vladimir N. Silyaev
  1 sibling, 2 replies; 67+ messages in thread
From: Yamagata Yoriyuki @ 2004-05-04 22:59 UTC (permalink / raw)
  To: ben; +Cc: vsilyaev, caml-list

From: Benjamin Geer <ben@socialtools.net>
Subject: Re: [Caml-list] Re: Common IO structure
Date: Tue, 04 May 2004 22:31:14 +0100

> Vladimir N. Silyaev wrote:
> > If you felt interested, please look into the attached file io.ml.
> 
> This looks like a good start to me; what do others think?

Our problem is having too many incompatible IO systems, not the lack of
IO.  So, what we need is the standard, not the code.

Of course, if some IO library is widespread, then it will become the de
facto standard.  I advise you to look at existing IO systems for smooth
integration/migration.  In particular, the IO system of ocamlnet
deserves a close look, because it is well thought out and widely used.

BTW, I think this is one of the problems of OCaml community.  We do
not know each other's work well, while too much being bombarded by
hypes from Java/Perl/Python/C#...

--
Yamagata Yoriyuki


-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-05-04 22:59   ` Yamagata Yoriyuki
@ 2004-05-05  8:11     ` skaller
  2004-05-05 15:48       ` Marcin 'Qrczak' Kowalczyk
  2004-05-05 17:33     ` Vladimir N. Silyaev
  1 sibling, 1 reply; 67+ messages in thread
From: skaller @ 2004-05-05  8:11 UTC (permalink / raw)
  To: Yamagata Yoriyuki; +Cc: ben, vsilyaev, caml-list

On Wed, 2004-05-05 at 08:59, Yamagata Yoriyuki wrote:

> BTW, I think this is one of the problems of OCaml community.  We do
> not know each other's work well, while too much being bombarded by
> hypes from Java/Perl/Python/C#...

Perhaps .. but .. recent post to Felix language list:

"The excellent Cil frontc C parser, described here:

http://manju.cs.berkeley.edu/cil/

has been built into Felix as an initial step
in Felix being able to process C library header
files *directly* without binding specifications
or use of an external tool like SWIG."

OK, so Ocaml community doesn't know other's work
so well but here is some more information:

It took 4 hours ONLY to complete the repackaging
and Cil's frontc parsed the (huge and arcane)
gtk+2.0 header without an error, pretty printing
the parse tree in a neat format.

Now try doing that with "Java/Perl/Python/C#" not
to mention C++ or ... oh no .. C .. (OUCH!)

-- 
John Skaller, mailto:skaller@users.sf.net
voice: 061-2-9660-0850, 
snail: PO BOX 401 Glebe NSW 2037 Australia
Checkout the Felix programming language http://felix.sf.net



-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-05-05  8:11     ` skaller
@ 2004-05-05 15:48       ` Marcin 'Qrczak' Kowalczyk
  2004-05-05 19:28         ` skaller
  0 siblings, 1 reply; 67+ messages in thread
From: Marcin 'Qrczak' Kowalczyk @ 2004-05-05 15:48 UTC (permalink / raw)
  To: caml-list

W liście z śro, 05-05-2004, godz. 18:11 +1000, skaller napisał:

> has been built into Felix as an initial step
> in Felix being able to process C library header
> files *directly* without binding specifications
> or use of an external tool like SWIG."

I don't believe it's possible to automatically generate a reasonable
binding to a C library, given only its headers. Except very simple
libraries.

I've recently made a binding between my language and Python. I'm very
happy with the result, objects are automatically wrapped or unwrapped or
converted, but it would not be possible to generate this automatically,
treating Python as a C library (which it is).

One thing which is enough to prevent this is Python objects' reference
counts. Of course a good binding must integrate this with garbage
collection. Each Python API function is documented whether it steals
a reference to arguments (rare) or not, and whether it returns a new
reference or not (it often doesn't return a new reference when it always
returns a subobject of another object). This is only explained in the
documentation, so no automatic tool will handle that.

Other than that, it's a non-trivial work to match the conventions, e.g.
let Python sequences fulfil sequence protocols in my language and vice
versa.

Even the simpler thing, converting types, is not easy. A tool will not
guess that I want to automatically convert Python ints and longs into
INTs in my language, and vice versa; they use entirely different
representation of bignums, so a meaningful conversion must be
implemented manually. And it will not guess how to match mixed
ISO-8859-1 / UTF-32 strings in my language with mixed local byte
encoding / UTF-16-or-32 strings in Python. It will not create a Python
type which wraps arbitrary objects of my language, nor vice versa.
Especially when the objects should translate arguments when they are
applied, and translate attribute access. And a tool will not handle
translation of exceptions, such that they are automatically propagated
with appropriate wrapping and unwrapping.

But when I finally manually did all this (it took a couple of days;
well, it's not completely finished yet), I got a Gtk+ interface for my
language completely for free, because someone once wrapped Gtk+ for
Python. And similarly many other Python libraries are now directly
usable.

An API expressed in Python, as opposed to an API in C, is possible to
be translated into another language quite well even with no hand-written
description. Perhaps many libraries would be somewhat simpler to bind to
than the Python API, but I don't believe in an automatic tool for C
libraries. Too many issues are visible only in the documentation; it's
not possible to know how to map a void* argument.

> It took 4 hours ONLY to complete the repackaging
> and Cil's frontc parsed the (huge and arcane)
> gtk+2.0 header without an error, pretty printing
> the parse tree in a neat format.

Ok, parsed. Then what?

-- 
   __("<         Marcin Kowalczyk
   \__/       qrczak@knm.org.pl
    ^^     http://qrnik.knm.org.pl/~qrczak/


-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-05-04 21:31 ` Benjamin Geer
  2004-05-04 22:59   ` Yamagata Yoriyuki
@ 2004-05-05 17:31   ` Vladimir N. Silyaev
  2004-05-07 22:11     ` Benjamin Geer
  1 sibling, 1 reply; 67+ messages in thread
From: Vladimir N. Silyaev @ 2004-05-05 17:31 UTC (permalink / raw)
  To: caml-list

On Tue, May 04, 2004 at 10:31:14PM +0100, Benjamin Geer wrote:
> >If you felt interested, please look into the attached file io.ml.
> 
> This looks like a good start to me; what do others think?
> 
> I have two questions about it:
> 
> First, I think that rather than having to hard-code the use of the UTF8 
> module in an application in order to handle UTF-8 characters, it would 
> be more useful to be able to specify encodings using string arguments; 
> the choice of encoding is often determined by reading a configuration 
> file.  Would there be a way to support this?
Sure. I probably should have stated this more clearly: the idea was
that the IO module would only specify the signatures IO.Stream.[Read/Write],
IO.Block.[Read/Write] and IO.Filter.[Read/Write]. The rest of the file was
just a quick-and-dirty implementation mapping that IO onto the Pervasives module
and Unix sockets, plus a sample filter for UTF8 bytestream<->unicode conversion.

And assuming that, in addition to the UTF8 filter, we have filters for
UTF16 bytestream<->unicode and Latin1 bytestream<->unicode, there could
be a helper function which takes a filename and an encoding name and returns
an IO.Stream.Read specialized to the Unicode type. This could be done
relatively easily, by providing a functorial interface to Camomile and
reusing existing code.

> 
> Second, I'm wondering if this design can be adapted to accommodate 
> non-blocking I/O.  The 'get' function has to return a character, but on 
> a non-blocking socket, there might not be any character to return.  I've 
> attached a sketch of an approach that might be more suitable for 
> non-blocking I/O; I'd like to add it to your design, but I'm having 
> trouble figuring out how.  I would be very interested in your thoughts 
> on this.
Sure non blocking I/O could be supported, but one should be aware,
that get and/or put could throw exception.

> exception Buffer_underflow
> exception Buffer_overflow
> 
> module type Async =
> sig
>   type t
>   val create : unit -> t
>   val get : t -> char
>   val put : t -> char -> unit
>   val read : t -> string -> int -> int -> int
>   val write : t -> string -> int -> int -> unit
>   val from_fd : t -> Unix.file_descr -> unit
>   val to_fd : t -> Unix.file_descr -> unit
>   val clear : t -> unit
>   val flip : t -> unit
>   val compact : t -> unit
>   val rewind : t -> unit
>   val limit : t -> int
>   val position : t -> int
>   val remaining : t -> int
>   val contents : t -> string
> end

This signature looks like a good starting point. However, I would rather
separate this code into three different pieces:
 - input, an asynchronous source of bytes
 - output, a synchronous source of symbols (char)
 - algorithm (filter), which converts input to output
   and throws an exception where appropriate

Input is a signature, for example like this:
(* Proposed input signature for the non-blocking case.  The argument order
   mirrors Block.Read.read in io.ml; presumably the result is the number of
   bytes read without blocking -- the message does not spell this out. *)
module Block = struct
  module type Read =
  sig
    type t
    val nb_read:  t -> string -> int -> int -> int
  end
end

Output is IO.Stream.Read + IO.Block.Read

And algorithm is a functor, which actually manages all buffering
and offset arithmetic.

Such code structuring would allow algorithm reuse with different
type of input.

Please note that write and read are inherently separated in the signatures:
this allows a simpler interface, supports read-only and write-only streams,
and better fits the common model, where reads and writes are separate.
However, a module could implement both the read and write signatures, if
that is required.

Regards,
Vladimir


-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-05-04 22:59   ` Yamagata Yoriyuki
  2004-05-05  8:11     ` skaller
@ 2004-05-05 17:33     ` Vladimir N. Silyaev
  1 sibling, 0 replies; 67+ messages in thread
From: Vladimir N. Silyaev @ 2004-05-05 17:33 UTC (permalink / raw)
  To: Yamagata Yoriyuki; +Cc: ben, caml-list

On Wed, May 05, 2004 at 07:59:19AM +0900, Yamagata Yoriyuki wrote:
> > Vladimir N. Silyaev wrote:
> > > If you felt interested, please look into the attached file io.ml.
> > 
> > This looks like a good start to me; what do others think?
> 
> Our problem is having too many incompatible IO system, not the lack of
> IO.  So, what we need is the standard, not the code.
> 
> Of course, if some IO library is widespread, then it will become de
> facto standard.  I advise you to look existing IO systems for smooth
> integration/migration.  In particular, the IO system of ocamlnet
> deserves a close look, because it is well thought and widly used.
That's essentially correct. The OCaml community is not that large, and the number
of people willing to contribute time to such a project is
rather limited.
So in order for whatever new way of doing IO to be successful, it
shall be:
 a -  simple
 b -  compatible with existing codebase

Simplicity is exactly the issue with I/O in ocamlnet and extlib:
the interfaces are too rich; there are many different methods and classes.
Surely that model works quite well for the given project, and there is probably
not much sense in rewriting the implementation.  But that is neither
required nor that important; the important part is interoperability
between projects, so that the output of ocamlnet.Cgi could be fed to camomile.Utf8.

And it might be possible to agree on very limited interface, what
doesn't exposes all richness and features of many different modules,
but allows connect different modules together and supports future
extensions.


--
Vladimir

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-05-05 15:48       ` Marcin 'Qrczak' Kowalczyk
@ 2004-05-05 19:28         ` skaller
  0 siblings, 0 replies; 67+ messages in thread
From: skaller @ 2004-05-05 19:28 UTC (permalink / raw)
  To: Marcin 'Qrczak' Kowalczyk; +Cc: caml-list

On Thu, 2004-05-06 at 01:48, Marcin 'Qrczak' Kowalczyk wrote:
> W liście z śro, 05-05-2004, godz. 18:11 +1000, skaller napisał:
> 
> > has been built into Felix as an initial step
> > in Felix being able to process C library header
> > files *directly* without binding specifications
> > or use of an external tool like SWIG."
> 
> I don't believe it's possible to automatically generate a reasonable
> binding to a C library, given only its headers. Except very simple
> libraries.

extern "C" {
#include <some_library.h>
}

Pretty neat wrapper generator, eh?

-- 
John Skaller, mailto:skaller@users.sf.net
voice: 061-2-9660-0850, 
snail: PO BOX 401 Glebe NSW 2037 Australia
Checkout the Felix programming language http://felix.sf.net



-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-05-05 17:31   ` Vladimir N. Silyaev
@ 2004-05-07 22:11     ` Benjamin Geer
  2004-05-08  7:29       ` Vladimir N. Silyaev
  0 siblings, 1 reply; 67+ messages in thread
From: Benjamin Geer @ 2004-05-07 22:11 UTC (permalink / raw)
  To: Vladimir N. Silyaev; +Cc: caml-list

Vladimir N. Silyaev wrote:
> This signature looks like a good starting point. However I would rather
> separate this code to three different pieces

That seems fine to me.  I just wanted to give you a very rough idea of 
what I had in mind; I was pretty sure you'd see a better way to design 
it. :)

> Please note,  that write and read inherently separated in signatures,
> it allows simpler interface, supports read/write only streams
> and better feet common model, where read and writes are separated.
> However, module couldn't implement both read and write signatures, if
> it's required.

The main thing I wanted to point out was that there needs to be a way to 
read data from a non-blocking socket into a buffer, and 
then write the data from the *same buffer* into another non-blocking 
socket.  Then compact the buffer (move any unwritten data to the 
beginning of the buffer) and start again, like in this loop:

(* Pump data from [in_fd] to [out_fd] through a single shared AsyncBuffer:
   fill the buffer from the input, flip it, drain it to the output, then
   compact it (move any unwritten data to the front) and go round again.
   The loop ends only when one of the calls raises End_of_file, which is
   caught here and turned into a normal unit return. *)
let copy_fd in_fd out_fd =
  let shared = AsyncBuffer.create () in
  let rec pump () =
    AsyncBuffer.from_fd shared in_fd;
    AsyncBuffer.flip shared;
    AsyncBuffer.to_fd shared out_fd;
    AsyncBuffer.compact shared;
    pump ()
  in
  try pump () with End_of_file -> ()

Can that still be done if the read and write signatures are separated?

The other thing that's important is that character encoder/decoders 
would need to be able to read characters from one buffer and write them 
to another buffer in a different encoding.  An encoder/decoder would 
need to gracefully handle the case where it reads from a buffer 
containing incomplete characters.  That's another reason for the 
'compact' function: you could read 10 bytes from a socket into a buffer, 
and those 10 bytes could contain 9 bytes worth of complete UTF-8 
characters; the 10th byte would be the first byte of a multi-byte 
character.  You'd pass the buffer to an encoder/decoder, which would 
read 9 bytes and write them into another buffer in a different encoding 
(say UTF-16), leaving the last byte.  You would then call 'compact' to 
move that byte to the beginning of the buffer, and repeat.

Is there a way to fit this approach into what you've proposed for 
encoder/decoders?

Ben

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-05-07 22:11     ` Benjamin Geer
@ 2004-05-08  7:29       ` Vladimir N. Silyaev
  2004-05-09 17:35         ` Benjamin Geer
  0 siblings, 1 reply; 67+ messages in thread
From: Vladimir N. Silyaev @ 2004-05-08  7:29 UTC (permalink / raw)
  To: Benjamin Geer; +Cc: caml-list

On Fri, May 07, 2004 at 11:11:17PM +0100, Benjamin Geer wrote:

> >This signature looks like a good starting point. However I would rather
> >separate this code to three different pieces
> 
> That seems fine to me.  I just wanted to give you a very rough idea of 
> what I had in mind; I was pretty sure you'd see a better way to design 
> it. :)
> 
> >Please note,  that write and read inherently separated in signatures,
> >it allows simpler interface, supports read/write only streams
> >and better feet common model, where read and writes are separated.
> >However, module couldn't implement both read and write signatures, if
> >it's required.
> 
> The main thing I wanted to point out was that there needs to be a way to 
> read data into a buffer from a non-blocking socket into a buffer, and 
> then write the data from the *same buffer* into another non-blocking 
> socket.  Then compact the buffer (move any unwritten data to the 
> beginning of the buffer) and start again, like in this loop:
> 
> let copy_fd in_fd out_fd =
>   let b = AsyncBuffer.create () in
>     try
>       while (true) do
>         AsyncBuffer.from_fd b in_fd;
>         AsyncBuffer.flip b;
>         AsyncBuffer.to_fd b out_fd;
>         AsyncBuffer.compact b
>       done
>     with End_of_file -> ()
> 
> Can that still be done if the read and write signatures are separated?
Surely, just a little twist:
(* Signatures for non-blocking, block-oriented I/O.  Reading and writing
   are kept in separate module types so that a module may provide either
   one of them, or both. *)
module Block = struct
  (* A non-blocking source: a handle type plus one read operation. *)
  module type NbRead = sig
    type t
    (* As used by NbCopy, [nb_read src buf off len] receives the source
       handle, a caller-supplied buffer, an offset into that buffer and a
       length; the int result is treated as the number of bytes obtained.
       NOTE(review): the result when no data is available yet is not
       specified here -- confirm with implementations. *)
    val nb_read:  t -> string -> int -> int -> int
  end
  (* A non-blocking sink: a handle type plus one write operation. *)
  module type NbWrite = sig
    type t
    (* As used by NbCopy, [nb_write dst buf off len] receives the
       destination handle, a caller-supplied buffer, an offset into that
       buffer and a length; the int result is treated as the number of
       bytes accepted.
       NOTE(review): presumably this may be smaller than [len] -- confirm. *)
    val nb_write:  t -> string -> int -> int -> int
  end
end

Note that both read and write use caller-supplied buffers, so zero-copy
I/O is possible.
(* [NbCopy (Src) (Dst)] copies data from a non-blocking source to a
   non-blocking destination through one fixed-size buffer that is handed
   directly to both sides, so no intermediate copy is made. *)
module NbCopy (Src:Block.NbRead) (Dst:Block.NbWrite) = struct
  (* [run s d] moves data from [s] to [d] until End_of_file is raised by
     either side, then returns [()].  Any other exception propagates.

     A write may accept fewer bytes than were read.  The unwritten tail is
     kept and retried before any further read, so data already read is
     never overwritten and lost.

     NOTE(review): if [nb_read] / [nb_write] report "would block" by
     returning 0, this loop spins; a real implementation would wait for
     readiness between attempts. *)
  let run s d =
    let blen = 4096 in
    let buf = String.create blen in
    (* Invariant: bytes [pending_off, pending_end) of [buf] have been read
       from [s] but not yet accepted by [d]. *)
    let rec copy pending_off pending_end =
      if pending_off = pending_end then
        (* Nothing pending: the whole buffer is free, refill from the start. *)
        let n = Src.nb_read s buf 0 blen in
        copy 0 n
      else
        (* Drain what is pending; advance only past what was really written. *)
        let n = Dst.nb_write d buf pending_off (pending_end - pending_off) in
        copy (pending_off + n) pending_end
    in
    try copy 0 0 with
    End_of_file -> ()
end

> 
> The other thing that's important is that character encoder/decoders 
> would need to be able to read characters from one buffer and write them 
> to another buffer in a different encoding.  An encoder/decoder would 
> need to gracefully handle the case where it reads from a buffer 
> containing incomplete characters.  That's another reason for the 
> 'compact' function: you could read 10 bytes from a socket into a buffer, 
> and those 10 bytes could contain 9 bytes worth of complete UTF-8 
> characters; the 10th byte would be the first byte of a multi-byte 
> character.  You'd pass the buffer to an encoder/decoder, which would 
> read 9 bytes and write them into another buffer in a different encoding 
> (say UTF-16), leaving the last byte.  You would then call 'compact' to 
> move that byte to the beginning of the buffer, and repeat.
> 
> Is there a way to fit this approach into what you've proposed for 
> encoder/decoders?
That's more complicated. Basically, what was initially proposed is for
blocking IO, where read (get) is guaranteed either to return a result or
to fail; failure is terminal, so a filter is not required to support a
restart procedure. This works for any type of blocking IO, but it fails for
non-blocking IO, where restart is a common technique. The problem with
restart is that it places unbounded requirements on the filter: it must be
able to handle incomplete inputs and support backtracking.

However, non-blocking IO is usually used as an alternative to threads, in
which case it might be beneficial to just change the control type and add
support for non-blocking IO to the signature by using explicit continuation
passing. Here is an example of get and put signatures in CPS:
 val get: t -> (symbol -> unit) -> unit
 val put: t -> (unit -> unit) -> symbol -> unit

The advantage of CPS style is that the state of the "filter" is captured
by the compiler in a closure at the time of function application. The
disadvantages of CPS style are the rather unusual look of the code and the
cost of closure construction. The latter could be significantly reduced by
short-circuiting filters (encoder/decoder), i.e. by providing a filter which
has block IO as input and a stream of symbols as output.

---
Vladimir

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-05-08  7:29       ` Vladimir N. Silyaev
@ 2004-05-09 17:35         ` Benjamin Geer
  0 siblings, 0 replies; 67+ messages in thread
From: Benjamin Geer @ 2004-05-09 17:35 UTC (permalink / raw)
  To: Vladimir N. Silyaev; +Cc: caml-list

This all looks OK to me, but maybe it would help if you made some 
wrappers for netstring and camomile; this might convince others that 
there's an advantage in having a new API.

Ben

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 12:23                             ` Richard Jones
  2004-04-29 15:10                               ` Benjamin Geer
@ 2004-05-01 14:37                               ` Brian Hurt
  1 sibling, 0 replies; 67+ messages in thread
From: Brian Hurt @ 2004-05-01 14:37 UTC (permalink / raw)
  To: Richard Jones; +Cc: Ocaml Mailing List


Again, sorry for the delay in responding...

On Thu, 29 Apr 2004, Richard Jones wrote:

> Think of an API as like a user interface.  It's a UI for programmers
> to use.

This is a dangerous analogy, I think.  There are several differences.  
First, I damned well *expect* programmers to be willing to RTFM.  

Second, and more importantly, take care that in making the API easy to use
for the common case you don't make it impossible to use in the odd case.  
Being able to extend and reuse the library in unexpected ways and in new
situations is more important to me than making it easy to use in the common
case.  Because I inevitably end up in the odd case trying to make the 
library do something it wasn't designed to do, and discovering you can't 
do that.  This is annoying enough in programs- it's enough to make me dump 
languages in APIs.

And this is one case where we can have our cake and eat it too.  My 
opinion is that the base library should be as flexible as possible- and 
then we can provide wrapper classes/functions for the common cases.  This 
increases the number of classes and functions, however...

The Java.io library has its drawbacks, I'll freely admit.  But the core 
idea- which is the same core idea as the Unix command line, I comment- is 
the best I've ever seen for doing I/O.

-- 
"Usenet is like a herd of performing elephants with diarrhea -- massive,
difficult to redirect, awe-inspiring, entertaining, and a source of
mind-boggling amounts of excrement when you least expect it."
                                - Gene Spafford 
Brian

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 23:53                                       ` Benjamin Geer
@ 2004-04-30  4:10                                         ` james woodyatt
  0 siblings, 0 replies; 67+ messages in thread
From: james woodyatt @ 2004-04-30  4:10 UTC (permalink / raw)
  To: The Caml Trade

On 29 Apr 2004, at 16:53, Benjamin Geer wrote:
> james woodyatt wrote:
>> I need non-blocking I/O.
>
> Have you had a look at Gerd Stolpmann's Equeue module?

Oh yes.  I did.  I wrote something different.  I haven't released it.


-- 
j h woodyatt <jhw@wetware.com>
that's my village calling... no doubt, they want their idiot back.

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 17:31                                     ` james woodyatt
@ 2004-04-29 23:53                                       ` Benjamin Geer
  2004-04-30  4:10                                         ` james woodyatt
  0 siblings, 1 reply; 67+ messages in thread
From: Benjamin Geer @ 2004-04-29 23:53 UTC (permalink / raw)
  To: james woodyatt; +Cc: caml-list Trade

james woodyatt wrote:
> I need non-blocking I/O.

Have you had a look at Gerd Stolpmann's Equeue module?

http://www.ocaml-programming.de/packages/documentation/equeue/

Ben


-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 20:41                                     ` John Goerzen
@ 2004-04-29 22:35                                       ` Benjamin Geer
  0 siblings, 0 replies; 67+ messages in thread
From: Benjamin Geer @ 2004-04-29 22:35 UTC (permalink / raw)
  To: John Goerzen; +Cc: Richard Jones, caml-list

John Goerzen wrote:
> What you have proposed here is exactly what I am proposing and what
> Python does.  It appears we are somehow in complete agreement about what
> should happen.  I guess the disagreement is whether it is like Java.  I
> maintain it is not, since there is a single File object that is used for
> everything -- both files themselves and various converters.

A basic difference between Python and Java, perhaps, is that Java makes 
a strict distinction between bytes and characters.  In Java, characters 
are always Unicode characters.  Since the most basic kind of stream 
(what you're calling a 'file object') can't possibly be a converter, its 
interface can't return characters.  Therefore, in Java, at some point 
you need a different interface, what you could call a 'file object that 
knows about characters', which Java calls Reader.

However, in Caml we can surely do better, as the 'Common IO Structure' 
thread shows; the basic file/channel/stream type can be polymorphic. 
Now if only Nicholas Cannasse, Gerd Stolpmann and Yamagata Yoriyuki can 
agree on the type parameters... :)

Ben

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 15:46                                   ` Benjamin Geer
  2004-04-29 15:58                                     ` Richard Jones
@ 2004-04-29 20:41                                     ` John Goerzen
  2004-04-29 22:35                                       ` Benjamin Geer
  1 sibling, 1 reply; 67+ messages in thread
From: John Goerzen @ 2004-04-29 20:41 UTC (permalink / raw)
  To: Benjamin Geer; +Cc: Richard Jones, caml-list

On Thu, Apr 29, 2004 at 04:46:49PM +0100, Benjamin Geer wrote:
> John Goerzen wrote:
> >On Thu, Apr 29, 2004 at 04:10:46PM +0100, Benjamin Geer wrote:
> >But you do not need a Java-esque API to do that.  All you need is a
> >standardized File object.  You could instantiate one of these by opening
> >a file.  Or perhaps by passing an existing object to the initializer for
> >a gzip decompressor or a Unicode processor.
> 
> The key for me is that I need to be able to chain processing steps 
> together, so that I can, for example, decompress gzip format and convert 
> the result to Unicode, a few bytes at a time.  This suggests to me that 
> the gzip compressor and the Unicode processor should themselves be 
> implementations of the standard File object, so I can wrap a gzip 
> decompressor around an underlying data source, then wrap the Unicode 
> decoder around the gzip decompressor.  The advantage of this approach is 
> that the Unicode decoder doesn't know it's dealing with a gzip 
> decompressor; it only knows it's dealing with something it can read 
> bytes from.  I can then easily remove the decompression step if needed. 
>  And that brings us back to a Java-like approach.  If you can think of 
> a better way of accomplishing this, I'd love to see it.

What you have proposed here is exactly what I am proposing and what
Python does.  It appears we are somehow in complete agreement about what
should happen.  I guess the disagreement is whether it is like Java.  I
maintain it is not, since there is a single File object that is used for
everything -- both files themselves and various converters.  But hey, if
you write this and say it's like Java, I'll be happy anyway.

-- John

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 15:31                                   ` Yamagata Yoriyuki
@ 2004-04-29 17:31                                     ` james woodyatt
  2004-04-29 23:53                                       ` Benjamin Geer
  0 siblings, 1 reply; 67+ messages in thread
From: james woodyatt @ 2004-04-29 17:31 UTC (permalink / raw)
  To: caml-list Trade

On 29 Apr 2004, at 08:31, Yamagata Yoriyuki wrote:
> Encoding could be stateful, so there would be no single representation
> of EOL. (*)  Ok, this is very unlikely case currently, but I think 
> there
> is an interesting encoding for Unicode which is fully stateful.  So,
> readlines() needs to fully aware of the encoding.

This transcoding I/O channel under discussion is required to contain 
internal state for other reasons.  With non-blocking I/O, an underlying 
transport may present only those octets that are ready for reading, 
which may leave a codepoint incomplete at the end of the currently 
received octets.  Even without non-blocking I/O, a read can be 
interrupted by a system signal event and still return less than the 
number of octets requested.  It is not sufficient to defer signal 
processing until after the read completes— sometimes (but not always), 
a signal explicitly means to abort reading immediately.

> My proposal is mainly for sharing common channel types among
> libraries, so that a user can pass a channel from a libraries to
> anonther withoug writing a glue code.  Since parsing endline, or
> loading the whole file into the string mainly occurs in the endpoint
> of IO, I do not think standardizing them are necessary for this
> purpose.
>
> I do not think standardizing the endpoint API is important, because I
> think that in the end, we will use only one library as the endpoint of
> IO.

Most of us.  Some of us have other concerns that I don't see anyone 
else trying to address.  At some point, probably soon, I will be 
writing a wrapper around OpenSSL.  I need non-blocking I/O.  I need to 
parse XML documents of unbounded length, which means using a SAX-like 
parser (I have that now).  I need to be able to parse an arbitrary 
number of XML documents simultaneously.  In potentially any of the 
legal Unicode transfer encodings.  And I need to be responsive to 
events in near real-time.

I have the "control inversion" nightmare from hell.  That's why I have 
forced myself to learn functional programming techniques.

An I/O library that I can use is simply not going to be something that 
can satisfy Richard's requirement that he be able to slurp a whole file 
into an application data structure with a single line of code.  So I'm 
writing my own.  Richard will be appalled by how it works.

So I'm watching this discussion with a certain bemused detachment: I 
wonder what new and improved API will be coming from this that I will 
still find inadequate for my tasks.

> (*) IIRC, RFC defines the endianness of UTF-16 is swapped in the
> middle of the stream, when "BOM" 0xfffe appears.

This is quite true.  Happens all the time, too.


-- 
j h woodyatt <jhw@wetware.com>
markets are only free to the people who own them.
-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 15:46                                   ` Benjamin Geer
@ 2004-04-29 15:58                                     ` Richard Jones
  2004-04-29 20:41                                     ` John Goerzen
  1 sibling, 0 replies; 67+ messages in thread
From: Richard Jones @ 2004-04-29 15:58 UTC (permalink / raw)
  Cc: caml-list

On Thu, Apr 29, 2004 at 04:46:49PM +0100, Benjamin Geer wrote:
> The key for me is that I need to be able to chain processing steps 
> together, so that I can, for example, decompress gzip format and convert 
> the result to Unicode, a few bytes at a time.  This suggests to me that 
> the gzip compressor and the Unicode processor should themselves be 
> implementations of the standard File object, so I can wrap a gzip 
> decompressor around an underlying data source, then wrap the Unicode 
> decoder around the gzip decompressor.  The advantage of this approach is 
> that the Unicode decoder doesn't know it's dealing with a gzip 
> decompressor; it only knows it's dealing with something it can read 
> bytes from.  I can then easily remove the decompression step if needed. 

I entirely agree that this is needed.  My focus however was on making
it simple to do the common things, and possible to do the rare, hard
stuff.  So the API designer should start by writing example programs
in the non-existent API.  Here are some rather fuzzy ideas of mine:

open IO

(* Read a whole file. *)
let content = slurp filename;;

(* Get a list of lines from a compressed file. *)
let lines = open filename >> unzip >> slurp_lines;;

(* Call function f line-by-line on a UTF-16 encoded file. *)
open filename >> utf16_decode >> slurp_lines >> (List.iter f);;

There are some obvious problems (eg. how are files closed? is '>>' a
reserved operator already?) but it's nice to think about what an easy
to use API might look like first *before* thinking about the
implementation.

Rich.

-- 
Richard Jones. http://www.annexia.org/ http://www.j-london.com/
Merjis Ltd. http://www.merjis.com/ - improving website return on investment
Learning Objective CAML for C, C++, Perl and Java programmers:
http://www.merjis.com/richj/computers/ocaml/tutorial/

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 15:35                                 ` John Goerzen
@ 2004-04-29 15:46                                   ` Benjamin Geer
  2004-04-29 15:58                                     ` Richard Jones
  2004-04-29 20:41                                     ` John Goerzen
  0 siblings, 2 replies; 67+ messages in thread
From: Benjamin Geer @ 2004-04-29 15:46 UTC (permalink / raw)
  To: John Goerzen; +Cc: Richard Jones, caml-list

John Goerzen wrote:
> On Thu, Apr 29, 2004 at 04:10:46PM +0100, Benjamin Geer wrote:
> 
>>This is why I'm pleading for a layered API, so that character encoding, 
>>buffering, compression, encryption, and any other optional processing 
> 
> But you do not need a Java-esque API to do that.  All you need is a
> standardized File object.  You could instantiate one of these by opening
> a file.  Or perhaps by passing an existing object to the initializer for
> a gzip decompressor or a Unicode processor.

The key for me is that I need to be able to chain processing steps 
together, so that I can, for example, decompress gzip format and convert 
the result to Unicode, a few bytes at a time.  This suggests to me that 
the gzip compressor and the Unicode processor should themselves be 
implementations of the standard File object, so I can wrap a gzip 
decompressor around an underlying data source, then wrap the Unicode 
decoder around the gzip decompressor.  The advantage of this approach is 
that the Unicode decoder doesn't know it's dealing with a gzip 
decompressor; it only knows it's dealing with something it can read 
bytes from.  I can then easily remove the decompression step if needed. 
  And that brings us back to a Java-like approach.  If you can think of 
a better way of accomplishing this, I'd love to see it.

Ben

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 13:23                             ` John Goerzen
  2004-04-29 14:12                               ` John Goerzen
@ 2004-04-29 15:37                               ` Benjamin Geer
  1 sibling, 0 replies; 67+ messages in thread
From: Benjamin Geer @ 2004-04-29 15:37 UTC (permalink / raw)
  To: John Goerzen; +Cc: caml-list

John Goerzen wrote:
> We were talking about being intuitive here.  I'd have to read maybe a
> dozen different class descriptions, have to understand the differences
> between them, and cross-reference back and forth between them to figure
> out how to get the object I want.  Or pay for a Java book that describes
> these relationships itself.

No, you just need a little tutorial; Sun has a good free one online:

http://java.sun.com/docs/books/tutorial/essential/io/index.html

> Everybody I worked with had the Javadoc for the API bookmarked and
> referred to it constantly.

I've always done that in all programming languages.  I have more 
important things to remember than all the little details of all the APIs 
I use.

> Here's one of the problems.  Java's API makes it complex to do simple
> things without simplifying complex things.  Recall my example -- being
> able to open a file read/write and seeking around in it?  In C, I'd do:
> 
>    int file = open(filename, O_RDWR) or FILE * file = fopen(filename, "r+")
> 
> Perl, it is:
>  
>    open(FH, "+<", $filename)
> 
> Or, in Python:
> 
>    file = open(filename, "r+")

How are any of those simpler than the Java example I gave you:

RandomAccessFile file = new RandomAccessFile(filename, "rw");

> Java requires me to wade through and think about all of these things
> plus the way the file will eventually be used (do I want an array of
> bytes, an array of chars, strings, etc?) right when I open it.

If you want to open a file for reading, you have this in Java:

InputStream in = new FileInputStream(filename);

That's all; you're ready to read bytes.  If you want to convert bytes 
into characters, you can make that decision later:

Reader inReader = new InputStreamReader(in);

That will use your system's default character encoding.  Since this is a 
common case, Java provides a convenience class as a shortcut (no 
InputStream needed):

Reader inReader = new FileReader(filename);

On the other hand, suppose you're reading data that's compressed in Zip 
format?  Given any InputStream (from a socket, a file, or whatever), you 
can wrap it in a ZipInputStream, which knows how to decompress a Zip file:

ZipInputStream zIn = new ZipInputStream(in);

Wasn't that easy?

> for line in open(filename, "r").xreadlines():
>     print line
> 
> See what I mean about intuitive?

I don't find it intuitive for every 'file-like object' (i.e. stream or 
channel) to know about character encodings and assume that it can 
meaningfully chop its data up into lines.  In many cases (e.g. when 
dealing with image or audio files) that assumption will be false.  It 
seems more intuitive to me to have character encodings (and compression, 
encryption, and all other transformations of bytes) in separate, 
optional layers.

Ben

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 15:10                               ` Benjamin Geer
@ 2004-04-29 15:35                                 ` John Goerzen
  2004-04-29 15:46                                   ` Benjamin Geer
  0 siblings, 1 reply; 67+ messages in thread
From: John Goerzen @ 2004-04-29 15:35 UTC (permalink / raw)
  To: Benjamin Geer; +Cc: Richard Jones, caml-list

On Thu, Apr 29, 2004 at 04:10:46PM +0100, Benjamin Geer wrote:
> This is why I'm pleading for a layered API, so that character encoding, 
> buffering, compression, encryption, and any other optional processing 

But you do not need a Java-esque API to do that.  All you need is a
standardized File object.  You could instantiate one of these by opening
a file.  Or perhaps by passing an existing object to the initializer for
a gzip decompressor or a Unicode processor.  ExtLib in CVS has one
approach to this.  I'd prefer to use OCaml's object system myself.

Perhaps I'll write up a proof-of-concept for missinglib...

-- John

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 14:02                                 ` John Goerzen
@ 2004-04-29 15:31                                   ` Yamagata Yoriyuki
  2004-04-29 17:31                                     ` james woodyatt
  0 siblings, 1 reply; 67+ messages in thread
From: Yamagata Yoriyuki @ 2004-04-29 15:31 UTC (permalink / raw)
  To: jgoerzen; +Cc: ben, caml-list

From: John Goerzen <jgoerzen@complete.org>
Subject: Re: [Caml-list] Re: Common IO structure
Date: Thu, 29 Apr 2004 09:02:40 -0500

> On Thu, Apr 29, 2004 at 10:40:36PM +0900, Yamagata Yoriyuki wrote:
> > > > > > OK, but then you can leave out readline(), readlines() and xreadlines(), 
> > > > > > because they don't make any sense unless you've already dealt with 
> > > > > > character encodings.
> > > > > 
> > > > > No, they can simply be implemented in terms of read().
> > > > 
> > > > It will break when UTF-16/UTF-32 are used.  The line separator should
> > > > be handled after code conversion.  At least that is the idea of
> > > > Unicode standard.  (But Since Unicode standard is challenged by
> > > > reality in every aspect, maybe nobody cares.)
> > > 
> > > You are missing the point.  read() could handle the code conversion.
> > 
> > No, what I wanted to say is that the line separator should be handled
> > in the Unicode level, not the byte-character level.  Your design
> > assumes read() always returns new line characters as in ASCII.  This
> > would not hold when read() returns UTF-16/UTF-32.
> 
> I don't see why that is the case.  If read() returns UTF-16 data,
> readlines() works with it, and would of course be scanning it for a
> UTF-16 EOL character or string.  I don't see where that's the problem.

Encoding could be stateful, so there would be no single representation
of EOL. (*)  Ok, this is a very unlikely case currently, but I think there
is an interesting encoding for Unicode which is fully stateful.  So,
readlines() needs to be fully aware of the encoding.

My proposal is mainly for sharing common channel types among
libraries, so that a user can pass a channel from one library to
another without writing glue code.  Since parsing line endings, or
loading the whole file into a string, mainly occurs at the endpoint
of IO, I do not think standardizing them is necessary for this
purpose.

I do not think standardizing the endpoint API is important, because I
think that in the end, we will use only one library as the endpoint of
IO.

(*) IIRC, RFC defines the endianness of UTF-16 is swapped in the
middle of the stream, when "BOM" 0xfffe appears.

--
Yamagata Yoriyuki

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 12:03                               ` Richard Jones
@ 2004-04-29 15:16                                 ` Benjamin Geer
  0 siblings, 0 replies; 67+ messages in thread
From: Benjamin Geer @ 2004-04-29 15:16 UTC (permalink / raw)
  To: Richard Jones; +Cc: caml-list

Richard Jones wrote:
> Actually me too.  90% of my file IO requirement is to slurp a whole
> file into a string

Sometimes I also do get very large messages, so it's nice if, when 
converting from, say, Arabic EBCDIC encoding to Unicode, I don't have to 
read the entire message as EBCDIC bytes, then convert the whole thing to 
Unicode at once.  It uses much less memory if I can read, say, 1K of 
EBCDIC bytes, convert them to Unicode characters, append them to a 
string buffer, and repeat until done.

Another thing that I get fairly often (though less often) is a file 
containing many messages separated by some delimiter.  In this case I 
want to read through the file, converting bytes to Unicode characters as 
I go along, and looking for the delimiter until I have a whole message 
in a string.  I don't want to read the whole file at once, because it 
might be far too big.

Ben

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 12:23                             ` Richard Jones
@ 2004-04-29 15:10                               ` Benjamin Geer
  2004-04-29 15:35                                 ` John Goerzen
  2004-05-01 14:37                               ` Brian Hurt
  1 sibling, 1 reply; 67+ messages in thread
From: Benjamin Geer @ 2004-04-29 15:10 UTC (permalink / raw)
  To: Richard Jones; +Cc: caml-list

Richard Jones wrote:
> Good user interface design does *not* require you to read manuals to
> find out how to use it

I don't expect programming APIs to be as obvious as user interfaces.  If 
I can work out how to use an API without reading a tutorial or HOWTO, 
that's great, but if not, I don't see it as a flaw in the API.

There is a tradeoff between power and flexibility on the one hand, and 
obviousness and simplicity on the other.

> I deliberately chose to make
> the common case this simple because it's the common case and people
> shouldn't have to remember much to use it.

Java does that, too.  If your default encoding is UTF-8, and you want to 
read a UTF-8 file, you can write this instead:

BufferedReader in = new BufferedReader(new FileReader(filename));

If you don't need buffering, you can write:

FileReader in = new FileReader(filename);

and read characters one at a time or in blocks, instead of line-by-line.

I like the fact that the API makes a distinction between bytes and 
characters; I can deal with I/O as raw bytes, or I can use an optional 
layer that translates bytes into characters.  If I don't need that 
layer, I don't have to pay for the overhead.  Likewise, I can add 
buffering only if I need it.  Best of all, if

 > Here's how you read in and parse a CSV file using my OCaml
 > CSV library:
 >
 >   let csv = Csv.load csvfile in

This seems to take too much for granted.  How do I specify the encoding 
and the line endings?  Is all that part of the Csv library, meaning that 
the Csv library has yet another mechanism for handling encodings?  What 
if my file is encrypted or compressed?  How do I make Csv.load use my 
decryption or decompression function?  Do you see what I'm getting at?

This is why I'm pleading for a layered API, so that character encoding, 
buffering, compression, encryption, and any other optional processing 
can be handled by optional layers that everyone can use, whether they're 
dealing with network protocols or just reading files from a disk. 
Moreover, I'm arguing that there should be some stream abstraction that 
can be passed around to things like Csv.load, and that different 
implementations of this abstraction should be free to implement 
decoding, decompression, and so on.  Does that make sense to you?

Ben

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 13:23                             ` John Goerzen
@ 2004-04-29 14:12                               ` John Goerzen
  2004-04-29 15:37                               ` Benjamin Geer
  1 sibling, 0 replies; 67+ messages in thread
From: John Goerzen @ 2004-04-29 14:12 UTC (permalink / raw)
  To: Benjamin Geer; +Cc: caml-list

On Thu, Apr 29, 2004 at 08:23:16AM -0500, John Goerzen wrote:
> And before you say, "See, Java has a StreamReader too!", note that
> codecs defines *4* classes: StreamWriter, StreamReader,
> StreamReaderWriter, and StreamRecoder.  I can handle that.

And I should add that the only time I even care about these 4 classes is
when I need to do some sort of charset handling or conversion.

If I'm just reading binary or ASCII text, the entire issue is completely
irrelevant to me.  As it should be.

And yet if I want to do charset conversions (the example I posted could
be extended to, for instance, convert Latin-2 to UTF-8 on the fly with
only a few more characters), it's still easy.

-- John

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 13:40                               ` Yamagata Yoriyuki
@ 2004-04-29 14:02                                 ` John Goerzen
  2004-04-29 15:31                                   ` Yamagata Yoriyuki
  0 siblings, 1 reply; 67+ messages in thread
From: John Goerzen @ 2004-04-29 14:02 UTC (permalink / raw)
  To: Yamagata Yoriyuki; +Cc: ben, caml-list

On Thu, Apr 29, 2004 at 10:40:36PM +0900, Yamagata Yoriyuki wrote:
> > > > > OK, but then you can leave out readline(), readlines() and xreadlines(), 
> > > > > because they don't make any sense unless you've already dealt with 
> > > > > character encodings.
> > > > 
> > > > No, they can simply be implemented in terms of read().
> > > 
> > > It will break when UTF-16/UTF-32 are used.  The line separator should
> > > be handled after code conversion.  At least that is the idea of
> > > Unicode standard.  (But Since Unicode standard is challenged by
> > > reality in every aspect, maybe nobody cares.)
> > 
> > You are missing the point.  read() could handle the code conversion.
> 
> No, what I wanted to say is that the line separator should be handled
> in the Unicode level, not the byte-character level.  Your design
> assumes read() always returns new line characters as in ASCII.  This
> would not hold when read() returns UTF-16/UTF-32.

I don't see why that is the case.  If read() returns UTF-16 data,
readlines() works with it, and would of course be scanning it for a
UTF-16 EOL character or string.  I don't see where that's the problem.

-- John

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 13:03                             ` John Goerzen
@ 2004-04-29 13:40                               ` Yamagata Yoriyuki
  2004-04-29 14:02                                 ` John Goerzen
  0 siblings, 1 reply; 67+ messages in thread
From: Yamagata Yoriyuki @ 2004-04-29 13:40 UTC (permalink / raw)
  To: jgoerzen; +Cc: ben, caml-list

From: John Goerzen <jgoerzen@complete.org>
Subject: Re: [Caml-list] Re: Common IO structure
Date: Thu, 29 Apr 2004 08:03:35 -0500

> 
> On Thu, Apr 29, 2004 at 07:27:46PM +0900, Yamagata Yoriyuki wrote:
> > > > >Python is simple.  One standard for everything.  You get read(),
> > > > >write(), readline(), readlines(), xreadlines() (hello Extlib, this one's
> > > > >for you), seek(), etc.  This can apply to files, strings, sockets,
> > > > >pipes, whatever.  Before we can start fussing about unicode
> > > > >abstractions, I think we need to have a uniform I/O layer.
> > > > 
> > > > OK, but then you can leave out readline(), readlines() and xreadlines(), 
> > > > because they don't make any sense unless you've already dealt with 
> > > > character encodings.
> > > 
> > > No, they can simply be implemented in terms of read().
> > 
> > It will break when UTF-16/UTF-32 are used.  The line separator should
> > be handled after code conversion.  At least that is the idea of
> > Unicode standard.  (But Since Unicode standard is challenged by
> > reality in every aspect, maybe nobody cares.)
> 
> You are missing the point.  read() could handle the code conversion.

No, what I wanted to say is that the line separator should be handled
at the Unicode level, not the byte-character level.  Your design
assumes read() always returns newline characters as in ASCII.  This
would not hold when read() returns UTF-16/UTF-32.

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 11:23                           ` Benjamin Geer
  2004-04-29 12:23                             ` Richard Jones
@ 2004-04-29 13:23                             ` John Goerzen
  2004-04-29 14:12                               ` John Goerzen
  2004-04-29 15:37                               ` Benjamin Geer
  1 sibling, 2 replies; 67+ messages in thread
From: John Goerzen @ 2004-04-29 13:23 UTC (permalink / raw)
  To: Benjamin Geer; +Cc: caml-list

On Thu, Apr 29, 2004 at 12:23:03PM +0100, Benjamin Geer wrote:
> >InputStreamReader, OutputStream, OutputStreamWriter, RandomAccessFile,
> >Reader, or Writer.  Really, I literally *do not know how to open a
> >simple file*.  I would not call that intuitive.
> 
> You actually have to *read* the documentation, not just glance at the 
> class names. :)  That's to be expected with a powerful API.  Once you 

We were talking about being intuitive here.  I'd have to read maybe a
dozen different class descriptions, have to understand the differences
between them, and cross-reference back and forth between them to figure
out how to get the object I want.  Or pay for a Java book that describes
these relationships itself.

> understand the key concepts governing the design of the API, it makes 
> sense, it and becomes intuitive to select the classes you need.  I tried 
> to point out these concepts in the message you replied to.

Even when I lived and breathed Java every day for a year, its I/O API
was not intuitive.  Actually, most of its APIs were not intuitive.
Everybody I worked with had the Javadoc for the API bookmarked and
referred to it constantly.

In contrast, the API in Python, Perl, or even C is very easy to use.

Here's one of the problems.  Java's API makes it complex to do simple
things without simplifying complex things.  Recall my example -- being
able to open a file read/write and seek around in it?  In C, I'd do:

   int file = open(filename, O_RDWR) or FILE * file = fopen(filename, "r+")

Perl, it is:
 
   open(FH, "+<", $filename)

Or, in Python:

   file = open(filename, "r+")

If I don't know my language's code for file modes, I have one simple
place to look.

Now, none of these examples do UTF-8 or other conversions.  That's fine.
I usually don't need that.  In fact, I dare say that any kind of
conversion like that is by far the minority case.

Java requires me to wade through and think about all of these things
plus the way the file will eventually be used (do I want an array of
bytes, an array of chars, strings, etc?) right when I open it.  That's
bad form.  Make the open a generic call, and let people build upon the
file object from there.  This is how C and Python work.  (Perl is a
little wacko with its open call, but it works that way too, mostly.)

> To read a file containing UTF-8 text, one line at a time:
> 
> BufferedReader in =
>     new BufferedReader
>     (new InputStreamReader
>      (new FileInputStream(filename), "UTF8"));
> 
> while (true)
> {
>     String line = in.readLine();
> 
>     if (line == null)
>     {
>         break;
>     }
> 
>     System.out.println(line);
> }

But the scary part is that this is about how hard it is to read a file
of ASCII text, one line at a time.  Whereas, with Python, I'd do:

for line in open(filename, "r").xreadlines():
    print line

See what I mean about intuitive?

But what about UTF-8 in Python?

import codecs
file = codecs.open(filename, "r", "UTF-8")
for line in file.xreadlines():
    print line

By all means, if we are going to emulate a design from another language,
let us emulate this one.  It is far cleaner and more sensible.  For more
info, see file:/usr/share/doc/python2.3/html/lib/module-codecs.html.

Essentially, the codecs.open call opens a file handle and returns a
StreamReader object that has the file handle passed in to it.  But
here's the key: this StreamReader object is itself a "file-like object"
in Python parlance.  That means you can use it everywhere you could have
used a standard file object (assuming the code is capable of handling
Unicode strings, which it usually is.)  So you still have the helpful
abstraction of Java without all the mess.

And before you say, "See, Java has a StreamReader too!", note that
codecs defines *4* classes: StreamWriter, StreamReader,
StreamReaderWriter, and StreamRecoder.  I can handle that.

> functionality (like buffering).  All the classes whose names end in 
> 'Stream' deal with bytes only; the ones whose names end in 'Reader' or 
> 'Writer' deal with characters.  See?  It's easy once you know the pattern.

But the point is, this distinction is at the wrong place.

-- John

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 10:27                           ` Yamagata Yoriyuki
@ 2004-04-29 13:03                             ` John Goerzen
  2004-04-29 13:40                               ` Yamagata Yoriyuki
  0 siblings, 1 reply; 67+ messages in thread
From: John Goerzen @ 2004-04-29 13:03 UTC (permalink / raw)
  To: Yamagata Yoriyuki; +Cc: ben, caml-list

On Thu, Apr 29, 2004 at 07:27:46PM +0900, Yamagata Yoriyuki wrote:
> > > >Python is simple.  One standard for everything.  You get read(),
> > > >write(), readline(), readlines(), xreadlines() (hello Extlib, this one's
> > > >for you), seek(), etc.  This can apply to files, strings, sockets,
> > > >pipes, whatever.  Before we can start fussing about unicode
> > > >abstractions, I think we need to have a uniform I/O layer.
> > > 
> > > OK, but then you can leave out readline(), readlines() and xreadlines(), 
> > > because they don't make any sense unless you've already dealt with 
> > > character encodings.
> > 
> > No, they can simply be implemented in terms of read().
> 
> It will break when UTF-16/UTF-32 are used.  The line separator should
> be handled after code conversion.  At least that is the idea of
> Unicode standard.  (But Since Unicode standard is challenged by
> reality in every aspect, maybe nobody cares.)

You are missing the point.  read() could handle the code conversion.

-- John

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 11:23                           ` Benjamin Geer
@ 2004-04-29 12:23                             ` Richard Jones
  2004-04-29 15:10                               ` Benjamin Geer
  2004-05-01 14:37                               ` Brian Hurt
  2004-04-29 13:23                             ` John Goerzen
  1 sibling, 2 replies; 67+ messages in thread
From: Richard Jones @ 2004-04-29 12:23 UTC (permalink / raw)
  Cc: caml-list

On Thu, Apr 29, 2004 at 12:23:03PM +0100, Benjamin Geer wrote:
> John Goerzen wrote:
> >I'm looking at java.io right now.  I count no less than 10 interfaces
> >and 50 classes.  Let's say that I want to open up a file for read/write
> >access and be able to seek around in it.  Looking at the class list, I
> >don't know if I want BufferedInputStream, BufferedOutputStream,
> >BufferedReader, BufferedWriter, CharArrayReader, CharArrayWriter,
> >DataInputStream, DataOutputStream, File, FileDescriptor,
> >FileInputStream, FileOutputStream, FileReader, FileWriter, InputStream,
> >InputStreamReader, OutputStream, OutputStreamWriter, RandomAccessFile,
> >Reader, or Writer.  Really, I literally *do not know how to open a
> >simple file*.  I would not call that intuitive.
> 
> You actually have to *read* the documentation, not just glance at the 
> class names. :)  That's to be expected with a powerful API.  Once you 
> understand the key concepts governing the design of the API, it makes 
> sense, it and becomes intuitive to select the classes you need.  I tried 
> to point out these concepts in the message you replied to.

You shouldn't need to read a boatload of documentation just to read a
file, even one encoded in UTF-8.  Especially one encoded in UTF-8,
since in the future most files on Unix will be encoded that way.

Think of an API as like a user interface.  It's a UI for programmers
to use.

Good user interface design does *not* require you to read manuals to
find out how to use it (excepting very special cases like airplanes,
surgical equipment, etc.).  If you designed a website or a computer
program that required you to read a manual before you could use it, no
one would ever use it.  Simple fact.  Don't make an API which needs
you to read manuals to do a trivial operation like slurping in a UTF-8
file.

Here's how you read in and parse a CSV file using my OCaml CSV library:

  let csv = Csv.load csvfile in

('csv' is a list of list of strings).  I deliberately chose to make
the common case this simple because it's the common case and people
shouldn't have to remember much to use it.

> To read a file containing UTF-8 text, one line at a time:
> 
> BufferedReader in =
>     new BufferedReader
>     (new InputStreamReader
>      (new FileInputStream(filename), "UTF8"));

This example really reflects all that's wrong in Java.  Thankfully I
don't have to do Java programming any more - I got out of that job as
soon as I could.

Rich.

-- 
Richard Jones. http://www.annexia.org/ http://www.j-london.com/
Merjis Ltd. http://www.merjis.com/ - improving website return on investment
http://www.YouUnlimited.co.uk/ - management courses

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-29 11:51                             ` Benjamin Geer
@ 2004-04-29 12:03                               ` Richard Jones
  2004-04-29 15:16                                 ` Benjamin Geer
  0 siblings, 1 reply; 67+ messages in thread
From: Richard Jones @ 2004-04-29 12:03 UTC (permalink / raw)
  Cc: caml-list

On Thu, Apr 29, 2004 at 12:51:19PM +0100, Benjamin Geer wrote:
> Richard Jones wrote:
> >* What programs do people need to write?
> 
> Most of what I do involves financial messages, which tend to be small, 
> and come in many different encodings and formats.  So what I usually 
> need to do is read until EOF (from a file, a socket or whatever), 
> convert the data to a Unicode string, and run it through some kind of 
> parser (e.g. an XML parser).  Going the other way, I have a Unicode 
> string, and I just want to convert it to bytes and write it to a file or 
> socket.  That covers 90% of what I do with I/O.

Actually me too.  90% of my file IO requirement is to slurp a whole
file into a string, which suggests that any proposed IO interface
which doesn't allow you to do that in a single line is going to be a
non-starter:

let content = slurp_filename filename in ...

Rich.

-- 
Richard Jones. http://www.annexia.org/ http://www.j-london.com/
Merjis Ltd. http://www.merjis.com/ - improving website return on investment
Perl4Caml lets you use any Perl library in your type-safe Objective
CAML programs. http://www.merjis.com/developers/perl4caml/

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-28 22:41                           ` Richard Jones
@ 2004-04-29 11:51                             ` Benjamin Geer
  2004-04-29 12:03                               ` Richard Jones
  0 siblings, 1 reply; 67+ messages in thread
From: Benjamin Geer @ 2004-04-29 11:51 UTC (permalink / raw)
  To: Richard Jones; +Cc: caml-list

Richard Jones wrote:
> * What programs do people need to write?

Most of what I do involves financial messages, which tend to be small, 
and come in many different encodings and formats.  So what I usually 
need to do is read until EOF (from a file, a socket or whatever), 
convert the data to a Unicode string, and run it through some kind of 
parser (e.g. an XML parser).  Going the other way, I have a Unicode 
string, and I just want to convert it to bytes and write it to a file or 
socket.  That covers 90% of what I do with I/O.

Ben

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-28 21:44                         ` John Goerzen
  2004-04-28 22:41                           ` Richard Jones
  2004-04-29 10:27                           ` Yamagata Yoriyuki
@ 2004-04-29 11:23                           ` Benjamin Geer
  2004-04-29 12:23                             ` Richard Jones
  2004-04-29 13:23                             ` John Goerzen
  2 siblings, 2 replies; 67+ messages in thread
From: Benjamin Geer @ 2004-04-29 11:23 UTC (permalink / raw)
  To: John Goerzen; +Cc: caml-list

John Goerzen wrote:
> I'm looking at java.io right now.  I count no less than 10 interfaces
> and 50 classes.  Let's say that I want to open up a file for read/write
> access and be able to seek around in it.  Looking at the class list, I
> don't know if I want BufferedInputStream, BufferedOutputStream,
> BufferedReader, BufferedWriter, CharArrayReader, CharArrayWriter,
> DataInputStream, DataOutputStream, File, FileDescriptor,
> FileInputStream, FileOutputStream, FileReader, FileWriter, InputStream,
> InputStreamReader, OutputStream, OutputStreamWriter, RandomAccessFile,
> Reader, or Writer.  Really, I literally *do not know how to open a
> simple file*.  I would not call that intuitive.

You actually have to *read* the documentation, not just glance at the 
class names. :)  That's to be expected with a powerful API.  Once you 
understand the key concepts governing the design of the API, it makes 
sense, and it becomes intuitive to select the classes you need.  I tried 
to point out these concepts in the message you replied to.

To read a file containing UTF-8 text, one line at a time:

BufferedReader in =
     new BufferedReader
     (new InputStreamReader
      (new FileInputStream(filename), "UTF8"));

while (true)
{
     String line = in.readLine();

     if (line == null)
     {
         break;
     }

     System.out.println(line);
}

This illustrates the main design concept I was talking about. 
InputStream is an abstract class; different implementations either know 
how to get input from a particular source (like FileInputStream), or are 
meant to be used as wrappers around another InputStream to add 
functionality (like buffering).  All the classes whose names end in 
'Stream' deal with bytes only; the ones whose names end in 'Reader' or 
'Writer' deal with characters.  See?  It's easy once you know the pattern.

To open a file for read/write access and be able to seek around in it:

RandomAccessFile file = new RandomAccessFile(filename, "rw");

The methods in RandomAccessFile are pretty self-explanatory.

Ben

>>OK, but then you can leave out readline(), readlines() and xreadlines(), 
>>because they don't make any sense unless you've already dealt with 
>>character encodings.
> 
> No, they can simply be implemented in terms of read().

A line is a chunk of text, not of bytes.  I don't think it makes sense 
to deal with text unless you know what encoding it's in.

>>Then, before you can divide text into lines, you also need to know which 
>>newline character(s) to use.  This needs to be configurable 
>>programmatically
> 
> That's pretty easy as a class variable

I was only pointing out that neither Python (as far as I can tell) nor 
Java does this.

Ben

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-28 21:44                         ` John Goerzen
  2004-04-28 22:41                           ` Richard Jones
@ 2004-04-29 10:27                           ` Yamagata Yoriyuki
  2004-04-29 13:03                             ` John Goerzen
  2004-04-29 11:23                           ` Benjamin Geer
  2 siblings, 1 reply; 67+ messages in thread
From: Yamagata Yoriyuki @ 2004-04-29 10:27 UTC (permalink / raw)
  To: jgoerzen; +Cc: ben, caml-list

From: John Goerzen <jgoerzen@complete.org>
Subject: Re: [Caml-list] Re: Common IO structure
Date: Wed, 28 Apr 2004 16:44:42 -0500

> > >Python is simple.  One standard for everything.  You get read(),
> > >write(), readline(), readlines(), xreadlines() (hello Extlib, this one's
> > >for you), seek(), etc.  This can apply to files, strings, sockets,
> > >pipes, whatever.  Before we can start fussing about unicode
> > >abstractions, I think we need to have a uniform I/O layer.
> > 
> > OK, but then you can leave out readline(), readlines() and xreadlines(), 
> > because they don't make any sense unless you've already dealt with 
> > character encodings.
> 
> No, they can simply be implemented in terms of read().

It will break when UTF-16/UTF-32 are used.  The line separator should
be handled after code conversion.  At least that is the idea of
the Unicode standard.  (But since the Unicode standard is challenged by
reality in every aspect, maybe nobody cares.)

--
Yamagata Yoriyuki

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-27 19:08               ` Nicolas Cannasse
  2004-04-27 22:22                 ` Gerd Stolpmann
@ 2004-04-29 10:13                 ` Yamagata Yoriyuki
  1 sibling, 0 replies; 67+ messages in thread
From: Yamagata Yoriyuki @ 2004-04-29 10:13 UTC (permalink / raw)
  To: warplayer; +Cc: info, caml-list

From: "Nicolas Cannasse" <warplayer@free.fr>
Subject: Re: [Caml-list] Re: Common IO structure
Date: Tue, 27 Apr 2004 21:08:18 +0200

> - Yamagata Yoriyuki wants IO to be on a char basis (and that makes sense for
> Unicode)
> - you would prefer having buffered channels (and that makes sense for network
> protocols, parsing, ...)
> - I propose that we have two ways of accessing the channel, that can be
> buffered or unbuffered, or others. I think this is general enough to cover a
> lot of different usages, and introduces some interesting polymorphism.
> I would like to get your opinion on that.

I agree with buffered I/O for byte-char I/O.  I prefer

object  ... input : string -> int -> int -> int ... end
object  ... output : string -> int -> int -> unit ... end

to your nread/nwrite, though.

I am against buffered I/O for polymorphic channels, because it would
not be easy to come up with a standard for buffer types.  All
arguments for buffered I/O raised in the list are so far about
byte-character I/O (including UTF-8 channels.)

Di-polymorphic channels are interesting, but unless we have a
standard for buffer types, it would not be useful for the standard.
It will be easy to write a mapping from uni-polymorphic channels to
di-polymorphic channels and vice versa, so the IO system of Extlib does
not need to change.  In the future, when Extlib IO is widely used, we
could regard Extlib IO as the standard.

Since we do not even have a common Unicode character type, we cannot
discuss standardization of Unicode channels.  (One thing at a time!)
Please see all my arguments about Unicode channels as an example of
polymorphic channels.

I still believe my proposal in the previous mail 
  http://caml.inria.fr/archives/200404/msg00716.html 
is reasonable, except for the method names.

--
Yamagata Yoriyuki

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-28 21:44                         ` John Goerzen
@ 2004-04-28 22:41                           ` Richard Jones
  2004-04-29 11:51                             ` Benjamin Geer
  2004-04-29 10:27                           ` Yamagata Yoriyuki
  2004-04-29 11:23                           ` Benjamin Geer
  2 siblings, 1 reply; 67+ messages in thread
From: Richard Jones @ 2004-04-28 22:41 UTC (permalink / raw)
  To: caml-list

Indeed.

People are really missing the point that an API is a user interface
for _programmers_.  The most important thing is that it be easy, short
and simple for programmers to do their job - ie. write _programs_.
'Java.io' and all its myriad of ill-conceived classes which require
you to remember, recall and rewrite tons of crap is a great example of
a terrible _programming_ UI.

So a good way to start, and indeed the way I started when thinking
about mod_caml/ocamldbi, is:

* What programs do people need to write?

* What are they going to have to _type_ to write those programs?

Let's get what you need to _type_ as short as possible, and then you
probably have a good UI that _programmers_ can use.

</annoyed mode="off">

Rich.

-- 
Richard Jones. http://www.annexia.org/ http://www.j-london.com/
Merjis Ltd. http://www.merjis.com/ - improving website return on investment
'There is a joke about American engineers and French engineers. The
American team brings a prototype to the French team. The French team's
response is: "Well, it works fine in practice; but how will it hold up
in theory?"'

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-28 21:30                       ` Benjamin Geer
@ 2004-04-28 21:44                         ` John Goerzen
  2004-04-28 22:41                           ` Richard Jones
                                             ` (2 more replies)
  0 siblings, 3 replies; 67+ messages in thread
From: John Goerzen @ 2004-04-28 21:44 UTC (permalink / raw)
  To: Benjamin Geer; +Cc: caml-list

On Wed, Apr 28, 2004 at 10:30:13PM +0100, Benjamin Geer wrote:
> >>the new one (java.nio)[2].  The old one has the virtue of being easy to 
> >>understand and use, and flexible enough for many situations.  The basic 
> >
> >Uh, no.  I don't have the API reference in front of me,
> 
> I provided links to it in the very message you're replying to.
> 
> >but I seem to
> >recall somewhere around a dozen or so predefined classes for doing
> >I/O...
> 
> I've been using java.io every day for several years, and I find those 
> classes simple and intuitive, particularly the layered approach of using 
> wrappers to add functionality to stream objects, as Nicholas Cannasse 
> points out in another message.

I'm looking at java.io right now.  I count no less than 10 interfaces
and 50 classes.  Let's say that I want to open up a file for read/write
access and be able to seek around in it.  Looking at the class list, I
don't know if I want BufferedInputStream, BufferedOutputStream,
BufferedReader, BufferedWriter, CharArrayReader, CharArrayWriter,
DataInputStream, DataOutputStream, File, FileDescriptor,
FileInputStream, FileOutputStream, FileReader, FileWriter, InputStream,
InputStreamReader, OutputStream, OutputStreamWriter, RandomAccessFile,
Reader, or Writer.  Really, I literally *do not know how to open a
simple file*.  I would not call that intuitive.  Even OCaml, for all its
faults, makes that easier (and that's after I've already found the
function I need in Unix!)

> >Python is simple.  One standard for everything.  You get read(),
> >write(), readline(), readlines(), xreadlines() (hello Extlib, this one's
> >for you), seek(), etc.  This can apply to files, strings, sockets,
> >pipes, whatever.  Before we can start fussing about unicode
> >abstractions, I think we need to have a uniform I/O layer.
> 
> OK, but then you can leave out readline(), readlines() and xreadlines(), 
> because they don't make any sense unless you've already dealt with 
> character encodings.

No, they can simply be implemented in terms of read().

> Then, before you can divide text into lines, you also need to know which 
> newline character(s) to use.  This needs to be configurable 
> programmatically rather than guessed based on the platform the program 
> is running on; some protocols require you to use \r\n regardless of the 
> platform.

That's pretty easy as a class variable, not to mention that \r\n or \n
line-endings can be automatically handled in a reliable way if used
uniformly throughout a file.

-- John

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-28  3:44                     ` John Goerzen
  2004-04-28 13:01                       ` Richard Jones
@ 2004-04-28 21:30                       ` Benjamin Geer
  2004-04-28 21:44                         ` John Goerzen
  1 sibling, 1 reply; 67+ messages in thread
From: Benjamin Geer @ 2004-04-28 21:30 UTC (permalink / raw)
  To: John Goerzen; +Cc: caml-list

John Goerzen wrote:
> On Wed, Apr 28, 2004 at 12:35:26AM +0100, Benjamin Geer wrote:
> 
>>In Java there are two I/O libraries, the original one (java.io)[1] and 
>>the new one (java.nio)[2].  The old one has the virtue of being easy to 
>>understand and use, and flexible enough for many situations.  The basic 
> 
> Uh, no.  I don't have the API reference in front of me,

I provided links to it in the very message you're replying to.

> but I seem to
> recall somewhere around a dozen or so predefined classes for doing
> I/O...

I've been using java.io every day for several years, and I find those 
classes simple and intuitive, particularly the layered approach of using 
wrappers to add functionality to stream objects, as Nicholas Cannasse 
points out in another message.

However, I agree that there are too many classes in java.nio; I'm pretty 
sure something simpler can be done in Caml using its more powerful 
polymorphism.

> Python is simple.  One standard for everything.  You get read(),
> write(), readline(), readlines(), xreadlines() (hello Extlib, this one's
> for you), seek(), etc.  This can apply to files, strings, sockets,
> pipes, whatever.  Before we can start fussing about unicode
> abstractions, I think we need to have a uniform I/O layer.

OK, but then you can leave out readline(), readlines() and xreadlines(), 
because they don't make any sense unless you've already dealt with 
character encodings.

Then, before you can divide text into lines, you also need to know which 
newline character(s) to use.  This needs to be configurable 
programmatically rather than guessed based on the platform the program 
is running on; some protocols require you to use \r\n regardless of the 
platform.

Ben

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-27 16:58                 ` Yamagata Yoriyuki
                                     ` (2 preceding siblings ...)
  2004-04-28  3:39                   ` John Goerzen
@ 2004-04-28 13:04                   ` Richard Jones
  3 siblings, 0 replies; 67+ messages in thread
From: Richard Jones @ 2004-04-28 13:04 UTC (permalink / raw)
  Cc: caml-list

On Wed, Apr 28, 2004 at 01:58:00AM +0900, Yamagata Yoriyuki wrote:
> I'm interested in empirical evidence, though.

I can't claim empirical evidence, but I do know that UTF-8
input/output is increasingly important to me.  We've moved to a model
where all the websites we're doing (even for UK customers) are UTF-8
throughout.  PostgreSQL is UNICODE, webserver serves UTF-8 pages, all
strings manipulated inside the Caml code are really UTF-8 strings
(although we rarely deal with them as anything other than pure bytes).
The only time we convert to other encodings is for handling stupid
mail systems like Hotmail which cannot deal with UTF-8.

Rich.

-- 
Richard Jones. http://www.annexia.org/ http://www.j-london.com/
Merjis Ltd. http://www.merjis.com/ - improving website return on investment
"One serious obstacle to the adoption of good programming languages is
the notion that everything has to be sacrificed for speed. In computer
languages as in life, speed kills." -- Mike Vanier

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-28  3:44                     ` John Goerzen
@ 2004-04-28 13:01                       ` Richard Jones
  2004-04-28 21:30                       ` Benjamin Geer
  1 sibling, 0 replies; 67+ messages in thread
From: Richard Jones @ 2004-04-28 13:01 UTC (permalink / raw)
  Cc: caml-list

On Tue, Apr 27, 2004 at 10:44:15PM -0500, John Goerzen wrote:
> On Wed, Apr 28, 2004 at 12:35:26AM +0100, Benjamin Geer wrote:
> > In Java there are two I/O libraries, the original one (java.io)[1] and 
> > the new one (java.nio)[2].  The old one has the virtue of being easy to 
> > understand and use, and flexible enough for many situations.  The basic 
> 
> Uh, no.  I don't have the API reference in front of me, but I seem to
> recall somewhere around a dozen or so predefined classes for doing
> I/O...  all sorts of StreamReaders, etc, etc.  Please do not model
> anything after this horribly bloated API.

Agreed!  The java.io.* stuff is a great example of what NOT to do.

Rich.

-- 
Richard Jones. http://www.annexia.org/ http://www.j-london.com/
Merjis Ltd. http://www.merjis.com/ - improving website return on investment
Perl4Caml lets you use any Perl library in your type-safe Objective
CAML programs. http://www.merjis.com/developers/perl4caml/

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-27 22:22                 ` Gerd Stolpmann
@ 2004-04-28  7:42                   ` Nicolas Cannasse
  0 siblings, 0 replies; 67+ messages in thread
From: Nicolas Cannasse @ 2004-04-28  7:42 UTC (permalink / raw)
  To: Gerd Stolpmann; +Cc: Yamagata Yoriyuki, caml-list

[...]
> I guess you mean this one from a previous mail:
>
> class ['a,'b] input = object
>     method read : 'a
>     method nread :  int -> 'b
>     method close_in : unit
> end
>
> class ['a,'b,'c] output = object
>     method write : 'a
>     method nwrite : 'b
>     method close_out : 'c
> end
>
> I doubt this is very practical. Consider you want to write Unicode
> characters into a file (I think a common example). The file as such is a
> byte stream, but you want an additional Unicode interface that converts
> implicitly to, say UTF-8. With your idea of generalised channel, the
> only way to do this is to build layers, something like
>
> let file_as_byte_channel = new file_out_channel "name" in
> let file_as_uni_channel = new convert_utf8_to_byte file_as_byte_channel
>
> Call file_as_byte_channel # write to output a byte, and call
> file_as_uni_channel # write to output a Unicode character. You don't
> have a single object that can do both, however. Even worse: If you want
> to use both interfaces alternately, you have to be careful to flush
> buffers at the right time (in the case there are buffers).

I don't know so much about UTF-8, but does it accept normal bytes ? For
example, ANSI chars are converted to identity by UTF-8, aren't they ? So to
write text only you only need to keep the second instance. Of course if
you're dealing with a channel that can write both binary data (without any
conversion) and text data (with UTF-8) the best is to write an adapter
that will enable you to do both, and flush the buffers for you. But
we're already out of the common example you're describing.

Layered IO are powerful. This is how it works in Java : you create for
example a ZipOutputStream with an existing OutputStream. Of course it's
possible that the ZipOutputStream have its own internal buffer, so you have
to be very careful to flush it before writing something directly to the
underlying OutputStream.

Pseudo code :
    use my outputstream
        create a new zip outputstream wrapped on my outputstream
        write contents to it
        flush
    continue using my outputstream

That's same for UTF-8/binary streams.
More interesting, for example in Java you have a CRC32OutputStream, you can
put in at any layer you want, and at any time extract the CRC32 calculated
from all the data that went through it.

> I think it is better to have two methods, one for the polymorphic case,
> and one for strings. The latter plays a special role, simply because all
> I/O finally is string I/O.

I don't agree with this.
Most of IO are (char,string) IO that's true but some are not, I already show
example that were (bool,(int * int)) output.
Of course, you can always define the following :

class type ['a,'b] abstract_input = object
        method read : 'a
        method read_buf : 'b
        ....
end

class type ['a] input = ['a,string] abstract_input

So having one more polymorphic parameter is not so troublesome, and can
actually help.

Regards,
Nicolas Cannasse

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-27 23:35                   ` Benjamin Geer
  2004-04-28  3:44                     ` John Goerzen
@ 2004-04-28  7:05                     ` Nicolas Cannasse
  1 sibling, 0 replies; 67+ messages in thread
From: Nicolas Cannasse @ 2004-04-28  7:05 UTC (permalink / raw)
  To: Benjamin Geer, Yamagata Yoriyuki; +Cc: caml-list

[...]
> > I'm interested in (potential) users of IO libraries.  Could someone
> > comment on IO system of Java, Perl, Python, for example?
>
> In Java there are two I/O libraries, the original one (java.io)[1] and
[...]

For information, here's the java.io.InputStream documentation. All input
streams need to implement these functions :

 int available()
 void close()
 void mark(int readlimit)
 boolean markSupported()
 abstract  int read()
 int read(byte[] b)
 int read(byte[] b, int off, int len)
 void reset()
 long skip(long n)

IMHO, there is too much, and seekable stream should already be a special
kind of input streams.

Regards,
Nicolas Cannasse

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-26 14:26         ` Nicolas Cannasse
@ 2004-04-28  6:52           ` Jacques GARRIGUE
  0 siblings, 0 replies; 67+ messages in thread
From: Jacques GARRIGUE @ 2004-04-28  6:52 UTC (permalink / raw)
  To: warplayer; +Cc: yoriyuki, caml-list

From: "Nicolas Cannasse" <warplayer@free.fr>

> > This also emphasizes one of the advantages of objects: since their
> > types can be compared for equality, several libraries can use the same
> > type without requiring a common header (that is, if everybody agrees
> > on the interface, as you suggest).
> 
> Is there any chance of getting the same behavior with records ?
> Records are currently module-bounded, if Ocaml was enabling structural
> comparison (even without subtyping) it would be very useful.

With the current records?
There are some obstacles, like the fact records allow polymorphic
recursion, making it impossible to check structural equality.
Or the fact that the order of members is relevant, meaning that you
would only get a weak equality anyway.

New polymorphic records?
I would rather try to make objects easier to use, as they are
certainly more powerful. All the current discussion on IO suggests
that you cannot only define one minimal interface, but actually need a
hierarchy of interfaces.

Jacques Garrigue

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-27 23:35                   ` Benjamin Geer
@ 2004-04-28  3:44                     ` John Goerzen
  2004-04-28 13:01                       ` Richard Jones
  2004-04-28 21:30                       ` Benjamin Geer
  2004-04-28  7:05                     ` Nicolas Cannasse
  1 sibling, 2 replies; 67+ messages in thread
From: John Goerzen @ 2004-04-28  3:44 UTC (permalink / raw)
  To: Benjamin Geer; +Cc: Yamagata Yoriyuki, warplayer, caml-list

On Wed, Apr 28, 2004 at 12:35:26AM +0100, Benjamin Geer wrote:
> In Java there are two I/O libraries, the original one (java.io)[1] and 
> the new one (java.nio)[2].  The old one has the virtue of being easy to 
> understand and use, and flexible enough for many situations.  The basic 

Uh, no.  I don't have the API reference in front of me, but I seem to
recall somewhere around a dozen or so predefined classes for doing
I/O...  all sorts of StreamReaders, etc, etc.  Please do not model
anything after this horribly bloated API.

Python is simple.  One standard for everything.  You get read(),
write(), readline(), readlines(), xreadlines() (hello Extlib, this one's
for you), seek(), etc.  This can apply to files, strings, sockets,
pipes, whatever.  Before we can start fussing about unicode
abstractions, I think we need to have a uniform I/O layer.  Already we
have two competing ones (Pervasives vs. Unix) that don't exactly play
along well, and we have no way to emulate a channel with a Buffer (which
is a quite useful thing, one which I wish I had many times for OUnit
test cases).  Others have other I/O layers for sockets, etc. too.

Once you have a standard base to use, it makes more sense to build
Unicode or other logical readers atop that standard.  But make it lean
and flat; not tall and fat like Java.

-- John

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-27 16:58                 ` Yamagata Yoriyuki
  2004-04-27 23:35                   ` Benjamin Geer
  2004-04-28  0:20                   ` skaller
@ 2004-04-28  3:39                   ` John Goerzen
  2004-04-28 13:04                   ` Richard Jones
  3 siblings, 0 replies; 67+ messages in thread
From: John Goerzen @ 2004-04-28  3:39 UTC (permalink / raw)
  To: Yamagata Yoriyuki; +Cc: warplayer, caml-list

On Wed, Apr 28, 2004 at 01:58:00AM +0900, Yamagata Yoriyuki wrote:
> From: "Nicolas Cannasse" <warplayer@free.fr>
> Subject: Re: [Caml-list] Re: Common IO structure
> Date: Tue, 27 Apr 2004 18:17:32 +0200
> 
> > As someone told, read/write concepts are used in most of other languages
> > (including Java, C, and many others).
> 
> read/write of Unix are block-wise operations.  They are not suited for

They can also be used for character-at-a-time operations.

> names for element-wise operations.  get/put come from
> get_char/put_char of C.  Maybe would get_element/put_element be
> better?

But anyway, why dismiss the block-wise operations?  They're still
useful.

-- John

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-27 16:58                 ` Yamagata Yoriyuki
  2004-04-27 23:35                   ` Benjamin Geer
@ 2004-04-28  0:20                   ` skaller
  2004-04-28  3:39                   ` John Goerzen
  2004-04-28 13:04                   ` Richard Jones
  3 siblings, 0 replies; 67+ messages in thread
From: skaller @ 2004-04-28  0:20 UTC (permalink / raw)
  To: Yamagata Yoriyuki; +Cc: warplayer, caml-list

On Wed, 2004-04-28 at 02:58, Yamagata Yoriyuki wrote:

> I'm interested in empirical evidence, though.

You don't need it. It is clear that there are
common (99%) of all cases where UTF-8 representation
of ISO10646 is the same as ASCII, and 90% of the
rest using Latin-1 which converts very very fast.

In these common cases the overhead of non-inlined
function calls to convert characters could be very serious.

Perhaps it isn't and perhaps it is. Who knows?
Providing bulk conversions seems a prudent way to
hedge your bets. It makes the interface richer,
but there is a universal default for the bulk
operations, so no burden is imposed on the implementor.

To add to the argument in favour of bulk conversions:
in principle, doing *any* conversions on I/O is a bad
idea. The order of priority is:

	1. single point codecs
	2. string codecs
	3. IO codecs

Doesn't really make sense to have (1) and (3) and not (2).
	

-- 
John Skaller, mailto:skaller@users.sf.net
voice: 061-2-9660-0850, 
snail: PO BOX 401 Glebe NSW 2037 Australia
Checkout the Felix programming language http://felix.sf.net



-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-27 16:58                 ` Yamagata Yoriyuki
@ 2004-04-27 23:35                   ` Benjamin Geer
  2004-04-28  3:44                     ` John Goerzen
  2004-04-28  7:05                     ` Nicolas Cannasse
  2004-04-28  0:20                   ` skaller
                                     ` (2 subsequent siblings)
  3 siblings, 2 replies; 67+ messages in thread
From: Benjamin Geer @ 2004-04-27 23:35 UTC (permalink / raw)
  To: Yamagata Yoriyuki; +Cc: warplayer, caml-list

Yamagata Yoriyuki wrote:
> I doubt the benefit of
> buffered IO, as I stated in the previous mail.  Unless operation is
> very simple, and atoms are very small, (that is, character IO) extra
> cost of element-wise IO is not important.

But there are times when you want to read one byte or character at a 
time, and in those cases, buffering saves the overhead of a function or 
method call per byte/char.

Buffering is also useful when you have to process a very large amount of 
data, and cannot keep it all in memory at once.

> I'm interested in (potential) users of IO libraries.  Could someone
> comment on IO system of Java, Perl, Python, for example?

In Java there are two I/O libraries, the original one (java.io)[1] and 
the new one (java.nio)[2].  The old one has the virtue of being easy to 
understand and use, and flexible enough for many situations.  The basic 
InputStream and OutputStream classes deal only with bytes, have 
Unix-like 'read' and 'write' methods, and do no buffering.  There are 
derived classes such as FileInputStream and SocketInputStream.  The API 
allows you to add functionality to a stream by using wrappers.  For 
example, to add buffering to any InputStream, you wrap it in a 
BufferedInputStream (which is a class derived from InputStream).  To 
marshal Java objects to a byte stream, you wrap an OutputStream in an 
ObjectOutputStream, and pass objects to the ObjectOutputStream.

Classes derived from Reader and Writer deal with characters, and can be 
wrapped around streams to perform conversions between bytes and 
characters.  For example, to read bytes and convert them to characters, 
you wrap an InputStream in an InputStreamReader, which has a constructor 
that says which encoding to read, and 'read' methods that return 
(Unicode) characters.  Another example of a Reader is LineNumberReader, 
which counts lines in its input.

This is all fine as far as it goes, but it turns out to be cumbersome, 
and in some cases impossible, to implement certain things efficiently 
using this API.  The java.nio API solves these problems, but it is much 
more complicated to use.

For example, suppose you have to read a large amount of text from a 
network connection, convert it to another encoding, and save it in a 
file.  There's too much text to store all of it in memory at once, and 
you're dealing with a lot of network requests at the same time, so in 
any case you want to minimise the amount of memory used by each request. 
  You'd like to be able to read about 4K at a time, convert the bytes to 
the target encoding, and write them to the file.  You could make a 4K 
byte array and use it as a buffer, but what if the input encoding is 
UTF-8?  You might get an incomplete character at the end of the buffer; 
if the UTF-8 decoder is expecting a complete string, it will choke.

The solution in java.nio is to have two different kinds of buffer 
classes: ByteBuffer and CharBuffer.  You can fill up a ByteBuffer, and 
use a Decoder to convert the bytes to Unicode characters; the Decoder 
will read as many complete characters as it can, and put them in a 
CharBuffer.  You then 'compact' the ByteBuffer, which moves any 
remaining bytes to the beginning of the buffer, and start again. 
(Similarly, you can use an Encoder to convert the characters to bytes in 
the target encoding, filling up a ByteBuffer which you can then write to 
an output channel.)

Some of the other useful things java.nio provides are:

* 'Direct' byte buffers.  'Given a direct byte buffer, the Java virtual 
machine will make a best effort to perform native I/O operations 
directly upon it. That is, it will attempt to avoid copying the buffer's 
content to (or from) an intermediate buffer before (or after) each 
invocation of one of the underlying operating system's native I/O 
operations.'

* Buffers that correspond to a memory-mapped region of a file.  This can 
be useful for dealing with huge files; it takes advantage of the 
operating system's support for memory-mapped files, where available.

* 'Scattering' channels.  'A scattering read operation reads, in a 
single invocation, a sequence of bytes into one or more of a given 
sequence of buffers. Scattering reads are often useful when implementing 
network protocols or file formats that, for example, group data into 
segments consisting of one or more fixed-length headers followed by a 
variable-length body.  Similar gathering write operations are defined in 
the GatheringByteChannel interface.'

My own view is that the flexibility and efficiency permitted by java.nio 
are valuable, but that its complexity is a problem.  The behaviour of 
the buffer classes[3] is tricky to understand and therefore error-prone.

Ben

[1] http://java.sun.com/j2se/1.4.2/docs/api/java/io/package-summary.html

[2] http://java.sun.com/j2se/1.4.2/docs/api/java/nio/package-summary.html

[3] http://java.sun.com/j2se/1.4.2/docs/api/java/nio/Buffer.html

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-27 19:08               ` Nicolas Cannasse
@ 2004-04-27 22:22                 ` Gerd Stolpmann
  2004-04-28  7:42                   ` Nicolas Cannasse
  2004-04-29 10:13                 ` Yamagata Yoriyuki
  1 sibling, 1 reply; 67+ messages in thread
From: Gerd Stolpmann @ 2004-04-27 22:22 UTC (permalink / raw)
  To: Nicolas Cannasse; +Cc: Yamagata Yoriyuki, caml-list

On Die, 2004-04-27 at 21:08, Nicolas Cannasse wrote:
> >
> > http://ocamlnet.sourceforge.net/intro/netchannels.html
> 
> We're actually quite near to agree on what should be the minimal
> requirements.
> Looks like you included pos_in / pos_out into the "fundamentals methods" ,
> would you agree to drop these ?

I would not regard them as fundamental. Actually, they are derived, as
they only count the characters flowing through input/output. (They have
nothing to do with absolute file positions.)

Of course, I don't remove them from ocamlnet, as they are very useful
for that context. It is no problem to coerce them away, and to create
them when needed by inheriting them from a virtual class. That won't
complicate interoperability very much.

> In short, I think we all want different things :
> - Yamagata Yoriyuki want IO to be on a char basis (and that makes sense for
> Unicode)
> - you would prefer having buffered channels (and that make sense for network
> protocols, parsing, ...)

They can also be unbuffered, it is not specified. The nice thing about
ocamlnet's channels is that you can add buffers when you need them,
without changing the signature.

Of course, the focus are buffered channels.

> - I propose that we have two way of accessing the channel, that can be
> buffered or unbuffered, or others. I think this is enough general to cover a
> lot of different usage, and introduce some interesting polymorphism.
> I would like to get your opinion on that.

I guess you mean this one from a previous mail:

class ['a,'b] input = object
    method read : 'a
    method nread :  int -> 'b
    method close_in : unit
end

class ['a,'b,'c] output = object
    method write : 'a
    method nwrite : 'b
    method close_out : 'c
end

I doubt this is very practical. Consider you want to write Unicode
characters into a file (I think a common example). The file as such is a
byte stream, but you want an additional Unicode interface that converts
implicitly to, say UTF-8. With your idea of generalised channel, the
only way to do this is to build layers, something like

let file_as_byte_channel = new file_out_channel "name" in
let file_as_uni_channel = new convert_utf8_to_byte file_as_byte_channel

Call file_as_byte_channel # write to output a byte, and call
file_as_uni_channel # write to output a Unicode character. You don't
have a single object that can do both, however. Even worse: If you want
to use both interfaces alternately, you have to be careful to flush
buffers at the right time (in the case there are buffers).

I think it is better to have two methods, one for the polymorphic case,
and one for strings. The latter plays a special role, simply because all
I/O finally is string I/O.

Gerd
-- 
------------------------------------------------------------
Gerd Stolpmann * Viktoriastr. 45 * 64293 Darmstadt * Germany 
gerd@gerd-stolpmann.de          http://www.gerd-stolpmann.de
------------------------------------------------------------

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-27 16:00               ` Yamagata Yoriyuki
@ 2004-04-27 21:51                 ` Gerd Stolpmann
  0 siblings, 0 replies; 67+ messages in thread
From: Gerd Stolpmann @ 2004-04-27 21:51 UTC (permalink / raw)
  To: Yamagata Yoriyuki; +Cc: caml-list

On Die, 2004-04-27 at 18:00, Yamagata Yoriyuki wrote:
> From: Gerd Stolpmann <info@gerd-stolpmann.de>
> Subject: Re: [Caml-list] Re: Common IO structure
> Date: Mon, 26 Apr 2004 22:56:59 +0200
> 
> > Of course, sharing the same method name is possible, in ocamlnet we have
> > e.g. output_char where camomile has put_char. So the question is whether
> > this is worth the effort.
> 
> Camomile uses "put", not "put_char", because channels are polymorphic.

Oh sorry, I just remembered it wrong.

> If ocamlnet channels have input/output for strings, but have
> output_char for one Unicode character, then I would say output_char is
> different from Camomile "put", because Camomile "put" is supposed to
> output one atom (for character channels, atom is char, not a Unicode
> character.)

Yes, in principle, they are different.

> Since I am convinced by Gerd's argument for close_in/close_out, I
> updates my proposal as
> (for input)
> ['a] object 
>      get : unit -> 'a 
>      close_in : unit
> end 
> (raise End_of_file when there is no more element to read.)
> 
> (for output)
> ['a] object 
>      put : 'a -> unit 
>      flush : unit -> unit 
>      close_out : unit -> unit
> end
> 
> for a character channel,
> (for input)
> object 
>        input : string -> int -> int -> int 
>        close_in : unit
> end
> 
> ([c#input s pos len] fills s from pos with less than [len] characters,
> and returns the number of characters really filled.)
> 
> (for output)
> object 
>        output : string -> int -> int -> unit
>        flush : unit -> unit
>        close_out : unit -> unit
> end
> ([c#output s pos len] outputs [len] characters from the position
> [pos])
> 
> But the distinction of put/input, get/output may be confusing.  Hmmm.

I am sure we are very close to a solution here. 

As another name for get/put we also have receive/send. The Event module
of the stdlib uses them.

Gerd
-- 
------------------------------------------------------------
Gerd Stolpmann * Viktoriastr. 45 * 64293 Darmstadt * Germany 
gerd@gerd-stolpmann.de          http://www.gerd-stolpmann.de
------------------------------------------------------------

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-26 20:56             ` Gerd Stolpmann
                                 ` (2 preceding siblings ...)
  2004-04-27 16:00               ` Yamagata Yoriyuki
@ 2004-04-27 19:08               ` Nicolas Cannasse
  2004-04-27 22:22                 ` Gerd Stolpmann
  2004-04-29 10:13                 ` Yamagata Yoriyuki
  3 siblings, 2 replies; 67+ messages in thread
From: Nicolas Cannasse @ 2004-04-27 19:08 UTC (permalink / raw)
  To: Gerd Stolpmann; +Cc: Yamagata Yoriyuki, caml-list

> > > And from my point of view, your proposal has some problems.  For one
> > > thing, it is not compatible to the already existing I/O channels in
> > > other libraries than Extlib.  Camomile uses get and put for your read
> > > and write, and ocamlnet and cryptokit uses input and output (IIRC) for
> > > your nread and nwrite.
> >
> > So ? That's exactly what we're talking about it there : making a choice.
And
> > that include naming of course. I don't say that the name we choosed for
> > ExtLib IO are better, it's just that "reading" and "writing" on an IO
seems
> > natural to me.
>
> That sounds like a paraphrase for "better" without using this word. I
> would like to hear real arguments for why certain names should be used.
> For example, one reason can be that there is already a user base.
>
> I think names are just names, and there are usually several ways of
> referring to things. However, when several independent libraries
> _choose_ to name their methods in a coherent way, I would say this is
> very intelligent.

I agree with you.
Let's agree on something consistent : naming is a non-issue.

> > > Another problem is that it is not minimal
> > > enough.  For character converters, it is impossible to predict how
> > > many characters will be available, for example.  And requiring "pos",
> > > "nread", "nwrite" seems arbitrary for me.  They are somtimes useful
> > > and improvement, but not necessary.
> >
> > That's true, I agree with you but on the last point : they are necessary
in
> > order to get good performances. Concerning "available", it returns None
if
> > no data available. "pos" might throw an exception as well when
unavailable
> > (looks like pos and available should have same behavior here). And
> > nread/nwrite can simply call n times read/write. That means that any
library
> > can put default implementation for additional "not minimal" constructs :
> > they will behave poorly (writing a string char by char) but will
interface
> > well with other IO that are supporting them correctly. If implementing
> > efficently nread/nwrite require additionnal effort, then let's implement
a
> > default behavior and implement it better later. Having theses functions
make
> > room for future improvements, which is not done with minimal IO.
>
> Guess what? ocamlnet implements all that in a convincing way. Read its
> introduction to OO wrappers for I/O:
>
> http://ocamlnet.sourceforge.net/intro/netchannels.html

We're actually quite close to agreeing on what the minimal requirements
should be.
It looks like you included pos_in / pos_out in the "fundamental methods";
would you agree to drop these?

In short, I think we all want different things:
- Yamagata Yoriyuki wants IO to be on a char basis (and that makes sense for
Unicode)
- you would prefer having buffered channels (and that makes sense for network
protocols, parsing, ...)
- I propose that we have two ways of accessing the channel, which can be
buffered or unbuffered, or others. I think this is general enough to cover a
lot of different usages, and introduces some interesting polymorphism.
I would like to get your opinion on that.

Best Regards,
Nicolas Cannasse


-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-27 16:17               ` Nicolas Cannasse
@ 2004-04-27 16:58                 ` Yamagata Yoriyuki
  2004-04-27 23:35                   ` Benjamin Geer
                                     ` (3 more replies)
  0 siblings, 4 replies; 67+ messages in thread
From: Yamagata Yoriyuki @ 2004-04-27 16:58 UTC (permalink / raw)
  To: warplayer; +Cc: caml-list

From: "Nicolas Cannasse" <warplayer@free.fr>
Subject: Re: [Caml-list] Re: Common IO structure
Date: Tue, 27 Apr 2004 18:17:32 +0200

> As someone told, read/write concepts are used in most of other languages
> (including Java, C, and many others).

read/write of Unix are block-wise operations.  They are not suitable as
names for element-wise operations.  get/put come from
get_char/put_char of C.  Maybe get_element/put_element would be
better?

> They'll maybe not - in the Unicode case, but they'll definilty help for
> other IO.

Unicode IO is one of the most important kinds of IO, second only to
character IO.  (Maybe the most important in the future.)  And I doubt the
benefit of buffered IO, as I stated in the previous mail.  Unless the
operation is very simple and the atoms are very small (that is, character
IO), the extra cost of element-wise IO is not important.  Note that channels
can internally process data in chunks.  The only difference is whether the
copy between the internal buffer and an external buffer occurs in bulk or
element-wise.

I'm interested in empirical evidence, though.

> If the user need to write both chars and strings, he will need to
> carry two objects instead of one.

No.  The guy will use a single object having a signature like this.

object
 method	get : char
 method input : string -> int -> int -> int
 method close_in : unit
end

It conforms to both of the class types I proposed for input.

> class input = object
>      method read : char
>      method nread  : int -> string
>      method close_in : unit
> end
> 
> class output = object
>      method write : char
>      method nwrite : string
>      method close_out : unit
> end

The types of nread and nwrite differ substantially from input/output
in ocamlnet, which would mean a major rewrite of ocamlnet.  And I am not
sure that allocating a new string for each input offers a performance
benefit.

I'm interested in (potential) users of IO libraries.  Could someone
comment on the IO systems of Java, Perl, Python, for example?

--
Yamagata Yoriyuki

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-27 15:43             ` Yamagata Yoriyuki
@ 2004-04-27 16:17               ` Nicolas Cannasse
  2004-04-27 16:58                 ` Yamagata Yoriyuki
  0 siblings, 1 reply; 67+ messages in thread
From: Nicolas Cannasse @ 2004-04-27 16:17 UTC (permalink / raw)
  To: Yamagata Yoriyuki; +Cc: caml-list

> > So ? That's exactly what we're talking about it there : making a choice.
And
> > that include naming of course. I don't say that the name we choosed for
> > ExtLib IO are better, it's just that "reading" and "writing" on an IO
seems
> > natural to me.
>
> get/put are used in Camomie already, and input/output are used in
> ocamlnet.  OO wrappers of Extlib IO are only recent addition, so
> changing Extlib is more natural.  In this case, what Extlib should do is
> just changing OO wrappers.  I do not think you need to change IO
> module itself.

As someone said, read/write concepts are used in most other languages
(including Java, C, and many others). I agree ExtLib's IO is recent and
should not dictate how other libraries' methods should be named. But since
we're standardizing things, Camomile, Ocamlnet and Extlib will all have to
rewrite some code in order to be intercompatible. Since we need to do that,
let's make a choice based not on what's already written, but on what is
best for the end user. A slashdot poll would help here :)
But don't worry, if you and Gerd agree on some naming, ExtLib IO will
follow. I just want the read/write naming to be considered as seriously as
the other naming possibilities.

> > > Another problem is that it is not minimal
> > > enough.  For character converters, it is impossible to predict how
> > > many characters will be available, for example.  And requiring "pos",
> > > "nread", "nwrite" seems arbitrary for me.  They are somtimes useful
> > > and improvement, but not necessary.
> >
> > That's true, I agree with you but on the last point : they are necessary
in
> > order to get good performances. Concerning "available", it returns None
if
> > no data available. "pos" might throw an exception as well when
unavailable
> > (looks like pos and available should have same behavior here).
>
> My philosophy is to make the type informative as far as possible.  If
> some method does not work, I would rather remove the method, and
> notify this fact to a user in the compile time (not in the runtime).  A
> user, or the library developer can provide wrappers if necessary.
>
> Philoshophy aside, I do not see how pos and available improve
> performance.  pos certainly decreases performance.  Anyways disks are
> much slower than CPU, so arguing small performance benefit is
> nonsense.  Since there are many possible "improvements" (seek, unget,
> length, destination addresses), it would be better to stick the
> algebraically minimal specification.
>
> Note that I do not oppose extension as such.  I oppose making them as
> the standard.


I agree on dropping pos and available from the standard. ExtLib will deal
consistently without them.


> > And nread/nwrite can simply call n times read/write. That means that
> > any library can put default implementation for additional "not
> > minimal" constructs : they will behave poorly (writing a string char
> > by char) but will interface well with other IO that are supporting
> > them correctly. If implementing efficently nread/nwrite require
> > additionnal effort, then let's implement a default behavior and
> > implement it better later. Having theses functions make room for
> > future improvements, which is not done with minimal IO.
>
> Choosing a type of buffers is not trivial (except char, in this case I
> propose using stirng, of courst).  For exmaple, what is for a Unicode
> channel?  UTF8/UTF16/UTF32 strings, array, DynArray.t, all have their
> own advantage.  And if someone uses UTF8, another uses UTF16 and so
> on, then there is not much point of having standard.
>
> Of course we would make "nread/nwrite" use a default buffer type
> (maybe list, as you do Extlib), but I doubt that such "filler" methods
> do any good.

Maybe they won't in the Unicode case, but they'll definitely help for
other IO.
Concerning the channels, I'm against having 4 classes instead of 2. If the
user needs to write both chars and strings, he will need to carry two objects
instead of one.
My proposal is based on the following :

class input = object
     method read : char
     method nread  : int -> string
     method close_in : unit
end

class output = object
     method write : char
     method nwrite : string
     method close_out : unit
end

and since it's so easy, let's add some polymorphism to get more general and
more powerful IO objects :

class ['a,'b] input = object
    method read : 'a
    method nread :  int -> 'b
    method close_in : unit
end

class ['a,'b,'c] output = object
    method write : 'a
    method nwrite : 'b
    method close_out : 'c
end

Having bi-polymorphism over an IO is really powerful : you can handle
buffered read/write like this or polymorphic writes (two ways of reading,
two ways of writing).
An example from the ExtLib :

val input_bits : (char,'a) input -> (bool,int) input
val output_bits : (char,'a,'b) output -> (bool,(int * int),'b) output

enable you to read bit-by-bit over a channel :

let ch = input_bits .... in
let b = IO.read ch in (* read a bit as a boolean *)
let n = IO.nread ch 5 in (* read 5 bits as an integer *)
...

let ch = output_bits ... in
IO.write ch true; (* write a bit *)
IO.write ch false;
IO.nwrite ch (5,31); (* write 31 using 5 bits *)

It's not only an extension: putting that in the core interface enables a
wide variety of IO.
I'm interested in your thoughts about that.

Best Regards,
Nicolas Cannasse

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-26 20:56             ` Gerd Stolpmann
  2004-04-26 21:14               ` John Goerzen
  2004-04-26 21:52               ` Benjamin Geer
@ 2004-04-27 16:00               ` Yamagata Yoriyuki
  2004-04-27 21:51                 ` Gerd Stolpmann
  2004-04-27 19:08               ` Nicolas Cannasse
  3 siblings, 1 reply; 67+ messages in thread
From: Yamagata Yoriyuki @ 2004-04-27 16:00 UTC (permalink / raw)
  To: info; +Cc: warplayer, caml-list

From: Gerd Stolpmann <info@gerd-stolpmann.de>
Subject: Re: [Caml-list] Re: Common IO structure
Date: Mon, 26 Apr 2004 22:56:59 +0200

> Of course, sharing the same method name is possible, in ocamlnet we have
> e.g. output_char where camomile has put_char. So the question is whether
> this is worth the effort.

Camomile uses "put", not "put_char", because channels are polymorphic.
If ocamlnet channels have input/output for strings, but have
output_char for one Unicode character, then I would say output_char is
different from Camomile "put", because Camomile "put" is supposed to
output one atom (for character channels, the atom is a char, not a Unicode
character.)

Since I am convinced by Gerd's argument for close_in/close_out, I
update my proposal as
(for input)
['a] object 
     get : unit -> 'a 
     close_in : unit
end 
(raise End_of_file when there is no more element to read.)

(for output)
['a] object 
     put : 'a -> unit 
     flush : unit -> unit 
     close_out : unit -> unit
end

for a character channel,
(for input)
object 
       input : string -> int -> int -> int 
       close_in : unit
end

([c#input s pos len] fills s from pos with less than [len] characters,
and returns the number of characters really filled.)

(for output)
object 
       output : string -> int -> int -> unit
       flush : unit -> unit
       close_out : unit -> unit
end
([c#output s pos len] outputs [len] characters from the position
[pos])

But the distinction of put/input, get/output may be confusing.  Hmmm.

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-26 19:28           ` Nicolas Cannasse
  2004-04-26 20:56             ` Gerd Stolpmann
@ 2004-04-27 15:43             ` Yamagata Yoriyuki
  2004-04-27 16:17               ` Nicolas Cannasse
  1 sibling, 1 reply; 67+ messages in thread
From: Yamagata Yoriyuki @ 2004-04-27 15:43 UTC (permalink / raw)
  To: warplayer; +Cc: caml-list

From: "Nicolas Cannasse" <warplayer@free.fr>
Subject: Re: [Caml-list] Re: Common IO structure
Date: Mon, 26 Apr 2004 21:28:53 +0200

> > You miss my point.  What I propose is having agreement over I/O
> > channels.  So, having OO wrappers only solves the half of our
> > problems.  Another half is whether or not developpers accept
> > them.
> >
> > And from my point of view, your proposal has some problems.  For one
> > thing, it is not compatible to the already existing I/O channels in
> > other libraries than Extlib.  Camomile uses get and put for your read
> > and write, and ocamlnet and cryptokit uses input and output (IIRC) for
> > your nread and nwrite.
> 
> So ? That's exactly what we're talking about it there : making a choice. And
> that include naming of course. I don't say that the name we choosed for
> ExtLib IO are better, it's just that "reading" and "writing" on an IO seems
> natural to me.

get/put are used in Camomile already, and input/output are used in
ocamlnet.  The OO wrappers of Extlib IO are only a recent addition, so
changing Extlib is more natural.  In this case, all Extlib needs to do is
change the OO wrappers.  I do not think you need to change the IO
module itself.

> 
> > Another problem is that it is not minimal
> > enough.  For character converters, it is impossible to predict how
> > many characters will be available, for example.  And requiring "pos",
> > "nread", "nwrite" seems arbitrary for me.  They are somtimes useful
> > and improvement, but not necessary.
> 
> That's true, I agree with you but on the last point : they are necessary in
> order to get good performances. Concerning "available", it returns None if
> no data available. "pos" might throw an exception as well when unavailable
> (looks like pos and available should have same behavior here).

My philosophy is to make the type as informative as possible.  If
some method does not work, I would rather remove the method, and
notify the user of this fact at compile time (not at runtime).  A
user, or the library developer, can provide wrappers if necessary.

Philosophy aside, I do not see how pos and available improve
performance.  pos certainly decreases performance.  Anyway, disks are
much slower than the CPU, so arguing over a small performance benefit is
nonsense.  Since there are many possible "improvements" (seek, unget,
length, destination addresses), it would be better to stick to the
algebraically minimal specification.

Note that I do not oppose extension as such.  I oppose making them as
the standard.

> And nread/nwrite can simply call n times read/write. That means that
> any library can put default implementation for additional "not
> minimal" constructs : they will behave poorly (writing a string char
> by char) but will interface well with other IO that are supporting
> them correctly. If implementing efficently nread/nwrite require
> additionnal effort, then let's implement a default behavior and
> implement it better later. Having theses functions make room for
> future improvements, which is not done with minimal IO.

Choosing a buffer type is not trivial (except for char; in this case I
propose using string, of course).  For example, what should it be for a
Unicode channel?  UTF8/UTF16/UTF32 strings, array, DynArray.t all have
their own advantages.  And if someone uses UTF8, another uses UTF16 and so
on, then there is not much point in having a standard.

Of course we would make "nread/nwrite" use a default buffer type
(maybe list, as you do Extlib), but I doubt that such "filler" methods
do any good.

As for extensibility, it is guaranteed once we agree to use objects.  As
I stated above, I do not oppose using extensions of channel types.
What I propose is just to make the channels in each library an extension
of the "standard" channel objects, and to make standard channels work as
arguments to the library APIs as far as possible (or to provide a
converter).

> 
> > Since I want that these interfaces are accepted as the common
> > standard, I wanted that the requirement is absolutely minimal.  My
> > proposal in the previous mail is inspired by the view that channels
> > are co-inductive types defined by its constructer and consumer.
> > Without those methods, they are not channels any more.
> >
> > I'm interested in your opinion.
> 
> Mapping several IO ( a zlib compression + a base64 encoding + a unicode
> reader ) and using them all together to read and write chars will definitly
> slow down. Buffering is our friend, and require some more constructs.

For characters, I have proposed buffered IO.

--
Yamagata Yoriyuki


-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-26 21:14               ` John Goerzen
@ 2004-04-26 22:32                 ` Gerd Stolpmann
  0 siblings, 0 replies; 67+ messages in thread
From: Gerd Stolpmann @ 2004-04-26 22:32 UTC (permalink / raw)
  To: John Goerzen; +Cc: Nicolas Cannasse, Yamagata Yoriyuki, caml-list

On Mon, 2004-04-26 at 23:14, John Goerzen wrote:
> On Mon, Apr 26, 2004 at 10:56:59PM +0200, Gerd Stolpmann wrote:
> > I would suggest to adopt the names "input", "output", "close_in",
> > "close_out" of the standard library, as users are already familiar with
> > them, and this functionality is already quite powerful. Of course, this
> > is only reasonable for byte channels, not for Unicode channels.
> 
> Don't those names seem incompatible with read/write files and
> bidirectional channels such as network sockets?

Why?

> (Ok, so close_in and close_out could be mapped to shutdown(2) in the
> latter case, but I very much suspect that would NOT be what a programmer
> would expect...)

That depends on what the object represents. In Unix, shutdown(2) refers
to the connection whereas close(2) refers to the descriptor. That means,
one can close one half of a connection, but not one half of a
descriptor. I think this is a deficiency of the Unix API (which has
historical reasons, bidirectional pipes were added late).

As OO channels are not limited to file descriptors, I don't see why we
should model them strictly after the Unix API. Separating close_in and
close_out has the advantage that input and output behaviour can be
really independent, and it is simpler to inherit input and output
behaviour from different superclasses. When mapping to Unix, one can
implement the rule that the descriptor is closed when both directions
are closed.

With only one "close" for both directions, one cannot model
bidirectional channels without additional methods like "shutdown".

Gerd
-- 
------------------------------------------------------------
Gerd Stolpmann * Viktoriastr. 45 * 64293 Darmstadt * Germany 
gerd@gerd-stolpmann.de          http://www.gerd-stolpmann.de
------------------------------------------------------------

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-26 20:56             ` Gerd Stolpmann
  2004-04-26 21:14               ` John Goerzen
@ 2004-04-26 21:52               ` Benjamin Geer
  2004-04-27 16:00               ` Yamagata Yoriyuki
  2004-04-27 19:08               ` Nicolas Cannasse
  3 siblings, 0 replies; 67+ messages in thread
From: Benjamin Geer @ 2004-04-26 21:52 UTC (permalink / raw)
  To: Gerd Stolpmann; +Cc: Nicolas Cannasse, Yamagata Yoriyuki, caml-list

Gerd Stolpmann wrote:
> I would like to hear real arguments for why certain names should be used.
> For example, one reason can be that there is already a user base.

Many people are familiar with the C functions 'read' and 'write' in 
Unix.  Perhaps for this reason, Perl, Python and Java also use the names 
'read' and 'write'.

Ben

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-26 20:56             ` Gerd Stolpmann
@ 2004-04-26 21:14               ` John Goerzen
  2004-04-26 22:32                 ` Gerd Stolpmann
  2004-04-26 21:52               ` Benjamin Geer
                                 ` (2 subsequent siblings)
  3 siblings, 1 reply; 67+ messages in thread
From: John Goerzen @ 2004-04-26 21:14 UTC (permalink / raw)
  To: Gerd Stolpmann; +Cc: Nicolas Cannasse, Yamagata Yoriyuki, caml-list

On Mon, Apr 26, 2004 at 10:56:59PM +0200, Gerd Stolpmann wrote:
> I would suggest to adopt the names "input", "output", "close_in",
> "close_out" of the standard library, as users are already familiar with
> them, and this functionality is already quite powerful. Of course, this
> is only reasonable for byte channels, not for Unicode channels.

Don't those names seem incompatible with read/write files and
bidirectional channels such as network sockets?

(Ok, so close_in and close_out could be mapped to shutdown(2) in the
latter case, but I very much suspect that would NOT be what a programmer
would expect...)

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* [Caml-list] Re: Common IO structure
  2004-04-26 14:53     ` [Caml-list] Re: Common IO structure Yamagata Yoriyuki
@ 2004-04-26 21:02       ` Gerd Stolpmann
  0 siblings, 0 replies; 67+ messages in thread
From: Gerd Stolpmann @ 2004-04-26 21:02 UTC (permalink / raw)
  To: Yamagata Yoriyuki; +Cc: caml-list

On Mon, 2004-04-26 at 16:53, Yamagata Yoriyuki wrote:
> From: Gerd Stolpmann <info@gerd-stolpmann.de>
> Subject: Re: Common IO structure (was Re: [Caml-list] [ANN] The Missing Library)
> Date: Sun, 25 Apr 2004 13:54:01 +0200
> 
> > They differ, however, in
> > what they see as their atoms, i.e. smallest entities read from and
> > written to a channel, for ocamlnet atoms are strings, for camomile atoms
> > are characters (char or UChar.t), reflecting a different view what the
> > libraries regard as important features.
> > 
> > I could imagine ocamlnet and camomile share the same signatures if
> > camomile would use some kind of polymorphic strings instead.
> > String-based I/O is much faster than character-based I/O, so camomile
> > would even profit from this change. However, this unification requires
> > that we define the algebraic properties of strings and string buffers,
> > which is not as easy as it sounds.
> 
> When I did a comparison, the speed of Camomile code converter is in
> the same order of iconv (EUC-JP -> UTF8 2-times slower, UTF8 -> EUC-JP
> 50% faster).  I doubt that char-based I/O is significantly slower,
> unless operation is very simple.  String-based I/O has to manage
> buffer strings, which causes extra cost, and anyways the major cost
> comes from elsewhere.  (For code converters, the major cost is caused
> by table lookup.)

I really believe this, when you are doing charset conversions. Note that
ocamlnet's channels are used for other things, too, e.g. email parsing,
and char-by-char operation is simply unacceptable for that.

So I fear one cannot generally say that char-by-char I/O is the common
case.

> That said, I plan to add string-based I/O for character channels,
> partially to interoperate with C functions.  So, for character channels,
> Camomile would be compatible with ocamlnet.

Good news.

Gerd
-- 
------------------------------------------------------------
Gerd Stolpmann * Viktoriastr. 45 * 64293 Darmstadt * Germany 
gerd@gerd-stolpmann.de          http://www.gerd-stolpmann.de
------------------------------------------------------------

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-26 19:28           ` Nicolas Cannasse
@ 2004-04-26 20:56             ` Gerd Stolpmann
  2004-04-26 21:14               ` John Goerzen
                                 ` (3 more replies)
  2004-04-27 15:43             ` Yamagata Yoriyuki
  1 sibling, 4 replies; 67+ messages in thread
From: Gerd Stolpmann @ 2004-04-26 20:56 UTC (permalink / raw)
  To: Nicolas Cannasse; +Cc: Yamagata Yoriyuki, caml-list

On Mon, 2004-04-26 at 21:28, Nicolas Cannasse wrote:
> > And from my point of view, your proposal has some problems.  For one
> > thing, it is not compatible to the already existing I/O channels in
> > other libraries than Extlib.  Camomile uses get and put for your read
> > and write, and ocamlnet and cryptokit uses input and output (IIRC) for
> > your nread and nwrite.
> 
> So ? That's exactly what we're talking about it there : making a choice. And
> that include naming of course. I don't say that the name we choosed for
> ExtLib IO are better, it's just that "reading" and "writing" on an IO seems
> natural to me.

That sounds like a paraphrase for "better" without using this word. I
would like to hear real arguments for why certain names should be used.
For example, one reason can be that there is already a user base.

I think names are just names, and there are usually several ways of
referring to things. However, when several independent libraries
_choose_ to name their methods in a coherent way, I would say this is
very intelligent.

> > Another problem is that it is not minimal
> > enough.  For character converters, it is impossible to predict how
> > many characters will be available, for example.  And requiring "pos",
> > "nread", "nwrite" seems arbitrary for me.  They are somtimes useful
> > and improvement, but not necessary.
> 
> That's true, I agree with you but on the last point : they are necessary in
> order to get good performances. Concerning "available", it returns None if
> no data available. "pos" might throw an exception as well when unavailable
> (looks like pos and available should have same behavior here). And
> nread/nwrite can simply call n times read/write. That means that any library
> can put default implementation for additional "not minimal" constructs :
> they will behave poorly (writing a string char by char) but will interface
> well with other IO that are supporting them correctly. If implementing
> efficently nread/nwrite require additionnal effort, then let's implement a
> default behavior and implement it better later. Having theses functions make
> room for future improvements, which is not done with minimal IO.

Guess what? ocamlnet implements all that in a convincing way. Read its
introduction to OO wrappers for I/O:

http://ocamlnet.sourceforge.net/intro/netchannels.html

The signature of Netchannels as reference:

http://cvs.sourceforge.net/viewcvs.py/ocamlnet/ocamlnet/src/netstring/netchannels.mli?rev=1.12&view=auto

Of course, we should not discuss all that, but concentrate on a
reasonable level of abstraction, no matter whether this is a low-level
or high-level abstraction for a certain library.

I would suggest to adopt the names "input", "output", "close_in",
"close_out" of the standard library, as users are already familiar with
them, and this functionality is already quite powerful. Of course, this
is only reasonable for byte channels, not for Unicode channels.

I think we should not meet on the level of character-by-character I/O,
although byte channels and Unicode channels could then share the same
method names. The reason is simple: Users of byte channels don't want to
do char-by-char I/O while users of Unicode channels can accept that. I'm
speaking of the _users_ intentionally, not of the implementors, as the
users will decide which class interface will be successful and which
not. I think the way to go is an adaptor class that translates between
byte strings and Unicode characters, and does the necessary conversions.

Of course, sharing the same method name is possible, in ocamlnet we have
e.g. output_char where camomile has put_char. So the question is whether
this is worth the effort.

As I am the main developer of ocamlnet, I can say that I am willing to
change method names, or define additional methods, when the community
agrees on a certain standard. This greatly improves the interoperability
of the libraries, and is worth the pain resulting from the numerous
follow-up changes in dependent libraries and programs.

Gerd
-- 
------------------------------------------------------------
Gerd Stolpmann * Viktoriastr. 45 * 64293 Darmstadt * Germany 
gerd@gerd-stolpmann.de          http://www.gerd-stolpmann.de
------------------------------------------------------------

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-26 15:26         ` Yamagata Yoriyuki
@ 2004-04-26 19:28           ` Nicolas Cannasse
  2004-04-26 20:56             ` Gerd Stolpmann
  2004-04-27 15:43             ` Yamagata Yoriyuki
  0 siblings, 2 replies; 67+ messages in thread
From: Nicolas Cannasse @ 2004-04-26 19:28 UTC (permalink / raw)
  To: Yamagata Yoriyuki; +Cc: caml-list

> You miss my point.  What I propose is having agreement over I/O
> channels.  So, having OO wrappers only solves the half of our
> problems.  Another half is whether or not developers accept
> them.
>
> And from my point of view, your proposal has some problems.  For one
> thing, it is not compatible to the already existing I/O channels in
> other libraries than Extlib.  Camomile uses get and put for your read
> and write, and ocamlnet and cryptokit uses input and output (IIRC) for
> your nread and nwrite.

So? That's exactly what we're talking about here: making a choice. And
that includes naming, of course. I'm not saying that the names we chose for
ExtLib IO are better; it's just that "reading" and "writing" on an IO seems
natural to me.

> Another problem is that it is not minimal
> enough.  For character converters, it is impossible to predict how
> many characters will be available, for example.  And requiring "pos",
> "nread", "nwrite" seems arbitrary to me.  They are sometimes useful
> and an improvement, but not necessary.

That's true, I agree with you except on the last point: they are necessary in
order to get good performance. Concerning "available", it returns None if
no data is available. "pos" might throw an exception as well when unavailable
(it looks like pos and available should have the same behavior here). And
nread/nwrite can simply call read/write n times. That means that any library
can provide default implementations for the additional "not minimal" constructs:
they will perform poorly (writing a string char by char) but will interface
well with other IOs that support them correctly. If implementing
nread/nwrite efficiently requires additional effort, then let's provide a
default behavior now and implement it better later. Having these functions makes
room for future improvements, which is not the case with a minimal IO.

> Since I want that these interfaces are accepted as the common
> standard, I wanted that the requirement is absolutely minimal.  My
> proposal in the previous mail is inspired by the view that channels
> are co-inductive types defined by their constructor and consumer.
> Without those methods, they are not channels any more.
>
> I'm interested in your opinion.

Mapping several IOs (a zlib compression + a base64 encoding + a unicode
reader) and using them all together to read and write chars will definitely
slow things down. Buffering is our friend, and requires some more constructs.

Best Regards,
Nicolas Cannasse

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-26 14:23       ` Nicolas Cannasse
  2004-04-26 14:55         ` skaller
@ 2004-04-26 15:26         ` Yamagata Yoriyuki
  2004-04-26 19:28           ` Nicolas Cannasse
  1 sibling, 1 reply; 67+ messages in thread
From: Yamagata Yoriyuki @ 2004-04-26 15:26 UTC (permalink / raw)
  To: warplayer; +Cc: caml-list

From: "Nicolas Cannasse" <warplayer@free.fr>
Subject: Re: [Caml-list] Re: Common IO structure
Date: Mon, 26 Apr 2004 16:23:09 +0200

> val create_in :
>   read:(unit -> 'a) ->
>   nread:(int -> 'b) ->
>   pos:(unit -> int) ->
>   available:(unit -> int option) -> close:(unit -> unit) -> ('a, 'b) input
> 
> val create_out :
>   write:('a -> unit) ->
>   nwrite:('b -> unit) ->
>   pos:(unit -> int) ->
>   flush:(unit -> unit) -> close:(unit -> 'c) -> ('a, 'b, 'c) output
> 
> the "minimal set of methods" is 5 methods for both.
> The OO wrappers I proposed are doing exactly what you want , they're
> converting from and to IO input/outputs. Could you explain what part of the
> problem they're not answering ?

You miss my point.  What I propose is having agreement over I/O
channels.  So, having OO wrappers only solves the half of our
problems.  Another half is whether or not developers accept
them.

And from my point of view, your proposal has some problems.  For one
thing, it is not compatible to the already existing I/O channels in
other libraries than Extlib.  Camomile uses get and put for your read
and write, and ocamlnet and cryptokit uses input and output (IIRC) for
your nread and nwrite.  Another problem is that it is not minimal
enough.  For character converters, it is impossible to predict how
many characters will be available, for example.  And requiring "pos",
"nread", "nwrite" seems arbitrary to me.  They are sometimes useful
and an improvement, but not necessary.

Since I want that these interfaces are accepted as the common
standard, I wanted that the requirement is absolutely minimal.  My
proposal in the previous mail is inspired by the view that channels
are co-inductive types defined by their constructor and consumer.
Without those methods, they are not channels any more.

I'm interested in your opinion.

--
Yamagata Yoriyuki


-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-26 14:23       ` Nicolas Cannasse
@ 2004-04-26 14:55         ` skaller
  2004-04-26 15:26         ` Yamagata Yoriyuki
  1 sibling, 0 replies; 67+ messages in thread
From: skaller @ 2004-04-26 14:55 UTC (permalink / raw)
  To: Nicolas Cannasse; +Cc: Yamagata Yoriyuki, caml-list

On Tue, 2004-04-27 at 00:23, Nicolas Cannasse wrote:

> Some info for people who don't know about the ExtLib IO module:
> The ExtLib IO library is bi-polymorphic for performance reasons: it has two
> polymorphic type parameters. A file input for example is a (char,string)
> IO.input, so there are two things we can read: chars and strings. Outputs
> have a third parameter that is "what is returned by the close function": a
> file output is a (char,string,unit) IO.output, and a buffer output is a
> (char,string,string) IO.output (the contents of the buffer are returned when
> the output is closed).

That's pretty nice concept.

-- 
John Skaller, mailto:skaller@users.sf.net
voice: 061-2-9660-0850, 
snail: PO BOX 401 Glebe NSW 2037 Australia
Checkout the Felix programming language http://felix.sf.net



-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* [Caml-list] Re: Common IO structure
  2004-04-25 11:54   ` Gerd Stolpmann
@ 2004-04-26 14:53     ` Yamagata Yoriyuki
  2004-04-26 21:02       ` Gerd Stolpmann
  0 siblings, 1 reply; 67+ messages in thread
From: Yamagata Yoriyuki @ 2004-04-26 14:53 UTC (permalink / raw)
  To: info; +Cc: caml-list

From: Gerd Stolpmann <info@gerd-stolpmann.de>
Subject: Re: Common IO structure (was Re: [Caml-list] [ANN] The Missing Library)
Date: Sun, 25 Apr 2004 13:54:01 +0200

> They differ, however, in
> what they see as their atoms, i.e. smallest entities read from and
> written to a channel, for ocamlnet atoms are strings, for camomile atoms
> are characters (char or UChar.t), reflecting a different view what the
> libraries regard as important features.
> 
> I could imagine ocamlnet and camomile share the same signatures if
> camomile would use some kind of polymorphic strings instead.
> String-based I/O is much faster than character-based I/O, so camomile
> would even profit from this change. However, this unification requires
> that we define the algebraic properties of strings and string buffers,
> which is not as easy as it sounds.

When I did a comparison, the speed of the Camomile code converter was of
the same order as iconv (EUC-JP -> UTF8 2 times slower, UTF8 -> EUC-JP
50% faster).  I doubt that char-based I/O is significantly slower,
unless the operation is very simple.  String-based I/O has to manage
buffer strings, which causes extra cost, and anyway the major cost
comes from elsewhere.  (For code converters, the major cost is caused
by table lookup.)

That said, I plan to add string-based I/O for character channels,
partially to interoperate with C functions.  So, for character channels,
Camomile would be compatible with ocamlnet.

--
Yamagata Yoriyuki


-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-26 13:53       ` Jacques GARRIGUE
@ 2004-04-26 14:26         ` Nicolas Cannasse
  2004-04-28  6:52           ` Jacques GARRIGUE
  0 siblings, 1 reply; 67+ messages in thread
From: Nicolas Cannasse @ 2004-04-26 14:26 UTC (permalink / raw)
  To: yoriyuki, Jacques GARRIGUE; +Cc: caml-list

> > What I want is more than that.  I want that we agree some minimal set
> > of methods for IO channels, and agree to accept such an IO channel as
> > an argument for our library functions, or provide a converter to a
> > native IO channel of the library.
[...]
> All this seems reasonable.
> Note that Format also uses [spaces] and [newline], but there are
> reasonable defaults for them.
>
> This also emphasizes one of the advantages of objects: since their
> types can be compared for equality, several libraries can use the same
> type without requiring a common header (that is, if everybody agrees
> on the interface, as you suggest).

Is there any chance of getting the same behavior with records?
Records are currently module-bound; if OCaml enabled structural
comparison (even without subtyping) it would be very useful.

Best Regards,
Nicolas Cannasse

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-26 13:16     ` [Caml-list] Re: Common IO structure Yamagata Yoriyuki
  2004-04-26 13:53       ` Jacques GARRIGUE
@ 2004-04-26 14:23       ` Nicolas Cannasse
  2004-04-26 14:55         ` skaller
  2004-04-26 15:26         ` Yamagata Yoriyuki
  1 sibling, 2 replies; 67+ messages in thread
From: Nicolas Cannasse @ 2004-04-26 14:23 UTC (permalink / raw)
  To: Yamagata Yoriyuki; +Cc: caml-list

> > I see your point, tell me if I'm wrong :
> > You would like to add ExtLib IO's support to Camomile, without actually the
> > need for the user to install ExtLib in order to compile your library. You're
> > true about that, and that's a good idea. I'll try to think about adding some
> > OO wrappers on ExtLib IO, as well as conversion functions from/to ExtLib
> > input/outputs objects and records before next release.
>
> What I want is more than that.  I want that we agree some minimal set
> of methods for IO channels, and agree to accept such an IO channel as
> an argument for our library functions, or provide a converter to a
> native IO channel of the library.
[...]
> I'm interested in your opinion (and of all caml list participants).

Did you have a look at my OO wrappers proposal for the IO module (posted on
the ExtLib mailing list)?

Some info for people who don't know about the ExtLib IO module:
The ExtLib IO library is bi-polymorphic for performance reasons: it has two
polymorphic type parameters. A file input for example is a (char,string)
IO.input, so there are two things we can read: chars and strings. Outputs
have a third parameter that is "what is returned by the close function": a
file output is a (char,string,unit) IO.output, and a buffer output is a
(char,string,string) IO.output (the contents of the buffer are returned when
the output is closed).

Here's how IOs are created:

val create_in :
  read:(unit -> 'a) ->
  nread:(int -> 'b) ->
  pos:(unit -> int) ->
  available:(unit -> int option) -> close:(unit -> unit) -> ('a, 'b) input

val create_out :
  write:('a -> unit) ->
  nwrite:('b -> unit) ->
  pos:(unit -> int) ->
  flush:(unit -> unit) -> close:(unit -> 'c) -> ('a, 'b, 'c) output

the "minimal set of methods" is 5 methods for both.
The OO wrappers I proposed do exactly what you want: they
convert from and to IO inputs/outputs. Could you explain what part of the
problem they're not answering?

Best Regards,
Nicolas Cannasse

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [Caml-list] Re: Common IO structure
  2004-04-26 13:16     ` [Caml-list] Re: Common IO structure Yamagata Yoriyuki
@ 2004-04-26 13:53       ` Jacques GARRIGUE
  2004-04-26 14:26         ` Nicolas Cannasse
  2004-04-26 14:23       ` Nicolas Cannasse
  1 sibling, 1 reply; 67+ messages in thread
From: Jacques GARRIGUE @ 2004-04-26 13:53 UTC (permalink / raw)
  To: yoriyuki; +Cc: caml-list

From: Yamagata Yoriyuki <yoriyuki@mbg.ocn.ne.jp>

> What I want is more than that.  I want that we agree some minimal set
> of methods for IO channels, and agree to accept such an IO channel as
> an argument for our library functions, or provide a converter to a
> native IO channel of the library.
[...]
> (for output)
> object 
>        output : string -> int -> int -> unit
>        flush : unit -> unit
>        close : unit -> unit
> end
> ([c#output s pos len] outputs [len] characters from the position
> [pos])

All this seems reasonable.
Note that Format also uses [spaces] and [newline], but there are
reasonable defaults for them.

This also emphasizes one of the advantages of objects: since their
types can be compared for equality, several libraries can use the same
type without requiring a common header (that is, if everybody agrees
on the interface, as you suggest).

     Jacques Garrigue

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

* [Caml-list] Re: Common IO structure
  2004-04-25 19:42   ` Common IO structure (was Re: [Caml-list] [ANN] The Missing Library) Nicolas Cannasse
@ 2004-04-26 13:16     ` Yamagata Yoriyuki
  2004-04-26 13:53       ` Jacques GARRIGUE
  2004-04-26 14:23       ` Nicolas Cannasse
  0 siblings, 2 replies; 67+ messages in thread
From: Yamagata Yoriyuki @ 2004-04-26 13:16 UTC (permalink / raw)
  To: warplayer; +Cc: caml-list

From: "Nicolas Cannasse" <warplayer@free.fr>
Subject: Re: Common IO structure (was Re: [Caml-list] [ANN] The Missing Library)
Date: Sun, 25 Apr 2004 21:42:52 +0200

> I see your point, tell me if I'm wrong :
> You would like to add ExtLib IO's support to Camomile, without actually the
> need for the user to install ExtLib in order to compile your library. You're
> true about that, and that's a good idea. I'll try to think about adding some
> OO wrappers on ExtLib IO, as well as conversion functions from/to ExtLib
> input/outputs objects and records before next release.

What I want is more than that.  I want that we agree some minimal set
of methods for IO channels, and agree to accept such an IO channel as
an argument for our library functions, or provide a converter to a
native IO channel of the library.

For a polymorphic channel, minimal class types would be

(for input)
['a] object get : unit -> 'a end 
(raise End_of_file when there is no more element to read.)

(for output)
['a] object 
     put : 'a -> unit 
     flush : unit -> unit 
     close : unit -> unit
end

for a character channel,
(for input)
object input : string -> int -> int -> int end

([c#input s pos len] fills s from pos with less than [len] characters,
and returns the number of characters really filled.)

(for output)
object 
       output : string -> int -> int -> unit
       flush : unit -> unit
       close : unit -> unit
end
([c#output s pos len] outputs [len] characters from the position
[pos])

I'm interested in your opinion (and of all caml list participants).
--
Yamagata Yoriyuki

-------------------
To unsubscribe, mail caml-list-request@inria.fr Archives: http://caml.inria.fr
Bug reports: http://caml.inria.fr/bin/caml-bugs FAQ: http://caml.inria.fr/FAQ/
Beginner's list: http://groups.yahoo.com/group/ocaml_beginners


^ permalink raw reply	[flat|nested] 67+ messages in thread

end of thread, other threads:[~2004-05-09 17:35 UTC | newest]

Thread overview: 67+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-05-03  6:12 [Caml-list] Re: Common IO structure Vladimir N. Silyaev
2004-05-04 21:31 ` Benjamin Geer
2004-05-04 22:59   ` Yamagata Yoriyuki
2004-05-05  8:11     ` skaller
2004-05-05 15:48       ` Marcin 'Qrczak' Kowalczyk
2004-05-05 19:28         ` skaller
2004-05-05 17:33     ` Vladimir N. Silyaev
2004-05-05 17:31   ` Vladimir N. Silyaev
2004-05-07 22:11     ` Benjamin Geer
2004-05-08  7:29       ` Vladimir N. Silyaev
2004-05-09 17:35         ` Benjamin Geer
  -- strict thread matches above, loose matches on Subject: below --
2004-04-24  9:28 [Caml-list] [ANN] The Missing Library Nicolas Cannasse
2004-04-25  8:56 ` Common IO structure (was Re: [Caml-list] [ANN] The Missing Library) Yamagata Yoriyuki
2004-04-25 11:54   ` Gerd Stolpmann
2004-04-26 14:53     ` [Caml-list] Re: Common IO structure Yamagata Yoriyuki
2004-04-26 21:02       ` Gerd Stolpmann
2004-04-25 19:42   ` Common IO structure (was Re: [Caml-list] [ANN] The Missing Library) Nicolas Cannasse
2004-04-26 13:16     ` [Caml-list] Re: Common IO structure Yamagata Yoriyuki
2004-04-26 13:53       ` Jacques GARRIGUE
2004-04-26 14:26         ` Nicolas Cannasse
2004-04-28  6:52           ` Jacques GARRIGUE
2004-04-26 14:23       ` Nicolas Cannasse
2004-04-26 14:55         ` skaller
2004-04-26 15:26         ` Yamagata Yoriyuki
2004-04-26 19:28           ` Nicolas Cannasse
2004-04-26 20:56             ` Gerd Stolpmann
2004-04-26 21:14               ` John Goerzen
2004-04-26 22:32                 ` Gerd Stolpmann
2004-04-26 21:52               ` Benjamin Geer
2004-04-27 16:00               ` Yamagata Yoriyuki
2004-04-27 21:51                 ` Gerd Stolpmann
2004-04-27 19:08               ` Nicolas Cannasse
2004-04-27 22:22                 ` Gerd Stolpmann
2004-04-28  7:42                   ` Nicolas Cannasse
2004-04-29 10:13                 ` Yamagata Yoriyuki
2004-04-27 15:43             ` Yamagata Yoriyuki
2004-04-27 16:17               ` Nicolas Cannasse
2004-04-27 16:58                 ` Yamagata Yoriyuki
2004-04-27 23:35                   ` Benjamin Geer
2004-04-28  3:44                     ` John Goerzen
2004-04-28 13:01                       ` Richard Jones
2004-04-28 21:30                       ` Benjamin Geer
2004-04-28 21:44                         ` John Goerzen
2004-04-28 22:41                           ` Richard Jones
2004-04-29 11:51                             ` Benjamin Geer
2004-04-29 12:03                               ` Richard Jones
2004-04-29 15:16                                 ` Benjamin Geer
2004-04-29 10:27                           ` Yamagata Yoriyuki
2004-04-29 13:03                             ` John Goerzen
2004-04-29 13:40                               ` Yamagata Yoriyuki
2004-04-29 14:02                                 ` John Goerzen
2004-04-29 15:31                                   ` Yamagata Yoriyuki
2004-04-29 17:31                                     ` james woodyatt
2004-04-29 23:53                                       ` Benjamin Geer
2004-04-30  4:10                                         ` james woodyatt
2004-04-29 11:23                           ` Benjamin Geer
2004-04-29 12:23                             ` Richard Jones
2004-04-29 15:10                               ` Benjamin Geer
2004-04-29 15:35                                 ` John Goerzen
2004-04-29 15:46                                   ` Benjamin Geer
2004-04-29 15:58                                     ` Richard Jones
2004-04-29 20:41                                     ` John Goerzen
2004-04-29 22:35                                       ` Benjamin Geer
2004-05-01 14:37                               ` Brian Hurt
2004-04-29 13:23                             ` John Goerzen
2004-04-29 14:12                               ` John Goerzen
2004-04-29 15:37                               ` Benjamin Geer
2004-04-28  7:05                     ` Nicolas Cannasse
2004-04-28  0:20                   ` skaller
2004-04-28  3:39                   ` John Goerzen
2004-04-28 13:04                   ` Richard Jones

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).