caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
From: Florian Weimer <fw@deneb.enyo.de>
To: Damien Doligez <damien.doligez@inria.fr>
Cc: caml-list <caml-list@yquem.inria.fr>
Subject: Re: [Caml-list] Finalization and object dependencies
Date: Sat, 12 Nov 2005 19:22:15 +0100	[thread overview]
Message-ID: <87y83tk9pk.fsf@mid.deneb.enyo.de> (raw)
In-Reply-To: <7F807F11-D671-41F1-9EAE-C05BC6AE8406@inria.fr> (Damien Doligez's message of "Fri, 11 Nov 2005 19:00:15 +0100")

* Damien Doligez:

> This will be hard to do if you really want to be complete.  Some run- 
> time
> errors (most notably, "out of memory") are not exceptions, they stop the
> program immediately.  Moreover, under Unix there are signals that cannot
> be caught or ignored.

Crashes in a C library are yet another problem.

I just want to avoid that I have to initiate complicate recovery
procedures just because some unhandled exception has occurred.

>>   (2) Use Gc.finalise for handler objects which contain an index into
>>       (plain) arrays of tables and cursors.  Use reference counting
>>       (or back pointers) to prevent premature finalization of table
>>       objects while there are still cursors around.
>
> A simple pointer from the cursor to the table should be enough.

Reading the description of Gc.finalise, this seems to be the case.

> I can't see how (1) would work.  

Something like this:

module type Finalizable = sig
  type t
  val none : t
  val finalize : t -> unit
end

module type Finalizer = sig
  type data
    (** The underlying data objects. *)

  type handle
    (** A handle for some registered data object. *)

  val value : handle -> data
    (** Returns the data object for the handle.  May raise Failure if
        the object has been finalized.  This can happen if [delete] is
        called while there are still handle objects in the program, or
        [value] is from some finalization routine, and the handle has
        already been deleted during the same finalization run. *)

  val none : handle
    (** A handle that refers to the object [Finalizable.none]. *)

  type t
    (** A finalization table. *)

  val create : int -> t
    (** [create size] returns a new finalization of size [size].
	The table will grow as needed. *)

  val register : t -> data -> handle
    (** Registers an object in the finalization table and returns a
	handle for the same object.  These objects are finalized using
	[Finalizable.finalize], either when they become unreachable or
	when [delete] is invoked. *)
    
  val delete : t -> unit
    (** Finalize all objects currently registered.  Further calls to
	[register] are not allowed after [delete] has been called. *)

  val compact : t -> int
    (** Run finalization and return the number of active objects.
	Note that some implementations may opt to run finalization
	spontaneously as well. *)
end

module Make (F : Finalizable) : (Finalizer with type data = F.t) = struct
  type data = F.t
  type handle = data ref

  let value h = !h
  let none = ref F.none

  type t = 
      { mutable weak_array : handle Weak.t;
	mutable real_array : F.t array;
	mutable allocated : int; 
	mutable no_register : bool;
	mutable alarm : Gc.alarm option }

  let compact t =
    t.no_register <- true;
    let pos = ref 0 in
      for i = 0 to t.allocated - 1 do
	match Weak.get t.weak_array i with
	    None -> 
	      F.finalize t.real_array.(i);
	      t.real_array.(i) <- F.none
	  | Some v as v' ->
	      Weak.set t.weak_array !pos v';
	      t.real_array.(!pos) <- t.real_array.(i);
	      pos := !pos + 1;
      done;
      t.allocated <- !pos;
      t.no_register <- false;
      !pos

  let create size = 
    let t = { weak_array = Weak.create size; 
	      real_array = Array.create size F.none;
	      allocated = 0;
	      no_register = false;
	      alarm = None }
    in
      t.alarm <- Some (Gc.create_alarm 
			 (function () -> let _ = compact t in ()));
      t

  let delete t =
    if t.no_register then
      raise (Failure "finalize_all");
    match t.alarm with
	None -> raise (Failure "finalize_all")
      | Some alarm ->
	  t.no_register <- true;
	  for i = 0 to t.allocated - 1 do
	    match Weak.get t.weak_array i with
		None -> ()
	      | Some v -> 
		  F.finalize (!v);
		  v := F.none;
		  Weak.set t.weak_array i None;
		  t.real_array.(i) <- F.none;
	  done;
	  t.allocated <- 0;
	  t.no_register <- false;
	  Gc.delete_alarm alarm;
	  t.alarm <- None

  let realloc t =
    (* The call to Gc.major_cycle triggers finalization and may free
       some of the entries.  Further GCs at this point only mark more
       entries as weak, but we won't lose any of them. *)
    t.no_register <- true;
    Gc.major ();
    let mostly_filled = 4 * t.allocated > 3 * Weak.length t.weak_array in
      if mostly_filled then
	let old_size = Weak.length t.weak_array in
	let new_size = 2 * old_size in
	let new_w = Weak.create new_size in
	let new_r = Array.create new_size F.none in
	  Weak.blit t.weak_array 0 new_w 0 t.allocated;
	  Array.blit t.real_array 0 new_r 0 t.allocated;
	  t.weak_array <- new_w;
	  t.real_array <- new_r;
	  t.no_register <- false

  let rec register t d =
    if t.no_register then
      raise (Failure "register");
    let pos = t.allocated in
    let size = Weak.length t.weak_array in
      if pos  < size then
	let h = ref d in
	  (Weak.set t.weak_array pos (Some h);
	   t.real_array.(pos) <- d;
	   t.allocated <- pos + 1;
	   h)
      else
	(realloc t;
	 register t d)
end

(Only lightly tested.)  This is similar to what the run-time library
does to implement Gc.finalise.

The interface needs some more polish; currently, it doesn't compose
when applied to different object types (tables and cursors in my
example).  But this should be fixable, by not registering a GC alarm
automatically and have the user call compact explicitly, in the
correct order.

> (2) is normal if your objects are implemented as OCaml data
> structures.

Unfortunately, I still seem to need a weak data structure or simple,
custom memory allocator.  Using weak hash tables turned out to be
quite slow, and it seems to be difficult to limit the number of
uncollected objects (which might cause the performance problems).


  reply	other threads:[~2005-11-12 18:22 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-11-11 14:41 Florian Weimer
2005-11-11 18:00 ` [Caml-list] " Damien Doligez
2005-11-12 18:22   ` Florian Weimer [this message]
2005-11-17  1:42 ` Stefan Monnier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87y83tk9pk.fsf@mid.deneb.enyo.de \
    --to=fw@deneb.enyo.de \
    --cc=caml-list@yquem.inria.fr \
    --cc=damien.doligez@inria.fr \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).