On Fri, Jan 7, 2011 at 8:17 PM, Florian Weimer <fw@deneb.enyo.de> wrote:

> Essential laziness occurs when you construct values which would not
> otherwise be possible to construct in a pure language.  For instance,
> if you have got some sort of compiler, at one point, you might want to
> perform name resolution.  At that point, you want to translate names
> (references) to the program entities they denote (referents).  In
> general, these references do not form a tree (for instance, a function
> body can refer to another function by name, which refers to the
> original function).  With laziness, you can compute a map from names
> to entities, and the entities use this maps to resolve the names they
> use as references.  Name lookup is only performed once per name.
> Without laziness, you have to perform name lookup each time you follow
> a reference because the self-referential nature of the data structure
> cannot be reflected in the program.
>

I'm not convinced by your example. You need lazyness because you want to
build a cyclic data structure. But is this such a good idea ? In my
experience, implicit cyclicity often raises more problems than it solves,
and it is much safer to use an explicit indirection layer.

In your example, the resolved identifiers could contain not the declaration
code itself, but a unique key that is associated to those declaration sites.
This can be done easily, with only one traversal. Then in a second pass you
can record the mapping of keys to resolved declaration sites, and therefore
tie the knot.

Below is a simple code example:

  type unique_loc = int
  type name = string

  (* a simple language with identifiers and mutually recursive definitions
*)
  type 'a term =
    | Name of 'a
    | Lets of ((name * unique_loc) * 'a term) list * 'a term

  (* unique_loc are supposed to be unique, names are not due to
     shadowing. In a source program, a identifier points to a name,
     which is ambiguous. To resolve it means to turn all such names into
     the unique identifiers corresponding to the binding declaration
  *)
  type input_term = name term
  type resolved_term = unique_loc term

  let resolve : input_term -> resolved_term =
    let rec resolve (env : (name * unique_loc) list) = function
      | Name n -> Name (List.assoc n env)
      | Lets (decls, body) ->
        let env' = List.map fst decls @ env in
        let resolve_decl (binder, code) =
          binder, resolve env' code in
        Lets (List.map resolve_decl decls, resolve env' body)
    in resolve []

  (* we can easily build a table that, to each unique location,
     associates the declaration code; during further analysis, this may
     allow some handling of identifiers based on the expression they
     denote. *)
  let rec resolution_table : resolved_term -> (unique_loc * resolved_term)
list =
    function
      | Name _ -> []
      | Lets (decls, body) ->
        let decl_table ((_, loc), code) =
          (loc, code) :: resolution_table code in
        resolution_table body @ List.concat (List.map decl_table decls)

  (* if you want to test *)
  let input : input_term =
    Lets ([
      ("x", 1), Name "y";
      ("y", 2), Name "x";
      ("z", 3),
        Lets ([
          ("a", 4), Name "x";
          ("b", 5), Name "y";
        ], Name "z");
    ],
      Lets (
        [("x", 6), Name "x"],
        Name "z"))