caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
* Patch to 3.10.0 compiler enabling simple spell-checking
@ 2007-10-29 21:11 Edgar Friendly
  2007-10-29 21:34 ` [Caml-list] " Till Varoquaux
                   ` (4 more replies)
  0 siblings, 5 replies; 10+ messages in thread
From: Edgar Friendly @ 2007-10-29 21:11 UTC (permalink / raw)
  To: Caml List

[-- Attachment #1: Type: text/plain, Size: 1622 bytes --]

One random little feature of GNAT that comes in handy for me is its
habit of, when I misspell an identifier, giving me a possible correction
in its compile error message.  Spending some time with the 3.10.0
sources, I have created a "second draft" patch creating this
functionality in my favored language.

Example:
========

# /home/thelema/Projects/ocaml-custom/bin/ocamlc -o coml -I +lablgtk2
lablgtk.cma gtkInit.cmo coml.ml
File "coml.ml", line 61, characters 16-25:
Unbound value is_arcive, possible misspelling of is_archive

Impacts:
========

Efficiency in the case of finding a mistake should be quite good,
although this shouldn't matter too much since the compiler quits pretty
early in compilation when it finds an unbound identifier.

In the case of no unbound identifiers, the cost is an extra try/with
block around the standard lookup.  I haven't made any benchmarks, though.

I expect this code to have little long term maintenance issues - the
major source of code changes was adding a "* string list" to a number of
exceptions to carry the list of possible correct spellings to the point
they get output by the compiler.  These exceptions are still usable as
before with an empty list in this spot.

It's possible the code has created opportunities for uncaught exceptions
in the compiler as I only checked for instances of "Not_found" in a few
files -- those which dealt with the Unbound_* exceptions.  Someone who
knows the internals better might find places the "Found_nearly"
exception that carries possible corrections might escape into.


Dedicated to:
Yaron Minsky and the team at Jane Street

E.

[-- Attachment #2: ocaml-spelling.patch --]
[-- Type: text/x-patch, Size: 15631 bytes --]

diff --git a/typing/ident.ml b/typing/ident.ml
index a30aa7a..8bbb395 100644
--- a/typing/ident.ml
+++ b/typing/ident.ml
@@ -56,6 +56,61 @@ let same i1 i2 = i1 = i2
        then i1.stamp = i2.stamp
        else i2.stamp = 0 && i1.name = i2.name *)
 
+(* does various heuristics to find spelling mistakes - algorithm from GNAT Ada compiler - Lots of code, but hopefully about as efficient as possible *)
+let nearly_equal s i =
+  let is_digit c = c >= '0' && c <= '9' in
+  let rec tails_equal s1 i1 s2 i2 = i1 >= String.length s1 || (s1.[i1] = s2.[i2] && tails_equal s1 (i1+1) s2 (i2+1)) in
+  
+  let n1 = s and n2 = i.name in
+  let l1 = String.length n1 and l2 = String.length n2 in
+  if l1 = 0 then l2 = 0 (* both null -> equal *)
+  else if l2 = 0 then false (* one of n1 or n2 null -> not equal *)
+  else if n1.[0] != n2.[0] then false (* first characters don't match -> not *)
+  else if l1 < 3 && l2 < 3 then false (* short strings are all different *)
+  else if l1 = l2 then
+    (* look for single errors, transpositions *)
+    let rec find_diff c = 
+      if c < (l1 - 1) then 
+	if n1.[c] != n2.[c] then 
+	  (* mismatched digits -> not equal *)
+	  if is_digit n1.[c] && is_digit n2.[c] then false
+	    (* single error -> equal *)
+	  else if n1.[c+1] = n2.[c+1] && tails_equal n1 (c+2) n2 (c+2)
+	  then true
+	    (* transposition -> equal *)
+	  else if n1.[c] = n2.[c+1] && n1.[c+1] = n2.[c] &&
+	  tails_equal n1 (c+2) n2 (c+2)
+	  then true
+	  else false
+	else find_diff (c+1)
+      else (* at the end of the strings *)
+	if is_digit n1.[c] && is_digit n2.[c] 
+	  && n1.[c] != n2.[c] 
+	then false
+	else true
+    in
+    find_diff 1
+  else if l1 = l2 - 1 then (* short by one *)
+    let rec find_del c = 
+      if c < l1 then
+	if n1.[c] != n2.[c] then 
+	  tails_equal n1 c n2 (c+1)
+	else find_del (c+1)
+      else true (* last character was deleted *)
+    in
+    find_del 1
+  else if l1 = l2 + 1 then (* too long by one *)
+    let rec find_add c =
+      if c < l2 then
+	if n1.[c] != n2.[c] then 
+	  tails_equal n1 (c+1) n2 c
+	else find_add (c+1)
+      else true (* last character added *)
+    in
+    find_add 1
+  else (* lengths totally different *)
+    false
+	
 let binding_time i = i.stamp
 
 let current_time() = !currentstamp
@@ -97,6 +152,8 @@ and 'a data =
 
 let empty = Empty
 
+exception Found_nearly of string list
+
 (* Inline expansion of height for better speed
  * let height = function
  *     Empty -> 0
@@ -182,3 +239,34 @@ let rec keys_aux stack accu = function
       keys_aux (l :: stack) (k.ident :: accu) r
 
 let keys tbl = keys_aux [] [] tbl
+
+let find_nearly_equal n tbl =
+(*  List.filter (nearly_equal name) (keys tbl) -- optimized for your enjoyment*)
+  let rec find_nearly_equal_aux stack accu = function
+      Empty ->
+	begin match stack with
+            [] -> accu
+	  | a :: l -> find_nearly_equal_aux l accu a
+	end
+    | Node(l, k, r, _) -> 
+	let accu' = 
+	  if nearly_equal n k.ident 
+	  then k.ident.name :: accu
+	  else accu 
+	in
+	find_nearly_equal_aux (l :: stack) accu' r
+  in
+  find_nearly_equal_aux [] [] tbl
+
+let find_name_with_nearly name tbl = 
+  try find_name name tbl
+  with Not_found -> 
+(*    prerr_string "FNE: "; prerr_endline name; *)
+    let nearlies = find_nearly_equal name tbl in
+(*    let rec pr_list to_str = function [] -> prerr_endline "Empty" | [a] -> prerr_endline (to_str a) | h :: t -> prerr_string (to_str h); prerr_string ", "; pr_list to_str t
+    in
+    prerr_string "nearlies: ";
+    pr_list (fun i -> i) nearlies;
+    prerr_string "whole table: ";
+    pr_list (fun i -> i.name) (keys tbl); *)
+    raise (Found_nearly nearlies)
diff --git a/typing/ident.mli b/typing/ident.mli
index 9f7372c..7ab9327 100644
--- a/typing/ident.mli
+++ b/typing/ident.mli
@@ -52,8 +52,12 @@ val print: Format.formatter -> t -> unit
 type 'a tbl
         (* Association tables from identifiers to type 'a. *)
 
+exception Found_nearly of string list
+
 val empty: 'a tbl
 val add: t -> 'a -> 'a tbl -> 'a tbl
 val find_same: t -> 'a tbl -> 'a
 val find_name: string -> 'a tbl -> 'a
 val keys: 'a tbl -> t list
+
+val find_name_with_nearly: string -> 'a tbl -> 'a
diff --git a/typing/typecore.ml b/typing/typecore.ml
index 691eb49..adff3ad 100644
--- a/typing/typecore.ml
+++ b/typing/typecore.ml
@@ -23,9 +23,9 @@ open Btype
 open Ctype
 
 type error =
-    Unbound_value of Longident.t
-  | Unbound_constructor of Longident.t
-  | Unbound_label of Longident.t
+    Unbound_value of Longident.t * string list
+  | Unbound_constructor of Longident.t * string list
+  | Unbound_label of Longident.t * string list
   | Polymorphic_label of Longident.t
   | Constructor_arity_mismatch of Longident.t * int * int
   | Label_mismatch of Longident.t * (type_expr * type_expr) list
@@ -42,11 +42,11 @@ type error =
   | Bad_conversion of string * int * char
   | Undefined_method of type_expr * string
   | Undefined_inherited_method of string
-  | Unbound_class of Longident.t
+  | Unbound_class of Longident.t * string list
   | Virtual_class of Longident.t
   | Private_type of type_expr
   | Private_label of Longident.t * type_expr
-  | Unbound_instance_variable of string
+  | Unbound_instance_variable of string * string list
   | Instance_variable_not_mutable of string
   | Not_subtype of (type_expr * type_expr) list * (type_expr * type_expr) list
   | Outside_class
@@ -276,7 +276,7 @@ let rec build_as_type env p =
       unify_pat env {p2 with pat_type = ty2} ty1;
       begin match path with None -> ()
       | Some path ->
-          let td = try Env.find_type path env with Not_found -> assert false in
+          let td = try Env.find_type path env with Not_found | Ident.Found_nearly _ -> assert false in
           let params = List.map (fun _ -> newvar()) td.type_params in
           match expand_head env (newty (Tconstr (path, params, ref Mnil)))
           with {desc=Tvariant row} when static_row row ->
@@ -291,7 +291,9 @@ let build_or_pat env loc lid =
   let path, decl =
     try Env.lookup_type lid env
     with Not_found ->
-      raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor lid))
+      raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor (lid,[])))
+      | Ident.Found_nearly l -> 
+      raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor (lid,l)))
   in
   let tyl = List.map (fun _ -> newvar()) decl.type_params in
   let fields =
@@ -400,7 +402,10 @@ let rec type_pat env sp =
         try
           Env.lookup_constructor lid env
         with Not_found ->
-          raise(Error(sp.ppat_loc, Unbound_constructor lid)) in
+          raise(Error(sp.ppat_loc, Unbound_constructor (lid,[]))) 
+	  | Ident.Found_nearly l -> 
+	  raise(Error(sp.ppat_loc, Unbound_constructor (lid,l)))
+      in
       let sargs =
         match sarg with
           None -> []
@@ -449,7 +454,10 @@ let rec type_pat env sp =
           try
             Env.lookup_label lid env
           with Not_found ->
-            raise(Error(sp.ppat_loc, Unbound_label lid)) in
+            raise(Error(sp.ppat_loc, Unbound_label (lid,[]))) 
+	    | Ident.Found_nearly l -> 
+	    raise(Error(sp.ppat_loc, Unbound_label (lid,l)))
+	in
         begin_def ();
         let (vars, ty_arg, ty_res) = instance_label false label in
         if vars = [] then end_def ();
@@ -806,7 +814,7 @@ let rec approx_type env sty =
         if List.length ctl <> decl.type_arity then raise Not_found;
         let tyl = List.map (approx_type env) ctl in
         newconstr path tyl
-      with Not_found -> newvar ()
+      with Not_found | Ident.Found_nearly _ -> newvar ()
       end
   | _ -> newvar ()
 
@@ -923,7 +931,9 @@ let rec type_exp env sexp =
           exp_type = instance desc.val_type;
           exp_env = env }
       with Not_found ->
-        raise(Error(sexp.pexp_loc, Unbound_value lid))
+        raise(Error(sexp.pexp_loc, Unbound_value (lid,[])))
+	| Ident.Found_nearly l -> 
+	raise(Error(sexp.pexp_loc, Unbound_value (lid,l)))
       end
   | Pexp_constant cst ->
       re {
@@ -1021,7 +1031,10 @@ let rec type_exp env sexp =
           try
             Env.lookup_label lid env
           with Not_found ->
-            raise(Error(sexp.pexp_loc, Unbound_label lid)) in
+            raise(Error(sexp.pexp_loc, Unbound_label (lid,[]))) 
+	    | Ident.Found_nearly l -> 
+	    raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))
+	in
         begin_def ();
         if !Clflags.principal then begin_def ();
         let (vars, ty_arg, ty_res) = instance_label true label in
@@ -1098,7 +1111,10 @@ let rec type_exp env sexp =
         try
           Env.lookup_label lid env
         with Not_found ->
-          raise(Error(sexp.pexp_loc, Unbound_label lid)) in
+          raise(Error(sexp.pexp_loc, Unbound_label (lid,[]))) 
+	  | Ident.Found_nearly l -> 
+	  raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))
+      in
       let (_, ty_arg, ty_res) = instance_label false label in
       unify_exp env arg ty_res;
       re {
@@ -1112,7 +1128,10 @@ let rec type_exp env sexp =
         try
           Env.lookup_label lid env
         with Not_found ->
-          raise(Error(sexp.pexp_loc, Unbound_label lid)) in
+          raise(Error(sexp.pexp_loc, Unbound_label (lid,[]))) 
+	  | Ident.Found_nearly l -> 
+	  raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))
+      in
       if label.lbl_mut = Immutable then
         raise(Error(sexp.pexp_loc, Label_not_mutable lid));
       begin_def ();
@@ -1332,7 +1351,9 @@ let rec type_exp env sexp =
   | Pexp_new cl ->
       let (cl_path, cl_decl) =
         try Env.lookup_class cl env with Not_found ->
-          raise(Error(sexp.pexp_loc, Unbound_class cl))
+          raise(Error(sexp.pexp_loc, Unbound_class (cl,[])))
+	  | Ident.Found_nearly l -> 
+	  raise(Error(sexp.pexp_loc, Unbound_class (cl,l)))
       in
         begin match cl_decl.cty_new with
           None ->
@@ -1361,10 +1382,13 @@ let rec type_exp env sexp =
         | Val_ivar _ ->
             raise(Error(sexp.pexp_loc, Instance_variable_not_mutable lab))
         | _ ->
-            raise(Error(sexp.pexp_loc, Unbound_instance_variable lab))
+            raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,[])))
       with
         Not_found ->
-          raise(Error(sexp.pexp_loc, Unbound_instance_variable lab))
+          raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,[])))
+	| Ident.Found_nearly l -> 
+	  raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,l)))
+
       end
   | Pexp_override lst ->
       let _ =
@@ -1380,7 +1404,7 @@ let rec type_exp env sexp =
         try
           Env.lookup_value (Longident.Lident "selfpat-*") env,
           Env.lookup_value (Longident.Lident "self-*") env
-        with Not_found ->
+        with Not_found | Ident.Found_nearly _ ->
           raise(Error(sexp.pexp_loc, Outside_class))
       with
         (_, {val_type = self_ty; val_kind = Val_self (_, vars, _, _)}),
@@ -1391,7 +1415,9 @@ let rec type_exp env sexp =
               (Path.Pident id, type_expect env snewval (instance ty))
             with
               Not_found ->
-                raise(Error(sexp.pexp_loc, Unbound_instance_variable lab))
+                raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,[])))
+	      | Ident.Found_nearly l -> 
+		raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,l)))
             end
           in
           let modifs = List.map type_override lst in
@@ -1637,7 +1663,7 @@ and type_application env funct sargs =
                   may_warn sarg0.pexp_loc
                     (Warnings.Not_principal "commuting this argument");
                 (l', sarg0, sargs1 @ sargs2, more_sargs)
-              with Not_found ->
+              with Not_found | Ident.Found_nearly _ ->
                 let (l', sarg0, sargs1, sargs2) =
                   extract_label name more_sargs in
                 if sargs1 <> [] || sargs <> [] then
@@ -1707,7 +1733,9 @@ and type_construct env loc lid sarg explicit_arity ty_expected =
     try
       Env.lookup_constructor lid env
     with Not_found ->
-      raise(Error(loc, Unbound_constructor lid)) in
+      raise(Error(loc, Unbound_constructor (lid,[])))
+      | Ident.Found_nearly l -> 
+      raise(Error(loc, Unbound_constructor (lid,l))) in
   let sargs =
     match sarg with
       None -> []
@@ -2020,12 +2048,18 @@ open Format
 open Printtyp
 
 let report_error ppf = function
-  | Unbound_value lid ->
-      fprintf ppf "Unbound value %a" longident lid
-  | Unbound_constructor lid ->
+  | Unbound_value (lid,[]) ->
+      fprintf ppf "Unbound value %a" longident lid 
+  | Unbound_value (lid,corr::_) ->
+      fprintf ppf "Unbound value %a, possible misspelling of %s" longident lid corr
+  | Unbound_constructor (lid,[]) ->
       fprintf ppf "Unbound constructor %a" longident lid
-  | Unbound_label lid ->
+  | Unbound_constructor (lid,corr::_) ->
+      fprintf ppf "Unbound constructor %a, possible misspelling of %s" longident lid corr
+  | Unbound_label (lid,[]) ->
       fprintf ppf "Unbound record field label %a" longident lid
+  | Unbound_label (lid,corr::_) ->
+      fprintf ppf "Unbound record field label %a, possible misspelling of %s" longident lid corr
   | Polymorphic_label lid ->
       fprintf ppf "@[The record field label %a is polymorphic.@ %s@]"
         longident lid "You cannot instantiate it in a pattern."
@@ -2100,13 +2134,17 @@ let report_error ppf = function
          It has no method %s@]" type_expr ty me
   | Undefined_inherited_method me ->
       fprintf ppf "This expression has no method %s" me
-  | Unbound_class cl ->
+  | Unbound_class (cl,[]) ->
       fprintf ppf "Unbound class %a" longident cl
+  | Unbound_class (cl,corr::_) ->
+      fprintf ppf "Unbound class %a, possible misspelling of %s" longident cl corr
   | Virtual_class cl ->
       fprintf ppf "One cannot create instances of the virtual class %a"
         longident cl
-  | Unbound_instance_variable v ->
+  | Unbound_instance_variable (v,[]) ->
       fprintf ppf "Unbound instance variable %s" v
+  | Unbound_instance_variable (v,corr::_) ->
+      fprintf ppf "Unbound instance variable %s, possible misspelling of %s" v corr
   | Instance_variable_not_mutable v ->
       fprintf ppf "The instance variable %s is not mutable" v
   | Not_subtype(tr1, tr2) ->
diff --git a/typing/typecore.mli b/typing/typecore.mli
index 24aea7d..171e96e 100644
--- a/typing/typecore.mli
+++ b/typing/typecore.mli
@@ -61,9 +61,9 @@ val force_delayed_checks: unit -> unit
 val self_coercion : (Path.t * Location.t list ref) list ref
 
 type error =
-    Unbound_value of Longident.t
-  | Unbound_constructor of Longident.t
-  | Unbound_label of Longident.t
+    Unbound_value of Longident.t * string list
+  | Unbound_constructor of Longident.t * string list
+  | Unbound_label of Longident.t * string list
   | Polymorphic_label of Longident.t
   | Constructor_arity_mismatch of Longident.t * int * int
   | Label_mismatch of Longident.t * (type_expr * type_expr) list
@@ -80,11 +80,11 @@ type error =
   | Bad_conversion of string * int * char
   | Undefined_method of type_expr * string
   | Undefined_inherited_method of string
-  | Unbound_class of Longident.t
+  | Unbound_class of Longident.t * string list
   | Virtual_class of Longident.t
   | Private_type of type_expr
   | Private_label of Longident.t * type_expr
-  | Unbound_instance_variable of string
+  | Unbound_instance_variable of string * string list
   | Instance_variable_not_mutable of string
   | Not_subtype of (type_expr * type_expr) list * (type_expr * type_expr) list
   | Outside_class

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Caml-list] Patch to 3.10.0 compiler enabling simple spell-checking
  2007-10-29 21:11 Patch to 3.10.0 compiler enabling simple spell-checking Edgar Friendly
@ 2007-10-29 21:34 ` Till Varoquaux
  2007-10-29 23:32 ` Julien Moutinho
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Till Varoquaux @ 2007-10-29 21:34 UTC (permalink / raw)
  To: Edgar Friendly; +Cc: Caml List

Cool!

Haven't looked at the patch yet but this seems like a neat feature
(might be a little too much of a gadget but,hey I love gadgets).

I am curious. Why is this dedicated to Jane Street? Since I am
probably the worst at typing out here (and even though they bought me
a shiny shiny keyboard) I will take this patch as a personnal
intention ;-).

Till

On 10/29/07, Edgar Friendly <thelema314@gmail.com> wrote:
> One random little feature of GNAT that comes in handy for me is its
> habit of, when I misspell an identifier, giving me a possible correction
> in its compile error message.  Spending some time with the 3.10.0
> sources, I have created a "second draft" patch creating this
> functionality in my favored language.
>
> Example:
> ========
>
> # /home/thelema/Projects/ocaml-custom/bin/ocamlc -o coml -I +lablgtk2
> lablgtk.cma gtkInit.cmo coml.ml
> File "coml.ml", line 61, characters 16-25:
> Unbound value is_arcive, possible misspelling of is_archive
>
> Impacts:
> ========
>
> Efficiency in the case of finding a mistake should be quite good,
> although this shouldn't matter too much since the compiler quits pretty
> early in compilation when it finds an unbound identifier.
>
> In the case of no unbound identifiers, the cost is an extra try/with
> block around the standard lookup.  I haven't made any benchmarks, though.
>
> I expect this code to have little long term maintenance issues - the
> major source of code changes was adding a "* string list" to a number of
> exceptions to carry the list of possible correct spellings to the point
> they get output by the compiler.  These exceptions are still usable as
> before with an empty list in this spot.
>
> It's possible the code has created opportunities for uncaught exceptions
> in the compiler as I only checked for instances of "Not_found" in a few
> files -- those which dealt with the Unbound_* exceptions.  Someone who
> knows the internals better might find places the "Found_nearly"
> exception that carries possible corrections might escape into.
>
>
> Dedicated to:
> Yaron Minsky and the team at Jane Street
>
> E.
>
> _______________________________________________
> Caml-list mailing list. Subscription management:
> http://yquem.inria.fr/cgi-bin/mailman/listinfo/caml-list
> Archives: http://caml.inria.fr
> Beginner's list: http://groups.yahoo.com/group/ocaml_beginners
> Bug reports: http://caml.inria.fr/bin/caml-bugs
>
>
>


-- 
http://till-varoquaux.blogspot.com/


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Caml-list] Patch to 3.10.0 compiler enabling simple spell-checking
  2007-10-29 21:11 Patch to 3.10.0 compiler enabling simple spell-checking Edgar Friendly
  2007-10-29 21:34 ` [Caml-list] " Till Varoquaux
@ 2007-10-29 23:32 ` Julien Moutinho
  2007-10-30  5:51   ` Edgar Friendly
  2007-10-30  0:13 ` Yitzhak Mandelbaum
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 10+ messages in thread
From: Julien Moutinho @ 2007-10-29 23:32 UTC (permalink / raw)
  To: Caml List

On Mon, Oct 29, 2007 at 04:11:12PM -0500, Edgar Friendly wrote:
> Spending some time with the 3.10.0 sources, I have created
> a "second draft" patch creating this functionality
> in my favored language.

I'm sorry but could it be that you have posted an incomplete patch?

For instance typing/typetexp.ml should be modified, because
it defines [Unbound_type_constructor of Longident.t]
which is used in b/typing/typecore.ml
as a [of Longident.t * string list]

Besides [find_name_with_nearly] is defined in b/typing/ident.ml
but is never used anywhere.

Also, could you post a patch against today's release310 branch please?

Regards,
  Julien.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Caml-list] Patch to 3.10.0 compiler enabling simple spell-checking
  2007-10-29 21:11 Patch to 3.10.0 compiler enabling simple spell-checking Edgar Friendly
  2007-10-29 21:34 ` [Caml-list] " Till Varoquaux
  2007-10-29 23:32 ` Julien Moutinho
@ 2007-10-30  0:13 ` Yitzhak Mandelbaum
  2007-10-30  0:17 ` Yitzhak Mandelbaum
  2007-10-30  8:15 ` Sébastien Hinderer
  4 siblings, 0 replies; 10+ messages in thread
From: Yitzhak Mandelbaum @ 2007-10-30  0:13 UTC (permalink / raw)
  To: Edgar Friendly; +Cc: Caml List

[-- Attachment #1: Type: text/plain, Size: 19236 bytes --]

Very cool! Do you think there's any way you could separate it from  
the compiler, like Learner et al.'s SEMINAL work, which separates  
type error messages from the compiler?. Separation could help ensure  
this (and any other, similar) ideas don't accidentally introduce bugs  
into the compiler, and make it much easier for you to maintain. A  
very simple hack might be tod wrap ocamlc in a script that parses  
such error messages and then tokenizes the source file, looking for  
similar strings?

Cheers,
Yitzhak


On Oct 29, 2007, at 5:11 PM, Edgar Friendly wrote:

> One random little feature of GNAT that comes in handy for me is its
> habit of, when I misspell an identifier, giving me a possible  
> correction
> in its compile error message.  Spending some time with the 3.10.0
> sources, I have created a "second draft" patch creating this
> functionality in my favored language.
>
> Example:
> ========
>
> # /home/thelema/Projects/ocaml-custom/bin/ocamlc -o coml -I +lablgtk2
> lablgtk.cma gtkInit.cmo coml.ml
> File "coml.ml", line 61, characters 16-25:
> Unbound value is_arcive, possible misspelling of is_archive
>
> Impacts:
> ========
>
> Efficiency in the case of finding a mistake should be quite good,
> although this shouldn't matter too much since the compiler quits  
> pretty
> early in compilation when it finds an unbound identifier.
>
> In the case of no unbound identifiers, the cost is an extra try/with
> block around the standard lookup.  I haven't made any benchmarks,  
> though.
>
> I expect this code to have little long term maintenance issues - the
> major source of code changes was adding a "* string list" to a  
> number of
> exceptions to carry the list of possible correct spellings to the  
> point
> they get output by the compiler.  These exceptions are still usable as
> before with an empty list in this spot.
>
> It's possible the code has created opportunities for uncaught  
> exceptions
> in the compiler as I only checked for instances of "Not_found" in a  
> few
> files -- those which dealt with the Unbound_* exceptions.  Someone who
> knows the internals better might find places the "Found_nearly"
> exception that carries possible corrections might escape into.
>
>
> Dedicated to:
> Yaron Minsky and the team at Jane Street
>
> E.
> diff --git a/typing/ident.ml b/typing/ident.ml
> index a30aa7a..8bbb395 100644
> --- a/typing/ident.ml
> +++ b/typing/ident.ml
> @@ -56,6 +56,61 @@ let same i1 i2 = i1 = i2
>         then i1.stamp = i2.stamp
>         else i2.stamp = 0 && i1.name = i2.name *)
>
> +(* does various heuristics to find spelling mistakes - algorithm  
> from GNAT Ada compiler - Lots of code, but hopefully about as  
> efficient as possible *)
> +let nearly_equal s i =
> +  let is_digit c = c >= '0' && c <= '9' in
> +  let rec tails_equal s1 i1 s2 i2 = i1 >= String.length s1 || (s1. 
> [i1] = s2.[i2] && tails_equal s1 (i1+1) s2 (i2+1)) in
> +
> +  let n1 = s and n2 = i.name in
> +  let l1 = String.length n1 and l2 = String.length n2 in
> +  if l1 = 0 then l2 = 0 (* both null -> equal *)
> +  else if l2 = 0 then false (* one of n1 or n2 null -> not equal *)
> +  else if n1.[0] != n2.[0] then false (* first characters don't  
> match -> not *)
> +  else if l1 < 3 && l2 < 3 then false (* short strings are all  
> different *)
> +  else if l1 = l2 then
> +    (* look for single errors, transpositions *)
> +    let rec find_diff c =
> +      if c < (l1 - 1) then
> +	if n1.[c] != n2.[c] then
> +	  (* mismatched digits -> not equal *)
> +	  if is_digit n1.[c] && is_digit n2.[c] then false
> +	    (* single error -> equal *)
> +	  else if n1.[c+1] = n2.[c+1] && tails_equal n1 (c+2) n2 (c+2)
> +	  then true
> +	    (* transposition -> equal *)
> +	  else if n1.[c] = n2.[c+1] && n1.[c+1] = n2.[c] &&
> +	  tails_equal n1 (c+2) n2 (c+2)
> +	  then true
> +	  else false
> +	else find_diff (c+1)
> +      else (* at the end of the strings *)
> +	if is_digit n1.[c] && is_digit n2.[c]
> +	  && n1.[c] != n2.[c]
> +	then false
> +	else true
> +    in
> +    find_diff 1
> +  else if l1 = l2 - 1 then (* short by one *)
> +    let rec find_del c =
> +      if c < l1 then
> +	if n1.[c] != n2.[c] then
> +	  tails_equal n1 c n2 (c+1)
> +	else find_del (c+1)
> +      else true (* last character was deleted *)
> +    in
> +    find_del 1
> +  else if l1 = l2 + 1 then (* too long by one *)
> +    let rec find_add c =
> +      if c < l2 then
> +	if n1.[c] != n2.[c] then
> +	  tails_equal n1 (c+1) n2 c
> +	else find_add (c+1)
> +      else true (* last character added *)
> +    in
> +    find_add 1
> +  else (* lengths totally different *)
> +    false
> +	
>  let binding_time i = i.stamp
>
>  let current_time() = !currentstamp
> @@ -97,6 +152,8 @@ and 'a data =
>
>  let empty = Empty
>
> +exception Found_nearly of string list
> +
>  (* Inline expansion of height for better speed
>   * let height = function
>   *     Empty -> 0
> @@ -182,3 +239,34 @@ let rec keys_aux stack accu = function
>        keys_aux (l :: stack) (k.ident :: accu) r
>
>  let keys tbl = keys_aux [] [] tbl
> +
> +let find_nearly_equal n tbl =
> +(*  List.filter (nearly_equal name) (keys tbl) -- optimized for  
> your enjoyment*)
> +  let rec find_nearly_equal_aux stack accu = function
> +      Empty ->
> +	begin match stack with
> +            [] -> accu
> +	  | a :: l -> find_nearly_equal_aux l accu a
> +	end
> +    | Node(l, k, r, _) ->
> +	let accu' =
> +	  if nearly_equal n k.ident
> +	  then k.ident.name :: accu
> +	  else accu
> +	in
> +	find_nearly_equal_aux (l :: stack) accu' r
> +  in
> +  find_nearly_equal_aux [] [] tbl
> +
> +let find_name_with_nearly name tbl =
> +  try find_name name tbl
> +  with Not_found ->
> +(*    prerr_string "FNE: "; prerr_endline name; *)
> +    let nearlies = find_nearly_equal name tbl in
> +(*    let rec pr_list to_str = function [] -> prerr_endline  
> "Empty" | [a] -> prerr_endline (to_str a) | h :: t -> prerr_string  
> (to_str h); prerr_string ", "; pr_list to_str t
> +    in
> +    prerr_string "nearlies: ";
> +    pr_list (fun i -> i) nearlies;
> +    prerr_string "whole table: ";
> +    pr_list (fun i -> i.name) (keys tbl); *)
> +    raise (Found_nearly nearlies)
> diff --git a/typing/ident.mli b/typing/ident.mli
> index 9f7372c..7ab9327 100644
> --- a/typing/ident.mli
> +++ b/typing/ident.mli
> @@ -52,8 +52,12 @@ val print: Format.formatter -> t -> unit
>  type 'a tbl
>          (* Association tables from identifiers to type 'a. *)
>
> +exception Found_nearly of string list
> +
>  val empty: 'a tbl
>  val add: t -> 'a -> 'a tbl -> 'a tbl
>  val find_same: t -> 'a tbl -> 'a
>  val find_name: string -> 'a tbl -> 'a
>  val keys: 'a tbl -> t list
> +
> +val find_name_with_nearly: string -> 'a tbl -> 'a
> diff --git a/typing/typecore.ml b/typing/typecore.ml
> index 691eb49..adff3ad 100644
> --- a/typing/typecore.ml
> +++ b/typing/typecore.ml
> @@ -23,9 +23,9 @@ open Btype
>  open Ctype
>
>  type error =
> -    Unbound_value of Longident.t
> -  | Unbound_constructor of Longident.t
> -  | Unbound_label of Longident.t
> +    Unbound_value of Longident.t * string list
> +  | Unbound_constructor of Longident.t * string list
> +  | Unbound_label of Longident.t * string list
>    | Polymorphic_label of Longident.t
>    | Constructor_arity_mismatch of Longident.t * int * int
>    | Label_mismatch of Longident.t * (type_expr * type_expr) list
> @@ -42,11 +42,11 @@ type error =
>    | Bad_conversion of string * int * char
>    | Undefined_method of type_expr * string
>    | Undefined_inherited_method of string
> -  | Unbound_class of Longident.t
> +  | Unbound_class of Longident.t * string list
>    | Virtual_class of Longident.t
>    | Private_type of type_expr
>    | Private_label of Longident.t * type_expr
> -  | Unbound_instance_variable of string
> +  | Unbound_instance_variable of string * string list
>    | Instance_variable_not_mutable of string
>    | Not_subtype of (type_expr * type_expr) list * (type_expr *  
> type_expr) list
>    | Outside_class
> @@ -276,7 +276,7 @@ let rec build_as_type env p =
>        unify_pat env {p2 with pat_type = ty2} ty1;
>        begin match path with None -> ()
>        | Some path ->
> -          let td = try Env.find_type path env with Not_found ->  
> assert false in
> +          let td = try Env.find_type path env with Not_found |  
> Ident.Found_nearly _ -> assert false in
>            let params = List.map (fun _ -> newvar()) td.type_params in
>            match expand_head env (newty (Tconstr (path, params, ref  
> Mnil)))
>            with {desc=Tvariant row} when static_row row ->
> @@ -291,7 +291,9 @@ let build_or_pat env loc lid =
>    let path, decl =
>      try Env.lookup_type lid env
>      with Not_found ->
> -      raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor  
> lid))
> +      raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor  
> (lid,[])))
> +      | Ident.Found_nearly l ->
> +      raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor  
> (lid,l)))
>    in
>    let tyl = List.map (fun _ -> newvar()) decl.type_params in
>    let fields =
> @@ -400,7 +402,10 @@ let rec type_pat env sp =
>          try
>            Env.lookup_constructor lid env
>          with Not_found ->
> -          raise(Error(sp.ppat_loc, Unbound_constructor lid)) in
> +          raise(Error(sp.ppat_loc, Unbound_constructor (lid,[])))
> +	  | Ident.Found_nearly l ->
> +	  raise(Error(sp.ppat_loc, Unbound_constructor (lid,l)))
> +      in
>        let sargs =
>          match sarg with
>            None -> []
> @@ -449,7 +454,10 @@ let rec type_pat env sp =
>            try
>              Env.lookup_label lid env
>            with Not_found ->
> -            raise(Error(sp.ppat_loc, Unbound_label lid)) in
> +            raise(Error(sp.ppat_loc, Unbound_label (lid,[])))
> +	    | Ident.Found_nearly l ->
> +	    raise(Error(sp.ppat_loc, Unbound_label (lid,l)))
> +	in
>          begin_def ();
>          let (vars, ty_arg, ty_res) = instance_label false label in
>          if vars = [] then end_def ();
> @@ -806,7 +814,7 @@ let rec approx_type env sty =
>          if List.length ctl <> decl.type_arity then raise Not_found;
>          let tyl = List.map (approx_type env) ctl in
>          newconstr path tyl
> -      with Not_found -> newvar ()
> +      with Not_found | Ident.Found_nearly _ -> newvar ()
>        end
>    | _ -> newvar ()
>
> @@ -923,7 +931,9 @@ let rec type_exp env sexp =
>            exp_type = instance desc.val_type;
>            exp_env = env }
>        with Not_found ->
> -        raise(Error(sexp.pexp_loc, Unbound_value lid))
> +        raise(Error(sexp.pexp_loc, Unbound_value (lid,[])))
> +	| Ident.Found_nearly l ->
> +	raise(Error(sexp.pexp_loc, Unbound_value (lid,l)))
>        end
>    | Pexp_constant cst ->
>        re {
> @@ -1021,7 +1031,10 @@ let rec type_exp env sexp =
>            try
>              Env.lookup_label lid env
>            with Not_found ->
> -            raise(Error(sexp.pexp_loc, Unbound_label lid)) in
> +            raise(Error(sexp.pexp_loc, Unbound_label (lid,[])))
> +	    | Ident.Found_nearly l ->
> +	    raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))
> +	in
>          begin_def ();
>          if !Clflags.principal then begin_def ();
>          let (vars, ty_arg, ty_res) = instance_label true label in
> @@ -1098,7 +1111,10 @@ let rec type_exp env sexp =
>          try
>            Env.lookup_label lid env
>          with Not_found ->
> -          raise(Error(sexp.pexp_loc, Unbound_label lid)) in
> +          raise(Error(sexp.pexp_loc, Unbound_label (lid,[])))
> +	  | Ident.Found_nearly l ->
> +	  raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))
> +      in
>        let (_, ty_arg, ty_res) = instance_label false label in
>        unify_exp env arg ty_res;
>        re {
> @@ -1112,7 +1128,10 @@ let rec type_exp env sexp =
>          try
>            Env.lookup_label lid env
>          with Not_found ->
> -          raise(Error(sexp.pexp_loc, Unbound_label lid)) in
> +          raise(Error(sexp.pexp_loc, Unbound_label (lid,[])))
> +	  | Ident.Found_nearly l ->
> +	  raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))
> +      in
>        if label.lbl_mut = Immutable then
>          raise(Error(sexp.pexp_loc, Label_not_mutable lid));
>        begin_def ();
> @@ -1332,7 +1351,9 @@ let rec type_exp env sexp =
>    | Pexp_new cl ->
>        let (cl_path, cl_decl) =
>          try Env.lookup_class cl env with Not_found ->
> -          raise(Error(sexp.pexp_loc, Unbound_class cl))
> +          raise(Error(sexp.pexp_loc, Unbound_class (cl,[])))
> +	  | Ident.Found_nearly l ->
> +	  raise(Error(sexp.pexp_loc, Unbound_class (cl,l)))
>        in
>          begin match cl_decl.cty_new with
>            None ->
> @@ -1361,10 +1382,13 @@ let rec type_exp env sexp =
>          | Val_ivar _ ->
>              raise(Error(sexp.pexp_loc,  
> Instance_variable_not_mutable lab))
>          | _ ->
> -            raise(Error(sexp.pexp_loc, Unbound_instance_variable  
> lab))
> +            raise(Error(sexp.pexp_loc, Unbound_instance_variable  
> (lab,[])))
>        with
>          Not_found ->
> -          raise(Error(sexp.pexp_loc, Unbound_instance_variable lab))
> +          raise(Error(sexp.pexp_loc, Unbound_instance_variable  
> (lab,[])))
> +	| Ident.Found_nearly l ->
> +	  raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,l)))
> +
>        end
>    | Pexp_override lst ->
>        let _ =
> @@ -1380,7 +1404,7 @@ let rec type_exp env sexp =
>          try
>            Env.lookup_value (Longident.Lident "selfpat-*") env,
>            Env.lookup_value (Longident.Lident "self-*") env
> -        with Not_found ->
> +        with Not_found | Ident.Found_nearly _ ->
>            raise(Error(sexp.pexp_loc, Outside_class))
>        with
>          (_, {val_type = self_ty; val_kind = Val_self (_, vars, _,  
> _)}),
> @@ -1391,7 +1415,9 @@ let rec type_exp env sexp =
>                (Path.Pident id, type_expect env snewval (instance ty))
>              with
>                Not_found ->
> -                raise(Error(sexp.pexp_loc,  
> Unbound_instance_variable lab))
> +                raise(Error(sexp.pexp_loc,  
> Unbound_instance_variable (lab,[])))
> +	      | Ident.Found_nearly l ->
> +		raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,l)))
>              end
>            in
>            let modifs = List.map type_override lst in
> @@ -1637,7 +1663,7 @@ and type_application env funct sargs =
>                    may_warn sarg0.pexp_loc
>                      (Warnings.Not_principal "commuting this  
> argument");
>                  (l', sarg0, sargs1 @ sargs2, more_sargs)
> -              with Not_found ->
> +              with Not_found | Ident.Found_nearly _ ->
>                  let (l', sarg0, sargs1, sargs2) =
>                    extract_label name more_sargs in
>                  if sargs1 <> [] || sargs <> [] then
> @@ -1707,7 +1733,9 @@ and type_construct env loc lid sarg  
> explicit_arity ty_expected =
>      try
>        Env.lookup_constructor lid env
>      with Not_found ->
> -      raise(Error(loc, Unbound_constructor lid)) in
> +      raise(Error(loc, Unbound_constructor (lid,[])))
> +      | Ident.Found_nearly l ->
> +      raise(Error(loc, Unbound_constructor (lid,l))) in
>    let sargs =
>      match sarg with
>        None -> []
> @@ -2020,12 +2048,18 @@ open Format
>  open Printtyp
>
>  let report_error ppf = function
> -  | Unbound_value lid ->
> -      fprintf ppf "Unbound value %a" longident lid
> -  | Unbound_constructor lid ->
> +  | Unbound_value (lid,[]) ->
> +      fprintf ppf "Unbound value %a" longident lid
> +  | Unbound_value (lid,corr::_) ->
> +      fprintf ppf "Unbound value %a, possible misspelling of %s"  
> longident lid corr
> +  | Unbound_constructor (lid,[]) ->
>        fprintf ppf "Unbound constructor %a" longident lid
> -  | Unbound_label lid ->
> +  | Unbound_constructor (lid,corr::_) ->
> +      fprintf ppf "Unbound constructor %a, possible misspelling of  
> %s" longident lid corr
> +  | Unbound_label (lid,[]) ->
>        fprintf ppf "Unbound record field label %a" longident lid
> +  | Unbound_label (lid,corr::_) ->
> +      fprintf ppf "Unbound record field label %a, possible  
> misspelling of %s" longident lid corr
>    | Polymorphic_label lid ->
>        fprintf ppf "@[The record field label %a is polymorphic.@ %s@]"
>          longident lid "You cannot instantiate it in a pattern."
> @@ -2100,13 +2134,17 @@ let report_error ppf = function
>           It has no method %s@]" type_expr ty me
>    | Undefined_inherited_method me ->
>        fprintf ppf "This expression has no method %s" me
> -  | Unbound_class cl ->
> +  | Unbound_class (cl,[]) ->
>        fprintf ppf "Unbound class %a" longident cl
> +  | Unbound_class (cl,corr::_) ->
> +      fprintf ppf "Unbound class %a, possible misspelling of %s"  
> longident cl corr
>    | Virtual_class cl ->
>        fprintf ppf "One cannot create instances of the virtual  
> class %a"
>          longident cl
> -  | Unbound_instance_variable v ->
> +  | Unbound_instance_variable (v,[]) ->
>        fprintf ppf "Unbound instance variable %s" v
> +  | Unbound_instance_variable (v,corr::_) ->
> +      fprintf ppf "Unbound instance variable %s, possible  
> misspelling of %s" v corr
>    | Instance_variable_not_mutable v ->
>        fprintf ppf "The instance variable %s is not mutable" v
>    | Not_subtype(tr1, tr2) ->
> diff --git a/typing/typecore.mli b/typing/typecore.mli
> index 24aea7d..171e96e 100644
> --- a/typing/typecore.mli
> +++ b/typing/typecore.mli
> @@ -61,9 +61,9 @@ val force_delayed_checks: unit -> unit
>  val self_coercion : (Path.t * Location.t list ref) list ref
>
>  type error =
> -    Unbound_value of Longident.t
> -  | Unbound_constructor of Longident.t
> -  | Unbound_label of Longident.t
> +    Unbound_value of Longident.t * string list
> +  | Unbound_constructor of Longident.t * string list
> +  | Unbound_label of Longident.t * string list
>    | Polymorphic_label of Longident.t
>    | Constructor_arity_mismatch of Longident.t * int * int
>    | Label_mismatch of Longident.t * (type_expr * type_expr) list
> @@ -80,11 +80,11 @@ type error =
>    | Bad_conversion of string * int * char
>    | Undefined_method of type_expr * string
>    | Undefined_inherited_method of string
> -  | Unbound_class of Longident.t
> +  | Unbound_class of Longident.t * string list
>    | Virtual_class of Longident.t
>    | Private_type of type_expr
>    | Private_label of Longident.t * type_expr
> -  | Unbound_instance_variable of string
> +  | Unbound_instance_variable of string * string list
>    | Instance_variable_not_mutable of string
>    | Not_subtype of (type_expr * type_expr) list * (type_expr *  
> type_expr) list
>    | Outside_class
> _______________________________________________
> Caml-list mailing list. Subscription management:
> http://yquem.inria.fr/cgi-bin/mailman/listinfo/caml-list
> Archives: http://caml.inria.fr
> Beginner's list: http://groups.yahoo.com/group/ocaml_beginners
> Bug reports: http://caml.inria.fr/bin/caml-bugs

--------------------------------------------------
Yitzhak Mandelbaum
AT&T Labs - Research

http://www.research.att.com/~yitzhak



[-- Attachment #2: Type: text/html, Size: 81695 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Caml-list] Patch to 3.10.0 compiler enabling simple spell-checking
  2007-10-29 21:11 Patch to 3.10.0 compiler enabling simple spell-checking Edgar Friendly
                   ` (2 preceding siblings ...)
  2007-10-30  0:13 ` Yitzhak Mandelbaum
@ 2007-10-30  0:17 ` Yitzhak Mandelbaum
  2007-10-30  5:53   ` Edgar Friendly
  2007-10-30  8:15 ` Sébastien Hinderer
  4 siblings, 1 reply; 10+ messages in thread
From: Yitzhak Mandelbaum @ 2007-10-30  0:17 UTC (permalink / raw)
  To: Caml List

[-- Attachment #1: Type: text/plain, Size: 2348 bytes --]

Very cool! Do you think there's any way you could separate it from  
the compiler, like Learner et al.'s SEMINAL work, which separates  
type error messages from the compiler?. Separation could help ensure  
this (and any other, similar) ideas don't accidentally introduce bugs  
into the compiler, and make it much easier for you to maintain. A  
very simple hack might be tod wrap ocamlc in a script that parses  
such error messages and then tokenizes the source file, looking for  
similar strings?

Cheers,
Yitzhak

On Oct 29, 2007, at 5:11 PM, Edgar Friendly wrote:

> One random little feature of GNAT that comes in handy for me is its
> habit of, when I misspell an identifier, giving me a possible  
> correction
> in its compile error message.  Spending some time with the 3.10.0
> sources, I have created a "second draft" patch creating this
> functionality in my favored language.
>
> Example:
> ========
>
> # /home/thelema/Projects/ocaml-custom/bin/ocamlc -o coml -I +lablgtk2
> lablgtk.cma gtkInit.cmo coml.ml
> File "coml.ml", line 61, characters 16-25:
> Unbound value is_arcive, possible misspelling of is_archive
>
> Impacts:
> ========
>
> Efficiency in the case of finding a mistake should be quite good,
> although this shouldn't matter too much since the compiler quits  
> pretty
> early in compilation when it finds an unbound identifier.
>
> In the case of no unbound identifiers, the cost is an extra try/with
> block around the standard lookup.  I haven't made any benchmarks,  
> though.
>
> I expect this code to have little long term maintenance issues - the
> major source of code changes was adding a "* string list" to a  
> number of
> exceptions to carry the list of possible correct spellings to the  
> point
> they get output by the compiler.  These exceptions are still usable as
> before with an empty list in this spot.
>
> It's possible the code has created opportunities for uncaught  
> exceptions
> in the compiler as I only checked for instances of "Not_found" in a  
> few
> files -- those which dealt with the Unbound_* exceptions.  Someone who
> knows the internals better might find places the "Found_nearly"
> exception that carries possible corrections might escape into.
>
>
> Dedicated to:
> Yaron Minsky and the team at Jane Street
>
> E.

-----------------------------
Yitzhak Mandelbaum




[-- Attachment #2: Type: text/html, Size: 7686 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Caml-list] Patch to 3.10.0 compiler enabling simple spell-checking
  2007-10-29 23:32 ` Julien Moutinho
@ 2007-10-30  5:51   ` Edgar Friendly
  0 siblings, 0 replies; 10+ messages in thread
From: Edgar Friendly @ 2007-10-30  5:51 UTC (permalink / raw)
  To: Caml List

[-- Attachment #1: Type: text/plain, Size: 533 bytes --]

Julien Moutinho wrote:
> On Mon, Oct 29, 2007 at 04:11:12PM -0500, Edgar Friendly wrote:
>> Spending some time with the 3.10.0 sources, I have created
>> a "second draft" patch creating this functionality
>> in my favored language.
> 
> I'm sorry but could it be that you have posted an incomplete patch?

I did.  Here's a "third draft" which should include all the necessary
bits to patch off 3.10.0.  There's still plenty of rough edges to smooth
out in the patch, but it should suffice for people to have something
working.

E.



[-- Attachment #2: ocaml-spelling-3.10.0.patch --]
[-- Type: text/x-patch, Size: 21960 bytes --]

diff --git a/typing/env.ml b/typing/env.ml
index 780ed8d..a0114be 100644
--- a/typing/env.ml
+++ b/typing/env.ml
@@ -357,7 +357,7 @@ and lookup_module lid env =
 let lookup proj1 proj2 lid env =
   match lid with
     Lident s ->
-      Ident.find_name s (proj1 env)
+      Ident.find_name_with_nearly s (proj1 env)
   | Ldot(l, s) ->
       let (p, desc) = lookup_module_descr l env in
       begin match Lazy.force desc with
@@ -373,7 +373,7 @@ let lookup proj1 proj2 lid env =
 let lookup_simple proj1 proj2 lid env =
   match lid with
     Lident s ->
-      Ident.find_name s (proj1 env)
+      Ident.find_name_with_nearly s (proj1 env)
   | Ldot(l, s) ->
       let (p, desc) = lookup_module_descr l env in
       begin match Lazy.force desc with
diff --git a/typing/ident.ml b/typing/ident.ml
index a30aa7a..8bbb395 100644
--- a/typing/ident.ml
+++ b/typing/ident.ml
@@ -56,6 +56,61 @@ let same i1 i2 = i1 = i2
        then i1.stamp = i2.stamp
        else i2.stamp = 0 && i1.name = i2.name *)
 
+(* does various heuristics to find spelling mistakes - algorithm from GNAT Ada compiler - Lots of code, but hopefully about as efficient as possible *)
+let nearly_equal s i =
+  let is_digit c = c >= '0' && c <= '9' in
+  let rec tails_equal s1 i1 s2 i2 = i1 >= String.length s1 || (s1.[i1] = s2.[i2] && tails_equal s1 (i1+1) s2 (i2+1)) in
+  
+  let n1 = s and n2 = i.name in
+  let l1 = String.length n1 and l2 = String.length n2 in
+  if l1 = 0 then l2 = 0 (* both null -> equal *)
+  else if l2 = 0 then false (* one of n1 or n2 null -> not equal *)
+  else if n1.[0] != n2.[0] then false (* first characters don't match -> not *)
+  else if l1 < 3 && l2 < 3 then false (* short strings are all different *)
+  else if l1 = l2 then
+    (* look for single errors, transpositions *)
+    let rec find_diff c = 
+      if c < (l1 - 1) then 
+	if n1.[c] != n2.[c] then 
+	  (* mismatched digits -> not equal *)
+	  if is_digit n1.[c] && is_digit n2.[c] then false
+	    (* single error -> equal *)
+	  else if n1.[c+1] = n2.[c+1] && tails_equal n1 (c+2) n2 (c+2)
+	  then true
+	    (* transposition -> equal *)
+	  else if n1.[c] = n2.[c+1] && n1.[c+1] = n2.[c] &&
+	  tails_equal n1 (c+2) n2 (c+2)
+	  then true
+	  else false
+	else find_diff (c+1)
+      else (* at the end of the strings *)
+	if is_digit n1.[c] && is_digit n2.[c] 
+	  && n1.[c] != n2.[c] 
+	then false
+	else true
+    in
+    find_diff 1
+  else if l1 = l2 - 1 then (* short by one *)
+    let rec find_del c = 
+      if c < l1 then
+	if n1.[c] != n2.[c] then 
+	  tails_equal n1 c n2 (c+1)
+	else find_del (c+1)
+      else true (* last character was deleted *)
+    in
+    find_del 1
+  else if l1 = l2 + 1 then (* too long by one *)
+    let rec find_add c =
+      if c < l2 then
+	if n1.[c] != n2.[c] then 
+	  tails_equal n1 (c+1) n2 c
+	else find_add (c+1)
+      else true (* last character added *)
+    in
+    find_add 1
+  else (* lengths totally different *)
+    false
+	
 let binding_time i = i.stamp
 
 let current_time() = !currentstamp
@@ -97,6 +152,8 @@ and 'a data =
 
 let empty = Empty
 
+exception Found_nearly of string list
+
 (* Inline expansion of height for better speed
  * let height = function
  *     Empty -> 0
@@ -182,3 +239,34 @@ let rec keys_aux stack accu = function
       keys_aux (l :: stack) (k.ident :: accu) r
 
 let keys tbl = keys_aux [] [] tbl
+
+let find_nearly_equal n tbl =
+(*  List.filter (nearly_equal name) (keys tbl) -- optimized for your enjoyment*)
+  let rec find_nearly_equal_aux stack accu = function
+      Empty ->
+	begin match stack with
+            [] -> accu
+	  | a :: l -> find_nearly_equal_aux l accu a
+	end
+    | Node(l, k, r, _) -> 
+	let accu' = 
+	  if nearly_equal n k.ident 
+	  then k.ident.name :: accu
+	  else accu 
+	in
+	find_nearly_equal_aux (l :: stack) accu' r
+  in
+  find_nearly_equal_aux [] [] tbl
+
+let find_name_with_nearly name tbl = 
+  try find_name name tbl
+  with Not_found -> 
+(*    prerr_string "FNE: "; prerr_endline name; *)
+    let nearlies = find_nearly_equal name tbl in
+(*    let rec pr_list to_str = function [] -> prerr_endline "Empty" | [a] -> prerr_endline (to_str a) | h :: t -> prerr_string (to_str h); prerr_string ", "; pr_list to_str t
+    in
+    prerr_string "nearlies: ";
+    pr_list (fun i -> i) nearlies;
+    prerr_string "whole table: ";
+    pr_list (fun i -> i.name) (keys tbl); *)
+    raise (Found_nearly nearlies)
diff --git a/typing/ident.mli b/typing/ident.mli
index 9f7372c..7ab9327 100644
--- a/typing/ident.mli
+++ b/typing/ident.mli
@@ -52,8 +52,12 @@ val print: Format.formatter -> t -> unit
 type 'a tbl
         (* Association tables from identifiers to type 'a. *)
 
+exception Found_nearly of string list
+
 val empty: 'a tbl
 val add: t -> 'a -> 'a tbl -> 'a tbl
 val find_same: t -> 'a tbl -> 'a
 val find_name: string -> 'a tbl -> 'a
 val keys: 'a tbl -> t list
+
+val find_name_with_nearly: string -> 'a tbl -> 'a
diff --git a/typing/typecore.ml b/typing/typecore.ml
index 691eb49..adff3ad 100644
--- a/typing/typecore.ml
+++ b/typing/typecore.ml
@@ -23,9 +23,9 @@ open Btype
 open Ctype
 
 type error =
-    Unbound_value of Longident.t
-  | Unbound_constructor of Longident.t
-  | Unbound_label of Longident.t
+    Unbound_value of Longident.t * string list
+  | Unbound_constructor of Longident.t * string list
+  | Unbound_label of Longident.t * string list
   | Polymorphic_label of Longident.t
   | Constructor_arity_mismatch of Longident.t * int * int
   | Label_mismatch of Longident.t * (type_expr * type_expr) list
@@ -42,11 +42,11 @@ type error =
   | Bad_conversion of string * int * char
   | Undefined_method of type_expr * string
   | Undefined_inherited_method of string
-  | Unbound_class of Longident.t
+  | Unbound_class of Longident.t * string list
   | Virtual_class of Longident.t
   | Private_type of type_expr
   | Private_label of Longident.t * type_expr
-  | Unbound_instance_variable of string
+  | Unbound_instance_variable of string * string list
   | Instance_variable_not_mutable of string
   | Not_subtype of (type_expr * type_expr) list * (type_expr * type_expr) list
   | Outside_class
@@ -276,7 +276,7 @@ let rec build_as_type env p =
       unify_pat env {p2 with pat_type = ty2} ty1;
       begin match path with None -> ()
       | Some path ->
-          let td = try Env.find_type path env with Not_found -> assert false in
+          let td = try Env.find_type path env with Not_found | Ident.Found_nearly _ -> assert false in
           let params = List.map (fun _ -> newvar()) td.type_params in
           match expand_head env (newty (Tconstr (path, params, ref Mnil)))
           with {desc=Tvariant row} when static_row row ->
@@ -291,7 +291,9 @@ let build_or_pat env loc lid =
   let path, decl =
     try Env.lookup_type lid env
     with Not_found ->
-      raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor lid))
+      raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor (lid,[])))
+      | Ident.Found_nearly l -> 
+      raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor (lid,l)))
   in
   let tyl = List.map (fun _ -> newvar()) decl.type_params in
   let fields =
@@ -400,7 +402,10 @@ let rec type_pat env sp =
         try
           Env.lookup_constructor lid env
         with Not_found ->
-          raise(Error(sp.ppat_loc, Unbound_constructor lid)) in
+          raise(Error(sp.ppat_loc, Unbound_constructor (lid,[]))) 
+	  | Ident.Found_nearly l -> 
+	  raise(Error(sp.ppat_loc, Unbound_constructor (lid,l)))
+      in
       let sargs =
         match sarg with
           None -> []
@@ -449,7 +454,10 @@ let rec type_pat env sp =
           try
             Env.lookup_label lid env
           with Not_found ->
-            raise(Error(sp.ppat_loc, Unbound_label lid)) in
+            raise(Error(sp.ppat_loc, Unbound_label (lid,[]))) 
+	    | Ident.Found_nearly l -> 
+	    raise(Error(sp.ppat_loc, Unbound_label (lid,l)))
+	in
         begin_def ();
         let (vars, ty_arg, ty_res) = instance_label false label in
         if vars = [] then end_def ();
@@ -806,7 +814,7 @@ let rec approx_type env sty =
         if List.length ctl <> decl.type_arity then raise Not_found;
         let tyl = List.map (approx_type env) ctl in
         newconstr path tyl
-      with Not_found -> newvar ()
+      with Not_found | Ident.Found_nearly _ -> newvar ()
       end
   | _ -> newvar ()
 
@@ -923,7 +931,9 @@ let rec type_exp env sexp =
           exp_type = instance desc.val_type;
           exp_env = env }
       with Not_found ->
-        raise(Error(sexp.pexp_loc, Unbound_value lid))
+        raise(Error(sexp.pexp_loc, Unbound_value (lid,[])))
+	| Ident.Found_nearly l -> 
+	raise(Error(sexp.pexp_loc, Unbound_value (lid,l)))
       end
   | Pexp_constant cst ->
       re {
@@ -1021,7 +1031,10 @@ let rec type_exp env sexp =
           try
             Env.lookup_label lid env
           with Not_found ->
-            raise(Error(sexp.pexp_loc, Unbound_label lid)) in
+            raise(Error(sexp.pexp_loc, Unbound_label (lid,[]))) 
+	    | Ident.Found_nearly l -> 
+	    raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))
+	in
         begin_def ();
         if !Clflags.principal then begin_def ();
         let (vars, ty_arg, ty_res) = instance_label true label in
@@ -1098,7 +1111,10 @@ let rec type_exp env sexp =
         try
           Env.lookup_label lid env
         with Not_found ->
-          raise(Error(sexp.pexp_loc, Unbound_label lid)) in
+          raise(Error(sexp.pexp_loc, Unbound_label (lid,[]))) 
+	  | Ident.Found_nearly l -> 
+	  raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))
+      in
       let (_, ty_arg, ty_res) = instance_label false label in
       unify_exp env arg ty_res;
       re {
@@ -1112,7 +1128,10 @@ let rec type_exp env sexp =
         try
           Env.lookup_label lid env
         with Not_found ->
-          raise(Error(sexp.pexp_loc, Unbound_label lid)) in
+          raise(Error(sexp.pexp_loc, Unbound_label (lid,[]))) 
+	  | Ident.Found_nearly l -> 
+	  raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))
+      in
       if label.lbl_mut = Immutable then
         raise(Error(sexp.pexp_loc, Label_not_mutable lid));
       begin_def ();
@@ -1332,7 +1351,9 @@ let rec type_exp env sexp =
   | Pexp_new cl ->
       let (cl_path, cl_decl) =
         try Env.lookup_class cl env with Not_found ->
-          raise(Error(sexp.pexp_loc, Unbound_class cl))
+          raise(Error(sexp.pexp_loc, Unbound_class (cl,[])))
+	  | Ident.Found_nearly l -> 
+	  raise(Error(sexp.pexp_loc, Unbound_class (cl,l)))
       in
         begin match cl_decl.cty_new with
           None ->
@@ -1361,10 +1382,13 @@ let rec type_exp env sexp =
         | Val_ivar _ ->
             raise(Error(sexp.pexp_loc, Instance_variable_not_mutable lab))
         | _ ->
-            raise(Error(sexp.pexp_loc, Unbound_instance_variable lab))
+            raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,[])))
       with
         Not_found ->
-          raise(Error(sexp.pexp_loc, Unbound_instance_variable lab))
+          raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,[])))
+	| Ident.Found_nearly l -> 
+	  raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,l)))
+
       end
   | Pexp_override lst ->
       let _ =
@@ -1380,7 +1404,7 @@ let rec type_exp env sexp =
         try
           Env.lookup_value (Longident.Lident "selfpat-*") env,
           Env.lookup_value (Longident.Lident "self-*") env
-        with Not_found ->
+        with Not_found | Ident.Found_nearly _ ->
           raise(Error(sexp.pexp_loc, Outside_class))
       with
         (_, {val_type = self_ty; val_kind = Val_self (_, vars, _, _)}),
@@ -1391,7 +1415,9 @@ let rec type_exp env sexp =
               (Path.Pident id, type_expect env snewval (instance ty))
             with
               Not_found ->
-                raise(Error(sexp.pexp_loc, Unbound_instance_variable lab))
+                raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,[])))
+	      | Ident.Found_nearly l -> 
+		raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,l)))
             end
           in
           let modifs = List.map type_override lst in
@@ -1637,7 +1663,7 @@ and type_application env funct sargs =
                   may_warn sarg0.pexp_loc
                     (Warnings.Not_principal "commuting this argument");
                 (l', sarg0, sargs1 @ sargs2, more_sargs)
-              with Not_found ->
+              with Not_found | Ident.Found_nearly _ ->
                 let (l', sarg0, sargs1, sargs2) =
                   extract_label name more_sargs in
                 if sargs1 <> [] || sargs <> [] then
@@ -1707,7 +1733,9 @@ and type_construct env loc lid sarg explicit_arity ty_expected =
     try
       Env.lookup_constructor lid env
     with Not_found ->
-      raise(Error(loc, Unbound_constructor lid)) in
+      raise(Error(loc, Unbound_constructor (lid,[])))
+      | Ident.Found_nearly l -> 
+      raise(Error(loc, Unbound_constructor (lid,l))) in
   let sargs =
     match sarg with
       None -> []
@@ -2020,12 +2048,18 @@ open Format
 open Printtyp
 
 let report_error ppf = function
-  | Unbound_value lid ->
-      fprintf ppf "Unbound value %a" longident lid
-  | Unbound_constructor lid ->
+  | Unbound_value (lid,[]) ->
+      fprintf ppf "Unbound value %a" longident lid 
+  | Unbound_value (lid,corr::_) ->
+      fprintf ppf "Unbound value %a, possible misspelling of %s" longident lid corr
+  | Unbound_constructor (lid,[]) ->
       fprintf ppf "Unbound constructor %a" longident lid
-  | Unbound_label lid ->
+  | Unbound_constructor (lid,corr::_) ->
+      fprintf ppf "Unbound constructor %a, possible misspelling of %s" longident lid corr
+  | Unbound_label (lid,[]) ->
       fprintf ppf "Unbound record field label %a" longident lid
+  | Unbound_label (lid,corr::_) ->
+      fprintf ppf "Unbound record field label %a, possible misspelling of %s" longident lid corr
   | Polymorphic_label lid ->
       fprintf ppf "@[The record field label %a is polymorphic.@ %s@]"
         longident lid "You cannot instantiate it in a pattern."
@@ -2100,13 +2134,17 @@ let report_error ppf = function
          It has no method %s@]" type_expr ty me
   | Undefined_inherited_method me ->
       fprintf ppf "This expression has no method %s" me
-  | Unbound_class cl ->
+  | Unbound_class (cl,[]) ->
       fprintf ppf "Unbound class %a" longident cl
+  | Unbound_class (cl,corr::_) ->
+      fprintf ppf "Unbound class %a, possible misspelling of %s" longident cl corr
   | Virtual_class cl ->
       fprintf ppf "One cannot create instances of the virtual class %a"
         longident cl
-  | Unbound_instance_variable v ->
+  | Unbound_instance_variable (v,[]) ->
       fprintf ppf "Unbound instance variable %s" v
+  | Unbound_instance_variable (v,corr::_) ->
+      fprintf ppf "Unbound instance variable %s, possible misspelling of %s" v corr
   | Instance_variable_not_mutable v ->
       fprintf ppf "The instance variable %s is not mutable" v
   | Not_subtype(tr1, tr2) ->
diff --git a/typing/typecore.mli b/typing/typecore.mli
index 24aea7d..171e96e 100644
--- a/typing/typecore.mli
+++ b/typing/typecore.mli
@@ -61,9 +61,9 @@ val force_delayed_checks: unit -> unit
 val self_coercion : (Path.t * Location.t list ref) list ref
 
 type error =
-    Unbound_value of Longident.t
-  | Unbound_constructor of Longident.t
-  | Unbound_label of Longident.t
+    Unbound_value of Longident.t * string list
+  | Unbound_constructor of Longident.t * string list
+  | Unbound_label of Longident.t * string list
   | Polymorphic_label of Longident.t
   | Constructor_arity_mismatch of Longident.t * int * int
   | Label_mismatch of Longident.t * (type_expr * type_expr) list
@@ -80,11 +80,11 @@ type error =
   | Bad_conversion of string * int * char
   | Undefined_method of type_expr * string
   | Undefined_inherited_method of string
-  | Unbound_class of Longident.t
+  | Unbound_class of Longident.t * string list
   | Virtual_class of Longident.t
   | Private_type of type_expr
   | Private_label of Longident.t * type_expr
-  | Unbound_instance_variable of string
+  | Unbound_instance_variable of string * string list
   | Instance_variable_not_mutable of string
   | Not_subtype of (type_expr * type_expr) list * (type_expr * type_expr) list
   | Outside_class
diff --git a/typing/typetexp.ml b/typing/typetexp.ml
index 4f615ab..3d4f6cf 100644
--- a/typing/typetexp.ml
+++ b/typing/typetexp.ml
@@ -22,14 +22,14 @@ open Ctype
 exception Already_bound
 
 type error =
-    Unbound_type_variable of string
-  | Unbound_type_constructor of Longident.t
+    Unbound_type_variable of string * string list
+  | Unbound_type_constructor of Longident.t * string list
   | Unbound_type_constructor_2 of Path.t
   | Type_arity_mismatch of Longident.t * int * int
   | Bound_type_variable of string
   | Recursive_type
-  | Unbound_class of Longident.t
-  | Unbound_row_variable of Longident.t
+  | Unbound_class of Longident.t * string list
+  | Unbound_row_variable of Longident.t * string list
   | Type_mismatch of (type_expr * type_expr) list
   | Alias_type_mismatch of (type_expr * type_expr) list
   | Present_has_conjunction of string
@@ -78,7 +78,7 @@ let type_variable loc name =
   try
     Tbl.find name !type_variables
   with Not_found ->
-    raise(Error(loc, Unbound_type_variable ("'" ^ name)))
+    raise(Error(loc, Unbound_type_variable (("'" ^ name),[])))
 
 let wrap_method ty =
   match (Ctype.repr ty).desc with
@@ -99,7 +99,7 @@ let rec transl_type env policy styp =
     Ptyp_any ->
       if policy = Univars then new_pre_univar () else
       if policy = Fixed then
-        raise (Error (styp.ptyp_loc, Unbound_type_variable "_"))
+        raise (Error (styp.ptyp_loc, Unbound_type_variable ("_",[])))
       else newvar ()
   | Ptyp_var name ->
       if name <> "" && name.[0] = '_' then
@@ -125,7 +125,9 @@ let rec transl_type env policy styp =
         try
           Env.lookup_type lid env
         with Not_found ->
-          raise(Error(styp.ptyp_loc, Unbound_type_constructor lid)) in
+          raise(Error(styp.ptyp_loc, Unbound_type_constructor (lid,[]))) 
+	  | Ident.Found_nearly l -> 
+	  raise(Error(styp.ptyp_loc, Unbound_type_constructor (lid,l))) in
       if List.length stl <> decl.type_arity then
         raise(Error(styp.ptyp_loc, Type_arity_mismatch(lid, decl.type_arity,
                                                            List.length stl)));
@@ -178,7 +180,9 @@ let rec transl_type env policy styp =
           let (path, decl) = Env.lookup_type lid2 env in
           (path, decl, false)
         with Not_found ->
-          raise(Error(styp.ptyp_loc, Unbound_class lid))
+          raise(Error(styp.ptyp_loc, Unbound_class (lid,[])))
+	  | Ident.Found_nearly l -> 
+	  raise(Error(styp.ptyp_loc, Unbound_class (lid,l)))
       in
       if List.length stl <> decl.type_arity then
         raise(Error(styp.ptyp_loc, Type_arity_mismatch(lid, decl.type_arity,
@@ -429,7 +433,7 @@ let globalize_used_variables env fixed =
         r := (loc, v,  Tbl.find name !type_variables) :: !r
       with Not_found ->
         if fixed && (repr ty).desc = Tvar then
-          raise(Error(loc, Unbound_type_variable ("'"^name)));
+          raise(Error(loc, Unbound_type_variable (("'"^name),[])));
         let v2 = new_global_var () in
         r := (loc, v, v2) :: !r;
         type_variables := Tbl.add name v2 !type_variables)
@@ -494,10 +498,14 @@ open Format
 open Printtyp
 
 let report_error ppf = function
-  | Unbound_type_variable name ->
+  | Unbound_type_variable (name,[]) ->
       fprintf ppf "Unbound type parameter %s" name
-  | Unbound_type_constructor lid ->
+  | Unbound_type_variable (name,corr::_) ->
+      fprintf ppf "Unbound type parameter %s, possible misspelling of %s" name corr
+  | Unbound_type_constructor (lid,[]) ->
       fprintf ppf "Unbound type constructor %a" longident lid
+  | Unbound_type_constructor (lid,corr::_) ->
+      fprintf ppf "Unbound type constructor %a, possible misspelling of %s" longident lid corr
   | Unbound_type_constructor_2 p ->
       fprintf ppf "The type constructor@ %a@ is not yet completely defined"
         path p
@@ -510,10 +518,14 @@ let report_error ppf = function
       fprintf ppf "Already bound type parameter '%s" name
   | Recursive_type ->
       fprintf ppf "This type is recursive"
-  | Unbound_class lid ->
+  | Unbound_class (lid,[]) ->
       fprintf ppf "Unbound class %a" longident lid
-  | Unbound_row_variable lid ->
+  | Unbound_class (lid,corr::_) ->
+      fprintf ppf "Unbound class %a, possible misspelling of %s" longident lid corr
+  | Unbound_row_variable (lid,[]) ->
       fprintf ppf "Unbound row variable in #%a" longident lid
+  | Unbound_row_variable (lid,corr::_) ->
+      fprintf ppf "Unbound row variable in #%a, possible misspelling of %s" longident lid corr
   | Type_mismatch trace ->
       Printtyp.unification_error true trace
         (function ppf ->
diff --git a/typing/typetexp.mli b/typing/typetexp.mli
index b7d3a55..ee47077 100644
--- a/typing/typetexp.mli
+++ b/typing/typetexp.mli
@@ -37,14 +37,14 @@ val widen: variable_context -> unit
 exception Already_bound
 
 type error =
-    Unbound_type_variable of string
-  | Unbound_type_constructor of Longident.t
+    Unbound_type_variable of string * string list
+  | Unbound_type_constructor of Longident.t * string list
   | Unbound_type_constructor_2 of Path.t
   | Type_arity_mismatch of Longident.t * int * int
   | Bound_type_variable of string
   | Recursive_type
-  | Unbound_class of Longident.t
-  | Unbound_row_variable of Longident.t
+  | Unbound_class of Longident.t * string list
+  | Unbound_row_variable of Longident.t * string list
   | Type_mismatch of (Types.type_expr * Types.type_expr) list
   | Alias_type_mismatch of (Types.type_expr * Types.type_expr) list
   | Present_has_conjunction of string

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Caml-list] Patch to 3.10.0 compiler enabling simple spell-checking
  2007-10-30  0:17 ` Yitzhak Mandelbaum
@ 2007-10-30  5:53   ` Edgar Friendly
  0 siblings, 0 replies; 10+ messages in thread
From: Edgar Friendly @ 2007-10-30  5:53 UTC (permalink / raw)
  To: Caml List

Yitzhak Mandelbaum wrote:
> Very cool! Do you think there's any way you could separate it from the
> compiler, like Learner et al.'s SEMINAL work, which separates type error
> messages from the compiler?. Separation could help ensure this (and any
> other, similar) ideas don't accidentally introduce bugs into the
> compiler, and make it much easier for you to maintain. A very simple
> hack might be tod wrap ocamlc in a script that parses such error
> messages and then tokenizes the source file, looking for similar strings?
> 
> Cheers,
> Yitzhak
> 
Separating it from the compiler would keep from interfering with the
compiler's activities, but it would add some difficulties:

Parsing ocaml - I guess I could steal the parser out of the current
source code, and use the internal parse tree, but it'd be a lot more
difficult than what I've done so far.

Namespaces - Ocaml has a ton of namespaces.  At the moment, the patch
doesn't find mistyped module names, but it does distinguish the
following: type parameters, type constructors, classes, row variables,
values, constructors, labels and instance variables.

Visibilty/scope - A simple script would have to add much complication to
keep track of where each identifier is visible - Maybe easy, maybe not.

Parsing the output of the ocaml compiler - ocamlc lacks i18n to make
this extra difficult, but the error messages don't follow any spec, and
could change at any time.


On the plus side, if the simpler hack were built into an IDE, it could
embed a list of corrections into a right-click menu, like a spell
checker.  It could do this outside the current cycle of edit -> compile
-> edit

I don't have interest in doing this - my ideas of a super-ocaml-IDE seem
too big for me to program ATM.

E.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Patch to 3.10.0 compiler enabling simple spell-checking
  2007-10-29 21:11 Patch to 3.10.0 compiler enabling simple spell-checking Edgar Friendly
                   ` (3 preceding siblings ...)
  2007-10-30  0:17 ` Yitzhak Mandelbaum
@ 2007-10-30  8:15 ` Sébastien Hinderer
  2007-10-30 15:50   ` [Caml-list] " Edgar Friendly
  4 siblings, 1 reply; 10+ messages in thread
From: Sébastien Hinderer @ 2007-10-30  8:15 UTC (permalink / raw)
  To: caml-list, Caml List

> One random little feature of GNAT that comes in handy for me is its
> habit of, when I misspell an identifier, giving me a possible correction
> in its compile error message.  Spending some time with the 3.10.0
> sources, I have created a "second draft" patch creating this
> functionality in my favored language.

Sounds great! Just out of curiosity: does the patch take into account
typing information to restrict the proposals to identifiers with a
compatible type ?
If it does not, is it because it would be too difficult to gather all the
necessary information ?

Cheers, and congratulations!
Sébastien.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Caml-list] Re: Patch to 3.10.0 compiler enabling simple spell-checking
  2007-10-30  8:15 ` Sébastien Hinderer
@ 2007-10-30 15:50   ` Edgar Friendly
  2007-10-30 15:58     ` Sébastien Hinderer
  0 siblings, 1 reply; 10+ messages in thread
From: Edgar Friendly @ 2007-10-30 15:50 UTC (permalink / raw)
  To: caml-list, Caml List

Sébastien Hinderer wrote:
> Sounds great! Just out of curiosity: does the patch take into account
> typing information to restrict the proposals to identifiers with a
> compatible type ?
> If it does not, is it because it would be too difficult to gather all the
> necessary information ?
> 
> Cheers, and congratulations!
> Sébastien.

It does.  It works by wrapping the tree lookup for an identifier
(separate trees are kept for each "compatible type" of identifier), and
if that fails, traversing the tree once more to find corrections.

E.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Re: Patch to 3.10.0 compiler enabling simple spell-checking
  2007-10-30 15:50   ` [Caml-list] " Edgar Friendly
@ 2007-10-30 15:58     ` Sébastien Hinderer
  0 siblings, 0 replies; 10+ messages in thread
From: Sébastien Hinderer @ 2007-10-30 15:58 UTC (permalink / raw)
  To: caml-list, Caml List

Edgar Friendly :
> Sébastien Hinderer wrote:
> > Sounds great! Just out of curiosity: does the patch take into account
> > typing information to restrict the proposals to identifiers with a
> > compatible type ?
> > If it does not, is it because it would be too difficult to gather all the
> > necessary information ?
> It does.  It works by wrapping the tree lookup for an identifier
> (separate trees are kept for each "compatible type" of identifier), and
> if that fails, traversing the tree once more to find corrections.

Wow... Definitely great.
Sébastien.


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2007-10-30 15:58 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-10-29 21:11 Patch to 3.10.0 compiler enabling simple spell-checking Edgar Friendly
2007-10-29 21:34 ` [Caml-list] " Till Varoquaux
2007-10-29 23:32 ` Julien Moutinho
2007-10-30  5:51   ` Edgar Friendly
2007-10-30  0:13 ` Yitzhak Mandelbaum
2007-10-30  0:17 ` Yitzhak Mandelbaum
2007-10-30  5:53   ` Edgar Friendly
2007-10-30  8:15 ` Sébastien Hinderer
2007-10-30 15:50   ` [Caml-list] " Edgar Friendly
2007-10-30 15:58     ` Sébastien Hinderer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).