caml-list - the Caml user's mailing list
 help / color / mirror / Atom feed
From: Edgar Friendly <thelema314@gmail.com>
To: Caml List <caml-list@inria.fr>
Subject: Patch to 3.10.0 compiler enabling simple spell-checking
Date: Mon, 29 Oct 2007 16:11:12 -0500	[thread overview]
Message-ID: <47264C70.5050100@gmail.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1622 bytes --]

One random little feature of GNAT that comes in handy for me is its
habit of, when I misspell an identifier, giving me a possible correction
in its compile error message.  Spending some time with the 3.10.0
sources, I have created a "second draft" patch creating this
functionality in my favored language.

Example:
========

# /home/thelema/Projects/ocaml-custom/bin/ocamlc -o coml -I +lablgtk2
lablgtk.cma gtkInit.cmo coml.ml
File "coml.ml", line 61, characters 16-25:
Unbound value is_arcive, possible misspelling of is_archive

Impacts:
========

Efficiency in the case of finding a mistake should be quite good,
although this shouldn't matter too much since the compiler quits pretty
early in compilation when it finds an unbound identifier.

In the case of no unbound identifiers, the cost is an extra try/with
block around the standard lookup.  I haven't made any benchmarks, though.

I expect this code to have little long term maintenance issues - the
major source of code changes was adding a "* string list" to a number of
exceptions to carry the list of possible correct spellings to the point
they get output by the compiler.  These exceptions are still usable as
before with an empty list in this spot.

It's possible the code has created opportunities for uncaught exceptions
in the compiler as I only checked for instances of "Not_found" in a few
files -- those which dealt with the Unbound_* exceptions.  Someone who
knows the internals better might find places the "Found_nearly"
exception that carries possible corrections might escape into.


Dedicated to:
Yaron Minsky and the team at Jane Street

E.

[-- Attachment #2: ocaml-spelling.patch --]
[-- Type: text/x-patch, Size: 15631 bytes --]

diff --git a/typing/ident.ml b/typing/ident.ml
index a30aa7a..8bbb395 100644
--- a/typing/ident.ml
+++ b/typing/ident.ml
@@ -56,6 +56,61 @@ let same i1 i2 = i1 = i2
        then i1.stamp = i2.stamp
        else i2.stamp = 0 && i1.name = i2.name *)
 
+(* does various heuristics to find spelling mistakes - algorithm from GNAT Ada compiler - Lots of code, but hopefully about as efficient as possible *)
+let nearly_equal s i =
+  let is_digit c = c >= '0' && c <= '9' in
+  let rec tails_equal s1 i1 s2 i2 = i1 >= String.length s1 || (s1.[i1] = s2.[i2] && tails_equal s1 (i1+1) s2 (i2+1)) in
+  
+  let n1 = s and n2 = i.name in
+  let l1 = String.length n1 and l2 = String.length n2 in
+  if l1 = 0 then l2 = 0 (* both null -> equal *)
+  else if l2 = 0 then false (* one of n1 or n2 null -> not equal *)
+  else if n1.[0] != n2.[0] then false (* first characters don't match -> not *)
+  else if l1 < 3 && l2 < 3 then false (* short strings are all different *)
+  else if l1 = l2 then
+    (* look for single errors, transpositions *)
+    let rec find_diff c = 
+      if c < (l1 - 1) then 
+	if n1.[c] != n2.[c] then 
+	  (* mismatched digits -> not equal *)
+	  if is_digit n1.[c] && is_digit n2.[c] then false
+	    (* single error -> equal *)
+	  else if n1.[c+1] = n2.[c+1] && tails_equal n1 (c+2) n2 (c+2)
+	  then true
+	    (* transposition -> equal *)
+	  else if n1.[c] = n2.[c+1] && n1.[c+1] = n2.[c] &&
+	  tails_equal n1 (c+2) n2 (c+2)
+	  then true
+	  else false
+	else find_diff (c+1)
+      else (* at the end of the strings *)
+	if is_digit n1.[c] && is_digit n2.[c] 
+	  && n1.[c] != n2.[c] 
+	then false
+	else true
+    in
+    find_diff 1
+  else if l1 = l2 - 1 then (* short by one *)
+    let rec find_del c = 
+      if c < l1 then
+	if n1.[c] != n2.[c] then 
+	  tails_equal n1 c n2 (c+1)
+	else find_del (c+1)
+      else true (* last character was deleted *)
+    in
+    find_del 1
+  else if l1 = l2 + 1 then (* too long by one *)
+    let rec find_add c =
+      if c < l2 then
+	if n1.[c] != n2.[c] then 
+	  tails_equal n1 (c+1) n2 c
+	else find_add (c+1)
+      else true (* last character added *)
+    in
+    find_add 1
+  else (* lengths totally different *)
+    false
+	
 let binding_time i = i.stamp
 
 let current_time() = !currentstamp
@@ -97,6 +152,8 @@ and 'a data =
 
 let empty = Empty
 
+exception Found_nearly of string list
+
 (* Inline expansion of height for better speed
  * let height = function
  *     Empty -> 0
@@ -182,3 +239,34 @@ let rec keys_aux stack accu = function
       keys_aux (l :: stack) (k.ident :: accu) r
 
 let keys tbl = keys_aux [] [] tbl
+
+let find_nearly_equal n tbl =
+(*  List.filter (nearly_equal name) (keys tbl) -- optimized for your enjoyment*)
+  let rec find_nearly_equal_aux stack accu = function
+      Empty ->
+	begin match stack with
+            [] -> accu
+	  | a :: l -> find_nearly_equal_aux l accu a
+	end
+    | Node(l, k, r, _) -> 
+	let accu' = 
+	  if nearly_equal n k.ident 
+	  then k.ident.name :: accu
+	  else accu 
+	in
+	find_nearly_equal_aux (l :: stack) accu' r
+  in
+  find_nearly_equal_aux [] [] tbl
+
+let find_name_with_nearly name tbl = 
+  try find_name name tbl
+  with Not_found -> 
+(*    prerr_string "FNE: "; prerr_endline name; *)
+    let nearlies = find_nearly_equal name tbl in
+(*    let rec pr_list to_str = function [] -> prerr_endline "Empty" | [a] -> prerr_endline (to_str a) | h :: t -> prerr_string (to_str h); prerr_string ", "; pr_list to_str t
+    in
+    prerr_string "nearlies: ";
+    pr_list (fun i -> i) nearlies;
+    prerr_string "whole table: ";
+    pr_list (fun i -> i.name) (keys tbl); *)
+    raise (Found_nearly nearlies)
diff --git a/typing/ident.mli b/typing/ident.mli
index 9f7372c..7ab9327 100644
--- a/typing/ident.mli
+++ b/typing/ident.mli
@@ -52,8 +52,12 @@ val print: Format.formatter -> t -> unit
 type 'a tbl
         (* Association tables from identifiers to type 'a. *)
 
+exception Found_nearly of string list
+
 val empty: 'a tbl
 val add: t -> 'a -> 'a tbl -> 'a tbl
 val find_same: t -> 'a tbl -> 'a
 val find_name: string -> 'a tbl -> 'a
 val keys: 'a tbl -> t list
+
+val find_name_with_nearly: string -> 'a tbl -> 'a
diff --git a/typing/typecore.ml b/typing/typecore.ml
index 691eb49..adff3ad 100644
--- a/typing/typecore.ml
+++ b/typing/typecore.ml
@@ -23,9 +23,9 @@ open Btype
 open Ctype
 
 type error =
-    Unbound_value of Longident.t
-  | Unbound_constructor of Longident.t
-  | Unbound_label of Longident.t
+    Unbound_value of Longident.t * string list
+  | Unbound_constructor of Longident.t * string list
+  | Unbound_label of Longident.t * string list
   | Polymorphic_label of Longident.t
   | Constructor_arity_mismatch of Longident.t * int * int
   | Label_mismatch of Longident.t * (type_expr * type_expr) list
@@ -42,11 +42,11 @@ type error =
   | Bad_conversion of string * int * char
   | Undefined_method of type_expr * string
   | Undefined_inherited_method of string
-  | Unbound_class of Longident.t
+  | Unbound_class of Longident.t * string list
   | Virtual_class of Longident.t
   | Private_type of type_expr
   | Private_label of Longident.t * type_expr
-  | Unbound_instance_variable of string
+  | Unbound_instance_variable of string * string list
   | Instance_variable_not_mutable of string
   | Not_subtype of (type_expr * type_expr) list * (type_expr * type_expr) list
   | Outside_class
@@ -276,7 +276,7 @@ let rec build_as_type env p =
       unify_pat env {p2 with pat_type = ty2} ty1;
       begin match path with None -> ()
       | Some path ->
-          let td = try Env.find_type path env with Not_found -> assert false in
+          let td = try Env.find_type path env with Not_found | Ident.Found_nearly _ -> assert false in
           let params = List.map (fun _ -> newvar()) td.type_params in
           match expand_head env (newty (Tconstr (path, params, ref Mnil)))
           with {desc=Tvariant row} when static_row row ->
@@ -291,7 +291,9 @@ let build_or_pat env loc lid =
   let path, decl =
     try Env.lookup_type lid env
     with Not_found ->
-      raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor lid))
+      raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor (lid,[])))
+      | Ident.Found_nearly l -> 
+      raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor (lid,l)))
   in
   let tyl = List.map (fun _ -> newvar()) decl.type_params in
   let fields =
@@ -400,7 +402,10 @@ let rec type_pat env sp =
         try
           Env.lookup_constructor lid env
         with Not_found ->
-          raise(Error(sp.ppat_loc, Unbound_constructor lid)) in
+          raise(Error(sp.ppat_loc, Unbound_constructor (lid,[]))) 
+	  | Ident.Found_nearly l -> 
+	  raise(Error(sp.ppat_loc, Unbound_constructor (lid,l)))
+      in
       let sargs =
         match sarg with
           None -> []
@@ -449,7 +454,10 @@ let rec type_pat env sp =
           try
             Env.lookup_label lid env
           with Not_found ->
-            raise(Error(sp.ppat_loc, Unbound_label lid)) in
+            raise(Error(sp.ppat_loc, Unbound_label (lid,[]))) 
+	    | Ident.Found_nearly l -> 
+	    raise(Error(sp.ppat_loc, Unbound_label (lid,l)))
+	in
         begin_def ();
         let (vars, ty_arg, ty_res) = instance_label false label in
         if vars = [] then end_def ();
@@ -806,7 +814,7 @@ let rec approx_type env sty =
         if List.length ctl <> decl.type_arity then raise Not_found;
         let tyl = List.map (approx_type env) ctl in
         newconstr path tyl
-      with Not_found -> newvar ()
+      with Not_found | Ident.Found_nearly _ -> newvar ()
       end
   | _ -> newvar ()
 
@@ -923,7 +931,9 @@ let rec type_exp env sexp =
           exp_type = instance desc.val_type;
           exp_env = env }
       with Not_found ->
-        raise(Error(sexp.pexp_loc, Unbound_value lid))
+        raise(Error(sexp.pexp_loc, Unbound_value (lid,[])))
+	| Ident.Found_nearly l -> 
+	raise(Error(sexp.pexp_loc, Unbound_value (lid,l)))
       end
   | Pexp_constant cst ->
       re {
@@ -1021,7 +1031,10 @@ let rec type_exp env sexp =
           try
             Env.lookup_label lid env
           with Not_found ->
-            raise(Error(sexp.pexp_loc, Unbound_label lid)) in
+            raise(Error(sexp.pexp_loc, Unbound_label (lid,[]))) 
+	    | Ident.Found_nearly l -> 
+	    raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))
+	in
         begin_def ();
         if !Clflags.principal then begin_def ();
         let (vars, ty_arg, ty_res) = instance_label true label in
@@ -1098,7 +1111,10 @@ let rec type_exp env sexp =
         try
           Env.lookup_label lid env
         with Not_found ->
-          raise(Error(sexp.pexp_loc, Unbound_label lid)) in
+          raise(Error(sexp.pexp_loc, Unbound_label (lid,[]))) 
+	  | Ident.Found_nearly l -> 
+	  raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))
+      in
       let (_, ty_arg, ty_res) = instance_label false label in
       unify_exp env arg ty_res;
       re {
@@ -1112,7 +1128,10 @@ let rec type_exp env sexp =
         try
           Env.lookup_label lid env
         with Not_found ->
-          raise(Error(sexp.pexp_loc, Unbound_label lid)) in
+          raise(Error(sexp.pexp_loc, Unbound_label (lid,[]))) 
+	  | Ident.Found_nearly l -> 
+	  raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))
+      in
       if label.lbl_mut = Immutable then
         raise(Error(sexp.pexp_loc, Label_not_mutable lid));
       begin_def ();
@@ -1332,7 +1351,9 @@ let rec type_exp env sexp =
   | Pexp_new cl ->
       let (cl_path, cl_decl) =
         try Env.lookup_class cl env with Not_found ->
-          raise(Error(sexp.pexp_loc, Unbound_class cl))
+          raise(Error(sexp.pexp_loc, Unbound_class (cl,[])))
+	  | Ident.Found_nearly l -> 
+	  raise(Error(sexp.pexp_loc, Unbound_class (cl,l)))
       in
         begin match cl_decl.cty_new with
           None ->
@@ -1361,10 +1382,13 @@ let rec type_exp env sexp =
         | Val_ivar _ ->
             raise(Error(sexp.pexp_loc, Instance_variable_not_mutable lab))
         | _ ->
-            raise(Error(sexp.pexp_loc, Unbound_instance_variable lab))
+            raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,[])))
       with
         Not_found ->
-          raise(Error(sexp.pexp_loc, Unbound_instance_variable lab))
+          raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,[])))
+	| Ident.Found_nearly l -> 
+	  raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,l)))
+
       end
   | Pexp_override lst ->
       let _ =
@@ -1380,7 +1404,7 @@ let rec type_exp env sexp =
         try
           Env.lookup_value (Longident.Lident "selfpat-*") env,
           Env.lookup_value (Longident.Lident "self-*") env
-        with Not_found ->
+        with Not_found | Ident.Found_nearly _ ->
           raise(Error(sexp.pexp_loc, Outside_class))
       with
         (_, {val_type = self_ty; val_kind = Val_self (_, vars, _, _)}),
@@ -1391,7 +1415,9 @@ let rec type_exp env sexp =
               (Path.Pident id, type_expect env snewval (instance ty))
             with
               Not_found ->
-                raise(Error(sexp.pexp_loc, Unbound_instance_variable lab))
+                raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,[])))
+	      | Ident.Found_nearly l -> 
+		raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,l)))
             end
           in
           let modifs = List.map type_override lst in
@@ -1637,7 +1663,7 @@ and type_application env funct sargs =
                   may_warn sarg0.pexp_loc
                     (Warnings.Not_principal "commuting this argument");
                 (l', sarg0, sargs1 @ sargs2, more_sargs)
-              with Not_found ->
+              with Not_found | Ident.Found_nearly _ ->
                 let (l', sarg0, sargs1, sargs2) =
                   extract_label name more_sargs in
                 if sargs1 <> [] || sargs <> [] then
@@ -1707,7 +1733,9 @@ and type_construct env loc lid sarg explicit_arity ty_expected =
     try
       Env.lookup_constructor lid env
     with Not_found ->
-      raise(Error(loc, Unbound_constructor lid)) in
+      raise(Error(loc, Unbound_constructor (lid,[])))
+      | Ident.Found_nearly l -> 
+      raise(Error(loc, Unbound_constructor (lid,l))) in
   let sargs =
     match sarg with
       None -> []
@@ -2020,12 +2048,18 @@ open Format
 open Printtyp
 
 let report_error ppf = function
-  | Unbound_value lid ->
-      fprintf ppf "Unbound value %a" longident lid
-  | Unbound_constructor lid ->
+  | Unbound_value (lid,[]) ->
+      fprintf ppf "Unbound value %a" longident lid 
+  | Unbound_value (lid,corr::_) ->
+      fprintf ppf "Unbound value %a, possible misspelling of %s" longident lid corr
+  | Unbound_constructor (lid,[]) ->
       fprintf ppf "Unbound constructor %a" longident lid
-  | Unbound_label lid ->
+  | Unbound_constructor (lid,corr::_) ->
+      fprintf ppf "Unbound constructor %a, possible misspelling of %s" longident lid corr
+  | Unbound_label (lid,[]) ->
       fprintf ppf "Unbound record field label %a" longident lid
+  | Unbound_label (lid,corr::_) ->
+      fprintf ppf "Unbound record field label %a, possible misspelling of %s" longident lid corr
   | Polymorphic_label lid ->
       fprintf ppf "@[The record field label %a is polymorphic.@ %s@]"
         longident lid "You cannot instantiate it in a pattern."
@@ -2100,13 +2134,17 @@ let report_error ppf = function
          It has no method %s@]" type_expr ty me
   | Undefined_inherited_method me ->
       fprintf ppf "This expression has no method %s" me
-  | Unbound_class cl ->
+  | Unbound_class (cl,[]) ->
       fprintf ppf "Unbound class %a" longident cl
+  | Unbound_class (cl,corr::_) ->
+      fprintf ppf "Unbound class %a, possible misspelling of %s" longident cl corr
   | Virtual_class cl ->
       fprintf ppf "One cannot create instances of the virtual class %a"
         longident cl
-  | Unbound_instance_variable v ->
+  | Unbound_instance_variable (v,[]) ->
       fprintf ppf "Unbound instance variable %s" v
+  | Unbound_instance_variable (v,corr::_) ->
+      fprintf ppf "Unbound instance variable %s, possible misspelling of %s" v corr
   | Instance_variable_not_mutable v ->
       fprintf ppf "The instance variable %s is not mutable" v
   | Not_subtype(tr1, tr2) ->
diff --git a/typing/typecore.mli b/typing/typecore.mli
index 24aea7d..171e96e 100644
--- a/typing/typecore.mli
+++ b/typing/typecore.mli
@@ -61,9 +61,9 @@ val force_delayed_checks: unit -> unit
 val self_coercion : (Path.t * Location.t list ref) list ref
 
 type error =
-    Unbound_value of Longident.t
-  | Unbound_constructor of Longident.t
-  | Unbound_label of Longident.t
+    Unbound_value of Longident.t * string list
+  | Unbound_constructor of Longident.t * string list
+  | Unbound_label of Longident.t * string list
   | Polymorphic_label of Longident.t
   | Constructor_arity_mismatch of Longident.t * int * int
   | Label_mismatch of Longident.t * (type_expr * type_expr) list
@@ -80,11 +80,11 @@ type error =
   | Bad_conversion of string * int * char
   | Undefined_method of type_expr * string
   | Undefined_inherited_method of string
-  | Unbound_class of Longident.t
+  | Unbound_class of Longident.t * string list
   | Virtual_class of Longident.t
   | Private_type of type_expr
   | Private_label of Longident.t * type_expr
-  | Unbound_instance_variable of string
+  | Unbound_instance_variable of string * string list
   | Instance_variable_not_mutable of string
   | Not_subtype of (type_expr * type_expr) list * (type_expr * type_expr) list
   | Outside_class

             reply	other threads:[~2007-10-29 21:11 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-10-29 21:11 Edgar Friendly [this message]
2007-10-29 21:34 ` [Caml-list] " Till Varoquaux
2007-10-29 23:32 ` Julien Moutinho
2007-10-30  5:51   ` Edgar Friendly
2007-10-30  0:13 ` Yitzhak Mandelbaum
2007-10-30  0:17 ` Yitzhak Mandelbaum
2007-10-30  5:53   ` Edgar Friendly
2007-10-30  8:15 ` Sébastien Hinderer
2007-10-30 15:50   ` [Caml-list] " Edgar Friendly
2007-10-30 15:58     ` Sébastien Hinderer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=47264C70.5050100@gmail.com \
    --to=thelema314@gmail.com \
    --cc=caml-list@inria.fr \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).