One random little feature of GNAT that comes in handy for me is its
habit of, when I misspell an identifier, giving me a possible correction
in its compile error message. After spending some time with the 3.10.0
sources, I have created a "second draft" patch adding this
functionality to my favored language.

Example:
========
# /home/thelema/Projects/ocaml-custom/bin/ocamlc -o coml -I +lablgtk2 lablgtk.cma gtkInit.cmo coml.ml
File "coml.ml", line 61, characters 16-25:
Unbound value is_arcive, possible misspelling of is_archive

Impacts:
========
Efficiency in the case of finding a mistake should be quite good,
although this shouldn't matter too much since the compiler quits pretty
early in compilation when it finds an unbound identifier.

In the case of no unbound identifiers, the cost is an extra try/with
block around the standard lookup. I haven't made any benchmarks, though.

I expect this code to have few long-term maintenance issues - the
major source of code changes was adding a "* string list" to a number of
exceptions to carry the list of possible correct spellings to the point
where they get output by the compiler. These exceptions are still usable
as before with an empty list in this spot.

It's possible the code has created opportunities for uncaught exceptions
in the compiler, as I only checked for instances of "Not_found" in a few
files -- those which dealt with the Unbound_* exceptions. Someone who
knows the internals better might find places into which the
"Found_nearly" exception, which carries the possible corrections, could
escape.

Dedicated to:
Yaron Minsky and the team at Jane Street

E.
diff --git a/typing/ident.ml b/typing/ident.ml
index a30aa7a..8bbb395 100644
--- a/typing/ident.ml
+++ b/typing/ident.ml
@@ -56,6 +56,61 @@ let same i1 i2 = i1 = i2
then i1.stamp = i2.stamp
else i2.stamp = 0 && i1.name = i2.name *)
+(* [nearly_equal s i] tests whether [s] looks like a misspelling of [i.name]
+   (heuristics from the GNAT Ada compiler).  Names must share their first
+   character and not both be shorter than 3; they may differ by one changed
+   (but not digit-for-digit), added, dropped or transposed character. *)
+let nearly_equal s i =
+  let is_digit c = c >= '0' && c <= '9' in
+  (* [s1] from [i1] equals [s2] from [i2]; callers keep [i2] inside [s2] *)
+  let rec tails_equal s1 i1 s2 i2 =
+    i1 >= String.length s1
+    || (s1.[i1] = s2.[i2] && tails_equal s1 (i1+1) s2 (i2+1)) in
+  let n1 = s and n2 = i.name in
+  let l1 = String.length n1 and l2 = String.length n2 in
+  if l1 = 0 then l2 = 0 (* both null -> equal *)
+  else if l2 = 0 then false (* one of n1 or n2 null -> not equal *)
+  else if n1.[0] <> n2.[0] then false (* first characters don't match -> not *)
+  else if l1 < 3 && l2 < 3 then false (* short strings are all different *)
+  else if l1 = l2 then
+    (* look for single errors, transpositions *)
+    let rec find_diff c =
+      if c < (l1 - 1) then
+        if n1.[c] <> n2.[c] then
+          (* mismatched digits -> not equal *)
+          if is_digit n1.[c] && is_digit n2.[c] then false
+          (* single error -> equal *)
+          else if n1.[c+1] = n2.[c+1] && tails_equal n1 (c+2) n2 (c+2)
+          then true
+          (* transposition -> equal *)
+          else (n1.[c] = n2.[c+1] && n1.[c+1] = n2.[c]
+                && tails_equal n1 (c+2) n2 (c+2))
+        else find_diff (c+1)
+      else (* at the end: only two differing digits -> not equal *)
+        not (is_digit n1.[c] && is_digit n2.[c] && n1.[c] <> n2.[c])
+    in
+    find_diff 1
+  else if l1 = l2 - 1 then (* short by one *)
+    let rec find_del c =
+      if c < l1 then
+        if n1.[c] <> n2.[c] then
+          tails_equal n1 c n2 (c+1)
+        else find_del (c+1)
+      else true (* last character was deleted *)
+    in
+    find_del 1
+  else if l1 = l2 + 1 then (* too long by one *)
+    let rec find_add c =
+      if c < l2 then
+        if n1.[c] <> n2.[c] then
+          tails_equal n1 (c+1) n2 c
+        else find_add (c+1)
+      else true (* last character added *)
+    in
+    find_add 1
+  else (* lengths totally different *)
+    false
+
let binding_time i = i.stamp
let current_time() = !currentstamp@@ -97,6 +152,8 @@ and 'a data =
let empty = Empty
+exception Found_nearly of string list+(* Inline expansion of height for better speed* let height = function* Empty -> 0@@ -182,3 +239,34 @@ let rec keys_aux stack accu = functionkeys_aux (l :: stack) (k.ident :: accu) r
let keys tbl = keys_aux [] [] tbl
+
+(* [find_nearly_equal n tbl] returns the names of the identifiers stored in
+   the nodes of [tbl] that [nearly_equal n] accepts.  Left subtrees still to
+   be visited are kept on an explicit stack, so every recursive call is a
+   tail call. *)
+let find_nearly_equal n tbl =
+  let rec walk pending found node =
+    match node with
+    | Node(left, entry, right, _) ->
+        let id = entry.ident in
+        let found =
+          if nearly_equal n id then id.name :: found else found in
+        (* postpone the left subtree, carry on to the right *)
+        walk (left :: pending) found right
+    | Empty ->
+        (match pending with
+         | next :: rest -> walk rest found next
+         | [] -> found)
+  in
+  walk [] [] tbl
+
+(* [find_name_with_nearly name tbl] returns what [find_name name tbl]
+   returns.  If that lookup raises [Not_found], the exception is replaced by
+   [Found_nearly] carrying the result of [find_nearly_equal name tbl], which
+   may be the empty list. *)
+let find_name_with_nearly name tbl =
+  try find_name name tbl
+  with Not_found ->
+    (* lookup failed: gather spelling suggestions before re-raising *)
+    let suggestions = find_nearly_equal name tbl in
+    raise (Found_nearly suggestions)
diff --git a/typing/ident.mli b/typing/ident.mli
index 9f7372c..7ab9327 100644
--- a/typing/ident.mli
+++ b/typing/ident.mli
@@ -52,8 +52,12 @@ val print: Format.formatter -> t -> unit
type 'a tbl
(* Association tables from identifiers to type 'a. *)
+exception Found_nearly of string list+val empty: 'a tblval add: t -> 'a -> 'a tbl -> 'a tblval find_same: t -> 'a tbl -> 'aval find_name: string -> 'a tbl -> 'aval keys: 'a tbl -> t list++val find_name_with_nearly: string -> 'a tbl -> 'adiff --git a/typing/typecore.ml b/typing/typecore.mlindex 691eb49..adff3ad 100644--- a/typing/typecore.ml+++ b/typing/typecore.ml@@ -23,9 +23,9 @@ open Btypeopen Ctype
type error =- Unbound_value of Longident.t- | Unbound_constructor of Longident.t- | Unbound_label of Longident.t+ Unbound_value of Longident.t * string list+ | Unbound_constructor of Longident.t * string list+ | Unbound_label of Longident.t * string list| Polymorphic_label of Longident.t| Constructor_arity_mismatch of Longident.t * int * int| Label_mismatch of Longident.t * (type_expr * type_expr) list@@ -42,11 +42,11 @@ type error =| Bad_conversion of string * int * char| Undefined_method of type_expr * string| Undefined_inherited_method of string- | Unbound_class of Longident.t+ | Unbound_class of Longident.t * string list| Virtual_class of Longident.t| Private_type of type_expr| Private_label of Longident.t * type_expr- | Unbound_instance_variable of string+ | Unbound_instance_variable of string * string list| Instance_variable_not_mutable of string| Not_subtype of (type_expr * type_expr) list * (type_expr * type_expr) list| Outside_class@@ -276,7 +276,7 @@ let rec build_as_type env p =unify_pat env {p2 with pat_type = ty2} ty1;begin match path with None -> ()| Some path ->- let td = try Env.find_type path env with Not_found -> assert false in+ let td = try Env.find_type path env with Not_found | Ident.Found_nearly _ -> assert false inlet params = List.map (fun _ -> newvar()) td.type_params inmatch expand_head env (newty (Tconstr (path, params, ref Mnil)))with {desc=Tvariant row} when static_row row ->@@ -291,7 +291,9 @@ let build_or_pat env loc lid =let path, decl =try Env.lookup_type lid envwith Not_found ->- raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor lid))+ raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor (lid,[])))+ | Ident.Found_nearly l ->+ raise(Typetexp.Error(loc, Typetexp.Unbound_type_constructor (lid,l)))inlet tyl = List.map (fun _ -> newvar()) decl.type_params inlet fields =@@ -400,7 +402,10 @@ let rec type_pat env sp =tryEnv.lookup_constructor lid envwith Not_found ->- raise(Error(sp.ppat_loc, Unbound_constructor lid)) in+ 
raise(Error(sp.ppat_loc, Unbound_constructor (lid,[])))+ | Ident.Found_nearly l ->+ raise(Error(sp.ppat_loc, Unbound_constructor (lid,l)))+ inlet sargs =match sarg withNone -> []@@ -449,7 +454,10 @@ let rec type_pat env sp =tryEnv.lookup_label lid envwith Not_found ->- raise(Error(sp.ppat_loc, Unbound_label lid)) in+ raise(Error(sp.ppat_loc, Unbound_label (lid,[])))+ | Ident.Found_nearly l ->+ raise(Error(sp.ppat_loc, Unbound_label (lid,l)))+ inbegin_def ();let (vars, ty_arg, ty_res) = instance_label false label inif vars = [] then end_def ();@@ -806,7 +814,7 @@ let rec approx_type env sty =if List.length ctl <> decl.type_arity then raise Not_found;let tyl = List.map (approx_type env) ctl innewconstr path tyl- with Not_found -> newvar ()+ with Not_found | Ident.Found_nearly _ -> newvar ()end| _ -> newvar ()
@@ -923,7 +931,9 @@ let rec type_exp env sexp =exp_type = instance desc.val_type;exp_env = env }with Not_found ->- raise(Error(sexp.pexp_loc, Unbound_value lid))+ raise(Error(sexp.pexp_loc, Unbound_value (lid,[])))+ | Ident.Found_nearly l ->+ raise(Error(sexp.pexp_loc, Unbound_value (lid,l)))end| Pexp_constant cst ->re {@@ -1021,7 +1031,10 @@ let rec type_exp env sexp =tryEnv.lookup_label lid envwith Not_found ->- raise(Error(sexp.pexp_loc, Unbound_label lid)) in+ raise(Error(sexp.pexp_loc, Unbound_label (lid,[])))+ | Ident.Found_nearly l ->+ raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))+ inbegin_def ();if !Clflags.principal then begin_def ();let (vars, ty_arg, ty_res) = instance_label true label in@@ -1098,7 +1111,10 @@ let rec type_exp env sexp =tryEnv.lookup_label lid envwith Not_found ->- raise(Error(sexp.pexp_loc, Unbound_label lid)) in+ raise(Error(sexp.pexp_loc, Unbound_label (lid,[])))+ | Ident.Found_nearly l ->+ raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))+ inlet (_, ty_arg, ty_res) = instance_label false label inunify_exp env arg ty_res;re {@@ -1112,7 +1128,10 @@ let rec type_exp env sexp =tryEnv.lookup_label lid envwith Not_found ->- raise(Error(sexp.pexp_loc, Unbound_label lid)) in+ raise(Error(sexp.pexp_loc, Unbound_label (lid,[])))+ | Ident.Found_nearly l ->+ raise(Error(sexp.pexp_loc, Unbound_label (lid,l)))+ inif label.lbl_mut = Immutable thenraise(Error(sexp.pexp_loc, Label_not_mutable lid));begin_def ();@@ -1332,7 +1351,9 @@ let rec type_exp env sexp =| Pexp_new cl ->let (cl_path, cl_decl) =try Env.lookup_class cl env with Not_found ->- raise(Error(sexp.pexp_loc, Unbound_class cl))+ raise(Error(sexp.pexp_loc, Unbound_class (cl,[])))+ | Ident.Found_nearly l ->+ raise(Error(sexp.pexp_loc, Unbound_class (cl,l)))inbegin match cl_decl.cty_new withNone ->@@ -1361,10 +1382,13 @@ let rec type_exp env sexp =| Val_ivar _ ->raise(Error(sexp.pexp_loc, Instance_variable_not_mutable lab))| _ ->- raise(Error(sexp.pexp_loc, Unbound_instance_variable 
lab))+ raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,[])))withNot_found ->- raise(Error(sexp.pexp_loc, Unbound_instance_variable lab))+ raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,[])))+ | Ident.Found_nearly l ->+ raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,l)))+end| Pexp_override lst ->let _ =@@ -1380,7 +1404,7 @@ let rec type_exp env sexp =tryEnv.lookup_value (Longident.Lident "selfpat-*") env,Env.lookup_value (Longident.Lident "self-*") env- with Not_found ->+ with Not_found | Ident.Found_nearly _ ->raise(Error(sexp.pexp_loc, Outside_class))with(_, {val_type = self_ty; val_kind = Val_self (_, vars, _, _)}),@@ -1391,7 +1415,9 @@ let rec type_exp env sexp =(Path.Pident id, type_expect env snewval (instance ty))withNot_found ->- raise(Error(sexp.pexp_loc, Unbound_instance_variable lab))+ raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,[])))+ | Ident.Found_nearly l ->+ raise(Error(sexp.pexp_loc, Unbound_instance_variable (lab,l)))endinlet modifs = List.map type_override lst in@@ -1637,7 +1663,7 @@ and type_application env funct sargs =may_warn sarg0.pexp_loc(Warnings.Not_principal "commuting this argument");(l', sarg0, sargs1 @ sargs2, more_sargs)- with Not_found ->+ with Not_found | Ident.Found_nearly _ ->let (l', sarg0, sargs1, sargs2) =extract_label name more_sargs inif sargs1 <> [] || sargs <> [] then@@ -1707,7 +1733,9 @@ and type_construct env loc lid sarg explicit_arity ty_expected =tryEnv.lookup_constructor lid envwith Not_found ->- raise(Error(loc, Unbound_constructor lid)) in+ raise(Error(loc, Unbound_constructor (lid,[])))+ | Ident.Found_nearly l ->+ raise(Error(loc, Unbound_constructor (lid,l))) inlet sargs =match sarg withNone -> []@@ -2020,12 +2048,18 @@ open Formatopen Printtyp
let report_error ppf = function- | Unbound_value lid ->- fprintf ppf "Unbound value %a" longident lid- | Unbound_constructor lid ->+ | Unbound_value (lid,[]) ->+ fprintf ppf "Unbound value %a" longident lid+ | Unbound_value (lid,corr::_) ->+ fprintf ppf "Unbound value %a, possible misspelling of %s" longident lid corr+ | Unbound_constructor (lid,[]) ->fprintf ppf "Unbound constructor %a" longident lid- | Unbound_label lid ->+ | Unbound_constructor (lid,corr::_) ->+ fprintf ppf "Unbound constructor %a, possible misspelling of %s" longident lid corr+ | Unbound_label (lid,[]) ->fprintf ppf "Unbound record field label %a" longident lid+ | Unbound_label (lid,corr::_) ->+ fprintf ppf "Unbound record field label %a, possible misspelling of %s" longident lid corr| Polymorphic_label lid ->fprintf ppf "@[The record field label %a is polymorphic.@ %s@]"longident lid "You cannot instantiate it in a pattern."@@ -2100,13 +2134,17 @@ let report_error ppf = functionIt has no method %s@]" type_expr ty me| Undefined_inherited_method me ->fprintf ppf "This expression has no method %s" me- | Unbound_class cl ->+ | Unbound_class (cl,[]) ->fprintf ppf "Unbound class %a" longident cl+ | Unbound_class (cl,corr::_) ->+ fprintf ppf "Unbound class %a, possible misspelling of %s" longident cl corr| Virtual_class cl ->fprintf ppf "One cannot create instances of the virtual class %a"longident cl- | Unbound_instance_variable v ->+ | Unbound_instance_variable (v,[]) ->fprintf ppf "Unbound instance variable %s" v+ | Unbound_instance_variable (v,corr::_) ->+ fprintf ppf "Unbound instance variable %s, possible misspelling of %s" v corr| Instance_variable_not_mutable v ->fprintf ppf "The instance variable %s is not mutable" v| Not_subtype(tr1, tr2) ->diff --git a/typing/typecore.mli b/typing/typecore.mliindex 24aea7d..171e96e 100644--- a/typing/typecore.mli+++ b/typing/typecore.mli@@ -61,9 +61,9 @@ val force_delayed_checks: unit -> unitval self_coercion : (Path.t * Location.t list ref) list ref
type error =- Unbound_value of Longident.t- | Unbound_constructor of Longident.t- | Unbound_label of Longident.t+ Unbound_value of Longident.t * string list+ | Unbound_constructor of Longident.t * string list+ | Unbound_label of Longident.t * string list| Polymorphic_label of Longident.t| Constructor_arity_mismatch of Longident.t * int * int| Label_mismatch of Longident.t * (type_expr * type_expr) list@@ -80,11 +80,11 @@ type error =| Bad_conversion of string * int * char| Undefined_method of type_expr * string| Undefined_inherited_method of string- | Unbound_class of Longident.t+ | Unbound_class of Longident.t * string list| Virtual_class of Longident.t| Private_type of type_expr| Private_label of Longident.t * type_expr- | Unbound_instance_variable of string+ | Unbound_instance_variable of string * string list| Instance_variable_not_mutable of string| Not_subtype of (type_expr * type_expr) list * (type_expr * type_expr) list| Outside_class_______________________________________________Caml-list mailing list. Subscription management:Archives: http://caml.inria.frBeginner's list: http://groups.yahoo.com/group/ocaml_beginnersBug reports: http://caml.inria.fr/bin/caml-bugs