From 41734affb3cef63ef7c98643e9202cf0274b3950 Mon Sep 17 00:00:00 2001 From: joe-conigliaro Date: Fri, 13 Sep 2019 21:10:24 +1000 Subject: [PATCH] compiler: detect typos in function/variable/module names --- compiler/fn.v | 20 +++++++++++++ compiler/parser.v | 5 ++++ compiler/table.v | 63 +++++++++++++++++++++++++++++++++++++++ vlib/strings/similarity.v | 59 ++++++++++++++++++++++++++++++++++++ 4 files changed, 147 insertions(+) create mode 100644 vlib/strings/similarity.v diff --git a/compiler/fn.v b/compiler/fn.v index 41e0f85745..fb5dac609e 100644 --- a/compiler/fn.v +++ b/compiler/fn.v @@ -1023,3 +1023,23 @@ fn (f &Fn) str_args(table &Table) string { } return s }
+
+// find_misspelled_local_var returns the local variable of `f`, formatted as
+// `<f.mod>.<var.name>`, whose qualified name scores highest against `name`
+// with strings.dice_coefficient, or '' when that score is below `min_match`.
+fn (f &Fn) find_misspelled_local_var(name string, min_match f64) string {
+	mut closest := f64(0)
+	mut closest_var := ''
+	for var in f.local_vars {
+		n := '${f.mod}.$var.name'
+		// skip empty names, names not prefixed by f.mod, and length gaps above 3
+		if var.name == '' || !name.starts_with(f.mod) || (n.len - name.len > 3 || name.len - n.len > 3) { continue }
+		p := strings.dice_coefficient(name, n)
+		if p > closest {
+			closest = p
+			closest_var = n
+		}
+	}
+	if closest >= min_match { return closest_var }
+	return ''
+}
diff --git a/compiler/parser.v b/compiler/parser.v index 61e34fba3d..7b36357f13 100644 --- a/compiler/parser.v +++ b/compiler/parser.v @@ -1648,6 +1648,11 @@ fn (p mut Parser) name_expr() string { f = p.table.find_fn(name) } if f.name == '' { + // check for misspelled function / variable / module + suggested := p.table.identify_typo(name, p.cur_fn, p.import_table) + if suggested != '' { + p.error('undefined: `$name`. 
did you mean:$suggested') + } // If orig_name is a mod, then printing undefined: `mod` tells us nothing // if p.table.known_mod(orig_name) { if p.table.known_mod(orig_name) || p.import_table.known_alias(orig_name) { diff --git a/compiler/table.v b/compiler/table.v index da8e79594e..4366136c44 100644 --- a/compiler/table.v +++ b/compiler/table.v @@ -926,3 +926,66 @@ fn (t &Type) contains_field_type(typ string) bool { } return false }
+
+// identify_typo checks `name` for a function / variable / module typo and
+// returns one '\n * <kind>: `<match>`' entry per suggestion, or '' when none is found.
+fn (table &Table) identify_typo(name string, current_fn &Fn, fit &FileImportTable) string {
+	if name.len < 2 { return '' } // too short to compare
+	min_match := 0.8 // for dice coefficient between 0.0 - 1.0
+	name_orig := name.replace('__', '.').replace('_dot_', '.') // NOTE(review): presumably restores the dotted `mod.name` form - confirm
+	mut output := ''
+	// check functions
+	mut n := table.find_misspelled_fn(name_orig, min_match)
+	if n != '' {
+		output += '\n * function: `$n`'
+	}
+	// check function local variables
+	n = current_fn.find_misspelled_local_var(name_orig, min_match)
+	if n != '' {
+		output += '\n * variable: `$n`'
+	}
+	// check imported modules
+	n = table.find_misspelled_imported_mod(name_orig, fit, min_match)
+	if n != '' {
+		output += '\n * module: `$n`'
+	}
+	return output
+}
+
+// find_misspelled_fn returns the `<mod>.<name>` of the function in `table.fns`
+// whose qualified name scores highest against `name` with
+// strings.dice_coefficient, or '' when that score is below `min_match`.
+fn (table &Table) find_misspelled_fn(name string, min_match f64) string {
+	mut closest := f64(0)
+	mut closest_fn := ''
+	for _, f in table.fns {
+		n := '${f.mod}.$f.name'
+		if !name.starts_with(f.mod) || (n.len - name.len > 3 || name.len - n.len > 3) { continue }
+		p := strings.dice_coefficient(name, n)
+		if p > closest {
+			closest = p
+			closest_fn = n
+		}
+	}
+	if closest >= min_match { return closest_fn }
+	return ''
+}
+
+// find_misspelled_imported_mod returns '<alias> (<mod>)' for the entry of
+// `fit.imports` whose `<fit.module_name>.<alias>` scores highest against
+// `name` with strings.dice_coefficient, or '' when the score is below `min_match`.
+fn (table &Table) find_misspelled_imported_mod(name string, fit &FileImportTable, min_match f64) string {
+	mut closest := f64(0)
+	mut closest_mod := ''
+	for alias, mod in fit.imports {
+		n := '${fit.module_name}.$alias'
+		if !name.starts_with(fit.module_name) || (n.len - name.len > 3 || name.len - n.len > 3) { continue }
+		p := strings.dice_coefficient(name, n)
+		if p > closest {
+			closest = p
+			closest_mod = '$alias ($mod)'
+		}
+	}
+	if closest >= min_match { return closest_mod }
+	return ''
+}
diff --git a/vlib/strings/similarity.v b/vlib/strings/similarity.v new file mode 100644 index 0000000000..4b18c86bf0 --- /dev/null +++ b/vlib/strings/similarity.v @@ -0,0 +1,59 @@
+module strings
+
+// levenshtein_distance returns the Levenshtein edit distance between `a` and `b`:
+// the fewest insertions, deletions and substitutions turning one into the other (lower is closer)
+pub fn levenshtein_distance(a, b string) int {
+	// first row of the table: building b[..k] from the empty string costs k insertions
+	mut f := [int(0); b.len+1]
+	for k := 0; k < f.len; k++ { f[k] = k }
+	for ca in a {
+		mut j := 1
+		mut fj1 := f[0] // f[j-1] of the previous row (the diagonal cell)
+		f[0]++
+		for cb in b {
+			// cheapest of the cell above (f[j]+1) and the cell to the left (f[j-1]+1)
+			mut mn := if f[j]+1 <= f[j-1]+1 { f[j]+1 } else { f[j-1]+1 }
+			cost := if cb != ca { 1 } else { 0 } // the diagonal step is free when both sides match
+			mn = if mn <= fj1+cost { mn } else { fj1+cost }
+			fj1 = f[j]
+			f[j] = mn
+			j++
+		}
+	}
+	return f[f.len-1]
+}
+
+// levenshtein_distance_percentage returns how similar two strings are as a percentage
+// (higher is closer), based on levenshtein_distance; two empty strings count as 100.0
+pub fn levenshtein_distance_percentage(a, b string) f64 {
+	l := if a.len >= b.len { a.len } else { b.len }
+	if l == 0 { return 100.00 }
+	return (1.00 - f64(levenshtein_distance(a, b))/f64(l)) * 100.00
+}
+
+// implementation of Sørensen–Dice coefficient.
+// find the similarity between two strings.
+// returns f64 between 0.0 (not similar) and 1.0 (exact match).
+pub fn dice_coefficient(s1, s2 string) f64 {
+	if s1.len == 0 || s2.len == 0 { return 0.0 }
+	if s1 == s2 { return 1.0 }
+	if s1.len < 2 || s2.len < 2 { return 0.0 }
+	// count every 2-byte window of s1, keyed on the pair itself rather than the sum of its bytes
+	mut first_bigrams := map[string]int
+	for i := 0; i < s1.len-1; i++ {
+		bigram := s1.substr(i, i+2)
+		first_bigrams[bigram] = if bigram in first_bigrams { first_bigrams[bigram]+1 } else { 1 }
+	}
+	mut intersection_size := 0
+	for i := 0; i < s2.len-1; i++ {
+		bigram := s2.substr(i, i+2)
+		count := if bigram in first_bigrams { first_bigrams[bigram] } else { 0 }
+		if count > 0 {
+			// consume the match so a bigram repeated in s2 cannot be counted more often than s1 has it
+			first_bigrams[bigram] = count - 1
+			intersection_size++
+		}
+	}
+	// 2 * shared bigrams / (bigrams in s1 + bigrams in s2)
+	return (2.0 * intersection_size) / (f64(s1.len) + f64(s2.len) - 2)
+}