From 075a8e5ccb8a3287ce546aca8e0428bb4c9d79ec Mon Sep 17 00:00:00 2001 From: joe-conigliaro Date: Mon, 16 Sep 2019 03:07:12 +1000 Subject: [PATCH] compiler: improve typo detection --- compiler/fn.v | 8 ++++---- compiler/table.v | 27 +++++++++++++++++++-------- vlib/strings/similarity.v | 19 +++++++++---------- 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/compiler/fn.v b/compiler/fn.v index d789530282..f3c0090fa0 100644 --- a/compiler/fn.v +++ b/compiler/fn.v @@ -1015,12 +1015,12 @@ fn (f &Fn) find_misspelled_local_var(name string, min_match f32) string { mut closest := f32(0) mut closest_var := '' for var in f.local_vars { - n := '${f.mod}.$var.name' - if var.name == '' || !name.starts_with(f.mod) || (n.len - name.len > 3 || name.len - n.len > 3) { continue } - p := strings.dice_coefficient(name, n) + n := name.all_after('.') + if var.name == '' || (n.len - var.name.len > 2 || var.name.len - n.len > 2) { continue } + p := strings.dice_coefficient(var.name, n) if p > closest { closest = p - closest_var = n + closest_var = var.name } } return if closest >= min_match { closest_var } else { '' } diff --git a/compiler/table.v b/compiler/table.v index 2211f10324..0ed7e39ca7 100644 --- a/compiler/table.v +++ b/compiler/table.v @@ -856,11 +856,11 @@ fn (t &Type) contains_field_type(typ string) bool { fn (table &Table) identify_typo(name string, current_fn &Fn, fit &FileImportTable) string { // dont check if so short if name.len < 2 { return '' } - min_match := 0.8 // for dice coefficient between 0.0 - 1.0 + min_match := 0.50 // for dice coefficient between 0.0 - 1.0 name_orig := name.replace('__', '.').replace('_dot_', '.') mut output := '' // check functions - mut n := table.find_misspelled_fn(name_orig, min_match) + mut n := table.find_misspelled_fn(name, fit, min_match) if n != '' { output += '\n * function: `$n`' } @@ -878,16 +878,27 @@ fn (table &Table) identify_typo(name string, current_fn &Fn, fit &FileImportTabl } // find function with closest name to `name` -fn (table &Table) find_misspelled_fn(name string, min_match f32) string { +fn (table &Table) find_misspelled_fn(name string, fit &FileImportTable, min_match f32) string { mut closest := f32(0) mut closest_fn := '' + is_main_fn := name.starts_with('main__') + n1 := if is_main_fn { name.right(6) } else { name } for _, f in table.fns { - n := '${f.mod}.$f.name' - if !name.starts_with(f.mod) || (n.len - name.len > 3 || name.len - n.len > 3) { continue } - p := strings.dice_coefficient(name, n) + if n1.len - f.name.len > 2 || f.name.len - n1.len > 2 { continue } + if !(f.mod in ['', 'main', 'builtin']) { + mut mod_imported := false + for _, m in fit.imports { + if f.mod == m { + mod_imported = true + break + } + } + if !mod_imported { continue } + } + p := strings.dice_coefficient(n1, f.name) if p > closest { closest = p - closest_fn = n + closest_fn = f.name } } return if closest >= min_match { closest_fn } else { '' } @@ -899,7 +910,7 @@ fn (table &Table) find_misspelled_imported_mod(name string, fit &FileImportTable mut closest_mod := '' for alias, mod in fit.imports { n := '${fit.module_name}.$alias' - if !name.starts_with(fit.module_name) || (n.len - name.len > 3 || name.len - n.len > 3) { continue } + if !name.starts_with(fit.module_name) || (n.len - name.len > 2 || name.len - n.len > 2) { continue } p := strings.dice_coefficient(name, n) if p > closest { closest = p diff --git a/vlib/strings/similarity.v b/vlib/strings/similarity.v index dfa5596665..7ae64fa35c 100644 --- a/vlib/strings/similarity.v +++ b/vlib/strings/similarity.v @@ -40,22 +40,21 @@ pub fn dice_coefficient(s1, s2 string) f32 { if s1.len == 0 || s2.len == 0 { return 0.0 } if s1 == s2 { return 1.0 } if s1.len < 2 || s2.len < 2 { return 0.0 } + a := if s1.len > s2.len { s1 } else { s2 } + b := if a == s1 { s2 } else { s1 } mut first_bigrams := map[string]int - for i := 0; i < s1.len-1; i++ { - a := s1[i] - b := s1[i+1] - bigram := (a+b).str() + for i := 0; i < a.len-1; i++ { + bigram := a.substr(i, i+2) first_bigrams[bigram] = if bigram in first_bigrams { first_bigrams[bigram]+1 } else { 1 } } mut intersection_size := 0 - for i := 0; i < s2.len-1; i++ { - a := s2[i] - b := s2[i+1] - bigram := (a+b).str() - count := if bigram in first_bigrams { first_bigrams[bigram] } else { 0 } + for i := 0; i < b.len-1; i++ { + bigram := b.substr(i, i+2) + count := if bigram in first_bigrams { first_bigrams[bigram] } else { 0 } if count > 0 { + first_bigrams[bigram] = count - 1 intersection_size++ } } - return (2.0 * intersection_size) / (f32(s1.len) + f32(s2.len) - 2) + return (2.0 * intersection_size) / (f32(a.len) + f32(b.len) - 2) }