mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
compiler: detect typos in function/variable/module names
This commit is contained in:
parent
5055ac4b23
commit
41734affb3
@ -1023,3 +1023,23 @@ fn (f &Fn) str_args(table &Table) string {
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// find_misspelled_local_var returns the local variable of `f` whose qualified
// name (`<f.mod>.<var.name>`) is most similar to `name`.
// `name` is expected in the same qualified form; similarity is measured with
// strings.dice_coefficient (0.0 - 1.0, higher is closer).
// Returns the qualified name of the best candidate when its score is at least
// `min_match`, otherwise ''.
fn (f &Fn) find_misspelled_local_var(name string, min_match f64) string {
	mut closest := f64(0)
	mut closest_var := ''
	for var in f.local_vars {
		n := '${f.mod}.$var.name'
		// skip entries without a name, names outside this function's module
		// prefix, and candidates whose length differs by more than 3
		if var.name == '' || !name.starts_with(f.mod) || (n.len - name.len > 3 || name.len - n.len > 3) { continue }
		p := strings.dice_coefficient(name, n)
		// NOTE: the per-candidate debug println that used to live here was
		// removed; it wrote to stdout on every undefined-name lookup.
		if p > closest {
			closest = p
			closest_var = n
		}
	}
	if closest >= min_match {
		return closest_var
	}
	return ''
}
|
||||
|
@ -1648,6 +1648,11 @@ fn (p mut Parser) name_expr() string {
|
||||
f = p.table.find_fn(name)
|
||||
}
|
||||
if f.name == '' {
|
||||
// check for misspelled function / variable / module
|
||||
suggested := p.table.identify_typo(name, p.cur_fn, p.import_table)
|
||||
if suggested != '' {
|
||||
p.error('undefined: `$name`. did you mean:$suggested')
|
||||
}
|
||||
// If orig_name is a mod, then printing undefined: `mod` tells us nothing
|
||||
// if p.table.known_mod(orig_name) {
|
||||
if p.table.known_mod(orig_name) || p.import_table.known_alias(orig_name) {
|
||||
|
@ -926,3 +926,66 @@ fn (t &Type) contains_field_type(typ string) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// identify_typo checks whether `name` looks like a misspelling of a known
// function, a local variable of `current_fn`, or a module imported in `fit`.
// Returns a string with one `\n * kind: `candidate`` line per match found
// (functions first, then variables, then modules), or '' when nothing is
// close enough or `name` is shorter than two characters.
fn (table &Table) identify_typo(name string, current_fn &Fn, fit &FileImportTable) string {
	// too short to make a meaningful comparison
	if name.len < 2 {
		return ''
	}
	// minimum dice coefficient (0.0 - 1.0) a candidate has to reach
	min_match := 0.8
	// turn the mangled separators back into `.` before comparing
	name_orig := name.replace('__', '.').replace('_dot_', '.')
	// run the three lookups in the same order as they are reported
	func_hit := table.find_misspelled_fn(name_orig, min_match)
	var_hit := current_fn.find_misspelled_local_var(name_orig, min_match)
	mod_hit := table.find_misspelled_imported_mod(name_orig, fit, min_match)
	mut output := ''
	if func_hit != '' {
		output += '\n * function: `$func_hit`'
	}
	if var_hit != '' {
		output += '\n * variable: `$var_hit`'
	}
	if mod_hit != '' {
		output += '\n * module: `$mod_hit`'
	}
	return output
}
|
||||
|
||||
// find_misspelled_fn scans every function in `table.fns` and returns the
// qualified name (`<mod>.<fn name>`) that is most similar to `name` according
// to strings.dice_coefficient.
// Returns '' when the best score is below `min_match`.
fn (table &Table) find_misspelled_fn(name string, min_match f64) string {
	mut best_score := f64(0)
	mut best_name := ''
	for _, f in table.fns {
		// only functions whose module is a prefix of `name` are candidates
		if !name.starts_with(f.mod) {
			continue
		}
		candidate := '${f.mod}.$f.name'
		// ignore candidates whose length differs by more than 3
		len_diff := candidate.len - name.len
		if len_diff > 3 || len_diff < -3 {
			continue
		}
		score := strings.dice_coefficient(name, candidate)
		if score > best_score {
			best_score = score
			best_name = candidate
		}
	}
	if best_score >= min_match {
		return best_name
	}
	return ''
}
|
||||
|
||||
// find_misspelled_imported_mod compares `name` against every import alias in
// `fit.imports`, qualified as `<fit.module_name>.<alias>`, using
// strings.dice_coefficient.
// Returns the best match formatted as `alias (mod)` when its score is at
// least `min_match`, otherwise ''.
fn (table &Table) find_misspelled_imported_mod(name string, fit &FileImportTable, min_match f64) string {
	// the prefix test does not depend on the alias, so no candidate can
	// qualify when it fails
	if !name.starts_with(fit.module_name) {
		return ''
	}
	mut best_score := f64(0)
	mut best_mod := ''
	for alias, mod in fit.imports {
		candidate := '${fit.module_name}.$alias'
		// ignore candidates whose length differs by more than 3
		len_diff := candidate.len - name.len
		if len_diff > 3 || len_diff < -3 {
			continue
		}
		score := strings.dice_coefficient(name, candidate)
		if score > best_score {
			best_score = score
			best_mod = '$alias ($mod)'
		}
	}
	if best_score >= min_match {
		return best_mod
	}
	return ''
}
|
||||
|
59
vlib/strings/similarity.v
Normal file
59
vlib/strings/similarity.v
Normal file
@ -0,0 +1,59 @@
|
||||
module strings
|
||||
|
||||
// levenshtein_distance uses the levenshtein distance algorithm to calculate
// the distance between two strings (lower is closer).
// The result is the minimum number of single-element insertions, deletions
// and substitutions needed to turn `a` into `b`; 0 means the strings are equal.
pub fn levenshtein_distance(a, b string) int {
	// f[j] holds the distance between the part of `a` processed so far and
	// the first j elements of `b`
	mut f := [int(0); b.len+1]
	// distance from the empty prefix of `a` to the first j elements of `b`
	// is j insertions; without this the row starts at all zeroes and every
	// result is too small
	for j := 0; j < f.len; j++ {
		f[j] = j
	}
	for ca in a {
		mut j := 1
		// fj1 is the value f[j-1] had before this row started overwriting it
		mut fj1 := f[0]
		f[0]++
		for cb in b {
			// cheapest of deletion and insertion
			mut mn := if f[j]+1 <= f[j-1]+1 { f[j]+1 } else { f[j-1]+1 }
			if cb != ca {
				// substitution
				mn = if mn <= fj1+1 { mn } else { fj1+1 }
			} else {
				// elements match, no extra cost
				mn = if mn <= fj1 { mn } else { fj1 }
			}
			// remember the old f[j] for the next column, then store the new one
			fj1 = f[j]
			f[j] = mn
			j++
		}
	}
	return f[f.len-1]
}
|
||||
|
||||
// levenshtein_distance_percentage uses the levenshtein distance algorithm to
// calculate how similar two strings are as a percentage (higher is closer).
// The distance is scaled by the length of the longer string, so identical
// strings give 100.0.
pub fn levenshtein_distance_percentage(a, b string) f64 {
	l := if a.len >= b.len { a.len } else { b.len }
	// two empty strings are identical; returning early avoids dividing 0 by 0
	if l == 0 {
		return 100.00
	}
	d := levenshtein_distance(a, b)
	return (1.00 - f64(d)/f64(l)) * 100.00
}
|
||||
|
||||
// dice_coefficient is an implementation of the Sørensen–Dice coefficient.
// It finds the similarity between two strings by comparing their bigrams
// (pairs of adjacent elements).
// Returns an f64 between 0.0 (not similar) and 1.0 (exact match).
// Strings that are empty or shorter than two elements score 0.0 unless they
// are equal.
pub fn dice_coefficient(s1, s2 string) f64 {
	if s1.len == 0 || s2.len == 0 { return 0.0 }
	if s1 == s2 { return 1.0 }
	if s1.len < 2 || s2.len < 2 { return 0.0 }
	// count how often each bigram occurs in s1
	mut first_bigrams := map[string]int
	for i := 0; i < s1.len-1; i++ {
		// key on the two-element substring itself; summing the two byte
		// values would make `ab` and `ba` (or `ac` and `bb`) collide
		bigram := s1.substr(i, i+2)
		first_bigrams[bigram] = if bigram in first_bigrams { first_bigrams[bigram]+1 } else { 1 }
	}
	mut intersection_size := 0
	for i := 0; i < s2.len-1; i++ {
		bigram := s2.substr(i, i+2)
		count := if bigram in first_bigrams { first_bigrams[bigram] } else { 0 }
		if count > 0 {
			// consume the matched occurrence so a repeated bigram in s2 cannot
			// be matched more often than it occurs in s1 (keeps the result <= 1.0)
			first_bigrams[bigram] = count - 1
			intersection_size++
		}
	}
	// a string of length n has n-1 bigrams, hence the `- 2`
	return (2.0 * f64(intersection_size)) / (f64(s1.len) + f64(s2.len) - 2)
}
|
Loading…
Reference in New Issue
Block a user