2021-01-18 15:20:06 +03:00
|
|
|
// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
|
2019-06-23 05:21:30 +03:00
|
|
|
// Use of this source code is governed by an MIT license
|
|
|
|
// that can be found in the LICENSE file.
|
|
|
|
|
2019-06-22 21:20:28 +03:00
|
|
|
import os
|
|
|
|
|
|
|
|
// main counts how often each word occurs in a text file and prints the
// totals in sorted key order, one `word => count` line per word.
//
// The file path is taken from the command line when exactly one argument is
// given. With any other argument count a usage hint is printed and the
// default 'cinderella.txt' is used. If the file cannot be read, a message is
// written to stderr and the program exits with status 1.
fn main() {
	mut path := 'cinderella.txt'
	if os.args.len == 2 {
		path = os.args[1]
	} else {
		println('usage: word_counter [text_file]')
		println('using ${path}')
	}
	contents := os.read_file(path.trim_space()) or {
		// Report on stderr and exit non-zero so a calling script can detect the failure.
		eprintln('failed to open ${path}')
		exit(1)
	}
	// Explicit `{}` initialiser, in the same form as the `[]string{}` literals in this file.
	mut m := map[string]int{}
	for word in extract_words(contents) {
		m[word]++
	}
	// Sort the keys so the output order is stable.
	mut keys := m.keys()
	keys.sort()
	// Print the map
	for key in keys {
		val := m[key]
		println('${key} => ${val}')
	}
}
|
|
|
|
|
2019-10-16 02:52:37 +03:00
|
|
|
// extract_words lower-cases `contents`, cuts it on spaces and newlines, and
// passes every piece through filter_word. Pieces for which filter_word
// returns '' are dropped; the rest are returned in their original order.
fn extract_words(contents string) []string {
	mut words := []string{}
	for chunk in contents.to_lower().split(' ') {
		// A space-delimited chunk may still span several lines.
		pieces := if chunk.contains('\n') { chunk.split('\n') } else { [chunk] }
		for piece in pieces {
			cleaned := filter_word(piece)
			if cleaned != '' {
				words << cleaned
			}
		}
	}
	return words
}
|
|
|
|
|
2019-06-22 21:20:28 +03:00
|
|
|
// filter_word strips surrounding punctuation: it returns the first
// contiguous run of bytes in `word` for which `is_letter()` is true, or ''
// when `word` is empty, a single space, or has no such byte.
// Anything after that first run is discarded.
fn filter_word(word string) string {
	if word.len == 0 || word == ' ' {
		return ''
	}
	// Find the first letter.
	mut first := 0
	for first < word.len {
		if word[first].is_letter() {
			break
		}
		first++
	}
	// Walk to the end of the letter run that starts there.
	mut last := first
	for last < word.len {
		if !word[last].is_letter() {
			break
		}
		last++
	}
	return word[first..last]
}
|