1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
extern crate regex;
use regex::Regex;
use std::collections::HashMap;
pub struct Speller {
pub letters: String,
pub n_words: HashMap<String, u32>
}
impl Speller {
pub fn train(&mut self, text: &str) {
let re = Regex::new(r"[a-z]+").unwrap();
let lc_text = text.to_lowercase();
for m in re.find_iter(&lc_text) {
let count = self.n_words.entry(m.as_str().to_string()).or_insert(0);
*count += 1;
}
}
pub fn correct(&mut self, word: &str) -> String {
if self.n_words.contains_key(word) {
return word.to_string();
}
let mut candidates: HashMap<u32, String> = HashMap::new();
let list = self.edits(word);
for edit in &list {
if let Some(value) = self.n_words.get(edit) {
candidates.insert(*value, edit.to_string());
}
}
if let Some(c) = candidates.iter().max_by_key(|&entry| entry.0) {
return c.1.to_string();
}
for edit in &list {
for w in self.edits(&edit) {
if let Some(value) = self.n_words.get(&w) {
candidates.insert(*value, w);
}
}
}
if let Some(c) = candidates.iter().max_by_key(|&entry| entry.0) {
return c.1.to_string();
}
word.to_string()
}
fn edits(&mut self, word: &str) -> Vec<String> {
let mut results = Vec::new();
for i in 0 .. word.len() {
let (first, last) = word.split_at(i);
results.push([first, &last[1..]].concat());
}
for i in 0 .. word.len() - 1 {
let (first, last) = word.split_at(i);
results.push([first, &last[1..2], &last[..1], &last[2..]].concat());
}
for i in 0 .. word.len() {
for c in self.letters.chars() {
let (first, last) = word.split_at(i);
let mut buffer = [0; 1];
let result = c.encode_utf8(&mut buffer);
results.push([first, result, &last[1..]].concat());
}
}
for i in 0 .. word.len() + 1 {
for c in self.letters.chars() {
let (first, last) = word.split_at(i);
let mut buffer = [0; 1];
let result = c.encode_utf8(&mut buffer);
results.push([first, result, last].concat());
}
}
results
}
}