1extern crate regex;
12use regex::Regex;
13use std::collections::HashMap;
14
15pub struct Speller {
16 pub letters: String,
18 pub n_words: HashMap<String, u32>
21}
22
23impl Speller {
24 pub fn train(&mut self, text: &str) {
27 let re = Regex::new(r"[a-z]+").unwrap();
28 let lc_text = text.to_lowercase();
29 for m in re.find_iter(&lc_text) {
30 let count = self.n_words.entry(m.as_str().to_string()).or_insert(0);
31 *count += 1;
32 }
33 }
34
35 pub fn correct(&mut self, word: &str) -> String {
37 if self.n_words.contains_key(word) {
39 return word.to_string();
40 }
41
42 let mut candidates: HashMap<u32, String> = HashMap::new();
43 let list = self.edits(word);
44
45 for edit in &list {
47 if let Some(value) = self.n_words.get(edit) {
48 candidates.insert(*value, edit.to_string());
49 }
50 }
51 if let Some(c) = candidates.iter().max_by_key(|&entry| entry.0) {
52 return c.1.to_string();
53 }
54
55 for edit in &list {
57 for w in self.edits(&edit) {
58 if let Some(value) = self.n_words.get(&w) {
59 candidates.insert(*value, w);
60 }
61 }
62 }
63 if let Some(c) = candidates.iter().max_by_key(|&entry| entry.0) {
64 return c.1.to_string();
65 }
66
67 word.to_string()
69 }
70
71 fn edits(&mut self, word: &str) -> Vec<String> {
74 let mut results = Vec::new();
75 for i in 0 .. word.len() {
77 let (first, last) = word.split_at(i);
78 results.push([first, &last[1..]].concat());
79 }
80 for i in 0 .. word.len() - 1 {
82 let (first, last) = word.split_at(i);
83 results.push([first, &last[1..2], &last[..1], &last[2..]].concat());
84 }
85 for i in 0 .. word.len() {
87 for c in self.letters.chars() {
88 let (first, last) = word.split_at(i);
89 let mut buffer = [0; 1];
90 let result = c.encode_utf8(&mut buffer);
91 results.push([first, result, &last[1..]].concat());
92 }
93 }
94 for i in 0 .. word.len() + 1 {
96 for c in self.letters.chars() {
97 let (first, last) = word.split_at(i);
98 let mut buffer = [0; 1];
99 let result = c.encode_utf8(&mut buffer);
100 results.push([first, result, last].concat());
101 }
102 }
103
104 results
105 }
106}