extern crate regex;
use regex::Regex;
use std::collections::HashMap;
/// A frequency-based spelling corrector.
///
/// The model is a table of word counts (`n_words`) plus the alphabet
/// (`letters`) from which replacement and insertion candidates are generated.
#[derive(Debug, Clone, Default)]
pub struct Speller {
    /// Characters tried when generating replacement and insertion edits.
    pub letters: String,
    /// Number of occurrences recorded for each known word.
    pub n_words: HashMap<String, u32>,
}
impl Speller {
pub fn train(&mut self, text: &str) {
let re = Regex::new(r"[a-z]+").unwrap();
let lc_text = text.to_lowercase();
for m in re.find_iter(&lc_text) {
let count = self.n_words.entry(m.as_str().to_string()).or_insert(0);
*count += 1;
}
}
pub fn correct(&mut self, word: &str) -> String {
if self.n_words.contains_key(word) {
return word.to_string();
}
let mut candidates: HashMap<u32, String> = HashMap::new();
let list = self.edits(word);
for edit in &list {
if let Some(value) = self.n_words.get(edit) {
candidates.insert(*value, edit.to_string());
}
}
if let Some(c) = candidates.iter().max_by_key(|&entry| entry.0) {
return c.1.to_string();
}
for edit in &list {
for w in self.edits(&edit) {
if let Some(value) = self.n_words.get(&w) {
candidates.insert(*value, w);
}
}
}
if let Some(c) = candidates.iter().max_by_key(|&entry| entry.0) {
return c.1.to_string();
}
word.to_string()
}
fn edits(&mut self, word: &str) -> Vec<String> {
let mut results = Vec::new();
for i in 0 .. word.len() {
let (first, last) = word.split_at(i);
results.push([first, &last[1..]].concat());
}
for i in 0 .. word.len() - 1 {
let (first, last) = word.split_at(i);
results.push([first, &last[1..2], &last[..1], &last[2..]].concat());
}
for i in 0 .. word.len() {
for c in self.letters.chars() {
let (first, last) = word.split_at(i);
let mut buffer = [0; 1];
let result = c.encode_utf8(&mut buffer);
results.push([first, result, &last[1..]].concat());
}
}
for i in 0 .. word.len() + 1 {
for c in self.letters.chars() {
let (first, last) = word.split_at(i);
let mut buffer = [0; 1];
let result = c.encode_utf8(&mut buffer);
results.push([first, result, last].concat());
}
}
results
}
}