1#![deny(missing_docs,
2 missing_debug_implementations, missing_copy_implementations,
3 trivial_casts, trivial_numeric_casts,
4 unsafe_code,
5 unstable_features,
6 unused_import_braces, unused_qualifications)]
7
8extern crate rayon;
12
13use std::collections::HashMap;
14use std::collections::HashSet;
15use rayon::prelude::*;
16
17#[derive(Debug)]
18pub struct ClosestMatch {
21 substrings: HashMap<String, HashSet<String>>,
22 substring_sizes: Vec<usize>,
23}
24
25#[derive(Debug)]
26struct SplitWord {
27 word: String,
28 substrings: HashSet<String>,
29}
30
31#[derive(Debug)]
32struct ScoreValue {
33 word: String,
34 score: f32,
35}
36
37fn split_word(word: String, sizes: &Vec<usize>) -> SplitWord {
38 let mut substrings: HashSet<String> = HashSet::new();
39 for size in sizes {
40 if *size > word.len() {
41 continue;
42 }
43 for x in 0..(word.len() - size + 1) {
44 let sub = word[x..(x + size)].to_string().to_lowercase();
45 substrings.insert(sub);
46 }
47 }
48 return SplitWord {
49 word: word,
50 substrings: substrings,
51 };
52}
53
54fn evaluate(word_subs: &HashSet<String>,
55 possible: String,
56 possible_subs: &HashSet<String>)
57 -> ScoreValue {
58 let mut count = 0;
59 let len_sum = word_subs.len() + possible_subs.len();
60 for sub in word_subs {
61 if possible_subs.contains(sub) {
62 count += 1;
63 }
64 }
65 let score = (count as f32) / (len_sum as f32);
66 return ScoreValue {
67 word: possible,
68 score: score,
69 };
70}
71
72fn max_score(a: ScoreValue, b: ScoreValue) -> ScoreValue {
73 if a.score <= b.score {
74 return b;
75 }
76 return a;
77}
78
79impl ClosestMatch {
80 pub fn new(dictionary: Vec<String>, sizes: Vec<usize>) -> ClosestMatch {
84 let mut substrings: HashMap<String, HashSet<String>> = HashMap::new();
85 let splitwords: Vec<SplitWord> = dictionary
86 .par_iter()
87 .map(|possible| split_word(possible.to_lowercase(), &sizes))
88 .collect();
89 for splitword in splitwords {
90 substrings.insert(splitword.word, splitword.substrings);
91 }
92 return ClosestMatch {
93 substrings: substrings,
94 substring_sizes: sizes,
95 };
96 }
97
98 pub fn get_closest(&self, word: String) -> Option<String> {
101 let word_subs = split_word(word, &self.substring_sizes).substrings;
102 let best = self.substrings
103 .par_iter()
104 .map(|(possible, possible_subs)| {
105 evaluate(&word_subs, possible.to_lowercase(), possible_subs)
106 })
107 .reduce_with(|a, b| max_score(a, b));
108 match best {
109 Some(expr) => Some(expr.word),
110 None => None,
111 }
112 }
113}
114
115#[cfg(test)]
116mod tests {
117 use ClosestMatch;
118
119 #[test]
120 fn it_works() {
121 let cm = ClosestMatch::new(["hello".to_string(),
122 "bullo".to_string(),
123 "hello world".to_string()]
124 .to_vec(),
125 [1, 2, 3].to_vec());
126 let closest = cm.get_closest("hlo".to_string());
127 println!("{:?}", closest);
128 }
129}