1use crate::{CharString, CharStringExt, WordMetadata};
2
3pub use self::dictionary::Dictionary;
4pub use self::fst_dictionary::FstDictionary;
5pub use self::merged_dictionary::MergedDictionary;
6pub use self::mutable_dictionary::MutableDictionary;
7pub use self::word_id::WordId;
8
9mod dictionary;
10mod fst_dictionary;
11mod merged_dictionary;
12mod mutable_dictionary;
13mod rune;
14mod word_id;
15mod word_map;
16
17#[derive(PartialEq, Debug, Hash, Eq)]
18pub struct FuzzyMatchResult<'a> {
19 pub word: &'a [char],
20 pub edit_distance: u8,
21 pub metadata: &'a WordMetadata,
22}
23
24impl PartialOrd for FuzzyMatchResult<'_> {
25 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
26 self.edit_distance.partial_cmp(&other.edit_distance)
27 }
28}
29
30fn score_suggestion(misspelled_word: &[char], sug: &FuzzyMatchResult) -> i32 {
34 if misspelled_word.is_empty() || sug.word.is_empty() {
35 return i32::MAX;
36 }
37
38 let mut score = sug.edit_distance as i32 * 10;
39
40 if misspelled_word.first().unwrap() == sug.word.first().unwrap() {
42 score -= 10;
43 }
44
45 if *misspelled_word.last().unwrap() == 's' && *sug.word.last().unwrap() == 's' {
47 score -= 5;
48 }
49
50 if sug.metadata.common {
52 score -= 5;
53 }
54
55 if sug.word.iter().filter(|c| **c == '\'').count() == 1 {
57 score -= 5;
58 }
59
60 score
61}
62
63fn order_suggestions<'b>(
65 misspelled_word: &[char],
66 mut matches: Vec<FuzzyMatchResult<'b>>,
67) -> Vec<&'b [char]> {
68 matches.sort_by_key(|v| score_suggestion(misspelled_word, v));
69
70 matches.into_iter().map(|v| v.word).collect()
71}
72
73pub fn suggest_correct_spelling<'a>(
76 misspelled_word: &[char],
77 result_limit: usize,
78 max_edit_dist: u8,
79 dictionary: &'a impl Dictionary,
80) -> Vec<&'a [char]> {
81 let matches: Vec<FuzzyMatchResult> = dictionary
82 .fuzzy_match(misspelled_word, max_edit_dist, result_limit)
83 .into_iter()
84 .collect();
85
86 order_suggestions(misspelled_word, matches)
87}
88
89pub fn suggest_correct_spelling_str(
92 misspelled_word: impl Into<String>,
93 result_limit: usize,
94 max_edit_dist: u8,
95 dictionary: &impl Dictionary,
96) -> Vec<String> {
97 let chars: CharString = misspelled_word.into().chars().collect();
98 suggest_correct_spelling(&chars, result_limit, max_edit_dist, dictionary)
99 .into_iter()
100 .map(|a| a.to_string())
101 .collect()
102}
103
#[cfg(test)]
mod tests {
    use itertools::Itertools;

    use crate::CharStringExt;

    use super::{FstDictionary, suggest_correct_spelling_str};

    const RESULT_LIMIT: usize = 100;
    const MAX_EDIT_DIST: u8 = 2;

    /// The expected correction must appear within this many leading suggestions.
    const TOP_N: usize = 3;

    /// Runs the spell checker on `misspelled_word` against the curated
    /// dictionary, using the limits shared by every test in this module.
    fn suggestions_for(misspelled_word: &str) -> Vec<String> {
        suggest_correct_spelling_str(
            misspelled_word,
            RESULT_LIMIT,
            MAX_EDIT_DIST,
            &FstDictionary::curated(),
        )
    }

    /// Asserts that `correct` is among the first [`TOP_N`] suggestions for
    /// `misspelled_word`. The failure message lists every suggestion.
    #[track_caller]
    fn assert_suggests_correction(misspelled_word: &str, correct: &str) {
        let results = suggestions_for(misspelled_word);

        assert!(
            results.iter().take(TOP_N).any(|suggestion| suggestion == correct),
            "expected {correct:?} among the top {TOP_N} suggestions for \
             {misspelled_word:?}, got {results:?}"
        );
    }

    /// Asserts that no suggestion for `misspelled_word` is returned twice.
    #[track_caller]
    fn assert_no_duplicate_suggestions(misspelled_word: &str) {
        let results = suggestions_for(misspelled_word);

        assert!(
            results.iter().all_unique(),
            "duplicate suggestions for {misspelled_word:?}: {results:?}"
        );
    }

    #[test]
    fn normalizes_weve() {
        // The input uses a typographic apostrophe (U+2019); the expected
        // output uses the ASCII one.
        let word = ['w', 'e', '’', 'v', 'e'];

        assert_eq!(word.normalized(), vec!['w', 'e', '\'', 'v', 'e']);
    }

    #[test]
    fn punctation_no_duplicates() {
        assert_no_duplicate_suggestions("punctation");
    }

    #[test]
    fn youre_contraction() {
        assert_suggests_correction("youre", "you're");
    }

    #[test]
    fn thats_contraction() {
        assert_suggests_correction("thats", "that's");
    }

    #[test]
    fn weve_contraction() {
        assert_suggests_correction("weve", "we've");
    }

    #[test]
    fn this_correction() {
        assert_suggests_correction("ths", "this");
    }

    #[test]
    fn issue_624_no_duplicates() {
        assert_no_duplicate_suggestions("Semantical");
    }

    #[test]
    fn issue_182() {
        assert_suggests_correction("Im", "I'm");
    }

    // Performs the same check as `spellcheck_hvllo`; both names are kept so
    // existing test filters continue to match.
    #[test]
    fn fst_spellcheck_hvllo() {
        assert_suggests_correction("hvllo", "hello");
    }

    #[test]
    fn spellcheck_hvllo() {
        assert_suggests_correction("hvllo", "hello");
    }

    #[test]
    fn spellcheck_aout() {
        assert_suggests_correction("aout", "about");
    }

    #[test]
    fn spellchecking_is_deterministic() {
        let baseline = suggestions_for("hello");

        // Two further runs must reproduce the first one exactly.
        for _ in 0..2 {
            assert_eq!(suggestions_for("hello"), baseline);
        }
    }

    #[test]
    fn adviced_correction() {
        assert_suggests_correction("adviced", "advised");
    }

    #[test]
    fn aknowledged_correction() {
        assert_suggests_correction("aknowledged", "acknowledged");
    }

    #[test]
    fn alcaholic_correction() {
        assert_suggests_correction("alcaholic", "alcoholic");
    }

    #[test]
    fn slaves_correction() {
        assert_suggests_correction("Slaves", "Slavs");
    }

    #[test]
    fn conciousness_correction() {
        assert_suggests_correction("conciousness", "consciousness");
    }
}