harper_core/spell/
mod.rs

1use crate::{CharString, CharStringExt, WordMetadata};
2
3pub use self::dictionary::Dictionary;
4pub use self::fst_dictionary::FstDictionary;
5pub use self::merged_dictionary::MergedDictionary;
6pub use self::mutable_dictionary::MutableDictionary;
7pub use self::word_id::WordId;
8
9mod dictionary;
10mod fst_dictionary;
11mod merged_dictionary;
12mod mutable_dictionary;
13mod rune;
14mod word_id;
15mod word_map;
16
17#[derive(PartialEq, Debug, Hash, Eq)]
18pub struct FuzzyMatchResult<'a> {
19    pub word: &'a [char],
20    pub edit_distance: u8,
21    pub metadata: &'a WordMetadata,
22}
23
24impl PartialOrd for FuzzyMatchResult<'_> {
25    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
26        self.edit_distance.partial_cmp(&other.edit_distance)
27    }
28}
29
30/// Scores a possible spelling suggestion based on possible relevance to the user.
31///
32/// Lower = better.
33fn score_suggestion(misspelled_word: &[char], sug: &FuzzyMatchResult) -> i32 {
34    if misspelled_word.is_empty() || sug.word.is_empty() {
35        return i32::MAX;
36    }
37
38    let mut score = sug.edit_distance as i32 * 10;
39
40    // People are much less likely to mistype the first letter.
41    if misspelled_word.first().unwrap() == sug.word.first().unwrap() {
42        score -= 10;
43    }
44
45    // If the original word is plural, the correct one probably is too.
46    if *misspelled_word.last().unwrap() == 's' && *sug.word.last().unwrap() == 's' {
47        score -= 5;
48    }
49
50    // For turning words into contractions.
51    if sug.metadata.common {
52        score -= 5;
53    }
54
55    // For turning words into contractions.
56    if sug.word.iter().filter(|c| **c == '\'').count() == 1 {
57        score -= 5;
58    }
59
60    score
61}
62
63/// Order the suggestions to be shown to the user.
64fn order_suggestions<'b>(
65    misspelled_word: &[char],
66    mut matches: Vec<FuzzyMatchResult<'b>>,
67) -> Vec<&'b [char]> {
68    matches.sort_by_key(|v| score_suggestion(misspelled_word, v));
69
70    matches.into_iter().map(|v| v.word).collect()
71}
72
73/// Get the closest matches in the provided [`Dictionary`] and rank them
74/// Implementation is left up to the underlying dictionary.
75pub fn suggest_correct_spelling<'a>(
76    misspelled_word: &[char],
77    result_limit: usize,
78    max_edit_dist: u8,
79    dictionary: &'a impl Dictionary,
80) -> Vec<&'a [char]> {
81    let matches: Vec<FuzzyMatchResult> = dictionary
82        .fuzzy_match(misspelled_word, max_edit_dist, result_limit)
83        .into_iter()
84        .collect();
85
86    order_suggestions(misspelled_word, matches)
87}
88
89/// Convenience function over [`suggest_correct_spelling`] that does conversions
90/// for you.
91pub fn suggest_correct_spelling_str(
92    misspelled_word: impl Into<String>,
93    result_limit: usize,
94    max_edit_dist: u8,
95    dictionary: &impl Dictionary,
96) -> Vec<String> {
97    let chars: CharString = misspelled_word.into().chars().collect();
98    suggest_correct_spelling(&chars, result_limit, max_edit_dist, dictionary)
99        .into_iter()
100        .map(|a| a.to_string())
101        .collect()
102}
103
#[cfg(test)]
mod tests {
    use itertools::Itertools;

    use crate::CharStringExt;

    use super::{FstDictionary, suggest_correct_spelling_str};

    const RESULT_LIMIT: usize = 100;
    const MAX_EDIT_DIST: u8 = 2;

    /// How many leading suggestions are searched for the expected correction.
    const TOP_N: usize = 3;

    /// Run the suggester against the curated FST dictionary with the default
    /// limits shared by every test in this module.
    fn suggestions_for(misspelled_word: &str) -> Vec<String> {
        suggest_correct_spelling_str(
            misspelled_word,
            RESULT_LIMIT,
            MAX_EDIT_DIST,
            &FstDictionary::curated(),
        )
    }

    /// Assert that the default suggestion settings never yield the same
    /// suggestion twice for a given misspelling.
    #[track_caller]
    fn assert_no_duplicates(misspelled_word: &str) {
        let results = suggestions_for(misspelled_word);

        // The message carries the full list so a failure is diagnosable
        // without sprinkling `dbg!` through the tests.
        assert!(
            results.iter().all_unique(),
            "duplicate suggestions for {:?}: {:?}",
            misspelled_word,
            results
        );
    }

    /// Assert that the default suggestion settings result in a specific word
    /// being in the top three results for a given misspelling.
    #[track_caller]
    fn assert_suggests_correction(misspelled_word: &str, correct: &str) {
        let results = suggestions_for(misspelled_word);

        assert!(
            results.iter().take(TOP_N).contains(&correct.to_string()),
            "expected {:?} among the top {} suggestions for {:?}, got {:?}",
            correct,
            TOP_N,
            misspelled_word,
            results
        );
    }

    #[test]
    fn normalizes_weve() {
        // The input uses a typographic apostrophe; normalization should turn
        // it into a plain ASCII one.
        let word = ['w', 'e', '’', 'v', 'e'];
        let norm = word.normalized();

        assert_eq!(norm.clone(), vec!['w', 'e', '\'', 'v', 'e'])
    }

    #[test]
    fn punctation_no_duplicates() {
        assert_no_duplicates("punctation");
    }

    #[test]
    fn youre_contraction() {
        assert_suggests_correction("youre", "you're");
    }

    #[test]
    fn thats_contraction() {
        assert_suggests_correction("thats", "that's");
    }

    #[test]
    fn weve_contraction() {
        assert_suggests_correction("weve", "we've");
    }

    #[test]
    fn this_correction() {
        assert_suggests_correction("ths", "this");
    }

    #[test]
    fn issue_624_no_duplicates() {
        assert_no_duplicates("Semantical");
    }

    #[test]
    fn issue_182() {
        assert_suggests_correction("Im", "I'm");
    }

    #[test]
    fn fst_spellcheck_hvllo() {
        // Checks the same condition as `spellcheck_hvllo`; kept under its
        // original name so existing test filters continue to match.
        assert_suggests_correction("hvllo", "hello");
    }

    #[test]
    fn spellcheck_hvllo() {
        assert_suggests_correction("hvllo", "hello");
    }

    #[test]
    fn spellcheck_aout() {
        assert_suggests_correction("aout", "about");
    }

    #[test]
    fn spellchecking_is_deterministic() {
        // Three independent runs over the same input must agree exactly,
        // including the order of the suggestions.
        let results1 = suggestions_for("hello");
        let results2 = suggestions_for("hello");
        let results3 = suggestions_for("hello");

        assert_eq!(results1, results2);
        assert_eq!(results1, results3);
    }

    #[test]
    fn adviced_correction() {
        assert_suggests_correction("adviced", "advised");
    }

    #[test]
    fn aknowledged_correction() {
        assert_suggests_correction("aknowledged", "acknowledged");
    }

    #[test]
    fn alcaholic_correction() {
        assert_suggests_correction("alcaholic", "alcoholic");
    }

    #[test]
    fn slaves_correction() {
        assert_suggests_correction("Slaves", "Slavs");
    }

    #[test]
    fn conciousness_correction() {
        assert_suggests_correction("conciousness", "consciousness");
    }
}