harper_core/linting/
spell_check.rs

1use std::num::NonZero;
2
3use lru::LruCache;
4use smallvec::ToSmallVec;
5
6use super::Suggestion;
7use super::{Lint, LintKind, Linter};
8use crate::document::Document;
9use crate::spell::{Dictionary, suggest_correct_spelling};
10use crate::{CharString, CharStringExt, Dialect, TokenStringExt};
11
12pub struct SpellCheck<T>
13where
14    T: Dictionary,
15{
16    dictionary: T,
17    suggestion_cache: LruCache<CharString, Vec<CharString>>,
18    dialect: Dialect,
19}
20
21impl<T: Dictionary> SpellCheck<T> {
22    pub fn new(dictionary: T, dialect: Dialect) -> Self {
23        Self {
24            dictionary,
25            suggestion_cache: LruCache::new(NonZero::new(10000).unwrap()),
26            dialect,
27        }
28    }
29
30    const MAX_SUGGESTIONS: usize = 3;
31
32    fn suggest_correct_spelling(&mut self, word: &[char]) -> Vec<CharString> {
33        if let Some(hit) = self.suggestion_cache.get(word) {
34            hit.clone()
35        } else {
36            let suggestions = self.uncached_suggest_correct_spelling(word);
37            self.suggestion_cache.put(word.into(), suggestions.clone());
38            suggestions
39        }
40    }
41    fn uncached_suggest_correct_spelling(&self, word: &[char]) -> Vec<CharString> {
42        // Back off until we find a match.
43        for dist in 2..5 {
44            let suggestions: Vec<CharString> =
45                suggest_correct_spelling(word, 200, dist, &self.dictionary)
46                    .into_iter()
47                    .filter(|v| {
48                        // Ignore entries outside the configured dialect
49                        self.dictionary
50                            .get_word_metadata(v)
51                            .unwrap()
52                            .dialects
53                            .is_dialect_enabled(self.dialect)
54                    })
55                    .map(|v| v.to_smallvec())
56                    .take(Self::MAX_SUGGESTIONS)
57                    .collect();
58
59            if !suggestions.is_empty() {
60                return suggestions;
61            }
62        }
63
64        // no suggestions found
65        Vec::new()
66    }
67}
68
69impl<T: Dictionary> Linter for SpellCheck<T> {
70    fn lint(&mut self, document: &Document) -> Vec<Lint> {
71        let mut lints = Vec::new();
72
73        for word in document.iter_words() {
74            let word_chars = document.get_span_content(&word.span);
75
76            if let Some(metadata) = word.kind.as_word().unwrap()
77                && metadata.dialects.is_dialect_enabled(self.dialect)
78                && (self.dictionary.contains_exact_word(word_chars)
79                    || self.dictionary.contains_exact_word(&word_chars.to_lower()))
80            {
81                continue;
82            };
83
84            let mut possibilities = self.suggest_correct_spelling(word_chars);
85
86            // If the misspelled word is capitalized, capitalize the results too.
87            if let Some(mis_f) = word_chars.first()
88                && mis_f.is_uppercase()
89            {
90                for sug_f in possibilities.iter_mut().filter_map(|w| w.first_mut()) {
91                    *sug_f = sug_f.to_uppercase().next().unwrap();
92                }
93            }
94
95            let suggestions: Vec<_> = possibilities
96                .iter()
97                .map(|sug| Suggestion::ReplaceWith(sug.to_vec()))
98                .collect();
99
100            // If there's only one suggestion, save the user a step in the GUI
101            let message = if suggestions.len() == 1 {
102                format!(
103                    "Did you mean `{}`?",
104                    possibilities.first().unwrap().iter().collect::<String>()
105                )
106            } else {
107                format!(
108                    "Did you mean to spell `{}` this way?",
109                    document.get_span_content_str(&word.span)
110                )
111            };
112
113            lints.push(Lint {
114                span: word.span,
115                lint_kind: LintKind::Spelling,
116                suggestions,
117                message,
118                priority: 63,
119            })
120        }
121
122        lints
123    }
124
125    fn description(&self) -> &'static str {
126        "Looks and provides corrections for misspelled words."
127    }
128}
129
#[cfg(test)]
mod tests {
    use strum::IntoEnumIterator;

    use super::SpellCheck;
    use crate::dict_word_metadata::DialectFlags;
    use crate::linting::Linter;
    use crate::linting::tests::assert_no_lints;
    use crate::spell::{Dictionary, FstDictionary, MergedDictionary, MutableDictionary};
    use crate::{
        Dialect,
        linting::tests::{
            assert_lint_count, assert_suggestion_result, assert_top3_suggestion_result,
        },
    };
    use crate::{DictWordMetadata, Document};

    // Capitalization tests

    #[test]
    fn america_capitalized() {
        assert_suggestion_result(
            "The word america should be capitalized.",
            SpellCheck::new(FstDictionary::curated(), Dialect::American),
            "The word America should be capitalized.",
        );
    }

    #[test]
    fn harper_automattic_capitalized() {
        assert_lint_count(
            "So should harper and automattic.",
            SpellCheck::new(FstDictionary::curated(), Dialect::American),
            2,
        );
    }

    // Dialect tests

    #[test]
    fn american_color_in_british_dialect() {
        assert_lint_count(
            "Do you like the color?",
            SpellCheck::new(FstDictionary::curated(), Dialect::British),
            1,
        );
    }

    #[test]
    fn canadian_words_in_australian_dialect() {
        assert_lint_count(
            "Does your mom like yogourt?",
            SpellCheck::new(FstDictionary::curated(), Dialect::Australian),
            2,
        );
    }

    #[test]
    fn australian_words_in_canadian_dialect() {
        assert_lint_count(
            "We mine bauxite to make aluminium.",
            SpellCheck::new(FstDictionary::curated(), Dialect::Canadian),
            1,
        );
    }

    #[test]
    fn mum_and_mummy_not_just_commonwealth() {
        assert_lint_count(
            "Mum's the word about that Egyptian mummy.",
            SpellCheck::new(FstDictionary::curated(), Dialect::American),
            0,
        );
    }

    #[test]
    fn australian_verandah() {
        assert_lint_count(
            "Our house has a verandah.",
            SpellCheck::new(FstDictionary::curated(), Dialect::Australian),
            0,
        );
    }

    #[test]
    fn australian_verandah_in_american_dialect() {
        assert_lint_count(
            "Our house has a verandah.",
            SpellCheck::new(FstDictionary::curated(), Dialect::American),
            1,
        );
    }

    #[test]
    fn australian_verandah_in_british_dialect() {
        assert_lint_count(
            "Our house has a verandah.",
            SpellCheck::new(FstDictionary::curated(), Dialect::British),
            1,
        );
    }

    #[test]
    fn australian_verandah_in_canadian_dialect() {
        assert_lint_count(
            "Our house has a verandah.",
            SpellCheck::new(FstDictionary::curated(), Dialect::Canadian),
            1,
        );
    }

    #[test]
    fn mixing_australian_and_canadian_dialects() {
        assert_lint_count(
            "In summer we sit on the verandah and eat yogourt.",
            SpellCheck::new(FstDictionary::curated(), Dialect::Australian),
            1,
        );
    }

    #[test]
    fn mixing_canadian_and_australian_dialects() {
        assert_lint_count(
            "In summer we sit on the verandah and eat yogourt.",
            SpellCheck::new(FstDictionary::curated(), Dialect::Canadian),
            1,
        );
    }

    #[test]
    fn australian_and_canadian_spellings_that_are_not_american() {
        assert_lint_count(
            "In summer we sit on the verandah and eat yogourt.",
            SpellCheck::new(FstDictionary::curated(), Dialect::American),
            2,
        );
    }

    #[test]
    fn australian_and_canadian_spellings_that_are_not_british() {
        assert_lint_count(
            "In summer we sit on the verandah and eat yogourt.",
            SpellCheck::new(FstDictionary::curated(), Dialect::British),
            2,
        );
    }

    #[test]
    fn australian_labour_vs_labor() {
        assert_lint_count(
            "In Australia we write 'labour' but the political party is the 'Labor Party'.",
            SpellCheck::new(FstDictionary::curated(), Dialect::Australian),
            0,
        );
    }

    #[test]
    fn australian_words_flagged_for_american_english() {
        assert_lint_count(
            "There's an esky full of beers in the back of the ute.",
            SpellCheck::new(FstDictionary::curated(), Dialect::American),
            2,
        );
    }

    #[test]
    fn american_words_not_flagged_for_australian_english() {
        assert_lint_count(
            "In general, utes have unibody construction while pickups have frames.",
            SpellCheck::new(FstDictionary::curated(), Dialect::Australian),
            0,
        );
    }

    // Unit tests for specific spellcheck corrections

    #[test]
    fn abandonware_correction() {
        assert_suggestion_result(
            "abanonedware",
            SpellCheck::new(FstDictionary::curated(), Dialect::Australian),
            "abandonware",
        );
    }

    #[test]
    fn corrects_abandonedware_1131_1166() {
        // The expected correction only needs to appear among the top three
        // suggestions, not necessarily first.
        assert_top3_suggestion_result(
            "Abandonedware is abandoned. Do not bother submitting issues about the empty page bug. Author moved to greener pastures",
            SpellCheck::new(FstDictionary::curated(), Dialect::American),
            "Abandonware is abandoned. Do not bother submitting issues about the empty page bug. Author moved to greener pastures",
        );
    }

    #[test]
    fn afterwards_not_us() {
        assert_lint_count(
            "afterwards",
            SpellCheck::new(FstDictionary::curated(), Dialect::American),
            1,
        );
    }

    #[test]
    fn afterward_is_us() {
        assert_lint_count(
            "afterward",
            SpellCheck::new(FstDictionary::curated(), Dialect::American),
            0,
        );
    }

    #[test]
    fn afterward_not_au() {
        assert_lint_count(
            "afterward",
            SpellCheck::new(FstDictionary::curated(), Dialect::Australian),
            1,
        );
    }

    #[test]
    fn afterwards_is_au() {
        assert_lint_count(
            "afterwards",
            SpellCheck::new(FstDictionary::curated(), Dialect::Australian),
            0,
        );
    }

    #[test]
    fn afterward_not_ca() {
        assert_lint_count(
            "afterward",
            SpellCheck::new(FstDictionary::curated(), Dialect::Canadian),
            1,
        );
    }

    #[test]
    fn afterwards_is_ca() {
        assert_lint_count(
            "afterwards",
            SpellCheck::new(FstDictionary::curated(), Dialect::Canadian),
            0,
        );
    }

    #[test]
    fn afterward_not_uk() {
        assert_lint_count(
            "afterward",
            SpellCheck::new(FstDictionary::curated(), Dialect::British),
            1,
        );
    }

    #[test]
    fn afterwards_is_uk() {
        assert_lint_count(
            "afterwards",
            SpellCheck::new(FstDictionary::curated(), Dialect::British),
            0,
        );
    }

    #[test]
    fn corrects_hes() {
        assert_suggestion_result(
            "hes",
            SpellCheck::new(FstDictionary::curated(), Dialect::British),
            "he's",
        );
    }

    #[test]
    fn corrects_shes() {
        assert_suggestion_result(
            "shes",
            SpellCheck::new(FstDictionary::curated(), Dialect::British),
            "she's",
        );
    }

    #[test]
    fn issue_1876() {
        let user_dialect = Dialect::American;

        // Create a user dictionary with a word normally of another dialect in it.
        let mut user_dict = MutableDictionary::new();
        user_dict.append_word_str(
            "Calibre",
            DictWordMetadata {
                dialects: DialectFlags::from_dialect(user_dialect),
                ..Default::default()
            },
        );

        // Create a merged dictionary, using curated first.
        let mut merged_dict = MergedDictionary::new();
        merged_dict.add_dictionary(FstDictionary::curated());
        merged_dict.add_dictionary(std::sync::Arc::from(user_dict));
        assert!(merged_dict.contains_word_str("Calibre"));

        // No dialect issues should be found if the word from another dialect is in our user dictionary.
        assert_eq!(
            SpellCheck::new(merged_dict.clone(), user_dialect)
                .lint(&Document::new_markdown_default(
                    "I like to use the software Calibre.",
                    &merged_dict
                ))
                .len(),
            0,
            "Calibre is not part of the user's dialect!"
        );

        // A word of another dialect that is absent from the user dictionary must
        // still be flagged.
        assert_eq!(
            SpellCheck::new(merged_dict.clone(), user_dialect)
                .lint(&Document::new_markdown_default(
                    "I like to use the spelling colour.",
                    &merged_dict
                ))
                .len(),
            1,
            "colour should still be flagged for the user's dialect"
        );
    }

    #[test]
    fn matt_is_allowed() {
        for dialect in Dialect::iter() {
            // Printed so a failing run shows which dialect was under test.
            eprintln!("checking dialect: {dialect:?}");
            assert_no_lints(
                "Matt is a great name.",
                SpellCheck::new(FstDictionary::curated(), dialect),
            );
        }
    }

    #[test]
    fn issue_2026() {
        assert_top3_suggestion_result(
            "'Tere' is supposed to be 'There'",
            SpellCheck::new(FstDictionary::curated(), Dialect::British),
            "'There' is supposed to be 'There'",
        );

        assert_top3_suggestion_result(
            "'fll' is supposed to be 'fill'",
            SpellCheck::new(FstDictionary::curated(), Dialect::British),
            "'fill' is supposed to be 'fill'",
        );
    }
}