1use std::num::NonZero;
2
3use lru::LruCache;
4use smallvec::ToSmallVec;
5
6use super::Suggestion;
7use super::{Lint, LintKind, Linter};
8use crate::document::Document;
9use crate::spell::{Dictionary, suggest_correct_spelling};
10use crate::{CharString, CharStringExt, Dialect, TokenStringExt};
11
12pub struct SpellCheck<T>
13where
14 T: Dictionary,
15{
16 dictionary: T,
17 suggestion_cache: LruCache<CharString, Vec<CharString>>,
18 dialect: Dialect,
19}
20
21impl<T: Dictionary> SpellCheck<T> {
22 pub fn new(dictionary: T, dialect: Dialect) -> Self {
23 Self {
24 dictionary,
25 suggestion_cache: LruCache::new(NonZero::new(10000).unwrap()),
26 dialect,
27 }
28 }
29
30 const MAX_SUGGESTIONS: usize = 3;
31
32 fn suggest_correct_spelling(&mut self, word: &[char]) -> Vec<CharString> {
33 if let Some(hit) = self.suggestion_cache.get(word) {
34 hit.clone()
35 } else {
36 let suggestions = self.uncached_suggest_correct_spelling(word);
37 self.suggestion_cache.put(word.into(), suggestions.clone());
38 suggestions
39 }
40 }
41 fn uncached_suggest_correct_spelling(&self, word: &[char]) -> Vec<CharString> {
42 for dist in 2..5 {
44 let suggestions: Vec<CharString> =
45 suggest_correct_spelling(word, 200, dist, &self.dictionary)
46 .into_iter()
47 .filter(|v| {
48 self.dictionary
50 .get_word_metadata(v)
51 .unwrap()
52 .dialects
53 .is_dialect_enabled(self.dialect)
54 })
55 .map(|v| v.to_smallvec())
56 .take(Self::MAX_SUGGESTIONS)
57 .collect();
58
59 if !suggestions.is_empty() {
60 return suggestions;
61 }
62 }
63
64 Vec::new()
66 }
67}
68
69impl<T: Dictionary> Linter for SpellCheck<T> {
70 fn lint(&mut self, document: &Document) -> Vec<Lint> {
71 let mut lints = Vec::new();
72
73 for word in document.iter_words() {
74 let word_chars = document.get_span_content(&word.span);
75
76 if let Some(metadata) = word.kind.as_word().unwrap()
77 && metadata.dialects.is_dialect_enabled(self.dialect)
78 && (self.dictionary.contains_exact_word(word_chars)
79 || self.dictionary.contains_exact_word(&word_chars.to_lower()))
80 {
81 continue;
82 };
83
84 let mut possibilities = self.suggest_correct_spelling(word_chars);
85
86 if let Some(mis_f) = word_chars.first()
88 && mis_f.is_uppercase()
89 {
90 for sug_f in possibilities.iter_mut().filter_map(|w| w.first_mut()) {
91 *sug_f = sug_f.to_uppercase().next().unwrap();
92 }
93 }
94
95 let suggestions: Vec<_> = possibilities
96 .iter()
97 .map(|sug| Suggestion::ReplaceWith(sug.to_vec()))
98 .collect();
99
100 let message = if suggestions.len() == 1 {
102 format!(
103 "Did you mean `{}`?",
104 possibilities.first().unwrap().iter().collect::<String>()
105 )
106 } else {
107 format!(
108 "Did you mean to spell `{}` this way?",
109 document.get_span_content_str(&word.span)
110 )
111 };
112
113 lints.push(Lint {
114 span: word.span,
115 lint_kind: LintKind::Spelling,
116 suggestions,
117 message,
118 priority: 63,
119 })
120 }
121
122 lints
123 }
124
125 fn description(&self) -> &'static str {
126 "Looks and provides corrections for misspelled words."
127 }
128}
129
#[cfg(test)]
mod tests {
    use strum::IntoEnumIterator;

    use super::SpellCheck;
    use crate::dict_word_metadata::DialectFlags;
    use crate::linting::Linter;
    use crate::linting::tests::{
        assert_lint_count, assert_no_lints, assert_suggestion_result,
        assert_top3_suggestion_result,
    };
    use crate::spell::{Dictionary, FstDictionary, MergedDictionary, MutableDictionary};
    use crate::{Dialect, DictWordMetadata, Document};

    // --- Capitalization -------------------------------------------------------------------

    /// Lowercase "america" is corrected to "America".
    #[test]
    fn america_capitalized() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::American);
        assert_suggestion_result(
            "The word america should be capitalized.",
            checker,
            "The word America should be capitalized.",
        );
    }

    /// Lowercase "harper" and "automattic" are both flagged.
    #[test]
    fn harper_automattic_capitalized() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::American);
        assert_lint_count("So should harper and automattic.", checker, 2);
    }

    // --- Dialect-specific spellings -------------------------------------------------------

    /// "color" is flagged when checking British English.
    #[test]
    fn american_color_in_british_dialect() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::British);
        assert_lint_count("Do you like the color?", checker, 1);
    }

    /// "mom" and "yogourt" are both flagged when checking Australian English.
    #[test]
    fn canadian_words_in_australian_dialect() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::Australian);
        assert_lint_count("Does your mom like yogourt?", checker, 2);
    }

    /// Exactly one word of the sentence is flagged when checking Canadian English.
    #[test]
    fn australian_words_in_canadian_dialect() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::Canadian);
        assert_lint_count("We mine bauxite to make aluminium.", checker, 1);
    }

    /// "Mum" and "mummy" are accepted in American English.
    #[test]
    fn mum_and_mummy_not_just_commonwealth() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::American);
        assert_lint_count("Mum's the word about that Egyptian mummy.", checker, 0);
    }

    /// "verandah" is accepted in Australian English.
    #[test]
    fn australian_verandah() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::Australian);
        assert_lint_count("Our house has a verandah.", checker, 0);
    }

    /// "verandah" is flagged in American English.
    #[test]
    fn australian_verandah_in_american_dialect() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::American);
        assert_lint_count("Our house has a verandah.", checker, 1);
    }

    /// "verandah" is flagged in British English.
    #[test]
    fn australian_verandah_in_british_dialect() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::British);
        assert_lint_count("Our house has a verandah.", checker, 1);
    }

    /// "verandah" is flagged in Canadian English.
    #[test]
    fn australian_verandah_in_canadian_dialect() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::Canadian);
        assert_lint_count("Our house has a verandah.", checker, 1);
    }

    /// With "verandah" and "yogourt" in one sentence, Australian English flags exactly one.
    #[test]
    fn mixing_australian_and_canadian_dialects() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::Australian);
        assert_lint_count(
            "In summer we sit on the verandah and eat yogourt.",
            checker,
            1,
        );
    }

    /// With "verandah" and "yogourt" in one sentence, Canadian English flags exactly one.
    #[test]
    fn mixing_canadian_and_australian_dialects() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::Canadian);
        assert_lint_count(
            "In summer we sit on the verandah and eat yogourt.",
            checker,
            1,
        );
    }

    /// With "verandah" and "yogourt" in one sentence, American English flags two words.
    #[test]
    fn australian_and_canadian_spellings_that_are_not_american() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::American);
        assert_lint_count(
            "In summer we sit on the verandah and eat yogourt.",
            checker,
            2,
        );
    }

    /// With "verandah" and "yogourt" in one sentence, British English flags two words.
    #[test]
    fn australian_and_canadian_spellings_that_are_not_british() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::British);
        assert_lint_count(
            "In summer we sit on the verandah and eat yogourt.",
            checker,
            2,
        );
    }

    /// Both "labour" and "Labor" are accepted in Australian English.
    #[test]
    fn australian_labour_vs_labor() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::Australian);
        assert_lint_count(
            "In Australia we write 'labour' but the political party is the 'Labor Party'.",
            checker,
            0,
        )
    }

    /// Two words of the sentence are flagged in American English.
    #[test]
    fn australian_words_flagged_for_american_english() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::American);
        assert_lint_count(
            "There's an esky full of beers in the back of the ute.",
            checker,
            2,
        );
    }

    /// Nothing in the sentence is flagged in Australian English.
    #[test]
    fn american_words_not_flagged_for_australian_english() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::Australian);
        assert_lint_count(
            "In general, utes have unibody construction while pickups have frames.",
            checker,
            0,
        );
    }

    // --- Corrections ----------------------------------------------------------------------

    /// "abanonedware" is corrected to "abandonware".
    #[test]
    fn abandonware_correction() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::Australian);
        assert_suggestion_result("abanonedware", checker, "abandonware");
    }

    /// "Abandonedware" has "Abandonware" among its top three suggestions.
    #[test]
    fn corrects_abandonedware_1131_1166() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::American);
        assert_top3_suggestion_result(
            "Abandonedware is abandoned. Do not bother submitting issues about the empty page bug. Author moved to greener pastures",
            checker,
            "Abandonware is abandoned. Do not bother submitting issues about the empty page bug. Author moved to greener pastures",
        );
    }

    // --- "afterward" vs "afterwards" across dialects -------------------------------------

    /// "afterwards" is flagged in American English.
    #[test]
    fn afterwards_not_us() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::American);
        assert_lint_count("afterwards", checker, 1);
    }

    /// "afterward" is accepted in American English.
    #[test]
    fn afterward_is_us() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::American);
        assert_lint_count("afterward", checker, 0);
    }

    /// "afterward" is flagged in Australian English.
    #[test]
    fn afterward_not_au() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::Australian);
        assert_lint_count("afterward", checker, 1);
    }

    /// "afterwards" is accepted in Australian English.
    #[test]
    fn afterwards_is_au() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::Australian);
        assert_lint_count("afterwards", checker, 0);
    }

    /// "afterward" is flagged in Canadian English.
    #[test]
    fn afterward_not_ca() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::Canadian);
        assert_lint_count("afterward", checker, 1);
    }

    /// "afterwards" is accepted in Canadian English.
    #[test]
    fn afterwards_is_ca() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::Canadian);
        assert_lint_count("afterwards", checker, 0);
    }

    /// "afterward" is flagged in British English.
    #[test]
    fn afterward_not_uk() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::British);
        assert_lint_count("afterward", checker, 1);
    }

    /// "afterwards" is accepted in British English.
    #[test]
    fn afterwards_is_uk() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::British);
        assert_lint_count("afterwards", checker, 0);
    }

    // --- Missing apostrophes --------------------------------------------------------------

    /// "hes" is corrected to "he's".
    #[test]
    fn corrects_hes() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::British);
        assert_suggestion_result("hes", checker, "he's");
    }

    /// "shes" is corrected to "she's".
    #[test]
    fn corrects_shes() {
        let checker = SpellCheck::new(FstDictionary::curated(), Dialect::British);
        assert_suggestion_result("shes", checker, "she's");
    }

    // --- Regressions ----------------------------------------------------------------------

    /// A word added to a user dictionary for the user's dialect is accepted, while a word
    /// from the curated dictionary ("colour") is still flagged for that dialect.
    #[test]
    fn issue_1876() {
        let user_dialect = Dialect::American;

        // User dictionary holding "Calibre", enabled for the user's dialect only.
        let calibre_metadata = DictWordMetadata {
            dialects: DialectFlags::from_dialect(user_dialect),
            ..Default::default()
        };
        let mut user_dict = MutableDictionary::new();
        user_dict.append_word_str("Calibre", calibre_metadata);

        // Curated dictionary first, user dictionary second.
        let mut merged_dict = MergedDictionary::new();
        merged_dict.add_dictionary(FstDictionary::curated());
        merged_dict.add_dictionary(std::sync::Arc::from(user_dict));
        assert!(merged_dict.contains_word_str("Calibre"));

        let mut calibre_checker = SpellCheck::new(merged_dict.clone(), user_dialect);
        let calibre_doc =
            Document::new_markdown_default("I like to use the software Calibre.", &merged_dict);
        let calibre_lints = calibre_checker.lint(&calibre_doc);
        assert_eq!(
            calibre_lints.len(),
            0,
            "Calibre is not part of the user's dialect!"
        );

        let mut colour_checker = SpellCheck::new(merged_dict.clone(), user_dialect);
        let colour_doc =
            Document::new_markdown_default("I like to use the spelling colour.", &merged_dict);
        let colour_lints = colour_checker.lint(&colour_doc);
        assert_eq!(colour_lints.len(), 1);
    }

    /// "Matt" is accepted in every dialect.
    #[test]
    fn matt_is_allowed() {
        for dialect in Dialect::iter() {
            // Printed so a failing run shows which dialect was being checked.
            dbg!(dialect);
            let checker = SpellCheck::new(FstDictionary::curated(), dialect);
            assert_no_lints("Matt is a great name.", checker);
        }
    }

    /// Quoted misspellings get the intended word among their top three suggestions.
    #[test]
    fn issue_2026() {
        let there_checker = SpellCheck::new(FstDictionary::curated(), Dialect::British);
        assert_top3_suggestion_result(
            "'Tere' is supposed to be 'There'",
            there_checker,
            "'There' is supposed to be 'There'",
        );

        let fill_checker = SpellCheck::new(FstDictionary::curated(), Dialect::British);
        assert_top3_suggestion_result(
            "'fll' is supposed to be 'fill'",
            fill_checker,
            "'fill' is supposed to be 'fill'",
        );
    }
}