typos_cli/
dict.rs

1use std::borrow::Cow;
2use std::collections::HashMap;
3
4use unicase::UniCase;
5
6use typos::tokens::Case;
7use typos::Status;
8
/// Dictionary that answers from the tables compiled into the binary.
///
/// Identifier and word lookups are served by the `dict` feature and
/// locale-variant lookups by the `vars` feature; with a feature disabled the
/// corresponding lookup always reports "unknown".
#[derive(Default)]
pub struct BuiltIn {
    /// Spelling category taken from the configured locale in `new`.
    ///
    /// When this is `None`, `is_vars_enabled` reports `false` and no variant
    /// lookups are performed.
    locale: Option<varcon_core::Category>,
}
13
14impl BuiltIn {
15    pub const fn new(locale: crate::config::Locale) -> Self {
16        Self {
17            locale: locale.category(),
18        }
19    }
20
21    pub fn correct_ident<'s>(
22        &'s self,
23        ident_token: typos::tokens::Identifier<'_>,
24    ) -> Option<Status<'s>> {
25        let ident = ident_token.token();
26        self.correct_ident_with_dict(ident)
27    }
28
29    pub fn correct_word<'s>(&'s self, word_token: typos::tokens::Word<'_>) -> Option<Status<'s>> {
30        if word_token.case() == Case::None {
31            return None;
32        }
33
34        let word = word_token.token();
35        let word_case = UniCase::new(word);
36        let mut corrections = if let Some(corrections) = self.correct_word_with_dict(word_case) {
37            if corrections.is_empty() {
38                Status::Invalid
39            } else {
40                self.chain_with_vars(corrections)
41            }
42        } else {
43            self.correct_with_vars(word_case)?
44        };
45        for s in corrections.corrections_mut() {
46            case_correct(s, word_token.case());
47        }
48        Some(corrections)
49    }
50}
51
#[cfg(feature = "dict")]
impl BuiltIn {
    /// Reports the hard-coded identifiers that are always accepted as valid;
    /// every other identifier yields `None`.
    fn correct_ident_with_dict<'s>(&self, ident: &str) -> Option<Status<'s>> {
        match ident {
            "O_WRONLY" | "dBA" => Some(Status::Valid),
            _ => None,
        }
    }

    /// Fetches the raw suggestion list for `word` from `typos_dict::WORD`.
    // Not using `Status` to avoid the allocations
    fn correct_word_with_dict(&self, word: UniCase<&str>) -> Option<&'static [&'static str]> {
        let entry = typos_dict::WORD.find(&word);
        entry.copied()
    }
}
67
68#[cfg(not(feature = "dict"))]
69impl BuiltIn {
70    fn correct_ident_with_dict<'s>(&self, _ident: &str) -> Option<Status<'s>> {
71        None
72    }
73
74    fn correct_word_with_dict(
75        &self,
76        _word: unicase::UniCase<&str>,
77    ) -> Option<&'static [&'static str]> {
78        None
79    }
80}
81
#[cfg(feature = "vars")]
impl BuiltIn {
    /// Runs every dictionary suggestion through the variant table so the
    /// final suggestions match the configured locale.
    ///
    /// With variant checks disabled the suggestions are passed through as-is.
    fn chain_with_vars(&self, corrections: &'static [&'static str]) -> Status<'static> {
        if !self.is_vars_enabled() {
            return Status::Corrections(corrections.iter().copied().map(Cow::Borrowed).collect());
        }

        let mut merged: Vec<Cow<'static, str>> = Vec::new();
        for &candidate in corrections {
            match self.correct_with_vars(UniCase::new(candidate)) {
                // Fine for this locale (or unknown to the table): keep as-is.
                Some(Status::Valid) | None => merged.push(Cow::Borrowed(candidate)),
                // Swap in the locale-specific spellings.
                Some(Status::Corrections(localized)) => merged.extend(localized),
                Some(Status::Invalid) => {
                    unreachable!("correct_with_vars should always have valid suggestions")
                }
            }
        }
        // A single entry is trivially sorted and unique.
        if merged.len() != 1 {
            merged.sort_unstable();
            merged.dedup();
        }
        debug_assert!(!merged.is_empty());
        Status::Corrections(merged)
    }

    /// Looks `word` up in `typos_vars::VARS`; returns `None` when variant
    /// checks are disabled or the word is not in the table.
    fn correct_with_vars(&self, word: UniCase<&str>) -> Option<Status<'static>> {
        if !self.is_vars_enabled() {
            return None;
        }
        let variants = typos_vars::VARS.find(&word)?;
        Some(self.select_variant(variants))
    }

    /// Variant checks are only active when a locale category is configured.
    fn is_vars_enabled(&self) -> bool {
        #![allow(clippy::assertions_on_constants)]
        // NOTE(review): presumably this is what lets `chain_with_vars` treat
        // `Status::Invalid` as unreachable; confirm against `typos_vars`.
        debug_assert!(typos_vars::NO_INVALID);
        self.locale.is_some()
    }

    /// Decides the status of a word from the first entry of its variant list.
    ///
    /// With a locale set, the word is valid if its category set contains the
    /// locale, otherwise the locale's spellings are suggested. Without a
    /// locale, the word is valid unless its category set is empty, in which
    /// case every listed spelling is suggested (sorted, de-duplicated).
    fn select_variant(
        &self,
        vars: &'static [(u8, &'static typos_vars::VariantsMap)],
    ) -> Status<'static> {
        let (category_bits, variants_map) = vars[0];
        // SAFETY: Code-genned from a checked category-set, so known to be safe
        let accepted = unsafe { typos_vars::CategorySet::from_bits_unchecked(category_bits) };

        match self.locale {
            Some(locale) => {
                if accepted.contains(locale) {
                    // Already valid for the current locale.
                    return Status::Valid;
                }
                let localized: Vec<Cow<'static, str>> =
                    typos_vars::corrections(locale, *variants_map)
                        .iter()
                        .copied()
                        .map(Cow::Borrowed)
                        .collect();
                Status::Corrections(localized)
            }
            None => {
                // All locales are valid
                if !accepted.is_empty() {
                    return Status::Valid;
                }
                // But the word is never valid.
                let mut unique: Vec<Cow<'static, str>> = Vec::new();
                for group in variants_map.iter() {
                    unique.extend(group.iter().copied().map(Cow::Borrowed));
                }
                unique.sort_unstable();
                unique.dedup();
                Status::Corrections(unique)
            }
        }
    }
}
165
166#[cfg(not(feature = "vars"))]
167impl BuiltIn {
168    fn chain_with_vars(&self, corrections: &'static [&'static str]) -> Status<'static> {
169        Status::Corrections(corrections.iter().map(|c| Cow::Borrowed(*c)).collect())
170    }
171
172    fn correct_with_vars(&self, _word: unicase::UniCase<&str>) -> Option<Status<'static>> {
173        None
174    }
175}
176
177impl typos::Dictionary for BuiltIn {
178    fn correct_ident<'s>(&'s self, ident: typos::tokens::Identifier<'_>) -> Option<Status<'s>> {
179        BuiltIn::correct_ident(self, ident)
180    }
181
182    fn correct_word<'s>(&'s self, word: typos::tokens::Word<'_>) -> Option<Status<'s>> {
183        BuiltIn::correct_word(self, word)
184    }
185}
186
187#[allow(clippy::ptr_arg)]
188fn case_correct(correction: &mut Cow<'_, str>, case: Case) {
189    match case {
190        Case::Lower | Case::None => (),
191        Case::Title => match correction {
192            Cow::Borrowed(s) => {
193                let mut s = String::from(*s);
194                s[0..1].make_ascii_uppercase();
195                *correction = s.into();
196            }
197            Cow::Owned(s) => {
198                s[0..1].make_ascii_uppercase();
199            }
200        },
201        Case::Upper => match correction {
202            Cow::Borrowed(s) => {
203                let mut s = String::from(*s);
204                s.make_ascii_uppercase();
205                *correction = s.into();
206            }
207            Cow::Owned(s) => {
208                s.make_ascii_uppercase();
209            }
210        },
211    }
212}
213
/// Dictionary layer that applies user-supplied rules before falling back to
/// the wrapped dictionary `inner`.
///
/// `'i` and `'w` are the lifetimes of the borrowed identifier and word
/// override strings respectively.
pub struct Override<'i, 'w, D> {
    /// Identifiers matching any of these patterns are reported as valid.
    ignored_identifiers: Vec<regex::Regex>,
    /// Exact-match identifier overrides.
    identifiers: HashMap<&'i str, Status<'i>, ahash::RandomState>,
    /// Words matching any of these patterns are reported as valid.
    ignored_words: Vec<regex::Regex>,
    /// Word overrides, keyed through `UniCase`.
    words: HashMap<UniCase<&'w str>, Status<'w>, ahash::RandomState>,
    /// Dictionary consulted when no override applies.
    inner: D,
}
221
222impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> {
223    pub fn new(inner: D) -> Self {
224        Self {
225            ignored_identifiers: Default::default(),
226            identifiers: Default::default(),
227            ignored_words: Default::default(),
228            words: Default::default(),
229            inner,
230        }
231    }
232
233    pub fn ignored_identifiers<'r>(&mut self, ignored: impl Iterator<Item = &'r regex::Regex>) {
234        self.ignored_identifiers.extend(ignored.cloned());
235    }
236
237    pub fn identifiers<I: Iterator<Item = (&'i str, &'i str)>>(&mut self, identifiers: I) {
238        self.identifiers = Self::interpret(identifiers).collect();
239    }
240
241    pub fn ignored_words<'r>(&mut self, ignored: impl Iterator<Item = &'r regex::Regex>) {
242        self.ignored_words.extend(ignored.cloned());
243    }
244
245    pub fn words<I: Iterator<Item = (&'w str, &'w str)>>(&mut self, words: I) {
246        self.words = Self::interpret(words)
247            .map(|(k, v)| (UniCase::new(k), v))
248            .collect();
249    }
250
251    fn interpret<'z, I: Iterator<Item = (&'z str, &'z str)>>(
252        cases: I,
253    ) -> impl Iterator<Item = (&'z str, Status<'z>)> {
254        cases.map(|(typo, correction)| {
255            let correction = if typo == correction {
256                Status::Valid
257            } else if correction.is_empty() {
258                Status::Invalid
259            } else {
260                Status::Corrections(vec![Cow::Borrowed(correction)])
261            };
262            (typo, correction)
263        })
264    }
265}
266
267impl<D: typos::Dictionary> typos::Dictionary for Override<'_, '_, D> {
268    fn correct_ident<'s>(&'s self, ident: typos::tokens::Identifier<'_>) -> Option<Status<'s>> {
269        for ignored in &self.ignored_identifiers {
270            if ignored.is_match(ident.token()) {
271                return Some(Status::Valid);
272            }
273        }
274
275        // Skip hashing if we can
276        if !self.identifiers.is_empty() {
277            if let Some(status) = self.identifiers.get(ident.token()).map(|c| c.borrow()) {
278                return Some(status);
279            }
280        }
281
282        self.inner.correct_ident(ident)
283    }
284
285    fn correct_word<'s>(&'s self, word_token: typos::tokens::Word<'_>) -> Option<Status<'s>> {
286        if word_token.case() == Case::None {
287            return None;
288        }
289
290        for ignored in &self.ignored_words {
291            if ignored.is_match(word_token.token()) {
292                return Some(Status::Valid);
293            }
294        }
295
296        // Skip hashing if we can
297        if !self.words.is_empty() {
298            let w = UniCase::new(word_token.token());
299            // HACK: couldn't figure out the lifetime issue with replacing `cloned` with `borrow`
300            if let Some(mut corrections) = self.words.get(&w).cloned() {
301                for s in corrections.corrections_mut() {
302                    case_correct(s, word_token.case());
303                }
304                return Some(corrections);
305            }
306        }
307
308        self.inner.correct_word(word_token)
309    }
310}
311
#[cfg(test)]
mod test {
    use super::*;

    /// Runs `word` through `dict` as a lower-case word token at offset 0.
    #[cfg(any(feature = "dict", feature = "vars"))]
    fn correct_lowercase<'d>(dict: &'d BuiltIn, word: &str) -> Option<Status<'d>> {
        let token = typos::tokens::Word::new_unchecked(word, Case::Lower, 0);
        dict.correct_word(token)
    }

    #[cfg(feature = "dict")]
    #[test]
    fn test_dict_correct() {
        let dict = BuiltIn::new(crate::config::Locale::default());
        assert_eq!(
            correct_lowercase(&dict, "finallizes"),
            Some(Status::Corrections(vec!["finalizes".into()]))
        );
    }

    #[cfg(feature = "vars")]
    #[test]
    fn test_varcon_no_locale() {
        let dict = BuiltIn::new(crate::config::Locale::En);
        assert_eq!(correct_lowercase(&dict, "finalizes"), None);
    }

    #[cfg(feature = "vars")]
    #[test]
    fn test_varcon_same_locale() {
        let dict = BuiltIn::new(crate::config::Locale::EnUs);
        assert_eq!(correct_lowercase(&dict, "finalizes"), Some(Status::Valid));
    }

    #[cfg(feature = "vars")]
    #[test]
    fn test_varcon_different_locale() {
        let dict = BuiltIn::new(crate::config::Locale::EnGb);
        assert_eq!(
            correct_lowercase(&dict, "finalizes"),
            Some(Status::Corrections(vec!["finalises".into()]))
        );
    }

    #[cfg(all(feature = "dict", feature = "vars"))]
    #[test]
    fn test_dict_to_varcon() {
        let dict = BuiltIn::new(crate::config::Locale::EnGb);
        assert_eq!(
            correct_lowercase(&dict, "finallizes"),
            Some(Status::Corrections(vec!["finalises".into()]))
        );
    }

    #[test]
    fn test_case_correct() {
        let cases = [
            ("foo", Case::Lower, "foo"),
            ("foo", Case::None, "foo"),
            ("foo", Case::Title, "Foo"),
            ("foo", Case::Upper, "FOO"),
            ("fOo", Case::None, "fOo"),
        ];
        for &(correction, case, expected) in cases.iter() {
            // Exercise the borrowed variant first, then the owned one.
            let variants = vec![
                Cow::Borrowed(correction),
                Cow::Owned(String::from(correction)),
            ];
            for mut actual in variants {
                case_correct(&mut actual, case);
                assert_eq!(expected, actual);
            }
        }
    }
}