typos_cli/
policy.rs

1pub struct ConfigStorage {
2    arena: std::sync::Mutex<typed_arena::Arena<kstring::KString>>,
3}
4
5impl ConfigStorage {
6    pub fn new() -> Self {
7        Self {
8            arena: std::sync::Mutex::new(typed_arena::Arena::new()),
9        }
10    }
11
12    fn get<'s>(&'s self, other: &str) -> &'s str {
13        // Safe because we the references are stable once created.
14        //
15        // Trying to get this handled inside of `typed_arena` directly, see
16        // https://github.com/SimonSapin/rust-typed-arena/issues/49#issuecomment-809517312
17        unsafe {
18            std::mem::transmute::<&str, &str>(
19                self.arena
20                    .lock()
21                    .unwrap()
22                    .alloc(kstring::KString::from_ref(other))
23                    .as_str(),
24            )
25        }
26    }
27}
28
29impl Default for ConfigStorage {
30    fn default() -> Self {
31        Self::new()
32    }
33}
34
35pub struct ConfigEngine<'s> {
36    storage: &'s ConfigStorage,
37
38    overrides: Option<crate::config::Config>,
39    isolated: bool,
40
41    configs: std::collections::HashMap<std::path::PathBuf, DirConfig>,
42    walk: Intern<crate::config::Walk>,
43    tokenizer: Intern<typos::tokens::Tokenizer>,
44    dict: Intern<crate::dict::Override<'s, 's, crate::dict::BuiltIn>>,
45    ignore: Intern<Vec<regex::Regex>>,
46}
47
48impl<'s> ConfigEngine<'s> {
49    pub fn new(storage: &'s ConfigStorage) -> Self {
50        Self {
51            storage,
52            overrides: Default::default(),
53            configs: Default::default(),
54            isolated: false,
55            walk: Default::default(),
56            tokenizer: Default::default(),
57            dict: Default::default(),
58            ignore: Default::default(),
59        }
60    }
61
62    pub fn set_overrides(&mut self, overrides: crate::config::Config) -> &mut Self {
63        self.overrides = Some(overrides);
64        self
65    }
66
67    pub fn set_isolated(&mut self, isolated: bool) -> &mut Self {
68        self.isolated = isolated;
69        self
70    }
71
72    pub fn walk(&self, cwd: &std::path::Path) -> &crate::config::Walk {
73        debug_assert!(cwd.is_absolute(), "{} is not absolute", cwd.display());
74        let dir = self
75            .configs
76            .get(cwd)
77            .expect("`init_dir` must be called first");
78        self.get_walk(dir)
79    }
80
81    pub fn file_types(
82        &self,
83        cwd: &std::path::Path,
84    ) -> &std::collections::BTreeMap<kstring::KString, Vec<kstring::KString>> {
85        debug_assert!(cwd.is_absolute(), "{} is not absolute", cwd.display());
86        let dir = self
87            .configs
88            .get(cwd)
89            .expect("`init_dir` must be called first");
90        dir.type_matcher.definitions()
91    }
92
93    pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_, '_> {
94        debug_assert!(path.is_absolute(), "{} is not absolute", path.display());
95        let dir = self.get_dir(path).expect("`walk()` should be called first");
96        let (file_type, file_config) = dir.get_file_config(path);
97        Policy {
98            check_filenames: file_config.check_filenames,
99            check_files: file_config.check_files,
100            file_type,
101            binary: file_config.binary,
102            tokenizer: self.get_tokenizer(&file_config),
103            dict: self.get_dict(&file_config),
104            ignore: self.get_ignore(&file_config),
105        }
106    }
107
108    fn get_walk(&self, dir: &DirConfig) -> &crate::config::Walk {
109        self.walk.get(dir.walk)
110    }
111
112    fn get_tokenizer(&self, file: &FileConfig) -> &typos::tokens::Tokenizer {
113        self.tokenizer.get(file.tokenizer)
114    }
115
116    fn get_dict(&self, file: &FileConfig) -> &dyn typos::Dictionary {
117        self.dict.get(file.dict)
118    }
119
120    fn get_ignore(&self, file: &FileConfig) -> &[regex::Regex] {
121        self.ignore.get(file.ignore)
122    }
123
124    fn get_dir(&self, path: &std::path::Path) -> Option<&DirConfig> {
125        for path in path.ancestors() {
126            if let Some(dir) = self.configs.get(path) {
127                return Some(dir);
128            }
129        }
130        None
131    }
132
133    pub fn load_config(
134        &self,
135        cwd: &std::path::Path,
136    ) -> Result<crate::config::Config, anyhow::Error> {
137        debug_assert!(cwd.is_absolute(), "{} is not absolute", cwd.display());
138        let mut config = crate::config::Config::default();
139
140        if !self.isolated {
141            for ancestor in cwd.ancestors() {
142                if let Some(derived) = crate::config::Config::from_dir(ancestor)? {
143                    config.update(&derived);
144                    break;
145                }
146            }
147        }
148        if let Some(overrides) = self.overrides.as_ref() {
149            config.update(overrides);
150        }
151
152        let mut types = Default::default();
153        std::mem::swap(&mut types, &mut config.type_.patterns);
154        let mut types = types
155            .into_iter()
156            .map(|(type_, type_engine)| {
157                let mut new_engine = config.default.clone();
158                new_engine.update(&type_engine.engine);
159                new_engine.update(&config.overrides);
160                let new_type_engine = crate::config::GlobEngineConfig {
161                    extend_glob: type_engine.extend_glob,
162                    engine: new_engine,
163                };
164                (type_, new_type_engine)
165            })
166            .collect();
167        std::mem::swap(&mut types, &mut config.type_.patterns);
168
169        config.default.update(&config.overrides);
170
171        Ok(config)
172    }
173
174    pub fn init_dir(&mut self, cwd: &std::path::Path) -> Result<(), anyhow::Error> {
175        debug_assert!(cwd.is_absolute(), "{} is not absolute", cwd.display());
176        if self.configs.contains_key(cwd) {
177            return Ok(());
178        }
179
180        let config = self.load_config(cwd)?;
181        let crate::config::Config {
182            files,
183            mut default,
184            type_,
185            overrides,
186        } = config;
187
188        let walk = self.walk.intern(files);
189
190        let mut type_matcher = crate::file_type::TypesBuilder::new();
191        type_matcher.add_defaults();
192        let mut types: std::collections::HashMap<_, _> = Default::default();
193        for (type_name, type_engine) in type_.patterns() {
194            if type_engine.extend_glob.is_empty() {
195                if !type_matcher.contains_name(&type_name) {
196                    anyhow::bail!("Unknown type definition `{}`, pass `--type-list` to see valid names or set `extend-glob` to add a new one.", type_name);
197                }
198            } else {
199                for glob in type_engine.extend_glob.iter() {
200                    type_matcher.add(type_name.as_ref(), glob.as_ref());
201                }
202            }
203
204            let mut engine = default.clone();
205            engine.update(&type_engine.engine);
206            engine.update(&overrides);
207
208            let type_config = self.init_file_config(engine);
209            types.insert(type_name, type_config);
210        }
211        default.update(&overrides);
212        let default = self.init_file_config(default);
213
214        let dir = DirConfig {
215            walk,
216            default,
217            types,
218            type_matcher: type_matcher.build()?,
219        };
220
221        self.configs.insert(cwd.to_owned(), dir);
222        Ok(())
223    }
224
225    fn init_file_config(&mut self, engine: crate::config::EngineConfig) -> FileConfig {
226        let binary = engine.binary();
227        let check_filename = engine.check_filename();
228        let check_file = engine.check_file();
229        let crate::config::EngineConfig {
230            tokenizer: tokenizer_user_config,
231            dict: dict_user_config,
232            extend_ignore_re,
233            ..
234        } = engine;
235
236        let mut tokenizer_config = crate::config::TokenizerConfig::from_defaults();
237        tokenizer_config.update(&tokenizer_user_config);
238        let mut dict_config = crate::config::DictConfig::from_defaults();
239        dict_config.update(&dict_user_config);
240
241        if !tokenizer_config.ignore_hex() {
242            log::warn!("`ignore-hex` is deprecated");
243            if !tokenizer_config.identifier_leading_digits() {
244                log::warn!("`identifier-leading-digits` is deprecated");
245            }
246        }
247
248        let tokenizer = typos::tokens::TokenizerBuilder::new()
249            .unicode(tokenizer_config.unicode())
250            .build();
251
252        let dict = crate::dict::BuiltIn::new(dict_config.locale());
253        let mut dict = crate::dict::Override::new(dict);
254        dict.ignored_identifiers(dict_config.extend_ignore_identifiers_re());
255        dict.identifiers(
256            dict_config
257                .extend_identifiers()
258                .map(|(k, v)| (self.storage.get(k), self.storage.get(v))),
259        );
260        dict.ignored_words(dict_config.extend_ignore_words_re());
261        dict.words(
262            dict_config
263                .extend_words()
264                .map(|(k, v)| (self.storage.get(k), self.storage.get(v))),
265        );
266
267        let dict = self.dict.intern(dict);
268        let tokenizer = self.tokenizer.intern(tokenizer);
269
270        let ignore = self.ignore.intern(extend_ignore_re);
271
272        FileConfig {
273            check_filenames: check_filename,
274            check_files: check_file,
275            binary,
276            tokenizer,
277            dict,
278            ignore,
279        }
280    }
281}
282
/// Append-only table that identifies each stored value by its insertion index.
///
/// Values are never compared or deduplicated: every call to `intern` adds a
/// new entry.
struct Intern<T> {
    data: Vec<T>,
}

impl<T> Intern<T> {
    /// Creates an empty table.
    pub(crate) fn new() -> Self {
        Self { data: Vec::new() }
    }

    /// Stores `value` and returns the symbol under which `get` finds it.
    pub(crate) fn intern(&mut self, value: T) -> usize {
        self.data.push(value);
        // The value just pushed sits in the last slot.
        self.data.len() - 1
    }

    /// Returns the value stored under `symbol`.
    ///
    /// # Panics
    ///
    /// Panics if `symbol` was not returned by `intern` on this table.
    pub(crate) fn get(&self, symbol: usize) -> &T {
        let entries = self.data.as_slice();
        &entries[symbol]
    }
}

impl<T> Default for Intern<T> {
    /// Equivalent to [`Intern::new`]; implemented by hand so that `T` does
    /// not need to implement `Default`.
    fn default() -> Self {
        Self::new()
    }
}
310
311#[derive(Clone, Debug)]
312struct DirConfig {
313    walk: usize,
314    default: FileConfig,
315    types: std::collections::HashMap<kstring::KString, FileConfig>,
316    type_matcher: crate::file_type::Types,
317}
318
319impl DirConfig {
320    fn get_file_config(&self, path: &std::path::Path) -> (Option<&str>, FileConfig) {
321        let name = self.type_matcher.file_matched(path);
322
323        let config = name
324            .and_then(|name| {
325                log::debug!("{}: `{name}` policy", path.display());
326                self.types.get(name).copied()
327            })
328            .unwrap_or_else(|| {
329                log::debug!(
330                    "{}: default policy for `{}` file type",
331                    path.display(),
332                    name.unwrap_or("<unknown>")
333                );
334                self.default
335            });
336        (name, config)
337    }
338}
339
/// Per-file settings, with the heavier values replaced by indices into the
/// engine's tables.
#[derive(Copy, Clone, Debug)]
struct FileConfig {
    /// Index into the engine's `tokenizer` table.
    tokenizer: usize,
    /// Index into the engine's `dict` table.
    dict: usize,
    /// Value of the engine config's `check_filename()`.
    check_filenames: bool,
    /// Value of the engine config's `check_file()`.
    check_files: bool,
    /// Value of the engine config's `binary()`.
    binary: bool,
    /// Index into the engine's `ignore` table.
    ignore: usize,
}
349
350#[non_exhaustive]
351#[derive(derive_setters::Setters)]
352pub struct Policy<'t, 'd, 'i> {
353    pub check_filenames: bool,
354    pub check_files: bool,
355    pub file_type: Option<&'d str>,
356    pub binary: bool,
357    pub tokenizer: &'t typos::tokens::Tokenizer,
358    pub dict: &'d dyn typos::Dictionary,
359    pub ignore: &'i [regex::Regex],
360}
361
362impl Policy<'_, '_, '_> {
363    pub fn new() -> Self {
364        Default::default()
365    }
366}
367
368static DEFAULT_TOKENIZER: typos::tokens::Tokenizer = typos::tokens::Tokenizer::new();
369static DEFAULT_DICT: crate::dict::BuiltIn = crate::dict::BuiltIn::new(crate::config::Locale::En);
370static DEFAULT_IGNORE: &[regex::Regex] = &[];
371
372impl Default for Policy<'_, '_, '_> {
373    fn default() -> Self {
374        Self {
375            check_filenames: true,
376            check_files: true,
377            file_type: None,
378            binary: false,
379            tokenizer: &DEFAULT_TOKENIZER,
380            dict: &DEFAULT_DICT,
381            ignore: DEFAULT_IGNORE,
382        }
383    }
384}
385
#[cfg(test)]
mod test {
    use super::*;

    const NEVER_EXIST_TYPE: &str = "THISyTYPEySHOULDyNEVERyEXISTyBUTyIyHATEyYOUyIFyITyDOES";

    /// Absolute path of the directory the tests run in.
    fn current_dir() -> std::path::PathBuf {
        std::path::Path::new(".").canonicalize().unwrap()
    }

    /// Engine that ignores on-disk configuration, so only overrides apply.
    fn isolated_engine(storage: &ConfigStorage) -> ConfigEngine<'_> {
        let mut engine = ConfigEngine::new(storage);
        engine.set_isolated(true);
        engine
    }

    /// Configuration where files are binary by default, except for a custom
    /// type whose only glob is `NEVER_EXIST_TYPE`.
    fn binary_unless_custom_type() -> crate::config::Config {
        let type_name = kstring::KString::from_static(NEVER_EXIST_TYPE);

        crate::config::Config {
            default: crate::config::EngineConfig {
                binary: Some(true),
                ..Default::default()
            },
            type_: crate::config::TypeEngineConfig {
                patterns: maplit::hashmap! {
                    type_name.clone() => crate::config::GlobEngineConfig {
                        extend_glob: vec![type_name],
                        engine: crate::config::EngineConfig {
                            binary: Some(false),
                            ..Default::default()
                        },
                    },
                },
            },
            ..Default::default()
        }
    }

    #[test]
    fn test_load_config_applies_overrides() {
        let storage = ConfigStorage::new();
        let mut engine = isolated_engine(&storage);

        let type_name = kstring::KString::from_static("toml");

        let default = crate::config::EngineConfig {
            binary: Some(true),
            check_filename: Some(true),
            ..Default::default()
        };
        let toml = crate::config::GlobEngineConfig {
            engine: crate::config::EngineConfig {
                check_filename: Some(false),
                check_file: Some(true),
                ..Default::default()
            },
            ..Default::default()
        };
        let overrides = crate::config::EngineConfig {
            binary: Some(false),
            check_file: Some(false),
            ..Default::default()
        };
        engine.set_overrides(crate::config::Config {
            default,
            type_: crate::config::TypeEngineConfig {
                patterns: maplit::hashmap! {
                    type_name.clone() => toml,
                },
            },
            overrides,
            ..Default::default()
        });

        let cwd = current_dir();
        let loaded = engine.load_config(&cwd).unwrap();

        // `overrides` wins over `default`.
        assert_eq!(loaded.default.binary, Some(false));
        assert_eq!(loaded.default.check_filename, Some(true));
        assert_eq!(loaded.default.check_file, Some(false));

        // `overrides` wins over the type, which wins over `default`.
        let toml_engine = &loaded.type_.patterns[type_name.as_str()].engine;
        assert_eq!(toml_engine.binary, Some(false));
        assert_eq!(toml_engine.check_filename, Some(false));
        assert_eq!(toml_engine.check_file, Some(false));
    }

    #[test]
    fn test_init_fails_on_unknown_type() {
        let storage = ConfigStorage::new();
        let mut engine = isolated_engine(&storage);

        let type_name = kstring::KString::from_static(NEVER_EXIST_TYPE);

        // No `extend_glob`, so the name would have to be a known type.
        engine.set_overrides(crate::config::Config {
            type_: crate::config::TypeEngineConfig {
                patterns: maplit::hashmap! {
                    type_name => crate::config::GlobEngineConfig {
                        ..Default::default()
                    },
                },
            },
            ..Default::default()
        });

        let cwd = current_dir();
        assert!(engine.init_dir(&cwd).is_err());
    }

    #[test]
    fn test_policy_default() {
        let storage = ConfigStorage::new();
        let mut engine = isolated_engine(&storage);
        engine.set_overrides(crate::config::Config::default());

        let cwd = current_dir();
        engine.init_dir(&cwd).unwrap();

        let policy = engine.policy(&cwd.join("Cargo.toml"));
        assert!(!policy.binary);
    }

    #[test]
    fn test_policy_fallback() {
        let storage = ConfigStorage::new();
        let mut engine = isolated_engine(&storage);
        engine.set_overrides(binary_unless_custom_type());

        let cwd = current_dir();
        engine.init_dir(&cwd).unwrap();

        // `Cargo.toml` does not match the custom type, so the default applies.
        let policy = engine.policy(&cwd.join("Cargo.toml"));
        assert!(policy.binary);
    }

    #[test]
    fn test_policy_type_specific() {
        let storage = ConfigStorage::new();
        let mut engine = isolated_engine(&storage);
        engine.set_overrides(binary_unless_custom_type());

        let cwd = current_dir();
        engine.init_dir(&cwd).unwrap();

        let policy = engine.policy(&cwd.join("Cargo.toml"));
        assert!(policy.binary);
        // A file named after the glob picks up the type-specific setting.
        let policy = engine.policy(&cwd.join(NEVER_EXIST_TYPE));
        assert!(!policy.binary);
    }
}