// typos_cli/policy.rs

1pub struct ConfigStorage {
2    arena: std::sync::Mutex<typed_arena::Arena<kstring::KString>>,
3}
4
5impl ConfigStorage {
6    pub fn new() -> Self {
7        Self {
8            arena: std::sync::Mutex::new(typed_arena::Arena::new()),
9        }
10    }
11
12    fn get<'s>(&'s self, other: &str) -> &'s str {
13        // Safe because we the references are stable once created.
14        //
15        // Trying to get this handled inside of `typed_arena` directly, see
16        // https://github.com/SimonSapin/rust-typed-arena/issues/49#issuecomment-809517312
17        unsafe {
18            std::mem::transmute::<&str, &str>(
19                self.arena
20                    .lock()
21                    .unwrap()
22                    .alloc(kstring::KString::from_ref(other))
23                    .as_str(),
24            )
25        }
26    }
27}
28
29impl Default for ConfigStorage {
30    fn default() -> Self {
31        Self::new()
32    }
33}
34
35pub struct ConfigEngine<'s> {
36    storage: &'s ConfigStorage,
37
38    overrides: Option<crate::config::Config>,
39    isolated: bool,
40
41    configs: std::collections::HashMap<std::path::PathBuf, DirConfig>,
42    walk: Intern<crate::config::Walk>,
43    tokenizer: Intern<typos::tokens::Tokenizer>,
44    dict: Intern<crate::dict::Override<'s, 's, crate::dict::BuiltIn>>,
45    ignore: Intern<Vec<regex::Regex>>,
46}
47
48impl<'s> ConfigEngine<'s> {
49    pub fn new(storage: &'s ConfigStorage) -> Self {
50        Self {
51            storage,
52            overrides: Default::default(),
53            configs: Default::default(),
54            isolated: false,
55            walk: Default::default(),
56            tokenizer: Default::default(),
57            dict: Default::default(),
58            ignore: Default::default(),
59        }
60    }
61
62    pub fn set_overrides(&mut self, overrides: crate::config::Config) -> &mut Self {
63        self.overrides = Some(overrides);
64        self
65    }
66
67    pub fn set_isolated(&mut self, isolated: bool) -> &mut Self {
68        self.isolated = isolated;
69        self
70    }
71
72    pub fn walk(&self, cwd: &std::path::Path) -> &crate::config::Walk {
73        debug_assert!(cwd.is_absolute(), "{} is not absolute", cwd.display());
74        let dir = self
75            .configs
76            .get(cwd)
77            .expect("`init_dir` must be called first");
78        self.get_walk(dir)
79    }
80
81    pub fn file_types(
82        &self,
83        cwd: &std::path::Path,
84    ) -> &std::collections::BTreeMap<kstring::KString, Vec<kstring::KString>> {
85        debug_assert!(cwd.is_absolute(), "{} is not absolute", cwd.display());
86        let dir = self
87            .configs
88            .get(cwd)
89            .expect("`init_dir` must be called first");
90        dir.type_matcher.definitions()
91    }
92
93    pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_, '_> {
94        debug_assert!(path.is_absolute(), "{} is not absolute", path.display());
95        let dir = self.get_dir(path).expect("`walk()` should be called first");
96        let (file_type, file_config) = dir.get_file_config(path);
97        Policy {
98            check_filenames: file_config.check_filenames,
99            check_files: file_config.check_files,
100            file_type,
101            binary: file_config.binary,
102            tokenizer: self.get_tokenizer(&file_config),
103            dict: self.get_dict(&file_config),
104            ignore: self.get_ignore(&file_config),
105        }
106    }
107
108    fn get_walk(&self, dir: &DirConfig) -> &crate::config::Walk {
109        self.walk.get(dir.walk)
110    }
111
112    fn get_tokenizer(&self, file: &FileConfig) -> &typos::tokens::Tokenizer {
113        self.tokenizer.get(file.tokenizer)
114    }
115
116    fn get_dict(&self, file: &FileConfig) -> &dyn typos::Dictionary {
117        self.dict.get(file.dict)
118    }
119
120    fn get_ignore(&self, file: &FileConfig) -> &[regex::Regex] {
121        self.ignore.get(file.ignore)
122    }
123
124    fn get_dir(&self, path: &std::path::Path) -> Option<&DirConfig> {
125        for path in path.ancestors() {
126            if let Some(dir) = self.configs.get(path) {
127                return Some(dir);
128            }
129        }
130        None
131    }
132
133    pub fn load_config(
134        &self,
135        cwd: &std::path::Path,
136    ) -> Result<crate::config::Config, anyhow::Error> {
137        debug_assert!(cwd.is_absolute(), "{} is not absolute", cwd.display());
138        let mut config = crate::config::Config::default();
139
140        if !self.isolated {
141            for ancestor in cwd.ancestors() {
142                if let Some(derived) = crate::config::Config::from_dir(ancestor)? {
143                    config.update(&derived);
144                    break;
145                }
146            }
147        }
148        if let Some(overrides) = self.overrides.as_ref() {
149            config.update(overrides);
150        }
151
152        let mut types = Default::default();
153        std::mem::swap(&mut types, &mut config.type_.patterns);
154        let mut types = types
155            .into_iter()
156            .map(|(type_, type_engine)| {
157                let mut new_engine = config.default.clone();
158                new_engine.update(&type_engine.engine);
159                new_engine.update(&config.overrides);
160                let new_type_engine = crate::config::GlobEngineConfig {
161                    extend_glob: type_engine.extend_glob,
162                    engine: new_engine,
163                };
164                (type_, new_type_engine)
165            })
166            .collect();
167        std::mem::swap(&mut types, &mut config.type_.patterns);
168
169        config.default.update(&config.overrides);
170
171        Ok(config)
172    }
173
174    pub fn init_dir(&mut self, cwd: &std::path::Path) -> Result<(), anyhow::Error> {
175        debug_assert!(cwd.is_absolute(), "{} is not absolute", cwd.display());
176        if self.configs.contains_key(cwd) {
177            return Ok(());
178        }
179
180        let config = self.load_config(cwd)?;
181        let crate::config::Config {
182            files,
183            mut default,
184            type_,
185            overrides,
186        } = config;
187
188        let walk = self.walk.intern(files);
189
190        let mut type_matcher = crate::file_type::TypesBuilder::new();
191        type_matcher.add_defaults();
192        let mut types: std::collections::HashMap<_, _> = Default::default();
193        for (type_name, type_engine) in type_.patterns() {
194            if type_engine.extend_glob.is_empty() {
195                if !type_matcher.contains_name(&type_name) {
196                    anyhow::bail!(
197                        "Unknown type definition `{type_name}`, pass `--type-list` to see valid names or set `extend-glob` to add a new one."
198                    );
199                }
200            } else {
201                for glob in type_engine.extend_glob.iter() {
202                    type_matcher.add(type_name.as_ref(), glob.as_ref());
203                }
204            }
205
206            let mut engine = default.clone();
207            engine.update(&type_engine.engine);
208            engine.update(&overrides);
209
210            let type_config = self.init_file_config(engine);
211            types.insert(type_name, type_config);
212        }
213        default.update(&overrides);
214        let default = self.init_file_config(default);
215
216        let dir = DirConfig {
217            walk,
218            default,
219            types,
220            type_matcher: type_matcher.build()?,
221        };
222
223        self.configs.insert(cwd.to_owned(), dir);
224        Ok(())
225    }
226
227    fn init_file_config(&mut self, engine: crate::config::EngineConfig) -> FileConfig {
228        let binary = engine.binary();
229        let check_filename = engine.check_filename();
230        let check_file = engine.check_file();
231        let crate::config::EngineConfig {
232            tokenizer: tokenizer_user_config,
233            dict: dict_user_config,
234            extend_ignore_re,
235            ..
236        } = engine;
237
238        let mut tokenizer_config = crate::config::TokenizerConfig::from_defaults();
239        tokenizer_config.update(&tokenizer_user_config);
240        let mut dict_config = crate::config::DictConfig::from_defaults();
241        dict_config.update(&dict_user_config);
242
243        if !tokenizer_config.ignore_hex() {
244            log::warn!("`ignore-hex` is deprecated");
245            if !tokenizer_config.identifier_leading_digits() {
246                log::warn!("`identifier-leading-digits` is deprecated");
247            }
248        }
249
250        let tokenizer = typos::tokens::TokenizerBuilder::new()
251            .unicode(tokenizer_config.unicode())
252            .build();
253
254        let dict = crate::dict::BuiltIn::new(dict_config.locale());
255        let mut dict = crate::dict::Override::new(dict);
256        dict.ignored_identifiers(dict_config.extend_ignore_identifiers_re());
257        dict.identifiers(
258            dict_config
259                .extend_identifiers()
260                .map(|(k, v)| (self.storage.get(k), self.storage.get(v))),
261        );
262        dict.ignored_words(dict_config.extend_ignore_words_re());
263        dict.words(
264            dict_config
265                .extend_words()
266                .map(|(k, v)| (self.storage.get(k), self.storage.get(v))),
267        );
268
269        let dict = self.dict.intern(dict);
270        let tokenizer = self.tokenizer.intern(tokenizer);
271
272        let ignore = self.ignore.intern(extend_ignore_re);
273
274        FileConfig {
275            check_filenames: check_filename,
276            check_files: check_file,
277            binary,
278            tokenizer,
279            dict,
280            ignore,
281        }
282    }
283}
284
/// Append-only pool that hands out `usize` handles for the values stored in it.
///
/// Values are never compared or deduplicated: every call to `intern` stores a new entry.
struct Intern<T> {
    /// Stored values; a handle is the index of its value in this vector.
    data: Vec<T>,
}

impl<T> Intern<T> {
    /// Creates an empty pool.
    pub(crate) fn new() -> Self {
        Self {
            data: Default::default(),
        }
    }

    /// Stores `value` and returns the handle to retrieve it with `get`.
    pub(crate) fn intern(&mut self, value: T) -> usize {
        // The next free index is the current length, so read it before pushing.
        let symbol = self.data.len();
        self.data.push(value);
        symbol
    }

    /// Returns the value stored under `symbol`.
    ///
    /// # Panics
    ///
    /// Panics if `symbol` was not returned by `intern` on this pool.
    pub(crate) fn get(&self, symbol: usize) -> &T {
        &self.data[symbol]
    }
}

/// The default pool is empty; no `T: Default` bound is needed.
impl<T> Default for Intern<T> {
    fn default() -> Self {
        Self::new()
    }
}
312
313#[derive(Clone, Debug)]
314struct DirConfig {
315    walk: usize,
316    default: FileConfig,
317    types: std::collections::HashMap<kstring::KString, FileConfig>,
318    type_matcher: crate::file_type::Types,
319}
320
321impl DirConfig {
322    fn get_file_config(&self, path: &std::path::Path) -> (Option<&str>, FileConfig) {
323        let name = self.type_matcher.file_matched(path);
324
325        let config = name
326            .and_then(|name| {
327                log::debug!("{}: `{name}` policy", path.display());
328                self.types.get(name).copied()
329            })
330            .unwrap_or_else(|| {
331                log::debug!(
332                    "{}: default policy for `{}` file type",
333                    path.display(),
334                    name.unwrap_or("<unknown>")
335                );
336                self.default
337            });
338        (name, config)
339    }
340}
341
/// Resolved settings for one kind of file.
///
/// `tokenizer`, `dict` and `ignore` are handles into the matching `ConfigEngine` pools, which
/// keeps this type `Copy`.
#[derive(Copy, Clone, Debug)]
struct FileConfig {
    tokenizer: usize,
    dict: usize,
    check_filenames: bool,
    check_files: bool,
    binary: bool,
    ignore: usize,
}
351
352#[non_exhaustive]
353#[derive(derive_setters::Setters)]
354pub struct Policy<'t, 'd, 'i> {
355    pub check_filenames: bool,
356    pub check_files: bool,
357    pub file_type: Option<&'d str>,
358    pub binary: bool,
359    pub tokenizer: &'t typos::tokens::Tokenizer,
360    pub dict: &'d dyn typos::Dictionary,
361    pub ignore: &'i [regex::Regex],
362}
363
364impl Policy<'_, '_, '_> {
365    pub fn new() -> Self {
366        Default::default()
367    }
368}
369
370static DEFAULT_TOKENIZER: typos::tokens::Tokenizer = typos::tokens::Tokenizer::new();
371static DEFAULT_DICT: crate::dict::BuiltIn = crate::dict::BuiltIn::new(crate::config::Locale::En);
372static DEFAULT_IGNORE: &[regex::Regex] = &[];
373
374impl Default for Policy<'_, '_, '_> {
375    fn default() -> Self {
376        Self {
377            check_filenames: true,
378            check_files: true,
379            file_type: None,
380            binary: false,
381            tokenizer: &DEFAULT_TOKENIZER,
382            dict: &DEFAULT_DICT,
383            ignore: DEFAULT_IGNORE,
384        }
385    }
386}
387
#[cfg(test)]
mod test {
    use super::*;

    /// File type name (also used as a glob) that is not expected to be a built-in type.
    const NEVER_EXIST_TYPE: &str = "THISyTYPEySHOULDyNEVERyEXISTyBUTyIyHATEyYOUyIFyITyDOES";

    /// `overrides` must win over both the default engine and the per-type engine, while
    /// settings that `overrides` leaves unset keep their more specific value.
    #[test]
    fn test_load_config_applies_overrides() {
        let storage = ConfigStorage::new();
        let mut engine = ConfigEngine::new(&storage);
        engine.set_isolated(true);

        let type_name = kstring::KString::from_static("toml");

        let config = crate::config::Config {
            default: crate::config::EngineConfig {
                binary: Some(true),
                check_filename: Some(true),
                ..Default::default()
            },
            type_: crate::config::TypeEngineConfig {
                patterns: maplit::hashmap! {
                    type_name.clone() => crate::config::GlobEngineConfig {
                        engine: crate::config::EngineConfig {
                            check_filename: Some(false),
                            check_file: Some(true),
                            ..Default::default()
                        },
                        ..Default::default()
                    },
                },
            },
            overrides: crate::config::EngineConfig {
                binary: Some(false),
                check_file: Some(false),
                ..Default::default()
            },
            ..Default::default()
        };
        engine.set_overrides(config);

        let cwd = std::path::Path::new(".").canonicalize().unwrap();
        let loaded = engine.load_config(&cwd).unwrap();
        assert_eq!(loaded.default.binary, Some(false));
        assert_eq!(loaded.default.check_filename, Some(true));
        assert_eq!(loaded.default.check_file, Some(false));
        assert_eq!(
            loaded.type_.patterns[type_name.as_str()].engine.binary,
            Some(false)
        );
        assert_eq!(
            loaded.type_.patterns[type_name.as_str()]
                .engine
                .check_filename,
            Some(false)
        );
        assert_eq!(
            loaded.type_.patterns[type_name.as_str()].engine.check_file,
            Some(false)
        );
    }

    /// A type entry without `extend_glob` must name a known type.
    #[test]
    fn test_init_fails_on_unknown_type() {
        let storage = ConfigStorage::new();
        let mut engine = ConfigEngine::new(&storage);
        engine.set_isolated(true);

        let type_name = kstring::KString::from_static(NEVER_EXIST_TYPE);

        let config = crate::config::Config {
            type_: crate::config::TypeEngineConfig {
                patterns: maplit::hashmap! {
                    type_name => crate::config::GlobEngineConfig {
                        ..Default::default()
                    },
                },
            },
            ..Default::default()
        };
        engine.set_overrides(config);

        let cwd = std::path::Path::new(".").canonicalize().unwrap();
        let result = engine.init_dir(&cwd);
        assert!(result.is_err());
    }

    /// With an empty configuration a file is not treated as binary.
    #[test]
    fn test_policy_default() {
        let storage = ConfigStorage::new();
        let mut engine = ConfigEngine::new(&storage);
        engine.set_isolated(true);

        let config = crate::config::Config::default();
        engine.set_overrides(config);

        let cwd = std::path::Path::new(".").canonicalize().unwrap();
        engine.init_dir(&cwd).unwrap();
        let policy = engine.policy(&cwd.join("Cargo.toml"));
        assert!(!policy.binary);
    }

    /// A file that does not match the custom type falls back to the default engine.
    #[test]
    fn test_policy_fallback() {
        let storage = ConfigStorage::new();
        let mut engine = ConfigEngine::new(&storage);
        engine.set_isolated(true);

        let type_name = kstring::KString::from_static(NEVER_EXIST_TYPE);

        let config = crate::config::Config {
            default: crate::config::EngineConfig {
                binary: Some(true),
                ..Default::default()
            },
            type_: crate::config::TypeEngineConfig {
                patterns: maplit::hashmap! {
                    type_name.clone() => crate::config::GlobEngineConfig {
                        extend_glob: vec![type_name],
                        engine: crate::config::EngineConfig {
                            binary: Some(false),
                            ..Default::default()
                        },
                    },
                },
            },
            ..Default::default()
        };
        engine.set_overrides(config);

        let cwd = std::path::Path::new(".").canonicalize().unwrap();
        engine.init_dir(&cwd).unwrap();
        let policy = engine.policy(&cwd.join("Cargo.toml"));
        assert!(policy.binary);
    }

    /// A file that matches the custom type gets that type's engine; other files keep the
    /// default one.
    #[test]
    fn test_policy_type_specific() {
        let storage = ConfigStorage::new();
        let mut engine = ConfigEngine::new(&storage);
        engine.set_isolated(true);

        let type_name = kstring::KString::from_static(NEVER_EXIST_TYPE);

        let config = crate::config::Config {
            default: crate::config::EngineConfig {
                binary: Some(true),
                ..Default::default()
            },
            type_: crate::config::TypeEngineConfig {
                patterns: maplit::hashmap! {
                    type_name.clone() => crate::config::GlobEngineConfig {
                        extend_glob: vec![type_name],
                        engine: crate::config::EngineConfig {
                            binary: Some(false),
                            ..Default::default()
                        },
                    },
                },
            },
            ..Default::default()
        };
        engine.set_overrides(config);

        let cwd = std::path::Path::new(".").canonicalize().unwrap();
        engine.init_dir(&cwd).unwrap();
        let policy = engine.policy(&cwd.join("Cargo.toml"));
        assert!(policy.binary);
        let policy = engine.policy(&cwd.join(NEVER_EXIST_TYPE));
        assert!(!policy.binary);
    }
}