typos_cli/
file_type.rs

1use std::collections::BTreeMap;
2use std::path::Path;
3
4use kstring::KString;
5
6#[derive(Default, Clone, Debug)]
7pub(crate) struct TypesBuilder {
8    definitions: BTreeMap<KString, Vec<(KString, usize)>>,
9}
10
11impl TypesBuilder {
12    pub(crate) fn new() -> Self {
13        Default::default()
14    }
15
16    pub(crate) fn add_defaults(&mut self) {
17        self.definitions.extend(
18            crate::default_types::DEFAULT_TYPES
19                .iter()
20                .map(|(name, glob)| {
21                    let name = KString::from(*name);
22                    let globs = glob.iter().map(|s| (KString::from(*s), 0)).collect();
23                    (name, globs)
24                }),
25        );
26    }
27
28    pub(crate) fn contains_name(&self, name: &str) -> bool {
29        self.definitions.contains_key(name)
30    }
31
32    pub(crate) fn add(&mut self, name: impl Into<KString>, glob: impl Into<KString>) {
33        let name = name.into();
34        let glob = glob.into();
35        let weight = self.definitions.len();
36        self.definitions
37            .entry(name)
38            .or_default()
39            .push((glob, weight));
40    }
41
42    pub(crate) fn build(self) -> Result<Types, anyhow::Error> {
43        let mut definitions = self
44            .definitions
45            .iter()
46            .flat_map(|(name, globs)| {
47                globs.iter().map(move |(glob, weight)| {
48                    let sort = sort_key(glob);
49                    (sort, weight, name, glob)
50                })
51            })
52            .collect::<Vec<_>>();
53        definitions.sort();
54
55        let rev_definitions = definitions
56            .iter()
57            .map(|(_, _, name, glob)| (*glob, *name))
58            .collect::<BTreeMap<_, _>>();
59        let mut unique_definitions = BTreeMap::<KString, Vec<KString>>::new();
60        for (glob, name) in rev_definitions {
61            unique_definitions
62                .entry(name.clone())
63                .or_default()
64                .push(glob.clone());
65        }
66
67        let mut glob_to_name = Vec::new();
68        let mut build_set = globset::GlobSetBuilder::new();
69        for (_, _, name, glob) in definitions {
70            glob_to_name.push(name.clone());
71            build_set.add(
72                globset::GlobBuilder::new(glob)
73                    .literal_separator(true)
74                    .build()?,
75            );
76        }
77        let set = build_set.build()?;
78
79        Ok(Types {
80            definitions: unique_definitions,
81            glob_to_name,
82            set,
83            matches: std::sync::Arc::new(thread_local::ThreadLocal::default()),
84        })
85    }
86}
87
88fn sort_key(glob: &str) -> Vec<GlobPart<'_>> {
89    let mut key = glob
90        .split('.')
91        .map(|s| {
92            if s == "*" {
93                GlobPart::Wild(s)
94            } else if s.contains('*') {
95                GlobPart::PartialWild(s)
96            } else {
97                GlobPart::Literalish(s)
98            }
99        })
100        .collect::<Vec<_>>();
101    key.reverse();
102    key
103}
104
/// One `.`-separated piece of a glob, tagged by how much of it is a wildcard.
///
/// The variant order is significant: the derived `Ord` ranks
/// `Wild` < `PartialWild` < `Literalish`, and pieces of the same variant are
/// compared by their text. Do not reorder the variants.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum GlobPart<'s> {
    /// The piece is exactly `*`.
    Wild(&'s str),
    /// The piece contains a `*` alongside other characters.
    PartialWild(&'s str),
    /// The piece contains no `*`.
    Literalish(&'s str),
}
111
112#[derive(Default, Clone, Debug)]
113pub(crate) struct Types {
114    definitions: BTreeMap<KString, Vec<KString>>,
115    glob_to_name: Vec<KString>,
116    set: globset::GlobSet,
117    /// Temporary storage for globs that match.
118    matches: std::sync::Arc<thread_local::ThreadLocal<std::cell::RefCell<Vec<usize>>>>,
119}
120
121impl Types {
122    pub(crate) fn definitions(&self) -> &BTreeMap<KString, Vec<KString>> {
123        &self.definitions
124    }
125
126    pub(crate) fn file_matched(&self, path: &Path) -> Option<&str> {
127        let mut mpath = Path::new(path);
128        let mut matches = self.matches.get_or_default().borrow_mut();
129        loop {
130            self.set.matches_into(mpath.file_name()?, &mut matches);
131            if !matches.is_empty() {
132                break;
133            }
134            match mpath.extension() {
135                None => break,
136                Some(ext) => {
137                    if ext == "in" {
138                        mpath = Path::new(mpath.file_stem()?);
139                        continue;
140                    }
141                }
142            }
143            break;
144        }
145        matches
146            .last()
147            .copied()
148            .map(|i| self.glob_to_name[i].as_str())
149    }
150}
151
#[cfg(test)]
mod tests {
    use super::*;

    // Declares a test named `$name` that builds a matcher from the
    // `(type name, globs)` table `$types` and checks that `$path` resolves to
    // `$matched` (a type name, or `None`).
    macro_rules! matched {
        ($name:ident, $types:expr, $path:expr, $matched:expr) => {
            #[test]
            fn $name() {
                let mut builder = TypesBuilder::new();
                for (type_name, patterns) in $types {
                    for pattern in *patterns {
                        builder.add(*type_name, *pattern);
                    }
                }
                let built = builder.build().unwrap();
                let expected: Option<&str> = $matched.into();
                let actual = built.file_matched(std::path::Path::new($path));
                assert_eq!(expected, actual, "{}", $path);
            }
        };
    }

    // General-purpose fixture: overlapping json/lock globs plus an explicit
    // `*.js.in` type.
    fn types() -> &'static [(&'static str, &'static [&'static str])] {
        &[
            ("html", &["*.html", "*.htm"]),
            ("js", &["*.js"]),
            ("json", &["*.json"]),
            ("lock", &["package-lock.json", "*.lock"]),
            ("js-in", &["*.js.in"]),
        ]
    }

    // Fixture with a catch-all `*.in` type, used to check that a direct match
    // wins over stripping the `.in` extension.
    fn in_types() -> &'static [(&'static str, &'static [&'static str])] {
        &[("html", &["*.html", "*.htm"]), ("in-canary", &["*.in"])]
    }

    matched!(basic_match, types(), "leftpad.js", "js");
    matched!(multi_def_1, types(), "index.html", "html");
    matched!(multi_def_2, types(), "index.htm", "html");
    matched!(no_match, types(), "leftpad.ada", None);
    matched!(more_specific, types(), "package-lock.json", "lock");
    matched!(basic_in, types(), "index.html.in", "html");
    matched!(basic_in_in, types(), "index.html.in.in", "html");
    matched!(ext_plus_in, types(), "foo.js.in", "js-in");
    matched!(toplevel_in, in_types(), "index.html.in", "in-canary");

    // Declares a test named `$name` that sorts the globs in `$actual` by
    // `sort_key` and compares the result with `$expected`.
    macro_rules! sort {
        ($name:ident, $actual:expr, $expected:expr) => {
            #[test]
            fn $name() {
                let expected: Vec<&str> = $expected.into_iter().collect();

                let mut actual: Vec<&str> = $actual.into_iter().collect();
                actual.sort_by_key(|s| sort_key(s));

                assert_eq!(expected, actual);
            }
        };
    }

    sort!(literal_sort, ["b", "c", "a"], ["a", "b", "c"]);
    sort!(
        basic_glob_sort,
        ["a_specific", "z_partial*"],
        ["z_partial*", "a_specific"]
    );
    sort!(
        nested_glob_sort,
        ["a.specific", "z*.partial", "z.partial*"],
        ["z.partial*", "z*.partial", "a.specific"]
    );
    sort!(most_specific, ["*.txt.in", "*.in"], ["*.in", "*.txt.in"]);
}