bat/
syntax_mapping.rs

1use std::{
2    path::Path,
3    sync::{
4        atomic::{AtomicBool, Ordering},
5        Arc,
6    },
7    thread,
8};
9
10use globset::{Candidate, GlobBuilder, GlobMatcher};
11use once_cell::sync::Lazy;
12
13use crate::error::Result;
14use builtin::BUILTIN_MAPPINGS;
15use ignored_suffixes::IgnoredSuffixes;
16
17mod builtin;
18pub mod ignored_suffixes;
19
20fn make_glob_matcher(from: &str) -> Result<GlobMatcher> {
21    let matcher = GlobBuilder::new(from)
22        .case_insensitive(true)
23        .literal_separator(true)
24        .build()?
25        .compile_matcher();
26    Ok(matcher)
27}
28
/// What a matching rule resolves to.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum MappingTarget<'a> {
    /// Map the path to the syntax with the given name.
    MapTo(&'a str),

    /// Map the path (usually a file name without an extension) to an unknown
    /// syntax. In practice this typically means the syntax is decided later
    /// from the contents of the file's first line.
    MapToUnknown,

    /// Map a file extension (e.g. `*.conf`) to an unknown syntax, which
    /// typically means the syntax is decided later from the contents of the
    /// file's first line.
    ///
    /// If some syntax handles a file name that merely happens to carry this
    /// extension (e.g. `resolv.conf`), that association takes precedence and
    /// this mapping is ignored.
    MapExtensionToUnknown,
}
48
49#[derive(Debug, Clone, Default)]
50pub struct SyntaxMapping<'a> {
51    /// User-defined mappings at run time.
52    ///
53    /// Rules in front have precedence.
54    custom_mappings: Vec<(GlobMatcher, MappingTarget<'a>)>,
55
56    pub(crate) ignored_suffixes: IgnoredSuffixes<'a>,
57
58    /// A flag to halt glob matcher building, which is offloaded to another thread.
59    ///
60    /// We have this so that we can signal the thread to halt early when appropriate.
61    halt_glob_build: Arc<AtomicBool>,
62}
63
64impl<'a> Drop for SyntaxMapping<'a> {
65    fn drop(&mut self) {
66        // signal the offload thread to halt early
67        self.halt_glob_build.store(true, Ordering::Relaxed);
68    }
69}
70
71impl<'a> SyntaxMapping<'a> {
72    pub fn new() -> SyntaxMapping<'a> {
73        Default::default()
74    }
75
76    /// Start a thread to build the glob matchers for all builtin mappings.
77    ///
78    /// The use of this function while not necessary, is useful to speed up startup
79    /// times by starting this work early in parallel.
80    ///
81    /// The thread halts if/when `halt_glob_build` is set to true.
82    pub fn start_offload_build_all(&self) {
83        let halt = Arc::clone(&self.halt_glob_build);
84        thread::spawn(move || {
85            for (matcher, _) in BUILTIN_MAPPINGS.iter() {
86                if halt.load(Ordering::Relaxed) {
87                    break;
88                }
89                Lazy::force(matcher);
90            }
91        });
92        // Note that this thread is not joined upon completion because there's
93        // no shared resources that need synchronization to be safely dropped.
94        // If we later add code into this thread that requires interesting
95        // resources (e.g. IO), it would be a good idea to store the handle
96        // and join it on drop.
97    }
98
99    pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
100        let matcher = make_glob_matcher(from)?;
101        self.custom_mappings.push((matcher, to));
102        Ok(())
103    }
104
105    /// Returns an iterator over all mappings. User-defined mappings are listed
106    /// before builtin mappings; mappings in front have higher precedence.
107    ///
108    /// Builtin mappings' `GlobMatcher`s are lazily compiled.
109    ///
110    /// Note that this function only returns mappings that are valid under the
111    /// current environment. For details see [`Self::builtin_mappings`].
112    pub fn all_mappings(&self) -> impl Iterator<Item = (&GlobMatcher, &MappingTarget<'a>)> {
113        self.custom_mappings()
114            .iter()
115            .map(|(matcher, target)| (matcher, target)) // as_ref
116            .chain(
117                // we need a map with a closure to "do" the lifetime variance
118                // see: https://discord.com/channels/273534239310479360/1120124565591425034/1170543402870382653
119                // also, clippy false positive:
120                // see: https://github.com/rust-lang/rust-clippy/issues/9280
121                #[allow(clippy::map_identity)]
122                self.builtin_mappings().map(|rule| rule),
123            )
124    }
125
126    /// Returns an iterator over all valid builtin mappings. Mappings in front
127    /// have higher precedence.
128    ///
129    /// The `GlabMatcher`s are lazily compiled.
130    ///
131    /// Mappings that are invalid under the current environment (i.e. rule
132    /// requires environment variable(s) that is unset, or the joined string
133    /// after variable(s) replacement is not a valid glob expression) are
134    /// ignored.
135    pub fn builtin_mappings(
136        &self,
137    ) -> impl Iterator<Item = (&'static GlobMatcher, &'static MappingTarget<'static>)> {
138        BUILTIN_MAPPINGS
139            .iter()
140            .filter_map(|(matcher, target)| matcher.as_ref().map(|glob| (glob, target)))
141    }
142
143    /// Returns all user-defined mappings.
144    pub fn custom_mappings(&self) -> &[(GlobMatcher, MappingTarget<'a>)] {
145        &self.custom_mappings
146    }
147
148    pub fn get_syntax_for(&self, path: impl AsRef<Path>) -> Option<MappingTarget<'a>> {
149        // Try matching on the file name as-is.
150        let candidate = Candidate::new(&path);
151        let candidate_filename = path.as_ref().file_name().map(Candidate::new);
152        for (glob, syntax) in self.all_mappings() {
153            if glob.is_match_candidate(&candidate)
154                || candidate_filename
155                    .as_ref()
156                    .map_or(false, |filename| glob.is_match_candidate(filename))
157            {
158                return Some(*syntax);
159            }
160        }
161        // Try matching on the file name after removing an ignored suffix.
162        let file_name = path.as_ref().file_name()?;
163        self.ignored_suffixes
164            .try_with_stripped_suffix(file_name, |stripped_file_name| {
165                Ok(self.get_syntax_for(stripped_file_name))
166            })
167            .ok()?
168    }
169
170    pub fn insert_ignored_suffix(&mut self, suffix: &'a str) {
171        self.ignored_suffixes.add_suffix(suffix);
172    }
173}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// A builtin rule is found without inserting anything.
    #[test]
    fn builtin_mappings_work() {
        let map = SyntaxMapping::new();

        assert_eq!(
            map.get_syntax_for("/path/to/build"),
            Some(MappingTarget::MapToUnknown)
        );
    }

    #[test]
    fn all_fixed_builtin_mappings_can_compile() {
        let map = SyntaxMapping::new();

        // collect call evaluates all lazy closures
        // fixed builtin mappings will panic if they fail to compile
        let _mappings = map.builtin_mappings().collect::<Vec<_>>();
    }

    #[test]
    fn builtin_mappings_matcher_only_compile_once() {
        let map = SyntaxMapping::new();

        let two_iterations: Vec<_> = (0..2)
            .map(|_| {
                // addresses of every matcher
                map.builtin_mappings()
                    .map(|(matcher, _)| matcher as *const _ as usize)
                    .collect::<Vec<_>>()
            })
            .collect();

        // if the matchers are only compiled once, their address should remain the same
        assert_eq!(two_iterations[0], two_iterations[1]);
    }

    /// User-defined rules match the exact paths they were inserted for.
    #[test]
    fn custom_mappings_work() {
        let mut map = SyntaxMapping::new();
        // Assert on the insertion itself so that a glob which fails to compile
        // is reported here rather than as a confusing lookup mismatch below.
        assert!(map
            .insert("/path/to/Cargo.lock", MappingTarget::MapTo("TOML"))
            .is_ok());
        assert!(map
            .insert("/path/to/.ignore", MappingTarget::MapTo("Git Ignore"))
            .is_ok());

        assert_eq!(
            map.get_syntax_for("/path/to/Cargo.lock"),
            Some(MappingTarget::MapTo("TOML"))
        );
        assert_eq!(map.get_syntax_for("/path/to/other.lock"), None);

        assert_eq!(
            map.get_syntax_for("/path/to/.ignore"),
            Some(MappingTarget::MapTo("Git Ignore"))
        );
    }

    /// A user-defined rule wins over a builtin rule for the same file.
    #[test]
    fn custom_mappings_override_builtin() {
        let mut map = SyntaxMapping::new();

        assert_eq!(
            map.get_syntax_for("/path/to/httpd.conf"),
            Some(MappingTarget::MapTo("Apache Conf"))
        );
        assert!(map
            .insert("httpd.conf", MappingTarget::MapTo("My Syntax"))
            .is_ok());
        assert_eq!(
            map.get_syntax_for("/path/to/httpd.conf"),
            Some(MappingTarget::MapTo("My Syntax"))
        );
    }

    /// Among user-defined rules, the one inserted first wins.
    #[test]
    fn custom_mappings_precedence() {
        let mut map = SyntaxMapping::new();

        assert!(map
            .insert("/path/to/foo", MappingTarget::MapTo("alpha"))
            .is_ok());
        assert!(map
            .insert("/path/to/foo", MappingTarget::MapTo("bravo"))
            .is_ok());
        assert_eq!(
            map.get_syntax_for("/path/to/foo"),
            Some(MappingTarget::MapTo("alpha"))
        );
    }
}