Skip to main content

mago_database/
matcher.rs

1//! Path exclusion matching with mixed glob + prefix semantics.
2//!
3//! This module provides two reusable helpers used by the rest of the workspace:
4//!
5//! - [`build_glob_set`] compiles a list of glob-pattern strings into a
6//!   [`globset::GlobSet`] using the project's [`GlobSettings`]. The file
7//!   loader ([`crate::loader`]) uses this for `source.excludes` entries that
8//!   are pure glob patterns.
9//! - [`ExclusionMatcher`] layers on top of `build_glob_set`, splitting an
10//!   input list into glob patterns and plain prefixes.
11//!
12//! # Pattern semantics used by [`ExclusionMatcher`]
13//!
14//! Each pattern in the input list is classified as either a **glob pattern**
15//! or a **plain prefix** based on whether it contains any glob metacharacters
16//! (`*`, `?`, `[`, `{`):
17//!
18//! - Glob patterns (e.g. `src/**/*.php`, `tests/fixtures/*`) are compiled via
19//!   the [`globset`] crate using the supplied [`GlobSettings`] and matched
20//!   against the full file path.
21//! - Plain prefixes (e.g. `src`, `tests/fixtures/`) use the pre-existing
22//!   directory-prefix semantics: a pattern `X` matches `X` and `X/anything`,
23//!   while a pattern already ending in `/` matches only paths that start with it.
24//!
25//! This split keeps existing `exclude = ["src/foo"]` configurations working
26//! exactly as before while adding full glob support for anyone who needs it.
27
28use globset::GlobBuilder;
29use globset::GlobSet;
30use globset::GlobSetBuilder;
31
32use crate::GlobSettings;
33use crate::error::DatabaseError;
34
/// Reports whether `pattern` must be treated as a glob instead of a literal
/// path.
///
/// A pattern qualifies as soon as it contains at least one of the characters
/// `*`, `?`, `[` or `{`; closing brackets/braces on their own do not count.
#[inline]
#[must_use]
pub fn contains_glob_metacharacters<T>(pattern: T) -> bool
where
    T: AsRef<str>,
{
    // The characters whose presence switches a pattern to glob handling.
    const GLOB_METACHARACTERS: &[char] = &['*', '?', '[', '{'];

    pattern.as_ref().contains(GLOB_METACHARACTERS)
}
45
46/// Compiles a list of glob patterns into a [`GlobSet`] using the given
47/// [`GlobSettings`].
48///
49/// This is the single place in the workspace that applies `GlobSettings` to
50/// `GlobBuilder`; callers should prefer this over rebuilding the same
51/// configuration ad-hoc.
52///
53/// Returns an error if any individual pattern fails to compile.
54pub(crate) fn build_glob_set<I, S>(patterns: I, glob_settings: GlobSettings) -> Result<GlobSet, DatabaseError>
55where
56    I: IntoIterator<Item = S>,
57    S: AsRef<str>,
58{
59    let mut builder = GlobSetBuilder::new();
60    for pattern in patterns {
61        let glob = GlobBuilder::new(pattern.as_ref())
62            .case_insensitive(glob_settings.case_insensitive)
63            .literal_separator(glob_settings.literal_separator)
64            .backslash_escape(glob_settings.backslash_escape)
65            .empty_alternates(glob_settings.empty_alternates)
66            .build()?;
67
68        builder.add(glob);
69    }
70
71    Ok(builder.build()?)
72}
73
74/// Compiled matcher for a list of exclusion patterns.
75///
76/// Split patterns are stored once at construction time so that per-file
77/// matching is cheap. See the module-level documentation for the exact
78/// semantics.
79#[derive(Debug, Clone)]
80pub struct ExclusionMatcher<S: Clone + AsRef<str>> {
81    globs: GlobSet,
82    prefixes: Vec<S>,
83}
84
85impl<S: Clone + AsRef<str>> ExclusionMatcher<S> {
86    /// Builds a matcher from a list of patterns and the project's glob
87    /// settings.
88    ///
89    /// # Errors
90    ///
91    /// Returns a [`DatabaseError::InvalidGlobSet`] if any glob pattern fails to compile.
92    #[inline]
93    pub fn compile<I>(patterns: I, glob_settings: GlobSettings) -> Result<Self, DatabaseError>
94    where
95        I: IntoIterator<Item = S>,
96    {
97        let mut globs = Vec::new();
98        let mut prefixes = Vec::new();
99
100        for pattern in patterns {
101            if contains_glob_metacharacters(&pattern) {
102                globs.push(pattern);
103            } else {
104                prefixes.push(pattern);
105            }
106        }
107
108        Ok(Self { globs: build_glob_set(&globs, glob_settings)?, prefixes })
109    }
110
111    /// Returns `true` if there are no patterns at all.
112    #[inline]
113    #[must_use]
114    pub fn is_empty(&self) -> bool {
115        self.globs.is_empty() && self.prefixes.is_empty()
116    }
117
118    /// Returns `true` if `file` matches any of the configured patterns.
119    ///
120    /// `file` is expected to be a forward-slash-separated path string
121    /// relative to whatever root the caller considers meaningful (typically
122    /// the workspace root for source excludes, or the logical
123    /// `File::name` for per-rule excludes).
124    #[inline]
125    #[must_use]
126    pub fn is_match(&self, file: &str) -> bool {
127        if !self.globs.is_empty() && self.globs.is_match(file) {
128            return true;
129        }
130
131        self.prefixes.iter().any(|pattern| prefix_matches(file, pattern.as_ref()))
132    }
133}
134
/// Decides whether the plain-prefix pattern `pattern` covers `file`.
///
/// - If `pattern` ends in `/`, it is enough for `file` to start with it.
/// - Otherwise `file` must either be identical to `pattern`, or continue with
///   a `/` right after it (so `src/vendor` covers `src/vendor/a.php` but not
///   `src/vendored.php`).
fn prefix_matches(file: &str, pattern: &str) -> bool {
    match file.strip_prefix(pattern) {
        // `file` does not even start with the pattern.
        None => false,
        // Trailing-slash patterns accept anything underneath them.
        Some(_) if pattern.ends_with('/') => true,
        // Exact match (nothing left) or a proper directory boundary.
        Some(remainder) => remainder.is_empty() || remainder.starts_with('/'),
    }
}
152
#[cfg(test)]
#[allow(clippy::expect_used)]
mod tests {
    use super::*;

    /// Compiles `patterns` with the default glob settings; an invalid pattern
    /// aborts the test.
    fn matcher<'pat>(patterns: &[&'pat str]) -> ExclusionMatcher<&'pat str> {
        let compiled = ExclusionMatcher::compile(patterns.iter().copied(), GlobSettings::default());

        compiled.expect("compile")
    }

    /// Asserts that every path in `paths` is matched by `excludes`.
    fn assert_excluded(excludes: &ExclusionMatcher<&str>, paths: &[&str]) {
        for &path in paths {
            assert!(excludes.is_match(path), "expected `{path}` to be excluded");
        }
    }

    /// Asserts that no path in `paths` is matched by `excludes`.
    fn assert_kept(excludes: &ExclusionMatcher<&str>, paths: &[&str]) {
        for &path in paths {
            assert!(!excludes.is_match(path), "expected `{path}` not to be excluded");
        }
    }

    #[test]
    fn empty_matcher_matches_nothing() {
        let excludes = matcher(&[]);

        assert!(excludes.is_empty());
        assert_kept(&excludes, &["any/path.php"]);
    }

    #[test]
    fn plain_directory_prefix_matches_descendants() {
        let excludes = matcher(&["src/vendor"]);

        assert_excluded(&excludes, &["src/vendor", "src/vendor/foo.php", "src/vendor/deep/nested.php"]);
        assert_kept(&excludes, &["src/vendored.php", "src/other.php"]);
    }

    #[test]
    fn plain_trailing_slash_prefix_is_respected() {
        let excludes = matcher(&["src/tests/"]);

        assert_excluded(&excludes, &["src/tests/foo.php"]);
        assert_kept(&excludes, &["src/tests"]);
    }

    #[test]
    fn plain_file_matches_exactly() {
        let excludes = matcher(&["src/skip.php"]);

        assert_excluded(&excludes, &["src/skip.php"]);
        assert_kept(&excludes, &["src/skipped.php"]);
    }

    #[test]
    fn glob_double_star_matches_nested() {
        let excludes = matcher(&["src/**/*.php"]);

        assert_excluded(&excludes, &["src/a.php", "src/dir/a.php", "src/a/b/c.php"]);
        assert_kept(&excludes, &["tests/a.php"]);
    }

    #[test]
    fn glob_star_matches_flat_and_nested() {
        let excludes = matcher(&["tests/fixtures/*"]);

        assert_excluded(&excludes, &["tests/fixtures/a.php", "tests/fixtures/dir/a.php"]);
    }

    #[test]
    fn mixed_patterns_combine_correctly() {
        let excludes = matcher(&["src/legacy", "tests/**/*Test.php"]);

        assert_excluded(&excludes, &["src/legacy/foo.php", "tests/Unit/FooTest.php"]);
        assert_kept(&excludes, &["src/modern/foo.php", "tests/Unit/Helper.php"]);
    }

    #[test]
    fn contains_glob_metacharacters_detects_patterns() {
        for glob in ["src/**/*.php", "a?.php", "[abc]", "{a,b}"] {
            assert!(contains_glob_metacharacters(glob), "`{glob}` should be detected as a glob");
        }

        for literal in ["src", "src/foo.php", "vendor/"] {
            assert!(!contains_glob_metacharacters(literal), "`{literal}` should be treated as a literal");
        }
    }
}