Skip to main content

mago_database/
matcher.rs

1//! Path exclusion matching with mixed glob + prefix semantics.
2//!
3//! This module provides two reusable helpers used by the rest of the workspace:
4//!
5//! - [`build_glob_set`] compiles a list of glob-pattern strings into a
6//!   [`globset::GlobSet`] using the project's [`GlobSettings`]. The file
7//!   loader ([`crate::loader`]) uses this for `source.excludes` entries that
8//!   are pure glob patterns.
9//! - [`ExclusionMatcher`] layers on top of `build_glob_set`, splitting an
10//!   input list into glob patterns and plain prefixes.
11//!
12//! # Pattern semantics used by [`ExclusionMatcher`]
13//!
14//! Each pattern in the input list is classified as either a **glob pattern**
15//! or a **plain prefix** based on whether it contains any glob metacharacters
16//! (`*`, `?`, `[`, `{`):
17//!
18//! - Glob patterns (e.g. `src/**/*.php`, `tests/fixtures/*`) are compiled via
19//!   the [`globset`] crate using the supplied [`GlobSettings`] and matched
20//!   against the full file path.
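//! - Plain prefixes (e.g. `src`, `tests/fixtures/`) use the pre-existing
//!   directory-prefix semantics: a pattern `X` without a trailing `/` matches
//!   the path `X` itself and any path under `X/`, while a pattern that already
//!   ends with `/` matches any path that starts with it (but not the bare
//!   directory name without the trailing slash).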
24//!
25//! This split keeps existing `exclude = ["src/foo"]` configurations working
26//! exactly as before while adding full glob support for anyone who needs it.
27
28use globset::GlobBuilder;
29use globset::GlobSet;
30use globset::GlobSetBuilder;
31
32use crate::GlobSettings;
33use crate::error::DatabaseError;
34
/// Reports whether `pattern` should be treated as a glob rather than a
/// literal path.
///
/// A pattern counts as a glob as soon as it contains at least one of the
/// characters `*`, `?`, `[` or `{`; any other string is considered literal.
#[inline]
#[must_use]
pub fn contains_glob_metacharacters<T: AsRef<str>>(pattern: T) -> bool {
    // The characters whose presence turns a pattern into a glob.
    const METACHARACTERS: &[char] = &['*', '?', '[', '{'];

    pattern.as_ref().contains(METACHARACTERS)
}
42
43/// Compiles a list of glob patterns into a [`GlobSet`] using the given
44/// [`GlobSettings`].
45///
46/// This is the single place in the workspace that applies `GlobSettings` to
47/// `GlobBuilder`; callers should prefer this over rebuilding the same
48/// configuration ad-hoc.
49///
50/// Returns an error if any individual pattern fails to compile.
51pub(crate) fn build_glob_set<I, S>(patterns: I, glob_settings: GlobSettings) -> Result<GlobSet, DatabaseError>
52where
53    I: IntoIterator<Item = S>,
54    S: AsRef<str>,
55{
56    let mut builder = GlobSetBuilder::new();
57    for pattern in patterns {
58        let glob = GlobBuilder::new(pattern.as_ref())
59            .case_insensitive(glob_settings.case_insensitive)
60            .literal_separator(glob_settings.literal_separator)
61            .backslash_escape(glob_settings.backslash_escape)
62            .empty_alternates(glob_settings.empty_alternates)
63            .build()?;
64
65        builder.add(glob);
66    }
67
68    Ok(builder.build()?)
69}
70
71/// Compiled matcher for a list of exclusion patterns.
72///
73/// Split patterns are stored once at construction time so that per-file
74/// matching is cheap. See the module-level documentation for the exact
75/// semantics.
76#[derive(Debug, Clone)]
77pub struct ExclusionMatcher<S: Clone + AsRef<str>> {
78    globs: GlobSet,
79    prefixes: Vec<S>,
80}
81
82impl<S: Clone + AsRef<str>> ExclusionMatcher<S> {
83    /// Builds a matcher from a list of patterns and the project's glob
84    /// settings.
85    ///
86    /// Returns an error if any glob pattern fails to compile.
87    pub fn compile<I>(patterns: I, glob_settings: GlobSettings) -> Result<Self, DatabaseError>
88    where
89        I: IntoIterator<Item = S>,
90    {
91        let mut globs = Vec::new();
92        let mut prefixes = Vec::new();
93
94        for pattern in patterns {
95            if contains_glob_metacharacters(&pattern) {
96                globs.push(pattern);
97            } else {
98                prefixes.push(pattern);
99            }
100        }
101
102        Ok(Self { globs: build_glob_set(&globs, glob_settings)?, prefixes })
103    }
104
105    /// Returns `true` if there are no patterns at all.
106    #[must_use]
107    pub fn is_empty(&self) -> bool {
108        self.globs.is_empty() && self.prefixes.is_empty()
109    }
110
111    /// Returns `true` if `file` matches any of the configured patterns.
112    ///
113    /// `file` is expected to be a forward-slash-separated path string
114    /// relative to whatever root the caller considers meaningful (typically
115    /// the workspace root for source excludes, or the logical
116    /// `File::name` for per-rule excludes).
117    #[must_use]
118    pub fn is_match(&self, file: &str) -> bool {
119        if !self.globs.is_empty() && self.globs.is_match(file) {
120            return true;
121        }
122
123        self.prefixes.iter().any(|pattern| prefix_matches(file, pattern.as_ref()))
124    }
125}
126
/// Decides whether the plain-prefix pattern `pattern` covers `file`.
///
/// - If `pattern` ends in `/`, it only has to be a prefix of `file`.
/// - Otherwise `file` must either be exactly `pattern`, or continue with a
///   `/` right after it, so that `src/vendor` covers `src/vendor/a.php` but
///   not `src/vendored.php`.
fn prefix_matches(file: &str, pattern: &str) -> bool {
    match file.strip_prefix(pattern) {
        // `file` does not even start with the pattern.
        None => false,
        // A trailing-slash pattern already ends on a directory boundary.
        Some(_) if pattern.ends_with('/') => true,
        // Exact match, or the pattern stops right before a separator.
        Some(remainder) => remainder.is_empty() || remainder.starts_with('/'),
    }
}
144
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a matcher over `patterns` with the default glob settings.
    fn matcher<'a>(patterns: &[&'a str]) -> ExclusionMatcher<&'a str> {
        let settings = GlobSettings::default();

        ExclusionMatcher::compile(patterns.iter().copied(), settings).expect("compile")
    }

    /// Asserts that every path in `paths` is matched by `m`.
    fn assert_excluded(m: &ExclusionMatcher<&str>, paths: &[&str]) {
        for path in paths {
            assert!(m.is_match(path), "expected {:?} to be excluded", path);
        }
    }

    /// Asserts that no path in `paths` is matched by `m`.
    fn assert_kept(m: &ExclusionMatcher<&str>, paths: &[&str]) {
        for path in paths {
            assert!(!m.is_match(path), "expected {:?} not to be excluded", path);
        }
    }

    #[test]
    fn empty_matcher_matches_nothing() {
        let m = matcher(&[]);

        assert!(m.is_empty());
        assert_kept(&m, &["any/path.php"]);
    }

    #[test]
    fn plain_directory_prefix_matches_descendants() {
        let m = matcher(&["src/vendor"]);

        assert_excluded(&m, &["src/vendor", "src/vendor/foo.php", "src/vendor/deep/nested.php"]);
        assert_kept(&m, &["src/vendored.php", "src/other.php"]);
    }

    #[test]
    fn plain_trailing_slash_prefix_is_respected() {
        let m = matcher(&["src/tests/"]);

        assert_excluded(&m, &["src/tests/foo.php"]);
        assert_kept(&m, &["src/tests"]);
    }

    #[test]
    fn plain_file_matches_exactly() {
        let m = matcher(&["src/skip.php"]);

        assert_excluded(&m, &["src/skip.php"]);
        assert_kept(&m, &["src/skipped.php"]);
    }

    #[test]
    fn glob_double_star_matches_nested() {
        let m = matcher(&["src/**/*.php"]);

        assert_excluded(&m, &["src/a.php", "src/dir/a.php", "src/a/b/c.php"]);
        assert_kept(&m, &["tests/a.php"]);
    }

    #[test]
    fn glob_star_matches_flat_and_nested() {
        let m = matcher(&["tests/fixtures/*"]);

        assert_excluded(&m, &["tests/fixtures/a.php", "tests/fixtures/dir/a.php"]);
    }

    #[test]
    fn mixed_patterns_combine_correctly() {
        let m = matcher(&["src/legacy", "tests/**/*Test.php"]);

        assert_excluded(&m, &["src/legacy/foo.php", "tests/Unit/FooTest.php"]);
        assert_kept(&m, &["src/modern/foo.php", "tests/Unit/Helper.php"]);
    }

    #[test]
    fn contains_glob_metacharacters_detects_patterns() {
        let globs = ["src/**/*.php", "a?.php", "[abc]", "{a,b}"];
        for pattern in globs.iter() {
            assert!(contains_glob_metacharacters(pattern), "expected {:?} to be a glob", pattern);
        }

        let literals = ["src", "src/foo.php", "vendor/"];
        for pattern in literals.iter() {
            assert!(!contains_glob_metacharacters(pattern), "expected {:?} to be literal", pattern);
        }
    }
}
219}