// glob_set/set.rs

1use alloc::vec::Vec;
2
3use crate::engine::{self, MatchEngine};
4use crate::glob::{Candidate, Glob};
5
6/// A set of glob patterns that can be matched against paths efficiently.
7///
8/// `GlobSet` classifies each pattern at build time into the fastest applicable
9/// strategy (extension hash, literal, prefix, suffix) and only falls back to
10/// the full `glob_match` engine for patterns that need it.
11///
12/// # Example
13///
14/// ```
15/// use glob_set::{Glob, GlobSet, GlobSetBuilder};
16///
17/// let mut builder = GlobSetBuilder::new();
18/// builder.add(Glob::new("*.rs").unwrap());
19/// builder.add(Glob::new("*.toml").unwrap());
20/// let set = builder.build().unwrap();
21///
22/// assert!(set.is_match("foo.rs"));
23/// assert!(set.is_match("Cargo.toml"));
24/// assert!(!set.is_match("foo.js"));
25/// ```
26#[derive(Clone, Debug)]
27pub struct GlobSet {
28    engine: MatchEngine,
29}
30
31impl Default for GlobSet {
32    fn default() -> Self {
33        Self {
34            engine: MatchEngine::empty(),
35        }
36    }
37}
38
39impl GlobSet {
40    /// Return the number of patterns in this set.
41    pub fn len(&self) -> usize {
42        self.engine.len()
43    }
44
45    /// Return whether this set is empty.
46    pub fn is_empty(&self) -> bool {
47        self.engine.is_empty()
48    }
49
50    /// Test whether any pattern matches the given path.
51    pub fn is_match(&self, path: impl AsRef<str>) -> bool {
52        self.engine.is_match(path.as_ref())
53    }
54
55    /// Test whether any pattern matches the given candidate.
56    pub fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool {
57        self.engine.is_match(candidate.path())
58    }
59
60    /// Return the indices of all patterns that match the given path.
61    pub fn matches(&self, path: impl AsRef<str>) -> Vec<usize> {
62        let mut result = Vec::new();
63        self.engine.matches_into(path.as_ref(), &mut result);
64        result
65    }
66
67    /// Append the indices of all matching patterns to `into`.
68    pub fn matches_into(&self, path: impl AsRef<str>, into: &mut Vec<usize>) {
69        self.engine.matches_into(path.as_ref(), into);
70    }
71
72    /// Return the indices of all patterns that match the given candidate.
73    pub fn matches_candidate(&self, candidate: &Candidate<'_>) -> Vec<usize> {
74        self.matches(candidate.path())
75    }
76
77    /// Append the indices of all matching patterns for the given candidate to `into`.
78    pub fn matches_candidate_into(&self, candidate: &Candidate<'_>, into: &mut Vec<usize>) {
79        self.matches_into(candidate.path(), into);
80    }
81}
82
83/// A builder for constructing a [`GlobSet`].
84#[derive(Clone, Debug, Default)]
85pub struct GlobSetBuilder {
86    patterns: Vec<Glob>,
87}
88
89impl GlobSetBuilder {
90    /// Create a new empty builder.
91    pub fn new() -> Self {
92        Self::default()
93    }
94
95    /// Add a glob pattern to the set.
96    pub fn add(&mut self, glob: Glob) -> &mut Self {
97        self.patterns.push(glob);
98        self
99    }
100
101    /// Build the [`GlobSet`].
102    ///
103    /// This classifies each pattern into the fastest applicable strategy
104    /// (extension, literal, prefix, suffix) and only uses the full glob
105    /// engine with Aho-Corasick pre-filtering for patterns that need it.
106    ///
107    /// # Errors
108    ///
109    /// Returns an error if the Aho-Corasick automaton cannot be constructed.
110    pub fn build(&self) -> Result<GlobSet, crate::error::Error> {
111        let engine = engine::build_engine(self.patterns.clone())?;
112        Ok(GlobSet { engine })
113    }
114}
115
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use alloc::vec;

    use super::*;

    /// Compile every entry of `patterns` and build a set from them.
    ///
    /// Panics if a pattern fails to compile or the set cannot be built.
    fn build_set(patterns: &[&str]) -> GlobSet {
        let mut builder = GlobSetBuilder::new();
        patterns.iter().for_each(|pattern| {
            builder.add(Glob::new(pattern).unwrap());
        });
        builder.build().unwrap()
    }

    /// Assert that `set` matches every path in `accepted` and none in `rejected`.
    fn assert_paths(set: &GlobSet, accepted: &[&str], rejected: &[&str]) {
        for path in accepted {
            assert!(set.is_match(*path), "expected a match for {:?}", path);
        }
        for path in rejected {
            assert!(!set.is_match(*path), "expected no match for {:?}", path);
        }
    }

    #[test]
    fn empty_set() {
        let set = build_set(&[]);
        assert_paths(&set, &[], &["anything"]);
        assert!(set.is_empty());
    }

    #[test]
    fn single_pattern() {
        let set = build_set(&["*.rs"]);
        assert_paths(&set, &["foo.rs"], &["foo.txt"]);
    }

    #[test]
    fn multiple_patterns() {
        let set = build_set(&["*.rs", "*.toml", "*.md"]);
        assert_paths(
            &set,
            &["main.rs", "Cargo.toml", "README.md"],
            &["main.js"],
        );
    }

    #[test]
    fn matches_returns_indices() {
        let set = build_set(&["*.rs", "*.toml", "**/*.rs"]);
        let mut hits = set.matches("src/main.rs");
        hits.sort_unstable();
        // `**/*.rs` (index 2) must be reported.
        assert!(hits.contains(&2));
        // `*.toml` (index 1) must not be reported.
        assert!(!hits.contains(&1));
    }

    #[test]
    fn globstar_patterns() {
        let set = build_set(&["**/*.test.js", "src/**/*.rs"]);
        assert_paths(
            &set,
            &["foo/bar.test.js", "src/lib.rs"],
            &["test/foo.rs"],
        );
    }

    #[test]
    fn wildcard_only_patterns_in_always_check() {
        // "*" carries no literal text, so it lands in always_check.
        let set = build_set(&["*", "*.rs"]);
        assert_paths(&set, &["anything", "foo.rs"], &[]);
    }

    #[test]
    fn matches_into() {
        let set = build_set(&["*.rs", "*.txt", "**/*"]);
        let mut found = Vec::new();
        set.matches_into("foo.rs", &mut found);
        // Index 0 is `*.rs`, index 2 is `**/*`; both apply.
        assert!(found.contains(&0));
        assert!(found.contains(&2));
        // Index 1 is `*.txt`, which does not apply.
        assert!(!found.contains(&1));
    }

    #[test]
    fn candidate_matching() {
        let set = build_set(&["**/*.rs"]);
        let candidate = Candidate::new("src\\main.rs");
        assert!(set.is_match_candidate(&candidate));
    }

    #[test]
    fn default_glob_set() {
        let set: GlobSet = Default::default();
        assert!(set.is_empty());
        assert_paths(&set, &[], &["anything"]);
    }

    #[test]
    fn braces_pattern() {
        let set = build_set(&["*.{rs,toml}"]);
        assert_paths(&set, &["main.rs", "Cargo.toml"], &["main.js"]);
    }

    #[test]
    fn question_mark_pattern() {
        let set = build_set(&["a?c"]);
        assert_paths(&set, &["abc", "axc"], &["abbc"]);
    }

    #[test]
    fn char_class_pattern() {
        let set = build_set(&["[abc].txt"]);
        assert_paths(&set, &["a.txt", "b.txt"], &["d.txt"]);
    }

    #[test]
    fn literal_strategy() {
        let set = build_set(&["Cargo.toml"]);
        assert_paths(
            &set,
            &["Cargo.toml"],
            &["cargo.toml", "src/Cargo.toml"],
        );
    }

    #[test]
    fn prefix_strategy() {
        let set = build_set(&["src/**"]);
        assert_paths(
            &set,
            &["src/main.rs", "src/lib/util.rs"],
            &["tests/main.rs"],
        );
    }

    #[test]
    fn suffix_strategy() {
        let set = build_set(&["**/foo.txt"]);
        // "foo.txt" checks that the pattern also matches without a leading `/`.
        assert_paths(&set, &["a/b/foo.txt", "foo.txt"], &["bar.txt"]);
    }

    #[test]
    fn mixed_strategies() {
        let patterns = [
            "*.rs",          // extension local
            "Cargo.toml",    // literal
            "src/**",        // prefix
            "**/README.md",  // suffix
            "{a,b}/**/*.js", // glob fallback
        ];
        let set = build_set(&patterns);
        assert_paths(
            &set,
            &[
                "foo.rs",
                "Cargo.toml",
                "src/lib.rs",
                "docs/README.md",
                "a/components/app.js",
            ],
            &["foo.py"],
        );
    }

    #[test]
    fn matches_into_mixed_strategies() {
        let set = build_set(&[
            "**/*.rs", // ext_any (idx 0)
            "src/**",  // prefix  (idx 1)
            "*",       // glob/always-check (idx 2)
        ]);
        let cases = [("src/main.rs", vec![0, 1]), ("main.rs", vec![0, 2])];

        // One buffer is reused across cases, so it is emptied before each query.
        let mut buffer = Vec::new();
        for (path, expected) in cases.iter() {
            buffer.clear();
            set.matches_into(*path, &mut buffer);
            buffer.sort_unstable();
            assert_eq!(&buffer, expected);
        }
    }

    #[test]
    fn extension_does_not_false_positive() {
        // A single star does not cross `/`, so `*.rs` must NOT match `src/main.rs`.
        let set = build_set(&["*.rs"]);
        assert_paths(&set, &[], &["src/main.rs"]);
    }

    #[test]
    fn ext_any_matches_deep_paths() {
        let set = build_set(&["**/*.rs"]);
        assert_paths(&set, &["a/b/c/d.rs", "d.rs"], &[]);
    }

    #[test]
    fn ext_local_rejects_deep_paths() {
        let set = build_set(&["*.rs"]);
        assert_paths(&set, &["d.rs"], &["a/d.rs"]);
    }

    #[test]
    fn compound_suffix_strategy() {
        let set = build_set(&["**/*.test.js"]);
        assert_paths(
            &set,
            &["foo.test.js", "a/b/foo.test.js"],
            &["foo.js", "foo.test.ts"],
        );
    }

    #[test]
    fn compound_suffix_matches_into() {
        let set = build_set(&["**/*.test.js", "**/*.rs"]);
        let mut found = Vec::new();
        set.matches_into("unit/foo.test.js", &mut found);
        assert_eq!(found, vec![0]);
    }

    #[test]
    fn brace_expansion_in_set() {
        let set = build_set(&["*.{rs,toml}"]);
        assert_paths(&set, &["main.rs", "Cargo.toml"], &["main.js"]);
    }

    #[test]
    fn brace_expansion_globstar() {
        let set = build_set(&["**/*.{rs,toml}"]);
        assert_paths(&set, &["src/main.rs", "Cargo.toml"], &["main.js"]);
    }

    #[test]
    fn prefix_suffix_strategy() {
        let set = build_set(&["src/**/*.js"]);
        assert_paths(
            &set,
            &["src/app.js", "src/components/button.js"],
            &["lib/app.js", "src/app.ts"],
        );
    }

    #[test]
    fn prefix_compound_suffix_strategy() {
        let set = build_set(&["tests/**/*.test.ts"]);
        assert_paths(
            &set,
            &["tests/unit/foo.test.ts", "tests/foo.test.ts"],
            &["src/foo.test.ts", "tests/foo.ts"],
        );
    }

    #[test]
    fn brace_expansion_prefix_suffix() {
        let set = build_set(&["{src,lib}/**/*.rs"]);
        assert_paths(
            &set,
            &["src/main.rs", "lib/core/parser.rs"],
            &["tests/main.rs", "src/main.js"],
        );
    }
}