Skip to main content

void_core/support/
pathspec.rs

1//! Pathspec matching for filtering file paths
2//!
3//! Provides glob-style pattern matching for repo-relative paths.
4
5use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
6
7use super::error::{Result, VoidError};
8
9/// A compiled pathspec for matching file paths.
10///
11/// Supports glob patterns like:
12/// - `*.rs` - matches any .rs file in any directory
13/// - `src/*.ts` - matches .ts files directly in src/
14/// - `src/**/*.ts` - matches .ts files anywhere under src/
15/// - `{foo,bar}.js` - matches foo.js or bar.js
16#[derive(Debug, Clone)]
17pub struct Pathspec {
18    /// Original patterns for display
19    patterns: Vec<String>,
20    /// Compiled matcher
21    matcher: GlobSet,
22    /// True if this matches everything (empty pattern set)
23    matches_all: bool,
24}
25
26impl Pathspec {
27    /// Create a new pathspec from patterns.
28    ///
29    /// Empty patterns = match everything.
30    ///
31    /// # Pattern normalization
32    /// - `.` or `./` matches everything (`**`)
33    /// - Trailing `/` matches everything under that directory (`src/**`)
34    /// - If pattern has no `/`, treat as `**/{pattern}` (match anywhere)
35    /// - Path separators normalized to `/`
36    /// - Patterns with `..` are rejected
37    ///
38    /// # Errors
39    /// Returns `VoidError::InvalidPattern` if any pattern is invalid.
40    pub fn new(patterns: &[&str]) -> Result<Self> {
41        if patterns.is_empty() {
42            return Ok(Self {
43                patterns: Vec::new(),
44                matcher: GlobSet::empty(),
45                matches_all: true,
46            });
47        }
48
49        let mut builder = GlobSetBuilder::new();
50        let mut normalized_patterns = Vec::with_capacity(patterns.len());
51
52        for pattern in patterns {
53            let expanded = Self::expand_pattern(pattern);
54            for expanded_pattern in expanded {
55                let normalized = Self::normalize_pattern(&expanded_pattern)?;
56                let glob = GlobBuilder::new(&normalized)
57                    .literal_separator(true)
58                    .build()
59                    .map_err(|e| VoidError::InvalidPattern(e.to_string()))?;
60                builder.add(glob);
61            }
62            normalized_patterns.push(pattern.to_string());
63        }
64
65        let matcher = builder
66            .build()
67            .map_err(|e| VoidError::InvalidPattern(e.to_string()))?;
68
69        Ok(Self {
70            patterns: normalized_patterns,
71            matcher,
72            matches_all: false,
73        })
74    }
75
76    /// Normalize a pattern for matching.
77    fn normalize_pattern(pattern: &str) -> Result<String> {
78        // Reject patterns with ..
79        if pattern.contains("..") {
80            return Err(VoidError::InvalidPattern(
81                "pattern cannot contain '..'".to_string(),
82            ));
83        }
84
85        // Normalize path separators and strip a leading slash (root-anchored).
86        let pattern = pattern.replace('\\', "/");
87        let mut pattern = pattern.trim_start_matches('/').to_string();
88        while pattern.starts_with("./") {
89            pattern = pattern.trim_start_matches("./").to_string();
90        }
91
92        if pattern.is_empty() || pattern == "." {
93            return Ok("**".to_string());
94        }
95
96        if pattern.ends_with('/') {
97            return Ok(format!("{pattern}**"));
98        }
99
100        // If pattern has no slash, treat as matching anywhere
101        if !pattern.contains('/') {
102            Ok(format!("**/{}", pattern))
103        } else {
104            Ok(pattern)
105        }
106    }
107
108    /// Expand a pattern so directory paths match contents recursively.
109    fn expand_pattern(pattern: &str) -> Vec<String> {
110        let normalized = pattern.replace('\\', "/");
111        let mut trimmed = normalized.trim_start_matches('/').to_string();
112        while trimmed.starts_with("./") {
113            trimmed = trimmed.trim_start_matches("./").to_string();
114        }
115
116        if trimmed.is_empty() || trimmed == "." || trimmed.ends_with('/') {
117            return vec![pattern.to_string()];
118        }
119
120        let has_glob = trimmed.contains('*')
121            || trimmed.contains('?')
122            || trimmed.contains('[')
123            || trimmed.contains(']')
124            || trimmed.contains('{')
125            || trimmed.contains('}');
126
127        if has_glob {
128            return vec![pattern.to_string()];
129        }
130
131        let mut expanded = Vec::new();
132        expanded.push(pattern.to_string());
133        if trimmed.contains('/') {
134            expanded.push(format!("{trimmed}/**"));
135        } else {
136            expanded.push(format!("**/{trimmed}/**"));
137        }
138        expanded
139    }
140
141    /// Check if a path matches the pathspec.
142    pub fn matches(&self, path: &str) -> bool {
143        if self.matches_all {
144            return true;
145        }
146
147        // Normalize the path
148        let path = path.replace('\\', "/");
149        let path = path.trim_start_matches('/');
150
151        self.matcher.is_match(path)
152    }
153
154    /// Check if any path with this prefix could match.
155    ///
156    /// Used for early directory pruning during walks.
157    /// Returns true if there's any possibility that a file under
158    /// this prefix could match the pathspec.
159    pub fn could_match_prefix(&self, prefix: &str) -> bool {
160        if self.matches_all {
161            return true;
162        }
163
164        // Normalize the prefix
165        let prefix = prefix.replace('\\', "/");
166        let prefix = prefix.trim_start_matches('/');
167        let prefix = prefix.trim_end_matches('/');
168
169        // For prefix matching, we need to check if any pattern could
170        // potentially match something under this directory.
171        //
172        // Simple heuristic: if the prefix is a component of any pattern,
173        // or if any pattern uses ** (recursive glob), it could match.
174        for pattern in &self.patterns {
175            let normalized = Self::normalize_pattern(pattern).unwrap_or_default();
176
177            // If pattern starts with **/  it can match anywhere
178            if normalized.starts_with("**/") {
179                return true;
180            }
181
182            // Check if prefix is a prefix of the pattern's directory part
183            // or if the pattern could match something under prefix
184            let pattern_parts: Vec<&str> = normalized.split('/').collect();
185            let prefix_parts: Vec<&str> = prefix.split('/').filter(|s| !s.is_empty()).collect();
186
187            // Check if prefix matches the start of the pattern
188            let mut could_match = true;
189            for (i, prefix_part) in prefix_parts.iter().enumerate() {
190                if i >= pattern_parts.len() {
191                    // Prefix is longer than pattern, but pattern might have **
192                    could_match = pattern_parts.iter().any(|p| *p == "**");
193                    break;
194                }
195
196                let pattern_part = pattern_parts[i];
197
198                // If pattern part is **, it matches any depth
199                if pattern_part == "**" {
200                    return true;
201                }
202
203                // Simple glob check - if pattern part contains wildcards,
204                // do a basic check
205                if !Self::part_could_match(pattern_part, prefix_part) {
206                    could_match = false;
207                    break;
208                }
209            }
210
211            if could_match {
212                return true;
213            }
214        }
215
216        false
217    }
218
219    /// Check if a pattern part could match a path part.
220    fn part_could_match(pattern_part: &str, path_part: &str) -> bool {
221        // Simple cases
222        if pattern_part == "*" || pattern_part == "**" {
223            return true;
224        }
225
226        if pattern_part == path_part {
227            return true;
228        }
229
230        // If pattern has wildcards, be conservative and return true
231        if pattern_part.contains('*') || pattern_part.contains('?') || pattern_part.contains('[') {
232            return true;
233        }
234
235        false
236    }
237
238    /// Get the original patterns.
239    pub fn patterns(&self) -> &[String] {
240        &self.patterns
241    }
242
243    /// Returns true if this matches everything (empty pattern set).
244    pub fn matches_all(&self) -> bool {
245        self.matches_all
246    }
247}
248
249/// Convenience: match a single pattern against a path.
250///
251/// # Errors
252/// Returns `VoidError::InvalidPattern` if the pattern is invalid.
253pub fn matches_glob(pattern: &str, path: &str) -> Result<bool> {
254    let pathspec = Pathspec::new(&[pattern])?;
255    Ok(pathspec.matches(path))
256}
257
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile `patterns`, panicking if any of them is rejected.
    fn spec(patterns: &[&str]) -> Pathspec {
        Pathspec::new(patterns).unwrap()
    }

    /// Assert that every path in `paths` is accepted by `ps`.
    fn assert_accepts(ps: &Pathspec, paths: &[&str]) {
        for path in paths {
            assert!(ps.matches(path), "expected {path:?} to match");
        }
    }

    /// Assert that every path in `paths` is refused by `ps`.
    fn assert_refuses(ps: &Pathspec, paths: &[&str]) {
        for path in paths {
            assert!(!ps.matches(path), "expected {path:?} not to match");
        }
    }

    #[test]
    fn test_simple_extension() {
        let by_ext = spec(&["*.rs"]);
        assert_accepts(&by_ext, &["foo.rs", "src/bar.rs"]);
        assert_refuses(&by_ext, &["foo.txt"]);
    }

    #[test]
    fn test_directory_pattern() {
        let direct_children = spec(&["src/*.rs"]);
        assert_accepts(&direct_children, &["src/lib.rs"]);
        assert_refuses(&direct_children, &["src/sub/lib.rs"]);
    }

    #[test]
    fn test_recursive() {
        let deep = spec(&["src/**/*.rs"]);
        assert_accepts(&deep, &["src/lib.rs", "src/foo/bar/baz.rs"]);
    }

    #[test]
    fn test_empty_matches_all() {
        let everything = spec(&[]);
        assert_accepts(&everything, &["anything.txt"]);
        assert!(everything.matches_all());
    }

    #[test]
    fn test_multiple_patterns() {
        let either = spec(&["*.rs", "*.ts"]);
        assert_accepts(&either, &["foo.rs", "bar.ts"]);
        assert_refuses(&either, &["baz.txt"]);
    }

    #[test]
    fn test_alternation() {
        let braces = spec(&["{foo,bar}.js"]);
        assert_accepts(&braces, &["foo.js", "bar.js"]);
        assert_refuses(&braces, &["baz.js"]);
    }

    #[test]
    fn test_rejects_dotdot() {
        assert!(Pathspec::new(&["../foo.rs"]).is_err());
    }

    #[test]
    fn test_normalized_slashes() {
        // Forward and backward slashes in the path are interchangeable.
        let src_files = spec(&["src/*.rs"]);
        assert_accepts(&src_files, &["src/lib.rs", "src\\lib.rs"]);
    }

    #[test]
    fn test_matches_glob_convenience() {
        let hit = matches_glob("*.rs", "foo.rs").unwrap();
        let miss = matches_glob("*.rs", "foo.txt").unwrap();
        assert!(hit);
        assert!(!miss);
    }

    #[test]
    fn test_trailing_slash_matches_recursive() {
        let subtree = spec(&["src/"]);
        assert_accepts(&subtree, &["src/lib.rs", "src/foo/bar.rs"]);
        assert_refuses(&subtree, &["other/lib.rs"]);
    }

    #[test]
    fn test_directory_matches_recursive_without_slash() {
        let bare_dir = spec(&["src"]);
        assert_accepts(
            &bare_dir,
            &["src/lib.rs", "src/foo/bar/baz.rs", "other/src/lib.rs"],
        );
    }

    #[test]
    fn test_directory_matches_recursive_anywhere() {
        let bare_dir = spec(&["dir"]);
        assert_accepts(&bare_dir, &["dir/file.txt", "src/dir/file.txt"]);
        assert_refuses(&bare_dir, &["src/dirfile.txt"]);
    }

    #[test]
    fn test_dot_matches_all() {
        let root = spec(&["."]);
        assert_accepts(&root, &["README.md", "src/lib.rs"]);
    }

    #[test]
    fn test_patterns_accessor() {
        let both = spec(&["*.rs", "*.ts"]);
        let stored = both.patterns();
        assert_eq!(stored.len(), 2);
        for expected in ["*.rs", "*.ts"] {
            assert!(stored.iter().any(|p| p == expected));
        }
    }

    #[test]
    fn test_could_match_prefix_recursive() {
        // `*.rs` is compiled as `**/*.rs`, so no directory can be pruned.
        let by_ext = spec(&["*.rs"]);
        for dir in ["src", "src/lib"] {
            assert!(by_ext.could_match_prefix(dir));
        }
    }

    #[test]
    fn test_could_match_prefix_specific() {
        let src_files = spec(&["src/*.rs"]);
        assert!(src_files.could_match_prefix("src"));
        assert!(!src_files.could_match_prefix("tests"));
    }

    #[test]
    fn test_could_match_prefix_empty() {
        assert!(spec(&[]).could_match_prefix("anything"));
    }

    #[test]
    fn test_leading_slash_stripped() {
        let src_files = spec(&["src/*.rs"]);
        assert_accepts(&src_files, &["/src/lib.rs", "src/lib.rs"]);
    }

    #[test]
    fn test_nested_directories() {
        let nested = spec(&["a/b/c/*.rs"]);
        assert_accepts(&nested, &["a/b/c/file.rs"]);
        assert_refuses(&nested, &["a/b/file.rs", "a/b/c/d/file.rs"]);
    }

    #[test]
    fn test_double_star_middle() {
        let mid = spec(&["src/**/test/*.rs"]);
        assert_accepts(&mid, &["src/test/foo.rs", "src/a/b/test/foo.rs"]);
        assert_refuses(&mid, &["src/test.rs"]);
    }
}