Skip to main content

kaish_glob/
glob_path.rs

1//! Path-aware glob matching with globstar (`**`) support.
2//!
3//! Extends the basic glob matching in `glob.rs` to handle patterns
4//! that span directory boundaries with `**`:
5//!
6//! - `**/*.rs` matches `foo.rs`, `src/foo.rs`, `a/b/c/foo.rs`
7//! - `src/**` matches everything under src/
8//! - `a/**/z` matches `a/z`, `a/b/z`, `a/b/c/z`
9
10use std::path::Path;
11use thiserror::Error;
12
13use crate::glob::glob_match;
14
15/// Errors when parsing glob patterns.
16#[derive(Debug, Clone, Error)]
17pub enum PatternError {
18    #[error("empty pattern")]
19    Empty,
20    #[error("invalid pattern: {0}")]
21    Invalid(String),
22}
23
/// One `/`-separated segment of a parsed path pattern.
///
/// Equality is total (`Eq`): every payload is a plain `String`, so two
/// segments are equal exactly when they are the same variant with the same
/// text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathSegment {
    /// A directory or file name without wildcard characters, e.g. `src` or
    /// `main.rs`. Compared against a path component by string equality.
    Literal(String),
    /// A single-component pattern containing wildcard syntax, e.g. `*.rs` or
    /// `test_?`. Matched against exactly one path component.
    Pattern(String),
    /// `**`: matches zero or more whole path components.
    Globstar,
}
34
35/// A path-aware glob pattern with globstar support.
36///
37/// # Examples
38/// ```
39/// use kaish_glob::GlobPath;
40/// use std::path::Path;
41///
42/// let pattern = GlobPath::new("**/*.rs").unwrap();
43/// assert!(pattern.matches(Path::new("main.rs")));
44/// assert!(pattern.matches(Path::new("src/main.rs")));
45/// assert!(pattern.matches(Path::new("src/lib/utils.rs")));
46/// assert!(!pattern.matches(Path::new("README.md")));
47/// ```
48#[derive(Debug, Clone)]
49pub struct GlobPath {
50    segments: Vec<PathSegment>,
51    anchored: bool,
52}
53
54impl GlobPath {
55    /// Parse a glob pattern into a GlobPath.
56    ///
57    /// Patterns starting with `/` are anchored to the root.
58    /// `**` matches zero or more directory components.
59    pub fn new(pattern: &str) -> Result<Self, PatternError> {
60        if pattern.is_empty() {
61            return Err(PatternError::Empty);
62        }
63
64        let (pattern, anchored) = if let Some(stripped) = pattern.strip_prefix('/') {
65            (stripped, true)
66        } else {
67            (pattern, false)
68        };
69
70        let mut segments = Vec::new();
71
72        for part in pattern.split('/') {
73            if part.is_empty() {
74                continue;
75            }
76
77            if part == "**" {
78                // Consecutive globstars collapse to one
79                if !matches!(segments.last(), Some(PathSegment::Globstar)) {
80                    segments.push(PathSegment::Globstar);
81                }
82            } else if Self::is_literal(part) {
83                segments.push(PathSegment::Literal(part.to_string()));
84            } else {
85                segments.push(PathSegment::Pattern(part.to_string()));
86            }
87        }
88
89        Ok(GlobPath { segments, anchored })
90    }
91
92    /// Check if a path matches this pattern.
93    pub fn matches(&self, path: &Path) -> bool {
94        let components: Vec<&str> = path
95            .components()
96            .filter_map(|c| c.as_os_str().to_str())
97            .collect();
98
99        self.match_segments(&self.segments, &components, 0, 0)
100    }
101
102    /// Get the static prefix of the pattern (directories before any wildcard).
103    ///
104    /// This is useful for optimization: we can start the walk from this prefix
105    /// instead of the root.
106    ///
107    /// # Examples
108    /// ```
109    /// use kaish_glob::GlobPath;
110    /// use std::path::PathBuf;
111    ///
112    /// let pattern = GlobPath::new("src/lib/**/*.rs").unwrap();
113    /// assert_eq!(pattern.static_prefix(), Some(PathBuf::from("src/lib")));
114    ///
115    /// let pattern = GlobPath::new("**/*.rs").unwrap();
116    /// assert_eq!(pattern.static_prefix(), None);
117    /// ```
118    pub fn static_prefix(&self) -> Option<std::path::PathBuf> {
119        let mut prefix = std::path::PathBuf::new();
120
121        for segment in &self.segments {
122            match segment {
123                PathSegment::Literal(s) => prefix.push(s),
124                _ => break,
125            }
126        }
127
128        if prefix.as_os_str().is_empty() {
129            None
130        } else {
131            Some(prefix)
132        }
133    }
134
135    /// Check if the pattern only matches directories.
136    pub fn is_dir_only(&self) -> bool {
137        matches!(self.segments.last(), Some(PathSegment::Globstar))
138    }
139
140    /// Check if the pattern is anchored (starts with /).
141    pub fn is_anchored(&self) -> bool {
142        self.anchored
143    }
144
145    /// Check if the pattern contains a globstar (`**`).
146    ///
147    /// Patterns with globstar require recursive directory traversal.
148    /// Patterns without globstar only match at a fixed depth.
149    pub fn has_globstar(&self) -> bool {
150        self.segments.iter().any(|s| matches!(s, PathSegment::Globstar))
151    }
152
153    /// Get the depth of the pattern (number of path components).
154    ///
155    /// Returns `None` if the pattern contains globstar (variable depth).
156    pub fn fixed_depth(&self) -> Option<usize> {
157        if self.has_globstar() {
158            None
159        } else {
160            Some(self.segments.len())
161        }
162    }
163
164    /// Check if a string is a literal (no wildcards).
165    fn is_literal(s: &str) -> bool {
166        !s.contains('*') && !s.contains('?') && !s.contains('[') && !s.contains('{')
167    }
168
169    /// Recursive segment matching with backtracking for globstar.
170    fn match_segments(
171        &self,
172        segments: &[PathSegment],
173        components: &[&str],
174        seg_idx: usize,
175        comp_idx: usize,
176    ) -> bool {
177        // Both exhausted - match!
178        if seg_idx >= segments.len() && comp_idx >= components.len() {
179            return true;
180        }
181
182        // Segments exhausted but components remain - no match
183        // (unless we ended with globstar, which is already consumed)
184        if seg_idx >= segments.len() {
185            return false;
186        }
187
188        match &segments[seg_idx] {
189            PathSegment::Globstar => {
190                // Globstar matches zero or more components
191                // Try matching with 0, 1, 2, ... components consumed
192                for skip in 0..=(components.len() - comp_idx) {
193                    if self.match_segments(segments, components, seg_idx + 1, comp_idx + skip) {
194                        return true;
195                    }
196                }
197                false
198            }
199
200            PathSegment::Literal(lit) => {
201                if comp_idx >= components.len() {
202                    return false;
203                }
204                if components[comp_idx] == lit {
205                    self.match_segments(segments, components, seg_idx + 1, comp_idx + 1)
206                } else {
207                    false
208                }
209            }
210
211            PathSegment::Pattern(pat) => {
212                if comp_idx >= components.len() {
213                    return false;
214                }
215                if self.matches_component(pat, components[comp_idx]) {
216                    self.match_segments(segments, components, seg_idx + 1, comp_idx + 1)
217                } else {
218                    false
219                }
220            }
221        }
222    }
223
224    /// Match a single component against a pattern (with brace expansion).
225    fn matches_component(&self, pattern: &str, component: &str) -> bool {
226        glob_match(pattern, component)
227    }
228}
229
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Compile `pattern`, panicking if the parser rejects it.
    fn glob(pattern: &str) -> GlobPath {
        GlobPath::new(pattern).unwrap()
    }

    /// Assert that `pattern` matches every path in `paths`.
    fn assert_accepts(pattern: &str, paths: &[&str]) {
        let compiled = glob(pattern);
        for &path in paths {
            assert!(
                compiled.matches(Path::new(path)),
                "pattern `{}` should match `{}`",
                pattern,
                path
            );
        }
    }

    /// Assert that `pattern` matches none of the paths in `paths`.
    fn assert_rejects(pattern: &str, paths: &[&str]) {
        let compiled = glob(pattern);
        for &path in paths {
            assert!(
                !compiled.matches(Path::new(path)),
                "pattern `{}` should not match `{}`",
                pattern,
                path
            );
        }
    }

    #[test]
    fn test_literal_pattern() {
        assert_accepts("src/main.rs", &["src/main.rs"]);
        assert_rejects("src/main.rs", &["src/lib.rs", "main.rs"]);
    }

    #[test]
    fn test_simple_wildcard() {
        assert_accepts("*.rs", &["main.rs", "lib.rs"]);
        // A pattern without `/` covers exactly one component.
        assert_rejects("*.rs", &["main.go", "src/main.rs"]);
    }

    #[test]
    fn test_globstar_prefix() {
        assert_accepts(
            "**/*.rs",
            &["main.rs", "src/main.rs", "src/lib/utils.rs", "a/b/c/d/e.rs"],
        );
        assert_rejects("**/*.rs", &["main.go", "src/main.go"]);
    }

    #[test]
    fn test_globstar_suffix() {
        assert_accepts("src/**", &["src", "src/main.rs", "src/lib/utils.rs"]);
        assert_rejects("src/**", &["test/main.rs"]);
    }

    #[test]
    fn test_globstar_middle() {
        assert_accepts("a/**/z", &["a/z", "a/b/z", "a/b/c/z", "a/b/c/d/e/z"]);
        assert_rejects("a/**/z", &["b/c/z", "a/z/extra"]);
    }

    #[test]
    fn test_consecutive_globstars() {
        assert_accepts("a/**/**/z", &["a/z", "a/b/z", "a/b/c/z"]);
    }

    #[test]
    fn test_brace_expansion() {
        assert_accepts("*.{rs,go,py}", &["main.rs", "server.go", "script.py"]);
        assert_rejects("*.{rs,go,py}", &["style.css"]);
    }

    #[test]
    fn test_brace_with_globstar() {
        assert_accepts("**/*.{rs,go}", &["main.rs", "src/lib.go", "a/b/c/d.rs"]);
        assert_rejects("**/*.{rs,go}", &["src/main.py"]);
    }

    #[test]
    fn test_question_mark() {
        assert_accepts("file?.txt", &["file1.txt", "fileA.txt"]);
        assert_rejects("file?.txt", &["file12.txt", "file.txt"]);
    }

    #[test]
    fn test_char_class() {
        assert_accepts("[abc].rs", &["a.rs", "b.rs", "c.rs"]);
        assert_rejects("[abc].rs", &["d.rs"]);
    }

    #[test]
    fn test_static_prefix() {
        let prefix_of = |pattern: &str| glob(pattern).static_prefix();

        assert_eq!(
            prefix_of("src/lib/**/*.rs"),
            Some(std::path::PathBuf::from("src/lib"))
        );
        assert_eq!(prefix_of("src/**"), Some(std::path::PathBuf::from("src")));
        assert_eq!(prefix_of("**/*.rs"), None);
        assert_eq!(prefix_of("*.rs"), None);
    }

    #[test]
    fn test_anchored_pattern() {
        let anchored = glob("/src/*.rs");
        assert!(anchored.is_anchored());
        assert!(anchored.matches(Path::new("src/main.rs")));
    }

    #[test]
    fn test_empty_pattern() {
        assert!(matches!(GlobPath::new(""), Err(PatternError::Empty)));
    }

    #[test]
    fn test_has_globstar() {
        for pattern in &["**/*.rs", "src/**", "a/**/z"] {
            assert!(glob(pattern).has_globstar(), "`{}` has a globstar", pattern);
        }
        for pattern in &["*.rs", "src/*.rs", "src/lib/main.rs"] {
            assert!(!glob(pattern).has_globstar(), "`{}` has no globstar", pattern);
        }
    }

    #[test]
    fn test_fixed_depth() {
        let expectations = [
            ("*.rs", Some(1)),
            ("src/*.rs", Some(2)),
            ("a/b/c.txt", Some(3)),
            ("**/*.rs", None),
            ("src/**", None),
        ];
        for (pattern, depth) in expectations.iter() {
            assert_eq!(glob(pattern).fixed_depth(), *depth, "depth of `{}`", pattern);
        }
    }

    #[test]
    fn test_hidden_files() {
        assert_accepts("**/*.rs", &[".hidden.rs", ".config/settings.rs"]);
    }

    #[test]
    fn test_complex_real_world() {
        assert_accepts(
            "**/*_test.rs",
            &[
                "parser_test.rs",
                "src/lexer_test.rs",
                "crates/kernel/tests/eval_test.rs",
            ],
        );
        assert_rejects("**/*_test.rs", &["parser.rs"]);

        assert_accepts("src/**/*.{rs,go}", &["src/main.rs", "src/api/handler.go"]);
        assert_rejects("src/**/*.{rs,go}", &["test/main.rs"]);
    }
}