Skip to main content

kaish_glob/
glob_path.rs

1//! Path-aware glob matching with globstar (`**`) support.
2//!
3//! Extends the basic glob matching in `glob.rs` to handle patterns
4//! that span directory boundaries with `**`:
5//!
6//! - `**/*.rs` matches `foo.rs`, `src/foo.rs`, `a/b/c/foo.rs`
7//! - `src/**` matches everything under src/
8//! - `a/**/z` matches `a/z`, `a/b/z`, `a/b/c/z`
9
10use std::path::Path;
11use thiserror::Error;
12
13use crate::glob::glob_match;
14
15/// Errors when parsing glob patterns.
16#[derive(Debug, Clone, Error)]
17pub enum PatternError {
18    #[error("empty pattern")]
19    Empty,
20    #[error("invalid pattern: {0}")]
21    Invalid(String),
22}
23
/// One `/`-separated piece of a parsed path pattern.
///
/// Equality is structural and total (`Eq`): two segments are equal when they
/// are the same variant holding the same text, so `Literal("*")` and
/// `Pattern("*")` are different segments.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathSegment {
    /// A name without wildcard characters, compared verbatim: "src", "main.rs"
    Literal(String),
    /// A name containing wildcard syntax, matched against a single path
    /// component: "*.rs", "test_?"
    Pattern(String),
    /// Globstar (`**`): stands for zero or more whole path components
    Globstar,
}
34
35/// A path-aware glob pattern with globstar support.
36///
37/// # Examples
38/// ```
39/// use kaish_glob::GlobPath;
40/// use std::path::Path;
41///
42/// let pattern = GlobPath::new("**/*.rs").unwrap();
43/// assert!(pattern.matches(Path::new("main.rs")));
44/// assert!(pattern.matches(Path::new("src/main.rs")));
45/// assert!(pattern.matches(Path::new("src/lib/utils.rs")));
46/// assert!(!pattern.matches(Path::new("README.md")));
47/// ```
48#[derive(Debug, Clone)]
49pub struct GlobPath {
50    segments: Vec<PathSegment>,
51    anchored: bool,
52}
53
54impl GlobPath {
55    /// Parse a glob pattern into a GlobPath.
56    ///
57    /// Patterns starting with `/` are anchored to the root.
58    /// `**` matches zero or more directory components.
59    pub fn new(pattern: &str) -> Result<Self, PatternError> {
60        if pattern.is_empty() {
61            return Err(PatternError::Empty);
62        }
63
64        let (pattern, anchored) = if let Some(stripped) = pattern.strip_prefix('/') {
65            (stripped, true)
66        } else {
67            (pattern, false)
68        };
69
70        let mut segments = Vec::new();
71
72        for part in pattern.split('/') {
73            if part.is_empty() {
74                continue;
75            }
76
77            if part == "**" {
78                // Consecutive globstars collapse to one
79                if !matches!(segments.last(), Some(PathSegment::Globstar)) {
80                    segments.push(PathSegment::Globstar);
81                }
82            } else if Self::is_literal(part) {
83                segments.push(PathSegment::Literal(part.to_string()));
84            } else {
85                segments.push(PathSegment::Pattern(part.to_string()));
86            }
87        }
88
89        Ok(GlobPath { segments, anchored })
90    }
91
92    /// Check if a path matches this pattern.
93    pub fn matches(&self, path: &Path) -> bool {
94        let components: Vec<&str> = path
95            .components()
96            .filter_map(|c| c.as_os_str().to_str())
97            .collect();
98
99        self.match_segments(&self.segments, &components, 0, 0)
100    }
101
102    /// Get the static prefix of the pattern (directories before any wildcard).
103    ///
104    /// This is useful for optimization: we can start the walk from this prefix
105    /// instead of the root.
106    ///
107    /// # Examples
108    /// ```
109    /// use kaish_glob::GlobPath;
110    /// use std::path::PathBuf;
111    ///
112    /// let pattern = GlobPath::new("src/lib/**/*.rs").unwrap();
113    /// assert_eq!(pattern.static_prefix(), Some(PathBuf::from("src/lib")));
114    ///
115    /// let pattern = GlobPath::new("**/*.rs").unwrap();
116    /// assert_eq!(pattern.static_prefix(), None);
117    /// ```
118    pub fn static_prefix(&self) -> Option<std::path::PathBuf> {
119        let mut prefix = std::path::PathBuf::new();
120
121        for segment in &self.segments {
122            match segment {
123                PathSegment::Literal(s) => prefix.push(s),
124                _ => break,
125            }
126        }
127
128        if prefix.as_os_str().is_empty() {
129            None
130        } else {
131            Some(prefix)
132        }
133    }
134
135    /// Split the pattern into its deepest static directory prefix and the
136    /// remaining pattern to match beneath it.
137    ///
138    /// Used to start a walk from the literal leading directories instead of
139    /// the filesystem root: walking from `/` is O(filesystem) and skips
140    /// hidden intermediate directories, so `/tmp/.tmpXXXX/*.txt` would match
141    /// nothing. At least one segment is always kept in the remaining pattern,
142    /// so an all-literal pattern (`/a/b/c.txt`) walks `/a/b` and matches
143    /// `c.txt` rather than trying to descend into the file itself. The
144    /// returned pattern is unanchored (the anchor is consumed by the caller's
145    /// walk root).
146    ///
147    /// # Examples
148    /// ```
149    /// use kaish_glob::GlobPath;
150    /// use std::path::{Path, PathBuf};
151    ///
152    /// let (dir, rest) = GlobPath::new("/a/b/*.txt").unwrap().split_static_dir();
153    /// assert_eq!(dir, PathBuf::from("a/b"));
154    /// assert!(rest.matches(Path::new("c.txt")));
155    ///
156    /// // All-literal: the final component stays in the match pattern.
157    /// let (dir, rest) = GlobPath::new("/a/b/c.txt").unwrap().split_static_dir();
158    /// assert_eq!(dir, PathBuf::from("a/b"));
159    /// assert!(rest.matches(Path::new("c.txt")));
160    ///
161    /// // No static prefix (leading wildcard / globstar): empty dir, full pattern.
162    /// let (dir, _rest) = GlobPath::new("**/*.rs").unwrap().split_static_dir();
163    /// assert_eq!(dir, PathBuf::new());
164    /// ```
165    pub fn split_static_dir(&self) -> (std::path::PathBuf, GlobPath) {
166        let leading_literals = self
167            .segments
168            .iter()
169            .take_while(|s| matches!(s, PathSegment::Literal(_)))
170            .count();
171        // Never consume the final segment — leave something to match.
172        let prefix_len = leading_literals.min(self.segments.len().saturating_sub(1));
173
174        let mut prefix = std::path::PathBuf::new();
175        for segment in &self.segments[..prefix_len] {
176            if let PathSegment::Literal(s) = segment {
177                prefix.push(s);
178            }
179        }
180
181        let remaining = GlobPath {
182            segments: self.segments[prefix_len..].to_vec(),
183            anchored: false,
184        };
185        (prefix, remaining)
186    }
187
188    /// Check if the pattern only matches directories.
189    pub fn is_dir_only(&self) -> bool {
190        matches!(self.segments.last(), Some(PathSegment::Globstar))
191    }
192
193    /// Check if the pattern is anchored (starts with /).
194    pub fn is_anchored(&self) -> bool {
195        self.anchored
196    }
197
198    /// Check if the pattern contains a globstar (`**`).
199    ///
200    /// Patterns with globstar require recursive directory traversal.
201    /// Patterns without globstar only match at a fixed depth.
202    pub fn has_globstar(&self) -> bool {
203        self.segments.iter().any(|s| matches!(s, PathSegment::Globstar))
204    }
205
206    /// Get the depth of the pattern (number of path components).
207    ///
208    /// Returns `None` if the pattern contains globstar (variable depth).
209    pub fn fixed_depth(&self) -> Option<usize> {
210        if self.has_globstar() {
211            None
212        } else {
213            Some(self.segments.len())
214        }
215    }
216
217    /// Check if a string is a literal (no wildcards).
218    fn is_literal(s: &str) -> bool {
219        !s.contains('*') && !s.contains('?') && !s.contains('[') && !s.contains('{')
220    }
221
222    /// Recursive segment matching with backtracking for globstar.
223    fn match_segments(
224        &self,
225        segments: &[PathSegment],
226        components: &[&str],
227        seg_idx: usize,
228        comp_idx: usize,
229    ) -> bool {
230        // Both exhausted - match!
231        if seg_idx >= segments.len() && comp_idx >= components.len() {
232            return true;
233        }
234
235        // Segments exhausted but components remain - no match
236        // (unless we ended with globstar, which is already consumed)
237        if seg_idx >= segments.len() {
238            return false;
239        }
240
241        match &segments[seg_idx] {
242            PathSegment::Globstar => {
243                // Globstar matches zero or more components
244                // Try matching with 0, 1, 2, ... components consumed
245                for skip in 0..=(components.len() - comp_idx) {
246                    if self.match_segments(segments, components, seg_idx + 1, comp_idx + skip) {
247                        return true;
248                    }
249                }
250                false
251            }
252
253            PathSegment::Literal(lit) => {
254                if comp_idx >= components.len() {
255                    return false;
256                }
257                if components[comp_idx] == lit {
258                    self.match_segments(segments, components, seg_idx + 1, comp_idx + 1)
259                } else {
260                    false
261                }
262            }
263
264            PathSegment::Pattern(pat) => {
265                if comp_idx >= components.len() {
266                    return false;
267                }
268                if self.matches_component(pat, components[comp_idx]) {
269                    self.match_segments(segments, components, seg_idx + 1, comp_idx + 1)
270                } else {
271                    false
272                }
273            }
274        }
275    }
276
277    /// Match a single component against a pattern (with brace expansion).
278    fn matches_component(&self, pattern: &str, component: &str) -> bool {
279        glob_match(pattern, component)
280    }
281}
282
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// Parse `pattern`, failing the calling test if it is rejected.
    #[track_caller]
    fn glob(pattern: &str) -> GlobPath {
        match GlobPath::new(pattern) {
            Ok(parsed) => parsed,
            Err(err) => panic!("pattern {:?} failed to parse: {}", pattern, err),
        }
    }

    /// Assert that `pattern` matches every path in `accepted` and none of the
    /// paths in `rejected`. Failures are reported at the caller's line.
    #[track_caller]
    fn check(pattern: &str, accepted: &[&str], rejected: &[&str]) {
        let pat = glob(pattern);
        for path in accepted {
            assert!(
                pat.matches(Path::new(path)),
                "{:?} should match {:?}",
                pattern,
                path
            );
        }
        for path in rejected {
            assert!(
                !pat.matches(Path::new(path)),
                "{:?} should not match {:?}",
                pattern,
                path
            );
        }
    }

    #[test]
    fn test_literal_pattern() {
        check("src/main.rs", &["src/main.rs"], &["src/lib.rs", "main.rs"]);
    }

    #[test]
    fn test_simple_wildcard() {
        check(
            "*.rs",
            &["main.rs", "lib.rs"],
            // `*` only matches within a single component
            &["main.go", "src/main.rs"],
        );
    }

    #[test]
    fn test_globstar_prefix() {
        check(
            "**/*.rs",
            &["main.rs", "src/main.rs", "src/lib/utils.rs", "a/b/c/d/e.rs"],
            &["main.go", "src/main.go"],
        );
    }

    #[test]
    fn test_globstar_suffix() {
        check(
            "src/**",
            &["src", "src/main.rs", "src/lib/utils.rs"],
            &["test/main.rs"],
        );
    }

    #[test]
    fn test_globstar_middle() {
        check(
            "a/**/z",
            &["a/z", "a/b/z", "a/b/c/z", "a/b/c/d/e/z"],
            &["b/c/z", "a/z/extra"],
        );
    }

    #[test]
    fn test_consecutive_globstars() {
        check("a/**/**/z", &["a/z", "a/b/z", "a/b/c/z"], &[]);
    }

    #[test]
    fn test_brace_expansion() {
        check(
            "*.{rs,go,py}",
            &["main.rs", "server.go", "script.py"],
            &["style.css"],
        );
    }

    #[test]
    fn test_brace_with_globstar() {
        check(
            "**/*.{rs,go}",
            &["main.rs", "src/lib.go", "a/b/c/d.rs"],
            &["src/main.py"],
        );
    }

    #[test]
    fn test_question_mark() {
        check(
            "file?.txt",
            &["file1.txt", "fileA.txt"],
            &["file12.txt", "file.txt"],
        );
    }

    #[test]
    fn test_char_class() {
        check("[abc].rs", &["a.rs", "b.rs", "c.rs"], &["d.rs"]);
    }

    #[test]
    fn test_static_prefix() {
        assert_eq!(
            glob("src/lib/**/*.rs").static_prefix(),
            Some(PathBuf::from("src/lib"))
        );
        assert_eq!(glob("src/**").static_prefix(), Some(PathBuf::from("src")));
        assert_eq!(glob("**/*.rs").static_prefix(), None);
        assert_eq!(glob("*.rs").static_prefix(), None);

        // An all-literal pattern is its own prefix, file name included.
        assert_eq!(
            glob("src/main.rs").static_prefix(),
            Some(PathBuf::from("src/main.rs"))
        );
    }

    #[test]
    fn test_split_static_dir() {
        // Literal directories are peeled off; the wildcard tail remains.
        let (dir, rest) = glob("/a/b/*.txt").split_static_dir();
        assert_eq!(dir, PathBuf::from("a/b"));
        assert!(rest.matches(Path::new("c.txt")));
        assert!(!rest.is_anchored());

        // All-literal: the final component stays in the match pattern.
        let (dir, rest) = glob("/a/b/c.txt").split_static_dir();
        assert_eq!(dir, PathBuf::from("a/b"));
        assert!(rest.matches(Path::new("c.txt")));
        assert!(!rest.matches(Path::new("b/c.txt")));

        // A single literal has no directory part at all.
        let (dir, rest) = glob("main.rs").split_static_dir();
        assert_eq!(dir, PathBuf::new());
        assert!(rest.matches(Path::new("main.rs")));

        // Globstar tail is kept for matching below the static directory.
        let (dir, rest) = glob("src/**").split_static_dir();
        assert_eq!(dir, PathBuf::from("src"));
        assert!(rest.has_globstar());
        assert!(rest.matches(Path::new("lib/utils.rs")));

        // No static prefix: empty dir, full pattern.
        let (dir, rest) = glob("**/*.rs").split_static_dir();
        assert_eq!(dir, PathBuf::new());
        assert!(rest.matches(Path::new("src/main.rs")));
    }

    #[test]
    fn test_is_dir_only() {
        assert!(glob("src/**").is_dir_only());
        assert!(glob("**").is_dir_only());
        assert!(!glob("**/*.rs").is_dir_only());
        assert!(!glob("src/*.rs").is_dir_only());
        assert!(!glob("src/main.rs").is_dir_only());
    }

    #[test]
    fn test_anchored_pattern() {
        let pat = glob("/src/*.rs");
        assert!(pat.is_anchored());
        assert!(pat.matches(Path::new("src/main.rs")));

        assert!(!glob("src/*.rs").is_anchored());
    }

    #[test]
    fn test_empty_pattern() {
        assert!(matches!(GlobPath::new(""), Err(PatternError::Empty)));
    }

    #[test]
    fn test_has_globstar() {
        for pattern in ["**/*.rs", "src/**", "a/**/z"].iter() {
            assert!(glob(pattern).has_globstar(), "{:?}", pattern);
        }
        for pattern in ["*.rs", "src/*.rs", "src/lib/main.rs"].iter() {
            assert!(!glob(pattern).has_globstar(), "{:?}", pattern);
        }
    }

    #[test]
    fn test_fixed_depth() {
        assert_eq!(glob("*.rs").fixed_depth(), Some(1));
        assert_eq!(glob("src/*.rs").fixed_depth(), Some(2));
        assert_eq!(glob("a/b/c.txt").fixed_depth(), Some(3));
        assert_eq!(glob("**/*.rs").fixed_depth(), None);
        assert_eq!(glob("src/**").fixed_depth(), None);
    }

    #[test]
    fn test_hidden_files() {
        check("**/*.rs", &[".hidden.rs", ".config/settings.rs"], &[]);
    }

    #[test]
    fn test_complex_real_world() {
        check(
            "**/*_test.rs",
            &[
                "parser_test.rs",
                "src/lexer_test.rs",
                "crates/kernel/tests/eval_test.rs",
            ],
            &["parser.rs"],
        );

        check(
            "src/**/*.{rs,go}",
            &["src/main.rs", "src/api/handler.go"],
            &["test/main.rs"],
        );
    }
}