Skip to main content

kaish_glob/
glob_path.rs

1//! Path-aware glob matching with globstar (`**`) support.
2//!
3//! Extends the basic glob matching in `glob.rs` to handle patterns
4//! that span directory boundaries with `**`:
5//!
6//! - `**/*.rs` matches `foo.rs`, `src/foo.rs`, `a/b/c/foo.rs`
7//! - `src/**` matches everything under src/
8//! - `a/**/z` matches `a/z`, `a/b/z`, `a/b/c/z`
9
10use std::path::Path;
11use thiserror::Error;
12
13use crate::glob::glob_match;
14
15/// Errors when parsing glob patterns.
16#[derive(Debug, Clone, Error)]
17pub enum PatternError {
18    #[error("empty pattern")]
19    Empty,
20    #[error("invalid pattern: {0}")]
21    Invalid(String),
22}
23
/// One `/`-separated segment of a parsed path pattern.
///
/// Every payload is a `String`, so the type is totally comparable and
/// hashable; `Eq` and `Hash` are derived alongside `PartialEq` so segments can
/// be used as keys or stored in hashed collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PathSegment {
    /// Literal directory or file name: "src", "main.rs"
    Literal(String),
    /// Pattern with wildcards: "*.rs", "test_?"
    Pattern(String),
    /// Globstar: matches zero or more directory components
    Globstar,
}
34
/// A path-aware glob pattern with globstar support.
///
/// The pattern is stored pre-parsed as a list of [`PathSegment`]s, one per
/// `/`-separated part, plus a flag recording whether the source pattern
/// started with `/`.
///
/// # Examples
/// ```
/// use kaish_glob::GlobPath;
/// use std::path::Path;
///
/// let pattern = GlobPath::new("**/*.rs").unwrap();
/// assert!(pattern.matches(Path::new("main.rs")));
/// assert!(pattern.matches(Path::new("src/main.rs")));
/// assert!(pattern.matches(Path::new("src/lib/utils.rs")));
/// assert!(!pattern.matches(Path::new("README.md")));
/// ```
#[derive(Debug, Clone)]
pub struct GlobPath {
    // Parsed segments in path order; `new` skips empty parts and collapses
    // consecutive `**` into a single `Globstar`.
    segments: Vec<PathSegment>,
    // True when the source pattern began with `/`.
    anchored: bool,
}
53
54impl GlobPath {
55    /// Parse a glob pattern into a GlobPath.
56    ///
57    /// Patterns starting with `/` are anchored to the root.
58    /// `**` matches zero or more directory components.
59    pub fn new(pattern: &str) -> Result<Self, PatternError> {
60        if pattern.is_empty() {
61            return Err(PatternError::Empty);
62        }
63
64        let (pattern, anchored) = if let Some(stripped) = pattern.strip_prefix('/') {
65            (stripped, true)
66        } else {
67            (pattern, false)
68        };
69
70        let mut segments = Vec::new();
71
72        for part in pattern.split('/') {
73            if part.is_empty() {
74                continue;
75            }
76
77            if part == "**" {
78                // Consecutive globstars collapse to one
79                if !matches!(segments.last(), Some(PathSegment::Globstar)) {
80                    segments.push(PathSegment::Globstar);
81                }
82            } else if Self::is_literal(part) {
83                segments.push(PathSegment::Literal(part.to_string()));
84            } else {
85                segments.push(PathSegment::Pattern(part.to_string()));
86            }
87        }
88
89        Ok(GlobPath { segments, anchored })
90    }
91
92    /// Check if a path matches this pattern.
93    pub fn matches(&self, path: &Path) -> bool {
94        let components: Vec<&str> = path
95            .components()
96            .filter_map(|c| c.as_os_str().to_str())
97            .collect();
98
99        self.match_segments(&self.segments, &components, 0, 0)
100    }
101
102    /// Get the static prefix of the pattern (directories before any wildcard).
103    ///
104    /// This is useful for optimization: we can start the walk from this prefix
105    /// instead of the root.
106    ///
107    /// # Examples
108    /// ```
109    /// use kaish_glob::GlobPath;
110    /// use std::path::PathBuf;
111    ///
112    /// let pattern = GlobPath::new("src/lib/**/*.rs").unwrap();
113    /// assert_eq!(pattern.static_prefix(), Some(PathBuf::from("src/lib")));
114    ///
115    /// let pattern = GlobPath::new("**/*.rs").unwrap();
116    /// assert_eq!(pattern.static_prefix(), None);
117    /// ```
118    pub fn static_prefix(&self) -> Option<std::path::PathBuf> {
119        let mut prefix = std::path::PathBuf::new();
120
121        for segment in &self.segments {
122            match segment {
123                PathSegment::Literal(s) => prefix.push(s),
124                _ => break,
125            }
126        }
127
128        if prefix.as_os_str().is_empty() {
129            None
130        } else {
131            Some(prefix)
132        }
133    }
134
135    /// Split the pattern into its deepest static directory prefix and the
136    /// remaining pattern to match beneath it.
137    ///
138    /// Used to start a walk from the literal leading directories instead of
139    /// the filesystem root: walking from `/` is O(filesystem) and skips
140    /// hidden intermediate directories, so `/tmp/.tmpXXXX/*.txt` would match
141    /// nothing. At least one segment is always kept in the remaining pattern,
142    /// so an all-literal pattern (`/a/b/c.txt`) walks `/a/b` and matches
143    /// `c.txt` rather than trying to descend into the file itself. The
144    /// returned pattern is unanchored (the anchor is consumed by the caller's
145    /// walk root).
146    ///
147    /// # Examples
148    /// ```
149    /// use kaish_glob::GlobPath;
150    /// use std::path::{Path, PathBuf};
151    ///
152    /// let (dir, rest) = GlobPath::new("/a/b/*.txt").unwrap().split_static_dir();
153    /// assert_eq!(dir, PathBuf::from("a/b"));
154    /// assert!(rest.matches(Path::new("c.txt")));
155    ///
156    /// // All-literal: the final component stays in the match pattern.
157    /// let (dir, rest) = GlobPath::new("/a/b/c.txt").unwrap().split_static_dir();
158    /// assert_eq!(dir, PathBuf::from("a/b"));
159    /// assert!(rest.matches(Path::new("c.txt")));
160    ///
161    /// // No static prefix (leading wildcard / globstar): empty dir, full pattern.
162    /// let (dir, _rest) = GlobPath::new("**/*.rs").unwrap().split_static_dir();
163    /// assert_eq!(dir, PathBuf::new());
164    /// ```
165    pub fn split_static_dir(&self) -> (std::path::PathBuf, GlobPath) {
166        let leading_literals = self
167            .segments
168            .iter()
169            .take_while(|s| matches!(s, PathSegment::Literal(_)))
170            .count();
171        // Never consume the final segment — leave something to match.
172        let prefix_len = leading_literals.min(self.segments.len().saturating_sub(1));
173
174        let mut prefix = std::path::PathBuf::new();
175        for segment in &self.segments[..prefix_len] {
176            if let PathSegment::Literal(s) = segment {
177                prefix.push(s);
178            }
179        }
180
181        let remaining = GlobPath {
182            segments: self.segments[prefix_len..].to_vec(),
183            anchored: false,
184        };
185        (prefix, remaining)
186    }
187
188    /// Check if the pattern only matches directories.
189    pub fn is_dir_only(&self) -> bool {
190        matches!(self.segments.last(), Some(PathSegment::Globstar))
191    }
192
193    /// Check if the pattern is anchored (starts with /).
194    pub fn is_anchored(&self) -> bool {
195        self.anchored
196    }
197
198    /// Check if the pattern contains a globstar (`**`).
199    ///
200    /// Patterns with globstar require recursive directory traversal.
201    /// Patterns without globstar only match at a fixed depth.
202    pub fn has_globstar(&self) -> bool {
203        self.segments.iter().any(|s| matches!(s, PathSegment::Globstar))
204    }
205
206    /// Get the depth of the pattern (number of path components).
207    ///
208    /// Returns `None` if the pattern contains globstar (variable depth).
209    pub fn fixed_depth(&self) -> Option<usize> {
210        if self.has_globstar() {
211            None
212        } else {
213            Some(self.segments.len())
214        }
215    }
216
217    /// Match a path under file-walk semantics, honouring the leading-dot rule.
218    ///
219    /// When `dotglob` is false (the default) a leading `.` in a path component
220    /// is matched only by a segment that explicitly begins with a literal `.`:
221    /// bare wildcards (`*`, `?`, `[…]`) and globstar (`**`) skip dot entries, so
222    /// `*` hides dotfiles while `.*`, `.github`, and `**/.env` reach them.
223    /// `dotglob == true` disables the rule (bash `shopt -s dotglob`).
224    ///
225    /// This differs from [`matches`](Self::matches), which is dotfile-agnostic
226    /// and used for include/exclude filtering of already-walked paths.
227    pub fn matches_walk(&self, path: &Path, dotglob: bool) -> bool {
228        let components: Vec<&str> = path
229            .components()
230            .filter_map(|c| c.as_os_str().to_str())
231            .collect();
232        self.walk_match(&components, 0, 0, dotglob, false)
233    }
234
235    /// Whether the walker should descend into the directory at relative path
236    /// `dir` — i.e. whether some entry beneath it could still match.
237    ///
238    /// Honours the same leading-dot rule as [`matches_walk`](Self::matches_walk):
239    /// `**` does not descend into hidden directories without `dotglob`, while an
240    /// explicitly named dot directory (`.github`, or a `.foo` segment reached
241    /// through a zero-width `**`) is entered.
242    pub fn could_descend(&self, dir: &Path, dotglob: bool) -> bool {
243        let components: Vec<&str> = dir
244            .components()
245            .filter_map(|c| c.as_os_str().to_str())
246            .collect();
247        self.walk_match(&components, 0, 0, dotglob, true)
248    }
249
250    /// Shared engine for [`matches_walk`] and [`could_descend`].
251    ///
252    /// In full-match mode (`prefix == false`) it answers "does this complete
253    /// path match?". In prefix mode (`prefix == true`) `components` are a
254    /// directory's path and it answers "could a deeper entry match?", which the
255    /// walker uses to decide descent.
256    fn walk_match(
257        &self,
258        components: &[&str],
259        seg_idx: usize,
260        comp_idx: usize,
261        dotglob: bool,
262        prefix: bool,
263    ) -> bool {
264        if comp_idx >= components.len() {
265            return if prefix {
266                // Directory prefix fully consumed: descend if any segment
267                // remains for a child component to match.
268                seg_idx < self.segments.len()
269            } else {
270                // Full match: only trailing globstars may match zero components.
271                self.segments[seg_idx..]
272                    .iter()
273                    .all(|s| matches!(s, PathSegment::Globstar))
274            };
275        }
276        if seg_idx >= self.segments.len() {
277            return false;
278        }
279
280        match &self.segments[seg_idx] {
281            PathSegment::Globstar => {
282                // Match zero components...
283                if self.walk_match(components, seg_idx + 1, comp_idx, dotglob, prefix) {
284                    return true;
285                }
286                // ...or consume one component and stay on the globstar. Without
287                // dotglob, `**` never traverses a hidden component.
288                if dotglob || !components[comp_idx].starts_with('.') {
289                    self.walk_match(components, seg_idx, comp_idx + 1, dotglob, prefix)
290                } else {
291                    false
292                }
293            }
294
295            PathSegment::Literal(lit) => {
296                if components[comp_idx] == *lit {
297                    self.walk_match(components, seg_idx + 1, comp_idx + 1, dotglob, prefix)
298                } else {
299                    false
300                }
301            }
302
303            PathSegment::Pattern(pat) => {
304                let comp = components[comp_idx];
305                // A bare wildcard segment does not match a leading dot.
306                if comp.starts_with('.') && !dotglob && !pattern_leads_with_dot(pat) {
307                    return false;
308                }
309                if self.matches_component(pat, comp) {
310                    self.walk_match(components, seg_idx + 1, comp_idx + 1, dotglob, prefix)
311                } else {
312                    false
313                }
314            }
315        }
316    }
317
318    /// Check if a string is a literal (no wildcards).
319    fn is_literal(s: &str) -> bool {
320        !s.contains('*') && !s.contains('?') && !s.contains('[') && !s.contains('{')
321    }
322
323    /// Recursive segment matching with backtracking for globstar.
324    fn match_segments(
325        &self,
326        segments: &[PathSegment],
327        components: &[&str],
328        seg_idx: usize,
329        comp_idx: usize,
330    ) -> bool {
331        // Both exhausted - match!
332        if seg_idx >= segments.len() && comp_idx >= components.len() {
333            return true;
334        }
335
336        // Segments exhausted but components remain - no match
337        // (unless we ended with globstar, which is already consumed)
338        if seg_idx >= segments.len() {
339            return false;
340        }
341
342        match &segments[seg_idx] {
343            PathSegment::Globstar => {
344                // Globstar matches zero or more components
345                // Try matching with 0, 1, 2, ... components consumed
346                for skip in 0..=(components.len() - comp_idx) {
347                    if self.match_segments(segments, components, seg_idx + 1, comp_idx + skip) {
348                        return true;
349                    }
350                }
351                false
352            }
353
354            PathSegment::Literal(lit) => {
355                if comp_idx >= components.len() {
356                    return false;
357                }
358                if components[comp_idx] == lit {
359                    self.match_segments(segments, components, seg_idx + 1, comp_idx + 1)
360                } else {
361                    false
362                }
363            }
364
365            PathSegment::Pattern(pat) => {
366                if comp_idx >= components.len() {
367                    return false;
368                }
369                if self.matches_component(pat, components[comp_idx]) {
370                    self.match_segments(segments, components, seg_idx + 1, comp_idx + 1)
371                } else {
372                    false
373                }
374            }
375        }
376    }
377
378    /// Match a single component against a pattern (with brace expansion).
379    fn matches_component(&self, pattern: &str, component: &str) -> bool {
380        glob_match(pattern, component)
381    }
382}
383
384/// Whether a wildcard segment explicitly names a leading dot — i.e. some brace
385/// alternative begins with a literal `.` (`.*`, `.[bg]it`, `{.,}config`). A
386/// leading wildcard (`*`, `?`, `[…]`) does not count — matching bash, even a
387/// character class that *could* match `.` (`[.]foo`) does not, because the
388/// first pattern character is `[`, not a literal `.`.
389fn pattern_leads_with_dot(pattern: &str) -> bool {
390    crate::glob::expand_braces(pattern)
391        .iter()
392        .any(|alt| alt.starts_with('.'))
393}
394
/// Unit tests for `GlobPath`: parsing, dotfile-agnostic matching (`matches`),
/// walk-time matching with the leading-dot rule (`matches_walk`,
/// `could_descend`) and the structural accessors.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// An all-literal pattern matches only the exact relative path.
    #[test]
    fn test_literal_pattern() {
        let pat = GlobPath::new("src/main.rs").unwrap();
        assert!(pat.matches(Path::new("src/main.rs")));
        assert!(!pat.matches(Path::new("src/lib.rs")));
        assert!(!pat.matches(Path::new("main.rs")));
    }

    /// `*` stays within a single path component.
    #[test]
    fn test_simple_wildcard() {
        let pat = GlobPath::new("*.rs").unwrap();
        assert!(pat.matches(Path::new("main.rs")));
        assert!(pat.matches(Path::new("lib.rs")));
        assert!(!pat.matches(Path::new("main.go")));
        assert!(!pat.matches(Path::new("src/main.rs"))); // Only matches single component
    }

    /// A leading `**` matches at any depth, including zero directories.
    #[test]
    fn test_globstar_prefix() {
        let pat = GlobPath::new("**/*.rs").unwrap();
        assert!(pat.matches(Path::new("main.rs")));
        assert!(pat.matches(Path::new("src/main.rs")));
        assert!(pat.matches(Path::new("src/lib/utils.rs")));
        assert!(pat.matches(Path::new("a/b/c/d/e.rs")));
        assert!(!pat.matches(Path::new("main.go")));
        assert!(!pat.matches(Path::new("src/main.go")));
    }

    /// A trailing `**` matches the directory itself and everything below it.
    #[test]
    fn test_globstar_suffix() {
        let pat = GlobPath::new("src/**").unwrap();
        assert!(pat.matches(Path::new("src")));
        assert!(pat.matches(Path::new("src/main.rs")));
        assert!(pat.matches(Path::new("src/lib/utils.rs")));
        assert!(!pat.matches(Path::new("test/main.rs")));
    }

    /// `**` between literals spans zero or more intermediate directories.
    #[test]
    fn test_globstar_middle() {
        let pat = GlobPath::new("a/**/z").unwrap();
        assert!(pat.matches(Path::new("a/z")));
        assert!(pat.matches(Path::new("a/b/z")));
        assert!(pat.matches(Path::new("a/b/c/z")));
        assert!(pat.matches(Path::new("a/b/c/d/e/z")));
        assert!(!pat.matches(Path::new("b/c/z")));
        assert!(!pat.matches(Path::new("a/z/extra")));
    }

    /// Consecutive globstars behave like a single one.
    #[test]
    fn test_consecutive_globstars() {
        let pat = GlobPath::new("a/**/**/z").unwrap();
        assert!(pat.matches(Path::new("a/z")));
        assert!(pat.matches(Path::new("a/b/z")));
        assert!(pat.matches(Path::new("a/b/c/z")));
    }

    /// Brace alternatives are honoured inside a single component.
    #[test]
    fn test_brace_expansion() {
        let pat = GlobPath::new("*.{rs,go,py}").unwrap();
        assert!(pat.matches(Path::new("main.rs")));
        assert!(pat.matches(Path::new("server.go")));
        assert!(pat.matches(Path::new("script.py")));
        assert!(!pat.matches(Path::new("style.css")));
    }

    /// Brace alternatives combine with a leading globstar.
    #[test]
    fn test_brace_with_globstar() {
        let pat = GlobPath::new("**/*.{rs,go}").unwrap();
        assert!(pat.matches(Path::new("main.rs")));
        assert!(pat.matches(Path::new("src/lib.go")));
        assert!(pat.matches(Path::new("a/b/c/d.rs")));
        assert!(!pat.matches(Path::new("src/main.py")));
    }

    /// `?` matches exactly one character.
    #[test]
    fn test_question_mark() {
        let pat = GlobPath::new("file?.txt").unwrap();
        assert!(pat.matches(Path::new("file1.txt")));
        assert!(pat.matches(Path::new("fileA.txt")));
        assert!(!pat.matches(Path::new("file12.txt")));
        assert!(!pat.matches(Path::new("file.txt")));
    }

    /// A character class matches one of the listed characters.
    #[test]
    fn test_char_class() {
        let pat = GlobPath::new("[abc].rs").unwrap();
        assert!(pat.matches(Path::new("a.rs")));
        assert!(pat.matches(Path::new("b.rs")));
        assert!(pat.matches(Path::new("c.rs")));
        assert!(!pat.matches(Path::new("d.rs")));
    }

    /// `static_prefix` returns the leading literal run, or `None` without one.
    #[test]
    fn test_static_prefix() {
        assert_eq!(
            GlobPath::new("src/lib/**/*.rs").unwrap().static_prefix(),
            Some(std::path::PathBuf::from("src/lib"))
        );

        assert_eq!(
            GlobPath::new("src/**").unwrap().static_prefix(),
            Some(std::path::PathBuf::from("src"))
        );

        assert_eq!(GlobPath::new("**/*.rs").unwrap().static_prefix(), None);

        assert_eq!(GlobPath::new("*.rs").unwrap().static_prefix(), None);
    }

    /// A leading `/` sets the anchored flag; matching still uses relative paths.
    #[test]
    fn test_anchored_pattern() {
        let pat = GlobPath::new("/src/*.rs").unwrap();
        assert!(pat.is_anchored());
        assert!(pat.matches(Path::new("src/main.rs")));
    }

    /// The empty string is rejected with `PatternError::Empty`.
    #[test]
    fn test_empty_pattern() {
        assert!(matches!(GlobPath::new(""), Err(PatternError::Empty)));
    }

    /// `has_globstar` is true exactly when some segment is `**`.
    #[test]
    fn test_has_globstar() {
        assert!(GlobPath::new("**/*.rs").unwrap().has_globstar());
        assert!(GlobPath::new("src/**").unwrap().has_globstar());
        assert!(GlobPath::new("a/**/z").unwrap().has_globstar());
        assert!(!GlobPath::new("*.rs").unwrap().has_globstar());
        assert!(!GlobPath::new("src/*.rs").unwrap().has_globstar());
        assert!(!GlobPath::new("src/lib/main.rs").unwrap().has_globstar());
    }

    /// `fixed_depth` is the segment count, or `None` when a globstar is present.
    #[test]
    fn test_fixed_depth() {
        assert_eq!(GlobPath::new("*.rs").unwrap().fixed_depth(), Some(1));
        assert_eq!(GlobPath::new("src/*.rs").unwrap().fixed_depth(), Some(2));
        assert_eq!(GlobPath::new("a/b/c.txt").unwrap().fixed_depth(), Some(3));
        assert_eq!(GlobPath::new("**/*.rs").unwrap().fixed_depth(), None);
        assert_eq!(GlobPath::new("src/**").unwrap().fixed_depth(), None);
    }

    /// `matches` is dotfile-agnostic: hidden files and directories match.
    #[test]
    fn test_hidden_files() {
        let pat = GlobPath::new("**/*.rs").unwrap();
        assert!(pat.matches(Path::new(".hidden.rs")));
        assert!(pat.matches(Path::new(".config/settings.rs")));
    }

    /// `matches_walk` applies the leading-dot rule unless `dotglob` is set.
    #[test]
    fn test_matches_walk_leading_dot_rule() {
        let no = false; // dotglob off

        // Bare wildcard skips dotfiles; explicit dot segment matches them.
        assert!(!GlobPath::new("*").unwrap().matches_walk(Path::new(".env"), no));
        assert!(GlobPath::new("*").unwrap().matches_walk(Path::new("visible"), no));
        assert!(GlobPath::new(".*").unwrap().matches_walk(Path::new(".env"), no));
        assert!(!GlobPath::new(".*").unwrap().matches_walk(Path::new("visible"), no));

        // Explicit dot directory, and the `*` inside still hides dotfiles.
        assert!(GlobPath::new(".github/*").unwrap().matches_walk(Path::new(".github/ci.yml"), no));
        assert!(!GlobPath::new(".github/*").unwrap().matches_walk(Path::new(".github/.secret"), no));

        // Globstar does not match or traverse hidden components without dotglob.
        assert!(!GlobPath::new("**/*.rs").unwrap().matches_walk(Path::new(".hidden.rs"), no));
        assert!(!GlobPath::new("**/*.rs").unwrap().matches_walk(Path::new(".git/config.rs"), no));
        assert!(GlobPath::new("**/*.rs").unwrap().matches_walk(Path::new("src/main.rs"), no));

        // Explicit dot segment AFTER a globstar (the regression DeepSeek found).
        assert!(GlobPath::new("**/.env").unwrap().matches_walk(Path::new(".env"), no));
        assert!(GlobPath::new("**/.env").unwrap().matches_walk(Path::new("sub/.env"), no));
        assert!(!GlobPath::new("**/.env").unwrap().matches_walk(Path::new(".hidden/.env"), no));
        assert!(GlobPath::new("**/.github/*.yml").unwrap()
            .matches_walk(Path::new(".github/ci.yml"), no));
        assert!(GlobPath::new("**/.github/*.yml").unwrap()
            .matches_walk(Path::new("sub/.github/ci.yml"), no));

        // dotglob disables the rule (bash `shopt -s dotglob`).
        assert!(GlobPath::new("*").unwrap().matches_walk(Path::new(".env"), true));
        assert!(GlobPath::new("**/*.rs").unwrap().matches_walk(Path::new(".git/config.rs"), true));
    }

    /// `could_descend` applies the same leading-dot rule to directory descent.
    #[test]
    fn test_could_descend_leading_dot_rule() {
        let no = false; // dotglob off

        // `**` descends into visible dirs but not hidden ones.
        assert!(GlobPath::new("**/.env").unwrap().could_descend(Path::new("sub"), no));
        assert!(!GlobPath::new("**/.env").unwrap().could_descend(Path::new(".hidden"), no));

        // An explicitly named dot dir is entered, including through zero-width `**`.
        assert!(GlobPath::new(".github/*").unwrap().could_descend(Path::new(".github"), no));
        assert!(GlobPath::new("**/.github/*.yml").unwrap()
            .could_descend(Path::new(".github"), no));

        // Bare `*` (fixed depth 1) needs no descent; `**` enters visible dirs.
        assert!(!GlobPath::new("*").unwrap().could_descend(Path::new("sub"), no));
        assert!(GlobPath::new("src/*.rs").unwrap().could_descend(Path::new("src"), no));
        assert!(!GlobPath::new("src/*.rs").unwrap().could_descend(Path::new("other"), no));

        // dotglob lets `**` descend into hidden dirs.
        assert!(GlobPath::new("**/*.rs").unwrap().could_descend(Path::new(".git"), true));
        assert!(!GlobPath::new("**/*.rs").unwrap().could_descend(Path::new(".git"), no));
    }

    /// Realistic patterns combining globstar, suffix wildcards and braces.
    #[test]
    fn test_complex_real_world() {
        let pat = GlobPath::new("**/*_test.rs").unwrap();
        assert!(pat.matches(Path::new("parser_test.rs")));
        assert!(pat.matches(Path::new("src/lexer_test.rs")));
        assert!(pat.matches(Path::new("crates/kernel/tests/eval_test.rs")));
        assert!(!pat.matches(Path::new("parser.rs")));

        let pat = GlobPath::new("src/**/*.{rs,go}").unwrap();
        assert!(pat.matches(Path::new("src/main.rs")));
        assert!(pat.matches(Path::new("src/api/handler.go")));
        assert!(!pat.matches(Path::new("test/main.rs")));
    }
}