Skip to main content

testx/watcher/
glob.rs

/// Simple glob pattern matching for ignore patterns.
///
/// Supported wildcards:
/// - `*` matches any run of characters inside one path component (never `/`).
/// - `?` matches exactly one character other than `/`.
/// - `**/` at the start of a path component matches zero or more whole
///   directories, so `**/foo` matches `foo` and `a/b/foo` but not `barfoo`.
/// - `**` anywhere else matches any run of characters, including `/`.
///
/// Every other character matches itself literally; there is no escape syntax.
#[derive(Debug, Clone)]
pub struct GlobPattern {
    /// The pattern text exactly as it was passed to [`GlobPattern::new`].
    pattern: String,
    /// The pattern compiled into a sequence of matchable tokens.
    parts: Vec<GlobPart>,
}

/// One token of a compiled [`GlobPattern`].
#[derive(Debug, Clone)]
enum GlobPart {
    /// A run of characters that must appear verbatim.
    Literal(String),
    /// `*` — matches zero or more characters, none of which is `/`.
    Star,
    /// `**` — matches zero or more characters, including `/`.
    DoubleStar,
    /// `**/` at a component boundary — matches zero or more whole directory
    /// components, each including its trailing `/`.
    DoubleStarDir,
    /// `?` — matches exactly one character that is not `/`.
    Question,
}

impl GlobPattern {
    /// Compiles `pattern` into a matcher. Compilation cannot fail: characters
    /// that are not wildcards are simply treated as literals.
    pub fn new(pattern: &str) -> Self {
        Self {
            pattern: pattern.to_string(),
            parts: Self::parse(pattern),
        }
    }

    /// Returns the original pattern text.
    pub fn pattern(&self) -> &str {
        &self.pattern
    }

    /// Splits `pattern` into literal runs and wildcard tokens.
    fn parse(pattern: &str) -> Vec<GlobPart> {
        let mut parts = Vec::new();
        let mut chars = pattern.chars().peekable();
        let mut literal = String::new();
        // True at the very start of the pattern and immediately after a `/`.
        // Only there does `**/` mean "any number of directories".
        let mut at_component_start = true;

        while let Some(c) = chars.next() {
            match c {
                '*' => {
                    if !literal.is_empty() {
                        parts.push(GlobPart::Literal(std::mem::take(&mut literal)));
                    }
                    if chars.next_if_eq(&'*').is_some() {
                        // The `/` that follows `**` belongs to the wildcard.
                        let ate_slash = chars.next_if_eq(&'/').is_some();
                        // A trailing `**/` keeps the permissive "match
                        // everything below" meaning of a bare `**`.
                        let whole_dirs =
                            ate_slash && at_component_start && chars.peek().is_some();
                        parts.push(if whole_dirs {
                            GlobPart::DoubleStarDir
                        } else {
                            GlobPart::DoubleStar
                        });
                        at_component_start = ate_slash;
                    } else {
                        parts.push(GlobPart::Star);
                        at_component_start = false;
                    }
                }
                '?' => {
                    if !literal.is_empty() {
                        parts.push(GlobPart::Literal(std::mem::take(&mut literal)));
                    }
                    parts.push(GlobPart::Question);
                    at_component_start = false;
                }
                _ => {
                    literal.push(c);
                    at_component_start = c == '/';
                }
            }
        }

        if !literal.is_empty() {
            parts.push(GlobPart::Literal(literal));
        }

        parts
    }

    /// Check if a path matches this glob pattern.
    ///
    /// The whole of `path` must be consumed by the pattern; a match on a
    /// prefix or substring is not enough.
    pub fn matches(&self, path: &str) -> bool {
        Self::match_parts(&self.parts, path)
    }

    /// Check if a filename (last component) matches this pattern.
    ///
    /// If the pattern contains no `/`, only the text after the last `/` in
    /// `path` is tested, so `*.pyc` matches `src/test.pyc`. Patterns that do
    /// contain a `/` are matched against the full path, like [`Self::matches`].
    pub fn matches_filename(&self, path: &str) -> bool {
        if self.pattern.contains('/') {
            return self.matches(path);
        }
        let filename = path.rsplit_once('/').map_or(path, |(_, name)| name);
        Self::match_parts(&self.parts, filename)
    }

    /// Recursively matches `parts` against the whole of `text`.
    ///
    /// Returns `true` only if every part is consumed and no text is left over.
    fn match_parts(parts: &[GlobPart], text: &str) -> bool {
        let Some((first, remaining)) = parts.split_first() else {
            return text.is_empty();
        };

        match first {
            GlobPart::Literal(lit) => text
                .strip_prefix(lit.as_str())
                .is_some_and(|rest| Self::match_parts(remaining, rest)),
            GlobPart::Question => {
                let mut chars = text.chars();
                match chars.next() {
                    Some(c) if c != '/' => Self::match_parts(remaining, chars.as_str()),
                    _ => false,
                }
            }
            GlobPart::Star => {
                // `*` may swallow anything up to, but not including, the next
                // `/` (or the end of the text when there is none).
                let limit = text.find('/').unwrap_or(text.len());
                (0..=limit)
                    .filter(|&i| text.is_char_boundary(i))
                    .any(|i| Self::match_parts(remaining, &text[i..]))
            }
            GlobPart::DoubleStar => {
                // `**` may swallow any prefix of the text, including all of it.
                text.char_indices()
                    .map(|(i, _)| i)
                    .chain(std::iter::once(text.len()))
                    .any(|i| Self::match_parts(remaining, &text[i..]))
            }
            GlobPart::DoubleStarDir => {
                // `**/` swallows zero or more whole `dir/` components, so the
                // rest of the pattern may only resume at the start of the text
                // or right after a `/`.
                std::iter::once(0)
                    .chain(text.match_indices('/').map(|(i, _)| i + 1))
                    .any(|start| Self::match_parts(remaining, &text[start..]))
            }
        }
    }
}
142
143/// Check if a path should be ignored based on a list of glob patterns.
144pub fn should_ignore(path: &str, patterns: &[GlobPattern]) -> bool {
145    let normalized = path.replace('\\', "/");
146    patterns.iter().any(|p| {
147        p.matches_filename(&normalized)
148            || normalized.split('/').any(|component| p.matches(component))
149    })
150}
151
#[cfg(test)]
mod tests {
    use super::*;

    /// Compiles each pattern string into a `GlobPattern`.
    fn compile(patterns: &[&str]) -> Vec<GlobPattern> {
        patterns.iter().map(|p| GlobPattern::new(p)).collect()
    }

    /// Asserts that `pattern` fully matches every `accepted` path and none of
    /// the `rejected` ones.
    fn check_matches(pattern: &str, accepted: &[&str], rejected: &[&str]) {
        let glob = GlobPattern::new(pattern);
        for path in accepted {
            assert!(glob.matches(path), "{pattern:?} should match {path:?}");
        }
        for path in rejected {
            assert!(!glob.matches(path), "{pattern:?} should not match {path:?}");
        }
    }

    /// Same as `check_matches`, but goes through `matches_filename`.
    fn check_filename(pattern: &str, accepted: &[&str], rejected: &[&str]) {
        let glob = GlobPattern::new(pattern);
        for path in accepted {
            assert!(
                glob.matches_filename(path),
                "{pattern:?} should match filename of {path:?}"
            );
        }
        for path in rejected {
            assert!(
                !glob.matches_filename(path),
                "{pattern:?} should not match filename of {path:?}"
            );
        }
    }

    /// Asserts which paths `should_ignore` accepts and rejects for `patterns`.
    fn check_ignore(patterns: &[&str], ignored: &[&str], kept: &[&str]) {
        let globs = compile(patterns);
        for path in ignored {
            assert!(should_ignore(path, &globs), "{path:?} should be ignored");
        }
        for path in kept {
            assert!(!should_ignore(path, &globs), "{path:?} should be kept");
        }
    }

    #[test]
    fn literal_match() {
        check_matches("hello", &["hello"], &["world", "hello!"]);
    }

    #[test]
    fn star_match_extension() {
        // The last rejected path checks that `*` does not cross `/`.
        check_matches(
            "*.pyc",
            &["test.pyc", "foo.pyc"],
            &["test.py", "dir/test.pyc"],
        );
    }

    #[test]
    fn star_match_prefix() {
        check_matches("test_*", &["test_foo", "test_bar_baz"], &["foo_test"]);
    }

    #[test]
    fn double_star_match() {
        check_matches(
            "**/*.pyc",
            &["dir/test.pyc", "a/b/c/test.pyc", "test.pyc"],
            &["test.py"],
        );
    }

    #[test]
    fn question_mark() {
        check_matches(
            "test?.py",
            &["test1.py", "testA.py"],
            &["test.py", "test12.py"],
        );
    }

    #[test]
    fn matches_filename_simple() {
        check_filename("*.pyc", &["src/test.pyc", "a/b/c.pyc"], &["src/test.py"]);
    }

    #[test]
    fn matches_filename_with_path() {
        check_filename("src/*.rs", &["src/main.rs"], &["test/main.rs"]);
    }

    #[test]
    fn should_ignore_matching() {
        check_ignore(
            &["*.pyc", "__pycache__", ".git", "node_modules"],
            &[
                "test.pyc",
                "src/test.pyc",
                "__pycache__/cache.py",
                ".git/config",
                "node_modules/express/index.js",
            ],
            &["src/main.rs", "tests/test_auth.py"],
        );
    }

    #[test]
    fn should_ignore_target_dir() {
        check_ignore(
            &["target"],
            &["target/debug/testx"],
            &["src/target_utils.rs"],
        );
    }

    #[test]
    fn empty_pattern() {
        check_matches("", &[""], &["something"]);
    }

    #[test]
    fn star_only() {
        // `*` accepts the empty string but never a path containing `/`.
        check_matches("*", &["anything", ""], &["dir/file"]);
    }

    #[test]
    fn double_star_only() {
        check_matches("**", &["anything", "dir/file", "a/b/c/d", ""], &[]);
    }

    #[test]
    fn pattern_accessor() {
        let glob = GlobPattern::new("*.rs");
        assert_eq!(glob.pattern(), "*.rs");
    }

    #[test]
    fn complex_pattern() {
        check_matches(
            "src/**/test_*.rs",
            &["src/test_foo.rs", "src/adapters/test_foo.rs"],
            &["tests/test_foo.rs"],
        );
    }

    #[test]
    fn backslash_normalization() {
        check_ignore(&[".git"], &[".git\\config"], &[]);
    }
}