project_rag/
glob_utils.rs

1//! Glob pattern matching utilities for path filtering
2
3use globset::{Glob, GlobMatcher};
4
5/// Check if a file path matches any of the given glob patterns
6///
7/// # Examples
8///
9/// ```
10/// use project_rag::glob_utils::matches_any_pattern;
11///
12/// let patterns = vec!["lib/**".to_string(), "src/**/*.ts".to_string()];
13/// assert!(matches_any_pattern("/project/lib/utils.ts", &patterns));
14/// assert!(matches_any_pattern("/project/src/components/Button.ts", &patterns));
15/// assert!(!matches_any_pattern("/project/tests/unit.rs", &patterns));
16/// ```
17pub fn matches_any_pattern(path: &str, patterns: &[String]) -> bool {
18    if patterns.is_empty() {
19        return true; // No patterns means match everything
20    }
21
22    patterns.iter().any(|pattern| {
23        // Try to compile the glob pattern
24        match Glob::new(pattern) {
25            Ok(glob) => {
26                let matcher = glob.compile_matcher();
27
28                // Try matching against the full path
29                if matcher.is_match(path) {
30                    return true;
31                }
32
33                // Try without leading slash
34                let path_no_slash = path.trim_start_matches('/');
35                if matcher.is_match(path_no_slash) {
36                    return true;
37                }
38
39                // For patterns like "lib/**", also try matching against path suffixes
40                // This handles cases like "/absolute/path/to/lib/file.ts" matching "lib/**"
41                if pattern.contains("**") || pattern.contains('*') {
42                    // Split path into components and try matching from each component
43                    let path_parts: Vec<&str> = path.split('/').collect();
44                    for i in 0..path_parts.len() {
45                        let suffix = path_parts[i..].join("/");
46                        if matcher.is_match(&suffix) {
47                            return true;
48                        }
49                    }
50                }
51
52                false
53            }
54            Err(e) => {
55                // If glob compilation fails, fall back to simple substring matching
56                tracing::warn!(
57                    "Invalid glob pattern '{}', falling back to substring match: {}",
58                    pattern,
59                    e
60                );
61                path.contains(pattern)
62            }
63        }
64    })
65}
66
67/// Compile multiple glob patterns into matchers for efficient repeated matching
68///
69/// Returns None if any pattern fails to compile
70pub fn compile_patterns(patterns: &[String]) -> Option<Vec<GlobMatcher>> {
71    patterns
72        .iter()
73        .map(|pattern| {
74            Glob::new(pattern)
75                .map(|g| g.compile_matcher())
76                .map_err(|e| {
77                    tracing::warn!("Failed to compile glob pattern '{}': {}", pattern, e);
78                    e
79                })
80                .ok()
81        })
82        .collect()
83}
84
85/// Check if a path matches any of the precompiled glob matchers
86pub fn matches_any_matcher(path: &str, matchers: &[GlobMatcher]) -> bool {
87    if matchers.is_empty() {
88        return true;
89    }
90
91    matchers.iter().any(|matcher| {
92        // Try matching against the full path
93        if matcher.is_match(path) {
94            return true;
95        }
96
97        // Try without leading slash
98        let path_no_slash = path.trim_start_matches('/');
99        if matcher.is_match(path_no_slash) {
100            return true;
101        }
102
103        // Try matching against path suffixes for glob patterns
104        let path_parts: Vec<&str> = path.split('/').collect();
105        for i in 0..path_parts.len() {
106            let suffix = path_parts[i..].join("/");
107            if matcher.is_match(&suffix) {
108                return true;
109            }
110        }
111
112        false
113    })
114}
115
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned pattern list from string literals.
    fn owned(patterns: &[&str]) -> Vec<String> {
        patterns.iter().map(|p| p.to_string()).collect()
    }

    /// Asserts that `matches_any_pattern` returns `expected` for every case.
    fn check(patterns: &[String], cases: &[(&str, bool)]) {
        for &(path, expected) in cases {
            assert_eq!(
                matches_any_pattern(path, patterns),
                expected,
                "path {:?} against patterns {:?}",
                path,
                patterns
            );
        }
    }

    #[test]
    fn test_matches_directory_glob() {
        check(
            &owned(&["lib/**"]),
            &[
                ("/project/lib/utils.ts", true),
                ("lib/nested/file.rs", true),
                ("/project/src/main.rs", false),
            ],
        );
    }

    #[test]
    fn test_matches_extension_glob() {
        check(
            &owned(&["**/*.ts"]),
            &[
                ("/project/src/main.ts", true),
                ("lib/utils.ts", true),
                ("/project/src/main.rs", false),
            ],
        );
    }

    #[test]
    fn test_matches_multiple_patterns() {
        check(
            &owned(&["lib/**", "**/*.tsx"]),
            &[
                ("/project/lib/utils.ts", true),
                ("/project/src/Component.tsx", true),
                ("/project/src/main.rs", false),
            ],
        );
    }

    #[test]
    fn test_matches_complex_glob() {
        // One pattern combining a directory prefix, a recursive wildcard and
        // a file extension.
        check(
            &owned(&["src/components/**/*.ts"]),
            &[
                ("/project/src/components/Button.ts", true),
                ("/project/lib/utils.ts", false),
            ],
        );
    }

    #[test]
    fn test_empty_patterns() {
        let patterns: Vec<String> = Vec::new();
        check(&patterns, &[("/any/path.rs", true)]);
    }

    #[test]
    fn test_invalid_pattern_fallback() {
        // "[invalid" does not compile as a glob, so it is used as a substring.
        check(
            &owned(&["[invalid"]),
            &[
                ("/path/[invalid/file.rs", true),
                ("/path/valid/file.rs", false),
            ],
        );
    }

    #[test]
    fn test_compile_patterns() {
        let patterns = owned(&["lib/**", "**/*.rs"]);
        let matchers = compile_patterns(&patterns).expect("both patterns are valid globs");
        assert_eq!(matchers.len(), 2);

        let cases = [
            ("/project/lib/utils.ts", true),
            ("/project/src/main.rs", true),
            ("/project/test.txt", false),
        ];
        for (path, expected) in cases {
            assert_eq!(matches_any_matcher(path, &matchers), expected, "path {:?}", path);
        }
    }

    #[test]
    fn test_compile_invalid_patterns() {
        // A single bad pattern makes the whole compilation return `None`.
        let patterns = owned(&["lib/**", "[invalid"]);
        assert!(compile_patterns(&patterns).is_none());
    }

    #[test]
    fn test_matches_without_leading_slash() {
        // The leading slash must not influence the outcome.
        check(
            &owned(&["lib/**"]),
            &[("lib/file.rs", true), ("/lib/file.rs", true)],
        );
    }

    #[test]
    fn test_specific_file_pattern() {
        check(
            &owned(&["**/test.rs"]),
            &[
                ("/project/src/test.rs", true),
                ("test.rs", true),
                ("/project/src/main.rs", false),
            ],
        );
    }
}