infiniloom_engine/
filtering.rs

1//! Centralized file filtering logic
2//!
3//! This module provides unified pattern matching and filtering functionality
4//! used across all commands (pack, diff, scan, map, chunk, index).
5//!
6//! # Key Features
7//!
8//! - **Glob pattern support**: `*.rs`, `src/**/*.ts`, `**/*.test.js`
9//! - **Substring matching**: `node_modules`, `dist`, `target`
10//! - **Path component matching**: Match against directory names
//! - **Generic API**: Works with a `Vec` of any item type via a path-accessor closure
12//!
13//! # Usage Example
14//!
15//! ```no_run
16//! use infiniloom_engine::filtering::{apply_exclude_patterns, apply_include_patterns};
17//! use infiniloom_engine::types::RepoFile;
18//!
19//! let mut files: Vec<RepoFile> = vec![/* ... */];
20//! let exclude = vec!["node_modules".to_string(), "*.min.js".to_string()];
21//! let include = vec!["src/**/*.rs".to_string()];
22//!
23//! // Apply filters
24//! apply_exclude_patterns(&mut files, &exclude, |f| &f.relative_path);
25//! apply_include_patterns(&mut files, &include, |f| &f.relative_path);
26//! ```
27
28use glob::Pattern;
29use std::collections::HashMap;
30use std::sync::OnceLock;
31
32/// Compiled pattern cache to avoid recompilation
33static PATTERN_CACHE: OnceLock<std::sync::Mutex<HashMap<String, Option<Pattern>>>> =
34    OnceLock::new();
35
36/// Get or create the pattern cache
37fn get_pattern_cache() -> &'static std::sync::Mutex<HashMap<String, Option<Pattern>>> {
38    PATTERN_CACHE.get_or_init(|| std::sync::Mutex::new(HashMap::new()))
39}
40
41/// Compile a glob pattern with caching
42///
43/// Returns `None` if the pattern is invalid.
44fn compile_pattern(pattern: &str) -> Option<Pattern> {
45    let cache = get_pattern_cache();
46    let mut cache_guard = cache.lock().unwrap();
47
48    if let Some(cached) = cache_guard.get(pattern) {
49        return cached.clone();
50    }
51
52    let compiled = Pattern::new(pattern).ok();
53    cache_guard.insert(pattern.to_string(), compiled.clone());
54    compiled
55}
56
57/// Check if a path matches an exclude pattern
58///
59/// Exclude patterns support:
60/// - Glob patterns: `*.min.js`, `src/**/*.test.ts`
61/// - Path component matches: `tests`, `vendor`, `node_modules` (matches directory names)
62/// - Prefix matches: `target` matches `target/debug/file.rs`
63///
64/// Note: Pattern "target" will match "target/file.rs" and "src/target/file.rs"
65/// but NOT "src/target.rs" (where target is part of a filename).
66///
67/// # Arguments
68///
69/// * `path` - File path to check
70/// * `pattern` - Exclude pattern
71///
72/// # Returns
73///
74/// Returns `true` if the path should be excluded.
75///
76/// # Examples
77///
78/// ```no_run
79/// use infiniloom_engine::filtering::matches_exclude_pattern;
80///
81/// assert!(matches_exclude_pattern("src/tests/foo.rs", "tests"));
82/// assert!(matches_exclude_pattern("node_modules/lib.js", "node_modules"));
83/// assert!(matches_exclude_pattern("dist/bundle.min.js", "*.min.js"));
84/// ```
85pub fn matches_exclude_pattern(path: &str, pattern: &str) -> bool {
86    // Empty pattern should not match anything
87    if pattern.is_empty() {
88        return false;
89    }
90
91    // Try as glob pattern first if contains wildcard
92    if pattern.contains('*') {
93        if let Some(glob) = compile_pattern(pattern) {
94            if glob.matches(path) {
95                return true;
96            }
97        }
98    }
99
100    // Path component match (e.g., "tests" matches "src/tests/foo.rs")
101    // This handles directory names like "node_modules", "target", "dist"
102    if path.split('/').any(|part| part == pattern) {
103        return true;
104    }
105
106    // Prefix match (e.g., "src/" matches "src/foo.rs")
107    if path.starts_with(pattern) {
108        return true;
109    }
110
111    false
112}
113
114/// Check if a path matches an include pattern
115///
116/// Include patterns support:
117/// - Glob patterns: `*.rs`, `src/**/*.ts`, `**/*.test.js`
118/// - Substring matches: `src`, `lib`
119/// - Suffix matches: `.rs`, `.ts`
120///
121/// # Arguments
122///
123/// * `path` - File path to check
124/// * `pattern` - Include pattern
125///
126/// # Returns
127///
128/// Returns `true` if the path should be included.
129///
130/// # Examples
131///
132/// ```no_run
133/// use infiniloom_engine::filtering::matches_include_pattern;
134///
135/// assert!(matches_include_pattern("src/main.rs", "*.rs"));
136/// assert!(matches_include_pattern("src/lib.rs", "src"));
137/// assert!(matches_include_pattern("foo.test.ts", "*.test.ts"));
138/// ```
139pub fn matches_include_pattern(path: &str, pattern: &str) -> bool {
140    // Empty pattern should not match anything
141    if pattern.is_empty() {
142        return false;
143    }
144
145    // Try as glob pattern first if contains wildcard
146    if pattern.contains('*') {
147        if let Some(glob) = compile_pattern(pattern) {
148            return glob.matches(path);
149        }
150    }
151
152    // Substring match or suffix match
153    path.contains(pattern) || path.ends_with(pattern)
154}
155
156/// Apply exclude patterns to a collection
157///
158/// Removes items whose paths match any exclude pattern.
159/// Uses a generic `get_path` function to extract the path from each item.
160///
161/// # Arguments
162///
163/// * `items` - Mutable reference to collection to filter
164/// * `patterns` - List of exclude patterns
165/// * `get_path` - Function to extract path from an item
166///
167/// # Type Parameters
168///
169/// * `T` - Type of items in the collection
170/// * `F` - Type of the path extraction function
171///
172/// # Examples
173///
174/// ```no_run
175/// use infiniloom_engine::filtering::apply_exclude_patterns;
176/// use infiniloom_engine::types::RepoFile;
177///
178/// let mut files: Vec<RepoFile> = vec![/* ... */];
179/// let exclude = vec!["node_modules".to_string(), "*.min.js".to_string()];
180///
181/// apply_exclude_patterns(&mut files, &exclude, |f| &f.relative_path);
182/// ```
183pub fn apply_exclude_patterns<T, F>(items: &mut Vec<T>, patterns: &[String], get_path: F)
184where
185    F: Fn(&T) -> &str,
186{
187    if patterns.is_empty() {
188        return;
189    }
190
191    items.retain(|item| {
192        let path = get_path(item);
193        !patterns
194            .iter()
195            .any(|pattern| matches_exclude_pattern(path, pattern))
196    });
197}
198
199/// Apply include patterns to a collection
200///
201/// Keeps only items whose paths match at least one include pattern.
202/// Uses a generic `get_path` function to extract the path from each item.
203///
204/// # Arguments
205///
206/// * `items` - Mutable reference to collection to filter
207/// * `patterns` - List of include patterns
208/// * `get_path` - Function to extract path from an item
209///
210/// # Type Parameters
211///
212/// * `T` - Type of items in the collection
213/// * `F` - Type of the path extraction function
214///
215/// # Examples
216///
217/// ```no_run
218/// use infiniloom_engine::filtering::apply_include_patterns;
219/// use infiniloom_engine::types::RepoFile;
220///
221/// let mut files: Vec<RepoFile> = vec![/* ... */];
222/// let include = vec!["*.rs".to_string(), "*.ts".to_string()];
223///
224/// apply_include_patterns(&mut files, &include, |f| &f.relative_path);
225/// ```
226pub fn apply_include_patterns<T, F>(items: &mut Vec<T>, patterns: &[String], get_path: F)
227where
228    F: Fn(&T) -> &str,
229{
230    if patterns.is_empty() {
231        return;
232    }
233
234    items.retain(|item| {
235        let path = get_path(item);
236        patterns
237            .iter()
238            .any(|pattern| matches_include_pattern(path, pattern))
239    });
240}
241
242/// Compile patterns into glob::Pattern objects
243///
244/// Used by CLI commands that need pre-compiled patterns for repeated use.
245///
246/// # Arguments
247///
248/// * `patterns` - List of pattern strings
249///
250/// # Returns
251///
252/// Vector of successfully compiled glob patterns.
253/// Invalid patterns are silently skipped.
254///
255/// # Examples
256///
257/// ```no_run
258/// use infiniloom_engine::filtering::compile_patterns;
259///
260/// let patterns = vec!["*.rs".to_string(), "src/**/*.ts".to_string()];
261/// let compiled = compile_patterns(&patterns);
262/// assert_eq!(compiled.len(), 2);
263/// ```
264pub fn compile_patterns(patterns: &[String]) -> Vec<Pattern> {
265    patterns.iter().filter_map(|p| compile_pattern(p)).collect()
266}
267
#[cfg(test)]
mod tests {
    use super::*;

    // =============================================================================
    // Fixtures and helpers
    // =============================================================================

    /// Minimal stand-in for a repository file; only the path matters here.
    #[derive(Debug, Clone)]
    struct TestFile {
        path: String,
    }

    /// Build a list of `TestFile`s from path literals.
    fn files_from(paths: &[&str]) -> Vec<TestFile> {
        paths
            .iter()
            .map(|p| TestFile { path: (*p).to_string() })
            .collect()
    }

    /// Turn string literals into owned pattern strings.
    fn patterns_from(raw: &[&str]) -> Vec<String> {
        raw.iter().map(|p| (*p).to_string()).collect()
    }

    /// Assert the exclude verdict for each `(path, pattern, expected)` row.
    fn check_exclude(cases: &[(&str, &str, bool)]) {
        for &(path, pattern, expected) in cases {
            assert_eq!(
                matches_exclude_pattern(path, pattern),
                expected,
                "exclude pattern {:?} against path {:?}",
                pattern,
                path
            );
        }
    }

    /// Assert the include verdict for each `(path, pattern, expected)` row.
    fn check_include(cases: &[(&str, &str, bool)]) {
        for &(path, pattern, expected) in cases {
            assert_eq!(
                matches_include_pattern(path, pattern),
                expected,
                "include pattern {:?} against path {:?}",
                pattern,
                path
            );
        }
    }

    // =============================================================================
    // Exclude Pattern Tests
    // =============================================================================

    #[test]
    fn test_exclude_glob_patterns() {
        check_exclude(&[
            ("foo.min.js", "*.min.js", true),
            ("dist/bundle.min.js", "*.min.js", true),
            ("foo.js", "*.min.js", false),
        ]);
    }

    #[test]
    fn test_exclude_glob_recursive() {
        check_exclude(&[
            ("src/tests/foo.rs", "**/tests/**", true),
            ("tests/unit/bar.rs", "**/tests/**", true),
            ("src/main.rs", "**/tests/**", false),
        ]);
    }

    #[test]
    fn test_exclude_substring_match() {
        check_exclude(&[
            ("node_modules/foo/bar.js", "node_modules", true),
            ("dist/bundle.js", "dist", true),
            ("src/index.ts", "dist", false),
        ]);
    }

    #[test]
    fn test_exclude_prefix_match() {
        check_exclude(&[
            ("target/debug/main", "target", true),
            ("vendor/lib.js", "vendor", true),
            ("src/target.rs", "target", false),
        ]);
    }

    #[test]
    fn test_exclude_component_match() {
        check_exclude(&[
            ("src/tests/foo.rs", "tests", true),
            ("lib/vendor/bar.js", "vendor", true),
            ("src/main.rs", "tests", false),
        ]);
    }

    // =============================================================================
    // Include Pattern Tests
    // =============================================================================

    #[test]
    fn test_include_glob_patterns() {
        check_include(&[
            ("foo.rs", "*.rs", true),
            ("src/main.rs", "*.rs", true),
            ("foo.py", "*.rs", false),
        ]);
    }

    #[test]
    fn test_include_glob_recursive() {
        check_include(&[
            ("src/foo/bar.rs", "src/**/*.rs", true),
            ("src/main.rs", "src/**/*.rs", true),
            ("tests/foo.rs", "src/**/*.rs", false),
        ]);
    }

    #[test]
    fn test_include_substring_match() {
        check_include(&[
            ("src/main.rs", "src", true),
            ("lib/index.ts", "lib", true),
            ("tests/foo.rs", "src", false),
        ]);
    }

    #[test]
    fn test_include_suffix_match() {
        check_include(&[
            ("foo.test.ts", ".test.ts", true),
            ("bar.spec.js", ".spec.js", true),
            ("foo.ts", ".test.ts", false),
        ]);
    }

    // =============================================================================
    // Generic Filtering Tests
    // =============================================================================

    #[test]
    fn test_apply_exclude_patterns_empty() {
        let mut files = files_from(&["src/main.rs", "node_modules/lib.js"]);

        apply_exclude_patterns(&mut files, &[], |f| f.path.as_str());
        assert_eq!(files.len(), 2);
    }

    #[test]
    fn test_apply_exclude_patterns_basic() {
        let mut files = files_from(&["src/main.rs", "node_modules/lib.js", "dist/bundle.js"]);
        let exclude = patterns_from(&["node_modules", "dist"]);

        apply_exclude_patterns(&mut files, &exclude, |f| f.path.as_str());

        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
    }

    #[test]
    fn test_apply_exclude_patterns_glob() {
        let mut files = files_from(&["foo.js", "foo.min.js", "bar.js"]);
        let exclude = patterns_from(&["*.min.js"]);

        apply_exclude_patterns(&mut files, &exclude, |f| f.path.as_str());

        assert_eq!(files.len(), 2);
        assert!(files.iter().all(|f| !f.path.contains(".min.")));
    }

    #[test]
    fn test_apply_include_patterns_empty() {
        let mut files = files_from(&["src/main.rs", "src/lib.py"]);

        apply_include_patterns(&mut files, &[], |f| f.path.as_str());
        assert_eq!(files.len(), 2);
    }

    #[test]
    fn test_apply_include_patterns_basic() {
        let mut files = files_from(&["src/main.rs", "src/lib.py", "src/index.ts"]);
        let include = patterns_from(&["*.rs", "*.ts"]);

        apply_include_patterns(&mut files, &include, |f| f.path.as_str());

        assert_eq!(files.len(), 2);
        assert!(files.iter().any(|f| f.path.ends_with(".rs")));
        assert!(files.iter().any(|f| f.path.ends_with(".ts")));
    }

    #[test]
    fn test_apply_include_patterns_substring() {
        let mut files = files_from(&["src/main.rs", "tests/test.rs", "lib/index.ts"]);
        let include = patterns_from(&["src"]);

        apply_include_patterns(&mut files, &include, |f| f.path.as_str());

        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
    }

    #[test]
    fn test_compile_patterns() {
        let patterns = patterns_from(&["*.rs", "*.ts", "src/**/*.js"]);

        assert_eq!(compile_patterns(&patterns).len(), 3);
    }

    #[test]
    fn test_compile_patterns_invalid() {
        // "[invalid" is an unterminated character class, i.e. not a valid glob.
        let patterns = patterns_from(&["*.rs", "[invalid", "*.ts"]);

        // The invalid entry is skipped, the two valid ones remain.
        assert_eq!(compile_patterns(&patterns).len(), 2);
    }

    // =============================================================================
    // Integration Tests
    // =============================================================================

    #[test]
    fn test_exclude_then_include() {
        let mut files = files_from(&[
            "src/main.rs",
            "src/lib.rs",
            "src/main.test.rs",
            "node_modules/lib.js",
        ]);

        // Step 1: drop test files and node_modules.
        let exclude = patterns_from(&["node_modules", "*.test.rs"]);
        apply_exclude_patterns(&mut files, &exclude, |f| f.path.as_str());
        assert_eq!(files.len(), 2);

        // Step 2: restrict to Rust files; both survivors already qualify.
        let include = patterns_from(&["*.rs"]);
        apply_include_patterns(&mut files, &include, |f| f.path.as_str());
        assert_eq!(files.len(), 2);
        assert!(files.iter().all(|f| f.path.ends_with(".rs")));
    }

    #[test]
    fn test_pattern_cache() {
        // The first call compiles the pattern, the second is served from the cache.
        let first = compile_pattern("*.rs");
        let second = compile_pattern("*.rs");
        assert!(first.is_some());
        assert!(second.is_some());

        // Both results must behave the same way.
        assert!(first.unwrap().matches("foo.rs"));
        assert!(second.unwrap().matches("foo.rs"));
    }
}