infiniloom_engine/
filtering.rs

1//! Centralized file filtering logic
2//!
3//! This module provides unified pattern matching and filtering functionality
4//! used across all commands (pack, diff, scan, map, chunk, index).
5//!
6//! # Key Features
7//!
8//! - **Glob pattern support**: `*.rs`, `src/**/*.ts`, `**/*.test.js`
9//! - **Substring matching**: `node_modules`, `dist`, `target`
10//! - **Path component matching**: Match against directory names
11//! - **Generic API**: Works with any collection type
12//!
13//! # Usage Example
14//!
15//! ```no_run
16//! use infiniloom_engine::filtering::{apply_exclude_patterns, apply_include_patterns};
17//! use infiniloom_engine::types::RepoFile;
18//!
19//! let mut files: Vec<RepoFile> = vec![/* ... */];
20//! let exclude = vec!["node_modules".to_string(), "*.min.js".to_string()];
21//! let include = vec!["src/**/*.rs".to_string()];
22//!
23//! // Apply filters
24//! apply_exclude_patterns(&mut files, &exclude, |f| &f.relative_path);
25//! apply_include_patterns(&mut files, &include, |f| &f.relative_path);
26//! ```
27
28use glob::Pattern;
29use std::collections::HashMap;
30use std::sync::OnceLock;
31
32/// Compiled pattern cache to avoid recompilation
33static PATTERN_CACHE: OnceLock<std::sync::Mutex<HashMap<String, Option<Pattern>>>> =
34    OnceLock::new();
35
36/// Get or create the pattern cache
37fn get_pattern_cache() -> &'static std::sync::Mutex<HashMap<String, Option<Pattern>>> {
38    PATTERN_CACHE.get_or_init(|| std::sync::Mutex::new(HashMap::new()))
39}
40
41/// Compile a glob pattern with caching
42///
43/// Returns `None` if the pattern is invalid.
44fn compile_pattern(pattern: &str) -> Option<Pattern> {
45    let cache = get_pattern_cache();
46    let mut cache_guard = cache.lock().unwrap();
47
48    if let Some(cached) = cache_guard.get(pattern) {
49        return cached.clone();
50    }
51
52    let compiled = Pattern::new(pattern).ok();
53    cache_guard.insert(pattern.to_string(), compiled.clone());
54    compiled
55}
56
57/// Check if a path matches an exclude pattern
58///
59/// Exclude patterns support:
60/// - Glob patterns: `*.min.js`, `src/**/*.test.ts`
61/// - Path component matches: `tests`, `vendor`, `node_modules` (matches directory names)
62/// - Prefix matches: `target` matches `target/debug/file.rs`
63///
64/// Note: Pattern "target" will match "target/file.rs" and "src/target/file.rs"
65/// but NOT "src/target.rs" (where target is part of a filename).
66///
67/// # Arguments
68///
69/// * `path` - File path to check
70/// * `pattern` - Exclude pattern
71///
72/// # Returns
73///
74/// Returns `true` if the path should be excluded.
75///
76/// # Examples
77///
78/// ```no_run
79/// use infiniloom_engine::filtering::matches_exclude_pattern;
80///
81/// assert!(matches_exclude_pattern("src/tests/foo.rs", "tests"));
82/// assert!(matches_exclude_pattern("node_modules/lib.js", "node_modules"));
83/// assert!(matches_exclude_pattern("dist/bundle.min.js", "*.min.js"));
84/// ```
85pub fn matches_exclude_pattern(path: &str, pattern: &str) -> bool {
86    // Try as glob pattern first if contains wildcard
87    if pattern.contains('*') {
88        if let Some(glob) = compile_pattern(pattern) {
89            if glob.matches(path) {
90                return true;
91            }
92        }
93    }
94
95    // Path component match (e.g., "tests" matches "src/tests/foo.rs")
96    // This handles directory names like "node_modules", "target", "dist"
97    if path.split('/').any(|part| part == pattern) {
98        return true;
99    }
100
101    // Prefix match (e.g., "src/" matches "src/foo.rs")
102    if path.starts_with(pattern) {
103        return true;
104    }
105
106    false
107}
108
109/// Check if a path matches an include pattern
110///
111/// Include patterns support:
112/// - Glob patterns: `*.rs`, `src/**/*.ts`, `**/*.test.js`
113/// - Substring matches: `src`, `lib`
114/// - Suffix matches: `.rs`, `.ts`
115///
116/// # Arguments
117///
118/// * `path` - File path to check
119/// * `pattern` - Include pattern
120///
121/// # Returns
122///
123/// Returns `true` if the path should be included.
124///
125/// # Examples
126///
127/// ```no_run
128/// use infiniloom_engine::filtering::matches_include_pattern;
129///
130/// assert!(matches_include_pattern("src/main.rs", "*.rs"));
131/// assert!(matches_include_pattern("src/lib.rs", "src"));
132/// assert!(matches_include_pattern("foo.test.ts", "*.test.ts"));
133/// ```
134pub fn matches_include_pattern(path: &str, pattern: &str) -> bool {
135    // Try as glob pattern first if contains wildcard
136    if pattern.contains('*') {
137        if let Some(glob) = compile_pattern(pattern) {
138            return glob.matches(path);
139        }
140    }
141
142    // Substring match or suffix match
143    path.contains(pattern) || path.ends_with(pattern)
144}
145
146/// Apply exclude patterns to a collection
147///
148/// Removes items whose paths match any exclude pattern.
149/// Uses a generic `get_path` function to extract the path from each item.
150///
151/// # Arguments
152///
153/// * `items` - Mutable reference to collection to filter
154/// * `patterns` - List of exclude patterns
155/// * `get_path` - Function to extract path from an item
156///
157/// # Type Parameters
158///
159/// * `T` - Type of items in the collection
160/// * `F` - Type of the path extraction function
161///
162/// # Examples
163///
164/// ```no_run
165/// use infiniloom_engine::filtering::apply_exclude_patterns;
166/// use infiniloom_engine::types::RepoFile;
167///
168/// let mut files: Vec<RepoFile> = vec![/* ... */];
169/// let exclude = vec!["node_modules".to_string(), "*.min.js".to_string()];
170///
171/// apply_exclude_patterns(&mut files, &exclude, |f| &f.relative_path);
172/// ```
173pub fn apply_exclude_patterns<T, F>(items: &mut Vec<T>, patterns: &[String], get_path: F)
174where
175    F: Fn(&T) -> &str,
176{
177    if patterns.is_empty() {
178        return;
179    }
180
181    items.retain(|item| {
182        let path = get_path(item);
183        !patterns
184            .iter()
185            .any(|pattern| matches_exclude_pattern(path, pattern))
186    });
187}
188
189/// Apply include patterns to a collection
190///
191/// Keeps only items whose paths match at least one include pattern.
192/// Uses a generic `get_path` function to extract the path from each item.
193///
194/// # Arguments
195///
196/// * `items` - Mutable reference to collection to filter
197/// * `patterns` - List of include patterns
198/// * `get_path` - Function to extract path from an item
199///
200/// # Type Parameters
201///
202/// * `T` - Type of items in the collection
203/// * `F` - Type of the path extraction function
204///
205/// # Examples
206///
207/// ```no_run
208/// use infiniloom_engine::filtering::apply_include_patterns;
209/// use infiniloom_engine::types::RepoFile;
210///
211/// let mut files: Vec<RepoFile> = vec![/* ... */];
212/// let include = vec!["*.rs".to_string(), "*.ts".to_string()];
213///
214/// apply_include_patterns(&mut files, &include, |f| &f.relative_path);
215/// ```
216pub fn apply_include_patterns<T, F>(items: &mut Vec<T>, patterns: &[String], get_path: F)
217where
218    F: Fn(&T) -> &str,
219{
220    if patterns.is_empty() {
221        return;
222    }
223
224    items.retain(|item| {
225        let path = get_path(item);
226        patterns
227            .iter()
228            .any(|pattern| matches_include_pattern(path, pattern))
229    });
230}
231
232/// Compile patterns into glob::Pattern objects
233///
234/// Used by CLI commands that need pre-compiled patterns for repeated use.
235///
236/// # Arguments
237///
238/// * `patterns` - List of pattern strings
239///
240/// # Returns
241///
242/// Vector of successfully compiled glob patterns.
243/// Invalid patterns are silently skipped.
244///
245/// # Examples
246///
247/// ```no_run
248/// use infiniloom_engine::filtering::compile_patterns;
249///
250/// let patterns = vec!["*.rs".to_string(), "src/**/*.ts".to_string()];
251/// let compiled = compile_patterns(&patterns);
252/// assert_eq!(compiled.len(), 2);
253/// ```
254pub fn compile_patterns(patterns: &[String]) -> Vec<Pattern> {
255    patterns.iter().filter_map(|p| compile_pattern(p)).collect()
256}
257
#[cfg(test)]
mod tests {
    use super::*;

    // =============================================================================
    // Shared fixtures
    // =============================================================================

    /// Minimal stand-in for a repository file: only the path matters here.
    #[derive(Debug, Clone)]
    struct TestFile {
        path: String,
    }

    /// Build a list of `TestFile`s from string literals.
    fn files_from(paths: &[&str]) -> Vec<TestFile> {
        paths
            .iter()
            .map(|p| TestFile { path: (*p).to_string() })
            .collect()
    }

    /// Turn string literals into the owned pattern list the API expects.
    fn patterns_from(raw: &[&str]) -> Vec<String> {
        raw.iter().map(|p| (*p).to_string()).collect()
    }

    /// Assert that `matcher(path, pattern)` equals `expected` for every row.
    fn check_table(matcher: fn(&str, &str) -> bool, rows: &[(&str, &str, bool)]) {
        for &(path, pattern, expected) in rows {
            assert_eq!(
                matcher(path, pattern),
                expected,
                "path {path:?} against pattern {pattern:?}"
            );
        }
    }

    // =============================================================================
    // Exclude Pattern Tests
    // =============================================================================

    #[test]
    fn test_exclude_glob_patterns() {
        check_table(
            matches_exclude_pattern,
            &[
                ("foo.min.js", "*.min.js", true),
                ("dist/bundle.min.js", "*.min.js", true),
                ("foo.js", "*.min.js", false),
            ],
        );
    }

    #[test]
    fn test_exclude_glob_recursive() {
        check_table(
            matches_exclude_pattern,
            &[
                ("src/tests/foo.rs", "**/tests/**", true),
                ("tests/unit/bar.rs", "**/tests/**", true),
                ("src/main.rs", "**/tests/**", false),
            ],
        );
    }

    #[test]
    fn test_exclude_substring_match() {
        check_table(
            matches_exclude_pattern,
            &[
                ("node_modules/foo/bar.js", "node_modules", true),
                ("dist/bundle.js", "dist", true),
                ("src/index.ts", "dist", false),
            ],
        );
    }

    #[test]
    fn test_exclude_prefix_match() {
        check_table(
            matches_exclude_pattern,
            &[
                ("target/debug/main", "target", true),
                ("vendor/lib.js", "vendor", true),
                ("src/target.rs", "target", false),
            ],
        );
    }

    #[test]
    fn test_exclude_component_match() {
        check_table(
            matches_exclude_pattern,
            &[
                ("src/tests/foo.rs", "tests", true),
                ("lib/vendor/bar.js", "vendor", true),
                ("src/main.rs", "tests", false),
            ],
        );
    }

    // =============================================================================
    // Include Pattern Tests
    // =============================================================================

    #[test]
    fn test_include_glob_patterns() {
        check_table(
            matches_include_pattern,
            &[
                ("foo.rs", "*.rs", true),
                ("src/main.rs", "*.rs", true),
                ("foo.py", "*.rs", false),
            ],
        );
    }

    #[test]
    fn test_include_glob_recursive() {
        check_table(
            matches_include_pattern,
            &[
                ("src/foo/bar.rs", "src/**/*.rs", true),
                ("src/main.rs", "src/**/*.rs", true),
                ("tests/foo.rs", "src/**/*.rs", false),
            ],
        );
    }

    #[test]
    fn test_include_substring_match() {
        check_table(
            matches_include_pattern,
            &[
                ("src/main.rs", "src", true),
                ("lib/index.ts", "lib", true),
                ("tests/foo.rs", "src", false),
            ],
        );
    }

    #[test]
    fn test_include_suffix_match() {
        check_table(
            matches_include_pattern,
            &[
                ("foo.test.ts", ".test.ts", true),
                ("bar.spec.js", ".spec.js", true),
                ("foo.ts", ".test.ts", false),
            ],
        );
    }

    // =============================================================================
    // Generic Filtering Tests
    // =============================================================================

    #[test]
    fn test_apply_exclude_patterns_empty() {
        let mut files = files_from(&["src/main.rs", "node_modules/lib.js"]);

        apply_exclude_patterns(&mut files, &[], |f| f.path.as_str());

        assert_eq!(files.len(), 2);
    }

    #[test]
    fn test_apply_exclude_patterns_basic() {
        let mut files = files_from(&["src/main.rs", "node_modules/lib.js", "dist/bundle.js"]);
        let exclude = patterns_from(&["node_modules", "dist"]);

        apply_exclude_patterns(&mut files, &exclude, |f| f.path.as_str());

        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
    }

    #[test]
    fn test_apply_exclude_patterns_glob() {
        let mut files = files_from(&["foo.js", "foo.min.js", "bar.js"]);
        let exclude = patterns_from(&["*.min.js"]);

        apply_exclude_patterns(&mut files, &exclude, |f| f.path.as_str());

        assert_eq!(files.len(), 2);
        assert!(!files.iter().any(|f| f.path.contains(".min.")));
    }

    #[test]
    fn test_apply_include_patterns_empty() {
        let mut files = files_from(&["src/main.rs", "src/lib.py"]);

        apply_include_patterns(&mut files, &[], |f| f.path.as_str());

        assert_eq!(files.len(), 2);
    }

    #[test]
    fn test_apply_include_patterns_basic() {
        let mut files = files_from(&["src/main.rs", "src/lib.py", "src/index.ts"]);
        let include = patterns_from(&["*.rs", "*.ts"]);

        apply_include_patterns(&mut files, &include, |f| f.path.as_str());

        assert_eq!(files.len(), 2);
        for extension in [".rs", ".ts"] {
            assert!(files.iter().any(|f| f.path.ends_with(extension)));
        }
    }

    #[test]
    fn test_apply_include_patterns_substring() {
        let mut files = files_from(&["src/main.rs", "tests/test.rs", "lib/index.ts"]);
        let include = patterns_from(&["src"]);

        apply_include_patterns(&mut files, &include, |f| f.path.as_str());

        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
    }

    #[test]
    fn test_compile_patterns() {
        let patterns = patterns_from(&["*.rs", "*.ts", "src/**/*.js"]);

        assert_eq!(compile_patterns(&patterns).len(), 3);
    }

    #[test]
    fn test_compile_patterns_invalid() {
        // The middle entry is an unterminated character class, i.e. an invalid glob.
        let patterns = patterns_from(&["*.rs", "[invalid", "*.ts"]);

        // The invalid pattern is skipped rather than reported.
        assert_eq!(compile_patterns(&patterns).len(), 2);
    }

    // =============================================================================
    // Integration Tests
    // =============================================================================

    #[test]
    fn test_exclude_then_include() {
        let mut files = files_from(&[
            "src/main.rs",
            "src/lib.rs",
            "src/main.test.rs",
            "node_modules/lib.js",
        ]);

        // Stage 1: drop test files and node_modules.
        let exclude = patterns_from(&["node_modules", "*.test.rs"]);
        apply_exclude_patterns(&mut files, &exclude, |f| f.path.as_str());
        assert_eq!(files.len(), 2);

        // Stage 2: restrict to Rust files; both survivors already qualify.
        let include = patterns_from(&["*.rs"]);
        apply_include_patterns(&mut files, &include, |f| f.path.as_str());
        assert_eq!(files.len(), 2);
        assert!(files.iter().all(|f| f.path.ends_with(".rs")));
    }

    #[test]
    fn test_pattern_cache() {
        // The first call compiles the pattern, the second is answered from the cache.
        let first = compile_pattern("*.rs");
        assert!(first.is_some());
        let second = compile_pattern("*.rs");
        assert!(second.is_some());

        // Both handles must behave the same way.
        for glob in [first, second].into_iter().flatten() {
            assert!(glob.matches("foo.rs"));
        }
    }
}