rust_guardian/patterns/
path_filter.rs

1//! Path filtering using .gitignore-style patterns
2//!
3//! Architectural Principle: Service Layer - PathFilter orchestrates complex path matching logic
4//! - Encapsulates the rules for include/exclude pattern evaluation
5//! - Provides clean interface for determining whether a path should be analyzed
6//! - Handles .guardianignore file discovery and parsing
7
8use crate::domain::violations::{GuardianError, GuardianResult};
9use std::fs;
10use std::path::{Path, PathBuf};
11use walkdir::WalkDir;
12
13/// Manages path filtering using .gitignore-style patterns
14#[derive(Debug, Clone)]
15pub struct PathFilter {
16    /// Include/exclude patterns
17    patterns: Vec<FilterPattern>,
18    /// Whether to process .guardianignore files
19    process_ignore_files: bool,
20    /// Name of ignore files to process
21    ignore_filename: String,
22}
23
24/// A single path filter pattern
25#[derive(Debug, Clone)]
26struct FilterPattern {
27    /// The glob pattern
28    pattern: glob::Pattern,
29    /// Whether this is an include pattern (starts with !)
30    is_include: bool,
31    /// Original pattern string for debugging
32    original: String,
33}
34
35impl PathFilter {
36    /// Create a new path filter with the given patterns
37    pub fn new(patterns: Vec<String>, ignore_filename: Option<String>) -> GuardianResult<Self> {
38        let mut filter_patterns = Vec::new();
39
40        for pattern_str in patterns {
41            let (is_include, pattern_str) = if let Some(stripped) = pattern_str.strip_prefix('!') {
42                (true, stripped.to_string())
43            } else {
44                (false, pattern_str)
45            };
46
47            let pattern = glob::Pattern::new(&pattern_str).map_err(|e| {
48                GuardianError::pattern(format!("Invalid pattern '{pattern_str}': {e}"))
49            })?;
50
51            filter_patterns.push(FilterPattern { pattern, is_include, original: pattern_str });
52        }
53
54        Ok(Self {
55            patterns: filter_patterns,
56            process_ignore_files: ignore_filename.is_some(),
57            ignore_filename: ignore_filename.unwrap_or_else(|| ".guardianignore".to_string()),
58        })
59    }
60
61    /// Create a default path filter with sensible exclusions
62    pub fn with_defaults() -> GuardianResult<Self> {
63        Self::new(
64            vec![
65                // Exclude common build/cache directories
66                "target/**".to_string(),
67                "**/node_modules/**".to_string(),
68                "**/.git/**".to_string(),
69                "**/*.generated.*".to_string(),
70                "**/dist/**".to_string(),
71                "**/build/**".to_string(),
72            ],
73            Some(".guardianignore".to_string()),
74        )
75    }
76
77    /// Check if a file should be analyzed based on all patterns and ignore files
78    pub fn should_analyze<P: AsRef<Path>>(&self, path: P) -> GuardianResult<bool> {
79        let path = path.as_ref();
80        let _path_str = path.to_string_lossy();
81
82        // Start with default: include all files
83        let mut should_include = true;
84
85        // Apply patterns in order (like .gitignore)
86        for pattern in &self.patterns {
87            let matches = self.pattern_matches_path(pattern, path);
88
89            if matches {
90                should_include = pattern.is_include;
91            }
92        }
93
94        // If excluded by configured patterns, return false
95        if !should_include {
96            return Ok(false);
97        }
98
99        // Check .guardianignore files if enabled
100        if self.process_ignore_files {
101            let ignored_by_files = self.is_ignored_by_files(path)?;
102            if ignored_by_files {
103                return Ok(false);
104            }
105        }
106
107        Ok(true)
108    }
109
110    /// Check if path is ignored by .guardianignore files
111    fn is_ignored_by_files<P: AsRef<Path>>(&self, path: P) -> GuardianResult<bool> {
112        let path = path.as_ref();
113        let mut current_dir = path.parent();
114        let mut is_ignored = false;
115
116        // Walk up the directory tree looking for .guardianignore files
117        while let Some(dir) = current_dir {
118            let ignore_file = dir.join(&self.ignore_filename);
119
120            if ignore_file.exists() {
121                let patterns = self.load_ignore_file(&ignore_file)?;
122
123                // Check if any pattern in this file matches
124                for pattern in patterns {
125                    // Make path relative to the ignore file's directory
126                    if let Ok(relative_path) = path.strip_prefix(dir) {
127                        let matches = self.pattern_matches_path(&pattern, relative_path);
128
129                        if matches {
130                            is_ignored = !pattern.is_include;
131                        }
132                    }
133                }
134            }
135
136            current_dir = dir.parent();
137        }
138
139        Ok(is_ignored)
140    }
141
142    /// Load patterns from a .guardianignore file
143    fn load_ignore_file<P: AsRef<Path>>(&self, path: P) -> GuardianResult<Vec<FilterPattern>> {
144        let content = fs::read_to_string(&path).map_err(|e| {
145            GuardianError::config(format!(
146                "Failed to read ignore file '{}': {}",
147                path.as_ref().display(),
148                e
149            ))
150        })?;
151
152        let mut patterns = Vec::new();
153
154        for line in content.lines() {
155            let line = line.trim();
156
157            // Skip empty lines and comments
158            if line.is_empty() || line.starts_with('#') {
159                continue;
160            }
161
162            let (is_include, pattern_str) = if let Some(stripped) = line.strip_prefix('!') {
163                (true, stripped.to_string())
164            } else {
165                (false, line.to_string())
166            };
167
168            match glob::Pattern::new(&pattern_str) {
169                Ok(pattern) => {
170                    patterns.push(FilterPattern { pattern, is_include, original: pattern_str });
171                }
172                Err(e) => {
173                    // Log warning but don't fail - just skip invalid patterns
174                    tracing::warn!(
175                        "Invalid pattern '{}' in {}: {}",
176                        pattern_str,
177                        path.as_ref().display(),
178                        e
179                    );
180                }
181            }
182        }
183
184        Ok(patterns)
185    }
186
187    /// Get all files that should be analyzed in a directory tree
188    pub fn find_files<P: AsRef<Path>>(&self, root: P) -> GuardianResult<Vec<PathBuf>> {
189        let root = root.as_ref();
190        let mut files = Vec::new();
191
192        for entry in WalkDir::new(root).follow_links(false).into_iter().filter_map(|e| e.ok()) {
193            let path = entry.path();
194
195            // Only process files, not directories
196            if path.is_file() && self.should_analyze(path)? {
197                files.push(path.to_path_buf());
198            }
199        }
200
201        Ok(files)
202    }
203
204    /// Filter a list of paths to only those that should be analyzed
205    pub fn filter_paths<P: AsRef<Path>>(&self, paths: &[P]) -> GuardianResult<Vec<PathBuf>> {
206        let mut filtered = Vec::new();
207
208        for path in paths {
209            if self.should_analyze(path)? {
210                filtered.push(path.as_ref().to_path_buf());
211            }
212        }
213
214        Ok(filtered)
215    }
216
217    /// Add a pattern to the filter
218    pub fn add_pattern(&mut self, pattern: String) -> GuardianResult<()> {
219        let (is_include, pattern_str) = if let Some(stripped) = pattern.strip_prefix('!') {
220            (true, stripped.to_string())
221        } else {
222            (false, pattern)
223        };
224
225        let glob_pattern = glob::Pattern::new(&pattern_str)
226            .map_err(|e| GuardianError::pattern(format!("Invalid pattern '{pattern_str}': {e}")))?;
227
228        self.patterns.push(FilterPattern {
229            pattern: glob_pattern,
230            is_include,
231            original: pattern_str,
232        });
233
234        Ok(())
235    }
236
237    /// Get debug information about patterns and their matches
238    pub fn debug_patterns<P: AsRef<Path>>(&self, path: P) -> Vec<String> {
239        let path = path.as_ref();
240        let mut debug_info = Vec::new();
241
242        for (i, pattern) in self.patterns.iter().enumerate() {
243            let matches = self.pattern_matches_path(pattern, path);
244            let prefix = if pattern.is_include { "!" } else { "" };
245
246            debug_info.push(format!(
247                "Pattern {}: {}{} -> {}",
248                i,
249                prefix,
250                pattern.original,
251                if matches { "MATCH" } else { "no match" }
252            ));
253        }
254
255        debug_info
256    }
257
258    /// Check if a pattern matches a path using .gitignore-style rules
259    fn pattern_matches_path(&self, pattern: &FilterPattern, path: &Path) -> bool {
260        let path_str = path.to_string_lossy();
261
262        // Handle different pattern types
263        if pattern.original.ends_with('/') {
264            // Directory pattern - only match directories
265            if !path.is_dir() {
266                return false;
267            }
268            // Remove trailing slash and match
269            let dir_pattern = pattern.original.trim_end_matches('/');
270            return glob::Pattern::new(dir_pattern).map(|p| p.matches(&path_str)).unwrap_or(false);
271        }
272
273        if pattern.original.starts_with('/') {
274            // Absolute pattern from root - remove leading slash and match from beginning
275            let absolute_pattern = pattern.original.strip_prefix('/').unwrap_or(&pattern.original);
276            return glob::Pattern::new(absolute_pattern)
277                .map(|p| p.matches(&path_str))
278                .unwrap_or(false);
279        }
280
281        if pattern.original.contains('/') {
282            // Pattern contains slash - match full path
283            return pattern.pattern.matches(&path_str);
284        } else {
285            // No slash - match filename only
286            if let Some(filename) = path.file_name() {
287                return pattern.pattern.matches(&filename.to_string_lossy());
288            }
289        }
290
291        false
292    }
293}
294
295/// Architecture-compliant validation functions for integration testing
296#[cfg(test)]
297#[allow(dead_code)]
298pub mod validation {
299    use super::*;
300    use std::fs;
301    use tempfile::TempDir;
302
303    /// Validate basic pattern matching functionality - designed for integration testing
304    pub fn validate_basic_pattern_matching() -> GuardianResult<()> {
305        let filter = PathFilter::new(
306            vec![
307                "target/**".to_string(), // Exclude target directory
308                "*.md".to_string(),      // Exclude markdown files
309            ],
310            None,
311        )?;
312
313        if !filter.should_analyze(Path::new("src/lib.rs"))? {
314            return Err(GuardianError::pattern(
315                "Basic pattern validation failed - should analyze src files",
316            ));
317        }
318
319        if filter.should_analyze(Path::new("target/debug/lib.rs"))? {
320            return Err(GuardianError::pattern(
321                "Basic pattern validation failed - should exclude target files",
322            ));
323        }
324
325        if filter.should_analyze(Path::new("README.md"))? {
326            return Err(GuardianError::pattern(
327                "Basic pattern validation failed - should exclude markdown files",
328            ));
329        }
330
331        Ok(())
332    }
333
334    /// Validate include override functionality - designed for integration testing
335    pub fn validate_include_override() -> GuardianResult<()> {
336        let filter = PathFilter::new(
337            vec![
338                "target/**".to_string(),          // Exclude target
339                "!target/special/**".to_string(), // But include target/special
340            ],
341            None,
342        )?;
343
344        if filter.should_analyze(Path::new("target/debug/lib.rs"))? {
345            return Err(GuardianError::pattern(
346                "Include override validation failed - should exclude target/debug",
347            ));
348        }
349
350        if !filter.should_analyze(Path::new("target/special/lib.rs"))? {
351            return Err(GuardianError::pattern(
352                "Include override validation failed - should include target/special",
353            ));
354        }
355
356        Ok(())
357    }
358
359    /// Validate pattern order functionality - designed for integration testing
360    pub fn validate_pattern_order() -> GuardianResult<()> {
361        let filter = PathFilter::new(
362            vec![
363                "tests/**".to_string(),            // Exclude tests
364                "!tests/important.rs".to_string(), // But include important test
365                "!*.rs".to_string(),               // And include all .rs files (overrides excludes)
366            ],
367            None,
368        )?;
369
370        if !filter.should_analyze(Path::new("src/lib.rs"))? {
371            return Err(GuardianError::pattern(
372                "Pattern order validation failed - should analyze src files",
373            ));
374        }
375
376        if !filter.should_analyze(Path::new("tests/unit.rs"))? {
377            return Err(GuardianError::pattern(
378                "Pattern order validation failed - should analyze test files with overrides",
379            ));
380        }
381
382        if !filter.should_analyze(Path::new("tests/important.rs"))? {
383            return Err(GuardianError::pattern(
384                "Pattern order validation failed - should analyze important test files",
385            ));
386        }
387
388        Ok(())
389    }
390
391    /// Validate guardianignore file functionality - designed for integration testing
392    pub fn validate_guardianignore_file() -> GuardianResult<()> {
393        let temp_dir = TempDir::new()
394            .map_err(|e| GuardianError::config(format!("Failed to create temp dir: {}", e)))?;
395        let root = temp_dir.path();
396
397        // Create directory structure
398        fs::create_dir_all(root.join("src"))?;
399        fs::create_dir_all(root.join("tests"))?;
400
401        // Create .guardianignore file
402        fs::write(root.join(".guardianignore"), "*.tmp\ntests/**\n!tests/important.rs\n")?;
403
404        // Create test files
405        fs::write(root.join("src/lib.rs"), "")?;
406        fs::write(root.join("temp.tmp"), "")?;
407        fs::write(root.join("tests/unit.rs"), "")?;
408        fs::write(root.join("tests/important.rs"), "")?;
409
410        let filter = PathFilter::new(vec![], Some(".guardianignore".to_string()))?;
411
412        if !filter.should_analyze(root.join("src/lib.rs"))? {
413            return Err(GuardianError::pattern(
414                "Guardianignore validation failed - should analyze src files",
415            ));
416        }
417
418        if filter.should_analyze(root.join("temp.tmp"))? {
419            return Err(GuardianError::pattern(
420                "Guardianignore validation failed - should exclude tmp files",
421            ));
422        }
423
424        if filter.should_analyze(root.join("tests/unit.rs"))? {
425            return Err(GuardianError::pattern(
426                "Guardianignore validation failed - should exclude test files",
427            ));
428        }
429
430        if !filter.should_analyze(root.join("tests/important.rs"))? {
431            return Err(GuardianError::pattern(
432                "Guardianignore validation failed - should include important files",
433            ));
434        }
435
436        Ok(())
437    }
438
439    /// Validate invalid pattern handling - designed for integration testing
440    pub fn validate_invalid_pattern_handling() -> GuardianResult<()> {
441        let result = PathFilter::new(vec!["[invalid".to_string()], None);
442        if result.is_ok() {
443            return Err(GuardianError::pattern(
444                "Invalid pattern validation failed - should reject invalid patterns",
445            ));
446        }
447
448        Ok(())
449    }
450
451    /// Validate default filter functionality - designed for integration testing
452    pub fn validate_default_filter() -> GuardianResult<()> {
453        let filter = PathFilter::with_defaults()?;
454
455        if filter.should_analyze(Path::new("target/debug/lib.rs"))? {
456            return Err(GuardianError::pattern(
457                "Default filter validation failed - should exclude target directory",
458            ));
459        }
460
461        if !filter.should_analyze(Path::new("src/lib.rs"))? {
462            return Err(GuardianError::pattern(
463                "Default filter validation failed - should include source files",
464            ));
465        }
466
467        Ok(())
468    }
469}