Skip to main content

cc_audit/discovery/
filter.rs

1//! Ignore filtering for file discovery.
2
3use crate::config::IgnoreConfig;
4use ignore::gitignore::{Gitignore, GitignoreBuilder};
5use std::collections::HashSet;
6use std::path::Path;
7
8/// Filter for determining which files should be ignored during scanning.
9#[derive(Default)]
10pub struct IgnoreFilter {
11    gitignore: Option<Gitignore>,
12    include_tests: bool,
13    include_node_modules: bool,
14    include_vendor: bool,
15    /// Additional directories to ignore from config
16    extra_directories: HashSet<String>,
17    /// Custom glob patterns from config
18    custom_patterns: Option<Gitignore>,
19}
20
21impl IgnoreFilter {
22    /// Create a new IgnoreFilter for the given root directory.
23    pub fn new(root: &Path) -> Self {
24        let gitignore = Self::load_ignorefiles(root);
25
26        Self {
27            gitignore,
28            include_tests: false,
29            include_node_modules: false,
30            include_vendor: false,
31            extra_directories: HashSet::new(),
32            custom_patterns: None,
33        }
34    }
35
36    /// Create IgnoreFilter from config.
37    pub fn from_config(root: &Path, config: &IgnoreConfig) -> Self {
38        let gitignore = Self::load_ignorefiles(root);
39        let custom_patterns = Self::build_custom_patterns(root, &config.patterns);
40
41        Self {
42            gitignore,
43            include_tests: config.include_tests,
44            include_node_modules: config.include_node_modules,
45            include_vendor: config.include_vendor,
46            extra_directories: config.directories.clone(),
47            custom_patterns,
48        }
49    }
50
51    /// Apply config settings to existing filter.
52    pub fn with_config(mut self, config: &IgnoreConfig) -> Self {
53        self.include_tests = config.include_tests;
54        self.include_node_modules = config.include_node_modules;
55        self.include_vendor = config.include_vendor;
56        self.extra_directories = config.directories.clone();
57        // Note: custom_patterns requires root path, so it's not updated here
58        self
59    }
60
61    /// Set whether to include test directories.
62    pub fn with_include_tests(mut self, include: bool) -> Self {
63        self.include_tests = include;
64        self
65    }
66
67    /// Set whether to include node_modules directories.
68    pub fn with_include_node_modules(mut self, include: bool) -> Self {
69        self.include_node_modules = include;
70        self
71    }
72
73    /// Set whether to include vendor directories.
74    pub fn with_include_vendor(mut self, include: bool) -> Self {
75        self.include_vendor = include;
76        self
77    }
78
79    /// Build gitignore-style patterns from config patterns.
80    fn build_custom_patterns(root: &Path, patterns: &[String]) -> Option<Gitignore> {
81        if patterns.is_empty() {
82            return None;
83        }
84
85        let mut builder = GitignoreBuilder::new(root);
86        for pattern in patterns {
87            // Add pattern - ignore errors for invalid patterns
88            let _ = builder.add_line(None, pattern);
89        }
90
91        builder.build().ok()
92    }
93
94    fn load_ignorefiles(root: &Path) -> Option<Gitignore> {
95        let mut builder = GitignoreBuilder::new(root);
96        let mut has_patterns = false;
97
98        // Load .gitignore first (if it exists and there's a .git directory)
99        let git_dir = root.join(".git");
100        let gitignore_file = root.join(".gitignore");
101        if git_dir.exists() && gitignore_file.exists() && builder.add(&gitignore_file).is_none() {
102            has_patterns = true;
103        }
104
105        // Load .cc-auditignore (overrides/extends .gitignore)
106        let cc_audit_ignore = root.join(".cc-auditignore");
107        if cc_audit_ignore.exists() && builder.add(&cc_audit_ignore).is_none() {
108            has_patterns = true;
109        }
110
111        if has_patterns {
112            builder.build().ok()
113        } else {
114            None
115        }
116    }
117
118    /// Check if a path should be ignored.
119    pub fn is_ignored(&self, path: &Path) -> bool {
120        // Check default exclusions first
121        if !self.include_tests && self.is_test_path(path) {
122            return true;
123        }
124
125        if !self.include_node_modules && self.is_node_modules_path(path) {
126            return true;
127        }
128
129        if !self.include_vendor && self.is_vendor_path(path) {
130            return true;
131        }
132
133        // Check extra directories from config
134        if self.is_in_extra_directories(path) {
135            return true;
136        }
137
138        // Check custom patterns from config
139        if let Some(ref custom) = self.custom_patterns {
140            let is_dir = path.is_dir();
141            if custom.matched(path, is_dir).is_ignore() {
142                return true;
143            }
144        }
145
146        // Check .cc-auditignore patterns
147        if let Some(ref gitignore) = self.gitignore {
148            let is_dir = path.is_dir();
149            return gitignore.matched(path, is_dir).is_ignore();
150        }
151
152        false
153    }
154
155    fn is_in_extra_directories(&self, path: &Path) -> bool {
156        if self.extra_directories.is_empty() {
157            return false;
158        }
159
160        path.components().any(|c| {
161            let name = c.as_os_str().to_string_lossy();
162            // Skip node_modules check if include_node_modules is true
163            if self.include_node_modules && name == "node_modules" {
164                return false;
165            }
166            // Skip vendor check if include_vendor is true
167            if self.include_vendor
168                && (name == "vendor" || name == "vendors" || name == "third_party")
169            {
170                return false;
171            }
172            self.extra_directories.contains(name.as_ref())
173        })
174    }
175
176    fn is_test_path(&self, path: &Path) -> bool {
177        path.components().any(|c| {
178            let name = c.as_os_str().to_string_lossy();
179            name == "tests"
180                || name == "test"
181                || name == "__tests__"
182                || name == "spec"
183                || name == "specs"
184                || name.ends_with("_test")
185                || name.ends_with(".test")
186        })
187    }
188
189    fn is_node_modules_path(&self, path: &Path) -> bool {
190        path.components()
191            .any(|c| c.as_os_str().to_string_lossy() == "node_modules")
192    }
193
194    fn is_vendor_path(&self, path: &Path) -> bool {
195        path.components().any(|c| {
196            let name = c.as_os_str().to_string_lossy();
197            name == "vendor" || name == "vendors" || name == "third_party"
198        })
199    }
200}
201
#[cfg(test)]
mod tests {
    //! Unit tests for `IgnoreFilter`.
    //!
    //! Tests that only exercise the name-based exclusions use made-up
    //! `/project/...` paths; tests that exercise glob patterns create real
    //! files inside a temporary root so the matcher sees paths under its root.

    use super::*;
    use std::fs;
    use tempfile::TempDir;

    #[test]
    fn test_default_excludes_tests() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path());

        assert!(filter.is_ignored(Path::new("/project/tests/test_file.rs")));
        assert!(filter.is_ignored(Path::new("/project/__tests__/spec.js")));
        assert!(filter.is_ignored(Path::new("/project/spec/helpers.rb")));
        assert!(!filter.is_ignored(Path::new("/project/src/main.rs")));
    }

    #[test]
    fn test_default_excludes_node_modules() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path());

        assert!(filter.is_ignored(Path::new("/project/node_modules/package/index.js")));
        assert!(!filter.is_ignored(Path::new("/project/src/index.js")));
    }

    #[test]
    fn test_default_excludes_vendor() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path());

        assert!(filter.is_ignored(Path::new("/project/vendor/bundle/gems")));
        assert!(filter.is_ignored(Path::new("/project/third_party/lib")));
        assert!(!filter.is_ignored(Path::new("/project/src/lib")));
    }

    #[test]
    fn test_include_tests() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path()).with_include_tests(true);

        assert!(!filter.is_ignored(Path::new("/project/tests/test_file.rs")));
    }

    #[test]
    fn test_include_node_modules() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path()).with_include_node_modules(true);

        assert!(!filter.is_ignored(Path::new("/project/node_modules/package/index.js")));
    }

    #[test]
    fn test_include_vendor() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path()).with_include_vendor(true);

        assert!(!filter.is_ignored(Path::new("/project/vendor/bundle/gems")));
    }

    #[test]
    fn test_custom_ignorefile() {
        let dir = TempDir::new().unwrap();
        let ignore_file = dir.path().join(".cc-auditignore");
        fs::write(&ignore_file, "*.generated.js\nbuild/\n").unwrap();

        let filter = IgnoreFilter::new(dir.path());

        let generated_file = dir.path().join("app.generated.js");
        fs::write(&generated_file, "").unwrap();

        assert!(filter.is_ignored(&generated_file));

        // The directory-only pattern `build/` must match the directory itself.
        let build_dir = dir.path().join("build");
        fs::create_dir_all(&build_dir).unwrap();
        assert!(filter.is_ignored(&build_dir));
    }

    #[test]
    fn test_no_ignorefile() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path());

        assert!(!filter.is_ignored(&dir.path().join("src/main.rs")));
    }

    #[test]
    fn test_default_trait() {
        let filter = IgnoreFilter::default();

        // Default should exclude tests, node_modules, vendor
        assert!(filter.is_ignored(Path::new("/project/tests/test.rs")));
        assert!(filter.is_ignored(Path::new("/project/node_modules/pkg")));
        assert!(filter.is_ignored(Path::new("/project/vendor/lib")));
    }

    #[test]
    fn test_chained_configuration() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path())
            .with_include_tests(true)
            .with_include_node_modules(true)
            .with_include_vendor(true);

        assert!(!filter.is_ignored(Path::new("/project/tests/test.rs")));
        assert!(!filter.is_ignored(Path::new("/project/node_modules/pkg")));
        assert!(!filter.is_ignored(Path::new("/project/vendor/lib")));
    }

    #[test]
    fn test_gitignore_patterns() {
        let dir = TempDir::new().unwrap();
        let ignore_file = dir.path().join(".cc-auditignore");
        fs::write(
            &ignore_file,
            r#"
# Comment
*.log
/dist/
!important.log
"#,
        )
        .unwrap();

        let filter = IgnoreFilter::new(dir.path());

        let log_file = dir.path().join("debug.log");
        fs::write(&log_file, "").unwrap();
        assert!(filter.is_ignored(&log_file));

        // The negated pattern re-includes this one log file.
        let important_log = dir.path().join("important.log");
        fs::write(&important_log, "").unwrap();
        assert!(!filter.is_ignored(&important_log));

        // The anchored, directory-only pattern matches the top-level directory.
        let dist_dir = dir.path().join("dist");
        fs::create_dir_all(&dist_dir).unwrap();
        assert!(filter.is_ignored(&dist_dir));

        // Normal src file should not be ignored
        let src_file = dir.path().join("main.rs");
        fs::write(&src_file, "").unwrap();
        assert!(!filter.is_ignored(&src_file));
    }

    #[test]
    fn test_is_test_path_variations() {
        let filter = IgnoreFilter::default();

        assert!(filter.is_test_path(Path::new("/project/tests/unit")));
        assert!(filter.is_test_path(Path::new("/project/test/fixtures")));
        assert!(filter.is_test_path(Path::new("/project/__tests__/spec")));
        assert!(filter.is_test_path(Path::new("/project/spec/helpers")));
        assert!(filter.is_test_path(Path::new("/project/specs/api")));
        assert!(filter.is_test_path(Path::new("/project/file_test")));
        assert!(filter.is_test_path(Path::new("/project/api.test")));
        assert!(!filter.is_test_path(Path::new("/project/src/main.rs")));
        // Should not match 'test' in 'contest'
        assert!(!filter.is_test_path(Path::new("/project/contest/app.js")));
    }

    #[test]
    fn test_from_config() {
        let dir = TempDir::new().unwrap();
        let config = IgnoreConfig {
            directories: ["custom_ignore_dir", "my_cache"]
                .into_iter()
                .map(String::from)
                .collect(),
            patterns: vec!["*.generated.js".to_string()],
            include_tests: true,
            include_node_modules: false,
            include_vendor: true,
        };

        let filter = IgnoreFilter::from_config(dir.path(), &config);

        // Tests should NOT be ignored (include_tests is true)
        assert!(!filter.is_ignored(Path::new("/project/tests/test.rs")));
        // node_modules should be ignored (include_node_modules is false)
        assert!(filter.is_ignored(Path::new("/project/node_modules/pkg")));
        // vendor should NOT be ignored (include_vendor is true)
        assert!(!filter.is_ignored(Path::new("/project/vendor/lib")));
        // custom directories should be ignored
        assert!(filter.is_ignored(Path::new("/project/custom_ignore_dir/file.rs")));
        assert!(filter.is_ignored(Path::new("/project/my_cache/data")));
    }

    #[test]
    fn test_extra_directories_ignored() {
        let dir = TempDir::new().unwrap();
        let mut config = IgnoreConfig::default();
        config.directories.insert("my_special_dir".to_string());

        let filter = IgnoreFilter::from_config(dir.path(), &config);

        // Custom directory should be ignored
        assert!(filter.is_ignored(Path::new("/project/my_special_dir/file.rs")));
        // Nested path with custom directory should be ignored
        assert!(filter.is_ignored(Path::new("/project/src/my_special_dir/nested/file.rs")));
    }

    #[test]
    fn test_custom_patterns_from_config() {
        let dir = TempDir::new().unwrap();
        let config = IgnoreConfig {
            directories: std::collections::HashSet::new(),
            patterns: vec!["*.log".to_string(), "temp/**".to_string()],
            include_tests: true,
            include_node_modules: true,
            include_vendor: true,
        };

        let filter = IgnoreFilter::from_config(dir.path(), &config);

        // Create test files
        let log_file = dir.path().join("debug.log");
        fs::write(&log_file, "").unwrap();
        assert!(filter.is_ignored(&log_file));

        let temp_file = dir.path().join("temp/cache.txt");
        fs::create_dir_all(dir.path().join("temp")).unwrap();
        fs::write(&temp_file, "").unwrap();
        assert!(filter.is_ignored(&temp_file));

        // Normal file should not be ignored
        let src_file = dir.path().join("main.rs");
        fs::write(&src_file, "").unwrap();
        assert!(!filter.is_ignored(&src_file));
    }

    #[test]
    fn test_with_config_method() {
        let dir = TempDir::new().unwrap();
        let config = IgnoreConfig {
            directories: ["target", "dist"].into_iter().map(String::from).collect(),
            patterns: vec![],
            include_tests: true,
            include_node_modules: true,
            include_vendor: false,
        };

        let filter = IgnoreFilter::new(dir.path()).with_config(&config);

        // Tests should NOT be ignored
        assert!(!filter.is_ignored(Path::new("/project/tests/test.rs")));
        // node_modules should NOT be ignored
        assert!(!filter.is_ignored(Path::new("/project/node_modules/pkg")));
        // vendor should be ignored
        assert!(filter.is_ignored(Path::new("/project/vendor/lib")));
        // target should be ignored (from extra_directories)
        assert!(filter.is_ignored(Path::new("/project/target/debug")));
    }
}