Skip to main content

cc_audit/discovery/
filter.rs

1//! Ignore filtering for file discovery.
2
3use crate::config::IgnoreConfig;
4use ignore::gitignore::{Gitignore, GitignoreBuilder};
5use std::collections::HashSet;
6use std::path::Path;
7
8/// Filter for determining which files should be ignored during scanning.
9#[derive(Default)]
10pub struct IgnoreFilter {
11    gitignore: Option<Gitignore>,
12    include_tests: bool,
13    include_node_modules: bool,
14    include_vendor: bool,
15    /// Additional directories to ignore from config
16    extra_directories: HashSet<String>,
17    /// Custom glob patterns from config
18    custom_patterns: Option<Gitignore>,
19}
20
21impl IgnoreFilter {
22    /// Create a new IgnoreFilter for the given root directory.
23    pub fn new(root: &Path) -> Self {
24        let gitignore = Self::load_ignorefiles(root);
25
26        Self {
27            gitignore,
28            include_tests: false,
29            include_node_modules: false,
30            include_vendor: false,
31            extra_directories: HashSet::new(),
32            custom_patterns: None,
33        }
34    }
35
36    /// Create IgnoreFilter from config.
37    pub fn from_config(root: &Path, config: &IgnoreConfig) -> Self {
38        let gitignore = Self::load_ignorefiles(root);
39        let custom_patterns = Self::build_custom_patterns(root, &config.patterns);
40
41        Self {
42            gitignore,
43            include_tests: config.include_tests,
44            include_node_modules: config.include_node_modules,
45            include_vendor: config.include_vendor,
46            extra_directories: config.directories.clone(),
47            custom_patterns,
48        }
49    }
50
51    /// Apply config settings to existing filter.
52    pub fn with_config(mut self, config: &IgnoreConfig) -> Self {
53        self.include_tests = config.include_tests;
54        self.include_node_modules = config.include_node_modules;
55        self.include_vendor = config.include_vendor;
56        self.extra_directories = config.directories.clone();
57        // Note: custom_patterns requires root path, so it's not updated here
58        self
59    }
60
61    /// Set whether to include test directories.
62    pub fn with_include_tests(mut self, include: bool) -> Self {
63        self.include_tests = include;
64        self
65    }
66
67    /// Set whether to include node_modules directories.
68    pub fn with_include_node_modules(mut self, include: bool) -> Self {
69        self.include_node_modules = include;
70        self
71    }
72
73    /// Set whether to include vendor directories.
74    pub fn with_include_vendor(mut self, include: bool) -> Self {
75        self.include_vendor = include;
76        self
77    }
78
79    /// Build gitignore-style patterns from config patterns.
80    fn build_custom_patterns(root: &Path, patterns: &[String]) -> Option<Gitignore> {
81        if patterns.is_empty() {
82            return None;
83        }
84
85        let mut builder = GitignoreBuilder::new(root);
86        for pattern in patterns {
87            // Add pattern - ignore errors for invalid patterns
88            let _ = builder.add_line(None, pattern);
89        }
90
91        builder.build().ok()
92    }
93
94    fn load_ignorefiles(root: &Path) -> Option<Gitignore> {
95        let mut builder = GitignoreBuilder::new(root);
96        let mut has_patterns = false;
97
98        // Load .gitignore first (if it exists and there's a .git directory)
99        let git_dir = root.join(".git");
100        let gitignore_file = root.join(".gitignore");
101        if git_dir.exists() && gitignore_file.exists() && builder.add(&gitignore_file).is_none() {
102            has_patterns = true;
103        }
104
105        // Load .cc-auditignore (overrides/extends .gitignore)
106        let cc_audit_ignore = root.join(".cc-auditignore");
107        if cc_audit_ignore.exists() && builder.add(&cc_audit_ignore).is_none() {
108            has_patterns = true;
109        }
110
111        if has_patterns {
112            builder.build().ok()
113        } else {
114            None
115        }
116    }
117
118    /// Check if a path should be ignored.
119    pub fn is_ignored(&self, path: &Path) -> bool {
120        // Check default exclusions first
121        if !self.include_tests && self.is_test_path(path) {
122            return true;
123        }
124
125        if !self.include_node_modules && self.is_node_modules_path(path) {
126            return true;
127        }
128
129        if !self.include_vendor && self.is_vendor_path(path) {
130            return true;
131        }
132
133        // Check extra directories from config
134        if self.is_in_extra_directories(path) {
135            return true;
136        }
137
138        // Check custom patterns from config
139        if let Some(ref custom) = self.custom_patterns {
140            let is_dir = path.is_dir();
141            if custom.matched(path, is_dir).is_ignore() {
142                return true;
143            }
144        }
145
146        // Check .cc-auditignore patterns
147        if let Some(ref gitignore) = self.gitignore {
148            let is_dir = path.is_dir();
149            return gitignore.matched(path, is_dir).is_ignore();
150        }
151
152        false
153    }
154
155    fn is_in_extra_directories(&self, path: &Path) -> bool {
156        if self.extra_directories.is_empty() {
157            return false;
158        }
159
160        path.components().any(|c| {
161            let name = c.as_os_str().to_string_lossy();
162            self.extra_directories.contains(name.as_ref())
163        })
164    }
165
166    fn is_test_path(&self, path: &Path) -> bool {
167        path.components().any(|c| {
168            let name = c.as_os_str().to_string_lossy();
169            name == "tests"
170                || name == "test"
171                || name == "__tests__"
172                || name == "spec"
173                || name == "specs"
174                || name.ends_with("_test")
175                || name.ends_with(".test")
176        })
177    }
178
179    fn is_node_modules_path(&self, path: &Path) -> bool {
180        path.components()
181            .any(|c| c.as_os_str().to_string_lossy() == "node_modules")
182    }
183
184    fn is_vendor_path(&self, path: &Path) -> bool {
185        path.components().any(|c| {
186            let name = c.as_os_str().to_string_lossy();
187            name == "vendor" || name == "vendors" || name == "third_party"
188        })
189    }
190}
191
#[cfg(test)]
mod tests {
    //! Unit tests for `IgnoreFilter`.
    //!
    //! Tests that only exercise the name-based exclusions use synthetic
    //! `/project/...` paths; tests that exercise glob patterns create real
    //! files inside a temporary directory that doubles as the filter root.

    use super::*;
    use std::fs;
    use tempfile::TempDir;

    #[test]
    fn test_default_excludes_tests() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path());

        assert!(filter.is_ignored(Path::new("/project/tests/test_file.rs")));
        assert!(filter.is_ignored(Path::new("/project/__tests__/spec.js")));
        assert!(filter.is_ignored(Path::new("/project/spec/helpers.rb")));
        assert!(!filter.is_ignored(Path::new("/project/src/main.rs")));
    }

    #[test]
    fn test_default_excludes_node_modules() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path());

        assert!(filter.is_ignored(Path::new("/project/node_modules/package/index.js")));
        assert!(!filter.is_ignored(Path::new("/project/src/index.js")));
    }

    #[test]
    fn test_default_excludes_vendor() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path());

        assert!(filter.is_ignored(Path::new("/project/vendor/bundle/gems")));
        assert!(filter.is_ignored(Path::new("/project/vendors/bundle/gems")));
        assert!(filter.is_ignored(Path::new("/project/third_party/lib")));
        assert!(!filter.is_ignored(Path::new("/project/src/lib")));
    }

    #[test]
    fn test_include_tests() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path()).with_include_tests(true);

        assert!(!filter.is_ignored(Path::new("/project/tests/test_file.rs")));
    }

    #[test]
    fn test_include_node_modules() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path()).with_include_node_modules(true);

        assert!(!filter.is_ignored(Path::new("/project/node_modules/package/index.js")));
    }

    #[test]
    fn test_include_vendor() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path()).with_include_vendor(true);

        assert!(!filter.is_ignored(Path::new("/project/vendor/bundle/gems")));
    }

    #[test]
    fn test_custom_ignorefile() {
        let dir = TempDir::new().unwrap();
        let ignore_file = dir.path().join(".cc-auditignore");
        fs::write(&ignore_file, "*.generated.js\nbuild/\n").unwrap();

        let filter = IgnoreFilter::new(dir.path());

        let generated_file = dir.path().join("app.generated.js");
        fs::write(&generated_file, "").unwrap();

        assert!(filter.is_ignored(&generated_file));

        // The directory-only pattern `build/` must match the directory itself.
        let build_dir = dir.path().join("build");
        fs::create_dir(&build_dir).unwrap();
        assert!(filter.is_ignored(&build_dir));
    }

    #[test]
    fn test_no_ignorefile() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path());

        assert!(!filter.is_ignored(&dir.path().join("src/main.rs")));
    }

    #[test]
    fn test_default_trait() {
        let filter = IgnoreFilter::default();

        // Default should exclude tests, node_modules, vendor
        assert!(filter.is_ignored(Path::new("/project/tests/test.rs")));
        assert!(filter.is_ignored(Path::new("/project/node_modules/pkg")));
        assert!(filter.is_ignored(Path::new("/project/vendor/lib")));
    }

    #[test]
    fn test_chained_configuration() {
        let dir = TempDir::new().unwrap();
        let filter = IgnoreFilter::new(dir.path())
            .with_include_tests(true)
            .with_include_node_modules(true)
            .with_include_vendor(true);

        assert!(!filter.is_ignored(Path::new("/project/tests/test.rs")));
        assert!(!filter.is_ignored(Path::new("/project/node_modules/pkg")));
        assert!(!filter.is_ignored(Path::new("/project/vendor/lib")));
    }

    #[test]
    fn test_gitignore_patterns() {
        let dir = TempDir::new().unwrap();
        let ignore_file = dir.path().join(".cc-auditignore");
        fs::write(
            &ignore_file,
            r#"
# Comment
*.log
/dist/
!important.log
"#,
        )
        .unwrap();

        let filter = IgnoreFilter::new(dir.path());

        let log_file = dir.path().join("debug.log");
        fs::write(&log_file, "").unwrap();
        assert!(filter.is_ignored(&log_file));

        // The negated pattern re-includes this particular log file
        let important_log = dir.path().join("important.log");
        fs::write(&important_log, "").unwrap();
        assert!(!filter.is_ignored(&important_log));

        // The anchored directory pattern matches the top-level dist directory
        let dist_dir = dir.path().join("dist");
        fs::create_dir(&dist_dir).unwrap();
        assert!(filter.is_ignored(&dist_dir));

        // Normal src file should not be ignored
        let src_file = dir.path().join("main.rs");
        fs::write(&src_file, "").unwrap();
        assert!(!filter.is_ignored(&src_file));
    }

    #[test]
    fn test_is_test_path_variations() {
        let filter = IgnoreFilter::default();

        assert!(filter.is_test_path(Path::new("/project/tests/unit")));
        assert!(filter.is_test_path(Path::new("/project/test/fixtures")));
        assert!(filter.is_test_path(Path::new("/project/__tests__/spec")));
        assert!(filter.is_test_path(Path::new("/project/spec/helpers")));
        assert!(filter.is_test_path(Path::new("/project/specs/api")));
        assert!(filter.is_test_path(Path::new("/project/file_test")));
        assert!(filter.is_test_path(Path::new("/project/api.test")));
        assert!(!filter.is_test_path(Path::new("/project/src/main.rs")));
        assert!(!filter.is_test_path(Path::new("/project/contest/app.js"))); // Should not match 'test' in 'contest'
    }

    #[test]
    fn test_from_config() {
        let dir = TempDir::new().unwrap();
        let config = IgnoreConfig {
            directories: ["custom_ignore_dir", "my_cache"]
                .into_iter()
                .map(String::from)
                .collect(),
            patterns: vec!["*.generated.js".to_string()],
            include_tests: true,
            include_node_modules: false,
            include_vendor: true,
        };

        let filter = IgnoreFilter::from_config(dir.path(), &config);

        // Tests should NOT be ignored (include_tests is true)
        assert!(!filter.is_ignored(Path::new("/project/tests/test.rs")));
        // node_modules should be ignored (include_node_modules is false)
        assert!(filter.is_ignored(Path::new("/project/node_modules/pkg")));
        // vendor should NOT be ignored (include_vendor is true)
        assert!(!filter.is_ignored(Path::new("/project/vendor/lib")));
        // custom directories should be ignored
        assert!(filter.is_ignored(Path::new("/project/custom_ignore_dir/file.rs")));
        assert!(filter.is_ignored(Path::new("/project/my_cache/data")));
        // custom glob pattern from config should be applied
        assert!(filter.is_ignored(&dir.path().join("app.generated.js")));
        assert!(!filter.is_ignored(&dir.path().join("app.js")));
    }

    #[test]
    fn test_extra_directories_ignored() {
        let dir = TempDir::new().unwrap();
        let mut config = IgnoreConfig::default();
        config.directories.insert("my_special_dir".to_string());

        let filter = IgnoreFilter::from_config(dir.path(), &config);

        // Custom directory should be ignored
        assert!(filter.is_ignored(Path::new("/project/my_special_dir/file.rs")));
        // Nested path with custom directory should be ignored
        assert!(filter.is_ignored(Path::new("/project/src/my_special_dir/nested/file.rs")));
    }

    #[test]
    fn test_custom_patterns_from_config() {
        let dir = TempDir::new().unwrap();
        let config = IgnoreConfig {
            directories: std::collections::HashSet::new(),
            patterns: vec!["*.log".to_string(), "temp/**".to_string()],
            include_tests: true,
            include_node_modules: true,
            include_vendor: true,
        };

        let filter = IgnoreFilter::from_config(dir.path(), &config);

        // Create test files
        let log_file = dir.path().join("debug.log");
        fs::write(&log_file, "").unwrap();
        assert!(filter.is_ignored(&log_file));

        let temp_file = dir.path().join("temp/cache.txt");
        fs::create_dir_all(dir.path().join("temp")).unwrap();
        fs::write(&temp_file, "").unwrap();
        assert!(filter.is_ignored(&temp_file));

        // Normal file should not be ignored
        let src_file = dir.path().join("main.rs");
        fs::write(&src_file, "").unwrap();
        assert!(!filter.is_ignored(&src_file));
    }

    #[test]
    fn test_with_config_method() {
        let dir = TempDir::new().unwrap();
        let config = IgnoreConfig {
            directories: ["target", "dist"].into_iter().map(String::from).collect(),
            patterns: vec![],
            include_tests: true,
            include_node_modules: true,
            include_vendor: false,
        };

        let filter = IgnoreFilter::new(dir.path()).with_config(&config);

        // Tests should NOT be ignored
        assert!(!filter.is_ignored(Path::new("/project/tests/test.rs")));
        // node_modules should NOT be ignored
        assert!(!filter.is_ignored(Path::new("/project/node_modules/pkg")));
        // vendor should be ignored
        assert!(filter.is_ignored(Path::new("/project/vendor/lib")));
        // target and dist should be ignored (from extra_directories)
        assert!(filter.is_ignored(Path::new("/project/target/debug")));
        assert!(filter.is_ignored(Path::new("/project/dist/bundle.js")));
        // unrelated directories are still scanned
        assert!(!filter.is_ignored(Path::new("/project/src/main.rs")));
    }
}