Skip to main content

garbage_code_hunter/
analyzer.rs

1use regex::Regex;
2use std::cell::RefCell;
3use std::collections::HashMap;
4use std::fs;
5use std::path::{Path, PathBuf};
6use walkdir::WalkDir;
7
8use crate::context::ProjectConfig;
9use crate::finding::StyleFinding;
10use crate::language::{Language, SUPPORTED_EXTENSIONS};
11use crate::signals::{aggregate_detector_scores, SignalDetector, StyleSignal};
12use crate::style_ir::{StyleIr, StyleIrSummary};
13use crate::treesitter::duplication::{CrossFileDupDetector, IntraFileDupDetector};
14use crate::treesitter::engine::{ParsedFile, TreeSitterEngine};
15
16pub struct StyleIrFileInfo {
17    pub file_path: String,
18    pub summary: StyleIrSummary,
19    pub is_test: bool,
20}
21
22pub struct FullAnalysisResult {
23    pub findings: Vec<StyleFinding>,
24    pub file_count: usize,
25    pub total_lines: usize,
26    pub style_ir_files: Vec<StyleIrFileInfo>,
27}
28
29#[derive(Debug, Clone)]
30pub struct CodeIssue {
31    pub file_path: PathBuf,
32    pub line: usize,
33    pub column: usize,
34    pub rule_name: String,
35    pub message: String,
36    pub severity: Severity,
37}
38
/// Issue severity, declared from least to most serious.
///
/// The derived ordering follows declaration order, so `Mild < Spicy < Nuclear`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Severity {
    /// Minor issues
    Mild,
    /// Medium issues
    Spicy,
    /// Serious issues
    Nuclear,
}
45
46pub struct CodeAnalyzer {
47    ts_engine: TreeSitterEngine,
48    exclude_patterns: Vec<Regex>,
49    project_config: ProjectConfig,
50    cross_detector: RefCell<CrossFileDupDetector>,
51    detectors: Vec<Box<dyn SignalDetector>>,
52    direct_scores: RefCell<HashMap<StyleSignal, f64>>,
53}
54
55impl CodeAnalyzer {
56    pub fn new(exclude_patterns: &[String], lang: &str) -> Self {
57        Self::with_config(exclude_patterns, lang, ProjectConfig::default())
58    }
59
60    pub fn infection_spread(&self) -> HashMap<String, Vec<(String, usize, Vec<String>)>> {
61        self.cross_detector.borrow().infection_spread()
62    }
63
64    pub fn with_config(exclude_patterns: &[String], _lang: &str, config: ProjectConfig) -> Self {
65        // Default exclude patterns for common build/dependency directories
66        let default_excludes = [
67            "target",
68            "node_modules",
69            ".git",
70            ".svn",
71            ".hg",
72            "build",
73            "dist",
74            "out",
75            "__pycache__",
76            ".DS_Store",
77            ".venv",
78            "venv",
79            "vendor",
80        ];
81
82        let mut all_patterns: Vec<String> =
83            default_excludes.iter().map(|s| s.to_string()).collect();
84        all_patterns.extend(exclude_patterns.iter().cloned());
85
86        // Also add exclude patterns from project config
87        all_patterns.extend(config.whitelists.exclude_patterns.clone());
88
89        let patterns = all_patterns
90            .iter()
91            .filter_map(|pattern| {
92                // Convert glob patterns to regular expressions with path-boundary anchoring.
93                // Without anchors, "build" would match "mybuild/foo.o" — a substring false positive.
94                let glob_pattern = pattern
95                    .replace(".", r"\.")
96                    .replace("*", ".*")
97                    .replace("?", ".");
98                let regex_pattern = format!(r"(?:^|/){}(?:/|$)", glob_pattern);
99                Regex::new(&regex_pattern).ok()
100            })
101            .collect();
102
103        Self {
104            ts_engine: TreeSitterEngine::new(),
105            exclude_patterns: patterns,
106            project_config: config,
107            cross_detector: RefCell::new(CrossFileDupDetector::new()),
108            detectors: Vec::new(),
109            direct_scores: RefCell::new(HashMap::new()),
110        }
111    }
112
113    pub fn with_detectors(mut self, detectors: Vec<Box<dyn SignalDetector>>) -> Self {
114        self.detectors = detectors;
115        self
116    }
117
118    pub fn direct_signal_scores(&self) -> HashMap<StyleSignal, f64> {
119        self.direct_scores.borrow().clone()
120    }
121
122    fn should_exclude(&self, path: &Path) -> bool {
123        let path_str = path.to_string_lossy();
124        self.exclude_patterns
125            .iter()
126            .any(|pattern| pattern.is_match(&path_str))
127    }
128
129    /// Collect source files from a path (file or directory). Excludes
130    /// unsupported extensions and should_exclude paths. Includes generated files.
131    fn collect_source_files(&self, path: &Path) -> Vec<PathBuf> {
132        if path.is_file() {
133            if !self.should_exclude(path) {
134                let lang = Language::from_path(path);
135                if lang != Language::Unknown {
136                    return vec![path.to_path_buf()];
137                }
138            }
139            return Vec::new();
140        }
141        if !path.is_dir() {
142            return Vec::new();
143        }
144        WalkDir::new(path)
145            .into_iter()
146            .filter_map(|e| e.ok())
147            .filter(|e| !self.should_exclude(e.path()))
148            .filter(|e| {
149                e.path()
150                    .extension()
151                    .and_then(|ext| ext.to_str())
152                    .is_some_and(|ext| SUPPORTED_EXTENSIONS.contains(&ext))
153            })
154            .map(|e| e.path().to_path_buf())
155            .collect()
156    }
157
158    /// Compatibility wrapper — runs the full pipeline and converts back to `CodeIssue`s.
159    pub fn analyze_path(&self, path: &Path) -> Vec<CodeIssue> {
160        self.analyze_to_findings(path)
161            .into_iter()
162            .map(|f| f.to_code_issue())
163            .collect()
164    }
165
166    /// Full analysis pipeline returning `StyleFinding`s.
167    ///
168    /// - Phase 1: Parse all files and cache `ParsedFile`s
169    /// - Phase 2: Cross-file duplication detection
170    /// - Phase 3: Intra-file duplication detection
171    /// - Phase 4: Direct signal detection (scores + findings)
172    ///
173    /// Also populates `self.direct_scores` for downstream consumers.
174    pub fn analyze_to_findings(&self, path: &Path) -> Vec<StyleFinding> {
175        let files = self.collect_source_files(path);
176        if files.is_empty() {
177            return Vec::new();
178        }
179
180        // Phase 1: Parse all files and cache for downstream phases
181        let mut parsed_files: Vec<(ParsedFile, PathBuf, bool)> = Vec::new();
182
183        for file_path in &files {
184            if Self::is_generated_file(file_path) {
185                continue;
186            }
187            let content = match fs::read_to_string(file_path) {
188                Ok(c) => c,
189                Err(_) => continue,
190            };
191            let lang = Language::from_path(file_path);
192            if lang == Language::Unknown {
193                continue;
194            }
195            let is_test_file = Self::is_test_file(file_path);
196
197            if let Some(parsed) = self.ts_engine.parse_file(file_path, &content) {
198                parsed_files.push((parsed, file_path.clone(), is_test_file));
199            }
200        }
201
202        // Phase 2: Cross-file duplication detection
203        let mut issues: Vec<CodeIssue> = Vec::new();
204        *self.cross_detector.borrow_mut() = CrossFileDupDetector::new();
205        for (parsed, _, is_test) in &parsed_files {
206            if *is_test && self.project_config.signals.skip_tests {
207                continue;
208            }
209            self.cross_detector.borrow_mut().process_file(parsed);
210        }
211        issues.extend(self.cross_detector.borrow().find_duplicates());
212        issues.extend(self.cross_detector.borrow().find_near_duplicates());
213
214        // Phase 3: Intra-file code duplication
215        for (parsed, _, is_test) in &parsed_files {
216            if *is_test && self.project_config.signals.skip_tests {
217                continue;
218            }
219            issues.extend(IntraFileDupDetector::check(parsed));
220        }
221
222        // Phase 4: Direct signal detection (scores + findings)
223        let mut findings: Vec<StyleFinding> = issues.iter().map(From::from).collect();
224        if !self.detectors.is_empty() && !parsed_files.is_empty() {
225            let parsed_for_scores: Vec<ParsedFile> =
226                parsed_files.iter().map(|(p, _, _)| p.clone()).collect();
227            let test_flags: Vec<bool> = parsed_files
228                .iter()
229                .map(|(_, _, is_test)| *is_test)
230                .collect();
231            let skip_tests_config = self.project_config.signals.skip_tests;
232            *self.direct_scores.borrow_mut() = aggregate_detector_scores(
233                &self.detectors,
234                &parsed_for_scores,
235                &test_flags,
236                skip_tests_config,
237            );
238
239            for (parsed, file_path, is_test_file) in &parsed_files {
240                let lang = parsed.language;
241                let ir = StyleIr::from_parsed(parsed);
242                for detector in &self.detectors {
243                    if !detector.supported_languages().contains(&lang) {
244                        continue;
245                    }
246                    let findings_iter = if let Some(ref ir) = ir {
247                        detector.detect_findings_with_ir(
248                            ir,
249                            parsed,
250                            *is_test_file,
251                            skip_tests_config,
252                        )
253                    } else {
254                        detector.detect_findings(parsed, *is_test_file, skip_tests_config)
255                    };
256                    for (signal, count) in findings_iter {
257                        let count = if *is_test_file {
258                            (count as f64 * 0.2).round() as usize
259                        } else {
260                            count
261                        };
262                        if count > 0 {
263                            findings.push(StyleFinding::for_signal(
264                                signal,
265                                count,
266                                file_path.clone(),
267                            ));
268                        }
269                    }
270                }
271            }
272        }
273
274        findings
275    }
276
277    pub fn analyze_full(&self, path: &Path) -> FullAnalysisResult {
278        let files = self.collect_source_files(path);
279        if files.is_empty() {
280            return FullAnalysisResult {
281                findings: Vec::new(),
282                file_count: 0,
283                total_lines: 0,
284                style_ir_files: Vec::new(),
285            };
286        }
287
288        let mut parsed_files: Vec<(ParsedFile, PathBuf, bool)> = Vec::new();
289        let mut style_ir_files: Vec<StyleIrFileInfo> = Vec::new();
290        let mut file_count: usize = 0;
291        let mut total_lines: usize = 0;
292
293        for file_path in &files {
294            if Self::is_generated_file(file_path) {
295                continue;
296            }
297            let content = match fs::read_to_string(file_path) {
298                Ok(c) => c,
299                Err(_) => continue,
300            };
301            let lang = Language::from_path(file_path);
302            if lang == Language::Unknown {
303                continue;
304            }
305            file_count += 1;
306            total_lines += content.lines().count();
307            let is_test_file = Self::is_test_file(file_path);
308
309            if let Some(parsed) = self.ts_engine.parse_file(file_path, &content) {
310                if let Some(ir) = StyleIr::from_parsed(&parsed) {
311                    style_ir_files.push(StyleIrFileInfo {
312                        file_path: file_path.to_string_lossy().to_string(),
313                        summary: ir.summary(),
314                        is_test: is_test_file,
315                    });
316                }
317                parsed_files.push((parsed, file_path.clone(), is_test_file));
318            }
319        }
320
321        // Phase 2-3: Duplication detection
322        let mut issues: Vec<CodeIssue> = Vec::new();
323        *self.cross_detector.borrow_mut() = CrossFileDupDetector::new();
324        for (parsed, _, _) in &parsed_files {
325            self.cross_detector.borrow_mut().process_file(parsed);
326        }
327        issues.extend(self.cross_detector.borrow().find_duplicates());
328        issues.extend(self.cross_detector.borrow().find_near_duplicates());
329
330        for (parsed, _, _) in &parsed_files {
331            issues.extend(IntraFileDupDetector::check(parsed));
332        }
333
334        let mut findings: Vec<StyleFinding> = issues.iter().map(From::from).collect();
335
336        if !self.detectors.is_empty() && !parsed_files.is_empty() {
337            let parsed_for_scores: Vec<ParsedFile> =
338                parsed_files.iter().map(|(p, _, _)| p.clone()).collect();
339            let test_flags: Vec<bool> = parsed_files
340                .iter()
341                .map(|(_, _, is_test)| *is_test)
342                .collect();
343            let skip_tests_config = self.project_config.signals.skip_tests;
344            *self.direct_scores.borrow_mut() = aggregate_detector_scores(
345                &self.detectors,
346                &parsed_for_scores,
347                &test_flags,
348                skip_tests_config,
349            );
350
351            for (parsed, file_path, is_test_file) in &parsed_files {
352                let lang = parsed.language;
353                let ir = StyleIr::from_parsed(parsed);
354                for detector in &self.detectors {
355                    if !detector.supported_languages().contains(&lang) {
356                        continue;
357                    }
358                    let findings_iter = if let Some(ref ir) = ir {
359                        detector.detect_findings_with_ir(
360                            ir,
361                            parsed,
362                            *is_test_file,
363                            skip_tests_config,
364                        )
365                    } else {
366                        detector.detect_findings(parsed, *is_test_file, skip_tests_config)
367                    };
368                    for (signal, count) in findings_iter {
369                        let count = if *is_test_file {
370                            (count as f64 * 0.2).round() as usize
371                        } else {
372                            count
373                        };
374                        if count > 0 {
375                            findings.push(StyleFinding::for_signal(
376                                signal,
377                                count,
378                                file_path.clone(),
379                            ));
380                        }
381                    }
382                }
383            }
384        }
385
386        FullAnalysisResult {
387            findings,
388            file_count,
389            total_lines,
390            style_ir_files,
391        }
392    }
393
394    fn is_generated_file(path: &Path) -> bool {
395        let name = path.to_string_lossy();
396        // Protobuf generated files
397        name.ends_with(".pb.go")
398            || name.contains("_grpc.pb.go")
399            || name.ends_with(".pb.gw.go")
400            || name.ends_with(".pulsar.go")
401            || name.ends_with(".pb.cc")
402            || name.ends_with(".pb.h")
403        // Dependencies
404            || name.contains("/node_modules/")
405            || name.contains("\\node_modules\\")
406            || name.contains("/vendor/")
407            || name.contains("\\vendor\\")
408        // Minified bundles
409            || name.contains("/swagger-ui/")
410        // Generated files from code generators
411            || name.contains(".gen.")
412            || name.contains(".generated.")
413        // Minified / bundled JavaScript
414            || name.ends_with(".min.js")
415            || name.ends_with(".bundle.js")
416    }
417
418    pub fn analyze_file(&self, file_path: &Path) -> Vec<CodeIssue> {
419        if Self::is_generated_file(file_path) {
420            return vec![];
421        }
422        self.analyze_path(file_path)
423    }
424
425    fn is_test_file(path: &Path) -> bool {
426        let path_str = path.to_string_lossy();
427        let normalized = path_str.strip_prefix("./").unwrap_or(&path_str);
428
429        if normalized.contains("/tests/")
430            || normalized.contains("\\tests\\")
431            || normalized.starts_with("tests/")
432            || normalized.starts_with("tests\\")
433            || normalized.contains("/test/")
434            || normalized.contains("\\test\\")
435            || normalized.ends_with("_test.rs")
436            || normalized.ends_with("_tests.rs")
437            || normalized.ends_with("_test.c")
438            || normalized.ends_with("_test.cpp")
439            || normalized.ends_with("_test.cc")
440            || normalized.ends_with("_test.go")
441            || normalized.ends_with(".test.js")
442            || normalized.ends_with(".spec.js")
443            || normalized.ends_with(".test.jsx")
444            || normalized.ends_with(".spec.jsx")
445            || normalized.ends_with(".test.ts")
446            || normalized.ends_with(".spec.ts")
447            || normalized.ends_with(".test.tsx")
448            || normalized.ends_with(".spec.tsx")
449            || normalized.ends_with("_test.rb")
450            || normalized.ends_with("_spec.rb")
451            || normalized.ends_with("Test.java")
452            || normalized.ends_with("Tests.java")
453            || normalized.ends_with("Tests.swift")
454            || normalized.ends_with("Test.swift")
455            || normalized.ends_with("_test.zig")
456            || normalized.starts_with("test_")
457        {
458            return true;
459        }
460        // Check for example files (singular and plural)
461        if normalized.contains("/examples/")
462            || normalized.contains("\\examples\\")
463            || normalized.starts_with("examples/")
464            || normalized.starts_with("examples\\")
465            || normalized.contains("/example/")
466            || normalized.contains("\\example\\")
467            || normalized.starts_with("example/")
468            || normalized.starts_with("example\\")
469            || normalized.ends_with("_example.rs")
470            || normalized.ends_with("_examples.rs")
471        {
472            return true;
473        }
474        // Check for benchmark files
475        if normalized.contains("/benches/")
476            || normalized.contains("\\benches\\")
477            || normalized.starts_with("benches/")
478            || normalized.starts_with("benches\\")
479            || normalized.ends_with("_bench.rs")
480            || normalized.ends_with("_benches.rs")
481        {
482            return true;
483        }
484        // Check for test-files directories
485        if normalized.contains("/test-files/")
486            || normalized.contains("\\test-files\\")
487            || normalized.starts_with("test-files/")
488            || normalized.starts_with("test-files\\")
489            || normalized.contains("/test_files/")
490            || normalized.contains("\\test_files\\")
491        {
492            return true;
493        }
494        // Check for fixture/mock directories
495        if normalized.contains("/fixtures/")
496            || normalized.contains("\\fixtures\\")
497            || normalized.contains("/mocks/")
498            || normalized.contains("\\mocks\\")
499        {
500            return true;
501        }
502        false
503    }
504}
505
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Shorthand for `CodeAnalyzer::is_generated_file` on a string path.
    fn generated(path: &str) -> bool {
        CodeAnalyzer::is_generated_file(Path::new(path))
    }

    /// Shorthand for `CodeAnalyzer::is_test_file` on a string path.
    fn is_test(path: &str) -> bool {
        CodeAnalyzer::is_test_file(Path::new(path))
    }

    // ── is_generated_file ────────────────────────────────────────

    /// Objective: every protobuf-related suffix (.pb.go, _grpc.pb.go, .pb.gw.go,
    ///            .pulsar.go, .pb.cc, .pb.h) marks a file as generated.
    /// Invariants: detection depends on the suffix only, not on any path prefix.
    #[test]
    fn test_is_generated_file_detects_all_protobuf_suffixes() {
        let cases = [
            ("api.pb.go", "expected .pb.go to be generated"),
            ("service_grpc.pb.go", "expected _grpc.pb.go to be generated"),
            ("gateway.pb.gw.go", "expected .pb.gw.go to be generated"),
            ("topic.pulsar.go", "expected .pulsar.go to be generated"),
            ("types.pb.cc", "expected .pb.cc to be generated"),
            ("types.pb.h", "expected .pb.h to be generated"),
        ];
        for (path, message) in cases {
            assert!(generated(path), "{}", message);
        }
    }

    /// Objective: files below dependency/vendor directories count as generated.
    /// Invariants: /node_modules/, /vendor/ and /swagger-ui/ match whatever the
    ///             file extension is.
    #[test]
    fn test_is_generated_file_detects_dependency_directories() {
        let cases = [
            (
                "/project/node_modules/foo/index.js",
                "node_modules should be generated",
            ),
            ("/project/vendor/bar/main.rs", "vendor should be generated"),
            (
                "/project/swagger-ui/index.html",
                "swagger-ui should be generated",
            ),
        ];
        for (path, message) in cases {
            assert!(generated(path), "{}", message);
        }
    }

    /// Objective: hand-written source files are never reported as generated.
    /// Invariants: a path matching neither a generated suffix nor a generated directory
    ///             yields false.
    #[test]
    fn test_is_generated_file_does_not_flag_user_code() {
        let cases = [
            ("src/main.rs", "src/main.rs should not be generated"),
            (
                "src/server.go",
                "src/server.go (Go source) should not be generated",
            ),
            ("app.py", "app.py should not be generated"),
        ];
        for (path, message) in cases {
            assert!(!generated(path), "{}", message);
        }
    }

    /// Objective: a plain .go file whose *name* merely contains "vendor" is user code.
    /// Invariants: only a `vendor` directory component counts, never a substring.
    #[test]
    fn test_is_generated_file_does_not_false_positive_go_source() {
        assert!(
            !generated("src/vendor_service.go"),
            "vendor_service.go should not be treated as generated just because 'vendor' appears in the name"
        );
    }

    // ── is_test_file ─────────────────────────────────────────────

    #[test]
    fn test_is_test_file_detects_test_directories() {
        assert!(is_test("src/tests/helper.rs"));
        assert!(is_test("examples/hello.rs"));
        assert!(is_test("benches/perf.rs"));
        assert!(is_test("tests/fixtures/data.rs"));
        assert!(is_test("tests/mocks/service.rs"));
        assert!(is_test("test-files/input.txt"));
    }

    #[test]
    fn test_is_test_file_detects_rust_c_cpp() {
        assert!(is_test("src/foo_test.rs"));
        assert!(is_test("src/foo_tests.rs"));
        assert!(is_test("test_main.c"));
        assert!(is_test("foo_test.c"));
        assert!(is_test("foo_test.cpp"));
        assert!(is_test("foo_test.cc"));
    }

    #[test]
    fn test_is_test_file_detects_go() {
        assert!(is_test("handler_test.go"));
        assert!(is_test("pkg/service_test.go"));
        assert!(!is_test("handler.go"));
    }

    #[test]
    fn test_is_test_file_detects_js_ts() {
        assert!(is_test("app.test.js"));
        assert!(is_test("app.spec.js"));
        assert!(is_test("app.test.jsx"));
        assert!(is_test("app.spec.jsx"));
        assert!(is_test("app.test.ts"));
        assert!(is_test("app.spec.ts"));
        assert!(is_test("app.test.tsx"));
        assert!(is_test("app.spec.tsx"));
        assert!(!is_test("app.js"));
        assert!(!is_test("app.ts"));
    }

    #[test]
    fn test_is_test_file_detects_java() {
        assert!(is_test("UserServiceTest.java"));
        assert!(is_test("UserServiceTests.java"));
        assert!(!is_test("UserService.java"));
    }

    #[test]
    fn test_is_test_file_detects_ruby() {
        assert!(is_test("user_test.rb"));
        assert!(is_test("user_spec.rb"));
        assert!(!is_test("user.rb"));
    }

    #[test]
    fn test_is_test_file_detects_swift() {
        assert!(is_test("UserServiceTests.swift"));
        assert!(is_test("UserServiceTest.swift"));
        assert!(!is_test("UserService.swift"));
    }

    #[test]
    fn test_is_test_file_detects_zig() {
        assert!(is_test("main_test.zig"));
        assert!(!is_test("main.zig"));
    }

    #[test]
    fn test_is_test_file_does_not_flag_normal_source() {
        assert!(!is_test("src/main.rs"));
        assert!(!is_test("src/lib.rs"));
    }

    #[test]
    fn test_is_test_file_strips_leading_dot_slash() {
        assert!(is_test("./tests/test.rs"));
    }

    // ── should_exclude ───────────────────────────────────────────

    /// Objective: the built-in exclude patterns (target, node_modules, .git, ...) apply
    ///            even when no custom patterns are given.
    /// Invariants: `CodeAnalyzer::new` with an empty custom list still excludes common dirs.
    #[test]
    fn test_should_exclude_applies_default_patterns() {
        let analyzer = CodeAnalyzer::new(&[], "en");
        let excluded = |path: &str| analyzer.should_exclude(Path::new(path));

        assert!(
            excluded("node_modules/foo"),
            "node_modules should be excluded by default"
        );
        assert!(
            excluded("target/debug/build"),
            "target/ should be excluded by default"
        );
        assert!(!excluded("src/main.rs"), "src/ should not be excluded");
    }

    /// Objective: custom exclude patterns are added on top of the defaults.
    /// Invariants: both the custom and the default patterns are checked.
    #[test]
    fn test_should_exclude_combines_custom_and_default_patterns() {
        let analyzer = CodeAnalyzer::new(&["generated".to_string()], "en");
        let excluded = |path: &str| analyzer.should_exclude(Path::new(path));

        assert!(
            excluded("build/generated/code.rs"),
            "custom pattern 'generated' should match"
        );
        assert!(
            excluded("target/release/exe"),
            "default pattern 'target' should still match"
        );
    }

    /// Objective: a pattern does not match unrelated directories.
    /// Invariants: the "build" pattern is anchored on path-component boundaries, so it
    ///             matches "build/..." but not "src/main.rs".
    #[test]
    fn test_should_exclude_only_matches_intended_directories() {
        let analyzer = CodeAnalyzer::new(&["build".to_string()], "en");
        let excluded = |path: &str| analyzer.should_exclude(Path::new(path));

        assert!(
            excluded("build/foo.o"),
            "'build' pattern should match build/ path"
        );
        assert!(
            !excluded("src/main.rs"),
            "'build' pattern should NOT match src/ path"
        );
    }

    // ── analyze_to_findings ───────────────────────────────────────

    /// Objective: `analyze_to_findings()` returns direct detector signal findings in
    /// addition to the findings converted from `CodeIssue`s.
    /// Invariants: for a file containing `unwrap`, `expect` and `panic!`, the output
    /// includes at least one finding whose signal is `PanicAddiction`.
    #[test]
    fn test_analyze_to_findings_includes_detector_findings() {
        use crate::detectors::PanicAddictionDetector;
        use std::io::Write;

        let source = "fn main() {\n    let _ = foo.unwrap();\n    let _ = bar.expect(\"msg\");\n    panic!(\"boom\");\n    let x = 1;\n}\n";

        let dir = tempfile::tempdir().expect("tempdir");
        let file_path = dir.path().join("code.rs");
        let mut f = std::fs::File::create(&file_path).expect("create temp file");
        f.write_all(source.as_bytes()).expect("write");

        let detectors: Vec<Box<dyn SignalDetector>> =
            vec![Box::new(PanicAddictionDetector::new())];
        let analyzer = CodeAnalyzer::new(&[], "en").with_detectors(detectors);

        let findings = analyzer.analyze_to_findings(dir.path());

        // Must have at least one finding with PanicAddiction signal
        let has_panic_finding = findings
            .iter()
            .any(|f| f.signal == StyleSignal::PanicAddiction);
        assert!(
            has_panic_finding,
            "expected at least one PanicAddiction finding from detector, got {} total findings",
            findings.len()
        );

        // Verify at least 1 finding exists from the detector
        assert!(
            !findings.is_empty(),
            "expected at least 1 total finding, got {}",
            findings.len()
        );
    }
}