garbage_code_hunter/
use std::fs;
use std::path::{Path, PathBuf};

use regex::Regex;
use syn::parse_file;
use walkdir::WalkDir;

use crate::cross_file::{CrossFileAnalyzer, CrossFileConfig};
use crate::rules::RuleEngine;
10#[derive(Debug, Clone)]
11pub struct CodeIssue {
12 pub file_path: PathBuf,
13 pub line: usize,
14 pub column: usize,
15 pub rule_name: String,
16 pub message: String,
17 pub severity: Severity,
18}
19
/// Severity of a reported issue.
///
/// The derived `Ord` follows declaration order, so severities compare as
/// `Mild < Spicy < Nuclear`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Severity {
    /// Minor issue.
    Mild,
    /// Noteworthy issue.
    Spicy,
    /// Most severe issue level.
    Nuclear,
}
27pub struct CodeAnalyzer {
28 rule_engine: RuleEngine,
29 exclude_patterns: Vec<Regex>,
30 lang: String,
31}
32
33impl CodeAnalyzer {
34 pub fn rule_names(&self) -> Vec<&'static str> {
35 self.rule_engine.rule_names()
36 }
37
38 pub fn new(exclude_patterns: &[String], lang: &str) -> Self {
39 let default_excludes = [
41 "target",
42 "node_modules",
43 ".git",
44 ".svn",
45 ".hg",
46 "build",
47 "dist",
48 "out",
49 "__pycache__",
50 ".DS_Store",
51 ];
52
53 let mut all_patterns: Vec<String> =
54 default_excludes.iter().map(|s| s.to_string()).collect();
55 all_patterns.extend(exclude_patterns.iter().cloned());
56
57 let patterns = all_patterns
58 .iter()
59 .filter_map(|pattern| {
60 let regex_pattern = pattern
62 .replace(".", r"\.")
63 .replace("*", ".*")
64 .replace("?", ".");
65 Regex::new(®ex_pattern).ok()
66 })
67 .collect();
68
69 Self {
70 rule_engine: RuleEngine::new(),
71 exclude_patterns: patterns,
72 lang: lang.to_string(),
73 }
74 }
75
76 fn should_exclude(&self, path: &Path) -> bool {
77 let path_str = path.to_string_lossy();
78 self.exclude_patterns
79 .iter()
80 .any(|pattern| pattern.is_match(&path_str))
81 }
82
83 pub fn analyze_path(&self, path: &Path) -> Vec<CodeIssue> {
84 let mut issues = Vec::new();
85
86 if path.is_file() {
87 if !self.should_exclude(path) {
88 if let Some(ext) = path.extension() {
89 if ext == "rs" {
90 issues.extend(self.analyze_file(path));
91 }
92 }
93 }
94 } else if path.is_dir() {
95 let mut cross_file = CrossFileAnalyzer::with_config(CrossFileConfig::default());
97
98 for entry in WalkDir::new(path)
99 .into_iter()
100 .filter_map(|e| e.ok())
101 .filter(|e| !self.should_exclude(e.path()))
102 .filter(|e| e.path().extension().is_some_and(|ext| ext == "rs"))
103 {
104 issues.extend(self.analyze_file(entry.path()));
106
107 if let Ok(content) = fs::read_to_string(entry.path()) {
109 if let Err(e) = cross_file.process_file(entry.path(), &content) {
110 eprintln!(
111 "Warning: Failed to process {} for cross-file analysis: {}",
112 entry.path().display(),
113 e
114 );
115 }
116 }
117 }
118
119 let duplicates = cross_file.find_all_duplicates();
121 for dup in duplicates {
122 let severity = dup.severity.clone();
123 for location in &dup.fingerprint.locations {
124 issues.push(CodeIssue {
125 file_path: location.file_path.clone(),
126 line: location.line_start,
127 column: 0,
128 rule_name: "cross-file-duplication".to_string(),
129 message: format!(
130 "Duplicated function '{}' found in {} files ({} occurrences)",
131 dup.fingerprint.function_name, dup.file_count, dup.total_occurrences
132 ),
133 severity: severity.clone(),
134 });
135 }
136 }
137 }
138
139 issues
140 }
141
142 pub fn analyze_file(&self, file_path: &Path) -> Vec<CodeIssue> {
143 let content = match fs::read_to_string(file_path) {
144 Ok(content) => content,
145 Err(_) => return vec![],
146 };
147
148 let syntax_tree = match parse_file(&content) {
149 Ok(tree) => tree,
150 Err(_) => return vec![],
151 };
152
153 let is_test_file = Self::is_test_file(file_path, &content);
154
155 self.rule_engine
156 .check_file(file_path, &syntax_tree, &content, &self.lang, is_test_file)
157 }
158
159 fn is_test_file(path: &Path, content: &str) -> bool {
160 let path_str = path.to_string_lossy();
161 let normalized = path_str.strip_prefix("./").unwrap_or(&path_str);
163
164 if normalized.contains("/tests/")
166 || normalized.contains("\\tests\\")
167 || normalized.starts_with("tests/")
168 || normalized.starts_with("tests\\")
169 || normalized.ends_with("_test.rs")
170 || normalized.ends_with("_tests.rs")
171 {
172 return true;
173 }
174 if normalized.contains("/examples/")
176 || normalized.contains("\\examples\\")
177 || normalized.starts_with("examples/")
178 || normalized.starts_with("examples\\")
179 || normalized.contains("/example/")
180 || normalized.contains("\\example\\")
181 || normalized.starts_with("example/")
182 || normalized.starts_with("example\\")
183 || normalized.ends_with("_example.rs")
184 || normalized.ends_with("_examples.rs")
185 {
186 return true;
187 }
188 if normalized.contains("/benches/")
190 || normalized.contains("\\benches\\")
191 || normalized.starts_with("benches/")
192 || normalized.starts_with("benches\\")
193 || normalized.ends_with("_bench.rs")
194 || normalized.ends_with("_benches.rs")
195 {
196 return true;
197 }
198 if normalized.contains("/test-files/")
200 || normalized.contains("\\test-files\\")
201 || normalized.starts_with("test-files/")
202 || normalized.starts_with("test-files\\")
203 || normalized.contains("/test_files/")
204 || normalized.contains("\\test_files\\")
205 {
206 return true;
207 }
208 if normalized.contains("/fixtures/")
210 || normalized.contains("\\fixtures\\")
211 || normalized.contains("/mocks/")
212 || normalized.contains("\\mocks\\")
213 {
214 return true;
215 }
216 content.contains("#[cfg(test)]")
218 }
219}