garbage_code_hunter/
use std::fs;
use std::path::{Path, PathBuf};

use regex::Regex;
use syn::parse_file;
use walkdir::WalkDir;

use crate::cross_file::{CrossFileAnalyzer, CrossFileConfig};
use crate::rules::RuleEngine;
10#[derive(Debug, Clone)]
11pub struct CodeIssue {
12 pub file_path: PathBuf,
13 pub line: usize,
14 pub column: usize,
15 pub rule_name: String,
16 pub message: String,
17 pub severity: Severity,
18}
19
/// Severity of a reported issue.
///
/// The derived `Ord` follows declaration order, so severities compare as
/// `Mild < Spicy < Nuclear`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Severity {
    /// Minor issue.
    Mild,
    /// Noteworthy issue.
    Spicy,
    /// Most severe issue level.
    Nuclear,
}
27pub struct CodeAnalyzer {
28 rule_engine: RuleEngine,
29 exclude_patterns: Vec<Regex>,
30 lang: String,
31}
32
33impl CodeAnalyzer {
34 pub fn rule_names(&self) -> Vec<&'static str> {
35 self.rule_engine.rule_names()
36 }
37
38 pub fn new(exclude_patterns: &[String], lang: &str) -> Self {
39 let default_excludes = [
41 "target",
42 "node_modules",
43 ".git",
44 ".svn",
45 ".hg",
46 "build",
47 "dist",
48 "out",
49 "__pycache__",
50 ".DS_Store",
51 ];
52
53 let mut all_patterns: Vec<String> =
54 default_excludes.iter().map(|s| s.to_string()).collect();
55 all_patterns.extend(exclude_patterns.iter().cloned());
56
57 let patterns = all_patterns
58 .iter()
59 .filter_map(|pattern| {
60 let regex_pattern = pattern
62 .replace(".", r"\.")
63 .replace("*", ".*")
64 .replace("?", ".");
65 Regex::new(®ex_pattern).ok()
66 })
67 .collect();
68
69 Self {
70 rule_engine: RuleEngine::new(),
71 exclude_patterns: patterns,
72 lang: lang.to_string(),
73 }
74 }
75
76 fn should_exclude(&self, path: &Path) -> bool {
77 let path_str = path.to_string_lossy();
78 self.exclude_patterns
79 .iter()
80 .any(|pattern| pattern.is_match(&path_str))
81 }
82
83 pub fn analyze_path(&self, path: &Path) -> Vec<CodeIssue> {
84 let mut issues = Vec::new();
85
86 if path.is_file() {
87 if !self.should_exclude(path) {
88 if let Some(ext) = path.extension() {
89 if ext == "rs" {
90 issues.extend(self.analyze_file(path));
91 }
92 }
93 }
94 } else if path.is_dir() {
95 let mut cross_file = CrossFileAnalyzer::with_config(CrossFileConfig::default());
97
98 for entry in WalkDir::new(path)
99 .into_iter()
100 .filter_map(|e| e.ok())
101 .filter(|e| !self.should_exclude(e.path()))
102 .filter(|e| e.path().extension().is_some_and(|ext| ext == "rs"))
103 {
104 issues.extend(self.analyze_file(entry.path()));
106
107 if let Ok(content) = fs::read_to_string(entry.path()) {
109 if let Err(e) = cross_file.process_file(entry.path(), &content) {
110 eprintln!(
111 "Warning: Failed to process {} for cross-file analysis: {}",
112 entry.path().display(),
113 e
114 );
115 }
116 }
117 }
118
119 let duplicates = cross_file.find_all_duplicates();
121 for dup in duplicates {
122 let severity = dup.severity.clone();
123 for location in &dup.fingerprint.locations {
124 issues.push(CodeIssue {
125 file_path: location.file_path.clone(),
126 line: location.line_start,
127 column: 0,
128 rule_name: "cross-file-duplication".to_string(),
129 message: format!(
130 "Duplicated function '{}' found in {} files ({} occurrences)",
131 dup.fingerprint.function_name, dup.file_count, dup.total_occurrences
132 ),
133 severity: severity.clone(),
134 });
135 }
136 }
137 }
138
139 issues
140 }
141
142 pub fn analyze_file(&self, file_path: &Path) -> Vec<CodeIssue> {
143 let content = match fs::read_to_string(file_path) {
144 Ok(content) => content,
145 Err(_) => return vec![],
146 };
147
148 let syntax_tree = match parse_file(&content) {
149 Ok(tree) => tree,
150 Err(_) => return vec![],
151 };
152
153 let is_test_file = Self::is_test_file(file_path, &content);
154
155 self.rule_engine
156 .check_file(file_path, &syntax_tree, &content, &self.lang, is_test_file)
157 }
158
159 fn is_test_file(path: &Path, content: &str) -> bool {
160 let path_str = path.to_string_lossy();
161 let normalized = path_str.strip_prefix("./").unwrap_or(&path_str);
163
164 if normalized.contains("/tests/")
166 || normalized.contains("\\tests\\")
167 || normalized.starts_with("tests/")
168 || normalized.starts_with("tests\\")
169 || normalized.ends_with("_test.rs")
170 || normalized.ends_with("_tests.rs")
171 {
172 return true;
173 }
174 if normalized.contains("/examples/")
176 || normalized.contains("\\examples\\")
177 || normalized.starts_with("examples/")
178 || normalized.starts_with("examples\\")
179 || normalized.contains("/example/")
180 || normalized.contains("\\example\\")
181 || normalized.starts_with("example/")
182 || normalized.starts_with("example\\")
183 || normalized.ends_with("_example.rs")
184 || normalized.ends_with("_examples.rs")
185 {
186 return true;
187 }
188 if normalized.contains("/benches/")
190 || normalized.contains("\\benches\\")
191 || normalized.starts_with("benches/")
192 || normalized.starts_with("benches\\")
193 || normalized.ends_with("_bench.rs")
194 || normalized.ends_with("_benches.rs")
195 {
196 return true;
197 }
198 if normalized.contains("/test-files/")
200 || normalized.contains("\\test-files\\")
201 || normalized.starts_with("test-files/")
202 || normalized.starts_with("test-files\\")
203 || normalized.contains("/test_files/")
204 || normalized.contains("\\test_files\\")
205 {
206 return true;
207 }
208 if normalized.contains("/fixtures/")
210 || normalized.contains("\\fixtures\\")
211 || normalized.contains("/mocks/")
212 || normalized.contains("\\mocks\\")
213 {
214 return true;
215 }
216 content.contains("#[cfg(test)]")
218 }
219}