garbage_code_hunter/
analyzer.rs1use regex::Regex;
2use std::fs;
3use std::path::{Path, PathBuf};
4use walkdir::WalkDir;
5
6use crate::context::{FileContext, ProjectConfig};
7use crate::language::{Language, SUPPORTED_EXTENSIONS};
8use crate::rules::generic::GenericRuleEngine;
9use crate::treesitter::duplication::{CrossFileDupDetector, IntraFileDupDetector};
10use crate::treesitter::{TreeSitterEngine, TreeSitterRuleEngine};
11
12#[derive(Debug, Clone)]
13pub struct CodeIssue {
14 pub file_path: PathBuf,
15 pub line: usize,
16 pub column: usize,
17 pub rule_name: String,
18 pub message: String,
19 pub severity: Severity,
20}
21
/// Severity attached to a [`CodeIssue`].
///
/// The variants are declared so that the derived ordering is
/// `Mild < Spicy < Nuclear`, which allows sorting and threshold comparisons.
/// The enum is a field-less tag, so it is `Copy` and can be passed by value
/// without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Severity {
    /// Lowest level in the derived ordering.
    Mild,
    /// Middle level in the derived ordering.
    Spicy,
    /// Highest level in the derived ordering.
    Nuclear,
}
28
29pub struct CodeAnalyzer {
30 generic_engine: GenericRuleEngine,
31 ts_engine: TreeSitterEngine,
32 ts_rule_engine: TreeSitterRuleEngine,
33 exclude_patterns: Vec<Regex>,
34 lang: String,
35}
36
37impl CodeAnalyzer {
38 pub fn rule_names(&self) -> Vec<&'static str> {
39 self.ts_rule_engine.rule_names()
40 }
41
42 pub fn new(exclude_patterns: &[String], lang: &str) -> Self {
43 Self::with_config(exclude_patterns, lang, ProjectConfig::default())
44 }
45
46 pub fn with_config(exclude_patterns: &[String], lang: &str, config: ProjectConfig) -> Self {
47 let default_excludes = [
49 "target",
50 "node_modules",
51 ".git",
52 ".svn",
53 ".hg",
54 "build",
55 "dist",
56 "out",
57 "__pycache__",
58 ".DS_Store",
59 ];
60
61 let mut all_patterns: Vec<String> =
62 default_excludes.iter().map(|s| s.to_string()).collect();
63 all_patterns.extend(exclude_patterns.iter().cloned());
64
65 all_patterns.extend(config.whitelists.exclude_patterns.clone());
67
68 let patterns = all_patterns
69 .iter()
70 .filter_map(|pattern| {
71 let regex_pattern = pattern
73 .replace(".", r"\.")
74 .replace("*", ".*")
75 .replace("?", ".");
76 Regex::new(®ex_pattern).ok()
77 })
78 .collect();
79
80 let mut ts_rule_engine = TreeSitterRuleEngine::new();
81 crate::treesitter::rules::rust_rules::register_rust_rules(&mut ts_rule_engine);
82
83 Self {
84 generic_engine: GenericRuleEngine::new(),
85 ts_engine: TreeSitterEngine::new(),
86 ts_rule_engine,
87 exclude_patterns: patterns,
88 lang: lang.to_string(),
89 }
90 }
91
92 fn should_exclude(&self, path: &Path) -> bool {
93 let path_str = path.to_string_lossy();
94 self.exclude_patterns
95 .iter()
96 .any(|pattern| pattern.is_match(&path_str))
97 }
98
99 pub fn analyze_path(&self, path: &Path) -> Vec<CodeIssue> {
100 if path.is_file() {
101 if !self.should_exclude(path) {
102 let lang = Language::from_path(path);
103 if lang != Language::Unknown {
104 return self.analyze_file(path);
105 }
106 }
107 return Vec::new();
108 }
109
110 if !path.is_dir() {
111 return Vec::new();
112 }
113
114 let files: Vec<PathBuf> = WalkDir::new(path)
116 .into_iter()
117 .filter_map(|e| e.ok())
118 .filter(|e| !self.should_exclude(e.path()))
119 .filter(|e| {
120 e.path()
121 .extension()
122 .and_then(|ext| ext.to_str())
123 .is_some_and(|ext| SUPPORTED_EXTENSIONS.contains(&ext))
124 })
125 .map(|e| e.path().to_path_buf())
126 .collect();
127
128 let mut issues: Vec<CodeIssue> = files
130 .iter()
131 .flat_map(|file_path| self.analyze_file(file_path))
132 .collect();
133
134 let mut cross_detector = CrossFileDupDetector::new();
136 for file_path in &files {
137 if let Ok(content) = fs::read_to_string(file_path) {
138 if let Some(parsed) = self.ts_engine.parse_file(file_path, &content) {
139 cross_detector.process_file(&parsed);
140 }
141 }
142 }
143 issues.extend(cross_detector.find_duplicates());
144 issues.extend(cross_detector.find_near_duplicates());
145
146 for file_path in &files {
148 if let Ok(content) = fs::read_to_string(file_path) {
149 if let Some(parsed) = self.ts_engine.parse_file(file_path, &content) {
150 issues.extend(IntraFileDupDetector::check(&parsed));
151 }
152 }
153 }
154
155 issues
156 }
157
158 pub fn analyze_file(&self, file_path: &Path) -> Vec<CodeIssue> {
159 let content = match fs::read_to_string(file_path) {
160 Ok(content) => content,
161 Err(_) => return vec![],
162 };
163
164 let lang = Language::from_path(file_path);
165 let is_test_file = Self::is_test_file(file_path, &content);
166
167 if let Some(parsed) = self.ts_engine.parse_file(file_path, &content) {
169 let context = FileContext::from_path(file_path);
170 self.ts_rule_engine.check_file_with_context(
171 &parsed,
172 is_test_file,
173 &context,
174 &ProjectConfig::default(),
175 )
176 } else if lang == Language::C || lang == Language::Cpp {
177 self.generic_engine
179 .check_file(file_path, &content, &self.lang)
180 } else {
181 vec![]
182 }
183 }
184
185 fn is_test_file(path: &Path, content: &str) -> bool {
186 let path_str = path.to_string_lossy();
187 let normalized = path_str.strip_prefix("./").unwrap_or(&path_str);
189
190 if normalized.contains("/tests/")
192 || normalized.contains("\\tests\\")
193 || normalized.starts_with("tests/")
194 || normalized.starts_with("tests\\")
195 || normalized.contains("/test/")
196 || normalized.contains("\\test\\")
197 || normalized.ends_with("_test.rs")
198 || normalized.ends_with("_tests.rs")
199 || normalized.ends_with("_test.c")
200 || normalized.ends_with("_test.cpp")
201 || normalized.ends_with("_test.cc")
202 || normalized.starts_with("test_")
203 {
204 return true;
205 }
206 if normalized.contains("/examples/")
208 || normalized.contains("\\examples\\")
209 || normalized.starts_with("examples/")
210 || normalized.starts_with("examples\\")
211 || normalized.contains("/example/")
212 || normalized.contains("\\example\\")
213 || normalized.starts_with("example/")
214 || normalized.starts_with("example\\")
215 || normalized.ends_with("_example.rs")
216 || normalized.ends_with("_examples.rs")
217 {
218 return true;
219 }
220 if normalized.contains("/benches/")
222 || normalized.contains("\\benches\\")
223 || normalized.starts_with("benches/")
224 || normalized.starts_with("benches\\")
225 || normalized.ends_with("_bench.rs")
226 || normalized.ends_with("_benches.rs")
227 {
228 return true;
229 }
230 if normalized.contains("/test-files/")
232 || normalized.contains("\\test-files\\")
233 || normalized.starts_with("test-files/")
234 || normalized.starts_with("test-files\\")
235 || normalized.contains("/test_files/")
236 || normalized.contains("\\test_files\\")
237 {
238 return true;
239 }
240 if normalized.contains("/fixtures/")
242 || normalized.contains("\\fixtures\\")
243 || normalized.contains("/mocks/")
244 || normalized.contains("\\mocks\\")
245 {
246 return true;
247 }
248 content.contains("#[cfg(test)]")
250 }
251}