use regex::Regex;
use std::fs;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
use crate::context::{FileContext, ProjectConfig};
use crate::language::{Language, SUPPORTED_EXTENSIONS};
use crate::rules::generic::GenericRuleEngine;
use crate::treesitter::duplication::{CrossFileDupDetector, IntraFileDupDetector};
use crate::treesitter::{TreeSitterEngine, TreeSitterRuleEngine};
/// A single finding returned by the analyzer, tied to a position in a file.
#[derive(Debug, Clone)]
pub struct CodeIssue {
/// File the finding was reported for.
pub file_path: PathBuf,
/// Line of the finding. NOTE(review): whether this is 0- or 1-based is not
/// visible in this file — confirm against the rule engines.
pub line: usize,
/// Column of the finding. NOTE(review): base and unit (bytes vs. chars) are
/// not visible in this file — confirm against the rule engines.
pub column: usize,
/// Name of the rule that produced the finding.
pub rule_name: String,
/// Text describing the finding.
pub message: String,
/// Severity level assigned to the finding.
pub severity: Severity,
}
/// Severity level attached to a [`CodeIssue`].
///
/// The derived ordering follows declaration order, so
/// `Mild < Spicy < Nuclear`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Severity {
    /// Lowest level in the derived ordering.
    Mild,
    /// Middle level in the derived ordering.
    Spicy,
    /// Highest level in the derived ordering.
    Nuclear,
}
/// Walks files or directories and collects [`CodeIssue`]s from the rule
/// engines and the duplication detectors.
pub struct CodeAnalyzer {
// Fallback engine, used for C/C++ files when tree-sitter parsing yields nothing.
generic_engine: GenericRuleEngine,
// Parser that turns file contents into the parsed form the rules consume.
ts_engine: TreeSitterEngine,
// Rule engine run against successfully parsed files; also the source of `rule_names`.
ts_rule_engine: TreeSitterRuleEngine,
// Compiled exclusion patterns; a path is skipped when any of them matches it.
exclude_patterns: Vec<Regex>,
// Passed through to the generic engine's `check_file`.
// NOTE(review): presumably the output/message language — confirm against GenericRuleEngine.
lang: String,
}
impl CodeAnalyzer {
/// Returns the rule names reported by the tree-sitter rule engine.
pub fn rule_names(&self) -> Vec<&'static str> {
    let engine = &self.ts_rule_engine;
    engine.rule_names()
}
/// Creates an analyzer that uses the default [`ProjectConfig`].
///
/// `exclude_patterns` and `lang` are forwarded unchanged to
/// [`Self::with_config`].
pub fn new(exclude_patterns: &[String], lang: &str) -> Self {
    let default_config = ProjectConfig::default();
    Self::with_config(exclude_patterns, lang, default_config)
}
/// Creates an analyzer from caller-supplied exclusions and a project config.
///
/// The effective exclusion list is, in order: a built-in set of common
/// build/VCS directory names, then `exclude_patterns`, then
/// `config.whitelists.exclude_patterns`. Each entry is treated as a simple
/// glob (`*` = any run of characters, `?` = any single character, `.` is
/// literal) and compiled to a regex. The regexes are not anchored, so an
/// entry matches when it occurs anywhere in a path.
///
/// Entries that do not compile to a valid regex are skipped.
///
/// Only `config.whitelists.exclude_patterns` is read here; `config` itself
/// is not stored on the returned analyzer.
pub fn with_config(exclude_patterns: &[String], lang: &str, config: ProjectConfig) -> Self {
    // Names skipped regardless of what the caller configures.
    const DEFAULT_EXCLUDES: [&str; 10] = [
        "target",
        "node_modules",
        ".git",
        ".svn",
        ".hg",
        "build",
        "dist",
        "out",
        "__pycache__",
        ".DS_Store",
    ];

    let mut all_patterns: Vec<String> = DEFAULT_EXCLUDES
        .iter()
        .map(|name| name.to_string())
        .collect();
    all_patterns.extend(exclude_patterns.iter().cloned());
    all_patterns.extend(config.whitelists.exclude_patterns.clone());

    let patterns = all_patterns
        .iter()
        .filter_map(|pattern| {
            // Escape literal dots first so the dots introduced by the `*`
            // and `?` translations below keep their regex meaning.
            let regex_pattern = pattern
                .replace(".", r"\.")
                .replace("*", ".*")
                .replace("?", ".");
            Regex::new(&regex_pattern).ok()
        })
        .collect();

    let mut ts_rule_engine = TreeSitterRuleEngine::new();
    crate::treesitter::rules::rust_rules::register_rust_rules(&mut ts_rule_engine);

    Self {
        generic_engine: GenericRuleEngine::new(),
        ts_engine: TreeSitterEngine::new(),
        ts_rule_engine,
        exclude_patterns: patterns,
        lang: lang.to_string(),
    }
}
/// Reports whether `path` matches at least one compiled exclusion pattern.
///
/// Matching is done against the lossy UTF-8 rendering of the whole path.
fn should_exclude(&self, path: &Path) -> bool {
    let candidate = path.to_string_lossy();
    for regex in &self.exclude_patterns {
        if regex.is_match(&candidate) {
            return true;
        }
    }
    false
}
/// Analyzes a single file or every supported file below a directory.
///
/// * For a file: returns its issues, or nothing when the path is excluded
///   or its language is `Language::Unknown`.
/// * For a directory: walks it recursively, analyzes every non-excluded
///   file whose extension is in `SUPPORTED_EXTENSIONS`, then appends
///   cross-file duplicates, cross-file near-duplicates and intra-file
///   duplicates, in that order.
/// * For anything else (e.g. a path that does not exist): returns an empty
///   list.
///
/// Files that cannot be read or parsed are skipped.
pub fn analyze_path(&self, path: &Path) -> Vec<CodeIssue> {
    if path.is_file() {
        let analyzable =
            !self.should_exclude(path) && Language::from_path(path) != Language::Unknown;
        return if analyzable {
            self.analyze_file(path)
        } else {
            Vec::new()
        };
    }
    if !path.is_dir() {
        return Vec::new();
    }

    let files: Vec<PathBuf> = WalkDir::new(path)
        .into_iter()
        // Prune excluded directories instead of walking into them and
        // discarding every entry afterwards; exclusion patterns are
        // unanchored, so everything below an excluded directory would be
        // rejected anyway.
        .filter_entry(|entry| !self.should_exclude(entry.path()))
        .filter_map(Result::ok)
        .filter(|entry| {
            entry
                .path()
                .extension()
                .and_then(|ext| ext.to_str())
                .is_some_and(|ext| SUPPORTED_EXTENSIONS.contains(&ext))
        })
        .map(|entry| entry.path().to_path_buf())
        .collect();

    let mut issues: Vec<CodeIssue> = files
        .iter()
        .flat_map(|file_path| self.analyze_file(file_path))
        .collect();

    // Read and parse each file once for both duplication detectors. The
    // intra-file findings are buffered so they still come after the
    // cross-file findings in the returned list.
    let mut cross_detector = CrossFileDupDetector::new();
    let mut intra_file_issues: Vec<CodeIssue> = Vec::new();
    for file_path in &files {
        let Ok(content) = fs::read_to_string(file_path) else {
            continue;
        };
        let Some(parsed) = self.ts_engine.parse_file(file_path, &content) else {
            continue;
        };
        cross_detector.process_file(&parsed);
        intra_file_issues.extend(IntraFileDupDetector::check(&parsed));
    }

    issues.extend(cross_detector.find_duplicates());
    issues.extend(cross_detector.find_near_duplicates());
    issues.extend(intra_file_issues);
    issues
}
/// Runs the rule engines against one file and returns the issues found.
///
/// The file is parsed with the tree-sitter engine and checked with the
/// tree-sitter rules, using a default [`ProjectConfig`]. When parsing yields
/// nothing and the file is C or C++, the generic engine is used instead.
/// An unreadable file, or an unparsed file in any other language, produces
/// an empty list.
pub fn analyze_file(&self, file_path: &Path) -> Vec<CodeIssue> {
    let Ok(source) = fs::read_to_string(file_path) else {
        return Vec::new();
    };
    let language = Language::from_path(file_path);
    let in_test_code = Self::is_test_file(file_path, &source);

    match self.ts_engine.parse_file(file_path, &source) {
        Some(tree) => {
            let file_context = FileContext::from_path(file_path);
            let project_config = ProjectConfig::default();
            self.ts_rule_engine.check_file_with_context(
                &tree,
                in_test_code,
                &file_context,
                &project_config,
            )
        }
        None => {
            let is_c_family = language == Language::C || language == Language::Cpp;
            if is_c_family {
                self.generic_engine
                    .check_file(file_path, &source, &self.lang)
            } else {
                Vec::new()
            }
        }
    }
}
/// Heuristically decides whether a file holds test, example, benchmark or
/// fixture code rather than production code.
///
/// Returns `true` when any of the following holds:
/// * a directory component of `path` is one of `tests`, `test`, `examples`,
///   `example`, `benches`, `test-files`, `test_files`, `fixtures`, `mocks`
///   (both `/` and `\` are accepted as separators);
/// * the file name ends with a known test/example/bench suffix such as
///   `_test.rs` or `_bench.rs`;
/// * the file name starts with `test_`;
/// * `content` contains the literal text `#[cfg(test)]`.
fn is_test_file(path: &Path, content: &str) -> bool {
    const SUPPORT_DIRS: [&str; 9] = [
        "tests",
        "test",
        "examples",
        "example",
        "benches",
        "test-files",
        "test_files",
        "fixtures",
        "mocks",
    ];
    const SUPPORT_SUFFIXES: [&str; 9] = [
        "_test.rs",
        "_tests.rs",
        "_test.c",
        "_test.cpp",
        "_test.cc",
        "_example.rs",
        "_examples.rs",
        "_bench.rs",
        "_benches.rs",
    ];

    let path_str = path.to_string_lossy();
    let normalized = path_str.strip_prefix("./").unwrap_or(&path_str);

    // Split on both separators so Windows-style paths are recognised on any
    // platform; the last segment is the file name, the rest are directories.
    let mut segments: Vec<&str> = normalized
        .split(|c: char| c == '/' || c == '\\')
        .collect();
    let file_name = segments.pop().unwrap_or("");

    // Checking whole components treats every directory name the same way
    // wherever it sits in the path, including as the first component.
    let in_support_dir = segments
        .iter()
        .any(|segment| SUPPORT_DIRS.contains(segment));
    let has_support_suffix = SUPPORT_SUFFIXES
        .iter()
        .any(|suffix| file_name.ends_with(*suffix));
    // The `test_` prefix is a file-name convention, so test it on the file
    // name. The whole-path check is kept so paths that matched before still do.
    let has_test_prefix = file_name.starts_with("test_") || normalized.starts_with("test_");

    in_support_dir || has_support_suffix || has_test_prefix || content.contains("#[cfg(test)]")
}
}