use crate::models::{Finding, Severity};
use anyhow::Result;
use serde::{Deserialize, Serialize};
/// Derives a 16-character lowercase hex identifier for a finding from its
/// detector name, file path and line number.
///
/// The three fields are hashed with 64-bit FNV-1a in the order
/// `detector`, `file`, `line` (little-endian bytes). A single `0xff` byte is
/// fed in after each of the two string fields; that byte never occurs in
/// UTF-8 text, so `("ab", "c")` and `("a", "bc")` hash differently.
pub fn finding_id(detector: &str, file: &str, line: u32) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const FNV_PRIME: u64 = 0x100000001b3;
    const FIELD_SEPARATOR: &[u8] = &[0xff];

    let line_bytes = line.to_le_bytes();
    let segments: [&[u8]; 5] = [
        detector.as_bytes(),
        FIELD_SEPARATOR,
        file.as_bytes(),
        FIELD_SEPARATOR,
        &line_bytes,
    ];

    // FNV-1a step: XOR the byte in first, then multiply by the prime.
    let digest = segments
        .iter()
        .flat_map(|segment| segment.iter())
        .fold(FNV_OFFSET_BASIS, |acc, &byte| {
            (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
        });

    format!("{:016x}", digest)
}
use std::collections::HashMap;
/// Scope tag reported by a detector (see `Detector::scope` and
/// `Detector::detector_scope`).
///
/// NOTE(review): what each scope means for scheduling or caching is not
/// visible in this file — confirm against the code that consumes this enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum DetectorScope {
/// Returned by the default `Detector::detector_scope` when
/// `requires_graph()` is `false`.
FileLocal,
/// Returned by the default `Detector::detector_scope` when
/// `requires_graph()` is `true`.
FileScopedGraph,
/// Value returned by the default `Detector::scope`.
GraphWide,
}
/// Outcome of one detector run: its findings plus timing and status.
#[derive(Debug, Clone)]
pub struct DetectorResult {
/// Name of the detector this result belongs to.
pub detector_name: String,
/// Findings carried by the result; the `failure` and `skipped`
/// constructors leave this empty.
pub findings: Vec<Finding>,
/// Run duration in milliseconds (unit taken from the field name); the
/// `skipped` constructor records 0.
pub duration_ms: u64,
/// Set to `true` by the `success` and `skipped` constructors and to
/// `false` by `failure`.
pub success: bool,
/// Error message; set to `Some` by `failure` and to `None` by `success`
/// and `skipped`.
pub error: Option<String>,
}
impl DetectorResult {
    /// Builds a successful result carrying `findings` and the measured
    /// `duration_ms`; `error` is `None`.
    pub fn success(detector_name: String, findings: Vec<Finding>, duration_ms: u64) -> Self {
        Self {
            success: true,
            error: None,
            detector_name,
            findings,
            duration_ms,
        }
    }

    /// Builds a failed result: no findings, `success == false`, and the
    /// given message stored in `error`.
    pub fn failure(detector_name: String, error: String, duration_ms: u64) -> Self {
        let findings = Vec::new();
        let error = Some(error);
        Self {
            success: false,
            detector_name,
            findings,
            duration_ms,
            error,
        }
    }

    /// Builds the result for a detector that was not run.
    ///
    /// It is recorded exactly like a success with no findings and a
    /// duration of 0 ms, so it cannot be told apart from an empty success
    /// by inspecting the fields.
    pub fn skipped(detector_name: &str) -> Self {
        Self::success(detector_name.to_owned(), Vec::new(), 0)
    }
}
/// Configuration handed to a detector: free-form options plus the
/// multipliers and threshold resolver used to scale its thresholds.
///
/// `Default` is implemented by hand (not derived) so that
/// `DetectorConfig::default()` and [`DetectorConfig::new`] agree. A derived
/// impl would set both multipliers to `0.0` instead of the neutral `1.0`.
#[derive(Debug, Clone)]
pub struct DetectorConfig {
    /// Optional repository identifier, set through `with_repo_id`.
    #[allow(dead_code)]
    pub repo_id: Option<String>,
    /// Optional cap on the number of findings, set through
    /// `with_max_findings`. Nothing in this type enforces it.
    pub max_findings: Option<usize>,
    /// Free-form options keyed by name; `from_project_config` fills this
    /// from the detector's threshold overrides.
    pub options: HashMap<String, serde_json::Value>,
    /// Coupling multiplier; `1.0` unless overwritten from the project type
    /// in `from_project_config_with_type`.
    pub coupling_multiplier: f64,
    /// Complexity multiplier; `1.0` unless overwritten from the project type
    /// in `from_project_config_with_type`.
    pub complexity_multiplier: f64,
    /// Threshold resolver; the resolver's own default unless replaced via
    /// `with_adaptive`.
    pub adaptive: crate::calibrate::ThresholdResolver,
}

impl Default for DetectorConfig {
    /// Identical to [`DetectorConfig::new`]: no options, multipliers of `1.0`.
    fn default() -> Self {
        Self::new()
    }
}

impl DetectorConfig {
    /// Creates an empty configuration with neutral (`1.0`) multipliers and a
    /// default threshold resolver.
    pub fn new() -> Self {
        Self {
            repo_id: None,
            max_findings: None,
            options: HashMap::new(),
            coupling_multiplier: 1.0,
            complexity_multiplier: 1.0,
            adaptive: crate::calibrate::ThresholdResolver::default(),
        }
    }

    /// Builds a configuration for `detector_name` from the project config.
    ///
    /// The detector's override entry is looked up first under the normalized
    /// name and then under the name exactly as given. Every threshold in the
    /// entry is copied into `options` as a JSON value. When no entry exists
    /// the result equals [`DetectorConfig::new`].
    pub fn from_project_config(
        detector_name: &str,
        project_config: &crate::config::ProjectConfig,
    ) -> Self {
        let mut config = Self::new();
        let normalized = crate::config::normalize_detector_name(detector_name);
        if let Some(detector_override) = project_config
            .detectors
            .get(&normalized)
            .or_else(|| project_config.detectors.get(detector_name))
        {
            for (key, value) in &detector_override.thresholds {
                let json_value = match value {
                    crate::config::ThresholdValue::Integer(v) => serde_json::json!(*v),
                    crate::config::ThresholdValue::Float(v) => serde_json::json!(*v),
                    crate::config::ThresholdValue::Boolean(v) => serde_json::json!(*v),
                    crate::config::ThresholdValue::String(v) => serde_json::json!(v),
                };
                config.options.insert(key.clone(), json_value);
            }
        }
        config
    }

    /// Like [`DetectorConfig::from_project_config`], but also takes the
    /// coupling and complexity multipliers from the project type that the
    /// project config resolves for `repo_path`.
    pub fn from_project_config_with_type(
        detector_name: &str,
        project_config: &crate::config::ProjectConfig,
        repo_path: &std::path::Path,
    ) -> Self {
        let mut config = Self::from_project_config(detector_name, project_config);
        let project_type = project_config.project_type(repo_path);
        config.coupling_multiplier = project_type.coupling_multiplier();
        config.complexity_multiplier = project_type.complexity_multiplier();
        config
    }

    /// Replaces the threshold resolver and returns the updated config.
    pub fn with_adaptive(mut self, resolver: crate::calibrate::ThresholdResolver) -> Self {
        self.adaptive = resolver;
        self
    }

    /// Sets the repository identifier and returns the updated config.
    #[allow(dead_code)]
    pub fn with_repo_id(mut self, repo_id: impl Into<String>) -> Self {
        self.repo_id = Some(repo_id.into());
        self
    }

    /// Sets the findings cap and returns the updated config.
    #[allow(dead_code)]
    pub fn with_max_findings(mut self, max: usize) -> Self {
        self.max_findings = Some(max);
        self
    }

    /// Inserts (or overwrites) a single option and returns the updated config.
    #[allow(dead_code)]
    pub fn with_option(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
        self.options.insert(key.into(), value);
        self
    }

    /// Reads option `key` deserialized as `T`.
    ///
    /// Returns `None` both when the key is absent and when the stored JSON
    /// value cannot be deserialized into `T`.
    pub fn get_option<T: serde::de::DeserializeOwned>(&self, key: &str) -> Option<T> {
        self.options
            .get(key)
            .and_then(|v| serde_json::from_value(v.clone()).ok())
    }

    /// Reads option `key` as `T`, falling back to `default` when the key is
    /// absent or its value does not deserialize into `T`.
    pub fn get_option_or<T: serde::de::DeserializeOwned>(&self, key: &str, default: T) -> T {
        self.get_option(key).unwrap_or(default)
    }
}
/// Returns `true` when `path` lies in a directory that conventionally holds
/// non-production code (scripts, benchmarks, tools, examples, docs, contrib,
/// misc, hack), or in a `utils` directory while the path contains `.py`.
///
/// Matching is case-insensitive and done on `/`-separated text. Each
/// directory is recognised both nested (`a/contrib/x`) and at the start of a
/// relative path (`contrib/x`). Previously only five of the directory names
/// were recognised at the start of the path.
pub fn is_non_production_file(path: &std::path::Path) -> bool {
    // Each marker is "/dir/"; dropping the leading slash gives the
    // root-relative form "dir/".
    const NON_PRODUCTION_DIRS: [&str; 11] = [
        "/scripts/",
        "/benchmarks/",
        "/benchmark/",
        "/tools/",
        "/examples/",
        "/example/",
        "/docs/",
        "/doc/",
        "/contrib/",
        "/misc/",
        "/hack/",
    ];

    let path_str = path.to_string_lossy().to_lowercase();
    let in_dir =
        |marker: &str| path_str.contains(marker) || path_str.starts_with(&marker[1..]);

    NON_PRODUCTION_DIRS.iter().any(|&marker| in_dir(marker))
        // `utils/` only counts for paths that also contain ".py".
        || (in_dir("/utils/") && path_str.contains(".py"))
}
/// Returns `true` when `path` looks like a test, spec, fixture or mock file.
///
/// Matching is case-insensitive. A path qualifies when any of these hold:
/// - its file name starts with `test_`;
/// - it ends with a known test suffix (`_test.go`, `.spec.ts`, ...);
/// - it lies in a known test directory (`tests`, `__tests__`, `fixtures`,
///   ...), either nested (`a/spec/x`) or at the start of a relative path
///   (`spec/x`).
///
/// Previously only `tests/` and `test/` were recognised at the start of the
/// path, so `__tests__/a.js` or `spec/a.rb` were missed.
pub fn is_test_file(path: &std::path::Path) -> bool {
    const TEST_SUFFIXES: [&str; 11] = [
        "_test.go",
        "_test.py",
        "_spec.rb",
        ".test.ts",
        ".test.js",
        ".test.tsx",
        ".test.jsx",
        ".spec.ts",
        ".spec.js",
        ".spec.tsx",
        ".spec.jsx",
    ];
    // Each marker is "/dir/"; dropping the leading slash gives the
    // root-relative form "dir/".
    const TEST_DIRS: [&str; 9] = [
        "/tests/",
        "/test/",
        "/__tests__/",
        "/e2e/",
        "/spec/",
        "/fixtures/",
        "/testdata/",
        "/__fixtures__/",
        "/__mocks__/",
    ];

    let path_str = path.to_string_lossy().to_lowercase();
    // A file name that is not valid UTF-8 is treated as empty.
    let filename = path
        .file_name()
        .and_then(|s| s.to_str())
        .unwrap_or("")
        .to_lowercase();

    if filename.starts_with("test_") {
        return true;
    }
    if TEST_SUFFIXES.iter().any(|&suffix| path_str.ends_with(suffix)) {
        return true;
    }
    TEST_DIRS
        .iter()
        .any(|&marker| path_str.contains(marker) || path_str.starts_with(&marker[1..]))
}
/// String-based check for whether a path looks like test code.
///
/// The path is lowercased and then matched against two tables: fragments
/// that may occur anywhere in the path, and directory prefixes that only
/// count at the very start (for relative paths).
pub fn is_test_path(path_str: &str) -> bool {
    const ANYWHERE: [&str; 9] = [
        "/test/",
        "/tests/",
        "/__tests__/",
        "/spec/",
        "/test_",
        "_test.",
        ".test.",
        ".spec.",
        "_spec.",
    ];
    const LEADING: [&str; 4] = ["tests/", "test/", "__tests__/", "spec/"];

    let lower = path_str.to_lowercase();

    if ANYWHERE.iter().any(|&fragment| lower.contains(fragment)) {
        return true;
    }
    LEADING.iter().any(|&prefix| lower.starts_with(prefix))
}
/// Interface implemented by every detector.
///
/// Only `name`, `description` and `detect` are required; every other method
/// has a default that implementors may override.
///
/// NOTE(review): how the runner interprets the capability flags below is not
/// visible in this file; the docs state only the defaults.
pub trait Detector: Send + Sync {
    /// Static name of the detector.
    fn name(&self) -> &'static str;

    /// Static description of the detector.
    fn description(&self) -> &'static str;

    /// Runs the detector against `ctx`, returning its findings or an error.
    fn detect(&self, ctx: &super::analysis_context::AnalysisContext) -> Result<Vec<Finding>>;

    /// Defaults to `false`. Presumably marks detectors that depend on other
    /// detectors' output — confirm against the runner.
    fn is_dependent(&self) -> bool {
        false
    }

    /// Defaults to an empty list. Presumably names the detectors this one
    /// depends on — confirm against the runner.
    #[allow(dead_code)]
    fn dependencies(&self) -> Vec<&'static str> {
        Vec::new()
    }

    /// Category label; defaults to `"code_smell"`.
    fn category(&self) -> &'static str {
        "code_smell"
    }

    /// The detector's configuration, if it exposes one; defaults to `None`.
    fn config(&self) -> Option<&DetectorConfig> {
        None
    }

    /// Defaults to [`DetectorScope::GraphWide`].
    ///
    /// NOTE(review): this is a separate method from `detector_scope` and the
    /// two defaults never return the same variant — confirm which one
    /// callers are expected to use.
    fn scope(&self) -> DetectorScope {
        DetectorScope::GraphWide
    }

    /// Scope derived from `requires_graph()`: `FileScopedGraph` when a graph
    /// is required, `FileLocal` otherwise.
    fn detector_scope(&self) -> DetectorScope {
        if !self.requires_graph() {
            return DetectorScope::FileLocal;
        }
        DetectorScope::FileScopedGraph
    }

    /// Defaults to `true`.
    fn requires_graph(&self) -> bool {
        true
    }

    /// Receives precomputed taint paths; the default discards both lists.
    fn set_precomputed_taint(
        &self,
        _cross: Vec<super::taint::TaintPath>,
        _intra: Vec<super::taint::TaintPath>,
    ) {
    }

    /// Taint category associated with this detector; defaults to `None`.
    fn taint_category(&self) -> Option<super::taint::TaintCategory> {
        None
    }

    /// File extensions associated with this detector; defaults to an empty
    /// slice.
    fn file_extensions(&self) -> &'static [&'static str] {
        &[]
    }

    /// Content flags associated with this detector; defaults to the empty
    /// flag set.
    fn content_requirements(&self) -> super::detector_context::ContentFlags {
        super::detector_context::ContentFlags::empty()
    }

    /// Defaults to `false`.
    fn is_deterministic(&self) -> bool {
        false
    }

    /// Defaults to `false`.
    fn is_network_bound(&self) -> bool {
        false
    }

    /// Defaults to `false`.
    fn bypass_postprocessor(&self) -> bool {
        false
    }
}
/// Boxed, thread-safe (`Send + Sync`) callback taking a string and two
/// counters.
///
/// NOTE(review): presumably invoked as `(detector_name, completed, total)` —
/// confirm against the call site; the argument meaning is not visible here.
pub type ProgressCallback = Box<dyn Fn(&str, usize, usize) + Send + Sync>;
/// Running totals accumulated over detector results via `add_result`.
#[derive(Debug, Clone, Default)]
pub struct DetectionSummary {
/// Number of results added, regardless of outcome.
pub detectors_run: usize,
/// Number of added results whose `success` flag was `true`.
pub detectors_succeeded: usize,
/// Number of added results whose `success` flag was `false`.
pub detectors_failed: usize,
/// Total findings across successful results only.
pub total_findings: usize,
/// Finding count per severity, taken from successful results only.
pub by_severity: HashMap<Severity, usize>,
/// Sum of `duration_ms` over all added results, successful or not.
pub total_duration_ms: u64,
}
impl DetectionSummary {
    /// Folds one detector result into the running totals.
    ///
    /// The run count and total duration are updated for every result. The
    /// finding totals and per-severity counts only take successful results
    /// into account; a failed result just bumps `detectors_failed`.
    pub fn add_result(&mut self, result: &DetectorResult) {
        self.detectors_run += 1;
        self.total_duration_ms += result.duration_ms;

        if !result.success {
            self.detectors_failed += 1;
            return;
        }

        self.detectors_succeeded += 1;
        self.total_findings += result.findings.len();
        for severity in result.findings.iter().map(|finding| finding.severity) {
            let count = self.by_severity.entry(severity).or_insert(0);
            *count += 1;
        }
    }
}
/// Compiles every pattern that contains `*` into an anchored regex in which
/// `*` matches any run of characters (including `/`).
///
/// Patterns without `*` are ignored here; `should_exclude_path` handles
/// those. All text between the `*`s is escaped with `regex::escape`, so
/// characters such as `.`, `+`, `(` or `[` match themselves. Without the
/// escaping, `*.rs` compiled to `^.*.rs$` and matched `cars`, and a pattern
/// like `build(1)*` or `[gen]*` was either misread as regex syntax or
/// silently dropped because it failed to compile.
///
/// A pattern that still fails to compile is skipped.
pub fn compile_glob_patterns(patterns: &[String]) -> Vec<regex::Regex> {
    patterns
        .iter()
        .filter(|pattern| pattern.contains('*'))
        .filter_map(|pattern| {
            // Escape each literal piece, then rejoin with the wildcard.
            let literal_parts: Vec<String> =
                pattern.split('*').map(regex::escape).collect();
            let anchored = format!("^{}$", literal_parts.join(".*"));
            regex::Regex::new(&anchored).ok()
        })
        .collect()
}
/// Decides whether `path` is excluded by any of `patterns`.
///
/// Pattern kinds, checked in this order:
/// - **Directory** (ends with `/`): matches when its `/`-separated segments
///   appear as consecutive components of `path`. `vendor/` matches
///   `a/vendor/b.rs`; `vendor/cache/` matches `x/vendor/cache/y`.
///   Previously a multi-segment directory pattern was compared against
///   single components and so could never match.
/// - **Glob** (contains `*`): skipped in the first pass and matched through
///   `compiled_globs` against both the full path and the file name.
/// - **Plain**: matches when `path` contains the pattern as a substring.
///
/// Empty patterns are ignored; `path.contains("")` is always true, so an
/// empty entry used to exclude every path.
pub fn should_exclude_path(
    path: &str,
    patterns: &[String],
    compiled_globs: &[regex::Regex],
) -> bool {
    let path_components: Vec<&str> = path.split('/').collect();

    for pattern in patterns {
        if pattern.is_empty() {
            continue;
        }
        if pattern.ends_with('/') {
            let dir = pattern.trim_end_matches('/');
            // `split` always yields at least one item, so the window size is
            // never zero (`windows(0)` would panic).
            let dir_components: Vec<&str> = dir.split('/').collect();
            let matched = path_components
                .windows(dir_components.len())
                .any(|window| window == dir_components.as_slice());
            if matched {
                return true;
            }
        } else if pattern.contains('*') {
            // Handled below via the precompiled regexes.
            continue;
        } else if path.contains(pattern.as_str()) {
            return true;
        }
    }

    let filename = std::path::Path::new(path)
        .file_name()
        .and_then(|s| s.to_str())
        .unwrap_or("");

    compiled_globs
        .iter()
        .any(|re| re.is_match(path) || re.is_match(filename))
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Builder methods store their values and options round-trip through
    /// `get_option` / `get_option_or`.
    #[test]
    fn test_detector_config() {
        let config = DetectorConfig::new()
            .with_repo_id("test-repo")
            .with_max_findings(100)
            .with_option("threshold", serde_json::json!(10));
        assert_eq!(config.repo_id, Some("test-repo".to_string()));
        assert_eq!(config.max_findings, Some(100));
        assert_eq!(config.get_option::<i32>("threshold"), Some(10));
        assert_eq!(config.get_option_or("missing", 5), 5);
    }

    #[test]
    fn test_detector_result_success() {
        let result = DetectorResult::success("TestDetector".to_string(), vec![], 100);
        assert!(result.success);
        assert!(result.error.is_none());
        assert_eq!(result.duration_ms, 100);
    }

    #[test]
    fn test_detector_result_failure() {
        let result = DetectorResult::failure("TestDetector".to_string(), "oops".to_string(), 50);
        assert!(!result.success);
        assert_eq!(result.error, Some("oops".to_string()));
    }

    /// A skipped detector is recorded as an empty, zero-duration success.
    #[test]
    fn test_detector_result_skipped() {
        let result = DetectorResult::skipped("TestDetector");
        assert_eq!(result.detector_name, "TestDetector");
        assert!(result.success);
        assert!(result.findings.is_empty());
        assert!(result.error.is_none());
        assert_eq!(result.duration_ms, 0);
    }

    #[test]
    fn test_detection_summary() {
        let mut summary = DetectionSummary::default();
        let result1 = DetectorResult::success("D1".to_string(), vec![], 100);
        let result2 = DetectorResult::failure("D2".to_string(), "err".to_string(), 50);
        summary.add_result(&result1);
        summary.add_result(&result2);
        assert_eq!(summary.detectors_run, 2);
        assert_eq!(summary.detectors_succeeded, 1);
        assert_eq!(summary.detectors_failed, 1);
        assert_eq!(summary.total_duration_ms, 150);
    }

    #[test]
    fn test_is_test_file() {
        assert!(is_test_file(Path::new("foo_test.go")));
        assert!(is_test_file(Path::new("test_foo.py")));
        assert!(is_test_file(Path::new("src/tests/helper.py")));
        assert!(is_test_file(Path::new("app.spec.ts")));
        assert!(!is_test_file(Path::new("src/main.py")));
        assert!(!is_test_file(Path::new("testing_utils.py")));
    }

    /// The id is 16 hex digits, reproducible, and sensitive to the line.
    #[test]
    fn test_finding_id_is_stable_hex() {
        let id = finding_id("detector", "src/lib.rs", 7);
        assert_eq!(id.len(), 16);
        assert!(id.chars().all(|c| c.is_ascii_hexdigit()));
        assert_eq!(id, finding_id("detector", "src/lib.rs", 7));
        assert_ne!(id, finding_id("detector", "src/lib.rs", 8));
    }

    /// Asserts that at least 34 of the registered detectors report
    /// `requires_graph() == false`, and prints both groups for inspection.
    #[test]
    fn test_requires_graph_annotation_coverage() {
        let init = crate::detectors::DetectorInit::test_default();
        let detectors = crate::detectors::create_all_detectors(&init);
        let graph_independent: Vec<_> = detectors
            .iter()
            .filter(|d| !d.requires_graph())
            .map(|d| d.name())
            .collect();
        let graph_dependent: Vec<_> = detectors
            .iter()
            .filter(|d| d.requires_graph())
            .map(|d| d.name())
            .collect();
        println!(
            "Graph-independent detectors ({}): {:?}",
            graph_independent.len(),
            graph_independent
        );
        println!(
            "Graph-dependent detectors ({}): {:?}",
            graph_dependent.len(),
            graph_dependent
        );
        assert!(
            graph_independent.len() >= 34,
            "Expected >= 34 graph-independent detectors, got {}",
            graph_independent.len()
        );
    }
}