use crate::detectors::analysis_context::AnalysisContext;
use crate::detectors::base::{is_test_file, Detector};
use crate::detectors::context_hmm::FunctionContext;
use crate::detectors::taint::centralized::CentralizedTaintResults;
use crate::graph::CodeNode;
use crate::models::Finding;
use rayon::prelude::*;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::time::Instant;
use tracing::{debug, error, info, warn};
pub fn run_detectors(
detectors: &[Arc<dyn Detector>],
ctx: &AnalysisContext<'_>,
workers: usize,
) -> (Vec<Finding>, HashSet<String>) {
if detectors.is_empty() {
return (Vec::new(), HashSet::new());
}
let bypass_set: HashSet<String> = detectors
.iter()
.filter(|d| d.bypass_postprocessor())
.map(|d| d.name().to_string())
.collect();
let (independent, dependent): (Vec<_>, Vec<_>) =
detectors.iter().partition(|d| !d.is_dependent());
info!(
"run_detectors: {} independent, {} dependent, {} workers, {} bypass postprocessor",
independent.len(),
dependent.len(),
workers,
bypass_set.len(),
);
let pool = match rayon::ThreadPoolBuilder::new()
.num_threads(workers)
.stack_size(8 * 1024 * 1024) .build()
{
Ok(p) => p,
Err(e) => {
error!("Failed to build rayon pool: {e}");
return (Vec::new(), bypass_set);
}
};
let mut results: Vec<(String, Vec<Finding>, bool)> = pool.install(|| {
independent
.par_iter()
.map(|detector| run_one(detector, ctx))
.collect()
});
results.sort_by(|a, b| a.0.cmp(&b.0));
let mut all_findings: Vec<Finding> = Vec::new();
let mut skipped_count: usize = 0;
for (_name, findings, skipped) in results {
if skipped {
skipped_count += 1;
}
all_findings.extend(findings);
}
for detector in dependent {
let (_name, findings, skipped) = run_one(detector, ctx);
if skipped {
skipped_count += 1;
}
all_findings.extend(findings);
}
if skipped_count > 0 {
warn!(
"{} detector(s) skipped due to errors (see warnings above)",
skipped_count,
);
}
(all_findings, bypass_set)
}
fn run_one(detector: &Arc<dyn Detector>, ctx: &AnalysisContext<'_>) -> (String, Vec<Finding>, bool) {
let name = detector.name().to_string();
let start = Instant::now();
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| detector.detect(ctx)));
let elapsed_ms = start.elapsed().as_millis() as u64;
match result {
Ok(Ok(mut findings)) => {
if let Some(config) = detector.config() {
if let Some(max) = config.max_findings {
if findings.len() > max {
findings.truncate(max);
}
}
}
debug!(
"{} produced {} findings in {}ms",
name,
findings.len(),
elapsed_ms,
);
(name, findings, false)
}
Ok(Err(e)) => {
warn!("{} skipped (query error): {}", name, e);
(name, Vec::new(), true)
}
Err(panic_info) => {
let panic_msg = if let Some(s) = panic_info.downcast_ref::<&str>() {
s.to_string()
} else if let Some(s) = panic_info.downcast_ref::<String>() {
s.clone()
} else {
"Unknown panic".to_string()
};
error!("{} panicked: {}", name, panic_msg);
(name, Vec::new(), true)
}
}
}
/// Hands each taint-aware detector its precomputed cross-function and
/// intra-function taint results for the detector's category.
///
/// Detectors without a taint category are left untouched; a category with no
/// precomputed entry receives empty (default) results.
pub fn inject_taint_precomputed(
    detectors: &[Arc<dyn Detector>],
    precomputed: &CentralizedTaintResults,
) {
    for detector in detectors {
        let Some(category) = detector.taint_category() else {
            continue;
        };
        let cross = precomputed
            .cross_function
            .get(&category)
            .cloned()
            .unwrap_or_default();
        let intra = precomputed
            .intra_function
            .get(&category)
            .cloned()
            .unwrap_or_default();
        detector.set_precomputed_taint(cross, intra);
    }
}
pub fn filter_test_file_findings(findings: &mut Vec<Finding>) {
let before = findings.len();
for f in findings.iter_mut() {
if !f.affected_files.is_empty()
&& f.affected_files.iter().all(|p| super::base::is_non_production_file(p))
&& f.severity != crate::models::Severity::Low
&& f.severity != crate::models::Severity::Info
{
debug!("Downgrading non-production finding to LOW: {}", f.title);
f.severity = crate::models::Severity::Low;
}
}
findings.retain(|f| {
if f.affected_files.is_empty() {
return true; }
!f.affected_files.iter().all(|p| is_test_file(p))
});
let removed = before - findings.len();
if removed > 0 {
debug!("Filtered out {} findings from test files", removed);
}
}
/// Suppresses likely false positives from coupling and dead-code detectors
/// using the HMM function-context classifications in `ctx`.
///
/// A finding is dropped only when (a) it came from one of the listed
/// detectors, (b) it can be attributed to a specific function, and (c) that
/// function's classified context says the corresponding smell class should be
/// skipped. Everything else passes through unchanged. Returns the surviving
/// findings; input order is preserved by the parallel filter collect.
pub fn apply_hmm_context_filter(findings: Vec<Finding>, ctx: &AnalysisContext<'_>) -> Vec<Finding> {
    use rustc_hash::FxHashSet;
    let classifications = &ctx.hmm_classifications;
    if classifications.is_empty() {
        return findings;
    }
    // Detectors whose findings may be context-suppressed.
    let coupling: FxHashSet<&str> = [
        "DegreeCentralityDetector",
        "ShotgunSurgeryDetector",
        "FeatureEnvyDetector",
        "InappropriateIntimacyDetector",
    ]
    .into_iter()
    .collect();
    let dead_code: FxHashSet<&str> = ["UnreachableCodeDetector", "DeadCodeDetector"]
        .into_iter()
        .collect();
    // Index every function node by file path, sorted by start line so that
    // find_function_at_line can binary-search.
    let interner = ctx.graph.interner();
    let mut funcs_per_file: HashMap<&str, Vec<&CodeNode>> = HashMap::new();
    for &idx in ctx.graph.functions_idx() {
        if let Some(node) = ctx.graph.node_idx(idx) {
            funcs_per_file.entry(node.path(interner)).or_default().push(node);
        }
    }
    for list in funcs_per_file.values_mut() {
        list.sort_unstable_by_key(|n| n.line_start);
    }
    let total = findings.len();
    let kept: Vec<Finding> = findings
        .into_par_iter()
        .filter(|finding| {
            let detector = finding.detector.as_str();
            let coupling_hit = coupling.contains(detector);
            let dead_code_hit = dead_code.contains(detector);
            if !coupling_hit && !dead_code_hit {
                return true;
            }
            // Keep the finding unless we can positively attribute it to a
            // function whose context says to suppress this smell class.
            let Some(qn) = find_function_at_line(finding, &funcs_per_file, interner) else {
                return true;
            };
            let Some((context, _confidence)) = classifications.get(qn) else {
                return true;
            };
            let suppress = (coupling_hit && context.skip_coupling())
                || (dead_code_hit && context.skip_dead_code());
            !suppress
        })
        .collect();
    let removed = total - kept.len();
    if removed > 0 {
        info!("HMM context filter removed {} false positives", removed);
    }
    kept
}
/// Resolves the qualified name of the function containing a finding's first
/// affected file + start line, if any.
///
/// `func_by_file` must hold each file's functions sorted ascending by
/// `line_start` (as built by `apply_hmm_context_filter`); the lookup is a
/// binary search for the last function starting at or before the line.
/// NOTE(review): with nested functions this picks the latest-starting
/// candidate, which may be an inner function that ends before `line` even
/// though an enclosing one contains it — confirm whether the graph stores
/// nested function nodes.
fn find_function_at_line<'a>(
    finding: &Finding,
    func_by_file: &HashMap<&str, Vec<&'a CodeNode>>,
    interner: &'a crate::graph::interner::StringInterner,
) -> Option<&'a str> {
    let file = finding.affected_files.first()?;
    let line = finding.line_start?;
    let key = file.to_string_lossy();
    let funcs = func_by_file.get(key.as_ref())?;
    // Index of the first function starting strictly after `line`; the one
    // just before it (if any) is the latest candidate container.
    let idx = funcs.partition_point(|f| f.line_start <= line);
    let candidate = funcs[idx.checked_sub(1)?];
    (candidate.line_end >= line).then(|| candidate.qn(interner))
}
pub fn sort_findings_deterministic(findings: &mut [Finding]) {
findings.sort_by(|a, b| {
b.severity
.cmp(&a.severity)
.then_with(|| {
let a_file = a
.affected_files
.first()
.map(|f| f.to_string_lossy())
.unwrap_or_default();
let b_file = b
.affected_files
.first()
.map(|f| f.to_string_lossy())
.unwrap_or_default();
a_file.cmp(&b_file)
})
.then_with(|| a.line_start.cmp(&b.line_start))
.then_with(|| a.detector.cmp(&b.detector))
.then_with(|| a.title.cmp(&b.title))
});
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::builder::GraphBuilder;
    use crate::models::Severity;
    use std::path::PathBuf;

    #[test]
    fn test_run_detectors_empty() {
        // An empty detector slice short-circuits: no findings, no bypass names.
        let graph = GraphBuilder::new().freeze();
        let ctx = AnalysisContext::test(&graph);
        let (findings, bypass) = run_detectors(&[], &ctx, 1);
        assert!(findings.is_empty());
        assert!(bypass.is_empty());
    }

    #[test]
    fn test_sort_findings_deterministic() {
        // Deliberately out of order: a Low-severity finding first, then two
        // High-severity ones sharing file/detector/title but differing lines.
        let mut items = vec![
            Finding {
                severity: Severity::Low,
                detector: "B".into(),
                title: "b".into(),
                affected_files: vec![PathBuf::from("z.py")],
                line_start: Some(10),
                ..Default::default()
            },
            Finding {
                severity: Severity::High,
                detector: "A".into(),
                title: "a".into(),
                affected_files: vec![PathBuf::from("a.py")],
                line_start: Some(1),
                ..Default::default()
            },
            Finding {
                severity: Severity::High,
                detector: "A".into(),
                title: "a".into(),
                affected_files: vec![PathBuf::from("a.py")],
                line_start: Some(5),
                ..Default::default()
            },
        ];
        sort_findings_deterministic(&mut items);
        // High severity sorts first; ties break by ascending start line.
        assert_eq!(items[0].severity, Severity::High);
        assert_eq!(items[0].line_start, Some(1));
        assert_eq!(items[1].severity, Severity::High);
        assert_eq!(items[1].line_start, Some(5));
        assert_eq!(items[2].severity, Severity::Low);
    }

    #[test]
    fn test_filter_test_file_findings() {
        let mut items = vec![
            Finding {
                detector: "X".into(),
                affected_files: vec![PathBuf::from("tests/test_foo.py")],
                ..Default::default()
            },
            Finding {
                detector: "Y".into(),
                affected_files: vec![PathBuf::from("src/lib.rs")],
                ..Default::default()
            },
            Finding {
                detector: "Z".into(),
                affected_files: vec![],
                ..Default::default()
            },
        ];
        filter_test_file_findings(&mut items);
        // The test-file-only finding is dropped; the production finding and
        // the unattributed finding both survive, in their original order.
        assert_eq!(items.len(), 2);
        assert_eq!(items[0].detector, "Y");
        assert_eq!(items[1].detector, "Z");
    }
}