use crate::models::{Finding, Severity};
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, HashMap, HashSet};
use tracing::{debug, info};
/// How a multi-detector group of findings must agree before it is merged
/// into a single consensus finding.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum VotingStrategy {
/// At least two distinct detectors reported the issue (default).
#[default]
Majority,
/// The distinct detectors' combined weight must reach 2.0.
Weighted,
/// Combined confidence must meet the engine threshold (with >= 2 detectors).
Threshold,
/// Every finding in the group must come from a different detector.
Unanimous,
}
/// How per-finding confidences are combined into one group confidence.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum ConfidenceMethod {
/// Plain arithmetic mean of the findings' confidences.
Average,
/// Mean weighted by each finding's detector weight (default).
#[default]
Weighted,
/// Bayesian update from a 0.5 prior, one step per detector family.
Bayesian,
/// Highest single confidence in the group.
Max,
/// Lowest single confidence in the group.
Min,
}
/// How the merged finding's severity is chosen from the group's severities.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum SeverityResolution {
/// Maximum severity in the group (default).
#[default]
Highest,
/// Minimum severity in the group.
Lowest,
/// Most frequent severity; ties break toward the higher severity.
MajorityVote,
/// Severity with the largest confidence*weight score; ties go higher.
WeightedVote,
}
/// Per-detector voting weight and historical accuracy.
#[derive(Debug, Clone)]
pub struct DetectorWeight {
    /// Detector name (kept for diagnostics).
    #[allow(dead_code)] pub name: String,
    /// Relative vote weight used by weighted strategies.
    pub weight: f64,
    /// Historical accuracy; doubles as fallback confidence.
    pub accuracy: f64,
}

impl DetectorWeight {
    /// Builds a weight entry for the named detector.
    pub fn new(name: impl Into<String>, weight: f64, accuracy: f64) -> Self {
        let name = name.into();
        DetectorWeight { name, weight, accuracy }
    }
}

impl Default for DetectorWeight {
    /// Neutral fallback entry: weight 1.0, accuracy 0.80.
    fn default() -> Self {
        DetectorWeight::new("default", 1.0, 0.80)
    }
}
/// Outcome of consensus analysis for one group of findings.
#[derive(Debug, Clone)]
pub struct ConsensusResult {
/// True when the configured voting strategy is satisfied.
pub has_consensus: bool,
/// Combined (possibly multi-detector-boosted) confidence in [0, 1].
pub confidence: f64,
/// Severity chosen via the configured severity resolution.
pub severity: Severity,
/// Sorted names of the distinct detectors that reported the issue.
pub contributing_detectors: Vec<String>,
/// Number of distinct detectors in the group.
pub vote_count: usize,
/// Total findings in the group (can exceed vote_count on duplicates).
#[allow(dead_code)] pub total_detectors: usize,
/// Distinct detectors divided by total findings (1.0 = no duplicates).
#[allow(dead_code)] pub agreement_ratio: f64,
}
/// Aggregate statistics for one voting run (serializable for reports).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VotingStats {
/// Findings received by `vote`.
pub total_input: usize,
/// Findings that survived voting.
pub total_output: usize,
/// Number of distinct entity groups formed.
pub groups_analyzed: usize,
/// Groups containing exactly one finding.
pub single_detector_findings: usize,
/// Groups containing two or more findings.
pub multi_detector_findings: usize,
/// Groups merged into a boosted consensus finding.
pub boosted_by_consensus: usize,
/// Findings/groups dropped for confidence below the threshold.
pub rejected_low_confidence: usize,
/// Voting strategy the engine was configured with.
pub strategy: VotingStrategy,
/// Confidence-combination method the engine was configured with.
pub confidence_method: ConfidenceMethod,
/// Minimum confidence required to keep a finding.
pub threshold: f64,
}
/// Built-in weight/accuracy table for known detectors, plus a "default"
/// fallback entry used for any detector not listed here.
fn default_detector_weights() -> HashMap<String, DetectorWeight> {
    // (name, weight, accuracy): weights above 1.0 mark detectors with a
    // strong historical signal; accuracy doubles as fallback confidence.
    // A fixed table needs no heap-allocated Vec (clippy::useless_vec).
    const TABLE: [(&str, f64, f64); 6] = [
        ("CircularDependencyDetector", 1.2, 0.95),
        ("GodClassDetector", 1.1, 0.85),
        ("FeatureEnvyDetector", 1.0, 0.80),
        ("ShotgunSurgeryDetector", 1.0, 0.85),
        ("InappropriateIntimacyDetector", 1.0, 0.80),
        ("ArchitecturalBottleneckDetector", 1.1, 0.90),
    ];
    let mut map: HashMap<String, DetectorWeight> = TABLE
        .iter()
        .map(|&(name, weight, accuracy)| {
            (name.to_string(), DetectorWeight::new(name, weight, accuracy))
        })
        .collect();
    map.insert("default".to_string(), DetectorWeight::default());
    map
}
/// Merges overlapping findings from multiple detectors via consensus voting.
pub struct VotingEngine {
/// Rule deciding when a multi-detector group has consensus.
strategy: VotingStrategy,
/// How group confidence is computed from individual findings.
confidence_method: ConfidenceMethod,
/// How the merged severity is chosen.
severity_resolution: SeverityResolution,
/// Minimum confidence for a finding (or merged group) to survive.
confidence_threshold: f64,
/// Distinct-detector count needed before confidence gets a boost.
min_detectors_for_boost: usize,
/// Per-detector weight/accuracy table (includes a "default" entry).
detector_weights: HashMap<String, DetectorWeight>,
}
impl Default for VotingEngine {
/// Delegates to [`VotingEngine::new`] (standard thresholds and weights).
fn default() -> Self {
Self::new()
}
}
impl VotingEngine {
/// Creates an engine with the default strategy (Majority), default
/// confidence method (Weighted), Highest severity resolution, a 0.6
/// confidence threshold, a boost once 2+ distinct detectors agree, and
/// the built-in detector weight table.
pub fn new() -> Self {
Self {
strategy: VotingStrategy::default(),
confidence_method: ConfidenceMethod::default(),
severity_resolution: SeverityResolution::default(),
confidence_threshold: 0.6,
min_detectors_for_boost: 2,
detector_weights: default_detector_weights(),
}
}
/// Creates an engine with explicit configuration; detector weights still
/// come from the built-in `default_detector_weights` table.
pub fn with_config(
strategy: VotingStrategy,
confidence_method: ConfidenceMethod,
severity_resolution: SeverityResolution,
confidence_threshold: f64,
min_detectors_for_boost: usize,
) -> Self {
Self {
strategy,
confidence_method,
severity_resolution,
confidence_threshold,
min_detectors_for_boost,
detector_weights: default_detector_weights(),
}
}
/// Runs consensus voting over raw findings; returns the surviving
/// findings together with aggregate statistics.
///
/// Findings are grouped by entity key (category + entity + location):
/// - a single-detector group passes through if its confidence meets the
///   threshold;
/// - a multi-detector group is merged into one boosted consensus finding
///   when the configured strategy agrees; if all its findings come from
///   one detector, the highest-severity one may still pass on its own
///   confidence; otherwise the group is rejected.
pub fn vote(&self, findings: Vec<Finding>) -> (Vec<Finding>, VotingStats) {
    if findings.is_empty() {
        // Report the engine configuration even for an empty run so the
        // stats stay consistent with the non-empty path.
        return (
            vec![],
            VotingStats {
                strategy: self.strategy,
                confidence_method: self.confidence_method,
                threshold: self.confidence_threshold,
                ..Default::default()
            },
        );
    }
    let groups = self.group_by_entity(&findings);
    let mut consensus_findings = Vec::new();
    let mut rejected_count = 0;
    let mut boosted_count = 0;
    for group_findings in groups.values() {
        if group_findings.len() == 1 {
            let finding = &group_findings[0];
            let confidence = self.get_finding_confidence(finding);
            if confidence >= self.confidence_threshold {
                consensus_findings.push(finding.clone());
            } else {
                rejected_count += 1;
            }
        } else {
            let consensus = self.calculate_consensus(group_findings);
            if consensus.has_consensus && consensus.confidence >= self.confidence_threshold {
                let merged = self.create_consensus_finding(group_findings, &consensus);
                consensus_findings.push(merged);
                boosted_count += 1;
            } else if consensus.vote_count == 1 {
                // All findings come from a single detector: keep the first
                // highest-severity one if it clears the threshold. A single
                // scan replaces the previous clone + sort of the group and
                // still picks the earliest finding among severity ties.
                let best = group_findings
                    .iter()
                    .skip(1)
                    .fold(&group_findings[0], |best, f| {
                        if f.severity > best.severity { f } else { best }
                    });
                let confidence = self.get_finding_confidence(best);
                if confidence >= self.confidence_threshold {
                    consensus_findings.push(best.clone());
                } else {
                    rejected_count += 1;
                }
            } else {
                rejected_count += 1;
            }
        }
    }
    let stats = VotingStats {
        total_input: findings.len(),
        total_output: consensus_findings.len(),
        groups_analyzed: groups.len(),
        single_detector_findings: groups.values().filter(|g| g.len() == 1).count(),
        multi_detector_findings: groups.values().filter(|g| g.len() > 1).count(),
        boosted_by_consensus: boosted_count,
        rejected_low_confidence: rejected_count,
        strategy: self.strategy,
        confidence_method: self.confidence_method,
        threshold: self.confidence_threshold,
    };
    info!(
        "VotingEngine: {} -> {} findings ({} boosted, {} rejected)",
        findings.len(),
        consensus_findings.len(),
        boosted_count,
        rejected_count
    );
    (consensus_findings, stats)
}
fn group_by_entity(&self, findings: &[Finding]) -> std::collections::BTreeMap<String, Vec<Finding>> {
let mut groups: std::collections::BTreeMap<String, Vec<Finding>> = std::collections::BTreeMap::new();
for finding in findings {
let key = self.get_entity_key(finding);
groups.entry(key).or_default().push(finding.clone());
}
groups
}
/// Builds the grouping key "category::entity::location" for a finding,
/// where location is "file:start-end", "file:start", or just the file.
fn get_entity_key(&self, finding: &Finding) -> String {
    let category = self.get_issue_category(finding);
    let entity_name = self.extract_entity_name(&finding.title);
    let location = match finding.affected_files.first() {
        None => "unknown".to_string(),
        Some(path) => {
            let file = path.to_string_lossy();
            match (finding.line_start, finding.line_end) {
                (Some(start), Some(end)) => format!("{}:{}-{}", file, start, end),
                (Some(start), None) => format!("{}:{}", file, start),
                _ => file.to_string(),
            }
        }
    };
    if entity_name.is_empty() {
        format!("{}::{}", category, location)
    } else {
        format!("{}::{}::{}", category, entity_name, location)
    }
}
/// Pulls the first identifier after the first ": " out of a finding title,
/// stripping surrounding punctuation; empty string when there is no ": ".
fn extract_entity_name(&self, title: &str) -> String {
    match title.split_once(": ") {
        Some((_, rest)) => rest
            .split_whitespace()
            .next()
            .unwrap_or("")
            .trim_matches(|c: char| !c.is_alphanumeric() && c != '_')
            .to_string(),
        None => String::new(),
    }
}
/// Maps a detector name onto a coarse issue category used for grouping.
fn get_issue_category(&self, finding: &Finding) -> &str {
    // First matching rule wins; order mirrors the original if/else chain.
    const RULES: [(&[&str], &str); 8] = [
        (&["circular", "dependency"], "circular_dependency"),
        (&["god", "class"], "god_class"),
        (&["dead"], "dead_code"),
        (&["security"], "security"),
        (&["complexity"], "complexity"),
        (&["duplicate", "clone"], "duplication"),
        (&["type"], "type_error"),
        (&["lint"], "lint"),
    ];
    let detector = finding.detector.to_lowercase();
    RULES
        .iter()
        .find(|(keywords, _)| keywords.iter().any(|k| detector.contains(k)))
        .map(|&(_, category)| category)
        .unwrap_or("other")
}
/// Analyzes one multi-finding group: distinct detectors, combined
/// confidence, resolved severity, and whether the strategy is satisfied.
fn calculate_consensus(&self, findings: &[Finding]) -> ConsensusResult {
let detectors: Vec<&str> = findings.iter().map(|f| f.detector.as_str()).collect();
let unique_detectors: HashSet<&str> = detectors.iter().copied().collect();
// Sorted so reported detector lists are deterministic.
let mut unique_vec: Vec<String> = unique_detectors.iter().map(|s| s.to_string()).collect();
unique_vec.sort();
let confidence = self.calculate_confidence(findings);
let severity = self.resolve_severity(findings);
let has_consensus = self.check_consensus(findings, &unique_vec);
// Distinct detectors over total findings (1.0 = no duplicate reporters).
let agreement_ratio = unique_detectors.len() as f64 / findings.len().max(1) as f64;
ConsensusResult {
has_consensus,
confidence,
severity,
contributing_detectors: unique_vec,
vote_count: unique_detectors.len(),
total_detectors: findings.len(),
agreement_ratio,
}
}
/// Applies the configured voting strategy to one group; always false with
/// fewer than two distinct detectors.
fn check_consensus(&self, findings: &[Finding], unique_detectors: &[String]) -> bool {
let detector_count = unique_detectors.len();
if detector_count < 2 {
return false;
}
match self.strategy {
VotingStrategy::Unanimous => {
// Every finding must come from a different detector.
detector_count >= 2 && detector_count == findings.len()
}
VotingStrategy::Majority => {
// Any two distinct detectors suffice.
detector_count >= 2
}
VotingStrategy::Weighted => {
// Quorum of 2.0 total weight (e.g. two average-weight detectors).
let total_weight: f64 = unique_detectors
.iter()
.map(|d| self.get_detector_weight(d))
.sum();
total_weight >= 2.0
}
VotingStrategy::Threshold => {
let confidence = self.calculate_confidence(findings);
confidence >= self.confidence_threshold && detector_count >= 2
}
}
}
/// Combines the group's per-finding confidences using the configured
/// method, then applies a multi-detector boost: +0.05 per distinct
/// detector beyond the first, capped at +0.20 (final value capped at 1.0),
/// once at least `min_detectors_for_boost` distinct detectors agree.
fn calculate_confidence(&self, findings: &[Finding]) -> f64 {
let mut confidences = Vec::new();
let mut weights = Vec::new();
for finding in findings {
let conf = self.get_finding_confidence(finding);
let weight = self.get_detector_weight(&finding.detector);
confidences.push(conf);
weights.push(weight);
}
if confidences.is_empty() {
return 0.0;
}
let base = match self.confidence_method {
ConfidenceMethod::Average => confidences.iter().sum::<f64>() / confidences.len() as f64,
ConfidenceMethod::Weighted => {
let total_weight: f64 = weights.iter().sum();
if total_weight > 0.0 {
confidences
.iter()
.zip(weights.iter())
.map(|(c, w)| c * w)
.sum::<f64>()
/ total_weight
} else {
// Degenerate weights: fall back to the plain average.
confidences.iter().sum::<f64>() / confidences.len() as f64
}
}
ConfidenceMethod::Max => confidences.iter().cloned().fold(0.0, f64::max),
ConfidenceMethod::Min => confidences.iter().cloned().fold(1.0, f64::min),
ConfidenceMethod::Bayesian => {
// Average confidences per detector "family" (name prefix before
// '[', '+', or ':') so near-duplicate detectors count only once.
let mut by_family: BTreeMap<String, Vec<f64>> = BTreeMap::new();
for f in findings {
let family = f
.detector
.split(['[', '+', ':'])
.next()
.unwrap_or(f.detector.as_str())
.to_string();
by_family
.entry(family)
.or_default()
.push(self.get_finding_confidence(f));
}
let family_confidences: Vec<f64> = by_family
.values()
.map(|vals| vals.iter().sum::<f64>() / vals.len() as f64)
.collect();
// Sequential Bayesian update from a neutral 0.5 prior, treating
// each family's average confidence as the likelihood.
let mut prior = 0.5;
for conf in family_confidences {
let likelihood = conf;
let denom = prior * likelihood + (1.0 - prior) * (1.0 - likelihood);
if denom > 0.0 {
prior = (prior * likelihood) / denom;
}
}
prior
}
};
let unique_detectors: HashSet<&str> =
findings.iter().map(|f| f.detector.as_str()).collect();
if unique_detectors.len() >= self.min_detectors_for_boost {
// Agreement boost, capped so confidence never exceeds 1.0.
let boost = ((unique_detectors.len() - 1) as f64 * 0.05).min(0.20);
(base + boost).min(1.0)
} else {
base
}
}
/// Picks the merged severity per the configured resolution; defaults to
/// Medium for an empty group.
fn resolve_severity(&self, findings: &[Finding]) -> Severity {
if findings.is_empty() {
return Severity::Medium;
}
match self.severity_resolution {
SeverityResolution::Highest => findings
.iter()
.map(|f| f.severity)
.max()
.unwrap_or(Severity::Medium),
SeverityResolution::Lowest => findings
.iter()
.map(|f| f.severity)
.min()
.unwrap_or(Severity::Medium),
SeverityResolution::MajorityVote => {
// Count occurrences per severity; ties on count resolve toward
// the higher severity via the secondary comparison.
let mut counts: BTreeMap<Severity, usize> = BTreeMap::new();
for finding in findings {
*counts.entry(finding.severity).or_insert(0) += 1;
}
counts
.into_iter()
.max_by(|(sev_a, count_a), (sev_b, count_b)| {
count_a.cmp(count_b).then_with(|| sev_a.cmp(sev_b))
})
.map(|(sev, _)| sev)
.unwrap_or(Severity::Medium)
}
SeverityResolution::WeightedVote => {
// Score each severity by the sum of confidence * detector weight
// of the findings reporting it; ties resolve toward the higher
// severity.
let mut severity_scores: BTreeMap<Severity, f64> = BTreeMap::new();
for finding in findings {
let conf = self.get_finding_confidence(finding);
let weight = self.get_detector_weight(&finding.detector);
*severity_scores.entry(finding.severity).or_insert(0.0) += conf * weight;
}
severity_scores
.into_iter()
.max_by(|(sev_a, a), (sev_b, b)| {
a.partial_cmp(b)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| sev_a.cmp(sev_b))
})
.map(|(sev, _)| sev)
.unwrap_or(Severity::Medium)
}
}
}
/// Confidence for a single finding: its own clamped confidence when set,
/// otherwise the detector's historical accuracy (0.7 as a last resort).
fn get_finding_confidence(&self, finding: &Finding) -> f64 {
    match finding.confidence {
        Some(conf) => conf.clamp(0.0, 1.0),
        None => self
            .detector_weights
            .get(&finding.detector)
            .or_else(|| self.detector_weights.get("default"))
            .map_or(0.7, |w| w.accuracy),
    }
}
/// Voting weight for a detector (falls back to "default", then 1.0).
fn get_detector_weight(&self, detector_name: &str) -> f64 {
    let entry = self
        .detector_weights
        .get(detector_name)
        .or_else(|| self.detector_weights.get("default"));
    entry.map_or(1.0, |w| w.weight)
}
/// Heuristic: does the title's trailing segment (after the last ':') look
/// like a utility/helper function name? Used to soften Critical severity
/// on consensus findings about such helpers.
fn is_utility_function_name(title: &str) -> bool {
    const UTILITY_PREFIXES: &[&str] = &[
        "is_", "has_", "check_", "validate_", "should_", "can_", "find_",
        "calculate_", "compute_", "scan_", "extract_", "normalize_", "get_",
        "set_", "parse_", "format_", "resolve_", "schedule_", "add_",
        "update_", "delete_", "remove_", "apply_", "use", "fetch_", "load_",
        "save_", "send_", "notify_",
    ];
    let func_name = title
        .rsplit(':')
        .next()
        .unwrap_or("")
        .trim()
        .to_lowercase();
    UTILITY_PREFIXES
        .iter()
        .any(|prefix| func_name.starts_with(prefix))
}
/// Merges a multi-detector group into a single consensus finding, using
/// the first highest-severity finding as the base and annotating it with
/// consensus metadata (detector list, vote count, confidence).
fn create_consensus_finding(
    &self,
    findings: &[Finding],
    consensus: &ConsensusResult,
) -> Finding {
    // First finding with the maximum severity — a single scan replaces the
    // previous clone + sort of the whole group and keeps the same
    // tie-breaking (earliest finding among equal severities).
    let base = findings
        .iter()
        .skip(1)
        .fold(&findings[0], |best, f| {
            if f.severity > best.severity { f } else { best }
        });
    // Critical findings on utility-style helper names are usually
    // overstated; cap them at High.
    let mut final_severity = consensus.severity;
    if final_severity == Severity::Critical && Self::is_utility_function_name(&base.title) {
        final_severity = Severity::High;
    }
    // At most three detector names in the synthetic detector label.
    let detector_names: Vec<&str> = consensus
        .contributing_detectors
        .iter()
        .take(3)
        .map(|s| s.as_str())
        .collect();
    let detector_str = if consensus.contributing_detectors.len() > 3 {
        format!(
            "Consensus[{}+{}more]",
            detector_names.join("+"),
            consensus.contributing_detectors.len() - 3
        )
    } else {
        format!("Consensus[{}]", detector_names.join("+"))
    };
    let consensus_note = format!(
        "\n\n**Consensus Analysis**\n\
         - {} detectors agree on this issue\n\
         - Confidence: {:.0}%\n\
         - Detectors: {}",
        consensus.vote_count,
        consensus.confidence * 100.0,
        consensus.contributing_detectors.join(", ")
    );
    Finding {
        id: base.id.clone(),
        detector: detector_str,
        severity: final_severity,
        title: format!("{} [{} detectors]", base.title, consensus.vote_count),
        description: format!("{}{}", base.description, consensus_note),
        affected_files: base.affected_files.clone(),
        line_start: base.line_start,
        line_end: base.line_end,
        suggested_fix: self.merge_suggestions(findings),
        estimated_effort: base.estimated_effort.clone(),
        category: base.category.clone(),
        cwe_id: base.cwe_id.clone(),
        why_it_matters: base.why_it_matters.clone(),
        confidence: Some(consensus.confidence),
        ..Default::default()
    }
}
/// Combines the distinct suggested fixes of a group, labelling each with
/// its detector; falls back to the first finding's suggestion when the
/// group contributes no suggestions at all.
fn merge_suggestions(&self, findings: &[Finding]) -> Option<String> {
    let mut suggestions = Vec::new();
    let mut seen = HashSet::new();
    for f in findings {
        if let Some(ref fix) = f.suggested_fix {
            // HashSet::insert returns false for duplicates, so each fix is
            // hashed once instead of the contains-then-insert double lookup.
            if seen.insert(fix.clone()) {
                suggestions.push(format!("[{}] {}", f.detector, fix));
            }
        }
    }
    if suggestions.is_empty() {
        findings.first().and_then(|f| f.suggested_fix.clone())
    } else {
        Some(suggestions.join("\n\n"))
    }
}
}
#[cfg(test)]
mod utility_tests {
    use super::*;

    /// Helper-style titles must be recognized; other titles must not.
    #[test]
    fn test_utility_function_detection() {
        let cases = [
            ("Architectural Bottleneck: is_sql_context", true),
            ("Architectural Bottleneck: is_hash_mention_not_usage", true),
            ("Architectural Bottleneck: scan_file", true),
            ("Architectural Bottleneck: find_dead_classes", true),
            ("Architectural Bottleneck: check_line_for_patterns", true),
            ("Architectural Bottleneck: calculate_health_scores", true),
            ("Architectural Bottleneck: remove_finding_impact", true),
            ("Some Other Finding", false),
        ];
        for (title, expected) in cases.iter().copied() {
            let actual = VotingEngine::is_utility_function_name(title);
            assert_eq!(
                actual, expected,
                "Title '{}' expected {} but got {}",
                title, expected, actual
            );
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Builds a minimal finding for the given detector/location/severity.
    fn make_finding(detector: &str, file: &str, line: u32, severity: Severity) -> Finding {
        Finding {
            id: format!("{}-{}-{}", detector, file, line),
            detector: detector.to_string(),
            severity,
            title: format!("Issue in {}", file),
            description: "test finding".to_string(),
            affected_files: vec![PathBuf::from(file)],
            line_start: Some(line),
            line_end: Some(line),
            ..Default::default()
        }
    }

    #[test]
    fn test_vote_returns_results() {
        let engine = VotingEngine::new();
        let input = vec![
            make_finding("DetectorA", "src/main.py", 10, Severity::Medium),
            make_finding("DetectorB", "src/utils.py", 20, Severity::High),
        ];
        let (results, stats) = engine.vote(input);
        assert_eq!(stats.total_input, 2, "Should receive 2 input findings");
        assert!(
            stats.total_output <= stats.total_input,
            "Output should not exceed input"
        );
        assert!(
            results.iter().all(|f| !f.detector.is_empty()),
            "Detector name should not be empty"
        );
    }

    #[test]
    fn test_vote_empty_input() {
        let (results, stats) = VotingEngine::new().vote(Vec::new());
        assert!(results.is_empty(), "Empty input should produce empty output");
        assert_eq!(stats.total_input, 0);
        assert_eq!(stats.total_output, 0);
    }

    #[test]
    fn test_consensus_merges_findings_from_different_detectors() {
        let engine = VotingEngine::new();
        let (results, stats) = engine.vote(vec![
            make_finding("GodClassDetector", "src/app.py", 5, Severity::High),
            make_finding("GodClassDetector2", "src/app.py", 5, Severity::Medium),
        ]);
        assert!(
            stats.boosted_by_consensus >= 1 || stats.total_output >= 1,
            "Two different detectors on same location should produce output. Stats: {:?}",
            stats
        );
        if stats.boosted_by_consensus > 0 {
            assert!(
                results.iter().any(|f| f.detector.contains("Consensus")),
                "Consensus finding should contain 'Consensus' in detector name"
            );
        }
    }

    #[test]
    fn test_single_detector_findings_pass_through() {
        let engine = VotingEngine::new();
        let input = vec![make_finding(
            "CircularDependencyDetector",
            "src/lint.py",
            42,
            Severity::Medium,
        )];
        let (results, stats) = engine.vote(input);
        assert_eq!(stats.single_detector_findings, 1);
        assert_eq!(
            results.len(),
            1,
            "High-accuracy single-detector finding should pass through"
        );
    }
}