#![cfg_attr(coverage_nightly, coverage(off))]

use super::types::QueryResult;
use crate::models::churn::FileChurnMetrics;
use std::collections::HashMap;
use std::path::Path;
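
/// Copy precomputed churn metrics (commit count, churn score) onto each
/// result whose file path appears in `file_churn`.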
pub fn enrich_with_churn(results: &mut [QueryResult], file_churn: &HashMap<String, (u32, f32)>) {
    for result in results.iter_mut() {
        if let Some((commit_count, churn_score)) = file_churn.get(&result.file_path) {
            result.commit_count = *commit_count;
            result.churn_score = *churn_score;
        }
    }
}
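
/// Build a `relative_path -> (commit_count, churn_score)` lookup from churn metrics.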
pub fn build_churn_map(metrics: &[FileChurnMetrics]) -> HashMap<String, (u32, f32)> {
    metrics
        .iter()
        .map(|m| {
            (
                m.relative_path.clone(),
                (m.commit_count as u32, m.churn_score),
            )
        })
        .collect()
}
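
/// Run incremental churn analysis over the result files and fold the metrics
/// into the results. Skipped when most results already carry churn data.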
#[cfg_attr(coverage_nightly, coverage(off))]
pub async fn enrich_results_with_churn(
    results: &mut [QueryResult],
    project_root: &Path,
    period_days: u32,
) -> Result<(), String> {
    use crate::services::incremental_churn::IncrementalChurnAnalyzer;

    if results.is_empty() {
        return Ok(());
    }
    // Skip the analysis when more than half of the results already have churn data.
    let cached = results
        .iter()
        .filter(|r| r.commit_count > 0 || r.churn_score > 0.0)
        .count();
    if cached * 2 > results.len() {
        return Ok(());
    }
    // Deduplicate file paths via a HashSet before handing them to the analyzer.
    let files: Vec<std::path::PathBuf> = results
        .iter()
        .map(|r| project_root.join(&r.file_path))
        .collect::<std::collections::HashSet<_>>()
        .into_iter()
        .collect();
    let analyzer = IncrementalChurnAnalyzer::new(project_root.to_path_buf());
    let analysis = analyzer
        .analyze_incremental(files, period_days)
        .await
        .map_err(|e| format!("Churn analysis failed: {e}"))?;
    let churn_map = build_churn_map(&analysis.files);
    enrich_with_churn(results, &churn_map);
    Ok(())
}
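
/// Map a file extension to a language supported by the duplicate detector.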
fn detect_language_for_duplication(
    path: &str,
) -> Option<crate::services::duplicate_detector::Language> {
    use crate::services::duplicate_detector::Language;

    let ext_langs: &[(&[&str], Language)] = &[
        (&[".rs"], Language::Rust),
        (&[".ts", ".tsx"], Language::TypeScript),
        (&[".js", ".jsx"], Language::JavaScript),
        (&[".py"], Language::Python),
        (&[".c"], Language::C),
        (&[".cpp", ".cc", ".cxx", ".cu", ".cuh"], Language::Cpp),
        (&[".kt"], Language::Kotlin),
    ];
    ext_langs
        .iter()
        .find(|(exts, _)| exts.iter().any(|ext| path.ends_with(ext)))
        .map(|(_, lang)| *lang)
}
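
/// Read the contents of every distinct result file under `project_root`.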
fn collect_file_contents(results: &[QueryResult], project_root: &Path) -> HashMap<String, String> {
    let mut contents: HashMap<String, String> = HashMap::new();
    for result in results {
        // Read each file at most once; unreadable files are silently skipped.
        if contents.contains_key(&result.file_path) {
            continue;
        }
        let full_path = project_root.join(&result.file_path);
        if let Ok(content) = std::fs::read_to_string(&full_path) {
            contents.insert(result.file_path.clone(), content);
        }
    }
    contents
}
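
/// Detect code clones across the result files and record, per file, the
/// number of clone-group memberships and the highest group similarity.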
#[cfg_attr(coverage_nightly, coverage(off))]
pub async fn enrich_results_with_duplicates(
    results: &mut [QueryResult],
    project_root: &Path,
) -> Result<(), String> {
    use crate::services::duplicate_detector::{DuplicateDetectionConfig, DuplicateDetectionEngine};

    if results.is_empty() {
        return Ok(());
    }
    let file_contents = collect_file_contents(results, project_root);
    // Only analyze files whose extension maps to a supported language.
    let files_to_analyze: Vec<_> = file_contents
        .iter()
        .filter_map(|(path, content)| {
            detect_language_for_duplication(path)
                .map(|lang| (std::path::PathBuf::from(path), content.clone(), lang))
        })
        .collect();
    if files_to_analyze.is_empty() {
        return Ok(());
    }
    let config = DuplicateDetectionConfig {
        min_tokens: 20,
        similarity_threshold: 0.65,
        ..Default::default()
    };
    let engine = DuplicateDetectionEngine::new(config);
    let report = engine
        .detect_duplicates(&files_to_analyze)
        .map_err(|e| format!("Duplicate detection failed: {e}"))?;
    // Aggregate per file: count clone-group memberships and keep the highest
    // average similarity seen across groups.
    let mut file_duplication: HashMap<String, (u32, f32)> = HashMap::new();
    for group in &report.groups {
        for fragment in &group.fragments {
            let path_str = fragment.file.to_string_lossy().to_string();
            let entry = file_duplication.entry(path_str).or_insert((0, 0.0));
            entry.0 += 1;
            entry.1 = entry.1.max(group.average_similarity as f32);
        }
    }
    for result in results.iter_mut() {
        if let Some((clone_count, dup_score)) = file_duplication.get(&result.file_path) {
            result.clone_count = *clone_count;
            result.duplication_score = *dup_score;
        }
    }
    Ok(())
}
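
/// Run entropy analysis over the project and derive a per-file pattern
/// diversity score; files absent from the violation map fall back to the
/// project-wide diversity.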
#[cfg_attr(coverage_nightly, coverage(off))]
pub async fn enrich_results_with_entropy(
    results: &mut [QueryResult],
    project_root: &Path,
) -> Result<(), String> {
    use crate::entropy::{EntropyAnalyzer, EntropyConfig};

    if results.is_empty() {
        return Ok(());
    }
    // Skip the analysis when more than half of the results already have entropy data.
    let cached = results.iter().filter(|r| r.pattern_diversity > 0.0).count();
    if cached * 2 > results.len() {
        return Ok(());
    }
    let config = EntropyConfig::default().with_project_ignores(project_root);
    let analyzer = EntropyAnalyzer::with_config(config);
    let report = analyzer
        .analyze(project_root)
        .await
        .map_err(|e| format!("Entropy analysis failed: {e}"))?;
    let overall_diversity = report.entropy_metrics.pattern_diversity as f32;
    // Count actionable violations per file, keyed by project-relative path.
    let mut file_pattern_count: HashMap<String, usize> = HashMap::new();
    for violation in &report.actionable_violations {
        for file in &violation.affected_files {
            let path_str = file
                .strip_prefix(project_root)
                .unwrap_or(file)
                .to_string_lossy()
                .to_string();
            *file_pattern_count.entry(path_str).or_insert(0) += 1;
        }
    }
    // Files with more violations score lower diversity; the default of 1
    // avoids division by zero when no violations were recorded.
    let max_patterns = file_pattern_count.values().max().copied().unwrap_or(1) as f32;
    for result in results.iter_mut() {
        if let Some(&pattern_count) = file_pattern_count.get(&result.file_path) {
            result.pattern_diversity = 1.0 - (pattern_count as f32 / max_patterns).min(1.0);
        } else {
            result.pattern_diversity = overall_diversity;
        }
    }
    Ok(())
}
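
/// Invoke the external `batuta bug-hunter falsify` command and parse its JSON
/// findings into a `file -> fault annotations` map.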
fn run_batuta_and_parse(project_root: &Path) -> Result<HashMap<String, Vec<String>>, String> {
    use std::process::Command;

    let output = Command::new("batuta")
        .args(["bug-hunter", "falsify", "--format", "json", "--target", "."])
        .current_dir(project_root)
        .output()
        .map_err(|e| format!("Failed to run batuta: {e}"))?;
    // A usage message on stderr is tolerated; any other failure is an error.
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        if !stderr.contains("Usage:") {
            return Err(format!("batuta failed: {stderr}"));
        }
    }
    // Skip any preamble before the first '{' so the JSON body parses cleanly.
    let stdout = String::from_utf8_lossy(&output.stdout);
    let json_start = match stdout.find('{') {
        Some(s) => s,
        None => return Ok(HashMap::new()),
    };
    let parsed: serde_json::Value =
        serde_json::from_str(stdout.get(json_start..).unwrap_or_default())
            .map_err(|e| format!("Failed to parse batuta output: {e}"))?;
    let findings = match parsed.get("findings").and_then(|f| f.as_array()) {
        Some(f) => f,
        None => return Ok(HashMap::new()),
    };
    let mut fault_map: HashMap<String, Vec<String>> = HashMap::new();
    for finding in findings {
        let file = finding.get("file").and_then(|f| f.as_str()).unwrap_or("");
        let line = finding.get("line").and_then(|l| l.as_u64()).unwrap_or(0);
        let title = finding
            .get("title")
            .and_then(|t| t.as_str())
            .unwrap_or("Unknown fault pattern");
        let id = finding.get("id").and_then(|i| i.as_str()).unwrap_or("BH");
        // Normalize "./src/foo.rs" to "src/foo.rs" to match QueryResult paths.
        let normalized = file.strip_prefix("./").unwrap_or(file);
        fault_map
            .entry(normalized.to_string())
            .or_default()
            .push(format!("{id}: {title} at line {line}"));
    }
    Ok(fault_map)
}
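
/// Filter fault annotations down to those whose trailing "at line N" falls
/// within the inclusive `[start_line, end_line]` range.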
fn faults_in_range(faults: &[String], start_line: usize, end_line: usize) -> Vec<String> {
    faults
        .iter()
        .filter(|f| {
            // Annotations end with "at line N"; parse N from the last segment.
            f.split("at line ")
                .last()
                .and_then(|s| s.parse::<usize>().ok())
                .is_some_and(|line| line >= start_line && line <= end_line)
        })
        .cloned()
        .collect()
}
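
/// Annotate results with fault findings that overlap each result's line span.
/// Skipped when most results are already annotated.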
#[cfg_attr(coverage_nightly, coverage(off))]
pub async fn enrich_results_with_faults(
    results: &mut [QueryResult],
    project_root: &Path,
) -> Result<(), String> {
    if results.is_empty() {
        return Ok(());
    }
    // Skip the analysis when more than half of the results are already annotated.
    let cached = results
        .iter()
        .filter(|r| !r.fault_annotations.is_empty())
        .count();
    if cached * 2 > results.len() {
        return Ok(());
    }
    let fault_map = run_batuta_and_parse(project_root)?;
    for result in results.iter_mut() {
        if let Some(faults) = fault_map.get(&result.file_path) {
            // Keep only faults that fall within this result's line span.
            let func_end = result.start_line + result.loc as usize;
            let relevant = faults_in_range(faults, result.start_line, func_end);
            if !relevant.is_empty() {
                result.fault_annotations = relevant;
            }
        }
    }
    Ok(())
}