#![cfg_attr(coverage_nightly, coverage(off))]
use super::types::QueryResult;
use crate::models::churn::FileChurnMetrics;
use std::collections::HashMap;
use std::path::Path;
/// Copy per-file churn metrics onto each matching query result.
///
/// `file_churn` maps a file path to `(commit_count, churn_score)`. Every
/// result whose `file_path` has an entry gets both values written onto it;
/// results without an entry are left untouched.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub fn enrich_with_churn(results: &mut [QueryResult], file_churn: &HashMap<String, (u32, f32)>) {
    results.iter_mut().for_each(|r| {
        if let Some(&(commits, score)) = file_churn.get(&r.file_path) {
            r.commit_count = commits;
            r.churn_score = score;
        }
    });
}
/// Build a lookup table from relative file path to `(commit_count, churn_score)`.
///
/// Later duplicates of the same `relative_path` overwrite earlier ones,
/// matching `HashMap` collect semantics.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub fn build_churn_map(metrics: &[FileChurnMetrics]) -> HashMap<String, (u32, f32)> {
    let mut map = HashMap::with_capacity(metrics.len());
    for m in metrics {
        map.insert(m.relative_path.clone(), (m.commit_count as u32, m.churn_score));
    }
    map
}
/// Enrich `results` with git-churn metrics for a lookback of `period_days`.
///
/// Skips the (potentially expensive) incremental analysis when there is
/// nothing to enrich, or when a majority of results already carry churn
/// data (treated as a warm cache).
///
/// # Errors
/// Returns a formatted error string when the churn analysis itself fails.
#[cfg_attr(coverage_nightly, coverage(off))]
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub async fn enrich_results_with_churn(
    results: &mut [QueryResult],
    project_root: &Path,
    period_days: u32,
) -> Result<(), String> {
    use crate::services::incremental_churn::IncrementalChurnAnalyzer;
    if results.is_empty() {
        return Ok(());
    }
    // More than half already enriched? Trust the existing data and bail.
    let already_enriched = results
        .iter()
        .filter(|r| r.commit_count > 0 || r.churn_score > 0.0)
        .count();
    if already_enriched * 2 > results.len() {
        return Ok(());
    }
    // Deduplicate absolute paths before handing them to the analyzer.
    let unique_paths: std::collections::HashSet<std::path::PathBuf> = results
        .iter()
        .map(|r| project_root.join(&r.file_path))
        .collect();
    let analysis = IncrementalChurnAnalyzer::new(project_root.to_path_buf())
        .analyze_incremental(unique_paths.into_iter().collect::<Vec<_>>(), period_days)
        .await
        .map_err(|e| format!("Churn analysis failed: {e}"))?;
    enrich_with_churn(results, &build_churn_map(&analysis.files));
    Ok(())
}
/// Map a file path to the duplicate-detector `Language` by extension suffix.
///
/// The match is a literal suffix test (leading dot included), so `"a.rs"`
/// matches Rust while `"ars"` does not. Unknown extensions yield `None`.
fn detect_language_for_duplication(
    path: &str,
) -> Option<crate::services::duplicate_detector::Language> {
    use crate::services::duplicate_detector::Language;
    let table: [(&[&str], Language); 7] = [
        (&[".rs"], Language::Rust),
        (&[".ts", ".tsx"], Language::TypeScript),
        (&[".js", ".jsx"], Language::JavaScript),
        (&[".py"], Language::Python),
        (&[".c"], Language::C),
        (&[".cpp", ".cc", ".cxx", ".cu", ".cuh"], Language::Cpp),
        (&[".kt"], Language::Kotlin),
    ];
    for (suffixes, lang) in table {
        if suffixes.iter().any(|s| path.ends_with(s)) {
            return Some(lang);
        }
    }
    None
}
/// Read each distinct result file (relative to `project_root`) into memory.
///
/// Returns a map keyed by the result's relative `file_path`. Files that
/// cannot be read (missing, unreadable, non-UTF-8) are silently skipped.
fn collect_file_contents(results: &[QueryResult], project_root: &Path) -> HashMap<String, String> {
    let mut contents: HashMap<String, String> = HashMap::new();
    for r in results {
        // Each distinct path is read at most once.
        if contents.contains_key(&r.file_path) {
            continue;
        }
        let absolute = project_root.join(&r.file_path);
        match std::fs::read_to_string(&absolute) {
            Ok(text) => {
                contents.insert(r.file_path.clone(), text);
            }
            Err(_) => {} // best-effort: unreadable files are simply absent
        }
    }
    contents
}
/// Enrich `results` with clone counts and duplication scores.
///
/// Reads every referenced file once, keeps only those in a language the
/// duplicate detector supports, then records per file: the number of
/// duplicated fragments and the strongest group similarity seen.
///
/// # Errors
/// Returns a formatted error string when duplicate detection fails.
#[cfg_attr(coverage_nightly, coverage(off))]
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub async fn enrich_results_with_duplicates(
    results: &mut [QueryResult],
    project_root: &Path,
) -> Result<(), String> {
    use crate::services::duplicate_detector::{DuplicateDetectionConfig, DuplicateDetectionEngine};
    if results.is_empty() {
        return Ok(());
    }
    let contents = collect_file_contents(results, project_root);
    // Keep only files whose language the detector understands.
    let mut analyzable = Vec::with_capacity(contents.len());
    for (path, text) in &contents {
        if let Some(lang) = detect_language_for_duplication(path) {
            analyzable.push((std::path::PathBuf::from(path), text.clone(), lang));
        }
    }
    if analyzable.is_empty() {
        return Ok(());
    }
    let engine = DuplicateDetectionEngine::new(DuplicateDetectionConfig {
        min_tokens: 20,
        similarity_threshold: 0.65,
        ..Default::default()
    });
    let report = engine
        .detect_duplicates(&analyzable)
        .map_err(|e| format!("Duplicate detection failed: {e}"))?;
    // Per file: (fragment count, max group similarity across all groups).
    let mut per_file: HashMap<String, (u32, f32)> = HashMap::new();
    for group in &report.groups {
        for fragment in &group.fragments {
            let key = fragment.file.to_string_lossy().to_string();
            let stats = per_file.entry(key).or_insert((0, 0.0));
            stats.0 += 1;
            stats.1 = stats.1.max(group.average_similarity as f32);
        }
    }
    for result in results.iter_mut() {
        if let Some(&(clones, score)) = per_file.get(&result.file_path) {
            result.clone_count = clones;
            result.duplication_score = score;
        }
    }
    Ok(())
}
/// Enrich `results` with a per-file pattern-diversity score.
///
/// Files that accumulate more actionable entropy violations receive a lower
/// diversity score (normalized against the worst offender); files with no
/// violations fall back to the project-wide diversity from the report.
/// Skips the analysis when a majority of results already carry a score.
///
/// # Errors
/// Returns a formatted error string when the entropy analysis fails.
#[cfg_attr(coverage_nightly, coverage(off))]
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub async fn enrich_results_with_entropy(
    results: &mut [QueryResult],
    project_root: &Path,
) -> Result<(), String> {
    use crate::entropy::{EntropyAnalyzer, EntropyConfig};
    if results.is_empty() {
        return Ok(());
    }
    // Warm-cache heuristic: majority already scored means nothing to do.
    let already_scored = results.iter().filter(|r| r.pattern_diversity > 0.0).count();
    if already_scored * 2 > results.len() {
        return Ok(());
    }
    let analyzer =
        EntropyAnalyzer::with_config(EntropyConfig::default().with_project_ignores(project_root));
    let report = analyzer
        .analyze(project_root)
        .await
        .map_err(|e| format!("Entropy analysis failed: {e}"))?;
    let overall = report.entropy_metrics.pattern_diversity as f32;
    // Count actionable violations per project-relative path.
    let mut violations_per_file: HashMap<String, usize> = HashMap::new();
    for violation in &report.actionable_violations {
        for file in &violation.affected_files {
            let rel = file.strip_prefix(project_root).unwrap_or(file);
            *violations_per_file
                .entry(rel.to_string_lossy().to_string())
                .or_insert(0) += 1;
        }
    }
    let worst = violations_per_file.values().max().copied().unwrap_or(1) as f32;
    for result in results.iter_mut() {
        result.pattern_diversity = match violations_per_file.get(&result.file_path) {
            // More violations -> lower diversity, clamped into [0, 1].
            Some(&count) => 1.0 - (count as f32 / worst).min(1.0),
            None => overall,
        };
    }
    Ok(())
}
/// Load bug-hunter findings from the newest JSON cache file, keyed by file path.
///
/// Reads `.pmat/bug-hunter-cache/*.json` under `project_root`, picks the most
/// recently modified file, and flattens its `findings` array into
/// `path -> ["<id>: <title> at line <n>", …]`. Paths are normalized by
/// stripping a leading `./`. Every failure mode (missing directory, no JSON
/// files, unreadable file, bad JSON, missing `findings`) is treated as an
/// empty cache rather than an error.
fn load_faults_from_cache(project_root: &Path) -> Result<HashMap<String, Vec<String>>, String> {
    let cache_dir = project_root.join(".pmat/bug-hunter-cache");
    let Ok(entries) = std::fs::read_dir(&cache_dir) else {
        return Ok(HashMap::new());
    };
    // Newest *.json by mtime; unreadable metadata sorts to the epoch.
    let newest = entries
        .flatten()
        .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
        .max_by_key(|e| {
            e.metadata()
                .and_then(|m| m.modified())
                .unwrap_or(std::time::SystemTime::UNIX_EPOCH)
        });
    let Some(entry) = newest else {
        return Ok(HashMap::new());
    };
    let Ok(data) = std::fs::read_to_string(entry.path()) else {
        return Ok(HashMap::new());
    };
    let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&data) else {
        return Ok(HashMap::new());
    };
    let Some(findings) = parsed.get("findings").and_then(|f| f.as_array()) else {
        return Ok(HashMap::new());
    };
    let mut fault_map: HashMap<String, Vec<String>> = HashMap::new();
    for finding in findings {
        // Each field degrades gracefully to a placeholder when absent.
        let file = finding.get("file").and_then(|f| f.as_str()).unwrap_or("");
        let line = finding.get("line").and_then(|l| l.as_u64()).unwrap_or(0);
        let title = finding
            .get("title")
            .and_then(|t| t.as_str())
            .unwrap_or("Unknown fault pattern");
        let id = finding.get("id").and_then(|i| i.as_str()).unwrap_or("BH");
        let key = file.strip_prefix("./").unwrap_or(file).to_string();
        fault_map
            .entry(key)
            .or_default()
            .push(format!("{id}: {title} at line {line}"));
    }
    Ok(fault_map)
}
/// Select fault annotations whose trailing `at line N` marker falls inside
/// the inclusive range `[start_line, end_line]`.
///
/// The line number is taken from the text after the *last* `"at line "`
/// occurrence; annotations whose tail does not parse as a number are dropped.
fn faults_in_range(faults: &[String], start_line: usize, end_line: usize) -> Vec<String> {
    let mut hits = Vec::new();
    for fault in faults {
        // split().last() always yields at least one piece, so "" is unreachable.
        let tail = fault.split("at line ").last().unwrap_or("");
        if let Ok(line) = tail.parse::<usize>() {
            if (start_line..=end_line).contains(&line) {
                hits.push(fault.clone());
            }
        }
    }
    hits
}
/// Attach cached bug-hunter fault annotations to each matching result.
///
/// Loads the newest fault cache and, for each result, keeps only the faults
/// whose line number falls within the function's span
/// (`start_line ..= start_line + loc`). Skips the reload entirely when a
/// majority of results are already annotated.
///
/// # Errors
/// Propagates any error from `load_faults_from_cache`.
#[cfg_attr(coverage_nightly, coverage(off))]
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub async fn enrich_results_with_faults(
    results: &mut [QueryResult],
    project_root: &Path,
) -> Result<(), String> {
    if results.is_empty() {
        return Ok(());
    }
    // Warm-cache heuristic mirroring the other enrichment passes.
    let annotated = results
        .iter()
        .filter(|r| !r.fault_annotations.is_empty())
        .count();
    if annotated * 2 > results.len() {
        return Ok(());
    }
    let faults_by_file = load_faults_from_cache(project_root)?;
    if faults_by_file.is_empty() {
        return Ok(());
    }
    for result in results.iter_mut() {
        let Some(file_faults) = faults_by_file.get(&result.file_path) else {
            continue;
        };
        // Restrict to faults inside this function's line span.
        let span_end = result.start_line + result.loc as usize;
        let matching = faults_in_range(file_faults, result.start_line, span_end);
        if !matching.is_empty() {
            result.fault_annotations = matching;
        }
    }
    Ok(())
}
// Unit tests for the pure, synchronous enrichment helpers: churn map
// construction/application, duplicate-detector language detection, and
// fault line-range filtering. The async, I/O-backed enrichment entry
// points are intentionally not exercised here.
#[cfg(test)]
mod enrichment_pure_tests {
    use super::*;
    use crate::services::duplicate_detector::Language;

    // Build a minimal QueryResult fixture for `path` with every enrichment
    // field zeroed, so each test observes exactly what a helper writes.
    fn make_result(path: &str) -> QueryResult {
        let mut r = QueryResult {
            function_name: "f".to_string(),
            file_path: path.to_string(),
            signature: "fn f()".to_string(),
            definition_type: "function".to_string(),
            doc_comment: None,
            start_line: 1,
            end_line: 10,
            language: "Rust".to_string(),
            tdg_score: 80.0,
            tdg_grade: "A".to_string(),
            complexity: 5,
            big_o: "O(1)".to_string(),
            satd_count: 0,
            loc: 10,
            relevance_score: 0.95,
            source: None,
            calls: Vec::new(),
            called_by: Vec::new(),
            pagerank: 0.0,
            in_degree: 0,
            out_degree: 0,
            commit_count: 0,
            churn_score: 0.0,
            clone_count: 0,
            duplication_score: 0.0,
            pattern_diversity: 0.0,
            fault_annotations: Vec::new(),
            line_coverage_pct: 0.0,
            lines_covered: 0,
            lines_total: 0,
            missed_lines: 0,
            impact_score: 0.0,
            coverage_status: String::new(),
            coverage_diff: 0.0,
            coverage_exclusion: Default::default(),
            coverage_excluded: false,
            cross_project_callers: 0,
            io_classification: String::new(),
            io_patterns: Vec::new(),
            suggested_module: String::new(),
            contract_level: None,
            contract_equation: None,
        };
        // NOTE(review): overrides the start_line set in the literal above —
        // presumably to exercise 0-based spans; confirm this is intentional.
        r.start_line = 0;
        r
    }

    // Only results whose path has a churn-map entry are updated.
    #[test]
    fn test_enrich_with_churn_applies_metrics_for_matching_paths() {
        let mut results = vec![make_result("src/a.rs"), make_result("src/b.rs")];
        let mut churn = HashMap::new();
        churn.insert("src/a.rs".to_string(), (5u32, 0.5_f32));
        enrich_with_churn(&mut results, &churn);
        assert_eq!(results[0].commit_count, 5);
        assert!((results[0].churn_score - 0.5).abs() < 1e-6);
        // The path with no map entry must stay at its zeroed default.
        assert_eq!(results[1].commit_count, 0);
    }

    #[test]
    fn test_enrich_with_churn_empty_map_no_op() {
        let mut results = vec![make_result("src/a.rs")];
        enrich_with_churn(&mut results, &HashMap::new());
        assert_eq!(results[0].commit_count, 0);
    }

    // build_churn_map keys by relative_path and narrows commit_count to u32.
    #[test]
    fn test_build_churn_map_builds_lookup_from_metrics() {
        use crate::models::churn::FileChurnMetrics;
        use chrono::Utc;
        use std::path::PathBuf;
        let metrics = vec![
            FileChurnMetrics {
                path: PathBuf::from("src/a.rs"),
                relative_path: "src/a.rs".to_string(),
                commit_count: 3,
                unique_authors: vec![],
                additions: 0,
                deletions: 0,
                churn_score: 0.3,
                last_modified: Utc::now(),
                first_seen: Utc::now(),
            },
            FileChurnMetrics {
                path: PathBuf::from("src/b.rs"),
                relative_path: "src/b.rs".to_string(),
                commit_count: 7,
                unique_authors: vec![],
                additions: 0,
                deletions: 0,
                churn_score: 0.7,
                last_modified: Utc::now(),
                first_seen: Utc::now(),
            },
        ];
        let map = build_churn_map(&metrics);
        assert_eq!(map.get("src/a.rs"), Some(&(3u32, 0.3_f32)));
        assert_eq!(map.get("src/b.rs"), Some(&(7u32, 0.7_f32)));
    }

    #[test]
    fn test_build_churn_map_empty_input_returns_empty() {
        let map = build_churn_map(&[]);
        assert!(map.is_empty());
    }

    #[test]
    fn test_detect_language_rust() {
        assert!(matches!(
            detect_language_for_duplication("src/foo.rs"),
            Some(Language::Rust)
        ));
    }

    #[test]
    fn test_detect_language_typescript_ts_and_tsx() {
        assert!(matches!(
            detect_language_for_duplication("foo.ts"),
            Some(Language::TypeScript)
        ));
        assert!(matches!(
            detect_language_for_duplication("foo.tsx"),
            Some(Language::TypeScript)
        ));
    }

    #[test]
    fn test_detect_language_javascript_js_and_jsx() {
        assert!(matches!(
            detect_language_for_duplication("foo.js"),
            Some(Language::JavaScript)
        ));
        assert!(matches!(
            detect_language_for_duplication("foo.jsx"),
            Some(Language::JavaScript)
        ));
    }

    #[test]
    fn test_detect_language_python() {
        assert!(matches!(
            detect_language_for_duplication("foo.py"),
            Some(Language::Python)
        ));
    }

    // Covers C plus every C++/CUDA extension variant in the lookup table.
    #[test]
    fn test_detect_language_c_and_cpp_variants() {
        assert!(matches!(
            detect_language_for_duplication("a.c"),
            Some(Language::C)
        ));
        for ext in &["cpp", "cc", "cxx", "cu", "cuh"] {
            assert!(matches!(
                detect_language_for_duplication(&format!("a.{ext}")),
                Some(Language::Cpp)
            ));
        }
    }

    #[test]
    fn test_detect_language_kotlin() {
        assert!(matches!(
            detect_language_for_duplication("foo.kt"),
            Some(Language::Kotlin)
        ));
    }

    #[test]
    fn test_detect_language_unknown_returns_none() {
        assert!(detect_language_for_duplication("foo.xyz").is_none());
        assert!(detect_language_for_duplication("foo").is_none());
    }

    // Annotations outside [start, end] are filtered out; in-range ones kept.
    #[test]
    fn test_faults_in_range_includes_in_range_only() {
        let faults = vec![
            "BH-1: x at line 5".to_string(),
            "BH-2: y at line 50".to_string(),
            "BH-3: z at line 105".to_string(),
        ];
        let in_range = faults_in_range(&faults, 1, 100);
        assert_eq!(in_range.len(), 2);
        assert!(in_range.iter().any(|f| f.contains("line 5")));
        assert!(in_range.iter().any(|f| f.contains("line 50")));
    }

    #[test]
    fn test_faults_in_range_empty_input_returns_empty() {
        let in_range = faults_in_range(&[], 1, 100);
        assert!(in_range.is_empty());
    }

    // Annotations without a parseable "at line N" tail are silently dropped.
    #[test]
    fn test_faults_in_range_skips_unparseable_line_marker() {
        let faults = vec!["BH-1: badly formatted".to_string()];
        let in_range = faults_in_range(&faults, 1, 100);
        assert!(in_range.is_empty());
    }

    // Both range endpoints are inclusive.
    #[test]
    fn test_faults_in_range_boundary_inclusive() {
        let faults = vec![
            "BH-1: low at line 1".to_string(),
            "BH-2: high at line 100".to_string(),
        ];
        let in_range = faults_in_range(&faults, 1, 100);
        assert_eq!(in_range.len(), 2);
    }
}