use std::collections::HashMap;
use crate::calibration::SCORE_TOLERANCE;
use crate::compliance::{
ComplianceDimension, ComplianceDimensionPipeline, CompliancePipelineObservation,
CompliancePipelineSubjectKind, HeatmapContext, HeatmapEntry,
};
use crate::loss::common::counterfactual_composite;
use crate::metrics::AnalysisReport;
/// Identifies a function as `(name, file, line)`.
type FnKey = (String, String, usize);
/// Builds the `duplication` compliance dimension from an analysis report.
///
/// The dimension score is `1 - duplication_score`, and `item_count` is the
/// number of functions the report fingerprinted. Every function that takes
/// part in an exact or near duplicate becomes one pipeline observation whose
/// `detail` joins all of its duplicate descriptions with `"; "`.
///
/// Observations are ordered by file, then line, then name, so the result does
/// not depend on hash-map iteration order.
pub(crate) fn duplication_compliance(report: &AnalysisReport) -> ComplianceDimension {
    let score = 1.0 - report.duplication.duplication_score;
    let (detail_parts, mut scope_map) = collect_duplication_details(report);

    let mut observations: Vec<CompliancePipelineObservation> = detail_parts
        .into_iter()
        .map(|(key, parts)| {
            // Each key occurs exactly once in `detail_parts`, so the scope path
            // can be moved out of `scope_map` rather than cloned, and the key
            // itself can be taken apart without rebuilding a copy for lookup.
            let scope_path = scope_map.remove(&key).unwrap_or_default();
            let (name, file, line) = key;
            CompliancePipelineObservation {
                kind: CompliancePipelineSubjectKind::Function,
                file,
                line,
                name,
                detail: parts.join("; "),
                cohort: None,
                size_hint: 1.0,
                raw: 1.0,
                normalized: None,
                calibrated_score: 0.0,
                scope_path,
            }
        })
        .collect();

    // The map above yields entries in arbitrary order; sort for stable output.
    observations.sort_by(|a, b| {
        a.file
            .cmp(&b.file)
            .then_with(|| a.line.cmp(&b.line))
            .then_with(|| a.name.cmp(&b.name))
    });

    ComplianceDimension {
        name: "duplication".into(),
        score,
        item_count: report.duplication.functions_fingerprinted,
        rule: "score = 1 - duplication_score".into(),
        pipeline: ComplianceDimensionPipeline {
            cohorts: Vec::new(),
            artifact_aggregation: None,
            aggregated_score: score,
            observations,
        },
    }
}
/// Gathers, for every function involved in duplication, its textual duplicate
/// descriptions and its scope path.
///
/// Returns two maps keyed by `(name, file, line)`:
/// - the detail strings recorded for that function, one per exact-duplicate
///   group it belongs to and one per near-duplicate pair it appears in;
/// - the scope path of the first location seen for that key.
fn collect_duplication_details(
    report: &AnalysisReport,
) -> (
    HashMap<FnKey, Vec<String>>,
    HashMap<FnKey, Vec<crate::metrics::ScopeSegment>>,
) {
    let mut details: HashMap<FnKey, Vec<String>> = HashMap::new();
    let mut scopes: HashMap<FnKey, Vec<crate::metrics::ScopeSegment>> = HashMap::new();

    for group in &report.duplication.exact_duplicates {
        for current in &group.functions {
            // Names of all other group members. A member is treated as the
            // current function only when name, file and line all match.
            let siblings = group
                .functions
                .iter()
                .filter(|candidate| {
                    !(candidate.name == current.name
                        && candidate.file == current.file
                        && candidate.line == current.line)
                })
                .map(|candidate| candidate.name.as_str())
                .collect::<Vec<&str>>()
                .join(", ");
            let message = format!(
                "exact duplicate of {} (shape_len {})",
                siblings, group.shape_length,
            );

            let key: FnKey = (current.name.clone(), current.file.clone(), current.line);
            // Keep the scope path from the first occurrence of this key.
            scopes
                .entry(key.clone())
                .or_insert_with(|| current.scope_path.clone());
            details.entry(key).or_default().push(message);
        }
    }

    for pair in &report.duplication.near_duplicates {
        // Record the pairing from both sides so each function gets a detail
        // line naming its counterpart.
        let both_directions = [(&pair.a, &pair.b), (&pair.b, &pair.a)];
        for (subject, counterpart) in both_directions {
            let message = format!(
                "near duplicate of {} (similarity {:.2})",
                counterpart.name, pair.similarity,
            );

            let key: FnKey = (subject.name.clone(), subject.file.clone(), subject.line);
            scopes
                .entry(key.clone())
                .or_insert_with(|| subject.scope_path.clone());
            details.entry(key).or_default().push(message);
        }
    }

    (details, scopes)
}
/// Appends one `"duplication"` heatmap entry per observation of the dimension.
///
/// Nothing is appended when the dimension score is within `SCORE_TOLERANCE`
/// of 1.0, or when the dimension has no observations. Otherwise the difference
/// between the composite returned by `counterfactual_composite` (called with
/// 1.0 for this dimension's index) and the current composite score is split
/// evenly across all observations.
pub(crate) fn emit_duplication_heatmap(ctx: &HeatmapContext<'_>, entries: &mut Vec<HeatmapEntry>) {
    let already_perfect = (ctx.dim.score - 1.0).abs() < SCORE_TOLERANCE;
    if already_perfect {
        return;
    }

    let counterfactual = counterfactual_composite(
        ctx.dim_scores,
        ctx.dim_index,
        1.0,
        ctx.policy.aggregation.objective_scalarization,
    );
    // Total amount attributed to this dimension, before splitting per function.
    let total_responsibility = counterfactual - ctx.composite_score;

    let observations = &ctx.dim.pipeline.observations;
    if observations.is_empty() {
        return;
    }
    let per_function = total_responsibility / observations.len() as f64;

    entries.extend(observations.iter().map(|obs| HeatmapEntry {
        file: obs.file.clone(),
        line: obs.line,
        function_name: obs.name.clone(),
        dimension: "duplication".into(),
        responsibility: per_function,
        detail: obs.detail.clone(),
        scope_path: obs.scope_path.clone(),
    }));
}