use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::path::{Path, PathBuf};
use git2::{Repository, Sort};
use serde::{Deserialize, Serialize};
use crate::core::config::Config;
use crate::core::finding::{Finding, IntoFindings, Location};
use crate::core::severity::Severity;
use crate::feature::{decorate, Feature, FeatureKind, FeatureMeta, HotspotIndex};
use crate::observer::walk::{is_path_excluded, since_cutoff};
use crate::observer::{ObservationMeta, Observer};
use crate::observers::ObserverReports;
/// Largest number of distinct, non-excluded paths a single commit may touch and still be
/// counted; `absorb_commit` ignores commits whose path set is larger than this.
const BULK_COMMIT_FILE_LIMIT: usize = 50;
/// Observer that walks git history and counts how often pairs of files change in the
/// same commit.
#[derive(Debug, Clone)]
pub struct ChangeCouplingObserver {
/// When `false`, `scan` returns an empty report without opening the repository.
pub enabled: bool,
/// Exclusion entries handed to `is_path_excluded`; matching paths are left out of all counts.
pub excluded: Vec<String>,
/// Passed to `since_cutoff` to obtain the commit-time cutoff at which the history walk stops.
pub since_days: u32,
/// Minimum co-change count a pair needs in order to appear in the report.
pub min_coupling: u32,
/// Value both conditional co-change ratios must reach for a pair to be classified
/// as `PairDirection::Symmetric`.
pub symmetric_threshold: f64,
}
impl Default for ChangeCouplingObserver {
    /// Returns a disabled observer with no exclusions, zeroed `since_days` and
    /// `min_coupling`, and the stock symmetric threshold.
    fn default() -> Self {
        let symmetric_threshold = default_symmetric_threshold();
        Self {
            enabled: false,
            excluded: Vec::new(),
            since_days: 0,
            min_coupling: 0,
            symmetric_threshold,
        }
    }
}
impl ChangeCouplingObserver {
    /// Builds an observer from the loaded configuration.
    ///
    /// The enable flag, minimum pair count and symmetric threshold come from
    /// `metrics.change_coupling`; the history window comes from `git.since_days`; the
    /// exclusion list comes from `Config::observer_excluded_paths`.
    #[must_use]
    pub fn from_config(cfg: &Config) -> Self {
        let coupling = &cfg.metrics.change_coupling;
        Self {
            enabled: coupling.enabled,
            excluded: cfg.observer_excluded_paths(),
            since_days: cfg.git.since_days,
            min_coupling: coupling.min_coupling,
            symmetric_threshold: coupling.symmetric_threshold,
        }
    }

    /// Walks the history reachable from `HEAD` and reports which files change together.
    ///
    /// The returned report always echoes `since_days` and `min_coupling`. It is otherwise
    /// empty when the observer is disabled, when no repository can be discovered from
    /// `root`, or when the revision walk cannot be set up. Commits are visited in
    /// `Sort::TIME` order and the walk stops at the first commit whose timestamp is older
    /// than the cutoff derived from `since_days`. Object ids or commits that fail to load
    /// are skipped.
    #[must_use]
    pub fn scan(&self, root: &Path) -> ChangeCouplingReport {
        let base = ChangeCouplingReport {
            since_days: self.since_days,
            min_coupling: self.min_coupling,
            ..ChangeCouplingReport::default()
        };
        if !self.enabled {
            return base;
        }
        let Ok(repo) = Repository::discover(root) else {
            return base;
        };
        let cutoff_secs = since_cutoff(self.since_days);
        let Ok(mut revwalk) = repo.revwalk() else {
            return base;
        };
        if revwalk.set_sorting(Sort::TIME).is_err() {
            return base;
        }
        if revwalk.push_head().is_err() {
            return base;
        }

        let mut pair_counts: HashMap<(PathBuf, PathBuf), u32> = HashMap::new();
        let mut file_commits: HashMap<PathBuf, u32> = HashMap::new();
        let mut commits_considered: u32 = 0;

        // Unreadable ids/commits are dropped; the first commit older than the cutoff ends
        // the walk.
        let recent_commits = revwalk
            .filter_map(Result::ok)
            .filter_map(|oid| repo.find_commit(oid).ok())
            .take_while(|commit| commit.time().seconds() >= cutoff_secs);
        for commit in recent_commits {
            if self.absorb_commit(&repo, &commit, &mut pair_counts, &mut file_commits) {
                commits_considered = commits_considered.saturating_add(1);
            }
        }

        let pairs = collect_pairs(
            pair_counts,
            self.min_coupling,
            &file_commits,
            self.symmetric_threshold,
        );
        let file_sums = compute_file_sums(&pairs);
        let totals = CouplingTotals {
            pairs: pairs.len(),
            files: file_sums.len(),
            commits_considered,
        };
        ChangeCouplingReport {
            pairs,
            file_sums,
            totals,
            ..base
        }
    }

    /// Folds one commit into the running per-file and per-pair counters.
    ///
    /// The commit is diffed against its first parent (or against an empty tree when it
    /// has none). Every non-empty, non-excluded new-side path is collected. Returns
    /// `false` without touching the counters when the trees or diff cannot be read, when
    /// no path survives filtering, or when more than `BULK_COMMIT_FILE_LIMIT` paths do.
    /// Otherwise each path's commit count and each unordered pair's co-change count is
    /// incremented and `true` is returned.
    fn absorb_commit(
        &self,
        repo: &Repository,
        commit: &git2::Commit<'_>,
        pair_counts: &mut HashMap<(PathBuf, PathBuf), u32>,
        file_commits: &mut HashMap<PathBuf, u32>,
    ) -> bool {
        let Ok(commit_tree) = commit.tree() else {
            return false;
        };
        let parent_tree = commit.parent(0).and_then(|parent| parent.tree()).ok();
        let Ok(diff) = repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&commit_tree), None)
        else {
            return false;
        };

        // A sorted set both de-duplicates paths and fixes the order of each pair key.
        let touched: BTreeSet<PathBuf> = diff
            .deltas()
            .filter_map(|delta| {
                let path = delta.new_file().path()?;
                let keep =
                    !path.as_os_str().is_empty() && !is_path_excluded(path, &self.excluded);
                keep.then(|| path.to_path_buf())
            })
            .collect();
        if !(1..=BULK_COMMIT_FILE_LIMIT).contains(&touched.len()) {
            return false;
        }

        for path in &touched {
            file_commits
                .entry(path.clone())
                .and_modify(|commits| *commits = commits.saturating_add(1))
                .or_insert(1);
        }

        // Pair every path with each path that sorts after it; a single-path commit
        // yields no pairs.
        let mut remaining = touched.iter();
        while let Some(first) = remaining.next() {
            for second in remaining.clone() {
                pair_counts
                    .entry((first.clone(), second.clone()))
                    .and_modify(|hits| *hits = hits.saturating_add(1))
                    .or_insert(1);
            }
        }
        true
    }
}
/// Symmetric threshold used by `ChangeCouplingObserver::default`.
#[must_use]
pub fn default_symmetric_threshold() -> f64 {
    const HALF: f64 = 0.5;
    HALF
}
/// Outcome of a change-coupling scan: coupled file pairs, per-file totals, summary
/// counts, and the settings the scan ran with.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct ChangeCouplingReport {
/// Coupled pairs; `scan` stores them sorted by descending count, then by path.
pub pairs: Vec<FilePair>,
/// Per-file sums of pair counts; `scan` stores them sorted by descending sum, then by path.
pub file_sums: Vec<FileSum>,
/// Summary counts for the scan.
pub totals: CouplingTotals,
/// The observer's `since_days` setting at scan time.
pub since_days: u32,
/// The observer's `min_coupling` setting at scan time.
pub min_coupling: u32,
}
impl ChangeCouplingReport {
    /// Returns copies of the first `n` entries of `pairs` (all of them when fewer exist).
    ///
    /// `scan` stores pairs sorted by descending count, so for a report it produced these
    /// are the most strongly coupled pairs. Only the returned entries are cloned.
    #[must_use]
    pub fn worst_n_pairs(&self, n: usize) -> Vec<FilePair> {
        self.pairs.iter().take(n).cloned().collect()
    }

    /// Returns copies of the first `n` entries of `file_sums` (all of them when fewer exist).
    ///
    /// `scan` stores file sums sorted by descending sum, so for a report it produced
    /// these are the files with the highest totals. Only the returned entries are cloned.
    #[must_use]
    pub fn worst_n_files(&self, n: usize) -> Vec<FileSum> {
        self.file_sums.iter().take(n).cloned().collect()
    }
}
/// Two files that changed in the same commits, with the number of such commits.
///
/// Pairs produced by `scan` always have `a` ordering before `b`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FilePair {
/// First file of the pair.
pub a: PathBuf,
/// Second file of the pair.
pub b: PathBuf,
/// Number of counted commits that touched both files.
pub count: u32,
/// Direction classification; left out of serialized output when `None` and read back
/// as `None` when the field is missing.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub direction: Option<PairDirection>,
}
/// Classification of how a pair's co-changes are distributed between its two files.
///
/// Serialized with an internal `kind` tag whose values are the snake_case variant names.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum PairDirection {
/// Both conditional co-change ratios reached the symmetric threshold.
Symmetric,
/// The pair is not symmetric. As assigned by `classify_direction`, `from` is the file
/// that appears in the strictly larger share of the other file's commits; on a tie
/// `from` is the pair's second file.
OneWay { from: PathBuf, to: PathBuf },
}
/// Total coupling attributed to a single file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FileSum {
/// The file this total belongs to.
pub path: PathBuf,
/// Saturating sum of the counts of every reported pair that contains `path`.
pub sum: u32,
}
/// Summary counts attached to a `ChangeCouplingReport`.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct CouplingTotals {
/// Number of pairs in the report.
pub pairs: usize,
/// Number of distinct files that appear in at least one reported pair.
pub files: usize,
/// Number of commits that `absorb_commit` accepted during the scan.
pub commits_considered: u32,
}
impl Observer for ChangeCouplingObserver {
    type Output = ChangeCouplingReport;

    /// Identifies this observer as `change_coupling`, version 1.
    fn meta(&self) -> ObservationMeta {
        let (name, version) = ("change_coupling", 1);
        ObservationMeta { name, version }
    }

    /// Runs `scan` on `project_root`; this never returns `Err`, because `scan` reports
    /// failures as an empty report.
    fn observe(&self, project_root: &Path) -> anyhow::Result<Self::Output> {
        let report = self.scan(project_root);
        Ok(report)
    }
}
impl IntoFindings for ChangeCouplingReport {
    /// Produces one finding per reported pair, in the same order as `pairs`.
    ///
    /// The primary location is file `a` with file `b` (lossily converted to a string)
    /// as its symbol; file `b` is attached as an additional location. The metric name
    /// and the arrow in the summary come from `render_metric_and_arrow`, and
    /// `count:<count>` is passed as the final argument of `Finding::new`.
    fn into_findings(&self) -> Vec<Finding> {
        let mut findings = Vec::with_capacity(self.pairs.len());
        for pair in &self.pairs {
            let (metric, arrow) = render_metric_and_arrow(pair);
            let partner = pair.b.to_string_lossy().into_owned();
            let summary = format!(
                "co-changed {} times: {} {arrow} {}",
                pair.count,
                pair.a.display(),
                partner,
            );
            let count_tag = format!("count:{}", pair.count);
            let primary = Location {
                file: pair.a.clone(),
                line: None,
                symbol: Some(partner),
            };
            let finding = Finding::new(metric, primary, summary, &count_tag)
                .with_locations(vec![Location::file(pair.b.clone())]);
            findings.push(finding);
        }
        findings
    }
}
/// Chooses the metric name and the arrow text used when rendering `pair`.
///
/// Symmetric pairs use the `change_coupling.symmetric` metric; every other case uses
/// `change_coupling`. One-way pairs point `→` when `from` is the pair's `a` and `←`
/// otherwise; pairs without a direction get a plain `↔`.
fn render_metric_and_arrow(pair: &FilePair) -> (&'static str, &'static str) {
    const BASE_METRIC: &str = "change_coupling";

    let Some(direction) = pair.direction.as_ref() else {
        return (BASE_METRIC, "↔");
    };
    match direction {
        PairDirection::Symmetric => ("change_coupling.symmetric", "↔ (symmetric)"),
        PairDirection::OneWay { from, .. } => {
            let arrow = if *from == pair.a { "→" } else { "←" };
            (BASE_METRIC, arrow)
        }
    }
}
/// Converts raw pair counters into the sorted list of reported pairs.
///
/// Pairs whose count is below `min_coupling` are dropped. For each remaining pair the
/// per-file commit counts are looked up in `file_commits` and raised to at least the
/// pair count (a missing entry therefore counts as the pair count) before being handed
/// to `classify_direction`. The result is ordered by descending count, then by `a`,
/// then by `b`.
fn collect_pairs(
    pair_counts: HashMap<(PathBuf, PathBuf), u32>,
    min_coupling: u32,
    file_commits: &HashMap<PathBuf, u32>,
    symmetric_threshold: f64,
) -> Vec<FilePair> {
    let mut ranked: Vec<FilePair> = Vec::new();
    for ((a, b), count) in pair_counts {
        if count < min_coupling {
            continue;
        }
        let commits_a = file_commits.get(&a).map_or(count, |&seen| seen.max(count));
        let commits_b = file_commits.get(&b).map_or(count, |&seen| seen.max(count));
        let direction =
            classify_direction(&a, &b, count, commits_a, commits_b, symmetric_threshold);
        ranked.push(FilePair {
            a,
            b,
            count,
            direction: Some(direction),
        });
    }
    ranked.sort_by(|lhs, rhs| {
        rhs.count
            .cmp(&lhs.count)
            .then_with(|| (&lhs.a, &lhs.b).cmp(&(&rhs.a, &rhs.b)))
    });
    ranked
}
/// Classifies a pair as symmetric or one-way from its co-change ratios.
///
/// Two ratios are computed: `pair_count / count_a` (share of `a`'s commits that also
/// touched `b`) and `pair_count / count_b` (share of `b`'s commits that also touched
/// `a`). When both reach `symmetric_threshold` the pair is `Symmetric`. Otherwise the
/// result is `OneWay` with `from = a` when the second ratio is strictly greater than
/// the first, and `from = b` in every other case, including ties.
///
/// NOTE(review): a zero `count_a` or `count_b` yields an infinite or NaN ratio; the
/// only caller in this file passes counts that are at least `pair_count`.
fn classify_direction(
    a: &Path,
    b: &Path,
    pair_count: u32,
    count_a: u32,
    count_b: u32,
    symmetric_threshold: f64,
) -> PairDirection {
    let together = f64::from(pair_count);
    let p_b_given_a = together / f64::from(count_a);
    let p_a_given_b = together / f64::from(count_b);

    let both_reach_threshold =
        p_b_given_a >= symmetric_threshold && p_a_given_b >= symmetric_threshold;
    if both_reach_threshold {
        return PairDirection::Symmetric;
    }

    let (from, to) = if p_a_given_b > p_b_given_a {
        (a, b)
    } else {
        (b, a)
    };
    PairDirection::OneWay {
        from: from.to_path_buf(),
        to: to.to_path_buf(),
    }
}
/// Adds up, per file, the counts of every pair the file takes part in.
///
/// Each pair contributes its count to both of its files using saturating addition.
/// The result is ordered by descending sum, then by ascending path.
fn compute_file_sums(pairs: &[FilePair]) -> Vec<FileSum> {
    let mut totals: BTreeMap<PathBuf, u32> = BTreeMap::new();
    for pair in pairs {
        for endpoint in [&pair.a, &pair.b] {
            totals
                .entry(endpoint.clone())
                .and_modify(|sum| *sum = sum.saturating_add(pair.count))
                .or_insert(pair.count);
        }
    }

    let mut ranked: Vec<FileSum> = Vec::with_capacity(totals.len());
    ranked.extend(totals.into_iter().map(|(path, sum)| FileSum { path, sum }));
    // Sums are compared right-to-left (descending), paths left-to-right (ascending).
    ranked.sort_by(|lhs, rhs| (rhs.sum, &lhs.path).cmp(&(lhs.sum, &rhs.path)));
    ranked
}
/// Feature that lowers the change-coupling observer report into severity-decorated findings.
pub struct ChangeCouplingFeature;
impl Feature for ChangeCouplingFeature {
    /// Identifies this feature as the `change_coupling` observer feature, version 1.
    fn meta(&self) -> FeatureMeta {
        let kind = FeatureKind::Observer;
        FeatureMeta {
            name: "change_coupling",
            version: 1,
            kind,
        }
    }

    /// Mirrors the `metrics.change_coupling.enabled` configuration flag.
    fn enabled(&self, cfg: &Config) -> bool {
        let coupling = &cfg.metrics.change_coupling;
        coupling.enabled
    }

    /// Turns the change-coupling report into decorated findings.
    ///
    /// Returns an empty list when `reports` carries no change-coupling report. Each
    /// pair's count is classified through the change-coupling calibration when one is
    /// present and defaults to `Severity::Ok` otherwise. The pair's finding is then
    /// passed through `decorate` together with that severity and the hotspot index.
    fn lower(
        &self,
        reports: &ObserverReports,
        _cfg: &Config,
        cal: &crate::core::calibration::Calibration,
        hotspot: &HotspotIndex,
    ) -> Vec<Finding> {
        let Some(report) = reports.change_coupling.as_ref() else {
            return Vec::new();
        };
        let thresholds = cal.calibration.change_coupling.as_ref();
        report
            .pairs
            .iter()
            .zip(report.into_findings())
            .map(|(pair, finding)| {
                let severity =
                    thresholds.map_or(Severity::Ok, |c| c.classify(f64::from(pair.count)));
                decorate(finding, severity, hotspot)
            })
            .collect()
    }
}