use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::path::{Path, PathBuf};
use git2::{Repository, Sort};
use serde::{Deserialize, Serialize};
use crate::core::config::Config;
use crate::observer::walk::{is_path_excluded, since_cutoff};
use crate::observer::{ObservationMeta, Observer};
/// Largest number of non-excluded paths a commit may touch and still be counted.
///
/// `ChangeCouplingObserver::absorb_commit` skips commits that exceed this limit, so
/// very large commits do not add a pair for every combination of their files.
const BULK_COMMIT_FILE_LIMIT: usize = 50;
/// Observer that counts how often pairs of files are changed by the same commit.
///
/// Build it with `ChangeCouplingObserver::from_config` and run it with
/// `ChangeCouplingObserver::scan` (or through the `Observer` trait).
#[derive(Debug, Clone, Default)]
pub struct ChangeCouplingObserver {
/// When `false`, `scan` returns a report without coupling data and never opens the repository.
pub enabled: bool,
/// Exclusion entries handed to `is_path_excluded`; changed paths it reports as excluded are ignored.
pub excluded: Vec<String>,
/// Look-back setting passed to `since_cutoff`; the history walk stops at the first commit
/// whose commit time (in seconds) is below the resulting cutoff.
pub since_days: u32,
/// Minimum co-change count a pair needs in order to appear in the report.
pub min_coupling: u32,
}
impl ChangeCouplingObserver {
    /// Builds an observer from the project configuration.
    ///
    /// The on/off switch and the pair threshold come from
    /// `cfg.metrics.change_coupling`, the look-back setting from
    /// `cfg.git.since_days`, and the exclusion list from
    /// `cfg.observer_excluded_paths()`.
    #[must_use]
    pub fn from_config(cfg: &Config) -> Self {
        let coupling = &cfg.metrics.change_coupling;
        Self {
            enabled: coupling.enabled,
            excluded: cfg.observer_excluded_paths(),
            since_days: cfg.git.since_days,
            min_coupling: coupling.min_coupling,
        }
    }

    /// Walks the history reachable from `HEAD` and reports co-changing file pairs.
    ///
    /// The repository is located with `Repository::discover(root)`. The returned
    /// report always carries this observer's `since_days` and `min_coupling`;
    /// everything else keeps its default value when the observer is disabled,
    /// when no repository can be discovered, or when the revision walk cannot
    /// be set up.
    ///
    /// Object ids the walk fails to yield and commits that cannot be loaded are
    /// skipped silently.
    #[must_use]
    pub fn scan(&self, root: &Path) -> ChangeCouplingReport {
        let mut report = ChangeCouplingReport {
            since_days: self.since_days,
            min_coupling: self.min_coupling,
            ..ChangeCouplingReport::default()
        };
        if !self.enabled {
            return report;
        }

        let repo = match Repository::discover(root) {
            Ok(found) => found,
            Err(_) => return report,
        };
        let cutoff_secs = since_cutoff(self.since_days);

        let mut walker = match repo.revwalk() {
            Ok(walk) => walk,
            Err(_) => return report,
        };
        if walker.set_sorting(Sort::TIME).is_err() {
            return report;
        }
        if walker.push_head().is_err() {
            return report;
        }

        let mut tallies: HashMap<(PathBuf, PathBuf), u32> = HashMap::new();
        let mut considered: u32 = 0;

        // The walk is sorted by commit time, so it ends at the first commit
        // whose timestamp falls below the cutoff.
        let recent_commits = walker
            .filter_map(Result::ok)
            .filter_map(|oid| repo.find_commit(oid).ok())
            .take_while(|commit| commit.time().seconds() >= cutoff_secs);
        for commit in recent_commits {
            if self.absorb_commit(&repo, &commit, &mut tallies) {
                considered = considered.saturating_add(1);
            }
        }

        report.pairs = collect_pairs(tallies, self.min_coupling);
        report.file_sums = compute_file_sums(&report.pairs);
        report.totals = CouplingTotals {
            pairs: report.pairs.len(),
            files: report.file_sums.len(),
            commits_considered: considered,
        };
        report
    }

    /// Adds one commit's co-changed path pairs to `pair_counts`.
    ///
    /// The commit's tree is diffed against the tree of its first parent only;
    /// when that parent (or its tree) is unavailable, `None` is passed as the
    /// old tree. Each delta's new-file path is collected unless it is empty or
    /// rejected by `is_path_excluded`.
    ///
    /// Returns `true` when the commit was counted. It returns `false`, leaving
    /// `pair_counts` untouched, when the tree or diff cannot be obtained, or
    /// when the number of collected paths is below 2 or above
    /// `BULK_COMMIT_FILE_LIMIT`.
    fn absorb_commit(
        &self,
        repo: &Repository,
        commit: &git2::Commit<'_>,
        pair_counts: &mut HashMap<(PathBuf, PathBuf), u32>,
    ) -> bool {
        let new_tree = match commit.tree() {
            Ok(tree) => tree,
            Err(_) => return false,
        };
        let base_tree = match commit.parent(0) {
            Ok(parent) => parent.tree().ok(),
            Err(_) => None,
        };
        let diff = match repo.diff_tree_to_tree(base_tree.as_ref(), Some(&new_tree), None) {
            Ok(diff) => diff,
            Err(_) => return false,
        };

        // A sorted set de-duplicates paths and fixes the order within each pair.
        let touched: BTreeSet<PathBuf> = diff
            .deltas()
            .filter_map(|delta| {
                let path = delta.new_file().path()?;
                let keep =
                    !path.as_os_str().is_empty() && !is_path_excluded(path, &self.excluded);
                keep.then(|| path.to_path_buf())
            })
            .collect();

        if !(2..=BULK_COMMIT_FILE_LIMIT).contains(&touched.len()) {
            return false;
        }

        // Pair every path with each path that sorts after it, so the key
        // `(first, second)` is always in ascending order.
        for (idx, first) in touched.iter().enumerate() {
            for second in touched.iter().skip(idx + 1) {
                let tally = pair_counts
                    .entry((first.clone(), second.clone()))
                    .or_default();
                *tally = tally.saturating_add(1);
            }
        }
        true
    }
}
/// Result of a change-coupling scan.
///
/// `ChangeCouplingObserver::scan` starts from the `Default` value and fills in the
/// coupling data only when the observer is enabled and the history can be walked.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct ChangeCouplingReport {
/// File pairs that reached `min_coupling`; `scan` stores them by descending `count`, then `a`, then `b`.
pub pairs: Vec<FilePair>,
/// Per-file totals derived from `pairs`; `scan` stores them by descending `sum`, then `path`.
pub file_sums: Vec<FileSum>,
/// Summary counters for the scan.
pub totals: CouplingTotals,
/// Copy of the observer's `since_days` setting.
pub since_days: u32,
/// Copy of the observer's `min_coupling` setting.
pub min_coupling: u32,
}
impl ChangeCouplingReport {
    /// Returns owned copies of the first `n` entries of `pairs`, in stored order.
    ///
    /// No sorting happens here: the result is the "worst" pairs only when
    /// `pairs` is already ordered by descending count, which is how
    /// `ChangeCouplingObserver::scan` stores it. If `n` exceeds the number of
    /// pairs, all of them are returned.
    #[must_use]
    pub fn worst_n_pairs(&self, n: usize) -> Vec<FilePair> {
        // Clone only the entries that are returned instead of copying the
        // whole vector and discarding the tail.
        self.pairs.iter().take(n).cloned().collect()
    }

    /// Returns owned copies of the first `n` entries of `file_sums`, in stored order.
    ///
    /// No sorting happens here: the result is the "worst" files only when
    /// `file_sums` is already ordered by descending sum, which is how
    /// `ChangeCouplingObserver::scan` stores it. If `n` exceeds the number of
    /// entries, all of them are returned.
    #[must_use]
    pub fn worst_n_files(&self, n: usize) -> Vec<FileSum> {
        // Same prefix-only clone as `worst_n_pairs`.
        self.file_sums.iter().take(n).cloned().collect()
    }
}
/// Two paths together with the number of counted commits that changed both.
///
/// In pairs produced by `ChangeCouplingObserver`, `a` orders before `b`, because each
/// commit's paths are paired in sorted order.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FilePair {
/// First path of the pair.
pub a: PathBuf,
/// Second path of the pair.
pub b: PathBuf,
/// Number of counted commits whose diff contained both paths (saturating).
pub count: u32,
}
/// Aggregate coupling weight of a single file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FileSum {
/// Path the total belongs to.
pub path: PathBuf,
/// Saturating sum of `count` over every reported pair that contains `path`
/// (as computed by `compute_file_sums`).
pub sum: u32,
}
/// Summary counters that `ChangeCouplingObserver::scan` stores in the report.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct CouplingTotals {
/// Number of pairs in the report.
pub pairs: usize,
/// Number of per-file totals in the report.
pub files: usize,
/// Number of commits that contributed pairs, i.e. whose diff touched at least two and
/// at most `BULK_COMMIT_FILE_LIMIT` non-excluded paths.
pub commits_considered: u32,
}
impl Observer for ChangeCouplingObserver {
    type Output = ChangeCouplingReport;

    /// Identifies this observer as `change_coupling`, version 1.
    fn meta(&self) -> ObservationMeta {
        let (name, version) = ("change_coupling", 1);
        ObservationMeta { name, version }
    }

    /// Runs [`ChangeCouplingObserver::scan`] on `project_root`.
    ///
    /// Always returns `Ok`: `scan` reports failures as a report without
    /// coupling data rather than as an error.
    fn observe(&self, project_root: &Path) -> anyhow::Result<Self::Output> {
        let report = self.scan(project_root);
        Ok(report)
    }
}
/// Converts raw pair tallies into a filtered, ordered list of [`FilePair`]s.
///
/// Pairs whose count is below `min_coupling` are dropped. The rest are ordered
/// by descending count, with ties broken by ascending `a` and then ascending
/// `b`, so the result does not depend on hash-map iteration order.
fn collect_pairs(
    pair_counts: HashMap<(PathBuf, PathBuf), u32>,
    min_coupling: u32,
) -> Vec<FilePair> {
    let mut kept = Vec::new();
    for ((a, b), count) in pair_counts {
        if count >= min_coupling {
            kept.push(FilePair { a, b, count });
        }
    }

    // The counts are swapped between the two tuples so that the count compares
    // in descending order while both paths compare in ascending order.
    kept.sort_by(|lhs, rhs| (rhs.count, &lhs.a, &lhs.b).cmp(&(lhs.count, &rhs.a, &rhs.b)));
    kept
}
/// Totals, per file, the counts of every pair that file takes part in.
///
/// Each pair adds its `count` to both of its paths using saturating addition.
/// The result is ordered by descending sum, with ties broken by ascending path.
fn compute_file_sums(pairs: &[FilePair]) -> Vec<FileSum> {
    let mut totals: BTreeMap<PathBuf, u32> = BTreeMap::new();
    for pair in pairs {
        // Both members of the pair receive the full pair count.
        for member in [&pair.a, &pair.b] {
            let slot = totals.entry(member.clone()).or_default();
            *slot = slot.saturating_add(pair.count);
        }
    }

    let mut ranked = Vec::with_capacity(totals.len());
    for (path, sum) in totals {
        ranked.push(FileSum { path, sum });
    }

    // The sums are swapped between the two tuples so that the sum compares in
    // descending order while the path compares in ascending order.
    ranked.sort_by(|lhs, rhs| (rhs.sum, &lhs.path).cmp(&(lhs.sum, &rhs.path)));
    ranked
}