use anyhow::{Context, Result};
use std::collections::{HashMap, HashSet};
use std::path::Path;
use std::process::Command;
use crate::types::{FilePhase, RankingConfig};
/// Computes git-history-derived ranking weights, life-cycle phases, and
/// badges for repository files by shelling out to `git log`.
pub struct GitWeightCalculator {
/// Tunable thresholds and boost factors (recency decay, churn caps,
/// phase age/quiet cutoffs) used by every calculation below.
config: RankingConfig,
}
/// Per-file git history statistics, aggregated over the commits inspected
/// by `GitWeightCalculator::get_file_stats` (at most the 500 most recent).
#[derive(Debug, Clone)]
pub struct FileStats {
/// Whole days since the most recent inspected commit touched the file.
pub last_modified_days: u32,
/// Whole days since the oldest inspected commit that touched the file.
/// NOTE: a lower bound on true age — only a bounded commit window is
/// scanned, so very old files may appear younger than they are.
pub first_seen_days: u32,
/// Number of inspected commits that touched the file.
pub commit_count: usize,
/// Distinct author emails among the commits that touched the file.
pub authors: HashSet<String>,
}
impl GitWeightCalculator {
    /// Creates a calculator using the supplied ranking configuration.
    pub fn new(config: RankingConfig) -> Self {
        Self { config }
    }

    /// Computes a ranking weight for each file in `rel_fnames`.
    ///
    /// Files with no git history fall back to a neutral weight of `1.0`.
    ///
    /// # Errors
    /// Returns an error if gathering git statistics fails at the system
    /// level (e.g. the `git` binary cannot be spawned).
    pub fn compute_weights(
        &self,
        root: &Path,
        rel_fnames: &[String],
    ) -> Result<HashMap<String, f64>> {
        let stats = self
            .get_file_stats(root, rel_fnames)
            .context("Failed to get git file stats")?;
        let mut weights = HashMap::with_capacity(rel_fnames.len());
        for fname in rel_fnames {
            let weight = stats
                .get(fname)
                .map_or(1.0, |file_stats| self.calculate_weight(file_stats));
            weights.insert(fname.clone(), weight);
        }
        Ok(weights)
    }

    /// Collects per-file history statistics by running `git log`.
    ///
    /// Only the 500 most recent matching commits are inspected to bound
    /// the cost on large repositories. A non-zero git exit status (e.g.
    /// `root` is not a repository) yields an empty map rather than an
    /// error, so callers degrade gracefully to neutral weights.
    pub fn get_file_stats(
        &self,
        root: &Path,
        rel_fnames: &[String],
    ) -> Result<HashMap<String, FileStats>> {
        if rel_fnames.is_empty() {
            return Ok(HashMap::new());
        }
        let output = Command::new("git")
            .arg("log")
            .arg("--format=%aI|%ae")
            .arg("--name-only")
            .arg("--all")
            .arg("-n")
            .arg("500")
            .arg("--")
            .args(rel_fnames)
            .current_dir(root)
            .output()
            .context("Failed to execute git log")?;
        if !output.status.success() {
            return Ok(HashMap::new());
        }
        let log_text = String::from_utf8_lossy(&output.stdout);
        self.parse_git_log(&log_text, rel_fnames)
    }

    /// Parses `git log --format=%aI|%ae --name-only` output into
    /// `FileStats` for every tracked file that appears in it.
    ///
    /// Header lines look like `2024-01-15T10:30:00+00:00|alice@example.com`;
    /// the paths touched by that commit follow on subsequent lines.
    fn parse_git_log(
        &self,
        log_text: &str,
        rel_fnames: &[String],
    ) -> Result<HashMap<String, FileStats>> {
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .context("System time is before UNIX epoch")?
            .as_secs() as i64;
        let fname_set: HashSet<&str> = rel_fnames.iter().map(|s| s.as_str()).collect();
        let mut first_seen: HashMap<String, i64> = HashMap::new();
        let mut last_modified: HashMap<String, i64> = HashMap::new();
        let mut commit_counts: HashMap<String, usize> = HashMap::new();
        let mut authors_map: HashMap<String, HashSet<String>> = HashMap::new();
        let mut current_timestamp: Option<i64> = None;
        let mut current_author: Option<String> = None;
        for line in log_text.lines() {
            let line = line.trim();
            if line.is_empty() {
                // `git log --format=...` separates a commit header from its
                // file list with a blank line. Skip it WITHOUT clearing the
                // current commit context: resetting here (the previous
                // behavior) dropped every file line that follows a blank,
                // leaving the stats empty on real git output.
                continue;
            }
            if let Some((timestamp_str, author_str)) = line.split_once('|') {
                // Commit header: `<ISO-8601 timestamp>|<author email>`.
                // On a malformed timestamp, keep the prior context so a
                // stray '|' in a path does not orphan following lines.
                if let Ok(timestamp) = parse_iso8601(timestamp_str) {
                    current_timestamp = Some(timestamp);
                    current_author = Some(author_str.to_string());
                }
            } else if let Some(ts) = current_timestamp {
                // File line belonging to the current commit.
                if fname_set.contains(line) {
                    let fname = line.to_string();
                    first_seen
                        .entry(fname.clone())
                        .and_modify(|t| *t = (*t).min(ts))
                        .or_insert(ts);
                    last_modified
                        .entry(fname.clone())
                        .and_modify(|t| *t = (*t).max(ts))
                        .or_insert(ts);
                    *commit_counts.entry(fname.clone()).or_insert(0) += 1;
                    if let Some(ref author) = current_author {
                        authors_map
                            .entry(fname.clone())
                            .or_insert_with(HashSet::new)
                            .insert(author.clone());
                    }
                }
            }
        }
        // Assemble FileStats only for files that actually appeared in the log.
        let mut stats = HashMap::new();
        for fname in fname_set {
            if let Some(&last_ts) = last_modified.get(fname) {
                let last_modified_days = days_since(now, last_ts);
                let first_ts = first_seen.get(fname).copied().unwrap_or(last_ts);
                let first_seen_days = days_since(now, first_ts);
                let commit_count = commit_counts.get(fname).copied().unwrap_or(0);
                let authors = authors_map.get(fname).cloned().unwrap_or_default();
                stats.insert(
                    fname.to_string(),
                    FileStats {
                        last_modified_days,
                        first_seen_days,
                        commit_count,
                        authors,
                    },
                );
            }
        }
        Ok(stats)
    }

    /// Combined multiplicative weight for a file: recency boost × churn boost.
    fn calculate_weight(&self, stats: &FileStats) -> f64 {
        self.recency_boost(stats.last_modified_days) * self.churn_boost(stats.commit_count)
    }

    /// Exponential-decay boost for recently modified files: equals
    /// `git_recency_max_boost` at day 0 and decays toward 1.0 with time
    /// constant `git_recency_decay_days`.
    fn recency_boost(&self, days: u32) -> f64 {
        let max_boost = self.config.git_recency_max_boost;
        let decay_days = self.config.git_recency_decay_days;
        1.0 + (max_boost - 1.0) * (-f64::from(days) / decay_days).exp()
    }

    /// Logarithmic boost for high-churn files. Commits at or below
    /// `git_churn_threshold` contribute nothing; beyond it the boost grows
    /// with `ln(1 + excess)` and is clamped to `git_churn_max_boost`.
    /// (The previous unclamped formula exceeded the configured maximum
    /// once the excess passed roughly e^5 ≈ 148 commits, which is
    /// reachable within the 500-commit scan window.)
    fn churn_boost(&self, commit_count: usize) -> f64 {
        let threshold = self.config.git_churn_threshold;
        let max_boost = self.config.git_churn_max_boost;
        let excess = commit_count.saturating_sub(threshold) as f64;
        (1.0 + (1.0 + excess).ln() * (max_boost - 1.0) / 5.0).min(max_boost)
    }

    /// Classifies a file's life-cycle phase from its history statistics.
    ///
    /// Precedence: Crystal (old and quiet) > Emergent (young) > Rotting
    /// (old, recently touched, high churn) > Evolving (everything else).
    pub fn classify_phase(&self, stats: &FileStats) -> FilePhase {
        let age = stats.first_seen_days;
        let quiet = stats.last_modified_days;
        if age >= self.config.phase_crystal_min_age_days
            && quiet >= self.config.phase_crystal_min_quiet_days
        {
            return FilePhase::Crystal;
        }
        if age <= self.config.phase_emergent_max_age_days {
            return FilePhase::Emergent;
        }
        if age >= self.config.phase_rotting_min_age_days
            && quiet <= self.config.phase_rotting_max_quiet_days
            && stats.commit_count >= self.config.git_badge_churn_commits
        {
            return FilePhase::Rotting;
        }
        FilePhase::Evolving
    }

    /// Derives display badges ("recent", "high-churn", plus the file's
    /// phase badge) for every file in `stats`.
    pub fn get_badges(&self, stats: &HashMap<String, FileStats>) -> HashMap<String, Vec<String>> {
        let mut badges = HashMap::new();
        for (fname, file_stats) in stats {
            let mut file_badges = Vec::new();
            if file_stats.last_modified_days <= self.config.git_badge_recent_days {
                file_badges.push("recent".to_string());
            }
            if file_stats.commit_count >= self.config.git_badge_churn_commits {
                file_badges.push("high-churn".to_string());
            }
            // Every file carries exactly one phase badge in addition to
            // any threshold badges above.
            let phase = self.classify_phase(file_stats);
            file_badges.push(phase.badge().to_string());
            badges.insert(fname.clone(), file_badges);
        }
        badges
    }

    /// Computes pairwise temporal coupling — Jaccard similarity of the
    /// commit sets of two files — over the 500 most recent commits.
    ///
    /// Only pairs scoring at least 0.3 are reported; each file's partners
    /// are sorted by descending score. Git failure yields an empty map.
    pub fn compute_temporal_coupling(
        &self,
        root: &Path,
        rel_fnames: &[String],
    ) -> Result<HashMap<String, Vec<(String, f64)>>> {
        if rel_fnames.is_empty() {
            return Ok(HashMap::new());
        }
        let output = Command::new("git")
            .arg("log")
            .arg("--format=%H")
            .arg("--name-only")
            .arg("--all")
            .arg("-n")
            .arg("500")
            .arg("--")
            .args(rel_fnames)
            .current_dir(root)
            .output()
            .context("Failed to execute git log for temporal coupling")?;
        if !output.status.success() {
            return Ok(HashMap::new());
        }
        let log_text = String::from_utf8_lossy(&output.stdout);
        let mut file_commits: HashMap<String, HashSet<String>> = HashMap::new();
        let fname_set: HashSet<&str> = rel_fnames.iter().map(|s| s.as_str()).collect();
        let mut current_commit: Option<String> = None;
        for line in log_text.lines() {
            let line = line.trim();
            if line.is_empty() {
                // Blank separator between a commit hash and its file list;
                // keep the current commit context (same fix as in
                // parse_git_log within this impl).
                continue;
            }
            // A full 40-hex-digit line is a commit hash header. (A tracked
            // path that happens to be 40 hex chars would be misread; deemed
            // acceptable, as in the original.)
            if line.len() == 40 && line.chars().all(|c| c.is_ascii_hexdigit()) {
                current_commit = Some(line.to_string());
            } else if let Some(ref commit) = current_commit {
                if fname_set.contains(line) {
                    file_commits
                        .entry(line.to_string())
                        .or_insert_with(HashSet::new)
                        .insert(commit.clone());
                }
            }
        }
        // Jaccard similarity over commit sets for every ordered pair.
        let mut coupling: HashMap<String, Vec<(String, f64)>> = HashMap::new();
        for (file_a, commits_a) in &file_commits {
            let mut pairs = Vec::new();
            for (file_b, commits_b) in &file_commits {
                if file_a == file_b {
                    continue;
                }
                let intersection = commits_a.intersection(commits_b).count();
                let union = commits_a.union(commits_b).count();
                if union > 0 {
                    let score = intersection as f64 / union as f64;
                    // Drop weak couplings to keep the result map small.
                    if score >= 0.3 {
                        pairs.push((file_b.clone(), score));
                    }
                }
            }
            // Strongest partners first; f64 scores are finite here, so the
            // Equal fallback is only defensive.
            pairs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
            if !pairs.is_empty() {
                coupling.insert(file_a.clone(), pairs);
            }
        }
        Ok(coupling)
    }
}
/// Parses an ISO 8601 timestamp as produced by git's `%aI` placeholder
/// (e.g. `2024-01-15T10:30:00+02:00`) into seconds since the UNIX epoch,
/// normalized to UTC.
///
/// Handles numeric UTC offsets (`+HH:MM` / `-HH:MM`), a trailing `Z`, and
/// bare times (treated as UTC). Fixes two defects of the previous version:
/// the parsed offset was discarded (non-UTC stamps were off by up to ±14h,
/// enough to shift day boundaries), and the current year's Feb 29 was
/// never counted (every post-February date in a leap year was one day
/// off). Accurate for 1970..=2099; the Gregorian century rule for 2100 is
/// deliberately ignored, which is fine for git commit dates.
///
/// # Errors
/// Returns an error when the string does not match `DATE 'T' TIME [tz]`
/// or any numeric component fails to parse.
fn parse_iso8601(s: &str) -> Result<i64> {
    // Cumulative days before each month in a non-leap year.
    const CUM_DAYS: [i32; 12] = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334];
    let (date, time_tz) = match s.split_once('T') {
        Some(parts) => parts,
        None => anyhow::bail!("Invalid ISO 8601 format"),
    };
    let date_parts: Vec<&str> = date.split('-').collect();
    if date_parts.len() != 3 {
        anyhow::bail!("Invalid date format");
    }
    let year: i32 = date_parts[0].parse()?;
    let month: i32 = date_parts[1].parse()?;
    let day: i32 = date_parts[2].parse()?;
    // Validate before indexing CUM_DAYS below.
    if !(1..=12).contains(&month) {
        anyhow::bail!("Invalid month");
    }
    // The time-of-day part never contains '+' or '-', so the first such
    // character starts the UTC offset.
    let tz_split_pos = time_tz.find('+').or_else(|| time_tz.find('-'));
    let (time, tz) = if let Some(pos) = tz_split_pos {
        (&time_tz[..pos], &time_tz[pos..])
    } else {
        // No numeric offset: accept a trailing `Z` (UTC) or nothing.
        (time_tz.trim_end_matches('Z'), "")
    };
    let time_parts: Vec<&str> = time.split(':').collect();
    if time_parts.len() != 3 {
        anyhow::bail!("Invalid time format");
    }
    let hour: i32 = time_parts[0].parse()?;
    let minute: i32 = time_parts[1].parse()?;
    let second: i32 = time_parts[2].parse()?;
    // Offset east of UTC in seconds; subtracted below to normalize to UTC.
    let tz_offset_secs: i32 = if tz.is_empty() {
        0
    } else {
        let sign = if tz.starts_with('-') { -1 } else { 1 };
        let (oh, om) = match tz[1..].split_once(':') {
            Some((h, m)) => (h.parse::<i32>()?, m.parse::<i32>()?),
            None => (tz[1..].parse::<i32>()?, 0),
        };
        sign * (oh * 3600 + om * 60)
    };
    // Leap days: one per four-year cycle since 1972, plus the current
    // year's Feb 29 once March is reached.
    let is_leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
    let leap_day_this_year = i32::from(is_leap_year && month > 2);
    let days_since_epoch = (year - 1970) * 365
        + (year - 1969) / 4
        + CUM_DAYS[(month - 1) as usize]
        + leap_day_this_year
        + day
        - 1;
    let seconds = i64::from(days_since_epoch) * 86400
        + i64::from(hour) * 3600
        + i64::from(minute) * 60
        + i64::from(second)
        - i64::from(tz_offset_secs);
    Ok(seconds)
}
/// Total number of days in the first `months` months of a non-leap year.
/// Values above 12 are clamped to a full year by the bounded iteration.
fn days_in_months(months: i32) -> i32 {
    const DAYS: [i32; 12] = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
    let mut total = 0;
    for &month_len in DAYS.iter().take(months as usize) {
        total += month_len;
    }
    total
}
/// Whole days elapsed between `then_secs` and `now_secs` (UNIX seconds).
///
/// Timestamps in the future (clock skew, bad data) count as zero days:
/// the previous version let a negative difference of more than one day
/// reach the `as u32` cast, wrapping to a value near `u32::MAX` and
/// making the file look ancient.
fn days_since(now_secs: i64, then_secs: i64) -> u32 {
    let diff = now_secs.saturating_sub(then_secs).max(0);
    (diff / 86400) as u32
}
#[cfg(test)]
mod tests {
use super::*;
// Recency boost starts near the configured maximum at day 0 and decays
// toward 1.0 as the file ages. NOTE(review): the 30/180-day ranges are
// tied to the unseen RankingConfig defaults.
#[test]
fn test_recency_boost() {
let config = RankingConfig::default();
let calc = GitWeightCalculator::new(config.clone());
let boost_today = calc.recency_boost(0);
assert!(boost_today > config.git_recency_max_boost * 0.99);
let boost_30d = calc.recency_boost(30);
assert!(boost_30d > 3.0 && boost_30d < 6.0);
let boost_180d = calc.recency_boost(180);
assert!(boost_180d < 2.0);
}
// At the churn threshold the boost is neutral (~1.0) and grows
// logarithmically with commit count, staying below the configured cap.
#[test]
fn test_churn_boost() {
let config = RankingConfig::default();
let calc = GitWeightCalculator::new(config.clone());
let boost_threshold = calc.churn_boost(config.git_churn_threshold);
assert!((boost_threshold - 1.0).abs() < 0.01);
let boost_10 = calc.churn_boost(10);
assert!(boost_10 > 2.5 && boost_10 < 3.0);
let boost_50 = calc.churn_boost(50);
assert!(boost_50 > 4.0 && boost_50 < config.git_churn_max_boost);
}
// One representative FileStats per phase, exercising the precedence
// order Crystal > Emergent > Rotting > Evolving.
#[test]
fn test_classify_phase() {
let config = RankingConfig::default();
let calc = GitWeightCalculator::new(config);
// Old and quiet -> Crystal.
let crystal = FileStats {
last_modified_days: 60,
first_seen_days: 200,
commit_count: 5,
authors: HashSet::new(),
};
assert_eq!(calc.classify_phase(&crystal), FilePhase::Crystal);
// Recently created -> Emergent.
let emergent = FileStats {
last_modified_days: 1,
first_seen_days: 10,
commit_count: 3,
authors: HashSet::new(),
};
assert_eq!(calc.classify_phase(&emergent), FilePhase::Emergent);
// Old, recently touched, high churn -> Rotting.
let rotting = FileStats {
last_modified_days: 2,
first_seen_days: 100,
commit_count: 15,
authors: HashSet::new(),
};
assert_eq!(calc.classify_phase(&rotting), FilePhase::Rotting);
// Middle-aged, moderate activity -> Evolving (the fallback).
let evolving = FileStats {
last_modified_days: 20,
first_seen_days: 60,
commit_count: 7,
authors: HashSet::new(),
};
assert_eq!(calc.classify_phase(&evolving), FilePhase::Evolving);
}
// 2024-01-15T10:30:00Z is ~1.705e9; the broad range check guards the
// overall epoch math without pinning the exact value.
#[test]
fn test_parse_iso8601() {
let timestamp = "2024-01-15T10:30:00+00:00";
let result = parse_iso8601(timestamp);
assert!(result.is_ok());
let secs = result.unwrap();
assert!(secs > 1_700_000_000); assert!(secs < 1_800_000_000); }
// Exact-week difference and the zero-elapsed case.
#[test]
fn test_days_since() {
let now = 1_700_000_000;
let then = now - 86400 * 7; assert_eq!(days_since(now, then), 7);
let same = days_since(now, now);
assert_eq!(same, 0);
}
// A hot file collects "recent" + "high-churn" + its phase badge; a
// stable one gets the "crystal" phase badge.
#[test]
fn test_get_badges() {
let config = RankingConfig::default();
let calc = GitWeightCalculator::new(config);
let mut stats = HashMap::new();
stats.insert(
"hot.rs".to_string(),
FileStats {
last_modified_days: 2,
first_seen_days: 100,
commit_count: 15,
authors: HashSet::new(),
},
);
stats.insert(
"stable.rs".to_string(),
FileStats {
last_modified_days: 60,
first_seen_days: 200,
commit_count: 5,
authors: HashSet::new(),
},
);
let badges = calc.get_badges(&stats);
let hot_badges = &badges["hot.rs"];
assert!(hot_badges.contains(&"recent".to_string()));
assert!(hot_badges.contains(&"high-churn".to_string()));
assert!(hot_badges.contains(&"rotting".to_string()));
let stable_badges = &badges["stable.rs"];
assert!(stable_badges.contains(&"crystal".to_string()));
}
// Synthetic `%aI|%ae --name-only` output: lib.rs appears in two commits
// by two authors, main.rs in two commits by one author. NOTE(review):
// this fixture has no blank separator lines between commits, unlike
// real `git log --format` output.
#[test]
fn test_parse_git_log() {
let config = RankingConfig::default();
let calc = GitWeightCalculator::new(config);
let log = r#"2024-01-15T10:30:00+00:00|alice@example.com
src/lib.rs
src/main.rs
2024-01-14T09:00:00+00:00|bob@example.com
src/lib.rs
2024-01-10T15:20:00+00:00|alice@example.com
src/main.rs
"#;
let files = vec!["src/lib.rs".to_string(), "src/main.rs".to_string()];
let stats = calc.parse_git_log(log, &files).unwrap();
let lib_stats = &stats["src/lib.rs"];
assert_eq!(lib_stats.commit_count, 2);
assert_eq!(lib_stats.authors.len(), 2);
let main_stats = &stats["src/main.rs"];
assert_eq!(main_stats.commit_count, 2);
assert_eq!(main_stats.authors.len(), 1);
}
}