use super::git2_provider::FileCommitScan;
use chrono::{DateTime, Utc};
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
/// Details of a single commit, shared by every file that commit touched.
#[derive(Debug, Clone)]
struct CommitInfo {
/// Timestamp of the commit, in UTC.
date: DateTime<Utc>,
/// Commit message; `FileHistoryData::add_commit` passes it to `is_bug_fix`.
message: String,
/// Author identity; `BatchedGitHistory::from_commit_scans` fills it from the scan's `author_email`.
author: String,
}
/// Running totals for a single file, built up one commit at a time by `add_commit`.
#[derive(Debug, Clone, Default)]
pub(super) struct FileHistoryData {
/// Number of commits recorded for the file.
total_commits: usize,
/// Number of recorded commits whose message `is_bug_fix` accepted.
bug_fix_count: usize,
/// Distinct author identities seen across the recorded commits.
authors: HashSet<String>,
/// Date of the most recent recorded commit; `None` until a commit is added.
last_modified: Option<DateTime<Utc>>,
/// Date of the earliest recorded commit; `None` until a commit is added.
first_seen: Option<DateTime<Utc>>,
/// Sum of the per-commit churn values passed to `add_commit`.
total_churn: usize,
}
impl FileHistoryData {
    /// Folds one commit that touched this file into the running totals.
    ///
    /// The commit is counted as a bug fix when `is_bug_fix` accepts its
    /// message, its author is added to the author set, and `file_churn` is
    /// added to the accumulated churn.
    fn add_commit(&mut self, commit: &CommitInfo, file_churn: usize) {
        self.total_commits += 1;
        if is_bug_fix(&commit.message) {
            self.bug_fix_count += 1;
        }
        self.authors.insert(commit.author.clone());

        // Widen the [first_seen, last_modified] window so it covers this commit.
        let when = commit.date;
        self.last_modified = Some(match self.last_modified {
            Some(latest) => latest.max(when),
            None => when,
        });
        self.first_seen = Some(match self.first_seen {
            Some(earliest) => earliest.min(when),
            None => when,
        });

        self.total_churn += file_churn;
    }

    /// Returns the commit rate normalised to commits per 30 days.
    ///
    /// Yields `0.0` when the file's age is zero whole days, which avoids a
    /// division by zero.
    fn calculate_change_frequency(&self, now: DateTime<Utc>) -> f64 {
        match self.calculate_age_days(now) {
            0 => 0.0,
            days => (self.total_commits as f64 / days as f64) * 30.0,
        }
    }

    /// Returns the number of whole days between `first_seen` and `now`.
    ///
    /// Yields `0` when no commit has been recorded, or when `first_seen`
    /// lies after `now` (the day count is clamped to zero before narrowing).
    fn calculate_age_days(&self, now: DateTime<Utc>) -> u32 {
        match self.first_seen {
            Some(first) => now.signed_duration_since(first).num_days().max(0) as u32,
            None => 0,
        }
    }

    /// Returns a stability score that is capped at `1.0`.
    ///
    /// A file without any recorded commit scores `1.0`. Otherwise the score
    /// is a weighted sum of three factors:
    ///
    /// * churn (weight 0.4): `1 / (1 + commits per 30 days)`, or `0.5` when
    ///   the age is zero whole days;
    /// * bug (weight 0.4): one minus the share of commits that are bug fixes;
    /// * age (weight 0.2): age in days over 365, saturating at `1.0`.
    fn calculate_stability(&self, now: DateTime<Utc>) -> f64 {
        if self.total_commits == 0 {
            return 1.0;
        }

        let commits = self.total_commits as f64;
        let age_days = self.calculate_age_days(now);

        let churn_factor = match age_days {
            0 => 0.5,
            days => {
                let monthly_churn = commits / (days as f64) * 30.0;
                1.0 / (1.0 + monthly_churn)
            }
        };

        let bug_ratio = self.bug_fix_count as f64 / commits;
        let bug_factor = 1.0 - bug_ratio.min(1.0);

        let age_factor = (age_days as f64 / 365.0).min(1.0);

        let weighted = churn_factor * 0.4 + bug_factor * 0.4 + age_factor * 0.2;
        weighted.min(1.0)
    }
}
/// Per-file commit history index built from a batch of commit scans.
pub struct BatchedGitHistory {
/// Accumulated history for each path that appears in a scan's `file_churn`.
file_histories: HashMap<PathBuf, FileHistoryData>,
}
impl BatchedGitHistory {
pub fn from_commit_scans(scans: &[FileCommitScan]) -> Self {
let mut file_histories: HashMap<PathBuf, FileHistoryData> = HashMap::new();
for scan in scans {
let commit_info = CommitInfo {
date: scan.date,
message: scan.message.clone(),
author: scan.author_email.clone(),
};
for (path, churn) in &scan.file_churn {
file_histories
.entry(path.clone())
.or_default()
.add_commit(&commit_info, *churn);
}
}
Self { file_histories }
}
fn get_file_history(&self, path: &Path) -> Option<&FileHistoryData> {
self.file_histories.get(path)
}
#[cfg(test)]
pub fn all_paths(&self) -> Vec<&PathBuf> {
self.file_histories.keys().collect()
}
#[cfg(test)]
pub fn has_path(&self, path: &Path) -> bool {
self.file_histories.contains_key(path)
}
#[allow(clippy::type_complexity)]
pub fn calculate_metrics(
&self,
path: &Path,
now: DateTime<Utc>,
) -> Option<(f64, usize, Option<DateTime<Utc>>, usize, f64, usize, u32)> {
self.get_file_history(path).map(|history| {
(
history.calculate_change_frequency(now),
history.bug_fix_count,
history.last_modified,
history.authors.len(),
history.calculate_stability(now),
history.total_commits,
history.calculate_age_days(now),
)
})
}
}
/// Heuristically decides whether a commit message describes a bug fix.
///
/// Messages rejected by `is_excluded_commit` are never counted. Otherwise
/// the message is lower-cased and split on every non-alphanumeric character;
/// it is a bug fix when any resulting word is exactly `bug`, `fix`, `fixes`,
/// `fixed`, `fixing` or `hotfix`. Whole-word matching keeps look-alikes such
/// as `prefix`, `fixture` or `debugging` from counting.
pub fn is_bug_fix(message: &str) -> bool {
    const BUG_WORDS: [&str; 6] = ["bug", "fix", "fixes", "fixed", "fixing", "hotfix"];

    if is_excluded_commit(message) {
        return false;
    }

    message
        .to_lowercase()
        .split(|c: char| !c.is_alphanumeric())
        .any(|word| BUG_WORDS.contains(&word))
}
/// Returns `true` when `commit_line` describes work that must never be
/// counted as a bug fix.
///
/// All checks are case-insensitive. A message is excluded when:
///
/// * it carries a `style:`, `chore:`, `docs:` or `test:` type marker;
/// * it contains `formatting`, `linting`, `whitespace` or `typo` anywhere
///   (plain substring match);
/// * it carries a `refactor:` marker and none of its words is a bug keyword
///   (`bug`, `fix`, `fixes`, `fixed`, `fixing`, `issue`, `hotfix`).
///
/// A type marker only counts when it is not the tail of a longer word, so
/// `latest:` or `contest:` is not mistaken for `test:`.
fn is_excluded_commit(commit_line: &str) -> bool {
    let lowercase = commit_line.to_lowercase();

    // A marker matches only if the character right before it (if any) is not
    // alphanumeric; otherwise it is just the ending of some other word.
    let has_type_marker = |marker: &str| {
        lowercase
            .match_indices(marker)
            .any(|(start, _)| !lowercase[..start].ends_with(char::is_alphanumeric))
    };

    let excluded_type = ["style:", "chore:", "docs:", "test:"]
        .iter()
        .any(|&marker| has_type_marker(marker));
    if excluded_type {
        return true;
    }

    let cosmetic_keyword = ["formatting", "linting", "whitespace", "typo"]
        .iter()
        .any(|&keyword| lowercase.contains(keyword));
    if cosmetic_keyword {
        return true;
    }

    // A refactor is excluded unless it also mentions a bug-related word.
    if has_type_marker("refactor:") {
        let mentions_bug = lowercase
            .split(|c: char| !c.is_alphanumeric())
            .any(|word| {
                matches!(
                    word,
                    "bug" | "fix" | "fixes" | "fixed" | "fixing" | "issue" | "hotfix"
                )
            });
        return !mentions_bug;
    }

    false
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses an RFC 3339 timestamp into UTC, panicking on malformed input.
    fn utc(timestamp: &str) -> DateTime<Utc> {
        DateTime::parse_from_rfc3339(timestamp)
            .unwrap()
            .with_timezone(&Utc)
    }

    /// Bug-fix wording is detected; excluded types and look-alike words are not.
    #[test]
    fn test_is_bug_fix() {
        let bug_fixes = [
            "fix: resolve login bug",
            "Fixed the payment issue",
            "Bug fix for issue #123",
            "hotfix: urgent fix",
            "fixes issue with validation",
            "refactor: fix memory leak",
        ];
        for message in &bug_fixes {
            assert!(is_bug_fix(message), "expected a bug fix: {}", message);
        }

        let not_bug_fixes = [
            "style: apply formatting fixes",
            "chore: update dependencies",
            "docs: fix typo",
            "refactor: improve prefix handling",
            "Add debugging utilities",
            "update: add fixture for testing",
        ];
        for message in &not_bug_fixes {
            assert!(!is_bug_fix(message), "expected no bug fix: {}", message);
        }
    }

    /// Excluded commit types and cosmetic keywords are recognised.
    #[test]
    fn test_is_excluded_commit() {
        let excluded = [
            "style: apply formatting fixes",
            "chore: update dependencies",
            "docs: fix typo",
            "test: add unit tests",
            "refactor: improve prefix handling",
            "apply linting rules",
            "remove whitespace",
        ];
        for message in &excluded {
            assert!(is_excluded_commit(message), "expected excluded: {}", message);
        }

        let kept = [
            "fix: resolve login bug",
            "Fixed the payment issue",
            "refactor: fix memory leak",
        ];
        for message in &kept {
            assert!(!is_excluded_commit(message), "expected kept: {}", message);
        }
    }

    /// Two commits accumulate counts, authors, churn and the date window.
    #[test]
    fn test_file_history_data_accumulation() {
        let bug_fix_commit = CommitInfo {
            date: utc("2025-01-01T10:00:00Z"),
            message: "fix: resolve bug".to_string(),
            author: "author1@example.com".to_string(),
        };
        let feature_commit = CommitInfo {
            date: utc("2025-01-02T10:00:00Z"),
            message: "feat: add feature".to_string(),
            author: "author2@example.com".to_string(),
        };

        let mut history = FileHistoryData::default();
        history.add_commit(&bug_fix_commit, 15);
        history.add_commit(&feature_commit, 10);

        assert_eq!(history.total_commits, 2);
        assert_eq!(history.bug_fix_count, 1);
        assert_eq!(history.authors.len(), 2);
        assert_eq!(history.total_churn, 25);
        assert!(history.last_modified.is_some());
        assert!(history.first_seen.is_some());
    }

    /// Ten commits over ten days is roughly thirty commits per 30 days.
    #[test]
    fn test_calculate_change_frequency() {
        let now = Utc::now();
        let history = FileHistoryData {
            total_commits: 10,
            first_seen: Some(now - chrono::Duration::days(10)),
            ..Default::default()
        };

        let freq = history.calculate_change_frequency(now);
        assert!(25.0 < freq && freq < 35.0);
    }

    /// A history without commits is treated as fully stable.
    #[test]
    fn test_calculate_stability_new_file() {
        let history = FileHistoryData::default();
        let stability = history.calculate_stability(Utc::now());
        assert_eq!(stability, 1.0);
    }

    /// The stability score stays within the unit interval.
    #[test]
    fn test_calculate_stability_with_commits() {
        let history = FileHistoryData {
            total_commits: 10,
            bug_fix_count: 2,
            first_seen: Some(utc("2024-01-01T10:00:00Z")),
            ..Default::default()
        };

        let stability = history.calculate_stability(Utc::now());
        assert!((0.0..=1.0).contains(&stability));
    }
}