use super::{AnalysisTarget, Context, ContextDetails, ContextProvider};
use crate::core::FunctionMetrics;
use anyhow::Result;
use chrono::{DateTime, Utc};
use dashmap::DashMap;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Instant;
mod batched;
pub mod batched_function;
mod blame_cache;
mod function_level;
pub mod git2_provider;
mod stability;
#[cfg(test)]
mod test_helpers;
#[cfg(test)]
mod tests;
/// Git-derived history metrics for a single file.
///
/// Values are produced either from the preloaded batched history or by
/// querying the repository directly; the per-field notes below describe the
/// direct path, which is the one visible in this module.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileHistory {
/// Rate of change. The direct path computes commits per 30 days of file age,
/// and 0.0 when the file age is zero days.
pub change_frequency: f64,
/// Number of commits the repository layer counted as bug fixes.
pub bug_fix_count: usize,
/// Timestamp of the most recent modification, if one is known.
pub last_modified: Option<DateTime<Utc>>,
/// Number of authors reported for the file.
pub author_count: usize,
/// Stability score. The direct path blends churn, bug-fix ratio and age,
/// caps the result at 1.0, and yields exactly 1.0 when there are no commits.
pub stability_score: f64,
/// Total number of commits reported for the file.
pub total_commits: usize,
/// Age of the file in days relative to the reference time used for the query.
pub age_days: u32,
}
/// Context provider that derives risk context from git history.
pub struct GitHistoryProvider {
/// Repository root as reported by the opened repository (`repo_path()`);
/// used to relativize incoming paths.
repo_root: PathBuf,
/// Per-file history cache, keyed by the relativized path.
cache: Arc<DashMap<PathBuf, FileHistory>>,
/// File-level history filled in by the function-history preload; `None` until then.
batched_history: Option<batched::BatchedGitHistory>,
/// Function-level history filled in by the preload; `None` until then.
batched_functions: Option<batched_function::BatchedFunctionGitHistory>,
/// Blame cache handed to the preload scan and to per-function history lookups.
blame_cache: blame_cache::FileBlameCache,
/// Handle to the opened repository. `new` always stores `Some`; the `None`
/// case is still handled defensively by the methods that read it.
git2_repo: Option<git2_provider::Git2Repository>,
}
impl GitHistoryProvider {
/// Opens the git repository at `repo_root` and builds a provider with empty caches.
///
/// The stored repository root is the path reported by the opened repository
/// (`repo_path()`), which may differ from the `repo_root` argument as given.
///
/// # Errors
///
/// Returns an error when `repo_root` cannot be opened as a git repository.
pub fn new(repo_root: PathBuf) -> Result<Self> {
    let repo = git2_provider::Git2Repository::open(&repo_root).map_err(|e| {
        anyhow::anyhow!("Not a git repository: {} ({})", repo_root.display(), e)
    })?;
    log::debug!(
        "git2 repository opened successfully at {}",
        repo_root.display()
    );

    // Opening succeeded, so the repository's own path is always available.
    let canonical_repo_root = repo.repo_path().to_path_buf();

    Ok(Self {
        blame_cache: blame_cache::FileBlameCache::new(canonical_repo_root.clone()),
        repo_root: canonical_repo_root,
        cache: Arc::new(DashMap::new()),
        batched_history: None,
        batched_functions: None,
        git2_repo: Some(repo),
    })
}
/// Preloads git history for the given functions without progress reporting.
///
/// Delegates to `preload_function_histories_with_progress` with no callback.
pub fn preload_function_histories(&mut self, metrics: &[FunctionMetrics]) -> Result<()> {
    let no_progress = None;
    self.preload_function_histories_with_progress(metrics, no_progress)
}
/// Scans the repository once and caches function-level and file-level history.
///
/// Every metric with a non-empty name becomes a preload target whose path is
/// relativized to the repository root. On success both `batched_functions`
/// and `batched_history` are replaced with the scan results.
///
/// Returns `Ok(())` without doing anything when no repository handle is present.
///
/// # Errors
///
/// Propagates any error from the batched history scan.
pub fn preload_function_histories_with_progress(
    &mut self,
    metrics: &[FunctionMetrics],
    progress_cb: Option<batched_function::ProgressCallback<'_>>,
) -> Result<()> {
    let repo = match self.git2_repo.as_ref() {
        Some(repo) => repo,
        None => return Ok(()),
    };

    let mut targets: Vec<batched_function::FunctionPreloadTarget> = Vec::new();
    for metric in metrics {
        // Unnamed entries are not preloaded.
        if metric.name.is_empty() {
            continue;
        }
        let first_line = metric.line;
        let end_line = first_line.saturating_add(metric.length.max(1));
        targets.push(batched_function::FunctionPreloadTarget {
            file: self.to_relative_path(&metric.file).into_owned(),
            name: metric.name.clone(),
            line_range: (first_line, end_line),
        });
    }

    let started = Instant::now();
    let scan = batched_function::BatchedFunctionGitHistory::build(
        repo,
        &self.blame_cache,
        &targets,
        progress_cb,
    )?;

    // Take the count before the scan results are moved into `self`.
    let function_count = batched_function::BatchedFunctionGitHistory::len(&scan.functions);
    self.batched_functions = Some(scan.functions);
    self.batched_history = Some(scan.file_history);

    log::info!(
        "Function git history preload: {} functions in {:?}",
        function_count,
        started.elapsed()
    );
    Ok(())
}
/// Converts `path` into a path relative to the repository root where possible.
///
/// Resolution order:
/// 1. If `path` starts with the stored repository root, strip that prefix.
/// 2. If `path` starts with a leading `.` component (`./foo`), strip it —
///    unless nothing would remain.
/// 3. If `path` is absolute, canonicalize both `path` and the repository root
///    and strip the canonical root.
/// 4. Otherwise return `path` unchanged.
///
/// Only step 3 allocates; every other outcome borrows from `path`.
fn to_relative_path<'a>(&self, path: &'a Path) -> std::borrow::Cow<'a, Path> {
    use std::borrow::Cow;

    if let Ok(rel) = path.strip_prefix(&self.repo_root) {
        return Cow::Borrowed(rel);
    }

    // `Path::strip_prefix` compares whole components, so "./" and "." are the
    // same prefix. One check covers both spellings; the non-empty guard keeps
    // a bare "." from being turned into an empty path.
    if let Ok(rel) = path.strip_prefix(".") {
        if !rel.as_os_str().is_empty() {
            return Cow::Borrowed(rel);
        }
    }

    if path.is_absolute() {
        // Canonicalizing resolves symlinks, so a path reached through a
        // different spelling of the root can still be relativized.
        if let (Ok(canonical_path), Ok(canonical_root)) =
            (path.canonicalize(), self.repo_root.canonicalize())
        {
            if let Ok(rel) = canonical_path.strip_prefix(&canonical_root) {
                return Cow::Owned(rel.to_path_buf());
            }
        }
    }

    Cow::Borrowed(path)
}
/// Returns the history for `path`, consulting caches before the repository.
///
/// Lookup order: the per-file cache, then the preloaded batched history (if
/// present and it has metrics for the path), then a direct repository query.
/// Whatever is found beyond the cache is stored in the cache under the
/// relativized path before being returned.
///
/// # Errors
///
/// Propagates errors from the direct repository query.
fn get_or_fetch_history(&self, path: &Path, now: DateTime<Utc>) -> Result<FileHistory> {
    let key = self.to_relative_path(path);

    // Clone out of the map entry so the entry guard is released immediately.
    let cached = self
        .cache
        .get(key.as_ref())
        .map(|entry| entry.value().clone());
    if let Some(history) = cached {
        return Ok(history);
    }

    let from_batch = self
        .batched_history
        .as_ref()
        .and_then(|batched| batched.calculate_metrics(key.as_ref(), now))
        .map(
            |(
                change_frequency,
                bug_fix_count,
                last_modified,
                author_count,
                stability_score,
                total_commits,
                age_days,
            )| FileHistory {
                change_frequency,
                bug_fix_count,
                last_modified,
                author_count,
                stability_score,
                total_commits,
                age_days,
            },
        );

    let history = match from_batch {
        Some(history) => history,
        None => self.fetch_history_direct(key.as_ref(), now)?,
    };

    self.cache.insert(key.into_owned(), history.clone());
    Ok(history)
}
/// Queries the repository directly for the history of `path`.
///
/// When no repository handle is present, logs a warning and returns neutral
/// defaults (zero counts, stability 1.0).
///
/// # Errors
///
/// Propagates the first error returned by any of the repository queries.
fn fetch_history_direct(&self, path: &Path, now: DateTime<Utc>) -> Result<FileHistory> {
    let Some(repo) = self.git2_repo.as_ref() else {
        log::warn!("git2 not available, returning default history");
        return Ok(FileHistory {
            change_frequency: 0.0,
            bug_fix_count: 0,
            last_modified: None,
            author_count: 0,
            stability_score: 1.0,
            total_commits: 0,
            age_days: 0,
        });
    };

    let total_commits = repo.count_file_commits(path)?;
    let age_days = repo.file_age_days(path, now)?;
    let bug_fix_count = repo.count_bug_fixes(path)?;
    let author_count = repo.file_authors(path)?.len();
    let last_modified = repo.file_last_modified(path)?;

    // Commits per 30 days; a zero-day-old file has no meaningful rate.
    let change_frequency = match age_days {
        0 => 0.0,
        days => (total_commits as f64) / (days as f64) * 30.0,
    };
    let stability_score =
        self.calculate_stability_from_values(age_days, total_commits, bug_fix_count);

    Ok(FileHistory {
        change_frequency,
        bug_fix_count,
        last_modified,
        author_count,
        stability_score,
        total_commits,
        age_days,
    })
}
/// Computes a stability score, capped at 1.0, from age, commit count and bug-fix count.
///
/// The score is a weighted blend of three factors:
/// - churn (40%): `1 / (1 + commits per 30 days)`, or 0.5 when the age is zero days;
/// - bug-fix ratio (40%): `1 - min(bug_fixes / commits, 1)`;
/// - age (20%): `min(age_days / 365, 1)`.
///
/// A file with no commits is treated as fully stable (1.0).
fn calculate_stability_from_values(
    &self,
    age_days: u32,
    commits: usize,
    bug_fixes: usize,
) -> f64 {
    if commits == 0 {
        return 1.0;
    }

    let commit_count = commits as f64;
    let age = age_days as f64;

    let churn_factor = match age_days {
        0 => 0.5,
        _ => {
            let monthly_churn = commit_count / age * 30.0;
            1.0 / (1.0 + monthly_churn)
        }
    };
    let bug_ratio = (bug_fixes as f64 / commit_count).min(1.0);
    let bug_factor = 1.0 - bug_ratio;
    let age_factor = (age / 365.0).min(1.0);

    let weighted = churn_factor * 0.4 + bug_factor * 0.4 + age_factor * 0.2;
    weighted.min(1.0)
}
/// Returns the history for `path`, using the current UTC time as the reference time.
///
/// # Errors
///
/// Propagates errors from the underlying history lookup.
pub fn analyze_file(&mut self, path: &Path) -> Result<FileHistory> {
    let now = Utc::now();
    self.get_or_fetch_history(path, now)
}
/// Returns the history for `path`, using `now` as the reference time.
///
/// # Errors
///
/// Propagates errors from the underlying history lookup.
pub fn analyze_file_with_time(&self, path: &Path, now: DateTime<Utc>) -> Result<FileHistory> {
    let reference_time = now;
    self.get_or_fetch_history(path, reference_time)
}
/// Test helper: every path known to the preloaded batched history, or an
/// empty list when nothing has been preloaded.
#[cfg(test)]
pub fn batched_paths(&self) -> Vec<std::path::PathBuf> {
    match self.batched_history.as_ref() {
        Some(batched) => batched.all_paths().into_iter().cloned().collect(),
        None => Vec::new(),
    }
}
/// Test helper: whether the preloaded batched history contains `path`.
/// Returns `false` when nothing has been preloaded.
#[cfg(test)]
pub fn batched_has_path(&self, path: &Path) -> bool {
    match self.batched_history.as_ref() {
        Some(batched) => batched.has_path(path),
        None => false,
    }
}
/// Test helper: the repository root stored by this provider.
#[cfg(test)]
pub fn repo_root(&self) -> &Path {
    self.repo_root.as_path()
}
}
impl ContextProvider for GitHistoryProvider {
/// Identifier under which this provider reports its context.
fn name(&self) -> &str {
    const PROVIDER_NAME: &str = "git_history";
    PROVIDER_NAME
}
/// Gathers historical context for `target`.
///
/// When the target names a function, function-level history is tried first;
/// if that fails the failure is logged at debug level and file-level history
/// is used instead. Targets without a function name go straight to file level.
///
/// # Errors
///
/// Returns the file-level lookup error when file-level history is needed and
/// cannot be obtained.
fn gather(&self, target: &AnalysisTarget) -> Result<Context> {
    if target.function_name.is_empty() {
        return self.gather_for_file(target);
    }

    self.gather_for_function(target).or_else(|e| {
        log::debug!(
            "Function-level git analysis failed for '{}', falling back to file-level: {}",
            target.function_name,
            e
        );
        self.gather_for_file(target)
    })
}
/// Weight attached to the context this provider produces.
fn weight(&self) -> f64 {
    const GIT_HISTORY_WEIGHT: f64 = 1.0;
    GIT_HISTORY_WEIGHT
}
/// Produces a human-readable explanation of a historical context.
///
/// Any context whose details are not `ContextDetails::Historical` yields a
/// fixed "no information" message.
fn explain(&self, context: &Context) -> String {
    if let ContextDetails::Historical {
        change_frequency,
        bug_density,
        age_days,
        author_count,
        ..
    } = &context.details
    {
        return stability::explain_historical_context(
            *change_frequency,
            *bug_density,
            (*age_days).into(),
            *author_count,
        );
    }

    String::from("No historical information")
}
}
impl GitHistoryProvider {
/// Builds a historical context from function-level history for `target`.
///
/// # Errors
///
/// Returns an error when the lookup itself fails or when no history exists
/// for the function.
fn gather_for_function(&self, target: &AnalysisTarget) -> Result<Context> {
    let relative_path = self.to_relative_path(&target.file_path);

    let history = match self.lookup_function_history(relative_path.as_ref(), target)? {
        Some(history) => history,
        None => anyhow::bail!(
            "No function history for '{}' in {}",
            target.function_name,
            relative_path.display()
        ),
    };

    let context = Self::context_from_function_history(
        self.name(),
        self.weight(),
        &history,
        target.reference_time,
    );
    Ok(context)
}
/// Converts a function's history into a `Context` for the named provider.
///
/// `change_frequency` and `bug_density` are computed once and reused for both
/// the risk classification and the reported details, so the two always agree.
///
/// `reference_time` is the instant against which change frequency and age are
/// measured.
fn context_from_function_history(
    provider_name: &str,
    weight: f64,
    history: &function_level::FunctionHistory,
    reference_time: DateTime<Utc>,
) -> Context {
    let change_frequency = history.change_frequency(reference_time);
    let bug_density = history.bug_density();
    let contribution = stability::classify_risk_contribution(change_frequency, bug_density);

    Context {
        provider: provider_name.to_string(),
        weight,
        contribution,
        details: ContextDetails::Historical {
            change_frequency,
            bug_density,
            age_days: history.age_days(reference_time),
            author_count: history.authors.len(),
            // NOTE(review): these `as u32` casts truncate silently if the
            // counts are wider than u32 and ever exceed u32::MAX — confirm
            // the source types.
            total_commits: history.total_commits_including_introduction() as u32,
            bug_fix_count: history.bug_fix_count as u32,
        },
    }
}
/// Finds the history of the target function in `relative_path`.
///
/// The preloaded function history is consulted first. On a miss (or when
/// nothing was preloaded) the repository is queried directly. Returns
/// `Ok(None)` only when there is no preloaded hit and no repository handle.
///
/// # Errors
///
/// Propagates errors from the direct function-history query.
fn lookup_function_history(
    &self,
    relative_path: &Path,
    target: &AnalysisTarget,
) -> Result<Option<function_level::FunctionHistory>> {
    let preloaded = self
        .batched_functions
        .as_ref()
        .and_then(|batched| batched.get(relative_path, &target.function_name));
    if preloaded.is_some() {
        return Ok(preloaded);
    }

    match self.git2_repo.as_ref() {
        None => Ok(None),
        Some(repo) => function_level::get_function_history_git2(
            repo,
            relative_path,
            &target.function_name,
            target.line_range,
            &self.blame_cache,
        )
        .map(Some),
    }
}
fn gather_for_file(&self, target: &AnalysisTarget) -> Result<Context> {
let history = self.get_or_fetch_history(&target.file_path, target.reference_time)?;
let bug_density =
stability::calculate_bug_density(history.bug_fix_count, history.total_commits);
let contribution =
stability::classify_risk_contribution(history.change_frequency, bug_density);
Ok(Context {
provider: self.name().to_string(),
weight: self.weight(),
contribution,
details: ContextDetails::Historical {
change_frequency: history.change_frequency,
bug_density,
age_days: history.age_days,
author_count: history.author_count,
total_commits: history.total_commits as u32,
bug_fix_count: history.bug_fix_count as u32,
},
})
}
}