use anyhow::Result;
use std::collections::HashSet;
use std::path::Path;
use tracing::{debug, info, warn};
use super::blame::GitBlame;
use super::history::GitHistory;
use crate::graph::{GraphStore, CodeNode, CodeEdge, NodeKind, EdgeKind};
/// Counters describing what a git-enrichment run did.
///
/// All counters start at zero via `Default`.
#[derive(Debug, Clone, Default)]
pub struct EnrichmentStats {
/// Function nodes that received `last_modified` / `author` / `commit_count`
/// properties.
pub functions_enriched: usize,
/// Class nodes that received `last_modified` / `author` / `commit_count`
/// properties.
pub classes_enriched: usize,
/// Commit nodes created.
// NOTE(review): the function/class passes in this file never increment this,
// so it appears to stay 0 — confirm whether that is intended.
pub commits_created: usize,
/// Edges created.
// NOTE(review): likewise never incremented by the passes in this file — confirm.
pub edges_created: usize,
/// Incremented once per entity whose blame lookup returned an error, so it
/// counts entities rather than distinct files.
pub files_skipped: usize,
/// First value returned by `GitBlame::prewarm_cache` (logged as "hits").
pub cache_hits: usize,
/// Second value returned by `GitBlame::prewarm_cache` (logged as "computed").
pub cache_misses: usize,
}
/// Adds git-blame-derived metadata to the function and class nodes of a
/// `GraphStore`.
pub struct GitEnricher<'a> {
/// Blame reader, opened in `new` at the repository root reported by `history`.
blame: GitBlame,
/// Git history handle; `new` uses it to obtain the repository root.
history: &'a GitHistory,
/// Graph whose nodes are read and updated.
graph: &'a GraphStore,
/// Hashes of commits for which a `Commit` node has already been added, so the
/// same commit is not added twice.
seen_commits: HashSet<String>,
}
impl<'a> GitEnricher<'a> {
pub fn new(history: &'a GitHistory, graph: &'a GraphStore) -> Result<Self> {
let repo_root = history.repo_root()?;
let blame = GitBlame::open(repo_root)?;
Ok(Self {
blame,
history,
graph,
seen_commits: HashSet::new(),
})
}
pub fn enrich_all(&mut self) -> Result<EnrichmentStats> {
let mut stats = EnrichmentStats::default();
let functions = self.graph.get_functions();
let classes = self.graph.get_classes();
let mut unique_files: HashSet<String> = HashSet::new();
for f in &functions {
if f.get_str("last_modified").is_none() {
unique_files.insert(f.file_path.clone());
}
}
for c in &classes {
if c.get_str("last_modified").is_none() {
unique_files.insert(c.file_path.clone());
}
}
let file_list: Vec<String> = unique_files.into_iter().collect();
let (cache_hits, cache_misses) = if !file_list.is_empty() {
info!("Pre-warming git blame cache for {} files...", file_list.len());
let (hits, misses) = self.blame.prewarm_cache(&file_list);
debug!("Git cache: {} hits, {} computed", hits, misses);
(hits, misses)
} else {
(0, 0)
};
stats.cache_hits = cache_hits;
stats.cache_misses = cache_misses;
info!("Enriching Function nodes with git history...");
let func_stats = self.enrich_functions()?;
stats.functions_enriched = func_stats.functions_enriched;
stats.commits_created += func_stats.commits_created;
stats.edges_created += func_stats.edges_created;
info!("Enriching Class nodes with git history...");
let class_stats = self.enrich_classes()?;
stats.classes_enriched = class_stats.classes_enriched;
stats.commits_created += class_stats.commits_created;
stats.edges_created += class_stats.edges_created;
info!(
"Git enrichment complete: {} functions, {} classes, {} commits, {} edges",
stats.functions_enriched,
stats.classes_enriched,
stats.commits_created,
stats.edges_created
);
Ok(stats)
}
fn enrich_functions(&mut self) -> Result<EnrichmentStats> {
let mut stats = EnrichmentStats::default();
let functions = self.graph.get_functions();
let functions_to_enrich: Vec<_> = functions
.into_iter()
.filter(|f| f.get_str("last_modified").is_none())
.collect();
let total = functions_to_enrich.len();
debug!("Found {} functions to enrich", total);
for (i, func) in functions_to_enrich.into_iter().enumerate() {
if i > 0 && i % 500 == 0 {
debug!("Enriched {}/{} functions", i, total);
}
let line_start = func.line_start;
let line_end = func.line_end;
if line_start == 0 {
continue;
}
match self.blame.get_entity_blame(&func.file_path, line_start, line_end) {
Ok(blame_info) => {
if let (Some(last_modified), Some(author)) =
(&blame_info.last_modified, &blame_info.last_author)
{
self.graph.update_node_properties(
&func.qualified_name,
&[
("last_modified", serde_json::Value::String(last_modified.clone())),
("author", serde_json::Value::String(author.clone())),
("commit_count", serde_json::Value::Number((blame_info.commit_count as i64).into())),
],
);
stats.functions_enriched += 1;
}
}
Err(e) => {
debug!("Failed to get blame for {}:{}: {}", func.file_path, line_start, e);
stats.files_skipped += 1;
}
}
}
Ok(stats)
}
fn enrich_classes(&mut self) -> Result<EnrichmentStats> {
let mut stats = EnrichmentStats::default();
let classes = self.graph.get_classes();
let classes_to_enrich: Vec<_> = classes
.into_iter()
.filter(|c| c.get_str("last_modified").is_none())
.collect();
let total = classes_to_enrich.len();
debug!("Found {} classes to enrich", total);
for (i, class) in classes_to_enrich.into_iter().enumerate() {
if i > 0 && i % 50 == 0 {
debug!("Enriched {}/{} classes", i, total);
}
let line_start = class.line_start;
let line_end = class.line_end;
if line_start == 0 {
continue;
}
match self.blame.get_entity_blame(&class.file_path, line_start, line_end) {
Ok(blame_info) => {
if let (Some(last_modified), Some(author)) =
(&blame_info.last_modified, &blame_info.last_author)
{
self.graph.update_node_properties(
&class.qualified_name,
&[
("last_modified", serde_json::Value::String(last_modified.clone())),
("author", serde_json::Value::String(author.clone())),
("commit_count", serde_json::Value::Number((blame_info.commit_count as i64).into())),
],
);
stats.classes_enriched += 1;
}
}
Err(e) => {
debug!("Failed to get blame for {}:{}: {}", class.file_path, line_start, e);
stats.files_skipped += 1;
}
}
}
Ok(stats)
}
fn create_commit_if_needed(&mut self, hash: &str, author: &str, timestamp: &str) -> bool {
if self.seen_commits.contains(hash) {
return false;
}
let node = CodeNode::new(NodeKind::Commit, hash, "")
.with_qualified_name(hash)
.with_property("author", author)
.with_property("timestamp", timestamp);
self.graph.add_node(node);
self.seen_commits.insert(hash.to_string());
true
}
fn create_modified_in_edge(&self, entity_qn: &str, commit_hash: &str) -> bool {
self.graph.add_edge_by_name(
entity_qn,
commit_hash,
CodeEdge::new(EdgeKind::ModifiedIn),
)
}
}
/// One-call entry point: opens the git history at `repo_path`, builds a
/// `GitEnricher` over `graph`, and runs a full enrichment pass.
///
/// `_repo_id` is accepted but not used by this function.
///
/// # Errors
///
/// Propagates failures from `GitHistory::new`, `GitEnricher::new` and
/// `GitEnricher::enrich_all`.
pub fn enrich_graph_with_git(
    repo_path: &Path,
    graph: &GraphStore,
    _repo_id: Option<&str>,
) -> Result<EnrichmentStats> {
    let git_history = GitHistory::new(repo_path)?;
    GitEnricher::new(&git_history, graph).and_then(|mut git_enricher| git_enricher.enrich_all())
}