use anyhow::{Context, Result};
use chrono::{DateTime, TimeZone, Utc};
use git2::{DiffOptions, Repository, Sort};
use std::collections::HashMap;
use std::path::Path;
use tracing::{debug, warn};
/// Summary of one commit, as assembled by `GitHistory::extract_commit_info`.
#[derive(Debug, Clone)]
pub struct CommitInfo {
/// Abbreviated id: the first 12 characters of `full_hash`.
pub hash: String,
/// Complete commit id rendered as a string.
pub full_hash: String,
/// Author name, or `"Unknown"` when the signature yields no name.
pub author: String,
/// Author e-mail, or an empty string when the signature yields none.
pub author_email: String,
/// Commit time as rendered by `format_git_time` (RFC 3339, UTC).
pub timestamp: String,
/// First line of the commit message; empty when no message is available.
pub message: String,
/// New-side paths of every file delta between the commit and its first
/// parent (or against no tree at all when the commit has no parent).
pub files_changed: Vec<String>,
/// Inserted-line total reported by the diff stats for the whole commit.
pub insertions: usize,
/// Deleted-line total reported by the diff stats for the whole commit.
pub deletions: usize,
}
/// Aggregated change activity for a single file path.
///
/// `Default` yields zero counters, no authors and no last-modified data.
#[derive(Debug, Clone, Default)]
pub struct FileChurn {
/// Inserted lines summed over the examined commits (accumulated by
/// `GitHistory::get_file_churn`).
pub total_insertions: usize,
/// Deleted lines summed over the examined commits (accumulated by
/// `GitHistory::get_file_churn`).
pub total_deletions: usize,
/// Number of examined commits that touched the file.
pub commit_count: usize,
/// Distinct author names seen on those commits.
pub authors: Vec<String>,
/// Timestamp string of the first (most recent in a time-sorted walk) commit
/// found for the file, if any.
pub last_modified: Option<String>,
/// Author name of that same commit, if any.
pub last_author: Option<String>,
}
/// History queries (commits, churn, tracked files) over one git repository.
pub struct GitHistory {
// Handle to the repository located by `Repository::discover` in `open`.
repo: Repository,
}
impl GitHistory {
/// Opens the repository for `path`; this simply forwards to [`GitHistory::open`].
///
/// # Errors
/// Returns whatever error `open` returns.
pub fn new(path: &Path) -> Result<Self> {
Self::open(path)
}
pub fn open(path: &Path) -> Result<Self> {
let repo = Repository::discover(path)
.with_context(|| format!("Failed to open git repository at {:?}", path))?;
debug!("Opened git repository at {:?}", repo.path());
Ok(Self { repo })
}
/// Reports whether `Repository::discover` succeeds for `path`.
///
/// Any discovery error is treated as "not a repository".
pub fn is_git_repo(path: &Path) -> bool {
    let probe = Repository::discover(path);
    probe.is_ok()
}
/// Returns the repository's working directory.
///
/// # Errors
/// Fails when the repository reports no working directory.
pub fn repo_root(&self) -> Result<&Path> {
    let workdir = self.repo.workdir();
    workdir.context("Repository has no working directory (bare repo?)")
}
/// Collects up to `max_commits` commits, walking from HEAD in time order,
/// whose diff against their first parent contains at least one delta
/// matching `file_path`.
///
/// `file_path` is handed to the diff as a pathspec. A commit without a
/// parent is diffed against no tree, so the commit that introduced the file
/// is included. The returned [`CommitInfo`] values describe the whole commit
/// (all changed files and total line stats), not just `file_path`.
///
/// # Errors
/// Propagates any git2 error from walking, loading commits/trees or diffing.
pub fn get_file_commits(&self, file_path: &str, max_commits: usize) -> Result<Vec<CommitInfo>> {
    let mut revwalk = self.repo.revwalk()?;
    revwalk.set_sorting(Sort::TIME)?;
    revwalk.push_head()?;

    let mut commits = Vec::new();
    for oid_result in revwalk {
        // Checked before unwrapping so a walk error past the limit is never surfaced.
        if commits.len() >= max_commits {
            break;
        }
        let oid = oid_result?;
        let commit = self.repo.find_commit(oid)?;

        // A failed first-parent lookup (e.g. a root commit) means "diff against nothing".
        let parent = commit.parent(0).ok();
        let tree = commit.tree()?;
        let parent_tree = parent.as_ref().map(|p| p.tree()).transpose()?;

        let mut diff_opts = DiffOptions::new();
        diff_opts.pathspec(file_path);
        let diff = self.repo.diff_tree_to_tree(
            parent_tree.as_ref(),
            Some(&tree),
            Some(&mut diff_opts),
        )?;

        // No delta for the pathspec: this commit did not touch the file.
        if diff.deltas().len() == 0 {
            continue;
        }
        commits.push(self.extract_commit_info(&commit)?);
    }
    Ok(commits)
}
/// Returns up to `max_commits` commits reachable from HEAD, in time-sorted
/// walk order.
///
/// When `since` is given, the walk stops at the first commit whose time is
/// strictly earlier than it. A commit whose time cannot be converted to a
/// single UTC instant is never treated as older than the cutoff.
///
/// # Errors
/// Propagates any git2 error from the walk or from building a [`CommitInfo`].
pub fn get_recent_commits(
    &self,
    max_commits: usize,
    since: Option<DateTime<Utc>>,
) -> Result<Vec<CommitInfo>> {
    let mut walker = self.repo.revwalk()?;
    walker.set_sorting(Sort::TIME)?;
    walker.push_head()?;

    let mut collected = Vec::new();
    for next_oid in walker {
        if collected.len() >= max_commits {
            break;
        }
        let commit = self.repo.find_commit(next_oid?)?;

        let before_cutoff = match since {
            Some(cutoff) => Utc
                .timestamp_opt(commit.time().seconds(), 0)
                .single()
                .map_or(false, |when| when < cutoff),
            None => false,
        };
        if before_cutoff {
            break;
        }

        collected.push(self.extract_commit_info(&commit)?);
    }
    Ok(collected)
}
pub fn get_file_churn(&self, file_path: &str, max_commits: usize) -> Result<FileChurn> {
let commits = self.get_file_commits(file_path, max_commits)?;
let mut churn = FileChurn::default();
let mut author_set = std::collections::HashSet::new();
for commit in &commits {
let stats = self.get_commit_file_stats(&commit.full_hash, file_path)?;
churn.total_insertions += stats.0;
churn.total_deletions += stats.1;
author_set.insert(commit.author.clone());
}
churn.commit_count = commits.len();
churn.authors = author_set.into_iter().collect();
if let Some(latest) = commits.first() {
churn.last_modified = Some(latest.timestamp.clone());
churn.last_author = Some(latest.author.clone());
}
Ok(churn)
}
pub fn get_all_file_churn(&self, max_commits: usize) -> Result<HashMap<String, FileChurn>> {
let mut churn_map: HashMap<String, FileChurn> = HashMap::new();
let mut revwalk = self.repo.revwalk()?;
revwalk.set_sorting(Sort::TIME)?;
revwalk.push_head()?;
let mut commit_count = 0;
for oid_result in revwalk {
if commit_count >= max_commits {
break;
}
let oid = oid_result?;
let commit = self.repo.find_commit(oid)?;
let parent = commit.parent(0).ok();
let tree = commit.tree()?;
let parent_tree = parent.as_ref().map(|p| p.tree()).transpose()?;
let diff = self
.repo
.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None)?;
let author = commit.author().name().unwrap_or("Unknown").to_string();
let timestamp = format_git_time(&commit.time());
diff.foreach(
&mut |delta, _| {
if let Some(path) = delta.new_file().path() {
let path_str = path.to_string_lossy().to_string();
let entry = churn_map.entry(path_str).or_default();
entry.commit_count += 1;
if !entry.authors.contains(&author) {
entry.authors.push(author.clone());
}
if entry.last_modified.is_none() {
entry.last_modified = Some(timestamp.clone());
entry.last_author = Some(author.clone());
}
}
true
},
None,
None,
None,
)?;
let stats = diff.stats()?;
commit_count += 1;
}
Ok(churn_map)
}
/// Returns `(insertions, deletions)` for `file_path` in the commit named by
/// `commit_hash`, measured against the commit's first parent (or against no
/// tree when it has none).
///
/// # Errors
/// Fails when `commit_hash` is not a valid object id, or on any git2 error
/// while loading the commit, its trees, the diff or its stats.
fn get_commit_file_stats(&self, commit_hash: &str, file_path: &str) -> Result<(usize, usize)> {
    let commit = self.repo.find_commit(git2::Oid::from_str(commit_hash)?)?;

    let first_parent = commit.parent(0).ok();
    let new_tree = commit.tree()?;
    let old_tree = match first_parent.as_ref() {
        Some(p) => Some(p.tree()?),
        None => None,
    };

    // Restrict the diff to the requested path so the stats cover it alone.
    let mut opts = DiffOptions::new();
    opts.pathspec(file_path);
    let diff = self
        .repo
        .diff_tree_to_tree(old_tree.as_ref(), Some(&new_tree), Some(&mut opts))?;

    let totals = diff.stats()?;
    Ok((totals.insertions(), totals.deletions()))
}
/// Converts a git2 commit into a [`CommitInfo`].
///
/// The commit is diffed against its first parent (or against no tree when it
/// has none) to list the new-side paths of all changed files and to obtain
/// the commit-wide insertion/deletion totals. Only the first line of the
/// message is kept. The short hash is the first 12 characters of the id
/// string, so the id string must be at least that long.
///
/// # Errors
/// Propagates any git2 error from loading trees, diffing or computing stats.
fn extract_commit_info(&self, commit: &git2::Commit) -> Result<CommitInfo> {
    let signature = commit.author();
    let timestamp = format_git_time(&commit.time());
    let message = commit
        .message()
        .and_then(|text| text.lines().next())
        .unwrap_or("")
        .to_string();

    let first_parent = commit.parent(0).ok();
    let new_tree = commit.tree()?;
    let old_tree = match first_parent.as_ref() {
        Some(p) => Some(p.tree()?),
        None => None,
    };
    let diff = self
        .repo
        .diff_tree_to_tree(old_tree.as_ref(), Some(&new_tree), None)?;

    let mut files_changed = Vec::new();
    diff.foreach(
        &mut |delta, _| {
            let new_side = delta.new_file();
            if let Some(p) = new_side.path() {
                files_changed.push(p.to_string_lossy().to_string());
            }
            true
        },
        None,
        None,
        None,
    )?;
    let totals = diff.stats()?;

    let full_hash = commit.id().to_string();
    let hash = full_hash[..12].to_string();
    let author = signature.name().unwrap_or("Unknown").to_string();
    let author_email = signature.email().unwrap_or("").to_string();

    Ok(CommitInfo {
        hash,
        full_hash,
        author,
        author_email,
        timestamp,
        message,
        files_changed,
        insertions: totals.insertions(),
        deletions: totals.deletions(),
    })
}
/// Lists the path of every blob entry in the tree that HEAD points to.
///
/// Paths are built as `dir` + entry name, where `dir` is the prefix the tree
/// walk supplies (empty at the top level). Entry names that are not valid
/// UTF-8 are converted lossily instead of being replaced by an empty string,
/// so such files still yield a distinct, non-empty path; this matches the
/// lossy path conversion used for diff paths elsewhere in this type.
///
/// # Errors
/// Propagates any git2 error from resolving HEAD, peeling it to a tree, or
/// walking the tree.
pub fn get_tracked_files(&self) -> Result<Vec<String>> {
    let head = self.repo.head()?;
    let tree = head.peel_to_tree()?;
    let mut files = Vec::new();
    tree.walk(git2::TreeWalkMode::PreOrder, |dir, entry| {
        if entry.kind() == Some(git2::ObjectType::Blob) {
            let name = String::from_utf8_lossy(entry.name_bytes());
            // With an empty `dir` this is just the entry name.
            files.push(format!("{}{}", dir, name));
        }
        git2::TreeWalkResult::Ok
    })?;
    Ok(files)
}
/// Returns up to `max_commits` commits that touched `file_path` and have a
/// diff hunk overlapping the line range `line_start..=line_end`.
///
/// Candidates are the first `2 * max_commits` commits reported by
/// `get_file_commits`; the doubling is saturating, so very large limits such
/// as `usize::MAX` neither overflow nor wrap around to a tiny search window.
/// Matching commits beyond that candidate window are not found.
///
/// # Errors
/// Propagates any error from collecting candidates or inspecting their hunks.
pub fn get_line_range_commits(
    &self,
    file_path: &str,
    line_start: u32,
    line_end: u32,
    max_commits: usize,
) -> Result<Vec<CommitInfo>> {
    let candidates = self.get_file_commits(file_path, max_commits.saturating_mul(2))?;
    let mut matching_commits = Vec::new();
    for commit in candidates {
        if matching_commits.len() >= max_commits {
            break;
        }
        if self.commit_touches_lines(&commit.full_hash, file_path, line_start, line_end)? {
            matching_commits.push(commit);
        }
    }
    Ok(matching_commits)
}
/// Reports whether any hunk of `file_path` in the given commit overlaps the
/// inclusive new-side line range `line_start..=line_end`.
///
/// The commit is diffed against its first parent (or against no tree when it
/// has none), restricted to `file_path`. A hunk covers the new-side lines
/// `new_start ..= new_start + new_lines - 1`; a hunk with zero new lines is
/// treated as anchored at `new_start`.
///
/// NOTE(review): hunks presumably include the diff's default context lines,
/// so lines adjacent to an actual change may also count as touched. Confirm
/// whether that is intended.
///
/// # Errors
/// Fails when `commit_hash` is not a valid object id, or on any git2 error
/// while loading the commit, diffing or iterating hunks.
fn commit_touches_lines(
    &self,
    commit_hash: &str,
    file_path: &str,
    line_start: u32,
    line_end: u32,
) -> Result<bool> {
    let oid = git2::Oid::from_str(commit_hash)?;
    let commit = self.repo.find_commit(oid)?;
    let parent = commit.parent(0).ok();
    let tree = commit.tree()?;
    let parent_tree = parent.as_ref().map(|p| p.tree()).transpose()?;
    let mut diff_opts = DiffOptions::new();
    diff_opts.pathspec(file_path);
    let diff =
        self.repo
            .diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), Some(&mut diff_opts))?;

    let mut touches_lines = false;
    diff.foreach(
        &mut |_, _| true,
        None,
        Some(&mut |_, hunk| {
            let first = hunk.new_start();
            // `new_lines` is a count, so the last covered line is one less than
            // `first + new_lines`; using the sum itself would match one line too many.
            let last = first.saturating_add(hunk.new_lines().saturating_sub(1));
            if first <= line_end && last >= line_start {
                touches_lines = true;
            }
            // Always continue: returning false would abort the iteration.
            true
        }),
        None,
    )?;
    Ok(touches_lines)
}
}
/// Renders the seconds component of a git timestamp as an RFC 3339 string in UTC.
///
/// Falls back to the literal `"1970-01-01T00:00:00Z"` when the seconds value
/// does not map to a single UTC instant.
fn format_git_time(time: &git2::Time) -> String {
    Utc.timestamp_opt(time.seconds(), 0).single().map_or_else(
        || "1970-01-01T00:00:00Z".to_string(),
        |instant| instant.to_rfc3339(),
    )
}
/// Unit tests that exercise `GitHistory` against a throwaway repository
/// created in a temporary directory.
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
// Builds a temporary repository with user.name/user.email configured and a
// single parentless commit ("Initial commit") that adds `test.txt`.
// The `TempDir` is returned alongside the repository so the caller keeps the
// directory alive for the duration of the test.
fn create_test_repo() -> Result<(tempfile::TempDir, Repository)> {
let dir = tempdir()?;
let repo = Repository::init(dir.path())?;
let mut config = repo.config()?;
config.set_str("user.name", "Test User")?;
config.set_str("user.email", "test@example.com")?;
// Presumably scoped so that the signature, index and tree (which are obtained
// from `repo`) are dropped before `repo` is moved into the return value.
{
let sig = repo.signature()?;
let tree_id = {
let mut index = repo.index()?;
// The file must exist on disk before it can be added to the index.
std::fs::write(dir.path().join("test.txt"), "hello")?;
index.add_path(Path::new("test.txt"))?;
index.write()?;
index.write_tree()?
};
let tree = repo.find_tree(tree_id)?;
// Empty parent list: this is the root commit, and it becomes HEAD.
repo.commit(Some("HEAD"), &sig, &sig, "Initial commit", &tree, &[])?;
}
Ok((dir, repo))
}
// Opening the fixture repository yields a working directory that exists.
#[test]
fn test_open_repo() -> Result<()> {
let (dir, _repo) = create_test_repo()?;
let history = GitHistory::open(dir.path())?;
assert!(history.repo_root()?.exists());
Ok(())
}
// `is_git_repo` is true for the fixture and false for a fresh empty temp dir.
#[test]
fn test_is_git_repo() -> Result<()> {
let (dir, _repo) = create_test_repo()?;
assert!(GitHistory::is_git_repo(dir.path()));
let non_repo = tempdir()?;
assert!(!GitHistory::is_git_repo(non_repo.path()));
Ok(())
}
// With no `since` cutoff, the single fixture commit is returned with its message.
#[test]
fn test_get_recent_commits() -> Result<()> {
let (dir, _repo) = create_test_repo()?;
let history = GitHistory::open(dir.path())?;
let commits = history.get_recent_commits(10, None)?;
assert_eq!(commits.len(), 1);
assert_eq!(commits[0].message, "Initial commit");
Ok(())
}
// The root commit that introduced `test.txt` is reported as touching it.
#[test]
fn test_get_file_commits() -> Result<()> {
let (dir, _repo) = create_test_repo()?;
let history = GitHistory::open(dir.path())?;
let commits = history.get_file_commits("test.txt", 10)?;
assert_eq!(commits.len(), 1);
Ok(())
}
}