use anyhow::{Context as _, Result};
use chrono::{DateTime, TimeZone, Utc};
use git2::{BlameOptions, DiffOptions, Oid, Repository, Sort};
use rayon::prelude::*;
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
/// Summary of one commit: identity, time, message, author, and per-file line counts.
#[derive(Debug, Clone)]
pub struct CommitStats {
/// Object id of the commit.
pub hash: git2::Oid,
/// Commit time (seconds reported by `Commit::time`) expressed in UTC. The
/// constructors in this file fall back to the current time when the
/// timestamp cannot be represented.
pub date: DateTime<Utc>,
/// Commit message text; empty when git2 yields no message string.
pub message: String,
/// Author e-mail address; empty when git2 yields no e-mail string.
pub author_email: String,
/// Per-file statistics. Left empty by `commit_to_basic_stats`.
pub files: Vec<FileStats>,
}
/// Line-level change counts for one file within a commit's diff.
#[derive(Debug, Clone)]
pub struct FileStats {
/// Path of the file on the "new" side of the diff delta.
pub path: PathBuf,
/// Number of diff lines whose origin is `'+'`.
pub additions: usize,
/// Number of diff lines whose origin is `'-'`.
pub deletions: usize,
}
/// Blame attribution for a single line of a file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlameLineInfo {
/// Name from the blame hunk's final signature (`"Unknown"` when unavailable).
pub author: String,
/// String form of the blame hunk's final commit id.
pub commit_hash: String,
}
/// Blame result for a whole file.
#[derive(Debug, Clone, Default)]
pub struct BlameData {
/// Attribution keyed by line number, as derived from each hunk's
/// `final_start_line()` plus the offset inside the hunk.
pub lines: HashMap<usize, BlameLineInfo>,
}
/// Handle to a non-bare git repository, identified by its working directory.
///
/// Only the path is stored; each operation opens its own `git2::Repository`.
pub struct Git2Repository {
/// Working-directory path obtained from `Repository::workdir()` in `open`.
repo_path: PathBuf,
}
impl Git2Repository {
/// Discovers the git repository containing `path` and records its working directory.
///
/// # Errors
/// Returns an error when no repository can be discovered from `path`, or
/// when the discovered repository is bare (it has no working directory).
pub fn open(path: &Path) -> Result<Self> {
    let discovered = Repository::discover(path)
        .with_context(|| format!("Failed to discover git repository at {}", path.display()))?;

    match discovered.workdir() {
        Some(workdir) => Ok(Self {
            repo_path: workdir.to_path_buf(),
        }),
        None => Err(anyhow::anyhow!("Bare repositories are not supported")),
    }
}
/// Returns the working-directory path recorded by [`Git2Repository::open`].
pub fn repo_path(&self) -> &Path {
    self.repo_path.as_path()
}
/// Opens a fresh `git2::Repository` handle for the stored working directory.
///
/// # Errors
/// Returns an error (with the path in its context) when the repository
/// cannot be opened.
fn open_repo(&self) -> Result<Repository> {
    let location = &self.repo_path;
    Repository::open(location)
        .with_context(|| format!("Failed to open repository at {}", location.display()))
}
/// Counts the commits reachable from `HEAD` that touch `file_path`.
///
/// `file_path` may be absolute (inside the working directory) or already
/// relative to it. Revwalk entries and commits that fail to load are skipped.
///
/// # Errors
/// Returns an error when the repository cannot be opened or the revision
/// walk cannot be set up.
pub fn count_file_commits(&self, file_path: &Path) -> Result<usize> {
    let repo = self.open_repo()?;
    let target = self.to_relative_path(file_path);

    let mut walker = repo.revwalk()?;
    walker.push_head()?;
    walker.set_sorting(Sort::TIME)?;

    let mut touching = 0usize;
    for step in walker {
        // Best effort: unreadable revwalk entries or commits are ignored.
        if let Ok(commit) = step.and_then(|oid| repo.find_commit(oid)) {
            if self.commit_touches_file(&repo, &commit, &target) {
                touching += 1;
            }
        }
    }
    Ok(touching)
}
/// Returns how many whole days lie between the oldest commit touching
/// `file_path` and `now`.
///
/// The result is `0` when no commit touches the file, when the commit
/// timestamp cannot be represented, or when the commit lies in the future
/// relative to `now`.
///
/// # Errors
/// Returns an error when the repository cannot be opened or the revision
/// walk cannot be set up.
pub fn file_age_days(&self, file_path: &Path, now: DateTime<Utc>) -> Result<u32> {
    let repo = self.open_repo()?;
    let target = self.to_relative_path(file_path);

    let mut walker = repo.revwalk()?;
    walker.push_head()?;
    // Oldest first, so the first hit is the commit that introduced the file.
    walker.set_sorting(Sort::TIME | Sort::REVERSE)?;

    let mut earliest = None;
    for step in walker {
        if let Ok(commit) = step.and_then(|oid| repo.find_commit(oid)) {
            if self.commit_touches_file(&repo, &commit, &target) {
                earliest = Some(commit);
                break;
            }
        }
    }

    let days = earliest
        .and_then(|commit| Utc.timestamp_opt(commit.time().seconds(), 0).single())
        .map(|created| now.signed_duration_since(created).num_days().max(0) as u32)
        .unwrap_or(0);
    Ok(days)
}
/// Collects the distinct author e-mail addresses of all commits reachable
/// from `HEAD` that touch `file_path`.
///
/// Commits whose author has no e-mail string are not represented in the set.
///
/// # Errors
/// Returns an error when the repository cannot be opened or the revision
/// walk cannot be set up.
pub fn file_authors(&self, file_path: &Path) -> Result<HashSet<String>> {
    let repo = self.open_repo()?;
    let target = self.to_relative_path(file_path);

    let mut walker = repo.revwalk()?;
    walker.push_head()?;
    walker.set_sorting(Sort::TIME)?;

    let mut emails = HashSet::new();
    for step in walker {
        if let Ok(commit) = step.and_then(|oid| repo.find_commit(oid)) {
            if !self.commit_touches_file(&repo, &commit, &target) {
                continue;
            }
            if let Some(email) = commit.author().email() {
                emails.insert(email.to_owned());
            }
        }
    }
    Ok(emails)
}
/// Returns the UTC time of the most recent commit touching `file_path`.
///
/// Yields `None` when no commit touches the file or when the commit time
/// cannot be represented as a `DateTime<Utc>`.
///
/// # Errors
/// Returns an error when the repository cannot be opened or the revision
/// walk cannot be set up.
pub fn file_last_modified(&self, file_path: &Path) -> Result<Option<DateTime<Utc>>> {
    let repo = self.open_repo()?;
    let target = self.to_relative_path(file_path);

    let mut walker = repo.revwalk()?;
    walker.push_head()?;
    // Newest first, so the first hit is the latest modification.
    walker.set_sorting(Sort::TIME)?;

    let newest = walker
        .filter_map(|step| step.ok())
        .filter_map(|oid| repo.find_commit(oid).ok())
        .find(|candidate| self.commit_touches_file(&repo, candidate, &target));

    Ok(newest.and_then(|commit| Utc.timestamp_opt(commit.time().seconds(), 0).single()))
}
/// Computes [`CommitStats`] (including per-file line counts) for every
/// commit reachable from `HEAD`, processing commits in parallel.
///
/// Commits that cannot be loaded or diffed, and commits whose diff against
/// their first parent contains no files, are omitted from the result.
///
/// # Errors
/// Returns an error when the repository cannot be opened or the revision
/// walk cannot be set up.
pub fn all_commits_with_stats(&self) -> Result<Vec<CommitStats>> {
    // Collect the ids sequentially first; the parallel workers below each
    // use their own repository handle.
    let oids: Vec<Oid> = {
        let repo = self.open_repo()?;
        let mut revwalk = repo.revwalk()?;
        revwalk.push_head()?;
        revwalk.set_sorting(Sort::TIME)?;
        revwalk.filter_map(|r| r.ok()).collect()
    };

    let repo_path = self.repo_path.as_path();
    let commits: Vec<CommitStats> = oids
        .into_par_iter()
        // `map_init` opens one repository per rayon job instead of one per
        // commit, which avoids re-reading the repository for every commit.
        .map_init(
            || Repository::open(repo_path).ok(),
            |repo, oid| {
                let repo = repo.as_ref()?;
                let commit = repo.find_commit(oid).ok()?;
                Self::commit_to_stats_static(repo, &commit).ok().flatten()
            },
        )
        .filter_map(|stats| stats)
        .collect();
    Ok(commits)
}
/// Builds [`CommitStats`] for `commit` by diffing its tree against the tree
/// of its first parent (or against an empty tree for a root commit).
///
/// Returns `Ok(None)` when the diff lists no files.
///
/// # Errors
/// Returns an error when the commit tree cannot be loaded or the diff
/// cannot be produced or iterated.
fn commit_to_stats_static(
    repo: &Repository,
    commit: &git2::Commit,
) -> Result<Option<CommitStats>> {
    let first_parent_tree = commit.parents().next().and_then(|p| p.tree().ok());
    let commit_tree = commit.tree()?;
    let diff = repo.diff_tree_to_tree(first_parent_tree.as_ref(), Some(&commit_tree), None)?;

    // path -> (added lines, removed lines)
    let mut per_file: HashMap<PathBuf, (usize, usize)> = HashMap::new();

    // Register every changed file up front so files without countable
    // lines still appear with zero counts.
    for delta in diff.deltas() {
        if let Some(path) = delta.new_file().path() {
            per_file.entry(path.to_path_buf()).or_default();
        }
    }

    diff.foreach(
        &mut |_, _| true,
        None,
        None,
        Some(&mut |delta, _hunk, line| {
            if let Some(path) = delta.new_file().path() {
                let (added, removed) = per_file.entry(path.to_path_buf()).or_default();
                match line.origin() {
                    '+' => *added += 1,
                    '-' => *removed += 1,
                    _ => {}
                }
            }
            true
        }),
    )?;

    if per_file.is_empty() {
        return Ok(None);
    }

    let files: Vec<FileStats> = per_file
        .into_iter()
        .map(|(path, (additions, deletions))| FileStats {
            path,
            additions,
            deletions,
        })
        .collect();

    // Falls back to "now" when the commit timestamp is not representable.
    let date = Utc
        .timestamp_opt(commit.time().seconds(), 0)
        .single()
        .unwrap_or_else(Utc::now);

    Ok(Some(CommitStats {
        hash: commit.id(),
        date,
        message: commit.message().unwrap_or("").to_string(),
        author_email: commit.author().email().unwrap_or("").to_string(),
        files,
    }))
}
/// Runs `git blame` on `file_path` and returns the attribution of each line.
///
/// Line numbers in the result start at each hunk's `final_start_line()`.
/// Authors without a readable name are reported as `"Unknown"`.
///
/// # Errors
/// Returns an error when the repository cannot be opened or the file cannot
/// be blamed (for example because it is not tracked).
pub fn blame_file(&self, file_path: &Path) -> Result<BlameData> {
    let repo = self.open_repo()?;
    let relative_path = self.to_relative_path(file_path);

    let mut opts = BlameOptions::new();
    opts.track_copies_same_file(true);

    // Hand the path to git2 as-is: converting it through a lossy string
    // would corrupt file names that are not valid UTF-8.
    let blame = repo
        .blame_file(&relative_path, Some(&mut opts))
        .with_context(|| format!("Failed to blame file {}", file_path.display()))?;

    let mut lines = HashMap::new();
    for hunk in blame.iter() {
        let info = BlameLineInfo {
            author: hunk.final_signature().name().unwrap_or("Unknown").to_string(),
            commit_hash: hunk.final_commit_id().to_string(),
        };
        let first_line = hunk.final_start_line();
        let end_line = first_line + hunk.lines_in_hunk();
        lines.extend((first_line..end_line).map(|line_num| (line_num, info.clone())));
    }
    Ok(BlameData { lines })
}
/// Finds the oldest commit in which `file_path` contains the literal text
/// `pattern` while its first parent's version of the file does not.
///
/// Returns the commit id and its UTC time, or `None` when no such commit
/// exists. A matching commit whose timestamp cannot be represented is
/// skipped and the search continues.
///
/// # Errors
/// Returns an error when the repository cannot be opened, the revision walk
/// cannot be set up, or a commit or blob fails to load during the search.
pub fn find_introduction(
    &self,
    file_path: &Path,
    pattern: &str,
) -> Result<Option<(git2::Oid, DateTime<Utc>)>> {
    let repo = self.open_repo()?;
    let target = self.to_relative_path(file_path);

    let mut walker = repo.revwalk()?;
    walker.push_head()?;
    // Oldest first, so the first hit is the introduction.
    walker.set_sorting(Sort::TIME | Sort::REVERSE)?;

    for oid in walker.flatten() {
        let commit = repo.find_commit(oid)?;
        if !self.commit_introduces_pattern(&repo, &commit, &target, pattern)? {
            continue;
        }
        match Utc.timestamp_opt(commit.time().seconds(), 0).single() {
            Some(date) => return Ok(Some((oid, date))),
            None => continue,
        }
    }
    Ok(None)
}
/// Lists commits, newest first, whose diff of `file_path` matches the
/// regular expression `pattern`.
///
/// The walk starts at `HEAD` and stops (exclusively) when it reaches
/// `after_commit`; an id that never occurs in the history, such as the zero
/// id, therefore scans the whole history. The returned stats carry no
/// per-file information.
///
/// # Errors
/// Returns an error when the repository cannot be opened, `pattern` is not a
/// valid regular expression, the revision walk cannot be set up, or a commit
/// cannot be loaded or diffed.
pub fn find_modifications(
    &self,
    file_path: &Path,
    pattern: &str,
    after_commit: git2::Oid,
) -> Result<Vec<CommitStats>> {
    let repo = self.open_repo()?;
    let relative_path = self.to_relative_path(file_path);
    // Compile once; the same regex is reused for every commit.
    let regex = regex::Regex::new(pattern)?;

    let mut revwalk = repo.revwalk()?;
    revwalk.push_head()?;
    revwalk.set_sorting(Sort::TIME)?;

    let newer_than_boundary = revwalk
        .filter_map(|r| r.ok())
        .take_while(|oid| *oid != after_commit);

    let mut results = Vec::new();
    for oid in newer_than_boundary {
        let commit = repo.find_commit(oid)?;
        if !self.commit_modifies_pattern(&repo, &commit, &relative_path, &regex)? {
            continue;
        }
        if let Some(stats) = self.commit_to_basic_stats(&commit)? {
            results.push(stats);
        }
    }
    Ok(results)
}
/// Counts the commits reachable from `HEAD` that touch `file_path` and whose
/// message is classified as a bug fix by `is_bug_fix_message`.
///
/// Commits without a readable message are not counted.
///
/// # Errors
/// Returns an error when the repository cannot be opened or the revision
/// walk cannot be set up.
pub fn count_bug_fixes(&self, file_path: &Path) -> Result<usize> {
    let repo = self.open_repo()?;
    let target = self.to_relative_path(file_path);

    let mut walker = repo.revwalk()?;
    walker.push_head()?;
    walker.set_sorting(Sort::TIME)?;

    let mut fixes = 0usize;
    for step in walker {
        let commit = match step.and_then(|oid| repo.find_commit(oid)) {
            Ok(commit) => commit,
            Err(_) => continue,
        };
        let is_fix = self.commit_touches_file(&repo, &commit, &target)
            && commit.message().map_or(false, is_bug_fix_message);
        if is_fix {
            fixes += 1;
        }
    }
    Ok(fixes)
}
/// Strips the repository working directory from the front of `path`.
///
/// Paths that do not start with the working directory are returned unchanged.
fn to_relative_path(&self, path: &Path) -> PathBuf {
    match path.strip_prefix(&self.repo_path) {
        Ok(relative) => relative.to_path_buf(),
        Err(_) => path.to_path_buf(),
    }
}
/// Reports whether `commit` changes `file_path` relative to its first parent.
///
/// Returns `false` when the file does not exist in the commit's tree (so a
/// commit that deletes the file is not reported) and when the tree or diff
/// cannot be obtained.
fn commit_touches_file(
    &self,
    repo: &Repository,
    commit: &git2::Commit,
    file_path: &Path,
) -> bool {
    let tree = match commit.tree() {
        Ok(t) => t,
        Err(_) => return false,
    };
    let file_str = file_path.to_string_lossy();
    if tree.get_path(Path::new(file_str.as_ref())).is_err() {
        return false;
    }

    let parent_tree = commit.parents().next().and_then(|p| p.tree().ok());

    let mut diff_opts = DiffOptions::new();
    diff_opts.pathspec(file_str.as_ref());
    // The pathspec is a concrete file path, not a glob. Without this flag a
    // name containing `[`, `*` or `?` would be interpreted as an fnmatch
    // pattern and fail to match itself.
    diff_opts.disable_pathspec_match(true);

    let diff =
        match repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), Some(&mut diff_opts)) {
            Ok(d) => d,
            Err(_) => return false,
        };
    let touched = diff.deltas().next().is_some();
    touched
}
/// Builds a [`CommitStats`] holding only commit metadata; `files` is left empty.
///
/// The current implementation always returns `Ok(Some(_))`. The date falls
/// back to the current time when the commit timestamp is not representable.
fn commit_to_basic_stats(&self, commit: &git2::Commit) -> Result<Option<CommitStats>> {
    let seconds = commit.time().seconds();
    let date = Utc
        .timestamp_opt(seconds, 0)
        .single()
        .unwrap_or_else(Utc::now);
    let author = commit.author();

    let stats = CommitStats {
        hash: commit.id(),
        date,
        message: commit.message().map(str::to_owned).unwrap_or_default(),
        author_email: author.email().map(str::to_owned).unwrap_or_default(),
        files: Vec::new(),
    };
    Ok(Some(stats))
}
/// Reports whether `commit` is the point where the literal text `pattern`
/// first appears in `file_path` relative to its first parent.
///
/// The answer is `true` when the commit's version of the file contains
/// `pattern` and the first parent's version does not (or there is no parent,
/// or the parent has no readable copy of the file). File contents that are
/// not valid UTF-8 are treated as empty.
///
/// # Errors
/// Returns an error when the commit tree, the parent tree, or the commit's
/// blob for the file cannot be loaded.
fn commit_introduces_pattern(
    &self,
    repo: &Repository,
    commit: &git2::Commit,
    file_path: &Path,
    pattern: &str,
) -> Result<bool> {
    let tree = commit.tree()?;
    let lossy = file_path.to_string_lossy();
    let lookup = Path::new(lossy.as_ref());

    // Current version: a missing file simply means "not introduced here".
    let blob = match tree.get_path(lookup) {
        Ok(entry) => repo.find_blob(entry.id())?,
        Err(_) => return Ok(false),
    };
    let present_now = std::str::from_utf8(blob.content())
        .unwrap_or("")
        .contains(pattern);
    if !present_now {
        return Ok(false);
    }

    let parent_commit = match commit.parents().next() {
        Some(parent) => parent,
        // Root commit containing the pattern: it was introduced here.
        None => return Ok(true),
    };
    let parent_tree = parent_commit.tree()?;

    // Parent version: lookup or blob failures count as "pattern absent".
    let present_before = parent_tree
        .get_path(lookup)
        .ok()
        .and_then(|entry| repo.find_blob(entry.id()).ok())
        .map(|parent_blob| {
            std::str::from_utf8(parent_blob.content())
                .unwrap_or("")
                .contains(pattern)
        })
        .unwrap_or(false);

    Ok(!present_before)
}
fn commit_modifies_pattern(
&self,
repo: &Repository,
commit: &git2::Commit,
file_path: &Path,
regex: ®ex::Regex,
) -> Result<bool> {
let parent = commit.parents().next();
let parent_tree = parent.and_then(|p| p.tree().ok());
let tree = commit.tree()?;
let mut diff_opts = DiffOptions::new();
diff_opts.pathspec(file_path.to_string_lossy().as_ref());
let diff =
repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), Some(&mut diff_opts))?;
let mut found = false;
diff.foreach(
&mut |_, _| true,
None,
None,
Some(&mut |_, _, line| {
if let Ok(content) = std::str::from_utf8(line.content()) {
if regex.is_match(content) {
found = true;
}
}
true
}),
)?;
Ok(found)
}
}
/// Splits an already lower-cased message into alphanumeric words.
///
/// Consecutive separators produce empty strings, which never match a keyword.
fn message_words(lowercase: &str) -> impl Iterator<Item = &str> {
    lowercase.split(|c: char| !c.is_alphanumeric())
}

/// Extracts the conventional-commit type from the header of a lower-cased
/// message, accepting an optional `(scope)` and breaking-change `!`.
///
/// `"chore(deps)!: bump"` yields `Some("chore")`; text whose part before the
/// first `:` is not a single alphanumeric word (plus optional scope) yields
/// `None`.
fn conventional_commit_type(lowercase: &str) -> Option<&str> {
    let (header, _) = lowercase.split_once(':')?;
    let header = header.trim_start();
    let header = header.strip_suffix('!').unwrap_or(header);
    let commit_type = match header.find('(') {
        Some(open) if header.ends_with(')') => &header[..open],
        Some(_) => return None,
        None => header,
    };
    if !commit_type.is_empty() && commit_type.chars().all(|c| c.is_ascii_alphanumeric()) {
        Some(commit_type)
    } else {
        None
    }
}

/// Returns `true` when a commit message describes a bug fix.
///
/// A message qualifies when it is not excluded by [`is_excluded_commit`] and
/// contains one of the whole words `bug`, `fix`, `fixes`, `fixed`, `fixing`
/// or `hotfix` (case-insensitive). Substrings do not count, so `prefix` and
/// `debugging` are not matches.
pub fn is_bug_fix_message(message: &str) -> bool {
    if is_excluded_commit(message) {
        return false;
    }
    let lowercase = message.to_lowercase();
    let found = message_words(&lowercase).any(|word| {
        matches!(
            word,
            "bug" | "fix" | "fixes" | "fixed" | "fixing" | "hotfix"
        )
    });
    found
}

/// Returns `true` for commit messages that must never count as bug fixes.
///
/// Excluded are:
/// - `style`, `chore`, `docs` and `test` commits, recognised either by the
///   text `type:` anywhere in the message or by a conventional-commit header
///   with scope and/or `!` (for example `chore(deps):` or `style(ui)!:`);
/// - messages mentioning `formatting`, `linting`, `whitespace` or `typo`;
/// - `refactor` commits that mention no bug-related word (`bug`, `fix`,
///   `fixes`, `fixed`, `fixing`, `issue`, `hotfix`).
fn is_excluded_commit(commit_line: &str) -> bool {
    // (conventional type, legacy marker matched anywhere in the message)
    const EXCLUDED_TYPES: [(&str, &str); 4] = [
        ("style", "style:"),
        ("chore", "chore:"),
        ("docs", "docs:"),
        ("test", "test:"),
    ];
    const EXCLUSION_KEYWORDS: [&str; 4] = ["formatting", "linting", "whitespace", "typo"];

    let lowercase = commit_line.to_lowercase();
    let commit_type = conventional_commit_type(&lowercase);

    let has_excluded_type = EXCLUDED_TYPES
        .iter()
        .any(|&(ty, marker)| commit_type == Some(ty) || lowercase.contains(marker));
    if has_excluded_type {
        return true;
    }

    if EXCLUSION_KEYWORDS
        .iter()
        .any(|keyword| lowercase.contains(keyword))
    {
        return true;
    }

    if commit_type == Some("refactor") || lowercase.contains("refactor:") {
        let has_bug_keyword = message_words(&lowercase).any(|word| {
            matches!(
                word,
                "bug" | "fix" | "fixes" | "fixed" | "fixing" | "issue" | "hotfix"
            )
        });
        if !has_bug_keyword {
            return true;
        }
    }
    false
}
/// Collects the distinct author names blamed for the lines
/// `start_line..=end_line` (both inclusive).
///
/// Lines without blame information are skipped, as are empty author names
/// and the placeholder `"Not Committed Yet"`. An inverted range yields an
/// empty set.
pub fn extract_authors_for_range(
    blame_data: &BlameData,
    start_line: usize,
    end_line: usize,
) -> HashSet<String> {
    let mut authors = HashSet::new();
    for line_no in start_line..=end_line {
        if let Some(info) = blame_data.lines.get(&line_no) {
            let name = info.author.as_str();
            if name.is_empty() || name == "Not Committed Yet" {
                continue;
            }
            authors.insert(name.to_owned());
        }
    }
    authors
}
#[cfg(test)]
mod tests {
use super::*;
use std::process::Command;
use tempfile::TempDir;
fn setup_test_repo() -> Result<(TempDir, PathBuf)> {
let temp_dir = TempDir::new()?;
let repo_path = temp_dir.path().to_path_buf();
Command::new("git")
.arg("init")
.current_dir(&repo_path)
.output()?;
Command::new("git")
.args(["config", "user.email", "test@example.com"])
.current_dir(&repo_path)
.output()?;
Command::new("git")
.args(["config", "user.name", "Test User"])
.current_dir(&repo_path)
.output()?;
Ok((temp_dir, repo_path))
}
fn create_and_commit_file(
repo_path: &Path,
file_name: &str,
content: &str,
message: &str,
) -> Result<()> {
let file_path = repo_path.join(file_name);
std::fs::write(&file_path, content)?;
Command::new("git")
.args(["add", file_name])
.current_dir(repo_path)
.output()?;
Command::new("git")
.args(["commit", "-m", message])
.current_dir(repo_path)
.output()?;
Ok(())
}
// Bug-fix wording is detected; excluded commit types and look-alike words
// (`prefix`, `debugging`) are not.
#[test]
fn test_is_bug_fix_message() {
    let bug_fixes = [
        "fix: resolve login bug",
        "Fixed the payment issue",
        "Bug fix for issue #123",
        "hotfix: urgent fix",
    ];
    for message in bug_fixes.iter() {
        assert!(is_bug_fix_message(message));
    }

    let not_bug_fixes = [
        "style: apply formatting fixes",
        "chore: update dependencies",
        "docs: fix typo",
        "refactor: improve prefix handling",
        "Add debugging utilities",
    ];
    for message in not_bug_fixes.iter() {
        assert!(!is_bug_fix_message(message));
    }
}
// Authors are de-duplicated across the requested line range.
#[test]
fn test_extract_authors_for_range() {
    let entries = [(1usize, "Alice", "abc"), (2, "Bob", "def"), (3, "Alice", "abc")];
    let lines: HashMap<usize, BlameLineInfo> = entries
        .iter()
        .map(|&(line, author, hash)| {
            (
                line,
                BlameLineInfo {
                    author: author.into(),
                    commit_hash: hash.into(),
                },
            )
        })
        .collect();
    let blame_data = BlameData { lines };

    let authors = extract_authors_for_range(&blame_data, 1, 3);

    assert_eq!(authors.len(), 2);
    assert!(authors.contains("Alice"));
    assert!(authors.contains("Bob"));
}
// Opening a repository records its working directory (compared after
// canonicalisation, falling back to the raw paths when that fails).
#[test]
fn test_git2_repository_open() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    create_and_commit_file(&repo_path, "test.rs", "fn main() {}", "Initial commit")?;

    let opened = Git2Repository::open(&repo_path)?;

    let reported = opened.repo_path();
    let actual = match reported.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => reported.to_path_buf(),
    };
    let expected = match repo_path.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => repo_path,
    };
    assert_eq!(actual, expected);
    Ok(())
}
// Every commit that changes the file is counted.
#[test]
fn test_git2_repository_count_commits() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    let revisions = [
        ("fn main() {}", "Initial commit"),
        ("fn main() { println!(); }", "Second"),
        ("fn main() { dbg!(); }", "Third"),
    ];
    for &(content, message) in revisions.iter() {
        create_and_commit_file(&repo_path, "test.rs", content, message)?;
    }

    let repo = Git2Repository::open(&repo_path)?;
    let commit_count = repo.count_file_commits(Path::new("test.rs"))?;

    assert_eq!(commit_count, 3);
    Ok(())
}
// The configured committer e-mail shows up as an author of the file.
#[test]
fn test_git2_repository_file_authors() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    create_and_commit_file(&repo_path, "test.rs", "fn main() {}", "Initial commit")?;

    let emails = Git2Repository::open(&repo_path)?.file_authors(Path::new("test.rs"))?;

    assert!(emails.contains("test@example.com"));
    Ok(())
}
// Blame data contains an entry for each of the three committed lines.
#[test]
fn test_git2_repository_blame() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    create_and_commit_file(
        &repo_path,
        "test.txt",
        "line1\nline2\nline3\n",
        "Initial commit",
    )?;

    let blame = Git2Repository::open(&repo_path)?.blame_file(Path::new("test.txt"))?;

    for line_number in 1..=3 {
        assert!(blame.lines.contains_key(&line_number));
    }
    Ok(())
}
// Both commits of a two-commit history are reported.
#[test]
fn test_git2_repository_all_commits_with_stats() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    let history = [
        ("test.rs", "fn main() {}", "Initial commit"),
        ("other.rs", "fn other() {}", "Second commit"),
    ];
    for &(file, content, message) in history.iter() {
        create_and_commit_file(&repo_path, file, content, message)?;
    }

    let stats = Git2Repository::open(&repo_path)?.all_commits_with_stats()?;

    assert_eq!(stats.len(), 2);
    Ok(())
}
// Of four commits, only the `fix:` and `hotfix:` ones count as bug fixes.
#[test]
fn test_git2_repository_count_bug_fixes() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    let history = [
        ("fn main() {}", "Initial commit"),
        ("fn main() { v2 }", "fix: resolve bug"),
        ("fn main() { v3 }", "feat: add feature"),
        ("fn main() { v4 }", "hotfix: urgent"),
    ];
    for &(content, message) in history.iter() {
        create_and_commit_file(&repo_path, "test.rs", content, message)?;
    }

    let repo = Git2Repository::open(&repo_path)?;
    let fix_count = repo.count_bug_fixes(Path::new("test.rs"))?;

    assert_eq!(fix_count, 2);
    Ok(())
}
fn get_head_oid(repo_path: &Path) -> Result<git2::Oid> {
let output = Command::new("git")
.args(["rev-parse", "HEAD"])
.current_dir(repo_path)
.output()?;
let hash = String::from_utf8_lossy(&output.stdout).trim().to_string();
Ok(git2::Oid::from_str(&hash)?)
}
// Each commit that rewrites the `marker` line is reported when the walk is
// not bounded (the zero id never occurs in the history).
#[test]
fn test_find_modifications_finds_commits_modifying_pattern() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    let history = [
        ("let marker = 0;", "Initial"),
        ("let marker = 1;", "First modification"),
        ("let marker = 2;", "Second modification"),
    ];
    for &(content, message) in history.iter() {
        create_and_commit_file(&repo_path, "test.rs", content, message)?;
    }

    let repo = Git2Repository::open(&repo_path)?;
    let modifications =
        repo.find_modifications(Path::new("test.rs"), "marker", git2::Oid::zero())?;

    assert_eq!(modifications.len(), 3, "Expected 3 modifications");
    let messages: Vec<&str> = modifications
        .iter()
        .map(|stats| stats.message.as_str())
        .collect();
    for &(_, expected) in history.iter() {
        assert!(
            messages.iter().any(|m| m.contains(expected)),
            "Should include {}",
            expected
        );
    }
    Ok(())
}
// A pattern that never appears in any diff yields no modifications.
#[test]
fn test_find_modifications_returns_empty_when_pattern_not_in_diff() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    let history = [
        ("fn stable_pattern() {}\n// placeholder", "Initial"),
        (
            "fn stable_pattern() {}\n// changed placeholder",
            "Modify placeholder only",
        ),
    ];
    for &(content, message) in history.iter() {
        create_and_commit_file(&repo_path, "test.rs", content, message)?;
    }

    let repo = Git2Repository::open(&repo_path)?;
    let untouched_modifications =
        repo.find_modifications(Path::new("test.rs"), "xyz_never_exists", git2::Oid::zero())?;

    assert!(
        untouched_modifications.is_empty(),
        "Expected no modifications for pattern that was never in the diff"
    );
    Ok(())
}
// The walk stops before the boundary commit: bounding at HEAD yields nothing,
// while the zero id (never reached) yields the whole history.
#[test]
fn test_find_modifications_stops_at_specified_commit() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    for version in 1..=4 {
        let content = format!("let x = {};", version);
        let message = format!("v{}", version);
        create_and_commit_file(&repo_path, "test.rs", &content, &message)?;
    }
    let head = get_head_oid(&repo_path)?;

    let repo = Git2Repository::open(&repo_path)?;
    let target = Path::new("test.rs");

    let bounded = repo.find_modifications(target, r"let x", head)?;
    assert!(
        bounded.is_empty(),
        "Expected no modifications when stopping at latest commit"
    );

    let unbounded = repo.find_modifications(target, r"let x", git2::Oid::zero())?;
    assert_eq!(unbounded.len(), 4, "Should find all 4 commits with zero OID");
    Ok(())
}
// The commit that first adds `special_marker` is reported, not the earlier
// commit without it nor the later commit that merely edits it.
#[test]
fn test_find_introduction_finds_first_occurrence() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    create_and_commit_file(&repo_path, "test.rs", "fn other() {}", "Initial")?;
    create_and_commit_file(
        &repo_path,
        "test.rs",
        "fn other() {}\nfn special_marker() {}",
        "Add marker",
    )?;
    // HEAD is now the commit that introduced the marker.
    let introducing_oid = get_head_oid(&repo_path)?;
    create_and_commit_file(
        &repo_path,
        "test.rs",
        "fn other() {}\nfn special_marker() { updated }",
        "Update",
    )?;

    let repo = Git2Repository::open(&repo_path)?;
    let found = repo.find_introduction(Path::new("test.rs"), "special_marker")?;

    assert!(found.is_some());
    let (found_oid, _date) = found.unwrap();
    assert_eq!(found_oid, introducing_oid);
    Ok(())
}
// A pattern absent from every version of the file has no introduction.
#[test]
fn test_find_introduction_returns_none_when_not_found() -> Result<()> {
    let (_temp, repo_path) = setup_test_repo()?;
    create_and_commit_file(&repo_path, "test.rs", "fn main() {}", "Initial")?;

    let outcome = Git2Repository::open(&repo_path)?
        .find_introduction(Path::new("test.rs"), "nonexistent_pattern")?;

    assert!(outcome.is_none());
    Ok(())
}
}